diff --git a/Numpy (26.02)/Numpy_Task.ipynb b/Numpy (26.02)/Numpy_Task.ipynb index 593ba20..586ceb8 100644 --- a/Numpy (26.02)/Numpy_Task.ipynb +++ b/Numpy (26.02)/Numpy_Task.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "id": "medieval-detail" }, @@ -25,16 +25,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "id": "entertaining-automation" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + }, + { + "data": { + "text/plain": "numpy.ndarray" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "python_list = [1, 12, 13, 45, 76, 45, 98, 0]\n", "print()\n", - "python_list = \n", - "print()" + "python_list = python_list if isinstance(python_list, np.ndarray) \\\n", + " else np.array(python_list)\n", + "print()\n", + "type(python_list)" ] }, { @@ -49,13 +68,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "id": "included-polymer" }, - "outputs": [], - "source": [ - "z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5]\n" + ] + } + ], + "source": [ + "z = np.array([1.5 for _ in range(10)])\n", "print(z)" ] }, @@ -71,13 +98,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "id": "alert-endorsement" }, - "outputs": [], - "source": [ - "z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 
0.\n", + " 0.]\n" + ] + } + ], + "source": [ + "z = np.zeros(25)\n", "print(z)" ] }, @@ -93,13 +129,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "id": "static-filing" }, - "outputs": [], - "source": [ - "ones = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n" + ] + } + ], + "source": [ + "ones = np.ones(12)\n", "print(ones)" ] }, @@ -116,13 +160,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": { "id": "outstanding-deviation" }, - "outputs": [], - "source": [ - "ones = \n", + "outputs": [ + { + "data": { + "text/plain": "(3, 4)" + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ones = ones.reshape((3,4))\n", "ones.shape" ] }, @@ -139,15 +192,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": { "id": "foster-memory" }, - "outputs": [], - "source": [ - "Z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 1 2 3 4 5]\n", + " [ 6 7 8 9 10]\n", + " [11 12 13 14 15]\n", + " [16 17 18 19 20]]\n", + "[[ 1 2 3 4 5]\n", + " [ 6 7 8 9 10]\n", + " [ 11 12 13 -99 15]\n", + " [ 16 17 18 19 20]]\n" + ] + } + ], + "source": [ + "Z = np.arange(1, 21, 1).reshape(4,5)\n", "print(Z)\n", - "\n", + "Z[2, 3] = -99\n", "print(Z)" ] }, @@ -164,15 +232,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": { "id": "magnetic-leone" }, - "outputs": [], - "source": [ - "first = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 9 1 -9 -4 9 -10 3 1 -7 7 8 7 -8 -9 8]\n", + "[ 8 -9 -8 7 8 7 -7 1 3 -10 9 -4 -9 1 9]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-10, 11, 15)\n", "print(first)\n", - "second = \n", + "second = first[::-1]\n", "print(second)" ] }, @@ -189,15 +266,32 @@ }, { 
"cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": { "id": "pharmaceutical-sigma" }, - "outputs": [], - "source": [ - "first = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 12 -15 -1 -11 13]\n", + " [ 12 -8 -12 -3 1]\n", + " [ 6 -5 -15 3 -6]\n", + " [ 15 12 10 -6 -14]\n", + " [ -2 0 -10 14 -14]]\n", + "[[ 12 225 1 121 13]\n", + " [ 12 64 144 9 1]\n", + " [ 6 25 225 3 36]\n", + " [ 15 12 10 36 196]\n", + " [ 4 0 100 14 196]]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-15, 16, 25).reshape(5, 5)\n", "print(first)\n", - "\n", + "first = np.array([x ** 2 if x < 0 else x for x in first.flatten()]).reshape(5, 5)\n", "print(first)" ] }, @@ -216,14 +310,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "metadata": { "id": "saving-conference" }, - "outputs": [], - "source": [ - "first = \n", - "print(first)\n" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ -8 3 -7 -14 14]\n", + " [ 14 -2 11 10 1]\n", + " [ 15 3 -2 -11 -6]]\n", + "Max: 15\n", + "Min: -14\n", + "Avg_cols: [ 7. 1.33333333 0.66666667 -5. 3. 
]\n", + "Avg_rows: [-2.4 6.8 -0.2]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-15, 16, 15).reshape(3, 5)\n", + "print(first)\n", + "print(\"Max:\", first.max())\n", + "print(\"Min:\", first.min())\n", + "print(\"Avg_cols:\", first.mean(axis=0))\n", + "print(\"Avg_rows:\", first.mean(axis=1))" ] }, { @@ -240,19 +352,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "metadata": { "id": "olympic-qatar" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ -5 -24 124]\n", + " [ -19 56 -188]]\n" + ] + } + ], "source": [ "a = np.random.randint(-10, 10, (2, 5))\n", "first_axis = np.random.randint(4, 6)\n", "b = np.random.randint(-10, 10, (first_axis, 3))\n", - "if :\n", - " print(a @ b)\n", + "if a.shape[1] == b.shape[0]:\n", + " print(a.dot(b))\n", "else:\n", - " " + " raise ValueError(f\"Cannot multiply: a has {a.shape[1]} columns but b has {b.shape[0]} rows\")" ] }, { @@ -268,15 +389,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 54, "metadata": { "id": "suffering-mauritius" }, - "outputs": [], - "source": [ - "mask = \n", - "matrix = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[11.34527969 2.01466258 11.74243638 10.33842813 11.48963295]\n", + " [ 4.62917535 4.5069499 5.53824699 5.35688835 2.13551617]\n", + " [ 5.34957499 11.11617866 8.60974402 3.08627017 11.31107221]\n", + " [ 8.58655168 7.32698461 4.28040029 4.99249972 8.13897787]\n", + " [ 5.12864719 8.06178645 2.31779939 5.93053843 4.709031 ]]\n", + "[[11.34527969 2.01466258 11.74243638 10.33842813 11.48963295]\n", + " [ 0. 4.5069499 5.53824699 5.35688835 2.13551617]\n", + " [ 0. 0. 8.60974402 3.08627017 11.31107221]\n", + " [ 0. 0. 0. 4.99249972 8.13897787]\n", + " [ 0. 0. 0. 0. 
4.709031 ]]\n" + ] + } + ], + "source": [ + "mask = np.random.uniform(2, 12, (5, 5))\n", + "print(mask)\n", "\n", + "matrix = np.copy(mask)\n", + "matrix[np.tril_indices(matrix.shape[0], -1)] = 0\n", "print(matrix)" ] }, @@ -293,15 +433,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": { "id": "refined-stuff" }, - "outputs": [], - "source": [ - "mask = \n", - "matrix = \n", - "\n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 0. 11.35101015 8.72700401 10.70494328 14.28039615]\n", + " [11.28112684 0. 10.36182253 7.84065481 8.1533886 ]\n", + " [11.0541833 13.41408266 0. 12.12184502 10.81467365]\n", + " [12.2724768 10.94786634 11.69426005 0. 9.78443676]\n", + " [10.92850546 9.04317546 7.82131952 7.68371555 0. ]]\n" + ] + } + ], + "source": [ + "mask = np.random.normal(10, 2, (5, 5))\n", + "matrix = np.copy(mask)\n", + "matrix[np.diag_indices(mask.shape[0])] = 0\n", "print(matrix)" ] }, @@ -317,17 +469,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "metadata": { "id": "french-fighter" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0 0 0 0 1]\n", + "[1 1 1 0 1]\n" + ] + }, + { + "data": { + "text/plain": "False" + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "a = np.random.randint(0,2,5)\n", "print(a)\n", "b = np.random.randint(0,2,5)\n", "print(b)\n", - "equal = \n", + "equal = np.array_equal(a, b)\n", "equal" ] }, @@ -347,18 +516,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "metadata": { "id": "close-daisy" }, - "outputs": [], - "source": [ - "r, c = \n", - "a = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.84373036 0.00397302 0.52309687 0.1018975 0.32168295 0.92905985\n", + " 0.7180733 0.07126375 0.20369508 0.67877644]\n", + " [0.33073912 0.05107029 
0.03593789 0.64748002 0.62716661 0.13137381\n", + " 0.27586571 0.43360165 0.83960686 0.79980386]\n", + " [0.99700093 0.36933418 0.48857643 0.56709103 0.26452354 0.09897186\n", + " 0.52805588 0.94799276 0.71129171 0.92899743]\n", + " [0.13356677 0.17532113 0.49374973 0.72863344 0.08648496 0.67612459\n", + " 0.96946029 0.76324935 0.86706841 0.38122644]]\n", + "10\n", + "[0.03593789 0.13356677 0.38122644 0.05107029 0.26452354 0.03593789\n", + " 0.99700093 0.08648496 0.72863344 0.03593789]\n" + ] + } + ], + "source": [ + "r, c = (np.random.randint(3, 8), np.random.randint(2, 13))\n", + "a = np.random.sample((r, c))\n", "print(a)\n", - "N = \n", + "N = np.random.randint(1, r * c // 2)\n", "print(N)\n", - "sample = \n", + "sample = np.random.choice(a.flatten(), N)\n", "print(sample)" ] }, @@ -376,15 +563,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 71, "metadata": { "id": "taken-fabric" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "array([1., 0., 0.])" + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "a = np.array([1, np.NaN, np.Inf], float)\n", - "\n", - "\n", + "a[np.isnan(a)] = 0\n", + "a[np.isinf(a)] = 0\n", "a" ] }, @@ -401,16 +597,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 87, "metadata": { "id": "imposed-digest" }, - "outputs": [], - "source": [ - "axis = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2 3 4 4]\n", + "4\n" + ] + } + ], + "source": [ + "axis = np.random.randint(2, 5, np.random.randint(2, 6))\n", "print(axis)\n", - "matrix = \n", - "print(...)" + "matrix = np.random.sample(axis)\n", + "print(len(matrix.shape))" ] }, { @@ -427,17 +632,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 89, "metadata": { "id": "concerned-anthropology" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"[[66.15597449 47.42057041 38.82998757]\n", + " [55.84056188 15.50344143 48.74772998]\n", + " [35.99536209 50.72372854 46.87417828]\n", + " [46.94622747 76.11995598 44.219867 ]\n", + " [33.83872696 48.41484734 54.11942771]\n", + " [51.94068577 38.57115887 54.534821 ]\n", + " [48.8739375 56.05043443 63.93989534]\n", + " [65.37294656 55.52770359 65.04007844]\n", + " [41.37469639 64.84316923 58.51511933]\n", + " [51.90204023 47.10458827 51.72679914]]\n", + "[0 0 1 1 2 2 2 0 1 0]\n", + "[66.15597448735122, 55.84056187596195, 50.723728535502666, 76.11995597772992, 54.11942770890298, 54.534821004397706, 63.939895335125925, 65.37294655600674, 64.8431692250061, 51.90204023111272]\n" + ] + } + ], "source": [ "matrix = np.random.normal(50, 10, (10,3))\n", "print(matrix)\n", - "indexes = \n", + "indexes = np.argmax(matrix, axis=1)\n", "print(indexes)\n", - "print(...)" + "print([matrix[i, j] for i, j in enumerate(indexes)])" ] } ], @@ -467,4 +691,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/Pandas (06.03)/Pandas. Task. Part 1.ipynb b/Pandas (06.03)/Pandas. Task. Part 1.ipynb index 5172e85..e8a450f 100644 --- a/Pandas (06.03)/Pandas. Task. Part 1.ipynb +++ b/Pandas (06.03)/Pandas. Task. 
Part 1.ipynb @@ -1 +1,460 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"anaconda-cloud":{},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.6"},"colab":{"name":"01_task_pandas.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"UTKVH3sMutTM"},"source":["**В задании предлагается с помощью Pandas ответить на несколько вопросов по данным репозитория UCI [Adult](https://archive.ics.uci.edu/ml/datasets/Adult)**"]},{"cell_type":"markdown","metadata":{"id":"3lUT-CqYutTO"},"source":["Уникальные значения признаков (больше информации по ссылке выше):\n","- age: continuous.\n","- workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.\n","- fnlwgt: continuous.\n","- education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.\n","- education-num: continuous.\n","- marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.\n","- occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.\n","- relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.\n","- race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.\n","- sex: Female, Male.\n","- capital-gain: continuous.\n","- capital-loss: continuous.\n","- hours-per-week: continuous.\n","- native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, 
China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. \n","- salary: >50K,<=50K"]},{"cell_type":"code","metadata":{"id":"6GzulHvOutTR"},"source":["import pandas as pd"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"SJ3LbaoiutTT","colab":{"base_uri":"https://localhost:8080/","height":380},"executionInfo":{"status":"ok","timestamp":1626441443051,"user_tz":-300,"elapsed":499,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"eab110b9-0f5f-4bcd-db91-328a0b391379"},"source":["data = pd.read_csv(\"https://raw.githubusercontent.com/aksenov7/Kaggle_competition_group/master/adult.data.csv\")\n","data.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n","
"],"text/plain":[" age workclass fnlwgt ... hours-per-week native-country salary\n","0 39 State-gov 77516 ... 40 United-States <=50K\n","1 50 Self-emp-not-inc 83311 ... 13 United-States <=50K\n","2 38 Private 215646 ... 40 United-States <=50K\n","3 53 Private 234721 ... 40 United-States <=50K\n","4 28 Private 338409 ... 40 Cuba <=50K\n","\n","[5 rows x 15 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"EpQFv8t1ds05"},"source":["# def married(row):\n","# return \"Married\" in row\n","data[\"married\"] = data[\"marital-status\"].apply(lambda row: \"Married\" in row)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":756},"id":"3Bb2mRTEeoJK","executionInfo":{"status":"ok","timestamp":1626441731759,"user_tz":-300,"elapsed":481,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"9dd7d83b-f51a-4e11-f6dc-035a844f81c9"},"source":["data"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalarymarried
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50KFalse
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50KTrue
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50KFalse
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50KTrue
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50KTrue
...................................................
3255627Private257302Assoc-acdm12Married-civ-spouseTech-supportWifeWhiteFemale0038United-States<=50KTrue
3255740Private154374HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>50KTrue
3255858Private151910HS-grad9WidowedAdm-clericalUnmarriedWhiteFemale0040United-States<=50KFalse
3255922Private201490HS-grad9Never-marriedAdm-clericalOwn-childWhiteMale0020United-States<=50KFalse
3256052Self-emp-inc287927HS-grad9Married-civ-spouseExec-managerialWifeWhiteFemale15024040United-States>50KTrue
\n","

32561 rows × 16 columns

\n","
"],"text/plain":[" age workclass fnlwgt ... native-country salary married\n","0 39 State-gov 77516 ... United-States <=50K False\n","1 50 Self-emp-not-inc 83311 ... United-States <=50K True\n","2 38 Private 215646 ... United-States <=50K False\n","3 53 Private 234721 ... United-States <=50K True\n","4 28 Private 338409 ... Cuba <=50K True\n","... ... ... ... ... ... ... ...\n","32556 27 Private 257302 ... United-States <=50K True\n","32557 40 Private 154374 ... United-States >50K True\n","32558 58 Private 151910 ... United-States <=50K False\n","32559 22 Private 201490 ... United-States <=50K False\n","32560 52 Self-emp-inc 287927 ... United-States >50K True\n","\n","[32561 rows x 16 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"markdown","metadata":{"id":"MoK8B5fIutTW"},"source":["**1. Сколько мужчин и женщин (признак *sex*) представлено в этом наборе данных?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"hdzky90TutTY"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"adF8lgVbutTZ"},"source":["**2. Каков средний возраст (признак *age*) женщин?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"K6C2qZ_zutTb"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"-Cz1S7-HutTd"},"source":["**3. Какова доля граждан Германии (признак *native-country*)?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"Y4mmqN6outTf"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Do-rEgaautTg"},"source":["**4-5. Каковы средние значения и среднеквадратичные отклонения возраста тех, кто получает более 50K в год (признак *salary*) и тех, кто получает менее 50K в год? 
**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"eSuk0CAnutTh"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rK9SwvI_utTj"},"source":["**6. Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование? (признак *education – Bachelors, Prof-school, Assoc-acdm, Assoc-voc, Masters* или *Doctorate*)**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"eygYabkdutTj"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4DqPASEsutTk"},"source":["**7. Выведите статистику возраста для каждой расы (признак *race*) и каждого пола. Используйте *groupby* и *describe*. Найдите таким образом максимальный возраст мужчин расы *Amer-Indian-Eskimo*.**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"fYkBDZMdutTl"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"cn-jYXhzutTl"},"source":["**8. Среди кого больше доля зарабатывающих много (>50K): среди женатых или холостых мужчин (признак *marital-status*)? Женатыми считаем тех, у кого *marital-status* начинается с *Married* (Married-civ-spouse, Married-spouse-absent или Married-AF-spouse), остальных считаем холостыми.**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"4hIQXgGAutTm"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Rsh8YvoXutTm"},"source":["**9. Какое максимальное число часов человек работает в неделю (признак *hours-per-week*)? Сколько людей работают такое количество часов и каков среди них процент зарабатывающих много?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"RK1JQSIZutTn"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"kUXV84AjutTn"},"source":["**10. 
Посчитайте среднее время работы (*hours-per-week*) зарабатывающих мало и много (*salary*) для каждой страны (*native-country*).**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"3gzYG3CDutTn"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "colab": { + "name": "01_task_pandas.ipynb", + "provenance": [], + "collapsed_sections": [] + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "UTKVH3sMutTM" + }, + "source": [ + "**В задании предлагается с помощью Pandas ответить на несколько вопросов по данным репозитория UCI [Adult](https://archive.ics.uci.edu/ml/datasets/Adult)**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3lUT-CqYutTO" + }, + "source": [ + "Уникальные значения признаков (больше информации по ссылке выше):\n", + "- age: continuous.\n", + "- workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.\n", + "- fnlwgt: continuous.\n", + "- education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.\n", + "- education-num: continuous.\n", + "- marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.\n", + "- occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, 
Protective-serv, Armed-Forces.\n", + "- relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.\n", + "- race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.\n", + "- sex: Female, Male.\n", + "- capital-gain: continuous.\n", + "- capital-loss: continuous.\n", + "- hours-per-week: continuous.\n", + "- native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. \n", + "- salary: >50K,<=50K" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6GzulHvOutTR" + }, + "source": [ + "import pandas as pd\n", + "import numpy as np" + ], + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "SJ3LbaoiutTT", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 380 + }, + "executionInfo": { + "status": "ok", + "timestamp": 1626441443051, + "user_tz": -300, + "elapsed": 499, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + } + }, + "outputId": "eab110b9-0f5f-4bcd-db91-328a0b391379" + }, + "source": [ + "data = pd.read_csv(\"https://raw.githubusercontent.com/aksenov7/Kaggle_competition_group/master/adult.data.csv\")\n", + "data.head()" + ], + "execution_count": 3, + "outputs": [ + { + "data": { + "text/plain": " age workclass fnlwgt education education-num \\\n0 39 State-gov 77516 Bachelors 13 \n1 50 Self-emp-not-inc 83311 Bachelors 13 \n2 38 Private 215646 HS-grad 9 \n3 53 Private 234721 11th 7 \n4 28 Private 338409 Bachelors 13 \n\n marital-status occupation relationship 
race sex \\\n0 Never-married Adm-clerical Not-in-family White Male \n1 Married-civ-spouse Exec-managerial Husband White Male \n2 Divorced Handlers-cleaners Not-in-family White Male \n3 Married-civ-spouse Handlers-cleaners Husband Black Male \n4 Married-civ-spouse Prof-specialty Wife Black Female \n\n capital-gain capital-loss hours-per-week native-country salary \n0 2174 0 40 United-States <=50K \n1 0 0 13 United-States <=50K \n2 0 0 40 United-States <=50K \n3 0 0 40 United-States <=50K \n4 0 0 40 Cuba <=50K ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n
" + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EpQFv8t1ds05" + }, + "source": [ + "# def married(row):\n", + "# return \"Married\" in row\n", + "data[\"married\"] = data[\"marital-status\"].apply(lambda row: \"Married\" in row)" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 756 + }, + "id": "3Bb2mRTEeoJK", + "executionInfo": { + "status": "ok", + "timestamp": 1626441731759, + "user_tz": -300, + "elapsed": 481, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + } + }, + "outputId": "9dd7d83b-f51a-4e11-f6dc-035a844f81c9" + }, + "source": [ + "data" + ], + "execution_count": 5, + "outputs": [ + { + "data": { + "text/plain": " age workclass fnlwgt education education-num \\\n0 39 State-gov 77516 Bachelors 13 \n1 50 Self-emp-not-inc 83311 Bachelors 13 \n2 38 Private 215646 HS-grad 9 \n3 53 Private 234721 11th 7 \n4 28 Private 338409 Bachelors 13 \n... ... ... ... ... ... \n32556 27 Private 257302 Assoc-acdm 12 \n32557 40 Private 154374 HS-grad 9 \n32558 58 Private 151910 HS-grad 9 \n32559 22 Private 201490 HS-grad 9 \n32560 52 Self-emp-inc 287927 HS-grad 9 \n\n marital-status occupation relationship race sex \\\n0 Never-married Adm-clerical Not-in-family White Male \n1 Married-civ-spouse Exec-managerial Husband White Male \n2 Divorced Handlers-cleaners Not-in-family White Male \n3 Married-civ-spouse Handlers-cleaners Husband Black Male \n4 Married-civ-spouse Prof-specialty Wife Black Female \n... ... ... ... ... ... 
\n32556 Married-civ-spouse Tech-support Wife White Female \n32557 Married-civ-spouse Machine-op-inspct Husband White Male \n32558 Widowed Adm-clerical Unmarried White Female \n32559 Never-married Adm-clerical Own-child White Male \n32560 Married-civ-spouse Exec-managerial Wife White Female \n\n capital-gain capital-loss hours-per-week native-country salary \\\n0 2174 0 40 United-States <=50K \n1 0 0 13 United-States <=50K \n2 0 0 40 United-States <=50K \n3 0 0 40 United-States <=50K \n4 0 0 40 Cuba <=50K \n... ... ... ... ... ... \n32556 0 0 38 United-States <=50K \n32557 0 0 40 United-States >50K \n32558 0 0 40 United-States <=50K \n32559 0 0 20 United-States <=50K \n32560 15024 0 40 United-States >50K \n\n married \n0 False \n1 True \n2 False \n3 True \n4 True \n... ... \n32556 True \n32557 True \n32558 False \n32559 False \n32560 True \n\n[32561 rows x 16 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalarymarried
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50KFalse
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50KTrue
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50KFalse
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50KTrue
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50KTrue
...................................................
3255627Private257302Assoc-acdm12Married-civ-spouseTech-supportWifeWhiteFemale0038United-States<=50KTrue
3255740Private154374HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>50KTrue
3255858Private151910HS-grad9WidowedAdm-clericalUnmarriedWhiteFemale0040United-States<=50KFalse
3255922Private201490HS-grad9Never-marriedAdm-clericalOwn-childWhiteMale0020United-States<=50KFalse
3256052Self-emp-inc287927HS-grad9Married-civ-spouseExec-managerialWifeWhiteFemale15024040United-States>50KTrue
\n

32561 rows × 16 columns

\n
" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MoK8B5fIutTW" + }, + "source": [ + "**1. Сколько мужчин и женщин (признак *sex*) представлено в этом наборе данных?**" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "hdzky90TutTY" + }, + "source": [ + "# Ваш код здесь\n", + "data['sex'].value_counts()" + ], + "execution_count": 15, + "outputs": [ + { + "data": { + "text/plain": "Male 21790\nFemale 10771\nName: sex, dtype: int64" + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "adF8lgVbutTZ" + }, + "source": [ + "**2. Каков средний возраст (признак *age*) женщин?**" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "K6C2qZ_zutTb" + }, + "source": [ + "# Ваш код здесь\n", + "data[data['sex'] == 'Female']['age'].mean()" + ], + "execution_count": 21, + "outputs": [ + { + "data": { + "text/plain": "36.85823043357163" + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-Cz1S7-HutTd" + }, + "source": [ + "**3. Какова доля граждан Германии (признак *native-country*)?**" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "Y4mmqN6outTf" + }, + "source": [ + "# Ваш код здесь\n", + "len(data[data['native-country'] == 'Germany'])/len(data)" + ], + "execution_count": 24, + "outputs": [ + { + "data": { + "text/plain": "0.004207487485028101" + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Do-rEgaautTg" + }, + "source": [ + "**4-5. Каковы средние значения и среднеквадратичные отклонения возраста тех, кто получает более 50K в год (признак *salary*) и тех, кто получает менее 50K в год? 
**" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "eSuk0CAnutTh" + }, + "source": [ + "# Your code here\n", + "gt50 = data[data[\"salary\"] == \">50K\"]['age']\n", + "lt50 = data[data[\"salary\"] == \"<=50K\"]['age']\n", + "print(\"Means: \", gt50.mean(), \", \", lt50.mean())\n", + "print(\"StDevs: \", np.std(gt50), \", \", np.std(lt50))" + ], + "execution_count": 29, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Means: 44.24984058155847 , 36.78373786407767\n", + "StDevs: 10.51835692766163 , 14.019804910115282\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rK9SwvI_utTj" + }, + "source": [ + "**6. Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование? (признак *education – Bachelors, Prof-school, Assoc-acdm, Assoc-voc, Masters* или *Doctorate*)**" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "eygYabkdutTj" + }, + "source": [ + "# The claim holds only if every >50K earner has one of these six education levels.\n", + "higher_ed = ['Bachelors', 'Prof-school', 'Assoc-acdm', 'Assoc-voc', 'Masters', 'Doctorate']\n", + "high_earner_education = data.loc[data['salary'] == '>50K', 'education']\n", + "\n", + "high_earner_education.isin(higher_ed).all()" + ], + "execution_count": 69, + "outputs": [ + { + "data": { + "text/plain": "False" + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4DqPASEsutTk" + }, + "source": [ + "**7. Выведите статистику возраста для каждой расы (признак *race*) и каждого пола. Используйте *groupby* и *describe*. 
Найдите таким образом максимальный возраст мужчин расы *Amer-Indian-Eskimo*.**" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "outputs": [ + { + "data": { + "text/plain": "82.0" + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here\n", + "stats = data.groupby(['race', 'sex'])['age']\n", + "stats.describe().loc['Amer-Indian-Eskimo'].loc['Male']['max']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cn-jYXhzutTl" + }, + "source": [ + "**8. Среди кого больше доля зарабатывающих много (>50K): среди женатых или холостых мужчин (признак *marital-status*)? Женатыми считаем тех, у кого *marital-status* начинается с *Married* (Married-civ-spouse, Married-spouse-absent или Married-AF-spouse), остальных считаем холостыми.**" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "4hIQXgGAutTm" + }, + "source": [ + "# True when married men have the larger share of >50K earners, False when single men do.\n", + "men = data[data['sex'] == 'Male']\n", + "is_married = men['marital-status'].str.startswith('Married')\n", + "(men.loc[is_married, 'salary'] == '>50K').mean() > (men.loc[~is_married, 'salary'] == '>50K').mean()" + ], + "execution_count": 123, + "outputs": [ + { + "data": { + "text/plain": "True" + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rsh8YvoXutTm" + }, + "source": [ + "**9. Какое максимальное число часов человек работает в неделю (признак *hours-per-week*)? 
Сколько людей работают такое количество часов и каков среди них процент зарабатывающих много?**" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "RK1JQSIZutTn" + }, + "source": [ + "# Named max_hours so the builtin max() is not shadowed for later cells.\n", + "max_hours = data['hours-per-week'].max()\n", + "print('Max: ', max_hours)\n", + "max_hpw = data[data['hours-per-week'] == max_hours]\n", + "count = len(max_hpw)\n", + "print('Count: ', count)\n", + "print('Percent: ', round(100 * len(max_hpw[max_hpw['salary'] == '>50K']) / count, 1))" + ], + "execution_count": 127, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Max: 99\n", + "Count: 85\n", + "Percent: 29.4\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kUXV84AjutTn" + }, + "source": [ + "**10. Посчитайте среднее время работы (*hours-per-week*) зарабатывающих мало и много (*salary*) для каждой страны (*native-country*).**" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "3gzYG3CDutTn" + }, + "source": [ + "# Your code here\n", + "data.groupby(['native-country', 'salary'])['hours-per-week'].mean()" + ], + "execution_count": 128, + "outputs": [ + { + "data": { + "text/plain": "native-country salary\n? <=50K 40.164760\n >50K 45.547945\nCambodia <=50K 41.416667\n >50K 40.000000\nCanada <=50K 37.914634\n ... \nUnited-States >50K 45.505369\nVietnam <=50K 37.193548\n >50K 39.200000\nYugoslavia <=50K 41.600000\n >50K 49.500000\nName: hours-per-week, Length: 82, dtype: float64" + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ] + } + ] +} \ No newline at end of file diff --git a/Pandas and EDA (12.03)/Pandas and EDA. Task.ipynb b/Pandas and EDA (12.03)/Pandas and EDA. Task.ipynb index bb60a1c..260fe24 100644 --- a/Pandas and EDA (12.03)/Pandas and EDA. Task.ipynb +++ b/Pandas and EDA (12.03)/Pandas and EDA. 
Task.ipynb @@ -1 +1,481 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.8"},"colab":{"name":"02_pandas_task.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"EmV0s8YY05p7"},"source":["- __ID__ - Unique number for each athlete\n","- __Name__ - Athlete's name\n","- __Sex__ - M or F\n","- __Age__ - Integer\n","- __Height__ - In centimeters\n","- __Weight__ - In kilograms\n","- __Team__ - Team name\n","- __NOC__ - National Olympic Committee 3-letter code\n","- __Games__ - Year and season\n","- __Year__ - Integer\n","- __Season__ - Summer or Winter\n","- __City__ - Host city\n","- __Sport__ - Sport\n","- __Event__ - Event\n","- __Medal__ - Gold, Silver, Bronze, or NA"]},{"cell_type":"code","metadata":{"id":"rVCrMDMh05p_"},"source":["import pandas as pd"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"D5Q4Z-JW05qC"},"source":["# не меняем путь!\n","PATH = 'https://github.com/aksenov7/Kaggle_competition_group/blob/master/athlete_events.csv.zip?raw=true'"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"mI0LtqkY4Kp-"},"source":["__0. 
Откройте файл используя необходимые параметры и не меняя переменную PATH__"]},{"cell_type":"code","metadata":{"id":"h5SQwBLr05qG","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1615627554682,"user_tz":-300,"elapsed":2477,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"882f9e83-5fd7-4c3b-b005-56917b15a0fd"},"source":["data = \n","data.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
IDNameSexAgeHeightWeightTeamNOCGamesYearSeasonCitySportEventMedal
01A DijiangM24.0180.080.0ChinaCHN1992 Summer1992SummerBarcelonaBasketballBasketball Men's BasketballNaN
12A LamusiM23.0170.060.0ChinaCHN2012 Summer2012SummerLondonJudoJudo Men's Extra-LightweightNaN
23Gunnar Nielsen AabyM24.0NaNNaNDenmarkDEN1920 Summer1920SummerAntwerpenFootballFootball Men's FootballNaN
34Edgar Lindenau AabyeM34.0NaNNaNDenmark/SwedenDEN1900 Summer1900SummerParisTug-Of-WarTug-Of-War Men's Tug-Of-WarGold
45Christine Jacoba AaftinkF21.0185.082.0NetherlandsNED1988 Winter1988WinterCalgarySpeed SkatingSpeed Skating Women's 500 metresNaN
\n","
"],"text/plain":[" ID Name ... Event Medal\n","0 1 A Dijiang ... Basketball Men's Basketball NaN\n","1 2 A Lamusi ... Judo Men's Extra-Lightweight NaN\n","2 3 Gunnar Nielsen Aaby ... Football Men's Football NaN\n","3 4 Edgar Lindenau Aabye ... Tug-Of-War Men's Tug-Of-War Gold\n","4 5 Christine Jacoba Aaftink ... Speed Skating Women's 500 metres NaN\n","\n","[5 rows x 15 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"stYR4EbV05qP"},"source":["__1. Сколько лет было самым молодым мужчинам и женщинам-участникам Олимпийских игр 1992 года ?__\n","- 16 и 15\n","- 14 и 13 \n","- 13 и 11\n","- 11 и 12"]},{"cell_type":"code","metadata":{"id":"HgiqBXtb05qR"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"GQ290dsi05qc"},"source":["__2. Каков был процент баскетболистов-мужчин среди всех мужчин-участников Олимпийских игр 2012 года? Округлите ответ до первого десятичного знака.__\n","\n","Здесь и далее при необходимости отбрасывайте дублированных спортсменов, чтобы считать только уникальных . \n","- 0.2\n","- 1.5 \n","- 2.5\n","- 7.7"]},{"cell_type":"code","metadata":{"id":"-fI5MqWP05qi"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"u5WrTgIC05qv"},"source":["__3. Каковы среднее и стандартное отклонение роста теннисисток, участвовавших в Олимпийских играх 2000 года? Округлите ответ до первого десятичного знака.__\n","\n","- 171.8 и 6.5\n","- 179.4 и 10\n","- 180.7 и 6.7\n","- 182.4 и 9.1 "]},{"cell_type":"code","metadata":{"id":"vsKTqn6405qw"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"xOOEzhNQ05qy"},"source":["__4. Найдите спортсмена, который участвовал в Олимпийских играх 2006 года, с наибольшим весом среди других участников той же Олимпиады. 
Каким спортом он или она занимался?__\n","\n","- Judo\n","- Bobsleigh \n","- Skeleton\n","- Boxing"]},{"cell_type":"code","metadata":{"id":"EkWD1Tnb05qz"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"UQzxZ3HT05q0"},"source":["__5. Сколько раз John Aalberg участвовал в Олимпийских играх в разные годы?__\n","\n","Один год - это один раз. Неважно сколько участий внутри одного года\n","- 0\n","- 1 \n","- 2\n","- 3 "]},{"cell_type":"code","metadata":{"id":"ZSfkdjPO05q0"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"8EnLcNrk05q3"},"source":["__6. Сколько золотых медалей по теннису выиграли спортсмены сборной Switzerland на Олимпиаде-2008? Считайте каждую медаль от каждого спортсмена.__\n","\n","- 0\n","- 1 \n","- 2\n","- 3 "]},{"cell_type":"code","metadata":{"id":"Y754OGI-05q3"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"v3h5sQF805q5"},"source":["__7. Правда ли, что на Олимпийских играх 2016 Spain выиграла меньше медалей, чем Италия?__ \n","\n","- Да\n","- Нет"]},{"cell_type":"code","metadata":{"id":"gqJqDi2605q7"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"kkSYL5mK05q-"},"source":["__8. К какой возрастной категории принадлежало наименьшее и наибольшее количество участников Олимпиады-2008?__\n","\n","- [45-55] и [25-35) соответственно\n","- [45-55] и [15-25) соответственно\n","- [35-45) и [25-35) соответственно\n","- [45-55] и [35-45) соответственно"]},{"cell_type":"code","metadata":{"id":"pMAQtW7i05q_"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"JQmJPiXv05rB"},"source":["__9. Правда ли, что в Atlanta проводились летние Олимпийские игры? Правда ли, что в Squaw Valley проводились зимние Олимпийские игры? 
?__\n","\n","- Да, Да\n","- Да, Нет\n","- Нет, Да \n","- Нет, Нет "]},{"cell_type":"code","metadata":{"id":"UU66wRHC05rB"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4hxR5D-t05rF"},"source":["__10. Какова абсолютная разница между количеством уникальных видов спорта на Олимпиаде 1986 года и Олимпиаде 2002 года?__\n","\n","- 3 \n","- 10\n","- 15\n","- 27 "]},{"cell_type":"code","metadata":{"id":"WKIr-TR105rF"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "colab": { + "name": "02_pandas_task.ipynb", + "provenance": [], + "collapsed_sections": [] + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "EmV0s8YY05p7" + }, + "source": [ + "- __ID__ - Unique number for each athlete\n", + "- __Name__ - Athlete's name\n", + "- __Sex__ - M or F\n", + "- __Age__ - Integer\n", + "- __Height__ - In centimeters\n", + "- __Weight__ - In kilograms\n", + "- __Team__ - Team name\n", + "- __NOC__ - National Olympic Committee 3-letter code\n", + "- __Games__ - Year and season\n", + "- __Year__ - Integer\n", + "- __Season__ - Summer or Winter\n", + "- __City__ - Host city\n", + "- __Sport__ - Sport\n", + "- __Event__ - Event\n", + "- __Medal__ - Gold, Silver, Bronze, or NA" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "rVCrMDMh05p_" + }, + "source": [ + "import pandas as pd" + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "D5Q4Z-JW05qC" + }, + "source": [ + "# не меняем путь!\n", + "PATH = 
'https://github.com/aksenov7/Kaggle_competition_group/blob/master/athlete_events.csv.zip?raw=true'" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mI0LtqkY4Kp-" + }, + "source": [ + "__0. Откройте файл используя необходимые параметры и не меняя переменную PATH__" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "h5SQwBLr05qG", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "executionInfo": { + "status": "ok", + "timestamp": 1615627554682, + "user_tz": -300, + "elapsed": 2477, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + } + }, + "outputId": "882f9e83-5fd7-4c3b-b005-56917b15a0fd" + }, + "source": [ + "\n", + "data = pd.read_csv(PATH, compression='zip', header=0, sep=',', quotechar='\"')\n", + "data.head()" + ], + "execution_count": 7, + "outputs": [ + { + "data": { + "text/plain": " ID Name Sex Age Height Weight Team \\\n0 1 A Dijiang M 24.0 180.0 80.0 China \n1 2 A Lamusi M 23.0 170.0 60.0 China \n2 3 Gunnar Nielsen Aaby M 24.0 NaN NaN Denmark \n3 4 Edgar Lindenau Aabye M 34.0 NaN NaN Denmark/Sweden \n4 5 Christine Jacoba Aaftink F 21.0 185.0 82.0 Netherlands \n\n NOC Games Year Season City Sport \\\n0 CHN 1992 Summer 1992 Summer Barcelona Basketball \n1 CHN 2012 Summer 2012 Summer London Judo \n2 DEN 1920 Summer 1920 Summer Antwerpen Football \n3 DEN 1900 Summer 1900 Summer Paris Tug-Of-War \n4 NED 1988 Winter 1988 Winter Calgary Speed Skating \n\n Event Medal \n0 Basketball Men's Basketball NaN \n1 Judo Men's Extra-Lightweight NaN \n2 Football Men's Football NaN \n3 Tug-Of-War Men's Tug-Of-War Gold \n4 Speed Skating Women's 500 metres NaN ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
IDNameSexAgeHeightWeightTeamNOCGamesYearSeasonCitySportEventMedal
01A DijiangM24.0180.080.0ChinaCHN1992 Summer1992SummerBarcelonaBasketballBasketball Men's BasketballNaN
12A LamusiM23.0170.060.0ChinaCHN2012 Summer2012SummerLondonJudoJudo Men's Extra-LightweightNaN
23Gunnar Nielsen AabyM24.0NaNNaNDenmarkDEN1920 Summer1920SummerAntwerpenFootballFootball Men's FootballNaN
34Edgar Lindenau AabyeM34.0NaNNaNDenmark/SwedenDEN1900 Summer1900SummerParisTug-Of-WarTug-Of-War Men's Tug-Of-WarGold
45Christine Jacoba AaftinkF21.0185.082.0NetherlandsNED1988 Winter1988WinterCalgarySpeed SkatingSpeed Skating Women's 500 metresNaN
\n
" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "stYR4EbV05qP" + }, + "source": [ + "__1. Сколько лет было самым молодым мужчинам и женщинам-участникам Олимпийских игр 1992 года ?__\n", + "- 16 и 15\n", + "- 14 и 13 \n", + "- 13 и 11\n", + "- 11 и 12" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HgiqBXtb05qR" + }, + "source": [ + "data[data['Year'] == 1992].groupby('Sex')['Age'].min()" + ], + "execution_count": 33, + "outputs": [ + { + "data": { + "text/plain": "Sex\nF 12.0\nM 11.0\nName: Age, dtype: float64" + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GQ290dsi05qc" + }, + "source": [ + "__2. Каков был процент баскетболистов-мужчин среди всех мужчин-участников Олимпийских игр 2012 года? Округлите ответ до первого десятичного знака.__\n", + "\n", + "Здесь и далее при необходимости отбрасывайте дублированных спортсменов, чтобы считать только уникальных . \n", + "- 0.2\n", + "- 1.5 \n", + "- 2.5\n", + "- 7.7" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-fI5MqWP05qi" + }, + "source": [ + "men_2012 = data[(data['Sex'] == 'M') & (data['Year'] == 2012)] # athletes are counted once each, by ID, below\n", + "round(100 * men_2012.loc[men_2012['Sport'] == 'Basketball', 'ID'].nunique() / men_2012['ID'].nunique(), 1)" + ], + "execution_count": 39, + "outputs": [ + { + "data": { + "text/plain": "2.5" + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u5WrTgIC05qv" + }, + "source": [ + "__3. Каковы среднее и стандартное отклонение роста теннисисток, участвовавших в Олимпийских играх 2000 года? 
Округлите ответ до первого десятичного знака.__\n", + "\n", + "- 171.8 и 6.5\n", + "- 179.4 и 10\n", + "- 180.7 и 6.7\n", + "- 182.4 и 9.1 " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vsKTqn6405qw" + }, + "source": [ + "data[(data['Year'] == 2000) & (data['Sex'] == 'F') & (data['Sport'] == 'Tennis')].drop_duplicates('ID')['Height'].agg(['mean', 'std']).round(1)" + ], + "execution_count": 66, + "outputs": [ + { + "data": { + "text/plain": "mean 171.8\nstd 6.5\nName: Height, dtype: float64" + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xOOEzhNQ05qy" + }, + "source": [ + "__4. Найдите спортсмена, который участвовал в Олимпийских играх 2006 года, с наибольшим весом среди других участников той же Олимпиады. Каким спортом он или она занимался?__\n", + "\n", + "- Judo\n", + "- Bobsleigh \n", + "- Skeleton\n", + "- Boxing" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EkWD1Tnb05qz" + }, + "source": [ + "max_weight = data[data['Year'] == 2006]['Weight'].max()\n", + "data[(data['Year'] == 2006) & (data['Weight'] == max_weight)]['Sport']" + ], + "execution_count": 79, + "outputs": [ + { + "data": { + "text/plain": "8102 Skeleton\nName: Sport, dtype: object" + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UQzxZ3HT05q0" + }, + "source": [ + "__5. Сколько раз John Aalberg участвовал в Олимпийских играх в разные годы?__\n", + "\n", + "Один год - это один раз. 
Неважно сколько участий внутри одного года\n", + "- 0\n", + "- 1 \n", + "- 2\n", + "- 3 " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ZSfkdjPO05q0" + }, + "source": [ + "data[data['Name'] == 'John Aalberg']['Year'].nunique()" + ], + "execution_count": 84, + "outputs": [ + { + "data": { + "text/plain": "2" + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8EnLcNrk05q3" + }, + "source": [ + "__6. Сколько золотых медалей по теннису выиграли спортсмены сборной Switzerland на Олимпиаде-2008? Считайте каждую медаль от каждого спортсмена.__\n", + "\n", + "- 0\n", + "- 1 \n", + "- 2\n", + "- 3 " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Y754OGI-05q3" + }, + "source": [ + "len(data[(data['Year'] == 2008) & (data['Team'] == 'Switzerland') & (data['Sport'] == 'Tennis') & (data['Medal'] == 'Gold')])" + ], + "execution_count": 91, + "outputs": [ + { + "data": { + "text/plain": "2" + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v3h5sQF805q5" + }, + "source": [ + "__7. Правда ли, что на Олимпийских играх 2016 Spain выиграла меньше медалей, чем Италия?__ \n", + "\n", + "- Да\n", + "- Нет" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gqJqDi2605q7" + }, + "source": [ + "len(data[(data['Year'] == 2016) & (data['Team'] == 'Spain') & (data['Medal'].notna())]) < \\\n", + "len(data[(data['Year'] == 2016) & (data['Team'] == 'Italy') & (data['Medal'].notna())])" + ], + "execution_count": 98, + "outputs": [ + { + "data": { + "text/plain": "True" + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kkSYL5mK05q-" + }, + "source": [ + "__8. 
К какой возрастной категории принадлежало наименьшее и наибольшее количество участников Олимпиады-2008?__\n", + "\n", + "- [45-55] и [25-35) соответственно\n", + "- [45-55] и [15-25) соответственно\n", + "- [35-45) и [25-35) соответственно\n", + "- [45-55] и [35-45) соответственно" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pMAQtW7i05q_" + }, + "source": [ + "ages_2008 = data.loc[data['Year'] == 2008, 'Age'] # select the 2008 rows once so no mask has to be reindexed\n", + "labels = ['[15-25)', '[25-35)', '[35-45)', '[45-55]']\n", + "# Bands are closed on the left; the last edge is 56 so that age 55 lands in [45-55] (ages are whole numbers).\n", + "categories = pd.cut(ages_2008, bins=[15, 25, 35, 45, 56], right=False, labels=labels).value_counts()\n", + "print('Max category: ', categories.idxmax())\n", + "print('Min category: ', categories.idxmin())" + ], + "execution_count": 121, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Max category: [25-35)\n", + "Min category: [45-55]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JQmJPiXv05rB" + }, + "source": [ + "__9. Правда ли, что в Atlanta проводились летние Олимпийские игры? Правда ли, что в Squaw Valley проводились зимние Олимпийские игры? 
?__\n", + "\n", + "- Да, Да\n", + "- Да, Нет\n", + "- Нет, Да \n", + "- Нет, Нет " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "UU66wRHC05rB" + }, + "source": [ + "(any(data[data['City'] == 'Atlanta']['Season'] == 'Summer'),\n", + "any(data[data['City'] == 'Squaw Valley']['Season'] == 'Winter'))" + ], + "execution_count": 128, + "outputs": [ + { + "data": { + "text/plain": "(True, True)" + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4hxR5D-t05rF" + }, + "source": [ + "__10. Какова абсолютная разница между количеством уникальных видов спорта на Олимпиаде 1986 года и Олимпиаде 2002 года?__\n", + "\n", + "- 3 \n", + "- 10\n", + "- 15\n", + "- 27 " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "WKIr-TR105rF" + }, + "source": [ + "# The dataset has no rows for 1986 (nunique() == 0), so the difference between 2002 and 1986 is 15 - 0 = 15\n", + "abs(data[data['Year'] == 2002]['Sport'].nunique() - data[data['Year'] == 1986]['Sport'].nunique())" + ], + "execution_count": 138, + "outputs": [ + { + "data": { + "text/plain": "15" + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ] + } + ] +} \ No newline at end of file