diff --git a/Numpy (26.02)/Numpy_Task.ipynb b/Numpy (26.02)/Numpy_Task.ipynb index 593ba20..a673f47 100644 --- a/Numpy (26.02)/Numpy_Task.ipynb +++ b/Numpy (26.02)/Numpy_Task.ipynb @@ -2,7 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 3, + "id": "c5376440", "metadata": { "id": "medieval-detail" }, @@ -13,6 +14,7 @@ }, { "cell_type": "markdown", + "id": "603fe6ef", "metadata": { "id": "abstract-istanbul" }, @@ -25,20 +27,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, + "id": "825d718f", "metadata": { "id": "entertaining-automation" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + } + ], "source": [ "python_list = [1, 12, 13, 45, 76, 45, 98, 0]\n", - "print()\n", - "python_list = \n", - "print()" + "print(type(python_list))\n", + "python_list = np.array(python_list)\n", + "print(type(python_list))" ] }, { "cell_type": "markdown", + "id": "65a78963", "metadata": { "id": "loose-tobago" }, @@ -49,18 +62,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, + "id": "026d255c", "metadata": { "id": "included-polymer" }, - "outputs": [], - "source": [ - "z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5]\n" + ] + } + ], + "source": [ + "z = np.full(10, 1.5) #np.array([1.5]*10)\n", "print(z)" ] }, { "cell_type": "markdown", + "id": "cdcf6110", "metadata": { "id": "threatened-theme" }, @@ -71,18 +94,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, + "id": "0262597f", "metadata": { "id": "alert-endorsement" }, - "outputs": [], - "source": [ - "z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]]\n" + ] + } + ], + "source": [ + "z = np.zeros((5, 5)) #np.full((5, 5), 0, dtype='float')\n", "print(z)" ] }, { "cell_type": "markdown", + "id": "a432c057", "metadata": { "id": "federal-blackberry" }, @@ -93,18 +130,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, + "id": "33630258", "metadata": { "id": "static-filing" }, - "outputs": [], - "source": [ - "ones = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n" + ] + } + ], + "source": [ + "ones = np.ones(12) \n", "print(ones)" ] }, { "cell_type": "markdown", + "id": "67f98f36", "metadata": { "id": "whole-chassis" }, @@ -116,18 +163,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, + "id": "4d613945", "metadata": { "id": "outstanding-deviation" }, - "outputs": [], - "source": [ - "ones = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1. 1. 1. 1.]\n", + " [1. 1. 1. 1.]\n", + " [1. 1. 1. 1.]]\n" + ] + } + ], + "source": [ + "ones = ones.reshape((3, 4))\n", "ones.shape" ] }, { "cell_type": "markdown", + "id": "b901e3e3", "metadata": { "id": "cubic-noise" }, @@ -139,20 +198,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, + "id": "ffcbb5b4", "metadata": { "id": "foster-memory" }, - "outputs": [], - "source": [ - "Z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 1 2 3 4 5]\n", + " [ 6 7 8 9 10]\n", + " [11 12 13 14 15]\n", + " [16 17 18 19 20]]\n", + "[[ 1 2 3 4 5]\n", + " [ 6 7 8 9 10]\n", + " [ 11 12 13 -99 15]\n", + " [ 16 17 18 19 20]]\n" + ] + } + ], + "source": [ + "Z = np.arange(1, 21).reshape((4, 5))\n", "print(Z)\n", - "\n", + "Z[2, 3] = -99 #индексация с нуля\n", "print(Z)" ] }, { "cell_type": "markdown", + "id": "b574f0b4", "metadata": { "id": "helpful-table" }, @@ -164,20 +240,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, + "id": "3e41f241", "metadata": { "id": "magnetic-leone" }, - "outputs": [], - "source": [ - "first = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-6 1 -3 0 7 5 9 -1 -6 3 -5 6 9 -4 5]\n", + "[ 5 -4 9 6 -5 3 -6 -1 9 5 7 0 -3 1 -6]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-10, 10, 15)\n", "print(first)\n", - "second = \n", + "second = np.flip(first) #first[::-1]\n", "print(second)" ] }, { "cell_type": "markdown", + "id": "2060d03b", "metadata": { "id": "executed-september" }, @@ -189,20 +276,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, + "id": "3f02d634", "metadata": { "id": "pharmaceutical-sigma" }, - "outputs": [], - "source": [ - "first = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-11 13 -15 0 2]\n", + " [ 2 -3 -7 -4 -8]\n", + " [-14 1 -15 5 5]\n", + " [ 8 -13 -14 -15 4]\n", + " [ 13 -9 -13 -9 9]]\n", + "[[121 13 225 0 2]\n", + " [ 2 9 49 16 64]\n", + " [196 1 225 5 5]\n", + " [ 8 169 196 225 4]\n", + " [ 13 81 169 81 9]]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-15, 15, (5, 5))\n", "print(first)\n", - "\n", + "#first[first < 0] **= 2\n", + "#first[np.where(first < 0)] **= 2\n", + "first = np.where(first < 0, first**2, first)\n", "print(first)" ] }, { "cell_type": "markdown", + "id": "6c506236", "metadata": { "id": "floral-difference" }, @@ -216,18 +324,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, + "id": "1bbc7b2f", "metadata": { "id": "saving-conference" }, - "outputs": [], - "source": [ - "first = \n", - "print(first)\n" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ -2 14 5 -8 -9]\n", + " [ -5 10 -9 5 2]\n", + " [ 3 11 -14 -10 -14]]\n", + "14\n", + "-14\n", + "[ 0. 0.6 -4.8]\n", + "[-1.33333333 11.66666667 -6. -4.33333333 -7. ]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-15, 15, (3, 5))\n", + "print(first)\n", + "print(np.amax(first)) #максимум\n", + "print(np.amin(first)) #минимум\n", + "print(np.mean(first, 1)) #среднее по строкам\n", + "print(np.mean(first, 0)) #среднее по столбцам" ] }, { "cell_type": "markdown", + "id": "dfc028a4", "metadata": { "id": "diagnostic-departure" }, @@ -240,23 +368,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 76, + "id": "65e6b462", "metadata": { "id": "olympic-qatar" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-14 23 114]\n", + " [-12 -24 79]]\n" + ] + } + ], "source": [ "a = np.random.randint(-10, 10, (2, 5))\n", "first_axis = np.random.randint(4, 6)\n", "b = np.random.randint(-10, 10, (first_axis, 3))\n", - "if :\n", + "if a.shape[1] == b.shape[0]:\n", " print(a @ b)\n", "else:\n", - " " + " raise Exception('Матрицы не согласованы')" ] }, { "cell_type": "markdown", + "id": "0870eb01", "metadata": { "id": "governmental-austin" }, @@ -268,20 +407,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 144, + "id": "06dbcd48", "metadata": { "id": "suffering-mauritius" }, - "outputs": [], - "source": [ - "mask = \n", - "matrix = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[8.25038355 6.08825806 3.16237398 2.28191764 3.33556773]\n", + " [0. 4.9366809 6.9472132 4.23223229 9.86861145]\n", + " [0. 0. 6.50718322 9.88483789 8.81063026]\n", + " [0. 0. 0. 6.533728 7.96559933]\n", + " [0. 0. 0. 0. 7.51449851]]\n" + ] + } + ], + "source": [ + "mask = np.random.uniform(2, 12, (5, 5))\n", + "matrix = np.triu(mask)\n", "\n", "print(matrix)" ] }, { "cell_type": "markdown", + "id": "815fd3f0", "metadata": { "id": "altered-baghdad" }, @@ -293,20 +446,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 150, + "id": "6d095bd6", "metadata": { "id": "refined-stuff" }, - "outputs": [], - "source": [ - "mask = \n", - "matrix = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 3.46280477 4.20558853 6.45725307 2.69067203]\n", + " [7.12653369 0. 6.44202366 2.50661627 2.0254555 ]\n", + " [8.80324417 9.95471846 0. 7.43085754 6.38382735]\n", + " [3.31048899 9.81316014 5.91944475 0. 8.63145251]\n", + " [3.1321131 8.55712022 7.50059044 2.02695731 0. ]]\n" + ] + } + ], + "source": [ + "matrix = np.random.uniform(10, 2, (5, 5))\n", + "np.fill_diagonal(matrix, 0)\n", "\n", "print(matrix)" ] }, { "cell_type": "markdown", + "id": "804d4a3f", "metadata": { "id": "quiet-complement" }, @@ -317,22 +484,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 80, + "id": "73c5e947", "metadata": { "id": "french-fighter" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 0 0 0 1]\n", + "[1 1 1 0 1]\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "a = np.random.randint(0,2,5)\n", "print(a)\n", "b = np.random.randint(0,2,5)\n", "print(b)\n", - "equal = \n", + "#equal = a == b => equal.all() или np.equal(a, b).all()\n", + "equal = np.array_equal(a, b)\n", "equal" ] }, { "cell_type": "markdown", + "id": "95b0d5e4", "metadata": { "id": "color-amplifier" }, @@ -347,23 +536,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 106, + "id": "de9d9fc6", "metadata": { "id": "close-daisy" }, - "outputs": [], - "source": [ - "r, c = \n", - "a = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[14 1 14 8 3 11 7 10 3]\n", + " [ 6 7 4 7 5 5 3 14 2]\n", + " [ 1 2 11 5 10 8 3 7 3]\n", + " [ 6 1 13 8 9 11 5 7 1]]\n", + "8\n", + "[ 3 11 10 3 2 2 5 7]\n" + ] + } + ], + "source": [ + "r, c = np.random.randint([3, 2], [7, 12])\n", + "a = np.random.randint(1, 15, (r, c))\n", "print(a)\n", - "N = \n", + "N = np.random.randint(1, (r*c)/2)\n", "print(N)\n", - "sample = \n", + "sample = np.random.choice(a.flatten(), N)\n", "print(sample)" ] }, { "cell_type": "markdown", + "id": "f658a12f", "metadata": { "id": "patent-african" }, @@ -376,20 +580,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 179, + "id": "f3bdd69d", "metadata": { "id": "taken-fabric" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n", + "True\n", + "[1. 0. 0.]\n" + ] + } + ], "source": [ "a = np.array([1, np.NaN, np.Inf], float)\n", - "\n", - "\n", - "a" + "print(np.isnan(a).any())\n", + "print(np.isinf(a).any())\n", + "a[np.isnan(a)] = 0.\n", + "a[np.isinf(a)] = 0.\n", + "print(a)" ] }, { "cell_type": "markdown", + "id": "50690063", "metadata": { "id": "analyzed-ireland" }, @@ -401,20 +619,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 137, + "id": "c3d78565", "metadata": { "id": "imposed-digest" }, - "outputs": [], - "source": [ - "axis = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3\n", + "3\n" + ] + } + ], + "source": [ + "axis = np.random.randint(1, 15)\n", "print(axis)\n", - "matrix = \n", - "print(...)" + "matrix = np.zeros(range(axis))\n", + "print(matrix.ndim)" ] }, { "cell_type": "markdown", + "id": "2e00ab99", "metadata": { "id": "regulation-colleague" }, @@ -427,17 +656,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 199, + "id": "6b1eab0b", "metadata": { "id": "concerned-anthropology" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[50.94747793 42.91001562 54.84269122]\n", + " [47.42321719 50.99581998 59.70228688]\n", + " [51.68759433 52.10665795 27.19269554]\n", + " [36.41076644 55.63482713 40.28882282]\n", + " [37.82787003 47.03523973 73.10854609]\n", + " [62.57896407 57.83790414 65.86469857]\n", + " [48.36785779 53.27431004 61.46946736]\n", + " [54.45003777 55.66186162 29.65398427]\n", + " [46.12617365 66.8101111 34.61421603]\n", + " [58.65697598 38.72072121 45.39562589]]\n", + "[2 2 1 1 2 2 2 1 1 0]\n", + "[54.84269122 59.70228688 52.10665795 55.63482713 73.10854609 65.86469857\n", + " 61.46946736 55.66186162 66.8101111 58.65697598]\n" + ] + } + ], "source": [ "matrix = np.random.normal(50, 10, (10,3))\n", "print(matrix)\n", - "indexes = \n", + "indexes = np.argmax(matrix, 1)\n", "print(indexes)\n", - "print(...)" + "print(matrix[range(matrix.shape[0]), indexes])" ] } ], @@ -448,7 +698,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -462,9 +712,10 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 5 } + diff --git a/Pandas (06.03)/Pandas. Task. Part 1.ipynb b/Pandas (06.03)/Pandas. Task. Part 1.ipynb index 5172e85..b89d58d 100644 --- a/Pandas (06.03)/Pandas. Task. Part 1.ipynb +++ b/Pandas (06.03)/Pandas. Task. Part 1.ipynb @@ -1 +1,1160 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"anaconda-cloud":{},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.6"},"colab":{"name":"01_task_pandas.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"UTKVH3sMutTM"},"source":["**В задании предлагается с помощью Pandas ответить на несколько вопросов по данным репозитория UCI [Adult](https://archive.ics.uci.edu/ml/datasets/Adult)**"]},{"cell_type":"markdown","metadata":{"id":"3lUT-CqYutTO"},"source":["Уникальные значения признаков (больше информации по ссылке выше):\n","- age: continuous.\n","- workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.\n","- fnlwgt: continuous.\n","- education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.\n","- education-num: continuous.\n","- marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.\n","- occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.\n","- relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.\n","- race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.\n","- sex: Female, Male.\n","- capital-gain: continuous.\n","- capital-loss: continuous.\n","- hours-per-week: continuous.\n","- native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. \n","- salary: >50K,<=50K"]},{"cell_type":"code","metadata":{"id":"6GzulHvOutTR"},"source":["import pandas as pd"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"SJ3LbaoiutTT","colab":{"base_uri":"https://localhost:8080/","height":380},"executionInfo":{"status":"ok","timestamp":1626441443051,"user_tz":-300,"elapsed":499,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"eab110b9-0f5f-4bcd-db91-328a0b391379"},"source":["data = pd.read_csv(\"https://raw.githubusercontent.com/aksenov7/Kaggle_competition_group/master/adult.data.csv\")\n","data.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n","
"],"text/plain":[" age workclass fnlwgt ... hours-per-week native-country salary\n","0 39 State-gov 77516 ... 40 United-States <=50K\n","1 50 Self-emp-not-inc 83311 ... 13 United-States <=50K\n","2 38 Private 215646 ... 40 United-States <=50K\n","3 53 Private 234721 ... 40 United-States <=50K\n","4 28 Private 338409 ... 40 Cuba <=50K\n","\n","[5 rows x 15 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"EpQFv8t1ds05"},"source":["# def married(row):\n","# return \"Married\" in row\n","data[\"married\"] = data[\"marital-status\"].apply(lambda row: \"Married\" in row)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":756},"id":"3Bb2mRTEeoJK","executionInfo":{"status":"ok","timestamp":1626441731759,"user_tz":-300,"elapsed":481,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"9dd7d83b-f51a-4e11-f6dc-035a844f81c9"},"source":["data"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalarymarried
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50KFalse
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50KTrue
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50KFalse
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50KTrue
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50KTrue
...................................................
3255627Private257302Assoc-acdm12Married-civ-spouseTech-supportWifeWhiteFemale0038United-States<=50KTrue
3255740Private154374HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>50KTrue
3255858Private151910HS-grad9WidowedAdm-clericalUnmarriedWhiteFemale0040United-States<=50KFalse
3255922Private201490HS-grad9Never-marriedAdm-clericalOwn-childWhiteMale0020United-States<=50KFalse
3256052Self-emp-inc287927HS-grad9Married-civ-spouseExec-managerialWifeWhiteFemale15024040United-States>50KTrue
\n","

32561 rows × 16 columns

\n","
"],"text/plain":[" age workclass fnlwgt ... native-country salary married\n","0 39 State-gov 77516 ... United-States <=50K False\n","1 50 Self-emp-not-inc 83311 ... United-States <=50K True\n","2 38 Private 215646 ... United-States <=50K False\n","3 53 Private 234721 ... United-States <=50K True\n","4 28 Private 338409 ... Cuba <=50K True\n","... ... ... ... ... ... ... ...\n","32556 27 Private 257302 ... United-States <=50K True\n","32557 40 Private 154374 ... United-States >50K True\n","32558 58 Private 151910 ... United-States <=50K False\n","32559 22 Private 201490 ... United-States <=50K False\n","32560 52 Self-emp-inc 287927 ... United-States >50K True\n","\n","[32561 rows x 16 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"markdown","metadata":{"id":"MoK8B5fIutTW"},"source":["**1. Сколько мужчин и женщин (признак *sex*) представлено в этом наборе данных?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"hdzky90TutTY"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"adF8lgVbutTZ"},"source":["**2. Каков средний возраст (признак *age*) женщин?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"K6C2qZ_zutTb"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"-Cz1S7-HutTd"},"source":["**3. Какова доля граждан Германии (признак *native-country*)?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"Y4mmqN6outTf"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Do-rEgaautTg"},"source":["**4-5. Каковы средние значения и среднеквадратичные отклонения возраста тех, кто получает более 50K в год (признак *salary*) и тех, кто получает менее 50K в год? **"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"eSuk0CAnutTh"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rK9SwvI_utTj"},"source":["**6. Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование? (признак *education – Bachelors, Prof-school, Assoc-acdm, Assoc-voc, Masters* или *Doctorate*)**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"eygYabkdutTj"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4DqPASEsutTk"},"source":["**7. Выведите статистику возраста для каждой расы (признак *race*) и каждого пола. Используйте *groupby* и *describe*. Найдите таким образом максимальный возраст мужчин расы *Amer-Indian-Eskimo*.**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"fYkBDZMdutTl"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"cn-jYXhzutTl"},"source":["**8. Среди кого больше доля зарабатывающих много (>50K): среди женатых или холостых мужчин (признак *marital-status*)? Женатыми считаем тех, у кого *marital-status* начинается с *Married* (Married-civ-spouse, Married-spouse-absent или Married-AF-spouse), остальных считаем холостыми.**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"4hIQXgGAutTm"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Rsh8YvoXutTm"},"source":["**9. Какое максимальное число часов человек работает в неделю (признак *hours-per-week*)? Сколько людей работают такое количество часов и каков среди них процент зарабатывающих много?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"RK1JQSIZutTn"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"kUXV84AjutTn"},"source":["**10. Посчитайте среднее время работы (*hours-per-week*) зарабатывающих мало и много (*salary*) для каждой страны (*native-country*).**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"3gzYG3CDutTn"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "UTKVH3sMutTM" + }, + "source": [ + "**В задании предлагается с помощью Pandas ответить на несколько вопросов по данным репозитория UCI [Adult](https://archive.ics.uci.edu/ml/datasets/Adult)**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3lUT-CqYutTO" + }, + "source": [ + "Уникальные значения признаков (больше информации по ссылке выше):\n", + "- age: continuous.\n", + "- workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.\n", + "- fnlwgt: continuous.\n", + "- education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.\n", + "- education-num: continuous.\n", + "- marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.\n", + "- occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.\n", + "- relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.\n", + "- race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.\n", + "- sex: Female, Male.\n", + "- capital-gain: continuous.\n", + "- capital-loss: continuous.\n", + "- hours-per-week: continuous.\n", + "- native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. \n", + "- salary: >50K,<=50K" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "6GzulHvOutTR" + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 380 + }, + "executionInfo": { + "elapsed": 499, + "status": "ok", + "timestamp": 1626441443051, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "SJ3LbaoiutTT", + "outputId": "eab110b9-0f5f-4bcd-db91-328a0b391379" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n", + "
" + ], + "text/plain": [ + " age workclass fnlwgt education education-num \\\n", + "0 39 State-gov 77516 Bachelors 13 \n", + "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", + "2 38 Private 215646 HS-grad 9 \n", + "3 53 Private 234721 11th 7 \n", + "4 28 Private 338409 Bachelors 13 \n", + "\n", + " marital-status occupation relationship race sex \\\n", + "0 Never-married Adm-clerical Not-in-family White Male \n", + "1 Married-civ-spouse Exec-managerial Husband White Male \n", + "2 Divorced Handlers-cleaners Not-in-family White Male \n", + "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", + "4 Married-civ-spouse Prof-specialty Wife Black Female \n", + "\n", + " capital-gain capital-loss hours-per-week native-country salary \n", + "0 2174 0 40 United-States <=50K \n", + "1 0 0 13 United-States <=50K \n", + "2 0 0 40 United-States <=50K \n", + "3 0 0 40 United-States <=50K \n", + "4 0 0 40 Cuba <=50K " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(\"https://raw.githubusercontent.com/aksenov7/Kaggle_competition_group/master/adult.data.csv\")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "EpQFv8t1ds05" + }, + "outputs": [], + "source": [ + "# def married(row):\n", + "# return \"Married\" in row\n", + "data[\"married\"] = data[\"marital-status\"].apply(lambda row: \"Married\" in row) " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 756 + }, + "executionInfo": { + "elapsed": 481, + "status": "ok", + "timestamp": 1626441731759, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "3Bb2mRTEeoJK", + "outputId": "9dd7d83b-f51a-4e11-f6dc-035a844f81c9" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalarymarried
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50KFalse
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50KTrue
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50KFalse
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50KTrue
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50KTrue
...................................................
3255627Private257302Assoc-acdm12Married-civ-spouseTech-supportWifeWhiteFemale0038United-States<=50KTrue
3255740Private154374HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>50KTrue
3255858Private151910HS-grad9WidowedAdm-clericalUnmarriedWhiteFemale0040United-States<=50KFalse
3255922Private201490HS-grad9Never-marriedAdm-clericalOwn-childWhiteMale0020United-States<=50KFalse
3256052Self-emp-inc287927HS-grad9Married-civ-spouseExec-managerialWifeWhiteFemale15024040United-States>50KTrue
\n", + "

32561 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " age workclass fnlwgt education education-num \\\n", + "0 39 State-gov 77516 Bachelors 13 \n", + "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", + "2 38 Private 215646 HS-grad 9 \n", + "3 53 Private 234721 11th 7 \n", + "4 28 Private 338409 Bachelors 13 \n", + "... ... ... ... ... ... \n", + "32556 27 Private 257302 Assoc-acdm 12 \n", + "32557 40 Private 154374 HS-grad 9 \n", + "32558 58 Private 151910 HS-grad 9 \n", + "32559 22 Private 201490 HS-grad 9 \n", + "32560 52 Self-emp-inc 287927 HS-grad 9 \n", + "\n", + " marital-status occupation relationship race sex \\\n", + "0 Never-married Adm-clerical Not-in-family White Male \n", + "1 Married-civ-spouse Exec-managerial Husband White Male \n", + "2 Divorced Handlers-cleaners Not-in-family White Male \n", + "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", + "4 Married-civ-spouse Prof-specialty Wife Black Female \n", + "... ... ... ... ... ... \n", + "32556 Married-civ-spouse Tech-support Wife White Female \n", + "32557 Married-civ-spouse Machine-op-inspct Husband White Male \n", + "32558 Widowed Adm-clerical Unmarried White Female \n", + "32559 Never-married Adm-clerical Own-child White Male \n", + "32560 Married-civ-spouse Exec-managerial Wife White Female \n", + "\n", + " capital-gain capital-loss hours-per-week native-country salary \\\n", + "0 2174 0 40 United-States <=50K \n", + "1 0 0 13 United-States <=50K \n", + "2 0 0 40 United-States <=50K \n", + "3 0 0 40 United-States <=50K \n", + "4 0 0 40 Cuba <=50K \n", + "... ... ... ... ... ... \n", + "32556 0 0 38 United-States <=50K \n", + "32557 0 0 40 United-States >50K \n", + "32558 0 0 40 United-States <=50K \n", + "32559 0 0 20 United-States <=50K \n", + "32560 15024 0 40 United-States >50K \n", + "\n", + " married \n", + "0 False \n", + "1 True \n", + "2 False \n", + "3 True \n", + "4 True \n", + "... ... \n", + "32556 True \n", + "32557 True \n", + "32558 False \n", + "32559 False \n", + "32560 True \n", + "\n", + "[32561 rows x 16 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MoK8B5fIutTW" + }, + "source": [ + "**1. Сколько мужчин и женщин (признак *sex*) представлено в этом наборе данных?**" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "hdzky90TutTY" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Male 21790\n", + "Female 10771\n", + "Name: sex, dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['sex'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "adF8lgVbutTZ" + }, + "source": [ + "**2. Каков средний возраст (признак *age*) женщин?**" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "K6C2qZ_zutTb" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "36.85823043357163" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#data.groupby('sex')['age'].mean()\n", + "data[data.sex =='Female']['age'].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-Cz1S7-HutTd" + }, + "source": [ + "**3. Какова доля граждан Германии (признак *native-country*)?**" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "id": "Y4mmqN6outTf" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.004207487485028101" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#import numpy as np\n", + "#np.count_nonzero((data['native-country'] == 'Germany').values == True) / len(data['native-country'])", + "\n", + "list(data['native-country'] == 'Germany').count(True) / data.shape[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Do-rEgaautTg" + }, + "source": [ + "**4-5. Каковы средние значения и среднеквадратичные отклонения возраста тех, кто получает более 50K в год (признак *salary*) и тех, кто получает менее 50K в год?**" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "id": "eSuk0CAnutTh" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanstd
salary
<=50K36.78373814.020088
>50K44.24984110.519028
\n", + "
" + ], + "text/plain": [ + " mean std\n", + "salary \n", + "<=50K 36.783738 14.020088\n", + ">50K 44.249841 10.519028" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "data.groupby('salary')['age'].agg([np.mean, np.std])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rK9SwvI_utTj" + }, + "source": [ + "**6. Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование? (признак *education – Bachelors, Prof-school, Assoc-acdm, Assoc-voc, Masters* или *Doctorate*)**" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "id": "eygYabkdutTj" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_educ = ['Bachelors', 'Prof-school', 'Assoc-acdm', 'Assoc-voc', 'Masters', 'Doctorate']\n", + "all(list(data[data.salary == '>50K']['education'].apply(lambda value: value in high_educ)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4DqPASEsutTk" + }, + "source": [ + "**7. Выведите статистику возраста для каждой расы (признак *race*) и каждого пола. Используйте *groupby* и *describe*. Найдите таким образом максимальный возраст мужчин расы *Amer-Indian-Eskimo*.**" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "id": "fYkBDZMdutTl" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
racesex
Amer-Indian-EskimoFemale119.037.11764713.11499117.027.036.046.0080.0
Male192.037.20833312.04956317.028.035.045.0082.0
Asian-Pac-IslanderFemale346.035.08959512.30084517.025.033.043.7575.0
Male693.039.07359312.88394418.029.037.046.0090.0
BlackFemale1555.037.85401912.63719717.028.037.046.0090.0
Male1569.037.68260012.88261217.027.036.046.0090.0
OtherFemale109.031.67889911.63159917.023.029.039.0074.0
Male162.034.65432111.35553117.026.032.042.0077.0
WhiteFemale8642.036.81161814.32909317.025.035.046.0090.0
Male19174.039.65249813.43602917.029.038.049.0090.0
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% \\\n", + "race sex \n", + "Amer-Indian-Eskimo Female 119.0 37.117647 13.114991 17.0 27.0 36.0 \n", + " Male 192.0 37.208333 12.049563 17.0 28.0 35.0 \n", + "Asian-Pac-Islander Female 346.0 35.089595 12.300845 17.0 25.0 33.0 \n", + " Male 693.0 39.073593 12.883944 18.0 29.0 37.0 \n", + "Black Female 1555.0 37.854019 12.637197 17.0 28.0 37.0 \n", + " Male 1569.0 37.682600 12.882612 17.0 27.0 36.0 \n", + "Other Female 109.0 31.678899 11.631599 17.0 23.0 29.0 \n", + " Male 162.0 34.654321 11.355531 17.0 26.0 32.0 \n", + "White Female 8642.0 36.811618 14.329093 17.0 25.0 35.0 \n", + " Male 19174.0 39.652498 13.436029 17.0 29.0 38.0 \n", + "\n", + " 75% max \n", + "race sex \n", + "Amer-Indian-Eskimo Female 46.00 80.0 \n", + " Male 45.00 82.0 \n", + "Asian-Pac-Islander Female 43.75 75.0 \n", + " Male 46.00 90.0 \n", + "Black Female 46.00 90.0 \n", + " Male 46.00 90.0 \n", + "Other Female 39.00 74.0 \n", + " Male 42.00 77.0 \n", + "White Female 46.00 90.0 \n", + " Male 49.00 90.0 " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.groupby(['race', 'sex']).describe()['age']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cn-jYXhzutTl" + }, + "source": [ + "**8. Среди кого больше доля зарабатывающих много (>50K): среди женатых или холостых мужчин (признак *marital-status*)? Женатыми считаем тех, у кого *marital-status* начинается с *Married* (Married-civ-spouse, Married-spouse-absent или Married-AF-spouse), остальных считаем холостыми.**" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": { + "id": "4hIQXgGAutTm" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "married salary\n", + "False <=50K 0.915505\n", + " >50K 0.084495\n", + "True <=50K 0.559486\n", + " >50K 0.440514\n", + "Name: salary, dtype: float64" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.sex == 'Male'].groupby('married')['salary'].value_counts(normalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rsh8YvoXutTm" + }, + "source": [ + "**9. Какое максимальное число часов человек работает в неделю (признак *hours-per-week*)? Сколько людей работают такое количество часов и каков среди них процент зарабатывающих много?**" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "id": "RK1JQSIZutTn" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Максимальное число часов человек работает в неделю: 99\n", + "Такое кол-во часов работают 85 человек\n", + "Процент зарабатывающих много: 29%\n" + ] + } + ], + "source": [ + "max_hours = data['hours-per-week'].max()\n", + "people_count = data[data['hours-per-week'] == max_hours].shape[0]\n", + "percentage = int(data[(data['hours-per-week'] == max_hours) & (data['salary'] == '>50K')].shape[0] / people_count * 100)\n", + "\n", + "print('Максимальное число часов человек работает в неделю: {0}'.format(max_hours))\n", + "print('Такое кол-во часов работают {0} человек'.format(people_count))\n", + "print('Процент зарабатывающих много: {0}%'.format(percentage))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kUXV84AjutTn" + }, + "source": [ + "**10. Посчитайте среднее время работы (*hours-per-week*) зарабатывающих мало и много (*salary*) для каждой страны (*native-country*).**" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": { + "id": "3gzYG3CDutTn" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "native-country salary\n", + "? <=50K 40.164760\n", + " >50K 45.547945\n", + "Cambodia <=50K 41.416667\n", + " >50K 40.000000\n", + "Canada <=50K 37.914634\n", + " ... \n", + "United-States >50K 45.505369\n", + "Vietnam <=50K 37.193548\n", + " >50K 39.200000\n", + "Yugoslavia <=50K 41.600000\n", + " >50K 49.500000\n", + "Name: hours-per-week, Length: 82, dtype: float64" + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.groupby(['native-country', 'salary'])['hours-per-week'].mean()" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "colab": { + "collapsed_sections": [], + "name": "01_task_pandas.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}