diff --git a/Numpy (26.02)/Numpy_Task.ipynb b/Numpy (26.02)/Numpy_Task.ipynb index 593ba20..501ba34 100644 --- a/Numpy (26.02)/Numpy_Task.ipynb +++ b/Numpy (26.02)/Numpy_Task.ipynb @@ -2,7 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 3, + "id": "a2fb2918", "metadata": { "id": "medieval-detail" }, @@ -13,6 +14,7 @@ }, { "cell_type": "markdown", + "id": "d179aab9", "metadata": { "id": "abstract-istanbul" }, @@ -25,20 +27,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, + "id": "f8e04056", "metadata": { "id": "entertaining-automation" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + } + ], "source": [ "python_list = [1, 12, 13, 45, 76, 45, 98, 0]\n", - "print()\n", - "python_list = \n", - "print()" + "print(type(python_list))\n", + "#видим, что python_list является структурой list. Преобразуем к ndarray\n", + "python_list = np.array(python_list, int)\n", + "#проверим успешность проебразования к типу ndarray\n", + "print(type(python_list))" ] }, { "cell_type": "markdown", + "id": "25da6e67", "metadata": { "id": "loose-tobago" }, @@ -49,18 +64,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, + "id": "9f6dddfe", "metadata": { "id": "included-polymer" }, - "outputs": [], - "source": [ - "z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5]\n" + ] + } + ], + "source": [ + "z = np.linspace(1.5, 1.5, 10, dtype = float)\n", "print(z)" ] }, { "cell_type": "markdown", + "id": "781e69f7", "metadata": { "id": "threatened-theme" }, @@ -71,18 +96,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, + "id": "5f6b7135", "metadata": { "id": "alert-endorsement" }, - "outputs": [], - "source": [ - "z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 0. 0. 
0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]]\n" + ] + } + ], + "source": [ + "z = np.zeros((5,5))\n", "print(z)" ] }, { "cell_type": "markdown", + "id": "0090bbd1", "metadata": { "id": "federal-blackberry" }, @@ -93,18 +132,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, + "id": "43039bbf", "metadata": { "id": "static-filing" }, - "outputs": [], - "source": [ - "ones = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 1 1 1 1 1 1 1 1 1 1 1]\n" + ] + } + ], + "source": [ + "ones = np.ones(12, dtype=int)\n", "print(ones)" ] }, { "cell_type": "markdown", + "id": "f41b901e", "metadata": { "id": "whole-chassis" }, @@ -116,18 +165,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, + "id": "e0a2f1d2", "metadata": { "id": "outstanding-deviation" }, - "outputs": [], - "source": [ - "ones = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1 1 1 1]\n", + " [1 1 1 1]\n", + " [1 1 1 1]]\n" + ] + }, + { + "data": { + "text/plain": [ + "(3, 4)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ones = ones.reshape(3,4)\n", + "print(ones)\n", "ones.shape" ] }, { "cell_type": "markdown", + "id": "3bc9c510", "metadata": { "id": "cubic-noise" }, @@ -139,20 +211,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, + "id": "1286ab82", "metadata": { "id": "foster-memory" }, - "outputs": [], - "source": [ - "Z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 0 1 2 3 4]\n", + " [ 5 6 7 8 9]\n", + " [10 11 12 13 14]\n", + " [15 16 17 18 19]]\n", + "[[ 0 1 2 3 4]\n", + " [ 5 6 7 8 9]\n", + " [ 10 11 12 -99 14]\n", + " [ 15 16 17 18 19]]\n" + ] + } + ], + "source": [ + "Z = np.arange(20).reshape(4,5)\n", "print(Z)\n", - "\n", + "Z[2][3] = (-99)\n", 
"print(Z)" ] }, { "cell_type": "markdown", + "id": "aedc281f", "metadata": { "id": "helpful-table" }, @@ -164,20 +253,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 48, + "id": "96234591", "metadata": { "id": "magnetic-leone" }, - "outputs": [], - "source": [ - "first = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ -8 5 -4 7 4 0 -10 -5 3 -9 1 -3 -4 -1 4]\n", + "[ 4 -1 -4 -3 1 -9 3 -5 -10 0 4 7 -4 5 -8]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-10,10,15)\n", "print(first)\n", - "second = \n", + "second = first[::-1]\n", "print(second)" ] }, { "cell_type": "markdown", + "id": "6b19895a", "metadata": { "id": "executed-september" }, @@ -189,20 +289,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, + "id": "7ddcebd3", "metadata": { "id": "pharmaceutical-sigma" }, - "outputs": [], - "source": [ - "first = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 6 10 2 -10 5]\n", + " [ -5 -9 1 4 1]\n", + " [ 6 -9 -5 -1 -7]\n", + " [ 4 7 -1 2 -7]\n", + " [ 4 9 5 -14 13]]\n", + "[[ 6 10 2 100 5]\n", + " [ 25 81 1 4 1]\n", + " [ 6 81 25 1 49]\n", + " [ 4 7 1 2 49]\n", + " [ 4 9 5 196 13]]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-15,15,(5,5))\n", "print(first)\n", - "\n", + "for i in range(5):\n", + " for j in range(5):\n", + " if first[i][j] < 0:\n", + " first[i][j] = first[i][j] * first[i][j]\n", "print(first)" ] }, { "cell_type": "markdown", + "id": "dc0757f2", "metadata": { "id": "floral-difference" }, @@ -216,18 +338,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, + "id": "6ae32afd", "metadata": { "id": "saving-conference" }, - "outputs": [], - "source": [ - "first = \n", - "print(first)\n" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-14 13 13 14 -11]\n", + " [ 14 -2 13 -4 -4]\n", + " [ 13 10 5 -15 -15]]\n", + "max: 14 
min: -15\n", + "mean vertical: [ 4.33333333 7. 10.33333333 -1.66666667 -10. ]\n", + "mean horizontal: [ 3. 3.4 -0.4]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-15,15,(3,5))\n", + "print(first)\n", + "print('max:', first.max(), 'min:', first.min())\n", + "print('mean vertical:', first.mean(axis=0))\n", + "print('mean horizontal:', first.mean(axis=1))" ] }, { "cell_type": "markdown", + "id": "6fdc0fa1", "metadata": { "id": "diagnostic-departure" }, @@ -240,23 +380,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 94, + "id": "378f1354", "metadata": { "id": "olympic-qatar" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5\n", + "[[-84 21 -21]\n", + " [ 34 -31 -7]]\n" + ] + } + ], "source": [ "a = np.random.randint(-10, 10, (2, 5))\n", "first_axis = np.random.randint(4, 6)\n", + "print(first_axis)\n", "b = np.random.randint(-10, 10, (first_axis, 3))\n", - "if :\n", + "if a.shape[1] == first_axis:\n", " print(a @ b)\n", "else:\n", + " print('traceback')\n", " " ] }, { "cell_type": "markdown", + "id": "23625924", "metadata": { "id": "governmental-austin" }, @@ -268,20 +422,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 99, + "id": "07bf9f5e", "metadata": { "id": "suffering-mauritius" }, - "outputs": [], - "source": [ - "mask = \n", - "matrix = \n", - "\n", - "print(matrix)" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[11.7516993 6.14459459 7.32788001 10.31842091 4.97231603]\n", + " [ 8.76782407 8.20939579 8.3091281 6.91625086 2.38918889]\n", + " [11.03867593 3.08975957 4.70298586 8.49362661 4.54681523]\n", + " [10.79542262 8.93234998 5.1097531 9.40557474 6.1502076 ]\n", + " [ 3.41394667 4.65751701 2.64677893 5.90179377 9.83536372]] \n", + "\n", + "[[11.7516993 6.14459459 7.32788001 10.31842091 4.97231603]\n", + " [ 0. 8.20939579 8.3091281 6.91625086 2.38918889]\n", + " [ 0. 0. 
4.70298586 8.49362661 4.54681523]\n", + " [ 0. 0. 0. 9.40557474 6.1502076 ]\n", + " [ 0. 0. 0. 0. 9.83536372]]\n" + ] + } + ], + "source": [ + "mask = np.random.uniform(2, 12, (5,5))\n", + "print(mask, '\\n')\n", + "mask[np.tril_indices(5, -1)] = 0\n", + "print(mask)" ] }, { "cell_type": "markdown", + "id": "fc443624", "metadata": { "id": "altered-baghdad" }, @@ -293,46 +467,83 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 91, + "id": "27de05c3", "metadata": { "id": "refined-stuff" }, - "outputs": [], - "source": [ - "mask = \n", - "matrix = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 0. 12.86127131 11.22168971 7.21720825 10.47671893]\n", + " [11.96135823 0. 13.14356861 9.90337982 12.11014366]\n", + " [13.13628799 7.28890965 0. 7.23622246 11.15415884]\n", + " [ 9.35751857 10.12510034 9.94678946 0. 10.46373431]\n", + " [13.25006165 9.11060909 9.34040538 9.6598091 0. ]]\n" + ] + } + ], + "source": [ + "mask = np.random.normal(10, 2, (5, 5))\n", + "di = np.diag_indices(5)\n", + "matrix = mask\n", + "mask[di] = 0\n", "\n", "print(matrix)" ] }, { "cell_type": "markdown", + "id": "b6846e14", "metadata": { "id": "quiet-complement" }, "source": [ - "13. Задание\n", + "## 13. 
Задание\n", "- дано два массива, проверить одинаковы ли они" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 102, + "id": "1164d346", "metadata": { "id": "french-fighter" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0 1 1 1 0]\n", + "[1 0 1 1 0]\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "a = np.random.randint(0,2,5)\n", "print(a)\n", "b = np.random.randint(0,2,5)\n", "print(b)\n", - "equal = \n", + "equal = np.array_equal(a,b)\n", "equal" ] }, { "cell_type": "markdown", + "id": "ea136ed8", "metadata": { "id": "color-amplifier" }, @@ -347,23 +558,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 125, + "id": "f418b5cc", "metadata": { "id": "close-daisy" }, - "outputs": [], - "source": [ - "r, c = \n", - "a = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.09276302 0.0772502 0.53037219 0.86408608]\n", + " [0.29126194 0.60865055 0.5691283 0.72585249]\n", + " [0.93778753 0.65489803 0.54313851 0.91065248]\n", + " [0.76604798 0.19713809 0.97469657 0.70819056]]\n", + "7\n", + "[0.72585249 0.60865055 0.86408608 0.97469657 0.09276302 0.86408608\n", + " 0.76604798]\n" + ] + } + ], + "source": [ + "r, c = np.random.randint(3,7), np.random.randint(2,12)\n", + "a = np.random.sample((r,c))\n", "print(a)\n", - "N = \n", + "N = np.random.randint(0, r*c/2) \n", "print(N)\n", - "sample = \n", + "sample = np.random.choice(a.reshape(r*c), N)\n", "print(sample)" ] }, { "cell_type": "markdown", + "id": "2100ddd2", "metadata": { "id": "patent-african" }, @@ -376,20 +603,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 132, + "id": "3b326a30", "metadata": { "id": "taken-fabric" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "is it 
NaN? [False True False]\n", + "is it Infinity? [False False True]\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 1., nan, inf])" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "a = np.array([1, np.NaN, np.Inf], float)\n", - "\n", - "\n", + "print('is it NaN? ', np.isnan(a))\n", + "print('is it Infinity? ', np.isinf(a))\n", "a" ] }, { "cell_type": "markdown", + "id": "d2f060f1", "metadata": { "id": "analyzed-ireland" }, @@ -401,20 +649,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 155, + "id": "9fc20263", "metadata": { "id": "imposed-digest" }, - "outputs": [], - "source": [ - "axis = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6\n", + "6\n" + ] + } + ], + "source": [ + "axis = np.random.randint(0, 10)\n", "print(axis)\n", - "matrix = \n", - "print(...)" + "matrix = np.arange(axis**axis).reshape(*[axis]*axis)\n", + "print(matrix.ndim)" ] }, { "cell_type": "markdown", + "id": "33ad782e", "metadata": { "id": "regulation-colleague" }, @@ -427,18 +686,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 145, + "id": "d2d05ada", "metadata": { "id": "concerned-anthropology" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[59.50722007 55.64882258 43.601435 ]\n", + " [56.64101752 58.81946836 40.94583216]\n", + " [69.44451729 55.30175182 54.80710386]\n", + " [42.44817153 57.14702188 32.49550562]\n", + " [47.76582561 59.55932534 57.05173243]\n", + " [43.96263667 55.42736882 56.24908581]\n", + " [61.1270066 59.481786 50.71685145]\n", + " [45.8277303 49.38359947 45.56134551]\n", + " [60.70464341 60.40841661 63.36467888]\n", + " [54.93177829 45.39530205 56.13291731]]\n", + "[0 1 0 1 1 2 0 1 2 2]\n", + "[59.50722007 58.81946836 69.44451729 57.14702188 59.55932534 56.24908581\n", + " 61.1270066 49.38359947 63.36467888 56.13291731]\n" + ] + } + ], "source": 
[ "matrix = np.random.normal(50, 10, (10,3))\n", "print(matrix)\n", - "indexes = \n", + "indexes = matrix.argmax(axis = 1)\n", "print(indexes)\n", - "print(...)" + "print(matrix.max(axis=1))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf735928", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -462,7 +750,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.8" } }, "nbformat": 4, diff --git a/Pandas (06.03)/Pandas. Task. Part 1.ipynb b/Pandas (06.03)/Pandas. Task. Part 1.ipynb index 5172e85..a22e985 100644 --- a/Pandas (06.03)/Pandas. Task. Part 1.ipynb +++ b/Pandas (06.03)/Pandas. Task. Part 1.ipynb @@ -1 +1,1274 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"anaconda-cloud":{},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.6"},"colab":{"name":"01_task_pandas.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"UTKVH3sMutTM"},"source":["**В задании предлагается с помощью Pandas ответить на несколько вопросов по данным репозитория UCI [Adult](https://archive.ics.uci.edu/ml/datasets/Adult)**"]},{"cell_type":"markdown","metadata":{"id":"3lUT-CqYutTO"},"source":["Уникальные значения признаков (больше информации по ссылке выше):\n","- age: continuous.\n","- workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.\n","- fnlwgt: continuous.\n","- education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.\n","- education-num: continuous.\n","- marital-status: Married-civ-spouse, Divorced, Never-married, Separated, 
Widowed, Married-spouse-absent, Married-AF-spouse.\n","- occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.\n","- relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.\n","- race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.\n","- sex: Female, Male.\n","- capital-gain: continuous.\n","- capital-loss: continuous.\n","- hours-per-week: continuous.\n","- native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. \n","- salary: >50K,<=50K"]},{"cell_type":"code","metadata":{"id":"6GzulHvOutTR"},"source":["import pandas as pd"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"SJ3LbaoiutTT","colab":{"base_uri":"https://localhost:8080/","height":380},"executionInfo":{"status":"ok","timestamp":1626441443051,"user_tz":-300,"elapsed":499,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"eab110b9-0f5f-4bcd-db91-328a0b391379"},"source":["data = pd.read_csv(\"https://raw.githubusercontent.com/aksenov7/Kaggle_competition_group/master/adult.data.csv\")\n","data.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n","
"],"text/plain":[" age workclass fnlwgt ... hours-per-week native-country salary\n","0 39 State-gov 77516 ... 40 United-States <=50K\n","1 50 Self-emp-not-inc 83311 ... 13 United-States <=50K\n","2 38 Private 215646 ... 40 United-States <=50K\n","3 53 Private 234721 ... 40 United-States <=50K\n","4 28 Private 338409 ... 40 Cuba <=50K\n","\n","[5 rows x 15 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"EpQFv8t1ds05"},"source":["# def married(row):\n","# return \"Married\" in row\n","data[\"married\"] = data[\"marital-status\"].apply(lambda row: \"Married\" in row)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":756},"id":"3Bb2mRTEeoJK","executionInfo":{"status":"ok","timestamp":1626441731759,"user_tz":-300,"elapsed":481,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"9dd7d83b-f51a-4e11-f6dc-035a844f81c9"},"source":["data"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalarymarried
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50KFalse
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50KTrue
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50KFalse
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50KTrue
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50KTrue
...................................................
3255627Private257302Assoc-acdm12Married-civ-spouseTech-supportWifeWhiteFemale0038United-States<=50KTrue
3255740Private154374HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>50KTrue
3255858Private151910HS-grad9WidowedAdm-clericalUnmarriedWhiteFemale0040United-States<=50KFalse
3255922Private201490HS-grad9Never-marriedAdm-clericalOwn-childWhiteMale0020United-States<=50KFalse
3256052Self-emp-inc287927HS-grad9Married-civ-spouseExec-managerialWifeWhiteFemale15024040United-States>50KTrue
\n","

32561 rows × 16 columns

\n","
"],"text/plain":[" age workclass fnlwgt ... native-country salary married\n","0 39 State-gov 77516 ... United-States <=50K False\n","1 50 Self-emp-not-inc 83311 ... United-States <=50K True\n","2 38 Private 215646 ... United-States <=50K False\n","3 53 Private 234721 ... United-States <=50K True\n","4 28 Private 338409 ... Cuba <=50K True\n","... ... ... ... ... ... ... ...\n","32556 27 Private 257302 ... United-States <=50K True\n","32557 40 Private 154374 ... United-States >50K True\n","32558 58 Private 151910 ... United-States <=50K False\n","32559 22 Private 201490 ... United-States <=50K False\n","32560 52 Self-emp-inc 287927 ... United-States >50K True\n","\n","[32561 rows x 16 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"markdown","metadata":{"id":"MoK8B5fIutTW"},"source":["**1. Сколько мужчин и женщин (признак *sex*) представлено в этом наборе данных?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"hdzky90TutTY"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"adF8lgVbutTZ"},"source":["**2. Каков средний возраст (признак *age*) женщин?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"K6C2qZ_zutTb"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"-Cz1S7-HutTd"},"source":["**3. Какова доля граждан Германии (признак *native-country*)?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"Y4mmqN6outTf"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Do-rEgaautTg"},"source":["**4-5. Каковы средние значения и среднеквадратичные отклонения возраста тех, кто получает более 50K в год (признак *salary*) и тех, кто получает менее 50K в год? 
**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"eSuk0CAnutTh"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rK9SwvI_utTj"},"source":["**6. Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование? (признак *education – Bachelors, Prof-school, Assoc-acdm, Assoc-voc, Masters* или *Doctorate*)**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"eygYabkdutTj"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4DqPASEsutTk"},"source":["**7. Выведите статистику возраста для каждой расы (признак *race*) и каждого пола. Используйте *groupby* и *describe*. Найдите таким образом максимальный возраст мужчин расы *Amer-Indian-Eskimo*.**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"fYkBDZMdutTl"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"cn-jYXhzutTl"},"source":["**8. Среди кого больше доля зарабатывающих много (>50K): среди женатых или холостых мужчин (признак *marital-status*)? Женатыми считаем тех, у кого *marital-status* начинается с *Married* (Married-civ-spouse, Married-spouse-absent или Married-AF-spouse), остальных считаем холостыми.**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"4hIQXgGAutTm"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Rsh8YvoXutTm"},"source":["**9. Какое максимальное число часов человек работает в неделю (признак *hours-per-week*)? Сколько людей работают такое количество часов и каков среди них процент зарабатывающих много?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"RK1JQSIZutTn"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"kUXV84AjutTn"},"source":["**10. 
Посчитайте среднее время работы (*hours-per-week*) зарабатывающих мало и много (*salary*) для каждой страны (*native-country*).**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"3gzYG3CDutTn"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "UTKVH3sMutTM" + }, + "source": [ + "**В задании предлагается с помощью Pandas ответить на несколько вопросов по данным репозитория UCI [Adult](https://archive.ics.uci.edu/ml/datasets/Adult)**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3lUT-CqYutTO" + }, + "source": [ + "Уникальные значения признаков (больше информации по ссылке выше):\n", + "- age: continuous.\n", + "- workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.\n", + "- fnlwgt: continuous.\n", + "- education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.\n", + "- education-num: continuous.\n", + "- marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.\n", + "- occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.\n", + "- relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.\n", + "- race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.\n", + "- sex: Female, Male.\n", + "- capital-gain: continuous.\n", + "- capital-loss: continuous.\n", + "- hours-per-week: continuous.\n", + "- native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, 
Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. \n", + "- salary: >50K,<=50K" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "6GzulHvOutTR" + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 380 + }, + "executionInfo": { + "elapsed": 499, + "status": "ok", + "timestamp": 1626441443051, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "SJ3LbaoiutTT", + "outputId": "eab110b9-0f5f-4bcd-db91-328a0b391379" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n", + "
" + ], + "text/plain": [ + " age workclass fnlwgt education education-num \\\n", + "0 39 State-gov 77516 Bachelors 13 \n", + "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", + "2 38 Private 215646 HS-grad 9 \n", + "3 53 Private 234721 11th 7 \n", + "4 28 Private 338409 Bachelors 13 \n", + "\n", + " marital-status occupation relationship race sex \\\n", + "0 Never-married Adm-clerical Not-in-family White Male \n", + "1 Married-civ-spouse Exec-managerial Husband White Male \n", + "2 Divorced Handlers-cleaners Not-in-family White Male \n", + "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", + "4 Married-civ-spouse Prof-specialty Wife Black Female \n", + "\n", + " capital-gain capital-loss hours-per-week native-country salary \n", + "0 2174 0 40 United-States <=50K \n", + "1 0 0 13 United-States <=50K \n", + "2 0 0 40 United-States <=50K \n", + "3 0 0 40 United-States <=50K \n", + "4 0 0 40 Cuba <=50K " + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(\"https://raw.githubusercontent.com/aksenov7/Kaggle_competition_group/master/adult.data.csv\")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": { + "id": "EpQFv8t1ds05" + }, + "outputs": [], + "source": [ + "# def married(row):\n", + "# return \"Married\" in row\n", + "data[\"married\"] = data[\"marital-status\"].apply(lambda row: \"Married\" in row)" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 756 + }, + "executionInfo": { + "elapsed": 481, + "status": "ok", + "timestamp": 1626441731759, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "3Bb2mRTEeoJK", + "outputId": "9dd7d83b-f51a-4e11-f6dc-035a844f81c9" + }, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalarymarried
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50KFalse
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50KTrue
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50KFalse
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50KTrue
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50KTrue
...................................................
3255627Private257302Assoc-acdm12Married-civ-spouseTech-supportWifeWhiteFemale0038United-States<=50KTrue
3255740Private154374HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>50KTrue
3255858Private151910HS-grad9WidowedAdm-clericalUnmarriedWhiteFemale0040United-States<=50KFalse
3255922Private201490HS-grad9Never-marriedAdm-clericalOwn-childWhiteMale0020United-States<=50KFalse
3256052Self-emp-inc287927HS-grad9Married-civ-spouseExec-managerialWifeWhiteFemale15024040United-States>50KTrue
\n", + "

32561 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " age workclass fnlwgt education education-num \\\n", + "0 39 State-gov 77516 Bachelors 13 \n", + "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", + "2 38 Private 215646 HS-grad 9 \n", + "3 53 Private 234721 11th 7 \n", + "4 28 Private 338409 Bachelors 13 \n", + "... ... ... ... ... ... \n", + "32556 27 Private 257302 Assoc-acdm 12 \n", + "32557 40 Private 154374 HS-grad 9 \n", + "32558 58 Private 151910 HS-grad 9 \n", + "32559 22 Private 201490 HS-grad 9 \n", + "32560 52 Self-emp-inc 287927 HS-grad 9 \n", + "\n", + " marital-status occupation relationship race sex \\\n", + "0 Never-married Adm-clerical Not-in-family White Male \n", + "1 Married-civ-spouse Exec-managerial Husband White Male \n", + "2 Divorced Handlers-cleaners Not-in-family White Male \n", + "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", + "4 Married-civ-spouse Prof-specialty Wife Black Female \n", + "... ... ... ... ... ... \n", + "32556 Married-civ-spouse Tech-support Wife White Female \n", + "32557 Married-civ-spouse Machine-op-inspct Husband White Male \n", + "32558 Widowed Adm-clerical Unmarried White Female \n", + "32559 Never-married Adm-clerical Own-child White Male \n", + "32560 Married-civ-spouse Exec-managerial Wife White Female \n", + "\n", + " capital-gain capital-loss hours-per-week native-country salary \\\n", + "0 2174 0 40 United-States <=50K \n", + "1 0 0 13 United-States <=50K \n", + "2 0 0 40 United-States <=50K \n", + "3 0 0 40 United-States <=50K \n", + "4 0 0 40 Cuba <=50K \n", + "... ... ... ... ... ... \n", + "32556 0 0 38 United-States <=50K \n", + "32557 0 0 40 United-States >50K \n", + "32558 0 0 40 United-States <=50K \n", + "32559 0 0 20 United-States <=50K \n", + "32560 15024 0 40 United-States >50K \n", + "\n", + " married \n", + "0 False \n", + "1 True \n", + "2 False \n", + "3 True \n", + "4 True \n", + "... ... 
\n", + "32556 True \n", + "32557 True \n", + "32558 False \n", + "32559 False \n", + "32560 True \n", + "\n", + "[32561 rows x 16 columns]" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MoK8B5fIutTW" + }, + "source": [ + "**1. Сколько мужчин и женщин (признак *sex*) представлено в этом наборе данных?**" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "hdzky90TutTY" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Male 21790\n", + "Female 10771\n", + "Name: sex, dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['sex'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "adF8lgVbutTZ" + }, + "source": [ + "**2. Каков средний возраст (признак *age*) женщин?**" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "K6C2qZ_zutTb" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "count 10771.000000\n", + "mean 36.858230\n", + "std 14.013697\n", + "min 17.000000\n", + "25% 25.000000\n", + "50% 35.000000\n", + "75% 46.000000\n", + "max 90.000000\n", + "Name: age, dtype: float64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data['sex'] == 'Female']['age'].describe()\n", + "#Смотрим соответственно в строке \"mean\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-Cz1S7-HutTd" + }, + "source": [ + "**3. 
Какова доля граждан Германии (признак *native-country*)?**" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "id": "Y4mmqN6outTf" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Доля граждан Германии в исследовании: 0.004\n" + ] + } + ], + "source": [ + "part_german = data[data['native-country'] == 'Germany'].shape[0] / data.shape[0]\n", + "print('Доля граждан Германии в исследовании:', round(part_german, 3))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Do-rEgaautTg" + }, + "source": [ + "**4-5. Каковы средние значения и среднеквадратичные отклонения возраста тех, кто получает более 50K в год (признак *salary*) и тех, кто получает менее 50K в год?**" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "id": "eSuk0CAnutTh" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
salary
<=50K24720.036.78373814.02008817.025.034.046.090.0
>50K7841.044.24984110.51902819.036.044.051.090.0
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% 75% max\n", + "salary \n", + "<=50K 24720.0 36.783738 14.020088 17.0 25.0 34.0 46.0 90.0\n", + ">50K 7841.0 44.249841 10.519028 19.0 36.0 44.0 51.0 90.0" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#средние значения и среднеквадратичные отклонения смотрим в столбцах \"mean\" и \"std\"\n", + "data.groupby('salary')['age'].describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rK9SwvI_utTj" + }, + "source": [ + "**6. Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование? (признак *education – Bachelors, Prof-school, Assoc-acdm, Assoc-voc, Masters* или *Doctorate*)**" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": { + "id": "eygYabkdutTj" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.5783701058538452" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Сначала выберем из датасета только людей с доходом более 50.000 в год.\n", + "#Проверим имеют ли люди из выборки высшее образование и посчитаем среднее значение \n", + "#Метод isin имеет значения True и False. После применения mean() значения преобразуются к 0 и 1.\n", + "data[data['salary'] == '>50K'].education.isin(['Bachelors','Prof-school',\n", + " 'Assoc-acdm','Assoc-voc','Masters','Doctorate']).mean()\n", + "#Получается, что 58% людей с доходом 50.000+ имеют высшее образование. Корреляция довольно-таки слабая." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4DqPASEsutTk" + }, + "source": [ + "**7. Выведите статистику возраста для каждой расы (признак *race*) и каждого пола. Используйте *groupby* и *describe*. 
Найдите таким образом максимальный возраст мужчин расы *Amer-Indian-Eskimo*.**" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": { + "id": "fYkBDZMdutTl" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
racesex
Amer-Indian-EskimoFemale119.037.11764713.11499117.027.036.046.0080.0
Male192.037.20833312.04956317.028.035.045.0082.0
Asian-Pac-IslanderFemale346.035.08959512.30084517.025.033.043.7575.0
Male693.039.07359312.88394418.029.037.046.0090.0
BlackFemale1555.037.85401912.63719717.028.037.046.0090.0
Male1569.037.68260012.88261217.027.036.046.0090.0
OtherFemale109.031.67889911.63159917.023.029.039.0074.0
Male162.034.65432111.35553117.026.032.042.0077.0
WhiteFemale8642.036.81161814.32909317.025.035.046.0090.0
Male19174.039.65249813.43602917.029.038.049.0090.0
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% \\\n", + "race sex \n", + "Amer-Indian-Eskimo Female 119.0 37.117647 13.114991 17.0 27.0 36.0 \n", + " Male 192.0 37.208333 12.049563 17.0 28.0 35.0 \n", + "Asian-Pac-Islander Female 346.0 35.089595 12.300845 17.0 25.0 33.0 \n", + " Male 693.0 39.073593 12.883944 18.0 29.0 37.0 \n", + "Black Female 1555.0 37.854019 12.637197 17.0 28.0 37.0 \n", + " Male 1569.0 37.682600 12.882612 17.0 27.0 36.0 \n", + "Other Female 109.0 31.678899 11.631599 17.0 23.0 29.0 \n", + " Male 162.0 34.654321 11.355531 17.0 26.0 32.0 \n", + "White Female 8642.0 36.811618 14.329093 17.0 25.0 35.0 \n", + " Male 19174.0 39.652498 13.436029 17.0 29.0 38.0 \n", + "\n", + " 75% max \n", + "race sex \n", + "Amer-Indian-Eskimo Female 46.00 80.0 \n", + " Male 45.00 82.0 \n", + "Asian-Pac-Islander Female 43.75 75.0 \n", + " Male 46.00 90.0 \n", + "Black Female 46.00 90.0 \n", + " Male 46.00 90.0 \n", + "Other Female 39.00 74.0 \n", + " Male 42.00 77.0 \n", + "White Female 46.00 90.0 \n", + " Male 49.00 90.0 " + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.groupby(['race','sex'])['age'].describe()\n", + "#смотрим на пересечение столбца max и строки Amer-Indian-Eskimo - Male" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cn-jYXhzutTl" + }, + "source": [ + "**8. Среди кого больше доля зарабатывающих много (>50K): среди женатых или холостых мужчин (признак *marital-status*)? Женатыми считаем тех, у кого *marital-status* начинается с *Married* (Married-civ-spouse, Married-spouse-absent или Married-AF-spouse), остальных считаем холостыми.**" + ] + }, + { + "cell_type": "code", + "execution_count": 229, + "metadata": { + "id": "4hIQXgGAutTm" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agefnlwgteducation-numcapital-gaincapital-losshours-per-weekyes
married
False32.484907198454.1579599.794399605.78264063.86640839.7199660.084495
True43.666568187700.43017510.2908201770.170076122.35536544.0778380.440514
\n", + "
" + ], + "text/plain": [ + " age fnlwgt education-num capital-gain capital-loss \\\n", + "married \n", + "False 32.484907 198454.157959 9.794399 605.782640 63.866408 \n", + "True 43.666568 187700.430175 10.290820 1770.170076 122.355365 \n", + "\n", + " hours-per-week yes \n", + "married \n", + "False 39.719966 0.084495 \n", + "True 44.077838 0.440514 " + ] + }, + "execution_count": 229, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#столбец salary строковый ('<=50K' / '>50K'), поэтому создам числовой столбец 'yes': 1 для '>50K' и 0 иначе (для удобства подсчета среднего)\n", + "def bias(x):\n", + " if x == '>50K':\n", + " return 1\n", + " else:\n", + " return 0\n", + "\n", + "data['yes'] = data['salary'].apply(bias)\n", + "data[data['sex']=='Male'].groupby(['married']).mean()\n", + "#как мы видим, среди женатых мужчин доля тех, кто зарабатывает больше 50К, равна 0.44\n", + "#среди холостых мужчин она равна 0.08" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rsh8YvoXutTm" + }, + "source": [ + "**9. Какое максимальное число часов человек работает в неделю (признак *hours-per-week*)? 
Сколько людей работают такое количество часов и каков среди них процент зарабатывающих много?**" + ] + }, + { + "cell_type": "code", + "execution_count": 228, + "metadata": { + "id": "RK1JQSIZutTn" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "maximal work-time per week: 99\n", + "how many people work for this time: 85\n", + "How many those persons earn more than 50K$ per year: 29.4 %\n" + ] + } + ], + "source": [ + "max_hour = data['hours-per-week'].max()\n", + "print('maximal work-time per week:', max_hour)\n", + "print('how many people work for this time: ',data[data['hours-per-week'] == max_hour].shape[0])\n", + "print('How many those persons earn more than 50K$ per year: ', \n", + " round(data[data['hours-per-week'] == max_hour].mean()[-1], 3) * 100,'%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kUXV84AjutTn" + }, + "source": [ + "**10. Посчитайте среднее время работы (*hours-per-week*) зарабатывающих мало и много (*salary*) для каждой страны (*native-country*).**" + ] + }, + { + "cell_type": "code", + "execution_count": 233, + "metadata": { + "id": "3gzYG3CDutTn" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "native-country salary\n", + "? <=50K 40.164760\n", + " >50K 45.547945\n", + "Cambodia <=50K 41.416667\n", + " >50K 40.000000\n", + "Canada <=50K 37.914634\n", + " ... 
\n", + "United-States >50K 45.505369\n", + "Vietnam <=50K 37.193548\n", + " >50K 39.200000\n", + "Yugoslavia <=50K 41.600000\n", + " >50K 49.500000\n", + "Name: hours-per-week, Length: 82, dtype: float64" + ] + }, + "execution_count": 233, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.groupby(['native-country','salary']).mean()['hours-per-week']\n", + "#В итоге мы имеем 82 записи - правый столбец содержит среднее время работы" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "colab": { + "collapsed_sections": [], + "name": "01_task_pandas.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/Pandas (06.03)/Pandas. Task. Part 2.ipynb b/Pandas (06.03)/Pandas. Task. Part 2.ipynb index bb60a1c..855d6ee 100644 --- a/Pandas (06.03)/Pandas. Task. Part 2.ipynb +++ b/Pandas (06.03)/Pandas. Task. 
Part 2.ipynb @@ -1 +1,737 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.8"},"colab":{"name":"02_pandas_task.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"EmV0s8YY05p7"},"source":["- __ID__ - Unique number for each athlete\n","- __Name__ - Athlete's name\n","- __Sex__ - M or F\n","- __Age__ - Integer\n","- __Height__ - In centimeters\n","- __Weight__ - In kilograms\n","- __Team__ - Team name\n","- __NOC__ - National Olympic Committee 3-letter code\n","- __Games__ - Year and season\n","- __Year__ - Integer\n","- __Season__ - Summer or Winter\n","- __City__ - Host city\n","- __Sport__ - Sport\n","- __Event__ - Event\n","- __Medal__ - Gold, Silver, Bronze, or NA"]},{"cell_type":"code","metadata":{"id":"rVCrMDMh05p_"},"source":["import pandas as pd"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"D5Q4Z-JW05qC"},"source":["# не меняем путь!\n","PATH = 'https://github.com/aksenov7/Kaggle_competition_group/blob/master/athlete_events.csv.zip?raw=true'"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"mI0LtqkY4Kp-"},"source":["__0. 
Откройте файл используя необходимые параметры и не меняя переменную PATH__"]},{"cell_type":"code","metadata":{"id":"h5SQwBLr05qG","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1615627554682,"user_tz":-300,"elapsed":2477,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"882f9e83-5fd7-4c3b-b005-56917b15a0fd"},"source":["data = \n","data.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
IDNameSexAgeHeightWeightTeamNOCGamesYearSeasonCitySportEventMedal
01A DijiangM24.0180.080.0ChinaCHN1992 Summer1992SummerBarcelonaBasketballBasketball Men's BasketballNaN
12A LamusiM23.0170.060.0ChinaCHN2012 Summer2012SummerLondonJudoJudo Men's Extra-LightweightNaN
23Gunnar Nielsen AabyM24.0NaNNaNDenmarkDEN1920 Summer1920SummerAntwerpenFootballFootball Men's FootballNaN
34Edgar Lindenau AabyeM34.0NaNNaNDenmark/SwedenDEN1900 Summer1900SummerParisTug-Of-WarTug-Of-War Men's Tug-Of-WarGold
45Christine Jacoba AaftinkF21.0185.082.0NetherlandsNED1988 Winter1988WinterCalgarySpeed SkatingSpeed Skating Women's 500 metresNaN
\n","
"],"text/plain":[" ID Name ... Event Medal\n","0 1 A Dijiang ... Basketball Men's Basketball NaN\n","1 2 A Lamusi ... Judo Men's Extra-Lightweight NaN\n","2 3 Gunnar Nielsen Aaby ... Football Men's Football NaN\n","3 4 Edgar Lindenau Aabye ... Tug-Of-War Men's Tug-Of-War Gold\n","4 5 Christine Jacoba Aaftink ... Speed Skating Women's 500 metres NaN\n","\n","[5 rows x 15 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"stYR4EbV05qP"},"source":["__1. Сколько лет было самым молодым мужчинам и женщинам-участникам Олимпийских игр 1992 года ?__\n","- 16 и 15\n","- 14 и 13 \n","- 13 и 11\n","- 11 и 12"]},{"cell_type":"code","metadata":{"id":"HgiqBXtb05qR"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"GQ290dsi05qc"},"source":["__2. Каков был процент баскетболистов-мужчин среди всех мужчин-участников Олимпийских игр 2012 года? Округлите ответ до первого десятичного знака.__\n","\n","Здесь и далее при необходимости отбрасывайте дублированных спортсменов, чтобы считать только уникальных . \n","- 0.2\n","- 1.5 \n","- 2.5\n","- 7.7"]},{"cell_type":"code","metadata":{"id":"-fI5MqWP05qi"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"u5WrTgIC05qv"},"source":["__3. Каковы среднее и стандартное отклонение роста теннисисток, участвовавших в Олимпийских играх 2000 года? Округлите ответ до первого десятичного знака.__\n","\n","- 171.8 и 6.5\n","- 179.4 и 10\n","- 180.7 и 6.7\n","- 182.4 и 9.1 "]},{"cell_type":"code","metadata":{"id":"vsKTqn6405qw"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"xOOEzhNQ05qy"},"source":["__4. Найдите спортсмена, который участвовал в Олимпийских играх 2006 года, с наибольшим весом среди других участников той же Олимпиады. 
Каким спортом он или она занимался?__\n","\n","- Judo\n","- Bobsleigh \n","- Skeleton\n","- Boxing"]},{"cell_type":"code","metadata":{"id":"EkWD1Tnb05qz"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"UQzxZ3HT05q0"},"source":["__5. Сколько раз John Aalberg участвовал в Олимпийских играх в разные годы?__\n","\n","Один год - это один раз. Неважно сколько участий внутри одного года\n","- 0\n","- 1 \n","- 2\n","- 3 "]},{"cell_type":"code","metadata":{"id":"ZSfkdjPO05q0"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"8EnLcNrk05q3"},"source":["__6. Сколько золотых медалей по теннису выиграли спортсмены сборной Switzerland на Олимпиаде-2008? Считайте каждую медаль от каждого спортсмена.__\n","\n","- 0\n","- 1 \n","- 2\n","- 3 "]},{"cell_type":"code","metadata":{"id":"Y754OGI-05q3"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"v3h5sQF805q5"},"source":["__7. Правда ли, что на Олимпийских играх 2016 Spain выиграла меньше медалей, чем Италия?__ \n","\n","- Да\n","- Нет"]},{"cell_type":"code","metadata":{"id":"gqJqDi2605q7"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"kkSYL5mK05q-"},"source":["__8. К какой возрастной категории принадлежало наименьшее и наибольшее количество участников Олимпиады-2008?__\n","\n","- [45-55] и [25-35) соответственно\n","- [45-55] и [15-25) соответственно\n","- [35-45) и [25-35) соответственно\n","- [45-55] и [35-45) соответственно"]},{"cell_type":"code","metadata":{"id":"pMAQtW7i05q_"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"JQmJPiXv05rB"},"source":["__9. Правда ли, что в Atlanta проводились летние Олимпийские игры? Правда ли, что в Squaw Valley проводились зимние Олимпийские игры? 
?__\n","\n","- Да, Да\n","- Да, Нет\n","- Нет, Да \n","- Нет, Нет "]},{"cell_type":"code","metadata":{"id":"UU66wRHC05rB"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4hxR5D-t05rF"},"source":["__10. Какова абсолютная разница между количеством уникальных видов спорта на Олимпиаде 1986 года и Олимпиаде 2002 года?__\n","\n","- 3 \n","- 10\n","- 15\n","- 27 "]},{"cell_type":"code","metadata":{"id":"WKIr-TR105rF"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "EmV0s8YY05p7" + }, + "source": [ + "- __ID__ - Unique number for each athlete\n", + "- __Name__ - Athlete's name\n", + "- __Sex__ - M or F\n", + "- __Age__ - Integer\n", + "- __Height__ - In centimeters\n", + "- __Weight__ - In kilograms\n", + "- __Team__ - Team name\n", + "- __NOC__ - National Olympic Committee 3-letter code\n", + "- __Games__ - Year and season\n", + "- __Year__ - Integer\n", + "- __Season__ - Summer or Winter\n", + "- __City__ - Host city\n", + "- __Sport__ - Sport\n", + "- __Event__ - Event\n", + "- __Medal__ - Gold, Silver, Bronze, or NA" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "rVCrMDMh05p_" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import zipfile" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "D5Q4Z-JW05qC" + }, + "outputs": [], + "source": [ + "# не меняем путь!\n", + "PATH = 'https://github.com/aksenov7/Kaggle_competition_group/blob/master/athlete_events.csv.zip?raw=true'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mI0LtqkY4Kp-" + }, + "source": [ + "__0. 
Откройте файл используя необходимые параметры и не меняя переменную PATH__" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "executionInfo": { + "elapsed": 2477, + "status": "ok", + "timestamp": 1615627554682, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "h5SQwBLr05qG", + "outputId": "882f9e83-5fd7-4c3b-b005-56917b15a0fd" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDNameSexAgeHeightWeightTeamNOCGamesYearSeasonCitySportEventMedal
01A DijiangM24.0180.080.0ChinaCHN1992 Summer1992SummerBarcelonaBasketballBasketball Men's BasketballNaN
12A LamusiM23.0170.060.0ChinaCHN2012 Summer2012SummerLondonJudoJudo Men's Extra-LightweightNaN
23Gunnar Nielsen AabyM24.0NaNNaNDenmarkDEN1920 Summer1920SummerAntwerpenFootballFootball Men's FootballNaN
34Edgar Lindenau AabyeM34.0NaNNaNDenmark/SwedenDEN1900 Summer1900SummerParisTug-Of-WarTug-Of-War Men's Tug-Of-WarGold
45Christine Jacoba AaftinkF21.0185.082.0NetherlandsNED1988 Winter1988WinterCalgarySpeed SkatingSpeed Skating Women's 500 metresNaN
\n", + "
" + ], + "text/plain": [ + " ID Name Sex Age Height Weight Team \\\n", + "0 1 A Dijiang M 24.0 180.0 80.0 China \n", + "1 2 A Lamusi M 23.0 170.0 60.0 China \n", + "2 3 Gunnar Nielsen Aaby M 24.0 NaN NaN Denmark \n", + "3 4 Edgar Lindenau Aabye M 34.0 NaN NaN Denmark/Sweden \n", + "4 5 Christine Jacoba Aaftink F 21.0 185.0 82.0 Netherlands \n", + "\n", + " NOC Games Year Season City Sport \\\n", + "0 CHN 1992 Summer 1992 Summer Barcelona Basketball \n", + "1 CHN 2012 Summer 2012 Summer London Judo \n", + "2 DEN 1920 Summer 1920 Summer Antwerpen Football \n", + "3 DEN 1900 Summer 1900 Summer Paris Tug-Of-War \n", + "4 NED 1988 Winter 1988 Winter Calgary Speed Skating \n", + "\n", + " Event Medal \n", + "0 Basketball Men's Basketball NaN \n", + "1 Judo Men's Extra-Lightweight NaN \n", + "2 Football Men's Football NaN \n", + "3 Tug-Of-War Men's Tug-Of-War Gold \n", + "4 Speed Skating Women's 500 metres NaN " + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(PATH, compression='zip', sep=',')\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "stYR4EbV05qP" + }, + "source": [ + "__1. Сколько лет было самым молодым мужчинам и женщинам-участникам Олимпийских игр 1992 года ?__\n", + "- 16 и 15\n", + "- 14 и 13 \n", + "- 13 и 11\n", + "- 11 и 12" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "id": "HgiqBXtb05qR" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Sex\n", + "F 12.0\n", + "M 11.0\n", + "Name: Age, dtype: float64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Результаты прямо скажем удивительные. Не подумал бы что совсем дети могут участвовать в играх.\n", + "data[data['Year']==1992].groupby('Sex')['Age'].min()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GQ290dsi05qc" + }, + "source": [ + "__2. 
Каков был процент баскетболистов-мужчин среди всех мужчин-участников Олимпийских игр 2012 года? Округлите ответ до первого десятичного знака.__\n", + "\n", + "Здесь и далее при необходимости отбрасывайте дублированных спортсменов, чтобы считать только уникальных . \n", + "- 0.2\n", + "- 1.5 \n", + "- 2.5\n", + "- 7.7" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": { + "id": "-fI5MqWP05qi" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2.5" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = data[(data['Sex']=='M')&(data['Sport']=='Basketball')&(data['Year']==2012)]['Name'].nunique()\n", + "b = data[(data['Sex']=='M')&(data['Year']==2012)]['Name'].nunique()\n", + "round(a/b*100,1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u5WrTgIC05qv" + }, + "source": [ + "__3. Каковы среднее и стандартное отклонение роста теннисисток, участвовавших в Олимпийских играх 2000 года? Округлите ответ до первого десятичного знака.__\n", + "\n", + "- 171.8 и 6.5\n", + "- 179.4 и 10\n", + "- 180.7 и 6.7\n", + "- 182.4 и 9.1 " + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "id": "vsKTqn6405qw" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "171.8\n", + "6.5\n" + ] + } + ], + "source": [ + "a = data[(data['Sex']=='F')&(data['Year']==2000)&(data['Sport']=='Tennis')].drop_duplicates()['Height']\n", + "print(round(a.mean(),1))\n", + "print(round(a.std(),1))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xOOEzhNQ05qy" + }, + "source": [ + "__4. Найдите спортсмена, который участвовал в Олимпийских играх 2006 года, с наибольшим весом среди других участников той же Олимпиады. 
Каким спортом он или она занимался?__\n", + "\n", + "- Judo\n", + "- Bobsleigh \n", + "- Skeleton\n", + "- Boxing" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "id": "EkWD1Tnb05qz" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDNameSexAgeHeightWeightTeamNOCGamesYearSeasonCitySportEventMedal
81024476Patrick R. AntakiM41.0185.0127.0LebanonLIB2006 Winter2006WinterTorinoSkeletonSkeleton Men's SkeletonNaN
\n", + "
# Number of gold medals in tennis won by athletes of team Switzerland at the
# 2008 Games. Every medal of every athlete is counted, i.e. one per matching row.
#
# The Sport condition is essential: without it the count covers all Swiss gold
# medals of that year, not only the tennis ones the task asks about.
swiss_tennis_gold_2008 = (
    (data['Team'] == 'Switzerland')
    & (data['Year'] == 2008)
    & (data['Sport'] == 'Tennis')
    & (data['Medal'] == 'Gold')
)
# int() so the cell displays a plain Python integer, like the previous .shape[0].
int(swiss_tennis_gold_2008.sum())
def categories(a):
    """Map an age to the label of its age bracket.

    Parameters
    ----------
    a : int, float or None
        Age in years. May be NaN (or None) when the age is unknown.

    Returns
    -------
    str or None
        One of '[15-25)', '[25-35)', '[35-45)', '[45-55]'.
        None when the age is missing or lies outside the 15..55 span.
    """
    if a is None:
        return None
    # Interval comparisons instead of `a in range(...)`: a range contains only
    # whole numbers, so a fractional age such as 24.5 used to fall through to
    # None. Any comparison with NaN is False, so missing ages still yield None.
    if 15 <= a < 25:
        return '[15-25)'
    if 25 <= a < 35:
        return '[25-35)'
    if 35 <= a < 45:
        return '[35-45)'
    # The last bracket is closed on the right, matching its label.
    if 45 <= a <= 55:
        return '[45-55]'
    return None
# Absolute difference between the number of distinct sports in the two years
# named by the task: 1986 and 2002.
#
# No Olympic Games were held in 1986, so that year simply contributes zero
# sports. Substituting 1896 for it (as an assumed typo) yields 6, which is not
# one of the offered answers, so the years are taken exactly as asked.
sports_1986 = data.loc[data['Year'] == 1986, 'Sport'].nunique()
sports_2002 = data.loc[data['Year'] == 2002, 'Sport'].nunique()
print(abs(sports_1986 - sports_2002))
+ "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}