diff --git a/Pandas (06.03)/Pandas. Task. Part 1.ipynb b/Pandas (06.03)/Pandas. Task. Part 1.ipynb index 5172e85..4f4b836 100644 --- a/Pandas (06.03)/Pandas. Task. Part 1.ipynb +++ b/Pandas (06.03)/Pandas. Task. Part 1.ipynb @@ -1 +1,434 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"anaconda-cloud":{},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.6"},"colab":{"name":"01_task_pandas.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"UTKVH3sMutTM"},"source":["**В задании предлагается с помощью Pandas ответить на несколько вопросов по данным репозитория UCI [Adult](https://archive.ics.uci.edu/ml/datasets/Adult)**"]},{"cell_type":"markdown","metadata":{"id":"3lUT-CqYutTO"},"source":["Уникальные значения признаков (больше информации по ссылке выше):\n","- age: continuous.\n","- workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.\n","- fnlwgt: continuous.\n","- education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.\n","- education-num: continuous.\n","- marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.\n","- occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.\n","- relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.\n","- race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.\n","- sex: Female, Male.\n","- capital-gain: continuous.\n","- capital-loss: continuous.\n","- hours-per-week: continuous.\n","- native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. \n","- salary: >50K,<=50K"]},{"cell_type":"code","metadata":{"id":"6GzulHvOutTR"},"source":["import pandas as pd"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"SJ3LbaoiutTT","colab":{"base_uri":"https://localhost:8080/","height":380},"executionInfo":{"status":"ok","timestamp":1626441443051,"user_tz":-300,"elapsed":499,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"eab110b9-0f5f-4bcd-db91-328a0b391379"},"source":["data = pd.read_csv(\"https://raw.githubusercontent.com/aksenov7/Kaggle_competition_group/master/adult.data.csv\")\n","data.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n","
"],"text/plain":[" age workclass fnlwgt ... hours-per-week native-country salary\n","0 39 State-gov 77516 ... 40 United-States <=50K\n","1 50 Self-emp-not-inc 83311 ... 13 United-States <=50K\n","2 38 Private 215646 ... 40 United-States <=50K\n","3 53 Private 234721 ... 40 United-States <=50K\n","4 28 Private 338409 ... 40 Cuba <=50K\n","\n","[5 rows x 15 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"EpQFv8t1ds05"},"source":["# def married(row):\n","# return \"Married\" in row\n","data[\"married\"] = data[\"marital-status\"].apply(lambda row: \"Married\" in row)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":756},"id":"3Bb2mRTEeoJK","executionInfo":{"status":"ok","timestamp":1626441731759,"user_tz":-300,"elapsed":481,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"9dd7d83b-f51a-4e11-f6dc-035a844f81c9"},"source":["data"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalarymarried
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50KFalse
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50KTrue
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50KFalse
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50KTrue
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50KTrue
...................................................
3255627Private257302Assoc-acdm12Married-civ-spouseTech-supportWifeWhiteFemale0038United-States<=50KTrue
3255740Private154374HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>50KTrue
3255858Private151910HS-grad9WidowedAdm-clericalUnmarriedWhiteFemale0040United-States<=50KFalse
3255922Private201490HS-grad9Never-marriedAdm-clericalOwn-childWhiteMale0020United-States<=50KFalse
3256052Self-emp-inc287927HS-grad9Married-civ-spouseExec-managerialWifeWhiteFemale15024040United-States>50KTrue
\n","

32561 rows × 16 columns

\n","
"],"text/plain":[" age workclass fnlwgt ... native-country salary married\n","0 39 State-gov 77516 ... United-States <=50K False\n","1 50 Self-emp-not-inc 83311 ... United-States <=50K True\n","2 38 Private 215646 ... United-States <=50K False\n","3 53 Private 234721 ... United-States <=50K True\n","4 28 Private 338409 ... Cuba <=50K True\n","... ... ... ... ... ... ... ...\n","32556 27 Private 257302 ... United-States <=50K True\n","32557 40 Private 154374 ... United-States >50K True\n","32558 58 Private 151910 ... United-States <=50K False\n","32559 22 Private 201490 ... United-States <=50K False\n","32560 52 Self-emp-inc 287927 ... United-States >50K True\n","\n","[32561 rows x 16 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"markdown","metadata":{"id":"MoK8B5fIutTW"},"source":["**1. Сколько мужчин и женщин (признак *sex*) представлено в этом наборе данных?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"hdzky90TutTY"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"adF8lgVbutTZ"},"source":["**2. Каков средний возраст (признак *age*) женщин?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"K6C2qZ_zutTb"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"-Cz1S7-HutTd"},"source":["**3. Какова доля граждан Германии (признак *native-country*)?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"Y4mmqN6outTf"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Do-rEgaautTg"},"source":["**4-5. Каковы средние значения и среднеквадратичные отклонения возраста тех, кто получает более 50K в год (признак *salary*) и тех, кто получает менее 50K в год? **"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"eSuk0CAnutTh"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rK9SwvI_utTj"},"source":["**6. Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование? (признак *education – Bachelors, Prof-school, Assoc-acdm, Assoc-voc, Masters* или *Doctorate*)**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"eygYabkdutTj"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4DqPASEsutTk"},"source":["**7. Выведите статистику возраста для каждой расы (признак *race*) и каждого пола. Используйте *groupby* и *describe*. Найдите таким образом максимальный возраст мужчин расы *Amer-Indian-Eskimo*.**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"fYkBDZMdutTl"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"cn-jYXhzutTl"},"source":["**8. Среди кого больше доля зарабатывающих много (>50K): среди женатых или холостых мужчин (признак *marital-status*)? Женатыми считаем тех, у кого *marital-status* начинается с *Married* (Married-civ-spouse, Married-spouse-absent или Married-AF-spouse), остальных считаем холостыми.**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"4hIQXgGAutTm"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Rsh8YvoXutTm"},"source":["**9. Какое максимальное число часов человек работает в неделю (признак *hours-per-week*)? Сколько людей работают такое количество часов и каков среди них процент зарабатывающих много?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"RK1JQSIZutTn"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"kUXV84AjutTn"},"source":["**10. Посчитайте среднее время работы (*hours-per-week*) зарабатывающих мало и много (*salary*) для каждой страны (*native-country*).**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"3gzYG3CDutTn"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 27, + "id": "f28bf51b-9f96-40e4-a2e5-25680f516a78", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "a3481be5-11ef-45cf-9a0c-a5fbc950ab14", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n", + "
" + ], + "text/plain": [ + " age workclass fnlwgt education education-num \\\n", + "0 39 State-gov 77516 Bachelors 13 \n", + "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", + "2 38 Private 215646 HS-grad 9 \n", + "3 53 Private 234721 11th 7 \n", + "4 28 Private 338409 Bachelors 13 \n", + "\n", + " marital-status occupation relationship race sex \\\n", + "0 Never-married Adm-clerical Not-in-family White Male \n", + "1 Married-civ-spouse Exec-managerial Husband White Male \n", + "2 Divorced Handlers-cleaners Not-in-family White Male \n", + "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", + "4 Married-civ-spouse Prof-specialty Wife Black Female \n", + "\n", + " capital-gain capital-loss hours-per-week native-country salary \n", + "0 2174 0 40 United-States <=50K \n", + "1 0 0 13 United-States <=50K \n", + "2 0 0 40 United-States <=50K \n", + "3 0 0 40 United-States <=50K \n", + "4 0 0 40 Cuba <=50K " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(\"https://raw.githubusercontent.com/aksenov7/Kaggle_competition_group/master/adult.data.csv\")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "e082fd28-ed54-475b-aa96-e5604084c2b4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "количество мужчин 21790 количество женщин 10771 общее количество 32561\n" + ] + } + ], + "source": [ + "gender=data[\"sex\"]\n", + "male_count=len([i for i in gender if i==\"Male\"])\n", + "female_count=len([i for i in gender if i ==\"Female\"])\n", + "print(\"количество мужчин\",male_count,\"количество женщин\",female_count,\"общее количество\",male_count+female_count)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "66b27f98-5f56-4040-aa4e-0b2375b7311c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "средний возраст женщин 36.85823043357163\n" + ] + } + ], + "source": [ + "print(\"средний возраст женщин\",data.loc[data.sex==\"Female\"][\"age\"].mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "927c0a22-c417-44f4-9df7-d8fa534c1682", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "доля граждан Германии 0.42074874850281013\n" + ] + } + ], + "source": [ + "df=data.groupby(\"native-country\").size()/len(data)*100\n", + "print(\"доля граждан Германии\",df[\"Germany\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "f71b2ca2-bc5b-4320-b2cd-55c63f56c2df", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "средние значения возраста тех, кто получает меньше 50K либо 50K в год 36.78373786407767\n", + "средние значения возраста тех, кто получает больше 50K в год 44.24984058155847\n" + ] + } + ], + "source": [ + "df=data.groupby(\"salary\")[\"age\"].mean()\n", + "print(\"средние значения возраста тех, кто получает меньше 50K либо 50K в год\",df[\"<=50K\"])\n", + "print(\"средние значения возраста тех, кто получает больше 50K в год\",df[\">50K\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "461a6bb1-b34b-4b4f-9ec5-ed0064ee9ca5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "среднеквадратичные отклонение возраста тех, кто получает меньше 50K либо 50K в год 14.020088490824866\n", + "среднеквадратичные отклонения возраста тех, кто получает больше 50K в год 10.519027719851843\n" + ] + } + ], + "source": [ + "df=data.groupby(\"salary\")[\"age\"].std()\n", + "print(\"среднеквадратичные отклонение возраста тех, кто получает меньше 50K либо 50K в год\",df[\"<=50K\"])\n", + "print(\"среднеквадратичные отклонения возраста тех, кто получает больше 50K в год\",df[\">50K\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "ee92d27b-e556-44d7-a247-36969ea59a5c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование? False\n" + ] + } + ], + "source": [ + "df=data[data[\"salary\"]==\">50K\"]\n", + "count_people=len(df[df[\"education\"]==\"Bachelors\"])+len(df[df[\"education\"]==\"Prof-school\"])+len(df[df[\"education\"]==\"Assoc-acdm\"])+len(df[df[\"education\"]==\"Assoc-voc\"])+len(df[df[\"education\"]==\"Masters\"])+len(df[df[\"education\"]==\"Doctorate\"])\n", + "print(\"Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование?\",len(df)==count_people)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "8f5ba28c-18a6-480b-a9f5-469d4ec98b75", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Максимальный возраст мужчин расы Amer-Indian-Eskimo. 82.0\n" + ] + } + ], + "source": [ + "df=data.groupby([\"race\",\"sex\"]).describe().loc['Amer-Indian-Eskimo'].loc['Male']\n", + "print(\"Максимальный возраст мужчин расы Amer-Indian-Eskimo.\",df[\"age\"][\"max\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "24200b3e-7288-4b78-8168-614fa16cfa63", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Доля среди женатых больше True\n" + ] + } + ], + "source": [ + "df=data[data[\"salary\"]==\">50K\"]\n", + "df=df[df[\"sex\"]==\"Male\"]\n", + "count_men=len(df[df[\"marital-status\"]==\"Married-civ-spouse\"])+len(df[df[\"marital-status\"]==\"Married-spouse-absent\"])+len(df[df[\"marital-status\"]==\"Married-AF-spouse\"])\n", + "print(\"Доля среди женатых больше\",count_men>len(df)-count_men)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "c735400b-4ef1-4573-8167-674910358732", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "максимальное время работы человека 99.0\n", + "кол-во людей работающих максимальное количество времени 85\n", + "процент зарабатывающих много 29.411764705882355\n" + ] + } + ], + "source": [ + "max_number=data[\"hours-per-week\"].describe()[\"max\"]\n", + "max_count=len(data[data[\"hours-per-week\"]==max_number])\n", + "max_count_people=data[data[\"hours-per-week\"]==max_number]\n", + "max_count_rich=len(max_count_people[max_count_people[\"salary\"]==\">50K\"])\n", + "print(\"максимальное время работы человека\",max_number)\n", + "print(\"кол-во людей работающих максимальное количество времени\",max_count)\n", + "print(\"процент зарабатывающих много\",max_count_rich/max_count*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "3fbfdfd0-7326-4424-a843-b7b4faad292b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "среднее время работы зарабатывающих мало и много для каждой страны\n", + "native-country salary\n", + "? <=50K 40.164760\n", + " >50K 45.547945\n", + "Cambodia <=50K 41.416667\n", + " >50K 40.000000\n", + "Canada <=50K 37.914634\n", + " ... \n", + "United-States >50K 45.505369\n", + "Vietnam <=50K 37.193548\n", + " >50K 39.200000\n", + "Yugoslavia <=50K 41.600000\n", + " >50K 49.500000\n", + "Name: hours-per-week, Length: 82, dtype: float64\n" + ] + } + ], + "source": [ + "print(\"среднее время работы зарабатывающих мало и много для каждой страны\")\n", + "print(data.groupby(['native-country', 'salary'])['hours-per-week'].mean())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6cda6f7-acf7-4eaa-b781-b9fbd8cf74ab", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}