From 3469b7e8efb18f49b7285ca4fde4422875f38c29 Mon Sep 17 00:00:00 2001 From: ooonush Date: Sat, 5 Mar 2022 13:51:08 +0500 Subject: [PATCH 1/5] =?UTF-8?q?=D0=94=D0=97=20Numpy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Numpy (26.02)/Numpy_Task.ipynb | 434 ++++++++++++++++++++++++++------- 1 file changed, 351 insertions(+), 83 deletions(-) diff --git a/Numpy (26.02)/Numpy_Task.ipynb b/Numpy (26.02)/Numpy_Task.ipynb index 593ba20..1949baa 100644 --- a/Numpy (26.02)/Numpy_Task.ipynb +++ b/Numpy (26.02)/Numpy_Task.ipynb @@ -2,7 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, + "id": "feb7f6f2", "metadata": { "id": "medieval-detail" }, @@ -13,6 +14,7 @@ }, { "cell_type": "markdown", + "id": "4f80823f", "metadata": { "id": "abstract-istanbul" }, @@ -25,20 +27,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, + "id": "54d8a235", "metadata": { "id": "entertaining-automation" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + } + ], "source": [ "python_list = [1, 12, 13, 45, 76, 45, 98, 0]\n", - "print()\n", - "python_list = \n", - "print()" + "print(type(python_list))\n", + "python_list = np.array(python_list)\n", + "print(type(python_list))" ] }, { "cell_type": "markdown", + "id": "0f235321", "metadata": { "id": "loose-tobago" }, @@ -49,18 +62,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, + "id": "07b9bc7e", "metadata": { "id": "included-polymer" }, - "outputs": [], - "source": [ - "z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5]\n" + ] + } + ], + "source": [ + "z = np.full(10, 1.5)\n", "print(z)" ] }, { "cell_type": "markdown", + "id": "3b15fd7d", "metadata": { "id": "threatened-theme" }, @@ -71,18 +94,33 @@ }, { "cell_type": "code", - "execution_count": 
null, + "execution_count": 4, + "id": "963f2be0", "metadata": { "id": "alert-endorsement" }, - "outputs": [], - "source": [ - "z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]]\n" + ] + } + ], + "source": [ + "z = z = np.zeros((5, 5))\n", + "\n", "print(z)" ] }, { "cell_type": "markdown", + "id": "b4d9ed58", "metadata": { "id": "federal-blackberry" }, @@ -93,18 +131,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, + "id": "5be6c607", "metadata": { "id": "static-filing" }, - "outputs": [], - "source": [ - "ones = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n" + ] + } + ], + "source": [ + "ones = np.ones(12)\n", "print(ones)" ] }, { "cell_type": "markdown", + "id": "dd144105", "metadata": { "id": "whole-chassis" }, @@ -116,18 +164,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, + "id": "977bb23a", "metadata": { "id": "outstanding-deviation" }, - "outputs": [], - "source": [ - "ones = \n", + "outputs": [ + { + "data": { + "text/plain": [ + "(3, 4)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ones = ones.reshape(3,4)\n", "ones.shape" ] }, { "cell_type": "markdown", + "id": "117bb1bc", "metadata": { "id": "cubic-noise" }, @@ -139,20 +200,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, + "id": "1892658d", "metadata": { "id": "foster-memory" }, - "outputs": [], - "source": [ - "Z = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 1 2 3 4 5]\n", + " [ 6 7 8 9 10]\n", + " [11 12 13 14 15]\n", + " [16 17 18 19 20]]\n", + "[[ 1 2 3 4 5]\n", + " [ 6 7 8 9 10]\n", + " [ 11 12 13 -99 15]\n", + " [ 16 17 18 19 20]]\n" + ] + } + ], + 
"source": [ + "Z = np.arange(1, 21, 1).reshape(4, 5)\n", "print(Z)\n", - "\n", + "Z[2, 3] = -99\n", "print(Z)" ] }, { "cell_type": "markdown", + "id": "4dcf81f8", "metadata": { "id": "helpful-table" }, @@ -164,20 +242,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, + "id": "84b1215a", "metadata": { "id": "magnetic-leone" }, - "outputs": [], - "source": [ - "first = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ -3 -10 -10 10 2 4 -9 10 4 1 -7 7 3 -5 -3]\n", + "[ -3 -5 3 7 -7 1 4 10 -9 4 2 10 -10 -10 -3]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-10, 11, 15)\n", "print(first)\n", - "second = \n", + "second = first[::-1]\n", "print(second)" ] }, { "cell_type": "markdown", + "id": "0dbd17ef", "metadata": { "id": "executed-september" }, @@ -189,20 +278,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, + "id": "455f7019", "metadata": { "id": "pharmaceutical-sigma" }, - "outputs": [], - "source": [ - "first = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 2 -1 -15 -11 4]\n", + " [ 11 -5 -9 0 -13]\n", + " [ -7 1 -12 -12 -3]\n", + " [ 1 -7 -15 2 -9]\n", + " [ 0 3 -12 0 13]]\n", + "[[ 2 1 225 121 4]\n", + " [ 11 25 81 0 169]\n", + " [ 49 1 144 144 9]\n", + " [ 1 49 225 2 81]\n", + " [ 0 3 144 0 13]]\n" + ] + } + ], + "source": [ + "first = np.random.randint(-15, 16, 25).reshape(5, 5)\n", "print(first)\n", - "\n", + "first = np.where(first < 0, first**2, first)\n", "print(first)" ] }, { "cell_type": "markdown", + "id": "5d36be00", "metadata": { "id": "floral-difference" }, @@ -216,18 +324,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, + "id": "acb86da0", "metadata": { "id": "saving-conference" }, - "outputs": [], - "source": [ - "first = \n", - "print(first)\n" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 12 5 -3 11 6]\n", + " [ 4 -5 -3 
0 14]\n", + " [ 15 -1 -10 -9 3]]\n" + ] + }, + { + "data": { + "text/plain": [ + "(15,\n", + " -10,\n", + " array([10.33333333, -0.33333333, -5.33333333, 0.66666667, 7.66666667]),\n", + " array([ 6.2, 2. , -0.4]))" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "first = np.random.randint(-15, 16, 15).reshape(3, 5)\n", + "print(first)\n", + "first.max(), first.min(), first.mean(axis=0), first.mean(axis=1)" ] }, { "cell_type": "markdown", + "id": "45907f32", "metadata": { "id": "diagnostic-departure" }, @@ -240,23 +374,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, + "id": "cc76e88c", "metadata": { "id": "olympic-qatar" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 36 -54 96]\n", + " [ -47 -137 79]]\n" + ] + } + ], "source": [ "a = np.random.randint(-10, 10, (2, 5))\n", "first_axis = np.random.randint(4, 6)\n", "b = np.random.randint(-10, 10, (first_axis, 3))\n", - "if :\n", + "if a.shape[1] == b.shape[0]:\n", " print(a @ b)\n", "else:\n", - " " + " print('err')" ] }, { "cell_type": "markdown", + "id": "2ded5995", "metadata": { "id": "governmental-austin" }, @@ -268,20 +413,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, + "id": "9b454b9c", "metadata": { "id": "suffering-mauritius" }, - "outputs": [], - "source": [ - "mask = \n", - "matrix = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ -2.61775722 -1.67086735 -2.27801851 -6.93616721 -7.45792731]\n", + " [ 0. -6.15690924 0.62688566 0.60474397 -7.53560122]\n", + " [ 0. 0. -11.70636778 -3.90126739 -1.98764308]\n", + " [ 0. 0. 0. -9.70455908 -10.8963226 ]\n", + " [ 0. 0. 0. 0. 
-2.1782692 ]]\n" + ] + } + ], + "source": [ + "mask = np.random.uniform(2, -12, (5, 5)) \n", + "matrix = np.copy(mask)\n", + "for i in range(5):\n", + " for j in range(5):\n", + " if j < i:\n", + " matrix[i, j] = 0\n", "\n", "print(matrix)" ] }, { "cell_type": "markdown", + "id": "40985b25", "metadata": { "id": "altered-baghdad" }, @@ -293,20 +456,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, + "id": "cec7e7ea", "metadata": { "id": "refined-stuff" }, - "outputs": [], - "source": [ - "mask = \n", - "matrix = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 0. 10.18238614 11.91385332 8.31314191 7.16254416]\n", + " [ 9.49416669 0. 8.87762329 11.12641883 10.55408449]\n", + " [ 6.60673358 6.12302958 0. 11.78786698 11.4615063 ]\n", + " [10.08653577 10.05468692 10.00824118 0. 12.99399982]\n", + " [12.01515492 7.92418921 9.0942127 9.01347768 0. ]]\n" + ] + } + ], + "source": [ + "mask = np.random.normal(10, 2, 25).reshape(5, 5)\n", + "matrix = np.copy(mask)\n", + "\n", + "for i in range(5):\n", + " for j in range(5):\n", + " if j == i:\n", + " matrix[i, j] = 0\n", "\n", "print(matrix)" ] }, { "cell_type": "markdown", + "id": "2d9b132e", "metadata": { "id": "quiet-complement" }, @@ -317,22 +499,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, + "id": "be1e172f", "metadata": { "id": "french-fighter" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 0 0 0 0]\n", + "[1 1 0 1 0]\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "a = np.random.randint(0,2,5)\n", "print(a)\n", "b = np.random.randint(0,2,5)\n", "print(b)\n", - "equal = \n", + "equal = sum(a == b) == len(a)\n", "equal" ] }, { "cell_type": "markdown", + "id": "eb176893", "metadata": { "id": "color-amplifier" }, @@ -347,23 +550,42 @@ }, { 
"cell_type": "code", - "execution_count": null, + "execution_count": 15, + "id": "a0255fb7", "metadata": { "id": "close-daisy" }, - "outputs": [], - "source": [ - "r, c = \n", - "a = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[61 2 49 59 9 31 40 42 45 4 70]\n", + " [11 22 73 31 14 48 51 26 96 69 69]\n", + " [18 80 30 20 67 73 68 82 12 83 21]\n", + " [ 7 42 72 85 57 36 66 63 74 37 18]\n", + " [ 2 87 58 27 79 71 24 67 80 18 29]]\n", + "20\n", + "[67 70 42 37 69 24 72 18 48 57 31 21 85 69 2 67 74 79 40 96]\n" + ] + } + ], + "source": [ + "import random\n", + "import math\n", + "\n", + "r, c = random.randint(3, 7), random.randint(2, 12)\n", + "a = np.random.randint(0, 100, r * c).reshape(r, c)\n", "print(a)\n", - "N = \n", + "N = random.randint(1, math.floor(r * c / 2))\n", "print(N)\n", - "sample = \n", + "sample = np.random.choice(a.flatten(), N)\n", "print(sample)" ] }, { "cell_type": "markdown", + "id": "80f200c2", "metadata": { "id": "patent-african" }, @@ -376,20 +598,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, + "id": "f653032e", "metadata": { "id": "taken-fabric" }, - "outputs": [], - "source": [ - "a = np.array([1, np.NaN, np.Inf], float)\n", - "\n", - "\n", + "outputs": [ + { + "data": { + "text/plain": [ + "array([1., 0., 0.])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = np.array([1, np.NaN, np.Inf], float)\n", + "for i, e in enumerate(a):\n", + " if np.isnan(e) or np.isinf(e):\n", + " a[i] = 0\n", "a" ] }, { "cell_type": "markdown", + "id": "136ca513", "metadata": { "id": "analyzed-ireland" }, @@ -401,20 +637,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, + "id": "113bddd8", "metadata": { "id": "imposed-digest" }, - "outputs": [], - "source": [ - "axis = \n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "24\n", + "24\n" + ] + } + ], 
+ "source": [ + "axis = random.randint(1, 32)\n", "print(axis)\n", - "matrix = \n", - "print(...)" + "matrix = np.zeros(tuple(np.ones((axis), dtype=int)))\n", + "print(len(matrix.shape))" ] }, { "cell_type": "markdown", + "id": "dd32794d", "metadata": { "id": "regulation-colleague" }, @@ -427,17 +674,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, + "id": "1321e582", "metadata": { "id": "concerned-anthropology" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[37.40050676 50.32378478 27.00206948]\n", + " [53.26304936 49.19480125 50.78815875]\n", + " [56.14512038 62.83737387 37.25231629]\n", + " [66.89832777 49.51868862 60.56248615]\n", + " [46.90543492 43.61393481 57.70402322]\n", + " [56.99272333 73.20515882 45.65732603]\n", + " [41.55582669 52.51231989 32.21895004]\n", + " [48.05808254 50.78640518 37.77281941]\n", + " [57.72747562 66.61338244 37.49564976]\n", + " [67.92895952 38.51751088 65.11972469]]\n", + "[2, 1, 2, 1, 1, 2, 2, 2, 2, 1]\n", + "[27.00206948 49.19480125 37.25231629 49.51868862 43.61393481 45.65732603\n", + " 32.21895004 37.77281941 37.49564976 38.51751088]\n" + ] + } + ], "source": [ "matrix = np.random.normal(50, 10, (10,3))\n", "print(matrix)\n", - "indexes = \n", + "indexes = ([list(item).index(min(item)) for item in matrix])\n", "print(indexes)\n", - "print(...)" + "print(matrix.min(axis=1))" ] } ], @@ -462,7 +730,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.8" } }, "nbformat": 4, From 54c43a89d3c8313e1e1aec9f5ee8285880f8a7d3 Mon Sep 17 00:00:00 2001 From: ooonush Date: Sat, 12 Mar 2022 10:43:37 +0500 Subject: [PATCH 2/5] =?UTF-8?q?pd=20=D0=B4=D0=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Pandas (06.03)/Pandas. Lecture. 
Part 1.ipynb | 5765 +++++++++++++++++- 1 file changed, 5764 insertions(+), 1 deletion(-) diff --git a/Pandas (06.03)/Pandas. Lecture. Part 1.ipynb b/Pandas (06.03)/Pandas. Lecture. Part 1.ipynb index a0c1d04..f1f44db 100644 --- a/Pandas (06.03)/Pandas. Lecture. Part 1.ipynb +++ b/Pandas (06.03)/Pandas. Lecture. Part 1.ipynb @@ -1 +1,5764 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"01_Pandas.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyPGZA72+5Brg/wHtKFk27jK"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"qCUpgW4Chxlt"},"source":["# Игрушечные наборы данных\n","https://scikit-learn.org/stable/datasets/index.html"]},{"cell_type":"code","metadata":{"id":"6-e8Ub9ghvMA","executionInfo":{"status":"ok","timestamp":1632403984813,"user_tz":-300,"elapsed":867,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["import sklearn.datasets as sets\n","datasets = {0:'boston', 1:'iris', 2:'diabets', 3:'digits', 4:'linnerud', 5:'wine', 6:'cancer', 7:'olivetti_faces', 8:'20_newsgroups',\n"," 9:'20_newsgroups_vec', 10:'people_labeled_faces', 11:'pairs_labeled_faces', 12:'covertype', 13:'RCV1_multilabel',\n"," 14:'kddcup99', 15:'california_housing', }\n","choise = 1\n","if choise == 0:\n"," ds = sets.load_boston() #regression\n","elif choise == 1:\n"," ds = sets.load_iris() # classification\n","elif choise == 2:\n"," ds = sets.load_diabetes() # regression\n","elif choise == 3:\n"," ds = sets.load_digits() # classification\n","elif choise == 4:\n"," ds = sets.load_linnerud() # multivariate regression\n","elif choise == 5:\n"," ds = sets.load_wine() # classification\n","elif choise == 6:\n"," ds = sets.load_breast_cancer() # classification\n","elif choise == 7:\n"," ds = sets.fetch_olivetti_faces() # classification\n","elif choise == 8:\n"," ds = 
sets.fetch_20newsgroups() # classification\n","elif choise == 9:\n"," ds = sets.fetch_20newsgroups_vectorized() # classification\n","elif choise == 10:\n"," ds = sets.fetch_lfw_people() # classification\n","elif choise == 11:\n"," ds = sets.fetch_lfw_pairs() # classification\n","elif choise == 12:\n"," ds = sets.fetch_covtype() # classification\n","elif choise == 13:\n"," ds = sets.fetch_rcv1() # classification\n","elif choise == 14:\n"," ds = sets.fetch_kddcup99() # classification\n","elif choise == 15:\n"," ds = sets.fetch_california_housing() # regression"],"execution_count":1,"outputs":[]},{"cell_type":"code","metadata":{"id":"rHDZmzjAiy7N","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615295304765,"user_tz":-300,"elapsed":1064,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"160c86a8-b336-429a-b12b-52cf5bb6a14b"},"source":["print(ds.DESCR)"],"execution_count":null,"outputs":[{"output_type":"stream","text":[".. 
_iris_dataset:\n","\n","Iris plants dataset\n","--------------------\n","\n","**Data Set Characteristics:**\n","\n"," :Number of Instances: 150 (50 in each of three classes)\n"," :Number of Attributes: 4 numeric, predictive attributes and the class\n"," :Attribute Information:\n"," - sepal length in cm\n"," - sepal width in cm\n"," - petal length in cm\n"," - petal width in cm\n"," - class:\n"," - Iris-Setosa\n"," - Iris-Versicolour\n"," - Iris-Virginica\n"," \n"," :Summary Statistics:\n","\n"," ============== ==== ==== ======= ===== ====================\n"," Min Max Mean SD Class Correlation\n"," ============== ==== ==== ======= ===== ====================\n"," sepal length: 4.3 7.9 5.84 0.83 0.7826\n"," sepal width: 2.0 4.4 3.05 0.43 -0.4194\n"," petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)\n"," petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)\n"," ============== ==== ==== ======= ===== ====================\n","\n"," :Missing Attribute Values: None\n"," :Class Distribution: 33.3% for each of 3 classes.\n"," :Creator: R.A. Fisher\n"," :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n"," :Date: July, 1988\n","\n","The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\n","from Fisher's paper. Note that it's the same as in R, but not as in the UCI\n","Machine Learning Repository, which has two wrong data points.\n","\n","This is perhaps the best known database to be found in the\n","pattern recognition literature. Fisher's paper is a classic in the field and\n","is referenced frequently to this day. (See Duda & Hart, for example.) The\n","data set contains 3 classes of 50 instances each, where each class refers to a\n","type of iris plant. One class is linearly separable from the other 2; the\n","latter are NOT linearly separable from each other.\n","\n",".. topic:: References\n","\n"," - Fisher, R.A. 
\"The use of multiple measurements in taxonomic problems\"\n"," Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\n"," Mathematical Statistics\" (John Wiley, NY, 1950).\n"," - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n"," (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.\n"," - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\n"," Structure and Classification Rule for Recognition in Partially Exposed\n"," Environments\". IEEE Transactions on Pattern Analysis and Machine\n"," Intelligence, Vol. PAMI-2, No. 1, 67-71.\n"," - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\". IEEE Transactions\n"," on Information Theory, May 1972, 431-433.\n"," - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al\"s AUTOCLASS II\n"," conceptual clustering system finds 3 classes in the data.\n"," - Many, many more ...\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"59mLor4WoeZg","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1632404056458,"user_tz":-300,"elapsed":683,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"3548322c-6765-4349-8dea-66ab12f3f7d9"},"source":["print(ds.feature_names)\n","print(ds.target_names)"],"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n","['setosa' 'versicolor' 'virginica']\n"]}]},{"cell_type":"code","metadata":{"id":"9Yt4tJ2_otjm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1632404071563,"user_tz":-300,"elapsed":420,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"b471a124-b71b-456d-de41-fe29676b6604"},"source":["data = ds.data\n","type(data)"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/plain":["numpy.ndarray"]},"metadata":{},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZgxY_56q3YVG","executionInfo":{"status":"ok","timestamp":1632404086557,"user_tz":-300,"elapsed":402,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"b4e3ee4f-16b7-4b1e-f5be-34d0e5f4dd31"},"source":["data"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[5.1, 3.5, 1.4, 0.2],\n"," [4.9, 3. , 1.4, 0.2],\n"," [4.7, 3.2, 1.3, 0.2],\n"," [4.6, 3.1, 1.5, 0.2],\n"," [5. , 3.6, 1.4, 0.2],\n"," [5.4, 3.9, 1.7, 0.4],\n"," [4.6, 3.4, 1.4, 0.3],\n"," [5. , 3.4, 1.5, 0.2],\n"," [4.4, 2.9, 1.4, 0.2],\n"," [4.9, 3.1, 1.5, 0.1],\n"," [5.4, 3.7, 1.5, 0.2],\n"," [4.8, 3.4, 1.6, 0.2],\n"," [4.8, 3. , 1.4, 0.1],\n"," [4.3, 3. , 1.1, 0.1],\n"," [5.8, 4. , 1.2, 0.2],\n"," [5.7, 4.4, 1.5, 0.4],\n"," [5.4, 3.9, 1.3, 0.4],\n"," [5.1, 3.5, 1.4, 0.3],\n"," [5.7, 3.8, 1.7, 0.3],\n"," [5.1, 3.8, 1.5, 0.3],\n"," [5.4, 3.4, 1.7, 0.2],\n"," [5.1, 3.7, 1.5, 0.4],\n"," [4.6, 3.6, 1. , 0.2],\n"," [5.1, 3.3, 1.7, 0.5],\n"," [4.8, 3.4, 1.9, 0.2],\n"," [5. , 3. , 1.6, 0.2],\n"," [5. , 3.4, 1.6, 0.4],\n"," [5.2, 3.5, 1.5, 0.2],\n"," [5.2, 3.4, 1.4, 0.2],\n"," [4.7, 3.2, 1.6, 0.2],\n"," [4.8, 3.1, 1.6, 0.2],\n"," [5.4, 3.4, 1.5, 0.4],\n"," [5.2, 4.1, 1.5, 0.1],\n"," [5.5, 4.2, 1.4, 0.2],\n"," [4.9, 3.1, 1.5, 0.2],\n"," [5. , 3.2, 1.2, 0.2],\n"," [5.5, 3.5, 1.3, 0.2],\n"," [4.9, 3.6, 1.4, 0.1],\n"," [4.4, 3. , 1.3, 0.2],\n"," [5.1, 3.4, 1.5, 0.2],\n"," [5. 
, 3.5, 1.3, 0.3],\n"," [4.5, 2.3, 1.3, 0.3],\n"," [4.4, 3.2, 1.3, 0.2],\n"," [5. , 3.5, 1.6, 0.6],\n"," [5.1, 3.8, 1.9, 0.4],\n"," [4.8, 3. , 1.4, 0.3],\n"," [5.1, 3.8, 1.6, 0.2],\n"," [4.6, 3.2, 1.4, 0.2],\n"," [5.3, 3.7, 1.5, 0.2],\n"," [5. , 3.3, 1.4, 0.2],\n"," [7. , 3.2, 4.7, 1.4],\n"," [6.4, 3.2, 4.5, 1.5],\n"," [6.9, 3.1, 4.9, 1.5],\n"," [5.5, 2.3, 4. , 1.3],\n"," [6.5, 2.8, 4.6, 1.5],\n"," [5.7, 2.8, 4.5, 1.3],\n"," [6.3, 3.3, 4.7, 1.6],\n"," [4.9, 2.4, 3.3, 1. ],\n"," [6.6, 2.9, 4.6, 1.3],\n"," [5.2, 2.7, 3.9, 1.4],\n"," [5. , 2. , 3.5, 1. ],\n"," [5.9, 3. , 4.2, 1.5],\n"," [6. , 2.2, 4. , 1. ],\n"," [6.1, 2.9, 4.7, 1.4],\n"," [5.6, 2.9, 3.6, 1.3],\n"," [6.7, 3.1, 4.4, 1.4],\n"," [5.6, 3. , 4.5, 1.5],\n"," [5.8, 2.7, 4.1, 1. ],\n"," [6.2, 2.2, 4.5, 1.5],\n"," [5.6, 2.5, 3.9, 1.1],\n"," [5.9, 3.2, 4.8, 1.8],\n"," [6.1, 2.8, 4. , 1.3],\n"," [6.3, 2.5, 4.9, 1.5],\n"," [6.1, 2.8, 4.7, 1.2],\n"," [6.4, 2.9, 4.3, 1.3],\n"," [6.6, 3. , 4.4, 1.4],\n"," [6.8, 2.8, 4.8, 1.4],\n"," [6.7, 3. , 5. , 1.7],\n"," [6. , 2.9, 4.5, 1.5],\n"," [5.7, 2.6, 3.5, 1. ],\n"," [5.5, 2.4, 3.8, 1.1],\n"," [5.5, 2.4, 3.7, 1. ],\n"," [5.8, 2.7, 3.9, 1.2],\n"," [6. , 2.7, 5.1, 1.6],\n"," [5.4, 3. , 4.5, 1.5],\n"," [6. , 3.4, 4.5, 1.6],\n"," [6.7, 3.1, 4.7, 1.5],\n"," [6.3, 2.3, 4.4, 1.3],\n"," [5.6, 3. , 4.1, 1.3],\n"," [5.5, 2.5, 4. , 1.3],\n"," [5.5, 2.6, 4.4, 1.2],\n"," [6.1, 3. , 4.6, 1.4],\n"," [5.8, 2.6, 4. , 1.2],\n"," [5. , 2.3, 3.3, 1. ],\n"," [5.6, 2.7, 4.2, 1.3],\n"," [5.7, 3. , 4.2, 1.2],\n"," [5.7, 2.9, 4.2, 1.3],\n"," [6.2, 2.9, 4.3, 1.3],\n"," [5.1, 2.5, 3. , 1.1],\n"," [5.7, 2.8, 4.1, 1.3],\n"," [6.3, 3.3, 6. , 2.5],\n"," [5.8, 2.7, 5.1, 1.9],\n"," [7.1, 3. , 5.9, 2.1],\n"," [6.3, 2.9, 5.6, 1.8],\n"," [6.5, 3. , 5.8, 2.2],\n"," [7.6, 3. , 6.6, 2.1],\n"," [4.9, 2.5, 4.5, 1.7],\n"," [7.3, 2.9, 6.3, 1.8],\n"," [6.7, 2.5, 5.8, 1.8],\n"," [7.2, 3.6, 6.1, 2.5],\n"," [6.5, 3.2, 5.1, 2. ],\n"," [6.4, 2.7, 5.3, 1.9],\n"," [6.8, 3. , 5.5, 2.1],\n"," [5.7, 2.5, 5. , 2. 
],\n"," [5.8, 2.8, 5.1, 2.4],\n"," [6.4, 3.2, 5.3, 2.3],\n"," [6.5, 3. , 5.5, 1.8],\n"," [7.7, 3.8, 6.7, 2.2],\n"," [7.7, 2.6, 6.9, 2.3],\n"," [6. , 2.2, 5. , 1.5],\n"," [6.9, 3.2, 5.7, 2.3],\n"," [5.6, 2.8, 4.9, 2. ],\n"," [7.7, 2.8, 6.7, 2. ],\n"," [6.3, 2.7, 4.9, 1.8],\n"," [6.7, 3.3, 5.7, 2.1],\n"," [7.2, 3.2, 6. , 1.8],\n"," [6.2, 2.8, 4.8, 1.8],\n"," [6.1, 3. , 4.9, 1.8],\n"," [6.4, 2.8, 5.6, 2.1],\n"," [7.2, 3. , 5.8, 1.6],\n"," [7.4, 2.8, 6.1, 1.9],\n"," [7.9, 3.8, 6.4, 2. ],\n"," [6.4, 2.8, 5.6, 2.2],\n"," [6.3, 2.8, 5.1, 1.5],\n"," [6.1, 2.6, 5.6, 1.4],\n"," [7.7, 3. , 6.1, 2.3],\n"," [6.3, 3.4, 5.6, 2.4],\n"," [6.4, 3.1, 5.5, 1.8],\n"," [6. , 3. , 4.8, 1.8],\n"," [6.9, 3.1, 5.4, 2.1],\n"," [6.7, 3.1, 5.6, 2.4],\n"," [6.9, 3.1, 5.1, 2.3],\n"," [5.8, 2.7, 5.1, 1.9],\n"," [6.8, 3.2, 5.9, 2.3],\n"," [6.7, 3.3, 5.7, 2.5],\n"," [6.7, 3. , 5.2, 2.3],\n"," [6.3, 2.5, 5. , 1.9],\n"," [6.5, 3. , 5.2, 2. ],\n"," [6.2, 3.4, 5.4, 2.3],\n"," [5.9, 3. , 5.1, 1.8]])"]},"metadata":{},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"-7ejnqmmwr_J","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615295357693,"user_tz":-300,"elapsed":855,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"e22abd6b-c840-4e43-aa62-d9c1a5cdd231"},"source":["data.shape"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(150, 4)"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"id":"RmRL0mZ3o5ri","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1632404107395,"user_tz":-300,"elapsed":420,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"bcace884-7ac8-49ce-d14e-05c8f625bb38"},"source":["target = ds.target\n","target[:5], target.shape"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(array([0, 0, 0, 0, 0]), (150,))"]},"metadata":{},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"fpcR7aEBJoGq"},"source":["# Pandas"]},{"cell_type":"code","metadata":{"id":"FVTPYh-hhvah","executionInfo":{"status":"ok","timestamp":1632404228644,"user_tz":-300,"elapsed":546,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["import pandas as pd\n","import numpy as np"],"execution_count":7,"outputs":[]},{"cell_type":"code","metadata":{"id":"CZzMZXcyDnCx","colab":{"base_uri":"https://localhost:8080/","height":423},"executionInfo":{"status":"ok","timestamp":1632404365934,"user_tz":-300,"elapsed":20,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"55d262ac-6243-4338-a45e-57217f23a610"},"source":["df = pd.DataFrame(data, columns=ds.feature_names) # data - может быть как лист, так и numpy array\n","df['target'] = ds.target\n","df"],"execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
..................
1456.73.05.22.32
1466.32.55.01.92
1476.53.05.22.02
1486.23.45.42.32
1495.93.05.11.82
\n","

150 rows × 5 columns

\n","
"],"text/plain":[" sepal length (cm) sepal width (cm) ... petal width (cm) target\n","0 5.1 3.5 ... 0.2 0\n","1 4.9 3.0 ... 0.2 0\n","2 4.7 3.2 ... 0.2 0\n","3 4.6 3.1 ... 0.2 0\n","4 5.0 3.6 ... 0.2 0\n",".. ... ... ... ... ...\n","145 6.7 3.0 ... 2.3 2\n","146 6.3 2.5 ... 1.9 2\n","147 6.5 3.0 ... 2.0 2\n","148 6.2 3.4 ... 2.3 2\n","149 5.9 3.0 ... 1.8 2\n","\n","[150 rows x 5 columns]"]},"metadata":{},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":112},"id":"WMx25DeePe80","executionInfo":{"status":"ok","timestamp":1632404401169,"user_tz":-300,"elapsed":1482,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"ca1eb41f-18e0-47de-cc77-b8648b89cec5"},"source":["df.head(2) #tail()"],"execution_count":12,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.20
14.93.01.40.20
\n","
"],"text/plain":[" sepal length (cm) sepal width (cm) ... petal width (cm) target\n","0 5.1 3.5 ... 0.2 0\n","1 4.9 3.0 ... 0.2 0\n","\n","[2 rows x 5 columns]"]},"metadata":{},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"yY02uqmWhvlj","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1632404414446,"user_tz":-300,"elapsed":580,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"f4adccbb-22f7-4192-a8f7-67d00c8ff7c3"},"source":["df.sample(5)"],"execution_count":13,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
1206.93.25.72.32
75.03.41.50.20
656.73.14.41.41
776.73.05.01.71
985.12.53.01.11
\n","
"],"text/plain":[" sepal length (cm) sepal width (cm) ... petal width (cm) target\n","120 6.9 3.2 ... 2.3 2\n","7 5.0 3.4 ... 0.2 0\n","65 6.7 3.1 ... 1.4 1\n","77 6.7 3.0 ... 1.7 1\n","98 5.1 2.5 ... 1.1 1\n","\n","[5 rows x 5 columns]"]},"metadata":{},"execution_count":13}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"L0oDISZyHqUh","executionInfo":{"status":"ok","timestamp":1632404445651,"user_tz":-300,"elapsed":486,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"f2586af7-7f30-4106-861b-539f5ed618d6"},"source":["type(df)"],"execution_count":14,"outputs":[{"output_type":"execute_result","data":{"text/plain":["pandas.core.frame.DataFrame"]},"metadata":{},"execution_count":14}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"arTjJfy442ss","executionInfo":{"status":"ok","timestamp":1632404485030,"user_tz":-300,"elapsed":433,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"6d630c99-cbed-42e1-d69f-c71e595be995"},"source":["type(df[\"target\"])"],"execution_count":15,"outputs":[{"output_type":"execute_result","data":{"text/plain":["pandas.core.series.Series"]},"metadata":{},"execution_count":15}]},{"cell_type":"markdown","metadata":{"id":"xX_Qut-QR_ia"},"source":["### Индексация и срезы данных"]},{"cell_type":"code","metadata":{"id":"jXimDZePWyIp","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614783881358,"user_tz":-300,"elapsed":3256,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"00860947-6e2c-484e-90ae-8149d6c2bb45"},"source":["df['sepal length (cm)'] # выбор столбца по названию"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 5.1\n","1 4.9\n","2 4.7\n","3 4.6\n","4 5.0\n"," ... \n","145 6.7\n","146 6.3\n","147 6.5\n","148 6.2\n","149 5.9\n","Name: sepal length (cm), Length: 150, dtype: float64"]},"metadata":{"tags":[]},"execution_count":96}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"rOBV0RUtHxLh","executionInfo":{"status":"ok","timestamp":1615295621844,"user_tz":-300,"elapsed":619,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"2e25e363-6fd5-477f-9e38-afe8f91522ac"},"source":["type(df['sepal length (cm)'])"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["pandas.core.series.Series"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"sq2YmKFr5m-1","executionInfo":{"status":"ok","timestamp":1632404667952,"user_tz":-300,"elapsed":523,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"e9f125e0-3f1f-4a4b-d39c-5e6091047c86"},"source":["df.columns"],"execution_count":18,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',\n"," 'petal width (cm)', 'target'],\n"," 
dtype='object')"]},"metadata":{},"execution_count":18}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"o5CI-Ha6P4AX","executionInfo":{"status":"ok","timestamp":1614783884339,"user_tz":-300,"elapsed":1699,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"ee350cf3-212a-4bdd-daf8-f0decfe313c0"},"source":["{name : '_'.join(name.split(' ')) for name in df.columns}"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{'petal length (cm)': 'petal_length_(cm)',\n"," 'petal width (cm)': 'petal_width_(cm)',\n"," 'sepal length (cm)': 'sepal_length_(cm)',\n"," 'sepal width (cm)': 'sepal_width_(cm)',\n"," 'target': 'target'}"]},"metadata":{"tags":[]},"execution_count":97}]},{"cell_type":"code","metadata":{"id":"ztRKBaVlxM8d","executionInfo":{"status":"ok","timestamp":1632404857471,"user_tz":-300,"elapsed":585,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["# df = df.rename(columns={name : '_'.join(name.split(' ')) for name in df.columns}) # смена имен столбцов\n","df.rename(columns={name : '_'.join(name.split(' ')) for name in df.columns}, inplace=True)"],"execution_count":21,"outputs":[]},{"cell_type":"code","metadata":{"id":"Bryqf6bCxNC5","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1632404863328,"user_tz":-300,"elapsed":29,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"2fb81e40-0667-4c5b-9b50-4ba23010385b"},"source":["df.columns"],"execution_count":22,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['sepal_length_(cm)', 'sepal_width_(cm)', 'petal_length_(cm)',\n"," 'petal_width_(cm)', 'target'],\n"," dtype='object')"]},"metadata":{},"execution_count":22}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"uesXOV19QcNX","executionInfo":{"status":"ok","timestamp":1615295826923,"user_tz":-300,"elapsed":438,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"6476924c-249d-4876-89be-920b127e125b"},"source":["df.target"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 0\n","1 0\n","2 0\n","3 0\n","4 0\n"," ..\n","145 2\n","146 2\n","147 2\n","148 2\n","149 2\n","Name: target, Length: 150, dtype: int64"]},"metadata":{"tags":[]},"execution_count":20}]},{"cell_type":"code","metadata":{"id":"J2il4fodbWLb","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614777840378,"user_tz":-300,"elapsed":566,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"b6d5c2a4-dc69-497d-997c-8127f174765a"},"source":["df.target[-10:] # возможен такой стиль обращения к столбцам, если его имя не содержит пробелов"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["140 2\n","141 2\n","142 2\n","143 2\n","144 2\n","145 2\n","146 2\n","147 2\n","148 2\n","149 2\n","Name: target, dtype: 
int64"]},"metadata":{"tags":[]},"execution_count":22}]},{"cell_type":"code","metadata":{"id":"2IaGUtDoYIAO","colab":{"base_uri":"https://localhost:8080/","height":357},"executionInfo":{"status":"ok","timestamp":1614777891289,"user_tz":-300,"elapsed":607,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"c64f553c-27a2-4f0d-a1e3-aa82ee895acf"},"source":["df.loc[140: , 'sepal_width_(cm)':'petal_width_(cm)'] # возможность среза данных по ИМЕНАМ строк и столбцов"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_width_(cm)petal_length_(cm)petal_width_(cm)
1403.15.62.4
1413.15.12.3
1422.75.11.9
1433.25.92.3
1443.35.72.5
1453.05.22.3
1462.55.01.9
1473.05.22.0
1483.45.42.3
1493.05.11.8
\n","
"],"text/plain":[" sepal_width_(cm) petal_length_(cm) petal_width_(cm)\n","140 3.1 5.6 2.4\n","141 3.1 5.1 2.3\n","142 2.7 5.1 1.9\n","143 3.2 5.9 2.3\n","144 3.3 5.7 2.5\n","145 3.0 5.2 2.3\n","146 2.5 5.0 1.9\n","147 3.0 5.2 2.0\n","148 3.4 5.4 2.3\n","149 3.0 5.1 1.8"]},"metadata":{"tags":[]},"execution_count":23}]},{"cell_type":"code","metadata":{"id":"RwTuNV_BxNtH","colab":{"base_uri":"https://localhost:8080/","height":357},"executionInfo":{"status":"ok","timestamp":1614777918498,"user_tz":-300,"elapsed":735,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"89004bbc-fd5d-4bb9-fbdc-6756fa31cb1b"},"source":["df.iloc[:10,:4] # возможность среза данных по ПОРЯДКОВЫМ НОМЕРАМ строк и столбцов "],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
55.43.91.70.4
64.63.41.40.3
75.03.41.50.2
84.42.91.40.2
94.93.11.50.1
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) petal_length_(cm) petal_width_(cm)\n","0 5.1 3.5 1.4 0.2\n","1 4.9 3.0 1.4 0.2\n","2 4.7 3.2 1.3 0.2\n","3 4.6 3.1 1.5 0.2\n","4 5.0 3.6 1.4 0.2\n","5 5.4 3.9 1.7 0.4\n","6 4.6 3.4 1.4 0.3\n","7 5.0 3.4 1.5 0.2\n","8 4.4 2.9 1.4 0.2\n","9 4.9 3.1 1.5 0.1"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QSAbGcDbJP9B","executionInfo":{"status":"ok","timestamp":1632405184550,"user_tz":-300,"elapsed":413,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"20274561-ff6c-4031-e1a7-a26a2399cea5"},"source":["[column for column in df.columns if column.startswith('sepal')]"],"execution_count":23,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['sepal_length_(cm)', 'sepal_width_(cm)']"]},"metadata":{},"execution_count":23}]},{"cell_type":"code","metadata":{"id":"pytaw0cAxNp8","colab":{"base_uri":"https://localhost:8080/","height":424},"executionInfo":{"status":"ok","timestamp":1614784351268,"user_tz":-300,"elapsed":1370,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"81983e96-8834-40e4-b828-6706a4f3bbb6"},"source":["df[[column for column in df.columns if column.startswith('sepal')]] # выбор столбцов по условию"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)
05.13.5
14.93.0
24.73.2
34.63.1
45.03.6
.........
1456.73.0
1466.32.5
1476.53.0
1486.23.4
1495.93.0
\n","

150 rows × 2 columns

\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm)\n","0 5.1 3.5\n","1 4.9 3.0\n","2 4.7 3.2\n","3 4.6 3.1\n","4 5.0 3.6\n",".. ... ...\n","145 6.7 3.0\n","146 6.3 2.5\n","147 6.5 3.0\n","148 6.2 3.4\n","149 5.9 3.0\n","\n","[150 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":102}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"bHiE8tk872bY","executionInfo":{"status":"ok","timestamp":1632405255702,"user_tz":-300,"elapsed":666,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"5cab46f0-7d00-4c5a-a435-ecd145b8c82c"},"source":["df.target==1.0"],"execution_count":24,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 False\n","1 False\n","2 False\n","3 False\n","4 False\n"," ... \n","145 False\n","146 False\n","147 False\n","148 False\n","149 False\n","Name: target, Length: 150, dtype: bool"]},"metadata":{},"execution_count":24}]},{"cell_type":"code","metadata":{"id":"YNxRaJqqavOz","colab":{"base_uri":"https://localhost:8080/","height":347},"executionInfo":{"status":"ok","timestamp":1615296046504,"user_tz":-300,"elapsed":815,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"2baa1152-611c-43a3-eea6-c9ae07cfea4e"},"source":["df[df.target==1.0][:10] # выбор данных по условию: здесь хотим увидеть строки, у которых целевой класс = 1\n","# также видно, что к столбцу можно обращаться как к атрибуту (df.target), если его имя не содержит пробелов"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
507.03.24.71.41
516.43.24.51.51
526.93.14.91.51
535.52.34.01.31
546.52.84.61.51
555.72.84.51.31
566.33.34.71.61
574.92.43.31.01
586.62.94.61.31
595.22.73.91.41
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n","50 7.0 3.2 ... 1.4 1\n","51 6.4 3.2 ... 1.5 1\n","52 6.9 3.1 ... 1.5 1\n","53 5.5 2.3 ... 1.3 1\n","54 6.5 2.8 ... 1.5 1\n","55 5.7 2.8 ... 1.3 1\n","56 6.3 3.3 ... 1.6 1\n","57 4.9 2.4 ... 1.0 1\n","58 6.6 2.9 ... 1.3 1\n","59 5.2 2.7 ... 1.4 1\n","\n","[10 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":22}]},{"cell_type":"markdown","metadata":{"id":"i4V1_5AOgmB9"},"source":["### Описательная статистика"]},{"cell_type":"code","metadata":{"id":"EuwQ-U54xNnA","colab":{"base_uri":"https://localhost:8080/","height":300},"executionInfo":{"status":"ok","timestamp":1614766986724,"user_tz":-300,"elapsed":1283,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"5ed73970-f852-49b2-82a7-bfe43b1ad3c3"},"source":["df.describe() # статистическое описание набора данных"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
count150.000000150.000000150.000000150.000000150.000000
mean5.8433333.0573333.7580001.1993331.000000
std0.8280660.4358661.7652980.7622380.819232
min4.3000002.0000001.0000000.1000000.000000
25%5.1000002.8000001.6000000.3000000.000000
50%5.8000003.0000004.3500001.3000001.000000
75%6.4000003.3000005.1000001.8000002.000000
max7.9000004.4000006.9000002.5000002.000000
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n","count 150.000000 150.000000 ... 150.000000 150.000000\n","mean 5.843333 3.057333 ... 1.199333 1.000000\n","std 0.828066 0.435866 ... 0.762238 0.819232\n","min 4.300000 2.000000 ... 0.100000 0.000000\n","25% 5.100000 2.800000 ... 0.300000 0.000000\n","50% 5.800000 3.000000 ... 1.300000 1.000000\n","75% 6.400000 3.300000 ... 1.800000 2.000000\n","max 7.900000 4.400000 ... 2.500000 2.000000\n","\n","[8 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":19}]},{"cell_type":"code","metadata":{"id":"X4ykTpKtxNiG","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614778091397,"user_tz":-300,"elapsed":627,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"e62b683d-f476-4422-d691-774ead34e63f"},"source":["df.info() # информация об индексах, пропусках в данных, типах данных и объеме оперативной памяти занимаемой данными"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\n","RangeIndex: 150 entries, 0 to 149\n","Data columns (total 5 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 sepal_length_(cm) 150 non-null float64\n"," 1 sepal_width_(cm) 150 non-null float64\n"," 2 petal_length_(cm) 150 non-null float64\n"," 3 petal_width_(cm) 150 non-null float64\n"," 4 target 150 non-null int64 \n","dtypes: float64(4), int64(1)\n","memory usage: 6.0 KB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"b7khmMfj8mDB","executionInfo":{"status":"ok","timestamp":1632405484185,"user_tz":-300,"elapsed":51,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"8e7ccfa9-cffa-4872-c0a5-d00635211e12"},"source":["df.target.unique(), df.target.nunique()"],"execution_count":26,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(array([0, 1, 2]), 3)"]},"metadata":{},"execution_count":26}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":167},"id":"n1XzQbdFRx7Z","executionInfo":{"status":"ok","timestamp":1615296303195,"user_tz":-300,"elapsed":783,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"a4acff70-40cf-4462-f2b6-03546318b29b"},"source":["df.groupby('target').mean() #df.groupby('target')['petal_length_(cm)'].mean()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)
target
05.0063.4281.4620.246
15.9362.7704.2601.326
26.5882.9745.5522.026
\n","
"],"text/plain":[" sepal_length_(cm) ... petal_width_(cm)\n","target ... \n","0 5.006 ... 0.246\n","1 5.936 ... 1.326\n","2 6.588 ... 2.026\n","\n","[3 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"code","metadata":{"id":"MRiTYhiixNfC","colab":{"base_uri":"https://localhost:8080/","height":217},"executionInfo":{"status":"ok","timestamp":1615296321113,"user_tz":-300,"elapsed":724,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"3bd6da21-1bde-404e-b9fb-d4e36e94634c"},"source":["df.groupby('target').agg([min, max, np.mean, np.std, np.size]) # применение общих функций группировки для всех столбцов"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)
minmaxmeanstdsizeminmaxmeanstdsizeminmaxmeanstdsizeminmaxmeanstdsize
target
04.35.85.0060.35249050.02.34.43.4280.37906450.01.01.91.4620.17366450.00.10.60.2460.10538650.0
14.97.05.9360.51617150.02.03.42.7700.31379850.03.05.14.2600.46991150.01.01.81.3260.19775350.0
24.97.96.5880.63588050.02.23.82.9740.32249750.04.56.95.5520.55189550.01.42.52.0260.27465050.0
\n","
"],"text/plain":[" sepal_length_(cm) ... petal_width_(cm) \n"," min max mean std ... max mean std size\n","target ... \n","0 4.3 5.8 5.006 0.352490 ... 0.6 0.246 0.105386 50.0\n","1 4.9 7.0 5.936 0.516171 ... 1.8 1.326 0.197753 50.0\n","2 4.9 7.9 6.588 0.635880 ... 2.5 2.026 0.274650 50.0\n","\n","[3 rows x 20 columns]"]},"metadata":{"tags":[]},"execution_count":25}]},{"cell_type":"code","metadata":{"id":"w_oHay4KxNdC","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615296592781,"user_tz":-300,"elapsed":511,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"2b52fff3-b9c7-4c74-ea6f-e52b965f4e6b"},"source":["df.groupby('target').agg({'sepal_length_(cm)':[np.mean, np.std], 'petal_width_(cm)':[min, max]}) # индивидуальное применение функций группировки"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)petal_width_(cm)
meanstdminmax
target
05.0060.3524900.10.6
15.9360.5161711.01.8
26.5880.6358801.42.5
\n","
"],"text/plain":[" sepal_length_(cm) petal_width_(cm) \n"," mean std min max\n","target \n","0 5.006 0.352490 0.1 0.6\n","1 5.936 0.516171 1.0 1.8\n","2 6.588 0.635880 1.4 2.5"]},"metadata":{"tags":[]},"execution_count":30}]},{"cell_type":"markdown","metadata":{"id":"NCfoXnc41fmW"},"source":["### Полезные функции, которые конкретно сейчас не нужны, но часто применимы"]},{"cell_type":"code","metadata":{"id":"KV8EM_b41m0m","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615296494311,"user_tz":-300,"elapsed":747,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"b898ccdb-16f0-415b-a629-25b794f42859"},"source":["d = df.copy()\n","d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n","0 5.1 3.5 ... 0.2 0\n","1 4.9 3.0 ... 0.2 0\n","2 4.7 3.2 ... 0.2 0\n","3 4.6 3.1 ... 0.2 0\n","4 5.0 3.6 ... 0.2 0\n","\n","[5 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":26}]},{"cell_type":"code","metadata":{"id":"pGOooxXo1xqA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615296536700,"user_tz":-300,"elapsed":737,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"a3a5fe7b-d857-49c8-8d59-ed08472c37e9"},"source":["targets = {float(i):target for i, target in enumerate(ds.target_names)}\n","targets"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{0.0: 'setosa', 1.0: 'versicolor', 2.0: 'virginica'}"]},"metadata":{"tags":[]},"execution_count":27}]},{"cell_type":"code","metadata":{"id":"1qI4cEd81xxK","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615296574079,"user_tz":-300,"elapsed":474,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"7e62a1d9-dc06-4fc5-8270-6da0236d7341"},"source":["d.target = d.target.map(targets) # заменим цифровые обозначения классов на буквенные подписи\n","d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n","0 5.1 3.5 ... 0.2 setosa\n","1 4.9 3.0 ... 0.2 setosa\n","2 4.7 3.2 ... 0.2 setosa\n","3 4.6 3.1 ... 0.2 setosa\n","4 5.0 3.6 ... 0.2 setosa\n","\n","[5 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":28}]},{"cell_type":"code","metadata":{"id":"q1W6kwXe1xuc","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615296637939,"user_tz":-300,"elapsed":647,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"cbd628aa-1e1b-4a98-e5f5-b9ef80aa9544"},"source":["d['sepal_length_on_width'] = d['sepal_length_(cm)'] / d['sepal_width_(cm)'] # операции непосредственно со столбцами много быстрее поэлементных операций \n","d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)targetsepal_length_on_width
05.13.51.40.2setosa1.457143
14.93.01.40.2setosa1.633333
24.73.21.30.2setosa1.468750
34.63.11.50.2setosa1.483871
45.03.61.40.2setosa1.388889
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... target sepal_length_on_width\n","0 5.1 3.5 ... setosa 1.457143\n","1 4.9 3.0 ... setosa 1.633333\n","2 4.7 3.2 ... setosa 1.468750\n","3 4.6 3.1 ... setosa 1.483871\n","4 5.0 3.6 ... setosa 1.388889\n","\n","[5 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":31}]},{"cell_type":"code","metadata":{"id":"dRp4-vhV1xmt"},"source":["d.sepal_length_on_width = d.sepal_length_on_width.apply(np.sin)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"357_A4ny1xjb","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615296813029,"user_tz":-300,"elapsed":767,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"196d0d5d-1883-4552-ec7c-890f592130de"},"source":["def bias(x):\n"," if x < 1.0:\n"," return 0\n"," return 1\n","d['petal_width_(cm)'] = d['petal_width_(cm)'].apply(bias)\n","d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)targetsepal_length_on_width
05.13.51.40setosa0.993548
14.93.01.40setosa0.998045
24.73.21.30setosa0.994798
34.63.11.50setosa0.996224
45.03.61.40setosa0.983500
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... target sepal_length_on_width\n","0 5.1 3.5 ... setosa 0.993548\n","1 4.9 3.0 ... setosa 0.998045\n","2 4.7 3.2 ... setosa 0.994798\n","3 4.6 3.1 ... setosa 0.996224\n","4 5.0 3.6 ... setosa 0.983500\n","\n","[5 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":33}]},{"cell_type":"code","metadata":{"id":"aeUhqZEX1xey"},"source":["d.drop([column for column in d.columns if column.endswith('length_(cm)')], axis=1, inplace=True)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"gQJ6De486fsw","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615296912439,"user_tz":-300,"elapsed":684,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"f39caff3-2866-4a3b-b6ac-8510ddad127f"},"source":["d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_width_(cm)petal_width_(cm)targetsepal_length_on_width
03.50setosa0.993548
13.00setosa0.998045
23.20setosa0.994798
33.10setosa0.996224
43.60setosa0.983500
\n","
"],"text/plain":[" sepal_width_(cm) petal_width_(cm) target sepal_length_on_width\n","0 3.5 0 setosa 0.993548\n","1 3.0 0 setosa 0.998045\n","2 3.2 0 setosa 0.994798\n","3 3.1 0 setosa 0.996224\n","4 3.6 0 setosa 0.983500"]},"metadata":{"tags":[]},"execution_count":36}]},{"cell_type":"code","metadata":{"id":"H6wlNTB76hoP","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615296981297,"user_tz":-300,"elapsed":589,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"ef4c98c9-53fc-403d-e5ad-91b96ab8f864"},"source":["f = pd.concat([d,d], axis=0)\n","d.shape, f.shape"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((150, 4), (300, 4))"]},"metadata":{"tags":[]},"execution_count":37}]},{"cell_type":"code","metadata":{"id":"8wvhQgCh6stP","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615297019618,"user_tz":-300,"elapsed":572,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"ed87bb36-d869-49c6-cc1a-516cd9daa65b"},"source":["f = pd.concat([d,d], axis=1)\n","d.shape, f.shape"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((150, 4), (150, 8))"]},"metadata":{"tags":[]},"execution_count":38}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":143},"id":"hYXfdNRds8wc","executionInfo":{"status":"ok","timestamp":1632405950884,"user_tz":-300,"elapsed":476,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"2b21ce4b-5d17-4800-ea16-396dc95557c3"},"source":["df_1 = pd.DataFrame(index=[1,2,3], data=[[1,2],[1,2],[1,2]], columns=[4,5])\n","df_2 = pd.DataFrame(index=[5,6,3], data=[[1,2],[1,2],[1,2]], columns=[5,7])\n","df_1"],"execution_count":27,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
45
112
212
312
\n","
"],"text/plain":[" 4 5\n","1 1 2\n","2 1 2\n","3 1 2"]},"metadata":{},"execution_count":27}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":143},"id":"-bALOOiOs_xk","executionInfo":{"status":"ok","timestamp":1632405952831,"user_tz":-300,"elapsed":12,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"56478aab-30e8-477d-8628-2352f3ed3ac4"},"source":["df_2"],"execution_count":28,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
57
512
612
312
\n","
"],"text/plain":[" 5 7\n","5 1 2\n","6 1 2\n","3 1 2"]},"metadata":{},"execution_count":28}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":237},"id":"nspfyfjMUepW","executionInfo":{"status":"ok","timestamp":1632405958777,"user_tz":-300,"elapsed":400,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"946cd0f5-3470-4620-a1ea-52221f5a06b1"},"source":["df_1 = pd.DataFrame(index=[1,2,3], data=[[1,2],[1,2],[1,2]], columns=[4,5])\n","df_2 = pd.DataFrame(index=[5,6,3], data=[[1,2],[1,2],[1,2]], columns=[5,7])\n","\n","pd.concat([df_1,df_2], axis=0)"],"execution_count":29,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
457
11.02NaN
21.02NaN
31.02NaN
5NaN12.0
6NaN12.0
3NaN12.0
\n","
"],"text/plain":[" 4 5 7\n","1 1.0 2 NaN\n","2 1.0 2 NaN\n","3 1.0 2 NaN\n","5 NaN 1 2.0\n","6 NaN 1 2.0\n","3 NaN 1 2.0"]},"metadata":{},"execution_count":29}]},{"cell_type":"code","metadata":{"id":"hfsafxqc6wl0","colab":{"base_uri":"https://localhost:8080/","height":217},"executionInfo":{"status":"ok","timestamp":1615297123302,"user_tz":-300,"elapsed":594,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"80ab6214-48dd-4847-9637-c8eda376ce2b"},"source":["f.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_width_(cm)petal_width_(cm)targetsepal_length_on_widthsepal_width_(cm)petal_width_(cm)targetsepal_length_on_width
03.50setosa0.9935483.50setosa0.993548
13.00setosa0.9980453.00setosa0.998045
23.20setosa0.9947983.20setosa0.994798
33.10setosa0.9962243.10setosa0.996224
43.60setosa0.9835003.60setosa0.983500
\n","
"],"text/plain":[" sepal_width_(cm) petal_width_(cm) ... target sepal_length_on_width\n","0 3.5 0 ... setosa 0.993548\n","1 3.0 0 ... setosa 0.998045\n","2 3.2 0 ... setosa 0.994798\n","3 3.1 0 ... setosa 0.996224\n","4 3.6 0 ... setosa 0.983500\n","\n","[5 rows x 8 columns]"]},"metadata":{"tags":[]},"execution_count":44}]},{"cell_type":"code","metadata":{"id":"HRY-rDbb8gGk"},"source":["g = d.drop(['sepal_width_(cm)', 'petal_width_(cm)'], axis=1)\n","h = d.drop(['sepal_length_on_width'], axis=1)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"tsgVE2Si8oFG","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615297139175,"user_tz":-300,"elapsed":429,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"dea93f74-7d0d-4030-c81c-84ea655c5f6d"},"source":["g.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
targetsepal_length_on_width
0setosa0.993548
1setosa0.998045
2setosa0.994798
3setosa0.996224
4setosa0.983500
\n","
"],"text/plain":[" target sepal_length_on_width\n","0 setosa 0.993548\n","1 setosa 0.998045\n","2 setosa 0.994798\n","3 setosa 0.996224\n","4 setosa 0.983500"]},"metadata":{"tags":[]},"execution_count":46}]},{"cell_type":"code","metadata":{"id":"kny_HFf489cy","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615297148886,"user_tz":-300,"elapsed":628,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"59ae8694-c22e-4118-e25f-f31a2e148c4e"},"source":["h.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_width_(cm)petal_width_(cm)target
03.50setosa
13.00setosa
23.20setosa
33.10setosa
43.60setosa
\n","
"],"text/plain":[" sepal_width_(cm) petal_width_(cm) target\n","0 3.5 0 setosa\n","1 3.0 0 setosa\n","2 3.2 0 setosa\n","3 3.1 0 setosa\n","4 3.6 0 setosa"]},"metadata":{"tags":[]},"execution_count":47}]},{"cell_type":"code","metadata":{"id":"ZAKyHnni8_wx","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615297241757,"user_tz":-300,"elapsed":588,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"cc83133f-1f83-4c7a-f041-b23d01f14cf4"},"source":["d = g.merge(h, on='target')\n","d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
targetsepal_length_on_widthsepal_width_(cm)petal_width_(cm)
0setosa0.9935483.50
1setosa0.9935483.00
2setosa0.9935483.20
3setosa0.9935483.10
4setosa0.9935483.60
\n","
"],"text/plain":[" target sepal_length_on_width sepal_width_(cm) petal_width_(cm)\n","0 setosa 0.993548 3.5 0\n","1 setosa 0.993548 3.0 0\n","2 setosa 0.993548 3.2 0\n","3 setosa 0.993548 3.1 0\n","4 setosa 0.993548 3.6 0"]},"metadata":{"tags":[]},"execution_count":49}]},{"cell_type":"code","metadata":{"id":"m6ec0Exh9K8V","colab":{"base_uri":"https://localhost:8080/","height":424},"executionInfo":{"status":"ok","timestamp":1614767389654,"user_tz":-300,"elapsed":712,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"1c97b950-0ba5-4b63-f8b7-2560f8decceb"},"source":["pd.get_dummies(d.target)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
setosaversicolorvirginica
0100
1100
2100
3100
4100
............
7495001
7496001
7497001
7498001
7499001
\n","

7500 rows × 3 columns

\n","
"],"text/plain":[" setosa versicolor virginica\n","0 1 0 0\n","1 1 0 0\n","2 1 0 0\n","3 1 0 0\n","4 1 0 0\n","... ... ... ...\n","7495 0 0 1\n","7496 0 0 1\n","7497 0 0 1\n","7498 0 0 1\n","7499 0 0 1\n","\n","[7500 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":46}]},{"cell_type":"code","metadata":{"id":"Hrp_HGEb9t4d","colab":{"base_uri":"https://localhost:8080/","height":197},"executionInfo":{"status":"ok","timestamp":1615297478580,"user_tz":-300,"elapsed":440,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"b3b9983b-e598-4288-90ee-7b0d1abe5ff8"},"source":["d = pd.get_dummies(data=d, columns=['target'])\n","d.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_on_widthsepal_width_(cm)petal_width_(cm)target_setosatarget_versicolortarget_virginica
00.9935483.50100
10.9935483.00100
20.9935483.20100
30.9935483.10100
40.9935483.60100
\n","
"],"text/plain":[" sepal_length_on_width sepal_width_(cm) ... target_versicolor target_virginica\n","0 0.993548 3.5 ... 0 0\n","1 0.993548 3.0 ... 0 0\n","2 0.993548 3.2 ... 0 0\n","3 0.993548 3.1 ... 0 0\n","4 0.993548 3.6 ... 0 0\n","\n","[5 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":50}]},{"cell_type":"markdown","metadata":{"id":"Ym2h89BMguk6"},"source":["### Графическое представление"]},{"cell_type":"code","metadata":{"id":"EB8GRu9XxNaZ"},"source":["%matplotlib inline\n","import seaborn as sns\n","from matplotlib import pyplot as plt"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"hbipgoEZxNOg"},"source":["sns.set_style(\"whitegrid\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"VlMb-EWdxNMn","colab":{"base_uri":"https://localhost:8080/","height":122},"executionInfo":{"status":"ok","timestamp":1614779517504,"user_tz":-300,"elapsed":587,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"9907624b-bf04-4f40-f152-94951d92a782"},"source":["print(sns.color_palette())\n","sns.palplot(sns.color_palette())"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[(0.12156862745098039, 0.4666666666666667, 0.7058823529411765), (1.0, 0.4980392156862745, 0.054901960784313725), (0.17254901960784313, 0.6274509803921569, 0.17254901960784313), (0.8392156862745098, 0.15294117647058825, 0.1568627450980392), (0.5803921568627451, 0.403921568627451, 0.7411764705882353), (0.5490196078431373, 0.33725490196078434, 0.29411764705882354), (0.8901960784313725, 0.4666666666666667, 0.7607843137254902), (0.4980392156862745, 0.4980392156862745, 0.4980392156862745), (0.7372549019607844, 0.7411764705882353, 0.13333333333333333), (0.09019607843137255, 0.7450980392156863, 
0.8117647058823529)]\n"],"name":"stdout"},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAjwAAABECAYAAACF4e8fAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAChklEQVR4nO3bMWtTYRiG4a+pqJBQEO1gF4UiIo6ZdWpH/4WLm5s4WKgOrs6CILp1chGnThkEMZuODnWR2hRsaYjRmuMfaDsIX488XNf6Lg8cONxwOHNN0xQAgGSdtgcAANQmeACAeIIHAIgneACAeIIHAIh35qTj+w8fm697h6e15dRd747Lucl22zOq2e4tldHvUdszqlj+1pTZpcXSGe20PaWK6eWrZXowa3tGNWe70zLd/9H2jGoWLiyV+XHu8xt3D8tkMml7RhW93m7pdBbLbJb5bhl3lsv34J+zr82XUb/fXzzqdmLwzJqmPNjcrbPqP/Buda/cGNxre0Y1n1delPUv623PqGLj6WH5+eRxOf9ore0pVew/e1k+vRm3PaOam3cOyvDV87ZnVLNyd61c3Jy2PaOa0eqvMhgM2p5Rxa3br0uv+7QcjB+2PaWKnd5GuT+eb3tGNW8X/mwdd/NJCwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIN9c0zbHH4XC4U0rZOr05AAD/7Eq/31886nBi8AAAJPBJCwCIJ3gAgHiCBwCIJ3gAgHiCBwCI9xdiZWLdKNW9eAAAAABJRU5ErkJggg==\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"4umRGJuKqHuO","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615297767532,"user_tz":-300,"elapsed":622,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"49a1d76f-c4ba-4088-817f-e1bdce211bdc"},"source":["targets = {float(i):target for i, target in enumerate(ds.target_names)}\n","targets"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{0.0: 'setosa', 1.0: 'versicolor', 2.0: 'virginica'}"]},"metadata":{"tags":[]},"execution_count":54}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"SDeuDnTEXKQk","executionInfo":{"status":"ok","timestamp":1615297774179,"user_tz":-300,"elapsed":456,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"53cf3a73-56d9-42cc-f715-9dee1f23fd15"},"source":["df[df.target==1]"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
507.03.24.71.41
516.43.24.51.51
526.93.14.91.51
535.52.34.01.31
546.52.84.61.51
555.72.84.51.31
566.33.34.71.61
574.92.43.31.01
586.62.94.61.31
595.22.73.91.41
605.02.03.51.01
615.93.04.21.51
626.02.24.01.01
636.12.94.71.41
645.62.93.61.31
656.73.14.41.41
665.63.04.51.51
675.82.74.11.01
686.22.24.51.51
695.62.53.91.11
705.93.24.81.81
716.12.84.01.31
726.32.54.91.51
736.12.84.71.21
746.42.94.31.31
756.63.04.41.41
766.82.84.81.41
776.73.05.01.71
786.02.94.51.51
795.72.63.51.01
805.52.43.81.11
815.52.43.71.01
825.82.73.91.21
836.02.75.11.61
845.43.04.51.51
856.03.44.51.61
866.73.14.71.51
876.32.34.41.31
885.63.04.11.31
895.52.54.01.31
905.52.64.41.21
916.13.04.61.41
925.82.64.01.21
935.02.33.31.01
945.62.74.21.31
955.73.04.21.21
965.72.94.21.31
976.22.94.31.31
985.12.53.01.11
995.72.84.11.31
\n","
"],"text/plain":[" sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n","50 7.0 3.2 ... 1.4 1\n","51 6.4 3.2 ... 1.5 1\n","52 6.9 3.1 ... 1.5 1\n","53 5.5 2.3 ... 1.3 1\n","54 6.5 2.8 ... 1.5 1\n","55 5.7 2.8 ... 1.3 1\n","56 6.3 3.3 ... 1.6 1\n","57 4.9 2.4 ... 1.0 1\n","58 6.6 2.9 ... 1.3 1\n","59 5.2 2.7 ... 1.4 1\n","60 5.0 2.0 ... 1.0 1\n","61 5.9 3.0 ... 1.5 1\n","62 6.0 2.2 ... 1.0 1\n","63 6.1 2.9 ... 1.4 1\n","64 5.6 2.9 ... 1.3 1\n","65 6.7 3.1 ... 1.4 1\n","66 5.6 3.0 ... 1.5 1\n","67 5.8 2.7 ... 1.0 1\n","68 6.2 2.2 ... 1.5 1\n","69 5.6 2.5 ... 1.1 1\n","70 5.9 3.2 ... 1.8 1\n","71 6.1 2.8 ... 1.3 1\n","72 6.3 2.5 ... 1.5 1\n","73 6.1 2.8 ... 1.2 1\n","74 6.4 2.9 ... 1.3 1\n","75 6.6 3.0 ... 1.4 1\n","76 6.8 2.8 ... 1.4 1\n","77 6.7 3.0 ... 1.7 1\n","78 6.0 2.9 ... 1.5 1\n","79 5.7 2.6 ... 1.0 1\n","80 5.5 2.4 ... 1.1 1\n","81 5.5 2.4 ... 1.0 1\n","82 5.8 2.7 ... 1.2 1\n","83 6.0 2.7 ... 1.6 1\n","84 5.4 3.0 ... 1.5 1\n","85 6.0 3.4 ... 1.6 1\n","86 6.7 3.1 ... 1.5 1\n","87 6.3 2.3 ... 1.3 1\n","88 5.6 3.0 ... 1.3 1\n","89 5.5 2.5 ... 1.3 1\n","90 5.5 2.6 ... 1.2 1\n","91 6.1 3.0 ... 1.4 1\n","92 5.8 2.6 ... 1.2 1\n","93 5.0 2.3 ... 1.0 1\n","94 5.6 2.7 ... 1.3 1\n","95 5.7 3.0 ... 1.2 1\n","96 5.7 2.9 ... 1.3 1\n","97 6.2 2.9 ... 1.3 1\n","98 5.1 2.5 ... 1.1 1\n","99 5.7 2.8 ... 
1.3 1\n","\n","[50 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":55}]},{"cell_type":"markdown","metadata":{"id":"Rg_HMRSVzGz-"},"source":["Строим гистограммы"]},{"cell_type":"code","metadata":{"id":"mx_PNSF8xNKe","colab":{"base_uri":"https://localhost:8080/","height":406},"executionInfo":{"status":"ok","timestamp":1615297826988,"user_tz":-300,"elapsed":1244,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"5d46e25e-fb29-467c-d88f-b3b689306815"},"source":["for target in targets:\n"," sns.distplot(df[df.target==target]['sepal_length_(cm)'],kde=True,kde_kws={\"label\":targets[target]})"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n"],"name":"stderr"},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"bNUuVXgzhvz1","colab":{"base_uri":"https://localhost:8080/","height":406},"executionInfo":{"status":"ok","timestamp":1615297848522,"user_tz":-300,"elapsed":1136,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"7ef13877-988b-4be0-a9b2-b4983762d161"},"source":["for target in targets:\n"," sns.distplot(df[df.target==target]['sepal_width_(cm)'],kde=True,kde_kws={\"label\":targets[target]})"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. 
Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n"],"name":"stderr"},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEHCAYAAACjh0HiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeXiU5bn48e87+2SbJDPJZF8ICSbsCAgqAVkEQUWpbfHU1qPS9mertcfaWntaWz3Ho0dbW2tbrcXjXusuCgiIiLigIAHCEpZAQkL2dbLNPvP7YyQQk8xMyExmJnk+18V1hZnnfeYOTObO+yz3I7ndbjeCIAjCmCULdQCCIAhCaIlEIAiCMMaJRCAIgjDGiUQgCIIwxolEIAiCMMYpQh3AUO3btw+1Wu21jdVq9dkmHEVi3CLmkROJcUdizBCZcfuK2Wq1Mm3atAGfi7hEoFarKSws9NqmrKzMZ5twFIlxi5hHTiTGHYkxQ2TG7SvmsrKyQZ8TQ0OCIAhjnEgEgiAIY5xIBIIgCGOcSASCIAhjnEgEgiAIY5xIBIIgCGNc0BLBPffcw9y5c7nyyisHbfPFF1+wcuVKVqxYwQ033BCsUARBEAQvgraPYNWqVdxwww3cfffdAz7f0dHBfffdx9q1a0lLS6OlpSVYoQiCIAheBO2OYNasWeh0ukGff/fdd1myZAlpaWkA6PX6YIUiCIIgeBGyncWVlZU4HA6++93v0t3dzfe+9z2uueYan9dZrVavO+QALBaLzzbhKBLjjsSYXSoXpZWlAelLLalx9bgC0pcvkfhvHYkxQ2TGPZyYQ5YInE4nhw4d4tlnn8VisbB69WqmTp1Kbm6u1+tEiYnwEokxl1aWctR2NCB9XZJ2CWnZaQHpy5dI/LeOxJghMuMeTomJkCWClJQU4uPjiYqKIioqipkzZ3LkyBGfiUAQBEEIrJAtH120aBF79uzB4XBgNpspLS0lLy8vVOEIgiCMWUG7I7jzzjvZtWsXbW1tFBcXc/vtt+NwOAC4/vrrycvLY968eVx99dXIZDKuu+46CgoKghWOIAiCMIigJYJHH33UZ5s1a9awZs2aYIUgCIIg+EHsLBYEQRjjIu5gGmGU6mkDW2dg+1TFQlRCYPsUhFFIJAIhPNg6ofyDwPY5fpFIBILgBzE0JAiCMMaJRCAIgjDGiUQgCIIwxolEIAiCMMaJRCAIgjDGiUQgCIIwxolEIAiCMMaJRCAIgjDGiUQgCIIwxolEIAiCMMaJRCAIgjDGiUQgCIIwxolEIAiCMMYFLRHcc889zJ07lyuvvNJru9LSUoqKiti0aVOwQhEEQRC8CFoiWLVqFWvXrvXaxul08vvf/55LLrkkWGEIgiAIPgQtEcyaNQudTue1zQsvvMDSpUvR6/XBCkMQBEHwIWRzBA0NDWzdupXrr78+VCEIgiAIhPCEsgceeIC77roLmWxouchqtVJWVua1jcVi8dkmHEVi3IGKOUlpwV5fH4CIzlLGt9BU193vcYfSQX1dYF6rWdWMqdoUkL58Gcvvj5EWiXEPJ+aQJYKDBw9y5513AtDW1sZHH32EQqFg8eLFXq9Tq9UUFhZ6bVNWVuazTTiKxLgDFnN7FaSkDL+fcxn0GOKz+j1cWllKSmpgXstgMJAWkxaQvnwZ0++PERaJcfuK2VuSCFki2LZtW+/Xv/zlL1mwYIHPJCAIgiAEXtASwZ13
3smuXbtoa2ujuLiY22+/HYfDASDmBQRBEMJI0BLBo48+6nfbhx56KFhhCIIgCD6IncWCIAhjnEgEgiAIY5xIBIIgCGOcSASCIAhjnEgEgiAIY5xIBIIgCGOcSASCIAhjnEgEgiAIY5xIBIIgCGNcyGoNCUKoSHIlbd22gPTVYbHjsvcAEKtWoItSBaRfQRhJIhEIY47DLXG8sSsgfeVE99Dc1gxAcYFBJAIhIomhIUEQhDFOJAJBEIQxTiQCQRCEMU7MEQijmsVh4Y3jb7CpYhOnu06TqEmkILYIqzMBtTw61OEJQlgQiUAYtQ61H+fuD3/EqY5TFCYWMi99HvXd9WyoWodCUjMlfglJ6uxQhykIIScSgTAq7ew6xR1H/oZOE89TS55ibtrc3ufWl23lf0oeoKRtA9PjryBZkxvCSAUh9II2R3DPPfcwd+5crrzyygGff+edd7jqqqu46qqrWL16NUeOHAlWKMIYc9Bczx1V68iMTuWfy//ZJwkApEdncFHiKuKUBva1b6LT3hyiSAUhPAQtEaxatYq1a9cO+nxGRgYvvvgi7777Lrfeeiu/+c1vghWKMNrZuuHEh/Dl/9Hy5Vp+cvI1EhVR/H3u/SRFJQ14iUKmYkb8lShlava1b8bpdoxw0IIQPoKWCGbNmoVOpxv0+RkzZvQ+P23aNOrr64MVijCa1e2HD/8Hytbh7qzjV7I2OlxWHqs5jaH1lNdL1fIoJuuW0O1s42TXlyMUsCCEn7BYPvr6669TXFwc6jCESFP5Cex5BqL1UPxz1k27hs/Ucu5KmM4EtwLeWAOVn3rtwqDOJE0zgZPdJXQ72kcocEEILyGfLP788895/fXX+ec//+lXe6vVSllZmdc2FovFZ5twFIlxByrmJKUF+xDuCjWtR0g4+TaW+PG0jbuG9i47D7d/yERFEvPkRdQXjMNw8i2kl77NyaXP44hO7b3WrXVhMpl6/57KZOop53DbJxSo5g0p7q6uLurqOwBoSZTorPd+FzIcY/n9MdIiMe7hxBzSRHDkyBF+/etf849//IOEhAS/rlGr1RQWFnptU1ZW5rNNOIrEuAMWc3sVpKT417arAfZuhIRcNHN+SKpcyZ9Pv4cZBw9kLyddo/e0m/4kvPRt8g/8Hm5cDzLPDfDeqiNfG7bUkdM5jZPde5Ci5hCnNPgddkxMDKkpnv0IeoOejIRMv68dqjH9/hhhkRi3r5i9JYmQDQ3V1tZy++238/DDD5ObK5bvCX5yOaHkBZApYcaNIFdS0l3DelMZtxhmkXcmCQDoMmHZg3DqU9j/stduc6NnIJeUnOzeE+RvQBDCT9DuCO6880527dpFW1sbxcXF3H777TgcnpUZ119/PX/9619pb2/nvvvuA0Aul/Pmm28GKxxhtDj5IXSchhn/Dtp43G43f2r8mCRFNLcYZvdvP/0GKHketv4OilaCOmbAbpUyNVlRk6jo3kePYy5RirigfhuCEE6ClggeffRRr88/8MADPPDAA8F6eWE0spjg+BYwToK0aQDs6Kpgb08tv0ldjFam7H+NJMHS/4GnF8Puf8Cl/zFo99lRU6ns3s+pnv0Uxg1trkAQIllYrBoSBL8c2+QZGipaCYDT7eJPDR+TpYrn2oSJg1+XOQvyFsFnj3v2HAxCI48hWZ1Lrfmo2FcgjCkiEQiRobMOqj6HnEsg2rNJbJPpKOXWFm5LvhilJPd+ffHPoafF51xBZtRE7G4LDZaTgYpcEMKeSARCZCh7FxRqyF8KgMvtZm3zLvLUepbGTfB9fdYcSJ0GX/wd3K5Bm+lVmWjlsZw2HwpU5IIQ9kQiEMJfezU0HvYM76g8SzU/7qqg3NrCLYZZyCTJdx+SBBf9P2g+Rkx9iZdmEhnaibTaasQGM2HMEIlACH/lW0GhgZxLex96unkXqcpYlun8uBs4o2glqGJJPL7Ra7N07QVISJw2Hz7fiAUhoohEIIS3znqoL4WceaDUAlDSXcPenlpu1M/0
PTdwLlUUTLyG+MptKJ22QZtp5DEY1NnUmo/idruH+x0IQtgTiUAIbyc+ALkSxs3vfeiZlt0kyLWsSpg09P6mXo/cYWZy6wGvzVI1+Vhd3bTZ64b+GoIQYUQiEMKXxQQ1ezwTvSrPRrDTNhMfdZ7kmwlTBt434EvWXKwxacxs8r6DOFmdiww59Zbj5xO5IEQUkQiE8HXqM88Kn5yzm7tebd2PDIlvJk45vz5lMlrHLyfPdAKddfDJYIVMRZI6h3rLCdxeVhkJwmggEoEQnlxOqNoJSRf07hswu+y80X6AhXHjSVHGnnfXbXlLkeFmcutBr+1StfnYXD202mrP+7UEIRKIRCCEp/pSsHb0uRvYZDpKh9PK9YnThtW1VZdFvdbIRB+JIEmdjVxSUieGh4RRTiQCITxVfgxRekj2lNV1u938s3Uv49V6ZkZlDLv7g4kTGddRQZR98JITcklJsjqHBssJXGJ4SBjFRCIQwk9HLbSehOxLQPK8Rfeb6zhiaeL6xOlI/mwg8+Fg4iRkuClq875XwKgZj91toU0MDwmjmEgEQvip/MRz3kDmRb0P/bN1L7EyNVfqLgjIS9REp9OmimeSj+EhgyoTGXIarRUBeV1BCEciEQjhxd4DNV9C+vTechLNjm7e7zjOyviJRMlVgXkdSeJQ4kQK2o+jcloHbaaQqdCrM2i0VojNZcKoJRKBEF6qd4PTBtlnJ4nXt5fhcLv4ZuLkgL7UwcSJKN0OCtq9TwYnqXMxOzvocrQG9PUFIVyIRCCED7cLTn0C8dkQ7zn71+1281b7QaZp0xin1vvoYGgqY3OwyNVMaD/qtV2yOgcgKMNDbrcbk9lOe49N3HEIIRO0E8ruuecetm/fjl6vZ/369f2ed7vdPPDAA3z00UdoNBoeeughJk70criIMPo1H4fuJph2Q+9D+811nLS2cl/akoC/nFOmoDxuvCcRuN2eCqUD0Mhj0CmTabRWkBczc9iva3e62HigjnX7atlV0UqX1XMIToxawcV5eq67MIMlRcaATIoLgj+ClghWrVrFDTfcwN133z3g8zt27KCyspItW7awf/9+fve73/Haa68FKxwhElR+7CklkXp2n8DbbQfRypT+nTlwHo4mTGBS2yGSzY00RhkHbZesHsfxrs+xOLvRyKPP67XcbjfvHazngQ1l1LSbyUjQsnJaGrkGT38nm7v5oKyBLYcbmJ4VzyPXTWF88vlvnBMEfwUtEcyaNYvTp08P+vwHH3zANddcgyRJTJs2jY6ODhobG0lOTg5WSEI466iFhkMwfjHIPW/LHped9zqOsjSugOhATRJ/zVFdAQAFpmM+EkEOx7s+p9l6ioyooiG/TnuPjXvePMB7B+u5ICWWp2+cyWUTkpHJ+v7W77h6Im+UnObhTUe56vFPefi6KVw1NW3IrycIQxG0ROBLQ0MDKSkpvX9PSUmhoaHBZyKwWq2UlZV5bWOxWHy2CUeRGHegYs4++BxaoFE7Hld9PQBbLCfpcdkpdqVSV1+PI3U8Zpn/wyWyzmZ62vvXE5KUckwmEwAm5NSp9OQ1HWJD1OD1i9xuBSqiqO0qJ9ae3vt4V1cXdfUdALQkSnTWn+p37ak2G7/bVk9Tt4ObL0xkVZEOOa0cPTrw5POUGHhseQoPftTIT17ey7HKapYXxI3p98dIi8S4hxNzyBLB+VKr1RQWFnptU1ZW5rNNOIrEuAMSs90C6zaCcRLG7LNDQNsqPiJbFc/ijMlIkkRtXAz7K7b436/UAlGJ/R7OU01Ap9P1/v24vpC59Z+jj4nCIR+8ommyKYd6SzmxcTHIvjoHISYmhtQUz9CO3qAnIyGzzzVfnGzhrs1folXJef3W2UzPSvA7/DemOLn1xT08vrOJ8dkZFGgYm++PEIjEuH3F7C1JhGzVkNFopP6r3/wA6uvrMRoHvzUXRrFDb4GlHXLPLhmttLZR0lPDNfGTgj5pejR+Akq3g7wO7wfWG9RZONw22u0NfvX7
yfFmbnxmF8Y4Net+fMmQkgCARinnye9eyEW5idz12n4O1JuHdL0g+CtkiWDhwoW8/fbbuN1u9u3bR2xsrJgfGKt2/wMSckGf3/vQO+2HkCFxdfzQx+OH6mTsOBySnPEm7/sJ9KpMJGQ0W/sP/3zdh0caufm53eToo3nlh3NJi9eeV2xqhZynvjuTzMQoHvyokcZOy3n1IwjeBC0R3HnnnaxevZqKigqKi4t57bXXePnll3n55ZcBmD9/PpmZmSxZsoTf/OY3/Pa3vw1WKEI4q9nj+TP1273LN11uNxtMR5gbk02yMiboITjkSk7FZjO+44TXdkqZmnhlCs3WKq/tPitv5ocv7KHAGMPL35+DIUY9rPh0UUqe+M6F9Nhd/PRf+3C5xH4DIbD8miO47bbbuO666yguLkYm8y93PProo16flyRJfPgLsGutZ8lo4Uqo/gKAvT011No7uD35khELozwujyWnt6J19GBWRA3azqDO4njX51id3agHWEZ6sMbED17YQ44hipdumYMu6jxOURvAhJRYbr1Iz58+a+a5nZXcdEluQPoVBPDzjuDf/u3fePfdd7n88sv5/e9/z8mT3sdSBcEv3S1w8A2Y8m1Qn/3N/11TGVqZkoVx40cslBO68chwM87HPEGSOhuApgHuCmrazPz7M7vQaZU8f/NFAUsCZ1w+PpbLJiTxv5uOUNE8ePlsQRgqvxLBxRdfzB/+8Afeeust0tPTuemmm1i9ejVvvPEGdrs92DEKo1XJs+C0wuzv9z5kdTnYYjrGotjxRJ3PmcTnqSomE5tMyXiT9+GhWIUBtSyKZlvfRNBjdfCz1/bjdLl57ubZpOg0AY9RkiQe+sYUlDIZv3vnkChJIQSM38tH29raeOedd1i3bh2FhYVcffXV7Nmzh7fffpsXXnghmDEKo5HTDrufhnELPIfPtHs+WD/uqqDTZeWq+AAs3XO5oKf/Wn1VjJMMdf9J1zpdFhd0HGfPAM+dKyMqi6ruit7DahwuFy/tqqKhw8Lj109Ho5Rxuq1n+PEDsWoFuqizm+mMcRruWJzPf28oY2tZI0uKxEo7Yfj8SgQ//vGPqaioYOXKlTz55JO9q3uWL1/OqlWrghqgMEqVvQsdNbDiD30eXt9ehkERzezorOG/htMGzZX9HnbFFmCtO9Tv8Up5NPN7TiCv3kOPYvDf6LPj0jjRdQSTvQG32827+2upaO7m50sn0NptZ8ex5uHH/pXiAkOfRABw48U5vLK7mv9af5h5+QY0SnnAXk8Ym/waGvrWt77Fxo0b+eEPf9ibBGw2GwBvvvlm8KITRq8v/u5ZMpq/tPchk8PMjq4KrtBNQCGN/MrmU1Ge93ZWT6PXdumadECi2XqKDw53sLuyjQUTklh4wcgsf1bKZdx39USqWnv4xw4xXycMn18/bX/605/6Pfbtb3874MEIY0TtXqj+HGb/AM5Zhbal4zh2t5MrdaHZ0dmgScAqU5LV433DmFquJl5ppK6nmtd3tVCUGsfiwpEdorl4vIHlk1P46/ZyatvFRjNheLwODTU1NdHQ0IDFYuHw4cO9k1NdXV2YzeLNJ5ynz5/0LBmd/p0+D683lTFOnUihJjQbC92SjGptEtk+7ggAdPJsKm27MMRb+eaFGchCUDL6V8sL2Xq4kT9tPcbD100d8dcXRg+vieCTTz7hzTffpL6+ngcffLD38ejoaO68886gByeMQl2NniWjM28Czdl6P/XmJkp6argt+eKQ1uGvikpmfHctsfYeOpUD7ydwuqCmajJS8hcsnNGIOkRj9BkJUdwwJ5tnP6vgB8V5jE8O/uY7YXTymgiuvfZarr32WjZv3szSpUu9NRUE/3zxJLgcMPuHfR7eXPMJAFcE6dwBf52dJ2jgkG7gTVufHDXQ1hJHfLKGRvshYPkIRtjXjy/L45XdVTz6/lH+9p0LQxaHENm8JoJ169axcuVKampqeOaZZ/o9f9NNNwUtMGEUsnR4dhIXXQ2G
vpvFNtXsoEhjJEs9tMJsgdakjscsU5HZ0zRgIjjVkUzJqXjy0ruQaTI42r6fRYnukN3F6GPU3DJvHH/+4Dilp9uZkhEfkjiEyOZ1svjMPEBPTw/d3d39/gjCkHz5NFhNcOl/9Hm4urOag+3HWPbVITEhJUlURyWRZe4/T9BjV7Otejr6GCtT89rRqzLpsLf121w20r4/L5eEKCWPbPZ+9rIgDMbrHcHq1asBT60hQRgWuxl2/g3GXQZp0/s8tblyM0DQjqMcqmptEgVdNX3mCdxu2FY9HZtTyXVTa7DIZRjUnr0OFd0lvaUnQiFWo+RHC8bzwMYydlW0Mju3/xkMguCNX8tHH374Ybq6urDb7dx4443MmTOHdevWBTs2YTTZ9xJ0N8K8/osMNlVsYmrCBaSp4kIQWH/VX80TZJ6zeuhgSy5VnUYuTjuEIdazh0Yrj8WoTedkd0lI4jzXDXOyMcSoeHyb91LagjAQvxLBp59+SkxMDNu3byc9PZ3333+fp59+OtixCaOF0wGf/hnSZ0LOvD5PnTSd5GjbUZalF4couP6a1DosMiWZ5iYAOmxadtYVkRnbwCR9RZ+2BbqpVPccxO6yhiLUXlqVnO/PG8fHx5vZc6otpLEIkcevROB0OgHYvn07y5YtIzY2NqhBCaPMwTeg/ZTnbuBrk6qbKzYjIXF52qUhCq4/tyTjtDaJrJ5G3G74qHoaErAgY//Xw2dC/FQcbhvV5v4lK0baDXOySYhSirsCYcj8SgQLFixg2bJlHDp0iLlz59La2opaPbzDNoQxwmmH7f8DxslQcEWfp9xuN5sqN3Gh8UKStfoQBTiw6qgkEuxdNDQnUN2VzJzUw8Sq+m+izIsrQi4pqOgK/fBQtFrBmnnj2H60idLT7aEOR4ggfiWCu+66i3/961+88cYbKJVKtFotf/vb34IdmzAa7H0R2iph4a/7lJMAON5+nJOmkyzLWRaa2Lw4M09AI6REtfQbEjpDJVeTqZ1IRU/oEwHA9+Zmo9Mq+fMH5aEORYggfpehPnnyJDU1Nb3DRADXXHON12t27NjBAw88gMvl4pvf/CY/+MEP+jxfW1vL3XffTWdnJ06nk7vuuov58+cP8VsQwpbdAjsegYxZUNB/Q+Kmik3IJTmLsxeDNbyWIzeo4+lGzUzpGKpMe78hoXPlRl/Ih03/R6ulCRi5MxQGEqtRcvMlufxx6zEO1ZqYmKbzfZEw5vl1R/Dzn/+chx9+mD179nDgwAEOHDjAwYMHvV7jdDq5//77Wbt2LRs2bGD9+vWUl/f9LeWJJ57giiuu4O233+aPf/wj99133/l/J0L42b3WU2p64W/6zQ2cGRaanTIbfZgNCwE0mBP4wllIsbKURE2X17bjomcAsL9l10iE5tO/X5JDrFrBX7aJuwLBP37dERw8eJCNGzcOafdkaWkp2dnZZGZmArBixQo++OADxo8/u6NUkiS6ujw/ZJ2dnb0lroVRoLsFPnoY8hbBuP53eYdbD1PdWc2ayWtCEJx3bjd8WjuZFNpY6NpHtMNMt0I7aPskdQ7R8gT2t+xifvzInbM8GJ1Wyb9fksPj28o5Wt/JhBSxuEPwzq9EkJ+fT1NT05A+qBsaGkhJSen9u9FopLS0tE+b2267jVtuuYUXX3wRs9k8YBmLr7NarZSVlXltY7FYfLYJR5EY92AxG/f8ngRbFyfzb8E2wPMvVb2EXJKTacmkrKyMJKUFe32919fqksdi6ujwOzZttA3zQO3deK2eW9GZSV23HpNeDt2Q3F7D4ej0sw2sVkwmz3JRtcyOVtXI+NhCSlu+YImhDtl5nKUgOdWcrO0fU0uiRGf9KWBo749Lk5ysVUg88PYe7pkfulPMIvE9DZEZ93Bi9isRtLW1sWLFCqZMmYJSeXYM9MknnzyvFz1jw4YNXHvttdx8883s3buXX/ziF6xfvx6ZbPAfJLVaTWGh93r1ZWVlPtuEo0iMe8CYG4/Aibdg5k3kzVnR
7xq3282Xh77kkvRLmDV5lufB9io45xeHgbhjotHFDWHTmUqFaqD2Emi1A/+G73DJ2FMxHb3GREyaE1u5gjyniQrt2TtZtVqN7qszieVK2NlUgtsdTZe9ky11m9Eph/7BuyS3mFRX/zpBeoOejATPXfVQ3x831iv4+44T3KvPDFll0kh8T0Nkxu0rZm9Jwq9EcPvttw85KKPRSP05v+E1NDRgNPb9AXn99ddZu3YtANOnT8dqtdLW1oZeH35jxoKf3G547+ee8wYW3DNgk/1N+6nrruP26UN/XwXboZYcOu1RXJ35KZJMxmmtweeJZQB6lefDutlafV6JIBjWzMvluc8q+cu24/xp9XTfFwhjll/3sLNnzyY9PR2Hw8Hs2bOZPHkyRUVFXq+ZPHkylZWVVFdXY7PZ2LBhAwsXLuzTJjU1lZ07dwJw4sQJrFYriYmiTkpEK30VKnbA4nsh2jBgk02Vm1DJVFyWedkIB+edwyVjb2M+6TFNZMR6zh2ujkrCYOtA6/B+oL1aHoVBY6TZGtoCdOcyxKi5YU4W73x1prIgDMavO4JXX32VV155BZPJxNatW2loaOC3v/0tzz333OAdKxTce++9rFmzBqfTyTe+8Q3y8/N57LHHmDRpEosWLeKXv/wlv/71r3n22WeRJImHHnoopIeSCMNkboPNv/KUkrjw5gGbOF1OtlRuYV7GPGJU4XWQyqGWHHocGi43ftn7WG/dIXMTx2IzvV6fEZ3D/pbdOFw2FDKV17b+cjhdnG7rAcCmiOn92l9XTknluZ2neGTTEX61wjNsoJCBwxWQ8HrFqhXoogLzPQsjz69E8NJLL/Haa6/xrW99C4CcnBxaW1t9Xjd//vx++wLuuOOO3q/Hjx/Pv/71r6HEK4SzLb/2JIPvvtVv89gZJY0lNJmbWJYbXpvI7C45JV/dDaTFtPQ+Xq9JxCbJyezxIxHE5LKv5QtabKcxasYFJC6z3cXeE56ftbr6OlJT3EPuY2Z2ApsO1VOUpiMxWsX0rHj2VgV253FxgUEkggjm19CQSqVCpTr7n+xwOIIWkBChjm327CK+5CeQOmXQZpsqNqFVaCkOoyJzAIdbsjE7NMwy9q3p75Jk1GoNfSqRDsYYlY5cUobV8BBAcX4SMkli+1Hf34MwNvmVCGbNmsWTTz6JxWLh008/5Y477ug33i+MYT2t8M7tYJw06AQxgMPlYGvVVuZnzCdqkPOAQ8Hhkg14N3BGVVQyyTYTGqf3CqNySU6iKp0WW3WwQj0vcVolM3MSKalqo63HFupwhDDkd62hxMRECgoKeOWVV5g/fz4//elPgx2bECk23OlJBtc+CYrBixHuqt9Fq6U17GoLHWvLxOzQMNN4bMDnq7VJAKE/f0wAACAASURBVGT0NPnsy6DKosdposdhCmiMwzW/IAlJkvjomO/vQRh7/JojkMlkLF68mMWLF4tVPUIfcVXvw6G3PGUkUiZ7bbu5cjPRymguzQijktNu2N80DoOmnbTo5gHb1GsSsUtyssxNlMdmeO3PoM6CTmi2VZGl8P7vMZJ0WiUXZiewp7KNps7Qnp0ghB+vdwRut5vHH3+ciy66iGXLlrFs2TLmzJnDX/7yl5GKTwhnrRWkfPmQp6jcJd7vEO1OO1tPbeWyzMtQy8OnhHl1ZxJt1jimJJ0ctLCcUyanVqv3a54gSq5DK48Nu3kC8NwVALy2J7yGroTQ85oInn32WUpKSnj99dfZtWsXu3bt4rXXXmPv3r08++yzIxSiEJYcVnjt3wEZfONpkHu/udxZt5MOW0fYDQuVNucRpbCQH1/jtV21Nplkaztqp/cxdkmSMKiyaLGdxuV2em070hKiVMzIjmfTwXo6zPZQhyOEEa+JYN26dfzhD3/oLRwHkJmZySOPPMLbb78d9OCEMLbl11C3j9qLfgMJvg9u31y5mVhVLBenXTwCwfmn1RJDVaeRSYYK5DLvC+uro5KQgAyz7zF2vToLp9tOu917
7aRQmF+QjNPlZsdxMVcgnOU1ETgcjgHnBBITE8US0rHs0Nuw6ymYextdfiwDtTqtbKvaxqKsRSjloa3Xf67SpjzkkpOJ+kqfbWs1ehySjEw/Joz1qgwkJJqt4TcEkxitYlGhkV0VrXRaxF2B4OE1EZxbYG4ozwmjWOtJz1LR9Jmw6Ld+XfJpzad02bvCaljI6lRwrC2DgoTTaBW+l1Q6ZXJqNXq/6g4pZWp0ypSwnCcAWD0rE6fLzcfHB54cF8YerwO7R44cYcaMGf0ed7vd2GxiPfKYc2ZeQJLgm8+Awr+dpJsqNxGvjmd26uzgxjcEx9oycbgVTBzkCMqBVEclM7flMCqHBdB4bWtQZ1LetQuby4xKNvhZBqGQFq9lWmY8X1S0UFyQRIza74MKhVHK6zsg0upxC0G25ddQtx9WvwzxWX5dYnaY2V69nRXjVqCUhcddpNvt5nBLNgZtO8lR/q/3r9YmcQlu0kxVnIzpXzL6XAZVFuXsosVaTaq2YLghB9yCCcnsq27nk+PNLJvkvfy3MPoN/QQNYWw6vM4zLzDnx3DBcr8v+/j0x5gd5rAaFipvdtFi0VGUeGpI19Vq9TiRkd5e6bOtTpmMUlLTbAvP4aGkWDVTMnR8frKFLquY7xvrRCIQfGutgHW3Q/qFsPh3Q7p0U+Um9Bo9M40zgxLa+dhyxIlCcpCfcHpI1zlkCmq1iWS0+x5OkiQZenUmzdZq3O6hF4obCZddkIzd6WKH2G085olEIHjnsMHrX5WUvu7//J4XAOix9/Dx6Y9Zkr0EuUwepACHxuqQs+OEg7z4WtTyof8mXK1NJrmzDrXT+/kE4Bkesrq66XL4rtQbCsmxGqZnxfP5yRaxr2CME4lA8O6D+6C2BFb+BRJyhnTpR6c/wuK0hFXJ6f2NRsx2KNIPbVjojOqoJGS4yO70fb1e/dWpZWE6PASw8AIjLreb7cdEZdKxTCQCYXBH34Odf4HZP4Ciq4d8+aaKTSRHJTM9OXyOSdxVm0ZmvERK1Pn9ll6rNeCUZIzrOOmzrVYeS7Q8IWyXkYJnX8HMnER2V4jKpGOZSATCwDrr4e1bIWUKLPmvIV/eZevik5pPuDz7cmRSeLzNmnu0nOqIZ2GBYtC6Qr7YZQoaYtPJM/lOBOApQtdmq8XpDt8J2csmJCNJ8OERcVcwVgX1J3THjh0sXbqUJUuW8NRTTw3YZuPGjSxfvpwVK1bws5/9LJjhCP5yuz2bxuxmz7yA0vua+YF8WP0hNpctrIaFShpSkHAzP2948xU18Tlkdlej9FF3CDyJwIWTVpv3WkahpNMquSjXc15Bc5eoTDoWBS0ROJ1O7r//ftauXcuGDRtYv3495eXlfdpUVlby1FNP8fLLL7NhwwZ+9atfBSscYShKnofjW2DxfWDIP68uNlVuIi06jSmGwU8rG0luN5TUp5KX0IohZnhv+9PxOcjdLnL8mCdIVKUhQ05LGJabOFdxQRJymcQHZQ2hDkUIgaAlgtLSUrKzs8nMzESlUrFixQo++OCDPm1effVVvvOd76DT6QDQ6/XBCkfwV1ul5wD6nHmeuYHzYLKa+Kz2M5bmLEU63zGYAKvqiKPFHMWMlOEXgquLy8KJf/MEcklJgiotrCeMAWI1Si7OM1B62kR9h+8VUcLoErS95Q0NDaSknN2xaDQaKS0t7dOmsrISgNWrV+NyubjtttsoLvZexMxqtfrc8WyxWCJyV3TI43a7yPrwx2hcLk5OuhPH0aMDNkvQgOKr5ZPxkoPm8r19nt/Y8BEOl4OLFDk0l+/FplHR4/Y+5KCQwCaP9drGhQpTR4ff34422ob5q/afV+WikJxka06AexZms9nvfvpxQpUmhZzWo1TbbZhMnt3JthhF79fninYl0eKoprG9FrUUPWCXXV1d1NX3/94uMKioq68DwGG39349HOf2ea6COBc75RIb9p5i+YS4IfXZkijRWd//Dink7+nzFIlxDyfmkBYZcTqdnDp1
ihdeeIH6+npuuOEG3n33XeLiBn8TqtVqCgsLvfZbVlbms004Cnnce56Dpr1w9ePkz/ByJnV7FZR/DkBdfT2GlL4lCj6t20qmSsfF9nak9n3U6lLYW7HF+2sn5HjuRryYql+Fzst7ox+VClVcHE6XxOH2TCYmNZGcEAUSaLXnX/9HrVZzQn8Bl9Vsp8lt772jValUvV+fS2YvoKqlBJuqneSotAH7jImJITWlf5LQRkWRmpIKQF19Xe/Xw3Fun183r1vBB2WNuNTxpCf4/2+kN+jJSMjs93jI39PnKRLj9hWztyQRtKEho9FIff3Z2/CGhgaMRmO/NgsXLkSpVJKZmUlOTk7vXYIwwrqa4P17IftSmP7d8+6m1dHDF91VLIubEDbDQkdb9fTYVQEZFjrjuC4fOS50dSU+28Yo9KhlUTSH2aH2A7kkz4BWKef9svA7S0EInqAlgsmTJ1NZWUl1dTU2m40NGzawcGHf3zIXL17Mrl27AGhtbaWysrLPITjCCHr/N2Drhisf5bzXVgJbO8px4mapbkIAgxuekvoUopQ2JiS2BKzPU7HZWGUq4mt3+2wrSRJ6VSYt1ircbu8H4ISaRilnwYQkjjV0cbKpK9ThCCMkaIlAoVBw7733smbNGpYvX84VV1xBfn4+jz32WO+k8bx584iPj2f58uXceOON/OIXvyAhISFYIQmDqdgB+1+GS+6ApOF9gG8yHSFXlUiB2hCg4IbH5pRxuDmJKUmNyGWBq/njlCk4ocsjocZ3IgDPMlK724rJHv5r9eeM06PTKtl8qD5s6yQJgRXUOYL58+czf/78Po/dcccdvV9LksQ999zDPffcE8wwBG8cVlh/p2eMvviuYXVVb+/ky57T/Cjp4rAZFjrSYsDukjPVGPhlkcd0BRRVriPR0kKrxvuKN4M6GwmJRmsl8arwLvuslMtYdEEyb+6t4XBdBxPT+s97CKNLeGz5FEJn1z+g5Thc8Qgoh3eAyibTUdzA8jAaFtrfaCRGaWVcfFvA+z4W79ljUWA67rOtSqYhQZVGo9X/g3BCaXpWAkkxarYcasDpEncFo51IBGNZTyvseBjyFkHB5cPubqPpCJO1KWSpw2N4z+aQKGs2MDm5EVkQblCaNElYoo0UtB/zq32SOocuRws9Dv+XwIaKXCZx+UQjTV1W9lYFPokK4UUkgrHso4fB2gmX//ewuzppbaHM0sgK3QUBCCwwyuqjPMNCyUEal5ck2tNnMd5Ujszt9Nk8WZ0LEDF3BUWpcWQmaNla1oDdGd6T3MLwiEQwVrWcgN3/8CwVNRYNu7sN7UeQIbE0LnyGhUpPxxKjspIbhGGhM9rSZqN1Wsjs8n3ITbQinmh5Ak0RkggkSWLppBQ6LA52ngjciish/IhEMFa9fy8oNHDZfw67K7fbzUbTES6KzsKgHHjn7EizOWWU1UcxOSk4w0JnmNIuxIXk9/BQsiaXVlstdldkFHcbZ4ihwBjDR8eaMNt83/UIkSmkO4uFEdDTBrbOvo/V7oMj62HubeC0enYKD4W9by2aA+Z6TttN/DBpzjCDDZyyZgN2p4ypycEtouZQx3E6JoOC9mMMXJCjr2R1LhXdJTRbq0jVnl9Bv5F2eVEKf/mwnB3Hm1g6MbxXPAnnRySC0c7WCeV9i/2x86+gigFdZv/n/JExq89fN5jKUElyFseNH0aggbW/0UisxkFufHvQX+to/AQWnf4Aja0LXzfZ8UojKpmWRmtFxCSCtHgtUzN0fHaimbnj9MRplaEOSQgwMTQ01jQf9ywXHb8EFOphd+d0u9hkOsb82HHEyIffXyBYHXLKWgxMSe8K6rDQGYcTCpHhJrvxsM+2kiQjSZ1Dk/UULj8mmMPFkqIUXC74QBxeMyqJRDCWuN1wdCNodJB9cUC63GtvoNXZE16rhVoMOFxypqSPTImEmuh0TMpYchsP+NU+WZ2Lw22lzTb8SqIjJTFaxazcRPacaqWpMzLmNwT/iUQwljQd
gbYKz92APDC399tslcTK1MyLyQ1If4Gwv9FInMpKjmFk6uq7JRllCYVkN5Uhd/k+klKvykSGPGKWkZ5x2YQkFDIZ7x8WBelGG5EIxgq323MYvTYBsgIzqWtx2fnUWs2SuHxUsvCYbrI45Bxp0TM5uWFEhoXOKEsoROWwMK7D94e7QqbEoM6iwXIiomr5xGqUXJpv4GBtB1Ut3aEORwggkQjGisbDYKqC/KUQoA/t7Z0nMeNgeTgNCzV/NSwUrE1kgziuy8chU1LY5nueAMCoycPi6sJkj6yjIeflG4hVK9hwoC6ikpjgnUgEY4Hb7TmDWJvYb8XPcGw0HUEvaZkZnRGwPoertOmrYSFd8FcLncsuV1FtKKCorczz7+1DsjoXCRkN1hMjEF3gqBVylhQZqW4zc6Cm/2lsQmQSiWAsaCmH9lOQtxBk8oB0aXJa+LirggXqbORSeLyNzgwLTRnhYaEzKpIno7e2YjT7/i1fKVOjV2VSH2HDQwAzshNIidOw+VA9DlF6YlQIj59gIbjKt4I6FjIvCliXW0zHcLhdXKbOCVifw3V2WCg0wy2VxkkAnrsCP6Ro8jA7O+hwNAUzrICTSRLLJ6fS1mNn50lRemI0EIlgtGs4BM1HIXdBwFYKAbzTfpg8tZ58eXhUGoUzq4UsZOtCM2TRrYnndHQ6RX7OEyRrcpGQaLBE1vAQwPjkGCYYY/nwaCPdVt8rpYTwJhLBaLd7reecgexLAtZlZXcd+8y1XB1fFDYH0Fgcco626pkSpJLT54pWQobaQobaQrTc2ft1jKuD00kFZHdWUUBT7+OD/RmnlUjVpNJsPU6CMnI2l52xbFIKNodLbDIbBYKaCHbs2MHSpUtZsmQJTz311KDtNm/ezIQJEzhwwL8NOYKfGo94hoVy5oFSE7Bu36ndgQyJK3WFAetzuA41J+EI0klkXydzObDWHcJadwhnT/vZr+vLOIQaCTfZJ7b1Pu7tT5ZdQ4ejg+pO/4rWhRNjnIaZOYnsqmihqqUn1OEIwxC0ROB0Orn//vtZu3YtGzZsYP369ZSXl/dr19XVxfPPP8/UqVODFcrY9emfQKGF3OKAdelyu1lf+wlzY7JIVsYErN/hKm00Eq+2kBUX2pUsLao4WlSxTOiq9qt9tkyPDImdjTuDHFlwLC40opTL+Nv2yBveEs4KWiIoLS0lOzubzMxMVCoVK1as6D20/lyPPfYY3//+91Grw6NOzajRdgpKX4XJ3/AUmAuQ3d3V1FmaWRk/MWB9DpfZruBoCFcL9SFJHI3NJLOniSiH753NKklBupTAzqadEVV76IwYtYIFBUl8Ut4sziyIYEHbDtrQ0EBKytmStUajkdLS0j5tDh06RH19PQsWLODpp5/2q1+r1UpZmfdVGRaLxWebcBTIuI17fk+CJNGWczXW6pLz6sOROh7z1z5ZX+w4TrRCS0b8NI7JlDhTo+mUef99woUKU4f34xm10TbMPtrYbLYB+yltzsLplpEXfbLP84P26Qaz2ez1tbxxOJ291zudjrNfq+WYzWb2K5O4mMNkt1VQEpPjs7904qh2V7L39HbSlH0PCbrAoKKu3lOTyGG39349HOf2GQjjYt0kxyj51eslPH5lOvJz3jPiZ3HkDCfmkNUFcLlcPPTQQzz44INDuk6tVlNY6H1suqyszGebcBSwuDsb4PV3Yer1JGYXgr32vLqpjYthf8WW3r/bXA4+bT/IZN14jrTsAsDU0YEuLs5rP1P1q3y2QaVC5aONSqUasJ/jFdnEq80UpjqRpLhzLxi4Twm0Wq33eLxQyOW918vlinO+9jzeqdHQ2hLDJEsDZUm+/z9z3Sr2uxqoU5RyYcqiPs9po6JITUkFoK6+rvfr4Ti3z0C5IzWV/3z7ILvbtdx0ydm6U2P+Z3EE+YrZW5II2tCQ0Wikvv5scaqGhgaMRmPv37u7uzl27Bjf+973WLhwIfv27ePWW28VE8aB8PnfwGWHS/8joN2WWRqx
u51MiQ+fOvo9dgXHWvVMNTYQJguYeoeHsnsa/RoeUkhyZhpmcrTz04g5uezrigsMFBck8eiWYzR2jkyxPyFwgpYIJk+eTGVlJdXV1dhsNjZs2MDChQt7n4+NjeWLL75g27ZtbNu2jWnTpvHEE08wefLkYIU0NpjbYffTUHQN6PMC2vU+cx2Jci2ZWqPvxiPkUHMSTrcseAfUn6eyuCxkuJnQ6d+k8aXJl2JzmTnWGZmTxpIkcd/VE7E6XDy08UiowxGGKGiJQKFQcO+997JmzRqWL1/OFVdcQX5+Po899tiAk8ZCgOz6h+dUsnl3BrTbFkc3p2xtTItKD5u9A+BZLZSgMZMR631+YaQ1q+NpUMczsaPSr/aF8YXEK43sN20ObmBBlGuI5gfF43hzbw1fiB3HESWocwTz589n/vz5fR674447Bmz7wgsvBDOUscHW7RkWyr8cUgJ7Z1XSU4sMiWnawI4tD4dnWCiR4syq8BkWOkdZXDYLmvYTb+ukXRXrta1MkjFFdzk7ml+gzVZHgip8/p2H4seXjeetvTXcu+4Q639yaajDEfwkdhaPJnueBXMrzLsroN063C72m2uZoEkKm+MoAQ42JeNyB/+A+vN1ODYLN1DUUeVX+ym6xUjIKDVt8d04TGlVcu69qoijDZ08v/NUqMMR/CQSwWjhsMJnj0P2pZAVuOJyAEctjfS47EyPSg9ov8O1v9FIoqaH9NjOUIcyoC5lFFVRyRR1VPpVmjpWaWBc9IWUmrZG5J6CMy4vMrJgQhJ/fP8YTd2iDlEkEIlgtNj/MnTWQfHPAt71np4adHINearEgPd9vjqsKo63JjI9pT4sh4XOOByXTaK9izSLf2PmU+Mvp8vRyomu3UGOLHgkSeK/Vk7C6XLz+M6miCuzPRaJRDAaOB3wyR8hbTqMuyygXbc6eqi0tTFDG16TxPsaUnAjMcMY3ufnHo3NxCYpmGI66Vf78TGziVXo+bLt3SBHFlyZiVH8fOkEdteYeWtvTajDEXwQiWA0OPQWtFV65gYC/GFd0lODhMS0qLSA9jtcJQ0pZMSaSI4O72JnNpmSI3GZXNBRjcpl99leLimYkXAllT37aLJWBj/AILrx4hwKk9Tcv/4wTZ2RuT9irBCJINK5XPDxHyCpECYsD2jXdpeD/eY6CtQGYsNokrihO5qazjhmpIT33cAZpbpxqNwOJnT4t6dgWvwyFJKK3a3vBDmy4JLLJH56cRI9Vif3rjsohojCmEgEke7Ye9BU5tk34KPmz1B93FJKt8vGhWE2SVxSn4JMcjFtBEpOB0KtRk+zKs7v4aEoeRyTdAs52LGNDltbkKMLrqx4FT9dks97B+t5o0QMEYWrkNUaEgLA7YaP/hcScmDiqgB37eatuk/Ry6PIU+sD2vdwuNyeRFCQ2EqsyhbqcPwjSRzQ5XJZ034M1naa1fE+L5mVsJJ97Zt4//Q6ximu8fuljPEunJL3gnp2yYkhwXsZCLlbS0N7YH6x+GFxHtuPNvHbdQeZlZNAtj46IP0KgSMSQSQ7sh7q9sPKv4E8sP+V+811HOuq5oq4CWE1SVzZHk+7VcvyvP5nW4SzA7pcLm0+yIy2crakzPTZ3qDOYlz0TDaeepXv51yOWh7l1+s4JTPvV+zw2iYrMYqqVu9zK0tyi4HAfGDLZRJ//PY0lv1pB//xyj5e/eFcFHIxGBFOxP9GpHK54MP/Af14mPLtgHf/UsteouUapobRTmKAPfWpqOQOJiZF1oHvFrmasrgsijoqUTv9u5OZZ/g3Ou0m9rSvD3J0wZcer+WBaydTUtXOn7dFVhIfC0QiiFSH34LGw7DgnoDfDdTbO3m/4xhLk2ehkoXPTWO3zc2+RiNTkxtQyV2hDmfISuLzUbmdTDZV+NU+TTuB6Ya5fNHyJlZneK+O8sfVU9P4xowMHt92nO1Hw6tI4FgnEkEkcjrgwwc9K4UCPDcA8M+WvbiBlamBO/A+ED48bsfmVDAnLTIn
HRs1CZzWGpjeXu7XTmOAb+bdgsXVyZdtkb2C6Iz/vmYSF6TEcce/9lHtY3hKGDkiEUSiA69By3G47J6ArxQyOcy80rafZboJpGjCZycxwMbDNlKiO8mMC69Ko0NREp9Pgr2L8V3+HRY0XlfI+OjZ7Gp9E4srPEtpDIVWJefJG2bgdru59aU9WOyRW0pjNBGJIIwkaID2Ku9/mo/Dtv+CpAsgdarv9vahHRLyz9Z99Ljs3GKYFZxv8jyd7oilvMnFnPSasC4p4cvR2AzaldFc1Frm913BZck3YXNZ2GN5I8jRjYxsfTR/Wj2NgzUd3P1GqdhfEAbCZwBYQOG0QPnn3huVb4WOGpjzIzjxoe9OM/z/QO922nixtYQFsXkUaJI4vwMug+OL2nRUcsK+pIQvbknG7oQJLGksIcPczOmoJJ/XGNRZzEpcyRetb1JjPkK69oIRiDS4Fl5g5OdLJ/DI5qNkJkRx19IJoQ5pTBN3BJHE0gHH3wfjJDAUBLz719tK6XBaWRNmdwNWh5y9DSkUj1eiVUZ+NcsDulx65GrPXYGfLtFfj1bSsaXhiYiuTHquHy3IY/WsTP7yYTn/2uVfqW4hOEQiiCRHN3rOIi68OuBddzmtPN28mznRWUwNs7pC+xqNWJ0KVhQpQx1KQDhkCkri88nrriPJ0u7XNWp5FLO036LeUs7nLa8HOcKRIUkS/3XNJIoLkvjPtw+y7Uhk7BQfjYKaCHbs2MHSpUtZsmQJTz31VL/nn3nmGZYvX85VV13FjTfeSE1NZK4GGREdNVD9BeTMg5jkgHf/XMse2pxm7jCG16lSLjfsqMoiNaaTohR5qMMJmJKE8VhlCi5uOeT3NbnK2RTGFrOj+UVO9xwOYnQjRymX8bfvzKAoNY7/92IJO45F1v6Q0SJoicDpdHL//fezdu1aNmzYwPr16ykv77uRpLCwkDfeeIN3332XpUuX8sgjjwQrnMjmdsOht0GphfylAe++2dHNcy17uDyugEnalID3PxxHW/Q09sQwP/NUWO1wHi6LXM2XCROY0HWaZIt/9YQkSWJZym3olMmsq30YszPyVxEBxKgVvHDLbPKSYvj+81/yWXlzqEMac4KWCEpLS8nOziYzMxOVSsWKFSv6HVo/Z84ctFotANOmTaO+PrInAoOmZrdnueiE5aDyr9TAUPy96XNsLgc/SQ6vfQMAH1Vlo1NbmBohBeaG4suEAiwyJZc2H/T7Go08mpVpd9PlaOON0/+N3TU6yjvHR6l4ac1F5Oijufm53Xx8XNwZjKSgrRpqaGggJeXsb5dGo5HS0tJB27/++usUFxf77NdqtVJW5n2SzWKx+GwTjuIlB3VfS4aSvYfkQ2/hiE6nRZ0HQ0yWcYlmOrxcU+Fo51VTKcvVeajarNRxtm2XPBZTR/81+9poG+avHnc6nQO2OZfNZvPZ5tw+z6jrjudEeyKLMg7Q3WXyqx9ffQLgBrPZe2E2bxxOZ+/1Tqfj7Ndq+ZD6NQOfxeax0HSE8tbDaJTj+7dxWNAoTQDkpIEkb2CcUseqzFt4o/op1tX9jtVZt6H4age4EiUmk8nr69piFD7bdHV1UVfv/791S6JEZ33/M4qH+rP4u/mJ/Op9Kzc9s4ufXZrMgtwYv68NpEj8DBlOzGGxfHTdunUcPHiQF1980WdbtVpNYWGh1zZlZWU+24Sj5vK9GFK+NjSz90Vw2lDNvIHU2POo+xOlJfrrfX7F5Xbz84oP0ck13J2zhHiFts/z7phodHFx/S9UqVB99bipo2PgNn2aq3y2ObfPM9ZXF6GWO5g/rhmtIs6/fnz0CYBE753o+VDI5b3Xy+WKc76WD7nfUnURs7sqSdn1F/6e+91+BwvVOM8WiDOZTOh0uq+eUVIUN59DHdv5e/mDTNEtQSFTsVI/95w2A1OpVD7bxMTEkJrif9E5vUFPRkJmv8fP52fx7cIJfP/5L/nfHY2o4wzcfGnukK4PhEj8DPEVs7ckEbShIaPR
2Geop6GhAaPR2K/dZ599xpNPPskTTzyBSqUKVjiRqb4Uar6E8YvgfJKAD2+2HWC/uY6fpRT3SwKh1mbWUNqYzEVpNWgVo2O55EBsMiWfGCYR03CQKS2D3zEPJDNqEoWxxTRZK/m89XW6Hf6tQAp3Oq2S52+ezbKJKdy//jD/+dYBbI7Iqy0VSYKWCCZPnkxlZSXV1dXYbDY2bNjAwoUL+7Q5fPgw9957L0888QR6ffjUvA8Llg4ofQV0GZB/ecC7b7Z388fGj5kZlcHVuqKA9z9c26pykIB5maN/ffkBXS7mxDxWVG1E4cdxlufKjp7CZcj2RAAAHZtJREFUzISVWJ09fNbyCu+dehunO/L3WmiUcv76nRn8v/l5vPRFFf/2j89p7BzaLnnBf0EbGlIoFNx7772sWbMGp9PJN77xDfLz83nssceYNGkSixYt4uGHH6anp4c77rgDgNTUVJ588slghRQ53C4o/Rc4bDDtBghwBVCX281/1mzC5nLym7TFYbcap9WsYVdtGhel1RCvGR2Tod64JRmnL/oR+e/9jPm1O/ggY9GQrterM7jY8G2OdHzMOxWvoZHFkBk1iTTtBLTy2CBF3ZfD6eJ0W/8icjZFzICP+0MhgxvmZJESp+bB946w/LGP+c2VRVyYnTCsWGPVCnRRYvThXEGdI5g/fz7z58/v89iZD32AZ599NpgvH7lObPOUmJ74DYgN/HLO51q+5LPuU9ybuphx6vAqLAfwfsU4ZJKbRTmVoQ5lxHSlX8h+/RQWnf6A/fopNGt9l544l1Yey/SE5RSmxPPc4Wc43vU5x7s+J06RTIIqFZ0ymWhFPFHyeCDwK8/Mdhd7T7T2e7yuvo7UlPOrJTQ9K569Ve2oFHJ+UDyOl3dV8dN/7WNefhKLi5JRnGfBxeICg0gEXxMWk8XCOZqPwZENkDYdcgK/uau0p44/N3zKkrh8rkuYHPD+h6umM4Y99anMy6xCpx79dwPneifnKgraj7Gq4i2eKvx+v4ljfxTEFzI78Vp6HCZqLcdotZ2muucgpzg7z/JZSxRqWRxRch1aeRyxSj06ZTIaWWzY3R2ekarTcttl+Ww4UMuO402UN3WyanoGafHhNbcVqUQiCCOyrjooed6zc3jK6vP6IPCmxmbiJ9XrMCpj+W3akrD7oXe74d3jBWiVdhbn+Hd4y2jSodLxXtYVrKp4i5lNe/gy2feRloOJUugYHzMLmIXL7aTb0U6Ps51uhwmZoovGnhZabTVYXEd7r1HLojCos0lW5+JwXRyA7yiwVAoZ107PoMAYy7p9tfxtezkX5xlYXGhEpRDVcoZDJIJwYW4j7sNfgtsJM28GhTqg3ZucFm499RY2l5P/G/ctdHJNQPsPhNKmZE60J3JNwZFRUVzufHxuvIhpzftYWbmOE3HjCMQwjkySE6vUE6v0LMg498xip9tBl6MFk72RVlsNDZYT1JjLOL7nEybFLeHChKuJUxqGHUMgTUzTMc4Qw6ZDdXxS3szBWhMrp6Yz4f+3d+5hUZfp/3/NkeEww0k5eUbFA4qKmq6tKChZq6YZupma9dXst5pUmqbbtgdLa8u0017l1ppWptmqWdpaJoquieIBEURBEwSFAYbzaY7P74+pWckTIjgjfF7XNdfFfJ6H5/Oe55p57s9zuO876M7sh7REJDPqCpjr4IsZKCovwaBZ4HX1MdvbodJqZF7ONvLM5bzd8UGX3BeoNcvZntmDdtqKuzYDWVMgZHI2dbPnoH7k3CZkonmPTSpkSrxVgXT06Et/n/uJCZjFQN/xdNb24HDJNj74aRbfFbxPpdnQrDpuFXe1gocGtOfJ4aGoFHLWH8pm/Y/ZFFZIJ4sag2QInI3FBF8+DtkHqBq62J6MvgkpNVUwK/tL0uv0vN7+dwz2vNrpxxXYmepPlUlNXI8MFPLWnaikVOPHti4TCa3MZtC57+7oveUyBW3dOvFEz0X8oeu/6KsbRUrZf/jnhac4bNjickdT
u7TxZH50N+4PDyKnpJp3ErL4KuUSlXW3dgy3tSMZAmdiNcPW2ZD5Hxj7JsbQpvUXuGgs5fEjy/jJaOCdDhMYrevepO03FelFbTic7U1Uxxza61pGILXb5XibSI61iWRI5rf0KD3jFA3eqgAeCI5nTugaOrj3IaFoLWsvxJNfl+UUPddDqZATFdaWhbE9GNLFn6PZJazanckPGXpqTS3XGbEpkQyBszDXwhcz4PR2GLMCBs9u0uYTK3/ikZ8+x2Aq5/1OkxiuvfNu+g2hrM6NL8/0JsTbyP2h550tx3WQydgSOoliXQiPZm3Ev855SzO+6mCmdPgrce1eos5WxSfZC/lv8ecuNzvwdFMyvl8Iz44Ko1uAFwlnCnnj+zPsydBLuZFvgrRZ7AxqS2HTdMg5CGPfbFIjUGMzs7IshS/zD9DNsx0vDXyeIIu5UWkn69yad/PNbJXzyakILDY50+65hNLaupeEfo1ZoebbgU8Sd+B1/i9jLa91nA7cOEZQc9JdO5T2HuF8r3+fA8UbyKo6woPBz+Pv1t5pmq5FG60b04Z04nJZLQlnCtlzppCD54v5bbc2DOvqWhvfroJkCO40hRmw6VEoy4WHP4K+cU3WdFLVRZbl7ybXVM5gj/aM1nbnfHE650uzG9Vev56Tmkzbr7EJ+CKjN7mV3szse5IAnSc0LCx/q6LCow3re8zkydMf8nTuF3zkMxezwnnOUO4KLRNCFhPm9Rt2FfyDtdnxjA54kv4dHnGapusR4uPO9KF2g7DnTCE/ZBTy33PF5BiqmT+qO228mvZk3t2MZAjuJGd2wtY5oPKAx3dCxyFN0myuqYyVBYkkVJ6ng9qbN8KforLkxsssJqsNi/XGJ1LMNkGN6erpv8pmw/zzdZtM6aijVMhRK26+2mgTsPVsT04WBjG2WyZ92hYB1450eaXO6+m5HlfqrI9r+U/cjAu6LmwIe5THzn7K42fXs67HTKcaA4BeuuG0d+/NjvzV7NK/R3FKCvd6z8VD6bwZy/UI8XFnxtBOXCqrZe+ZQj45lMOm5FwmD7KfOurk3/Aoqy0VyRDcCcx1kPAyHHoPQiLh95+Bd7vbbvaSqZwPiw6zvew0KrmCZwJ+ywz/SAze7Tl4E0NgsdrQV97Yc7fWZL1mnTbeNop/vl5bW4u7u31JJ1DrdlNDYLXJ2JzRm+P6YGI6XWBkxxsHlbtS5/X0XI8rdd7tpPv1YX3IOGZe3sGsjH/xca8nMDrZF0Sr8ueRDstILv2axKJ1ZJSkMzb4Wbp6Nd4Rrjlp9/MMobO/B1+fvMzm5Dw+P3yRB/oG81RUKBHtfZwt0WlIhqC5uZwC256CojN2H4ExK0B1ez/gi8ZS1hmOsa0sDRkypvhFMLvNPbRVOSeJR0OpMKr5LK0vF8p9eSD0HNGdsp0t6a7ikE8EKk8tU7M28f/S17Cux0zK3Zw7eMlkcu7xm8j9XX/L34/9mc15f2GQ74NEt30Cpdw14/l09PfgtYcjWBAbxtqD2WxIymFnaj6RHX2YOawzD/Rp+pDvro5kCJoLYyXsX2mfBXi2helboNvoRjcnhOBEzWXWG46yt/I8Cpmch336MrvtPQSpXNujUgjBsfwgvj4Xhtmq4NHepxgQ1PJST94JTrbpj1HhxrSsz3nm1Dus7/EYOdrOzpZFJ203Hu+8mn1F6zha+jXZ1Sd5IOhpFNxepNDmJECnYckDPZkX3ZV/H8vjk0M5PLMphZe9MogNdeeZkDqCvF3PA785kAxBU2OzwcnPYc8yqNJDv0dhzHLwaJw3r9FmYXdFFhtKjpNWq8dboWF2myFM9et33RlAtUlwqVJLca07Br0vhhJ3aiwqaswq6ixKBGC1gdVmQ60wo1ZYcFeY8FLX4KWqRauuRauuwXybp3isNhnpxW1Zc6qac0V96KQrY3Kv0wR6Ni4ssYSdM769eLfP0zxxZh1/SPuAPe1j2NNuFDa5wqm6VHI3YgOfItRzIP8peJdPLy6im/pe
fmeZi6fSdZddtBoVT9zbhZm/6cx/zxXzyaFsNqUWsjktgVE9A3h4YHuiewS06HhGkiFoKqxmOPVvOPiWfRmo/WB4ZCO0H9io5s7WFbG19BQ7yjOosBrppPbhT8GjeNCnN+5ylaOe0Qqny5SklKg4UaIipUTFxWoB/G8j2ktlxEttxkNlxtutDrlMYBM2as0Ck1VFtVlDca031WYNV26kbjpbi6dqNN7qarzdfn6pq7F5qzFa1Lgp/ue9KQQYLQrKTZ5crtRyvsyXjOI2VJndCNIJpvRKZ2BQPvK7a5/WZSn0COTtiHgmXtjOfXk/0Lv0NF93ngD8xtnS6Oo1iDmhH3CweCOHS77inz+lcG+bqQzweQCV3HWfsOVyGVFhbYkKa8veI6kkFavYcvwS35/W4+uh4sF+IUwc0I7+HXxcLmDj7SIZgtulUg+nNsPhf0L5RQgIh7i1ED7plqOHFtQVs7P4GP8pP0N6nR6VTMFoXTce9unLYM8OyJCRVyPnuEHlGPhPlykx2ez3CdRYGeBv5oGeSmqMJ2njXoN/SFvcKq+O5Fljsly18WoVMqrNGqpMHlSYPPD3v4eU7BLKTV6cK2uH0frzmm82QCgASpkFGzJsov7TqEZppoefgcigAqYMG05aZv4t9YXEzalTurOp+yOk+YUzIftr5qa/T3HVaY7q+nHJy7ln+9Vyd6ID/o8gcz9SrNvYU/gRSYYtDPGbRIRP7B1LmNNYgrQqlt7Ti0VjenAgq5h/H89jY3Iu6w/lEKTTENs7kNjegQwN9W8RMwXJEDSGmhI4twfStkDW9/aIoR1/A797A8LGNNgAWGwW0g3pJF1OYl/uPtIMaQD00gSwJGgk97iFc7FCy485KtaUKkktVVFstH/pNApBhK+Zx7vVMMDPQn8/M8Ee9mOWl709OHih0H4TZcMdaBQygU5di05dSwgGYiJ/S1tx3FFeZ1FRbvJEeHWjqNBAnUVFjUmGWqlAIbfh4w4BHiaCvaoI8Kx2PP0rpGlAs5Lm35ezPj0YeTmRmMs/8mxOIud0XUkOGMwpvz5OPWrqowhhartXuFiTxoHiz0go+hf7iz8jXDeC3rqRdPTog1zm3CWtG6FUyInuGUB0zwDKa83sPq1n9+kCvjyWy6dJOWjdlAzu4seQLn4MCfWnT4gOZQOOULsazWoI9u/fz/Lly7HZbEyePJk5c+bUKzeZTCxevJj09HR8fHxYvXo17du7lpciABX5cPk4XDoOFxIh7ygg7Anl74237wO0DbtpM8W1xZw2nOa04TTpxekc1R+lylwFQBdtT4Z7PoRfZQgFBQG8k6FEX2f/gcgRdNNZGRFkZICfmf5+Fnp4W1Dd4e+bRmlGoyyjTUglxTb7LMN+fNSeHCRQ64aHWnq2cAZmhZrdHWLRjlhAyYF3GapPYuq5TTwsV3FB24VMnzCyvLuhd2/ayLYNpaNHH6Z1fA193XmOle7kdMU+TpZ/j4fChy6eA+js0Y927j3xVYe4rGHwdlcRN7A9cQPbU2e28t+sYvac0XP4pxISztgfvDzVCsLbedMrSEvPYB09g7R0C/BCq1HdpHXn0my/WqvVyrJly/j4448JDAwkLi6OmJgYunX7X3TNL7/8Ep1Ox+7du9m5cycrV67krbfeai5JV4izgLkGLHX2mD/mWqgxQE0xVBdBdTGU5kDJT1By3n4NQKawZw4buQRb11HUBfTCKMzUmuuoNPxEhbGK4poSimsNGOpKMNSWcLnqMvnVlygyXqLOWvWzABluIhCFqR+Ud6GqrDOpVi9SAZVM0FlrZViAmb6+NUT4WujtY8ZDGl8lGoBV7cW+diNJDImic2U2EYZTdC/PYnzODgAsMgWl2iBy1QGUqX2oUOt+fmmpU2gwKtwwydXIrUYQHk2eHClQ05XfBccTGziH81XHOFt1kAvVJ0iv2AuAUqamjVsn2qo7ctbUnppqLVqVPxqFFje5x88vT9RyDTKZ8568NSoFo3sHMrq3
3bAWVtRxJLuEIxdKSLtUzr+P5VF9RcA7b3cV7XzcaefrTjsfd3w91Ph4qPDxUOHtrsLHQ42XmxI3pRw3lRw3pQI3pd1BU34HZtTNNrykpqbSqVMnOnSwhz0eO3Yse/bsqWcIEhISePrppwEYM2YMy5YtQwjRPBsxxedg3Vj7YG9rgHeqNgT8QiHsfggMh5BIir1DmP7DkxTmbsacs+GmTQghR5h9sJn8sZnDsZnaYKtrh4+iC8E6H4K9NQQHamjn605oGy/8ai8SYU2jBSw5SjgZIZNzQRfKBZ19L8fbWEaXigsE1+QTai6kc0U2OlMFSnGdYGxH/4ZNpsAq15AU8TK5QbFNqk8l19BTdy89dfcihKDYlEN+bRaFxmyKjBe4UJNC2k97EVzb+72dey8e67SySTXdDgE6DeMiQhgXEQKAzSa4VFZLRn4F54uquVRWw6XSWnIM1SSdN1BpbLiHvFohR620v1ZOjiCmZ9PP6mRCiGaJ9LVr1y4OHDjA8uXLAfjqq69ITU3lz3/+s6POuHHj+OijjwgKsidoHz16NJs3b8bP7/pHLVNSUnBzk2KESEhISNwKRqOR/v37X7PsrltwuN4HkZCQkJBoHM22CBEYGEhBQYHjvV6vJzAw8Ko6+fn2Y4UWi4XKykp8fV3XE1FCQkKiJdJshqBv375kZ2eTm5uLyWRi586dxMTE1KsTExPDtm3bAPjuu+8YOnRoi3PUkJCQkHB1mm2PACAxMZEVK1ZgtVp5+OGH+cMf/sDbb79Nnz59GDVqFEajkUWLFpGRkYG3tzerV692bC5LSEhISNwZmtUQSEhISEi4PtJBRQkJCYlWjmQIJCQkJFo5d93x0V9YunQp+/btw9/fnx07dlxVfvjwYebOnesIWREbG+twXnMW+fn5LF68GIPBgEwmY8qUKcycObNeHSEEy5cvJzExEY1Gw2uvvUZ4eLiTFNtpiG5X62+j0ci0adMwmUxYrVbGjBlDfHx8vTquGOKkIbq3bt3K66+/7jiFN336dCZPnuwMufX4ZS8wMDCQNWvW1Ctzxb6GG2t21X6OiYnB09MTuVyOQqFg69at9cobNYaIu5QjR46ItLQ0MXbs2GuWJyUliTlz5txhVTdGr9eLtLQ0IYQQlZWV4r777hNZWVn16uzbt0/MmjVL2Gw2ceLECREXF+cMqfVoiG5X62+bzSaqqqqEEEKYTCYRFxcnTpw4Ua/OZ599Jl566SUhhBA7duwQzzzzzB3X+WsaonvLli3ib3/7mzPk3ZC1a9eKBQsWXPN74Ip9LcSNNbtqP0dHRwuDwXDd8saMIXft0tDgwYPx9na9RNk3IiAgwGGZvby8CA0NRa+vn6lrz549TJw4EZlMRv/+/amoqKCwsNAZch00RLerIZPJ8PS0JyW3WCxYLJarjiYnJCTw0EMPAfYQJ4cOHUI4+exEQ3S7IgUFBezbt4+4uLhrlrtiX99M891KY8aQu9YQNISUlBQefPBBZs+eTVZWlrPl1CMvL4+MjAz69etX77per3eE3AAICgpyqUH3errB9frbarUyYcIEhg0bxrBhw67Z18HB9vy0SqUSrVZLaWmpM6TW42a6Ab7//nvGjx9PfHy8wynTmaxYsYJFixYhl197SHHFvr6ZZnC9fv6FWbNmMWnSJL744ouryhozhrRYQxAeHk5CQgJff/01M2bMYN68ec6W5KC6upr4+Hj++Mc/4uXl2gnnr+RGul2xvxUKBdu3bycxMZHU1FQyMzOdLalB3Ex3dHQ0CQkJfPPNNwwbNowXXnjBSUrt7N27Fz8/P/r06eNUHbdCQzS7Wj//wsaNG9m2bRsffvghGzZsIDk5+bbbbLGGwMvLyzHFHjFiBBaLhZKSEierArPZTHx8POPHj+e+++67qvzXoTkKCgquCs3hDG6m21X7G0Cn0zFkyBAOHDhQ77qrhzi5nm5fX1/UanuymcmTJ5Oenu4MeQ6OHz9OQkICMTExLFiwgKSkJJ5/
/vl6dVytrxui2dX6+Rd+GQ/8/f2JjY0lNTX1qvJbHUNarCEoKipyrEGmpqZis9mc/iMXQvDiiy8SGhrKE088cc06MTExfPXVVwghSElJQavVEhAQcIeV1qchul2tv0tKSqioqACgrq6OH3/8kdDQ0Hp1XDHESUN0X7nem5CQQNeuXe+oxl+zcOFC9u/fT0JCAqtWrWLo0KGsXFk/RLSr9XVDNLtaPwPU1NRQVVXl+PvgwYN07969Xp3GjCF37fHRBQsWcOTIEUpLS4mKimL+/PlYLPYY31OnTuW7775j48aNKBQKNBoNq1atcvqP/NixY2zfvp2wsDAmTJgA2D/H5cuXAbvuESNGkJiYSGxsLO7u7qxYscKZkoGG6Xa1/i4sLGTJkiVYrVaEENx///1ER0fXC3ESFxfHokWLiI2NdYQ4cTYN0f3pp5+SkJCAQqHA29ubV1991dmyr4mr9/W1cPV+NhgMjmVXq9XKuHHjiIqKYuPGjUDjxxApxISEhIREK6fFLg1JSEhISDQMyRBISEhItHIkQyAhISHRypEMgYSEhEQrRzIEEhISEq0cyRBISEhItHIkQyDRqjl8+DBPPfVUo///1KlTvPLKK9csi4mJcTiIbdiw4bbuGR8fT25ubqN1/sJzzz1Hdnb2bbcj0bKQDIGExG3Qt29f/vSnP92wTkVFhcPhpzFkZWVhtVqbJJ/31KlT+eijj267HYmWxV3rWSzReqipqeHZZ5+loKAAm83G3Llz6dixI6+99ho1NTX4+vry6quvEhAQwIwZM+jRowfJyclYrVZWrFhBREQEqampLF++HKPRiEajYcWKFVeFbrgW48ePZ8OGDWi1WoYOHcrSpUuZOHEiixcvZsKECSiVStauXcuaNWsoLS1l4cKF6PV6+vfv7wi58eabb3Lx4kVHRNGRI0dSU1NDfHw8mZmZhIeHs3Llyut6Yn/zzTeMGjXK8X7//v2sXr0aq9WKr68v69ev59133yUvL4/c3Fzy8/NZunQpKSkpHDhwgICAAD744ANUKhWDBg1iyZIlWCwWlErp5y/xM7eZI0FCotnZtWuXePHFFx3vKyoqxO9//3tHco6dO3eKJUuWCCGEmD59uqPukSNHHImLKisrhdlsFkIIcfDgQfH0008LIW6eUOell14Se/fuFWfPnhWTJk1ytB0bGyuqq6vr/f/LL78s3n33XSGEEHv37hVhYWHCYDCI3NzcegmUkpKSRGRkpMjPzxdWq1VMmTJFJCcnX1fDtGnTxJkzZ4QQQhgMBhEVFSUuXrwohBCitLRUCCHEO++8Ix555BFhMplERkaGiIiIEPv27RNCCDF37lyxe/duR3uPP/64OHXq1HXvJ9H6kB4JJFyesLAw/v73v/PGG28QHR2NTqcjMzPTEQDPZrPRtm1bR/2xY8cC9uRFVVVVVFRUUF1dzQsvvEBOTg4ymQyz2dygew8aNIjk5GRCQkKYOnUqmzdvRq/Xo9Pp8PDwqFc3OTmZ9957D4CRI0feMHFSRESEI2Z8z549uXTpEoMGDbpm3aKiIvz8/AB7zodBgwY5lol8fHwc9aKiolCpVISFhWG1WomKinL0X15enqOen5+f05MdSbgWkiGQcHm6dOnC1q1bSUxM5K233mLo0KF07979mkk5gKuWWGQyGW+//TZDhgzhH//4B3l5eTz22GMNuvfgwYP5/PPPyc/P57nnnuOHH35g165d1x20G8ov4Y3Bnn/AarVet66bmxtGo7HBbcrlclQqlaMf5HJ5vfZNJhMajaax0iVaINJmsYTLo9frcXd3Z8KECcyaNYuTJ09SUlLCiRMnAHuuhCszon377bcAHD16FK1Wi1arpbKy0hGT/ZdwyA0hODiY0tJSsrOz6dChA5GRkaxdu/aahmDw4MF88803ACQmJlJeXg6Ap6cn1dXVjfvwQNeuXbl48SIA/fv35+jRo44TRGVlZbfcXnZ29lWhiyVaN9KMQMLlyczM5PXXX0cu
l6NUKvnrX/+KUqnklVdeobKyEqvVysyZMx2Dm5ubGxMnTsRisThC8M6ePZslS5bw/vvvM2LEiFu6f0REBDabDbAvFa1atYqBAwdeVW/evHksXLiQsWPHMmDAAEJCQgB7gpPIyEjGjRvH8OHDGTly5C3df8SIERw+fJhhw4bh5+fHsmXLmD9/PjabDX9/fz7++OMGt1VcXIybm1u9pTQJCSkMtUSLYsaMGSxevJi+ffs6W0qTUVdXx2OPPebI93A7rFu3Dk9PTyZPntxE6iRaAtLSkISEi6PRaJg/f/5NE5A3BK1Wy0MPPdQEqiRaEtKMQEIC2LJlC5988km9a5GRkfzlL3+5YxrmzZtX73QPwPPPP8/w4cPvmAaJ1olkCCQkJCRaOdLSkISEhEQrRzIEEhISEq0cyRBISEhItHIkQyAhISHRyvn/I3ZJoakJKeoAAAAASUVORK5CYII=\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"6Li1iREOhvts","colab":{"base_uri":"https://localhost:8080/","height":408},"executionInfo":{"status":"ok","timestamp":1615297853838,"user_tz":-300,"elapsed":923,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"56d8c257-b464-465f-c365-a0dbc90b03b6"},"source":["for target in targets:\n"," sns.distplot(df[df.target==target]['petal_length_(cm)'],kde=True,kde_kws={\"label\":targets[target]})"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n"],"name":"stderr"},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"oCQEu59thvri","colab":{"base_uri":"https://localhost:8080/","height":404},"executionInfo":{"status":"ok","timestamp":1614779712345,"user_tz":-300,"elapsed":1286,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"1e523154-41f1-4e2c-ca8e-0aebf1cee232"},"source":["for target in targets:\n"," sns.distplot(df[df.target==target]['petal_width_(cm)'],kde=True,kde_kws={\"label\":targets[target]})"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n","/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n"," warnings.warn(msg, FutureWarning)\n"],"name":"stderr"},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"48op7eXwzKb4"},"source":["Строим точечные графики взаимного влияния параметров"]},{"cell_type":"code","metadata":{"id":"hKh-KV27whqi","colab":{"base_uri":"https://localhost:8080/","height":225},"executionInfo":{"status":"ok","timestamp":1614767506092,"user_tz":-300,"elapsed":1461,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"b6d7f703-3029-4c58-e075-ba67e8307bcf"},"source":["g = sns.FacetGrid(df, hue='target')\n","g.map(plt.scatter, 'sepal_length_(cm)', 'sepal_width_(cm)');\n","g.add_legend();"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"ArJjRTF6ySuO","colab":{"base_uri":"https://localhost:8080/","height":225},"executionInfo":{"status":"ok","timestamp":1614767511831,"user_tz":-300,"elapsed":1523,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"a857d46d-33d9-417c-8a78-e3b3cf5dcbd9"},"source":["g = sns.FacetGrid(df, hue='target')\n","g.map(plt.scatter, 'petal_length_(cm)', 'petal_width_(cm)');\n","g.add_legend();"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAPcAAADQCAYAAADbJffdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO2dd1xUZxb3f3fGoQQQgUixREUhBQsi2SUaG9YVRTSisuobXU1ioqJBUdDEYDeJMYvmfVezblxdXaImxIZZSUDRFI0KiIklGkOkqyCiSJ153j/GGWdgyp1ymcL5fj5+hHufciiH5z6/e55zOMYYA0EQdofI0gYQBCEM5NwEYaeQcxOEnULOTRB2Cjk3QdgpNunc169ft9jc+fn5FpvbHJD9rQebdO7GxkaLzV1TU2Oxuc0B2d96sEnnJghCP+TcBGGnkHMThJ3SRsjBS0pKsHTpUpSXl4PjOEyePBmvvvqqWpuzZ8/irbfeQqdOnQAAI0aMwPz584U0i7BS0m6mITk7GaXVpfB18cXCkIWI8I9Qu1dSXQLRORFkTAY/Fz+1NoQ6gjq3WCxGQkICgoKC8PDhQ7zyyisYMGAAevToodYuNDQU27dvF9IUwspJu5mGpB+SUCutBQCUVJcg6Yck5X3VezIma9aGHLw5gjq3t7c3vL29AQCurq7w9/dHWVlZM+cmiOTsZKXzKqiV1iI5O1n5sSYUbci5myOoc6tSWFiIK1euoE+fPs3u5ebmIjIyEt7e3li2bBkCAgJ0jlVXV4crV64IZapOamtrLTa3ObBW+0urSw263rSNUF/T888/L8i4LUGLOHd1dTViY2OxfPlyuLq6qt0LCgpCZmYmXFxckJWVhXnz5iE9PV3neI6Ojhb7pl+5csWmf+DWar/vL74oqS5pft3FFwA03lNtY41fk6URXC1vaGhAbGwsxo0bh5EjRza77+rqChcXFwDA4MGD0djYiIqKCqHNIqyMhSEL4SR2UrvmJHbCwpCFGu81bUM0R9CVmzGGFStWwN/fH7NmzdLY5s6dO3j66afBcRzy8vIgk8ng4eEhpFmEgOhSvDWx9sxaHPj1gFIke6rNU6hprNHYV6mWc6SW80FQ575w4QIOHTqEwMBAjB8/HgAQFxeH4uJiAEBMTAyOHz+OlJQUiMViODk5YfPmzeA4TkizCIHQpXhrcsC1Z9Zi37V9atceNT7ClGen4J2wd9SuR/hHIMI/wmq3FdYIZ4tpliz5A7b1Xy4h7R/5xUiNe2M/Fz+kT2quo/TZ3Ue5Yqsi4kS4+H8uapz
D1r//LQlFqBFmw1DFW5Nj67pOGAY5N2E2FMo23+siTvOvn7brhGHQd5EwG7oUb01EB0YbdJ0wDNpz29Dc5kBo+zWp348aHykV7qZKd87tHGV7DhycxE6oldbC18UXgzoNwqnCU2rKe1FREb4o+4K3Gq8LQ5V9W4Oc24bmNgdC2t9ULdeHk9gJSf2TEOEfwauvRCSBTCaDFFKNY5hqq7FjWSv0WE6YDU3x4bpQjR3n07dB1qDm2E3HMNVWY8eyVsi5CbPBJw5cWx9j+ppjXnOMZa2QcxNmQ5sqzqePMX3NMa85xrJWyLkJs6ErBlwTqko6n74SkQRiiLWOYaqt9han3mJHPgn7RyFEKRTotg5twXEcKusqtarlgDyyrbS6FO6O7nAUO+J+/X2t7Y1VyzUp40n9k0gttzZILTcea7Jfk2ItEUnAGEMje5K+WlXFNsb+1qCMa4IeywmLoUmxbpA1qDk2YLqK3RqUcU2QcxMWwxBlWgg13Z6UcU2QcxMWwxBlWgg13Z6UcU2QcxMWQ5NiLRFJ0IZT13lNVbFbgzKuCVLLiRZDVbF2d3QHY0xtL8yBQxuuDWpkNWpquer+2B/+Rs1ZK61tdRlcaOUmWgSFYl1SXQIGhsq6Styvv6/WhoGhRiov9Kc4eNI0R/npu6eNmlMxlmLFtnfHBsi5iRbC0LhzTdRKa5FSmGLSnK1BJVdAzk20COZSpsvry02e095VcgXk3ESLYC5l2svBy+Q57V0lV0DOTbQIhsada8JJ7ISYTjEmzdkaVHIFFq/yyRjDunXrkJWVBScnJ2zcuBFBQUFCmkVYgKZx5wq1XDWOvJ1jOzDGUFVfpTUTi38df7W86Zz2GD+uC4tX+Tx16hTy8/ORnp6OixcvIikpCQcOHBDSLMJMaHq1peqs7g7u4DgO9+vuKx1LU4pjQ9BVE0xb2iRFppfk7GQknk5EcnZyq3Byi1f5zMjIQFRUFDiOQ3BwMKqqqnD79m1lP8I6aXoYo7KuUnlP8fpK9VWX0OV2DSkB3FpK/7bYnltblc+ysjL4+j4ROHx9fVFWVtZSZhFGYsyrLSFfQ+l67dVaX4lZvMqnMVAJX+Mxl/3Gvk4ytdyuNvuNee3FxxZrOR5rDII7t74qnz4+PigtffIDKC0thY+Pj84xqYSv8ZjLfm0ld/X2M7Hcrjb7jSkBbO+lfwV9LOdT5TM8PBwHDx4EYwy5ublwc3Oj/bYNYMyrLSFfQxlaArg1vBLjtXLn5OTg8OHDOH/+PO7cuQMnJycEBARgyJAhiIyMhJubm8Z+fKp8Dh48GFlZWRgxYgScnZ2xfv16M31phJA0fc2kKCbAoJ7Yx0HkgHpZfbMDIMYKWafvnsbCLxZqVMRV7dFWArg1vRLTm2Zpzpw58Pb2xrBhw9CzZ094eXmhrq4O+fn5OHv2LE6cOIGZM2di2LBhLWUzpVkyASHs11dQQAQRZHhS3M+UQgIrv1+Jelm9yWO1BvQ6d0VFBTw9PXUOwqeNOSHnNh4h7NdWulcX2sr6GjOPMWO1BvQ+ljd12ocPH6Kx8UmOq3bt2rWoYxPWhzmLAhjTp7UcBDEU3mr5559/jq1bt8LR0VF5jeM4ZGRkCGIYYTv4uhiunBtbSECXIk6ow9u5P/vsMxw5coRWaaIZC0MWGrznNraQgKY9t72r3sbC27k7d+4MZ2dnIW0hbIS1R2fiwN3zT9yV4wA8KderSphvGKICopop1Tm3c7D8u+XKUFUAaoUKmrZXXFMo79oKG7QWJZwPvIsSXL58GYmJiejTpw8cHByU19955x3BjNMGCWrGY6r9a4/OxL6755UO3RQxJ4aU6S6xu/bMWuy7tk9j/zZcG3AchwZZg85rinEBtMqCA3zgvXKvXLkSYWFhCAwMhEhEx8BbKwd0ODYANccGnsRwqzragV+1n/prZI1o8qpc4zXVd+ba4sbJuXnS2NiIxMREIW0hbACZ/ibNaKpmqz6Km4K+uPHWDu8leNCgQdi
3bx9u376NyspK5T+idWHMM1tTNVvEmefJz9fFt9WnUtIF75X76NGjAIDt27crr9GrsNZH9NOhBu+5m6rZ0YHRZtlzK8bVtOcmBd0A587MzBTSDsLK0JbV5J2x/wY0qOUicIiulaFv1W0ke3miVMzBV0vy/3fC5CLsgV8PGKyWU9w4f3ir5Xv37sW4cePQtm1bAMD9+/dx9OhRTJs2TVADNUFqufHwsd/gkrd5+4EjsUBDzZNrEmdg3Bag92RzmQ7A9r//LQnvzc/+/fuVjg0A7u7ulOvMTjE4c0nGanXHBuSfZ6wWyEKCD7ydWyaTQXWRl0qlaGho0NGDsFUMjuG+X2jYdaJF4L3nfvnll7Fo0SJMnToVgDzWfODAgYIZRlgOg2O43TsB9ws0XycsBu+VOz4+HmFhYUhJSUFKSgpeeuklxMfHC2kbYSEMzlwybKV8j62KxFl+nbAYvFdukUiEmJgYxMTwr/hA2CYR/hHArTNIvvkVSkWArwxY2GU0Ih5WAx/3lD9uu3eSO2/vyU9Es4zV8hWcE6vvuQ0U1bQp9YRh6HXuuXPnYvLkyRg4cCAkEonavYKCAqSmpqJjx46YNGmSYEYSLUzefkR8/09EqIpkRdsAxgDFu+b7BXKFHFB3cFXVvGkbHujKP04Obhh6nXvNmjXYuXMn1q9fD3d3d3h6eqKurg6FhYXo0qULpk2bhuHDh7eErURLoUn9ltY3b6dYnVVXbm2qOU/n1qXUk3Mbhl7nbt++PZYuXYqlS5eisLBQmSCxa9eudATUXjFE5VZtawbVnLKtmA+Dgnw7deqEvn374vnnn2/m2FOmTDGrYYQFMUTlVm2rrZ8B41GsuPkw29nNuro6cw1FWBpN6rfYARCpay7NFHEzqOatNce4EJit4gin4SBBYmIiTp48CS8vL+XBE1XOnj2Lt956C506yf+yjxgxAvPnzzeXSYQ+8vY/Vrg1qN9pSwCo7J+l9YDEBWBSgMnkirh7F+CruUDqa0hzdUWyTweUdGoPEWOQAfCTAQv9JyDCALW8tZfdNSeClhOaOHEipk+fjmXLlmltExoaqnbSjGghmsaDqyrb6SuBuvvN+zRUP/mYSYG7VwEAaS5PIcmrHWplciFM9vgPfYkYSCr8H3AzzCDnVC0yQBiP2R7LNZ0/efHFF+Hu7m6uKQhzokvZfmhYJtNkj3ao1ZKdpzVU07RWzLZyf/DBB0b1y83NRWRkJLy9vbFs2TIEBATo7UNVPo1HYf9z9wuh6UQ2e6xsa0+k1JzSNmLd902s7KlKS3//bfkEGm/nTk9Px6ZNm1BeXg7GGBhj4DgO2dnZAIDAwECDJw8KCkJmZiZcXFyQlZWFefPmIT1df+UIqvJpPEr7tcSDc9rixHXg2yhFiUT7r5I5q2na+ve/JeH9WP7hhx/iH//4By5cuIDs7Gzk5OQoHdtYXF1d4eLiAgAYPHgwGhsbUVFRYdKYBE90KduufgYNtfBeJZxkmvOi2avSXVVVhb179wo+z7fffosbN24Y1Ze3c3t5eaF79+5GTaKNO3fuKPfqeXl5kMlk8PDwMOscrZq8/fJY8KR28v/z9j+513uyPJmCe2cAnPz/Tn+Sq98G7rkjqh8hqbwSfiL5KyxFjjQ/Fz+9KYbTbqZh5Bcj0XtXb4z8YiTSbqYZ/GVagqqqKqSkpPBuzxiDTMsfQF2Y4tx6M7EoHpN/+ukn3L17F8OHD1fLWz5y5EitfePi4vDTTz/h3r178PLywoIFC5R1xmJiYrBnzx6kpKRALBbDyckJCQkJCAkJ0Ws0ZWLhgZbsKEX9lqHj6Lebtz8aB5z/V/PrIjEgkza/roqRWVcMzvgC6/n+v/3228jIyEC3bt3w5z//GdeuXUNVVRUaGxuxcOFCDB8+HIWFhZg9ezb69OmDX375BZ9++ikOHjyIw4cPw9PTE35+fggKCsLs2bNx69YtrFq1Cvfu3YOTkxPWrFmD+/fvY+7cuXB1dYWbmxu2bt2
KZ555hreNevfcJ06cUH7s7OyM77//Xu2+LufevHmzzrGnT5+O6dOn6zOBMAYtanj7vG2AJue+8G/N4+hz7MfjGhI/rsCW48gXL16M69ev49ChQ2hsbERtbS1cXV1RUVGBKVOmKEta//HHH3j//fcRHByMvLw8pKen4/Dhw2hoaMDEiRMRFBQEAHj33XexatUqdO3aFRcvXsSqVauwe/duhIeHY8iQIRg9erTBNup17g0bNgAALly4gH79+qndu3DhgsETEi2ElnhuyaMyze0ZDyc2Yj5d2EscOWMMmzdvxrlz5yASiVBWVoa7d+8CADp06IDg4GAAQHZ2NoYNGwZHR0c4Ojpi6NChAIDq6mrk5ORg4cIn2kR9vYaDOgbCWy1fu3YtvvrqK73XCCtBi+rd8JQPHDQ0Byc2zcGNyLpiL1U7jxw5goqKCqSmpkIikSA8PFwZjv3UU0/p7c8YQ9u2bXHo0CGz2qVXUMvJycFnn32GiooK7Ny5U/lv69atkEpN/GtPCIcWNfxO77ma2/ebqfm6SPc7bMW4xmRdseU4chcXF1RXyyP2Hjx4AC8vL0gkEpw5cwZFRUUa+4SEhODEiROoq6tDdXU1Tp48CUD+1qhTp074+uuvAcid/erVq83mMRS9zt3Q0IBHjx5BKpWiurpa+c/V1RVbtmwxalKiBdCkho/bgqouWvZuYzcDobPlKzgg/z90NhC17ckYDi5oFt7yeFxjUhhH+EcgqX8S/Fz8wIHjpa5bCx4eHggJCcHYsWNx9epV/Pzzzxg3bhwOHToEf39/jX169+6N8PBwREZG4rXXXkNgYCDc3NwAyF81f/HFF4iMjERERAS+/fZbAMCYMWPwr3/9C1FRUbh165ZBNvLOW15UVISOHTsaNLhQkFqugrbDH03ZFQn8ngUGVffk0KzCngLnx3XYayqePLK7d9Y+fgthdd9/A6muroaLiwtqamowbdo0rFmzRimqmRteaZZ0sW3bNrMZQxiIrsMfqg742LGBpuuujr/rNSrBRIq9uBFpkwh1Vq5ciRs3bqCurg4TJkwQzLEBHs79t7/9DYD8fffdu3cRGRkJAEhLS4OXl5dghhE84JvW6LFjmwUjX3sRcj766KMWm0uvc//pT38CAGzcuBGpqanK6+Hh4Zg4caJwlhH6sVQxACo2YBPwDj+tqalBQcGTVysFBQWoqanR0YMQHDOkNTLrvIRVwfs9d2JiImbMmIHOnTuDMYbi4mKsWrVKSNsIfQxbqbkAX9PXUt0Gm+/RnIoN2Ay8nXvQoEFIT0/HzZs3AQD+/v5qMeaEBVArBqBBLVdV0h8r3upquQ44EdDGWZ59RaGWKxT01Nfl41pYOSd0o9e5f/zxR7z00kvNzlkr3rnpii0nWgDVggCqNFXSHyveSscWSQCO05yPHJDnSYMMmPhP+fh8lXlCEE6dOoV169ZBJpMhOjoar7/+ut4+ep373LlzeOmll9QOkKhCzm2laFLSVZHxqNCqqoyboeBAa+BgThE+PH4NxZU16NDOGfGjnkVUX9PiQ6RSKVavXo2dO3fCx8cHkyZNQnh4OHr06KGzn17njo2V/3VWHCAhbARzKdqKcahMr14O5hQhMfUSahrkT0lFlTVITL0EACY5eF5eHrp06YLOnTsDACIiIpCRkaHXuXmr5cOHD8fixYuRkpKC69evG20o0UKYS9FWjGMpZd6G+PD4NaVjK6hpkOLD49dMGresrAy+vk8O0/j4+KCsTMvpPhV4O/exY8cwdepUVFZW4oMPPsDw4cMxb94846wlhEfTwRFVRBJ5oQFdqCrjVKZXL8WVmrdB2q4LDW/nFolEaNOmDcRiMUQiEby8vChCzZzoSolkzFjN9sjygx9McYgk6v8B4/+v+sGS0NnNDpoo99NaDqLQfvsJHdpp/mOq7TpffHx8UFr65Ix7WVkZfHx89Pbj/SqsX79+CAwMxKxZsxAdHU25zsyJOZVoTemVAAAMYDIUh72nnmb
JkPG1KfMEACB+1LNqe24AcJaIET/qWZPG7dWrF/Lz81FQUAAfHx+kpaXxCmMVJyUlJfGZICAgABzHISMjA99++y1u3boFmUym3OS3JHfv3kX79u1bfF7B5v7vFOBRufo1WSNQnAu89JbpY6mM6VRxFeIBC4yz0wqw5M9eH8/5tUUnD2dcKrqPh7WN6NjOGSvHvWCyWi4SidC1a1fEx8djz549iIyMxKhRo/T2433kU8Fvv/2G06dPY9euXSgvL0deXp7RRhuL3R35TGoHzSe0OCCp0kxjyWHgwBk6phVh60c+WxLej+ULFizA1atX8cwzzyA0NBTvv/8++vTpI6RtrQdthQCMUaL1FBXQmmaJsDt4O/frr7+OF154AWKx5rQ733//PQYMGGA2w1oVfGPEjR1LZcw7vefCOlJuEELD27l79eql8/6mTZuaObe+Er6MMaxbtw5ZWVlwcnLCxo0bBT28brXoixE/GidPPcyk8jhvRb6zpteeCXuikiviwZvEhXc4kwScXWM1mVUI4TBbIUBNW3d9JXxPnTqF/Px8pKen4+LFi0hKSsKBAwfMZZJtoU2JblosgEmbFw9QXMv+95M840z6pFgAoFzNOcU9gOLD7RyzlfDluOZnjfSV8M3IyEBUVBQ4jkNwcDCqqqpw+/Ztc5lkH2grFqCJpgUEFLHfuuLMFW0Iu8NsK7cxNA2r8/X1RVlZGby9vXX2a00lfJ9jUoPK6TaFT0ledr8QV22kLDGV8OWP2Zy7JTOjtqoSviYWC+AUirsOBZ1z72Qzv8St8VWYPu1KG3qdW1+9bMWRz08++YT3pAqahtWVlpbyCqtrVfSbqblAnyaaFu1TVdx1KOgUH25G+KaaNgB92pU2DCoEqAlTznOHh4djz549iIiIwMWLF+Hm5qb3kbzVMfZxMUVD1HJtv1gZq8HuF4CzojzkdoVACS1efPFFFBYafrTW4Ag1Q9BXwpcxhtWrV+P06dNwdnbG+vXr9b5yA+wwQq0FIfsF5OOeWoKROgNv/2zS0IWFhZg7d655H8tVOXnyJK5fv64scgYA8+fP19peXwlfjuPw3nvvGWICQVgvVpbQgversJUrV+LYsWPYs2cPAOD48eMoLi4WzDCCsDmsLKEFb+fOycnBBx98gLZt22L+/Pn4/PPPkZ+fL6BpBGFjWFlCC97O7eQkL7Xq7OyMsrIySCQS3LlzRzDDCMLmECihRVxcHKZOnYrff/8dgwYN4h3FyXvPPWTIEFRVVWH27NmYOHEiOI7DpEmTjDaYIOwSARJa6NOutMHbuV977TU4ODhg1KhRGDp0KOrq6uDo6GjUpARBCA/vx/IpU6YoP3ZwcICbm5vaNYIgrAu9K/edO3dQVlaG2tpaXL58WXn66+HDh1QIkCCsGL3O/d133yE1NRWlpaVqhQlcXV0RFxcnqHEEQRiPXueeMGECJkyYgOPHj/NKykYQhHXAe88dEhKC5cuXY86cOQCAGzdutN7ECgRhA/B27sTERLz88svKZApdu3bF7t27BTOMIAg5JSUlmDFjBsaMGYOIiAjs2rWLVz/ezn3v3j2MGTMGIpG8S5s2bZQfEwQhJ+1mGkZ+MRK9d/XGyC9GIu1mmsljisViJCQk4NixY9i3bx/++9//4saNG3r78fbOp556Cvfu3VOmU8rNzYWbm5vxFhOEnZF2Mw1JPyShpLoEDAwl1SVI+iHJZAf39vZWJg51dXWFv78/r0KAvINYEhIS8Oabb6KgoABTp07FvXv3kJycbLzFBGFnJGcno1Zaq3atVlqL5OxkRPhHmGWOwsJCXLlyhVfNAN7O3aNHD4wYMQLOzs5wcXHB8OHD0a1bN5MMJQh7orS61KDrhlJdXY3Y2FgsX74crq6uetvzfixfunQpbt68iTfeeAPTp09Hfn4+4uPjTTKWIOwJXxdfg64bQkNDA2JjYzFu3Dje2Y94r9zXr1/HsWPHlJ+HhYVhzJgxhltphRzMKcKHx6+huLI
GHdo5I37UsyYXbyNaHwtDFiLphyS1R3MnsRMWhiw0aVzGGFasWAF/f3/MmjWLdz/eK/cLL7yA3Nxc5ecXL15Ez549DbPSCjmYU4TE1EsoqqwBA1BUWYPE1Es4mFNkadMIGyPCPwJJ/ZPg5+IHDhz8XPyQ1D/J5P32hQsXcOjQIZw5cwbjx4/H+PHjkZWVpbcf75X7l19+wdSpU9GhQwcAQHFxMbp164Zx48YBAI4cOWKk6Zblw+PX1OopA0BNgxQfHr9GqzdhMBH+EWYTzxSEhobi2rVrBvfj7dw7duwweHBboLhS8+EXbdcJwlbg7dwtWXSgJenQzhlFGhy5QztnDa0JwnZo9SFm8aOehbNEvSyxs0SM+FHPWsgigjAPgjv3qVOnMGrUKIwYMQKffvpps/upqakICwtTCgUtfRglqm9HbJjYCx0fr9RijlPuuUlUI2wZQQsBSqVSrF69Gjt37oSPjw8mTZqE8PBw9OjRQ63dmDFjsHKl5UraKISzxNRLSnFNoZqr3icIW0LQlTsvLw9dunRB586d4eDggIiICGRkZAg5pdHoUs0JwhYRdOVuWqLXx8cHeXl5zdqlp6fj3Llz6NatGxITE+Hn56dzXCFK+OpSzVXnaukSsuaG7DcMqy1dxAOL1ucGgKFDh2Ls2LFwcHDA559/jmXLluk9Jy5ECd8O7Uq0quaqc1l1rSoekP2tB0Efy5uW6C0rK2tWotfDwwMODg4AgOjoaPzyyy9CmqQVUs0Je0PQlbtXr17Iz89HQUEBfHx8kJaWho8++kitze3bt5VlezMzM9G9e3ez26EpdvzA+Vv4/rcKZRsHMYd66ZOCp2KOwyv95ELagI2Zyr5/7eUKWjgIW0BQ527Tpg1WrlyJOXPmQCqV4pVXXkFAQACSk5PRs2dPDBs2DP/5z3+QmZkJsVgMd3d3tQyr5kARO66qgi/al9usnapjA4CUMez7qQD7zhWg4fG9osoabPmhFh07FJGCTlg9gtbnFgpD9l0DNmZq3EubQsd2zvg+IdysY7YUtr5ntXX7WxK7j1ATIkac4s4JW8DunVuIGHGKOydsAbt3bk0qOF8kIg4SMad2zVHMkYJO2AR279xRfTsi5Bl3ve2a+DDEHIcpf+qMDyf1Qcd2zuAg32vH9n+axDTCJrB4EIvQvHPwktorL22IRBxEDGiQyfVFKWP48kIRQrt4qolnthzdRbQu7H7lTjlbwKtdg5QpHVsBxZYTtozdO7fUxDd9pIwTtordO7eY4/Q30gEp44StYvfOHfPnzrzaScQcJCL1PwQUW07YMnYjqGmKHz//R4VBe25VKLacsHXswrk1xY/H7c+FzITtNsWWE7aOXTyWa8qiYopjK2iQsWYrep2UkYJO2AR24dwtrWiTgk7YAnbh3C2taJOCTtgCduHcmuLHRaa9AQNAseWEbWMXgppC3NKmlhsTyNLx8RhNx/1rL1cS0wibwC6cG5A7uMLpdJXkfefgpWYOL+Y4xPy5M9ZG9dI6tgKKLSdsBbtxbgWaXospiguc/6MCe87catZHypjyujYHJwhbwy723KroKi6gL6CFb8ALQdgCdrdy6youoG/nbeohE4KwJuxu5db2mqpDO2e9h0hMPWRCENaExat81tfXY9GiRRgxYgSio6NRWFho0ny6igvoO0TC95AJQdgCgjq3osrnjh07kJaWhqNHj+LGjRtqbQ4cOIC2bdvim2++wcyZM7Fp0yaT5lQtyatIjbRhYi9E9e2ItVG9MD3smWYrtJjjMD3sGRLTCLtC0D23apVPAMoqn6olfNPWkygAAAhaSURBVDMzMzF//nwAwKhRo7B69WowxsCZ8Iis+lqsKWujepETE60CQVduTVU+y8rKmrVRVPVs06YN3NzccO/ePSHNIohWgU2q5UKU8OULlcC1LFTClz8Wr/Lp4+ODkpISAEBjYyMePHgADw8PIc0yCScnJ0ubYBJkv2Fcv369ReczJxav8hkeHo6vvvo
Kffv2xfHjxxEWFqZ3vx0cHCyk2QRhFwheCDArKwvr169XVvl888031ap81tXVIT4+HleuXIG7uzs+/vhjpQBHEITx2GSVT4Ig9GN3EWoEQcgh5yYIO4WcmyDsFHJugrBTyLl5UlJSghkzZmDMmDGIiIjArl27LG2SwUilUkRFReGNN96wtCkGU1VVhdjYWIwePRp/+ctfkJOTY2mTrB6bjFCzBGKxGAkJCQgKCsLDhw/xyiuvYMCAAWpx8tbO7t270b17dzx8+NDSphjMunXrMHDgQGzZsgX19fWora21tElWD63cPPH29kZQUBAAwNXVFf7+/s3i5K2Z0tJSnDx5EpMmTbK0KQbz4MEDnDt3Tmm7g4MD2rZta2GrrB9ybiMoLCzElStX0KdPH0ubwpv169cjPj4eIpHt/cgLCwvh6emJxMREREVFYcWKFXj06JGlzbJ6bO8nbWGqq6sRGxuL5cuXw9XV1dLm8OLEiRPw9PREz549LW2KUTQ2NuLy5cuIiYnBwYMH4ezsrDHxB6EOObcBNDQ0IDY2FuPGjcPIkSMtbQ5vsrOzkZmZifDwcMTFxeHMmTNYsmSJpc3ija+vL3x9fZVPSqNHj8bly5ctbJX1Q87NE8YYVqxYAX9/f8yaNcvS5hjE4sWLcerUKWRmZmLz5s0ICwszOeNNS9K+fXv4+vri5s2bAIAff/wR3bt3t7BV1g+p5Ty5cOECDh06hMDAQIwfPx4AEBcXh8GDB1vYstbBu+++iyVLlqChoQGdO3fGhg0bLG2S1UMHRwjCTqHHcoKwU8i5CcJOIecmCDuFnJsg7BRyboKwU8i5CcJOIeduIVJTU3kdNElISMD//vc/rfdnzJiBS5cumdM0VFVVYe/evcrPz549a/Cx0NjYWBQUmF4C+e2330Z+fr7J4xDk3C3GV199hdu3b1vaDI1UVVUhJSXF6P7Xr1+HVCo1S9bamJgY7Nixw+RxCIpQM5rCwkLMmTMHQUFBuHz5MgICAvD+++/jt99+w8aNG/Ho0SN4eHhgw4YNyM7Oxs8//4wlS5bAyckJ+/btw44dO3DixAnU1dWhb9++WL16tcH10b777jts3boV9fX1yqgtFxcXhIeHIyoqCidOnEBjYyP+/ve/o3v37qioqMDixYtx+/ZtBAcH44cffsCXX36Jjz76CLdu3cL48ePRv39/DBkyBI8ePUJsbCx+/fVXBAUFYdOmTVrtO3LkCIYNG6b8/NSpU/j4448hlUrh4eGBXbt2YevWrSgsLERBQQFKSkqQmJiI3NxcnD59Gt7e3ti2bRskEglCQ0ORkJCAxsZGtGlDv54mwQijKCgoYIGBgez8+fOMMcYSEhLYP//5TzZlyhRWXl7OGGMsLS2NJSQkMMYYmz59OsvLy1P2v3fvnvLjJUuWsIyMDMYYY8uWLWNff/211nkV45SXl7O//vWvrLq6mjHG2Pbt29nWrVsZY4wNHTqU7d69mzHG2J49e9jy5csZY4ytWrWKbdu2jTHGWFZWFgsMDGTl5eWsoKCARUREKOc4c+YMCwkJYSUlJUwqlbLJkyezc+fOabVp2rRp7OrVq4wxxsrLy9mgQYPYrVu31L7OLVu2sKlTp7L6+np25coV1rt3b3by5EnGGGNvvfUW++abb5TjzZw5k126dEnrfAQ/6E+jCfj5+aFfv34AgMjISGzfvh2//vqr8mCJTCZD+/btNfY9e/YsduzYgdraWlRWViIgIADh4eG857548SJu3LiBmJgYAPITa6qVWBSn1nr27IlvvvkGgDw+/pNPPgEADBo0CO7u7lrH7927t7KI43PPPYeioiKEhoZqbHvnzh14enoCAHJzcxEaGqp8RG/Xrp2y3aBBgyCRSBAYGAipVIpBgwYBAAIDA9Xqsnt6elrtFsaWIOc2gaaPqS4uLggICMC+fft09qurq8OqVavw5Zdfws/PD1u3bkVdXZ1BczPGMGDAAGzevFnjfYlEAgAQiUSQSqUGjQ3
Is50oEIvFOsdwdHTkZb9iTJFIBIlEovz+NbWxvr7e5muaWQMkqJlAcXGxMlHf0aNH0adPH1RUVCivNTQ0KAvJubi4oLq6GgCUjuDh4YHq6mocP37c4LmDg4ORnZ2NP/74AwDw6NEj/P777zr7hISE4OuvvwYg36/fv3+/mW3G0L17d9y6dUtp1/nz55XKeWVlpcHj5efnIyAgwGh7CDm0cptAt27dsHfvXixfvhw9evTAjBkzMHDgQKxduxYPHjyAVCrFq6++ioCAAEyYMAHvvfeeUlCLjo7G2LFj8fTTT6NXr14Gz+3p6YkNGzYgLi4O9fX1AIBFixahW7duWvvMnz8fcXFxOHz4MIKDg9G+fXu4urrCwcEBISEhGDt2LAYOHIghQ4YYZMvgwYNx9uxZ9O/fH56enli9ejUWLFgAmUwGLy8v7Ny5k/dYd+/ehaOjo9btDGEAlt702ypNRShboK6ujjU0NDDGGMvOzmaRkZFmGbempoZFR0ezxsZGk8fauXMn279/vxmsImjlbkUUFxdj0aJFkMlkkEgkWLNmjVnGdXJywoIFC1BWVoYOHTqYNJabm5syGQZhGpSswUqZN2+emoIMAEuWLMHAgQMtZJF12kRoh5ybIOwUUssJwk4h5yYIO4WcmyDsFHJugrBT/j8EC9w5JqNZbgAAAABJRU5ErkJggg==\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"5KszRYQ0yaaV","colab":{"base_uri":"https://localhost:8080/","height":225},"executionInfo":{"status":"ok","timestamp":1614767516166,"user_tz":-300,"elapsed":1157,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"ffcba165-ccb3-4f61-e7ae-c413017c2e8c"},"source":["g = sns.FacetGrid(df, hue='target')\n","g.map(plt.scatter, 'petal_length_(cm)', 'sepal_width_(cm)');\n","g.add_legend();"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"SE3c3sS0yfgl","colab":{"base_uri":"https://localhost:8080/","height":225},"executionInfo":{"status":"ok","timestamp":1614767522475,"user_tz":-300,"elapsed":1668,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"ee44eed9-08f4-495a-e122-a0910757f984"},"source":["g = sns.FacetGrid(df, hue='target')\n","g.map(plt.scatter, 'sepal_length_(cm)', 'petal_width_(cm)');\n","g.add_legend();"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"markdown","metadata":{"id":"dnoGFA4MzW9o"},"source":["Можно все предыдущие графики вывести одной строчкой кода"]},{"cell_type":"code","metadata":{"id":"izSb9tJThvhk","colab":{"base_uri":"https://localhost:8080/","height":743},"executionInfo":{"status":"ok","timestamp":1614779829439,"user_tz":-300,"elapsed":12239,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"0a4d8076-27df-4520-bac8-004f756b4670"},"source":["sns.pairplot(df,hue='target',diag_kind=\"kde\",kind=\"scatter\",palette=\"husl\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":69},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"MOtqb-wJhvfD","colab":{"base_uri":"https://localhost:8080/","height":296},"executionInfo":{"status":"ok","timestamp":1614767558267,"user_tz":-300,"elapsed":714,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"bc730c65-b8f4-4417-d1f5-68fcc327dbcb"},"source":["sns.boxplot(x=\"target\", y=\"sepal_length_(cm)\", data=df)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":62},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"pop0xJy808kv","colab":{"base_uri":"https://localhost:8080/","height":299},"executionInfo":{"status":"ok","timestamp":1614767566285,"user_tz":-300,"elapsed":648,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"0c951aff-d432-4cab-ba5c-fd913d8256c9"},"source":["sns.boxplot(x=\"target\", y=\"sepal_width_(cm)\", data=df)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":63},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"zL6zrC0108t4","colab":{"base_uri":"https://localhost:8080/","height":296},"executionInfo":{"status":"ok","timestamp":1614767569562,"user_tz":-300,"elapsed":1021,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"41a4f4ed-c685-4b02-adec-77ede4cf6761"},"source":["sns.boxplot(x=\"target\", y=\"petal_length_(cm)\", data=df)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":64},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"gt-CS-v80841","colab":{"base_uri":"https://localhost:8080/","height":296},"executionInfo":{"status":"ok","timestamp":1614767574695,"user_tz":-300,"elapsed":672,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"17e5b682-a469-43a9-fe36-3ad50c2b4449"},"source":["sns.boxplot(x=\"target\", y=\"petal_width_(cm)\", data=df)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":65},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[]}}]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "qCUpgW4Chxlt" + }, + "source": [ + "# Игрушечные наборы данных\n", + "https://scikit-learn.org/stable/datasets/index.html" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "executionInfo": { + "elapsed": 867, + "status": "ok", + "timestamp": 1632403984813, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "6-e8Ub9ghvMA" + }, + "outputs": [], + "source": [ + "import sklearn.datasets as sets\n", + "datasets = {0:'boston', 1:'iris', 2:'diabets', 3:'digits', 4:'linnerud', 5:'wine', 6:'cancer', 7:'olivetti_faces', 8:'20_newsgroups',\n", + " 9:'20_newsgroups_vec', 10:'people_labeled_faces', 11:'pairs_labeled_faces', 12:'covertype', 13:'RCV1_multilabel',\n", + " 14:'kddcup99', 15:'california_housing', }\n", + "choise = 1\n", + "if choise == 0:\n", + " ds = sets.load_boston() #regression\n", + "elif choise == 1:\n", + " ds = sets.load_iris() # classification\n", + "elif choise == 2:\n", + " ds = sets.load_diabetes() # regression\n", + "elif choise == 3:\n", + " ds = sets.load_digits() # classification\n", + "elif choise == 4:\n", + " ds = sets.load_linnerud() # multivariate regression\n", + "elif choise == 5:\n", + " ds = sets.load_wine() # classification\n", + "elif choise == 6:\n", + " ds = sets.load_breast_cancer() # classification\n", + "elif choise == 7:\n", + " ds = sets.fetch_olivetti_faces() # classification\n", + "elif choise == 8:\n", + " ds = sets.fetch_20newsgroups() # classification\n", + "elif choise == 9:\n", + " ds = sets.fetch_20newsgroups_vectorized() # classification\n", + "elif choise == 10:\n", + " ds = sets.fetch_lfw_people() # classification\n", + "elif choise == 11:\n", + " ds = sets.fetch_lfw_pairs() # 
classification\n", + "elif choise == 12:\n", + " ds = sets.fetch_covtype() # classification\n", + "elif choise == 13:\n", + " ds = sets.fetch_rcv1() # classification\n", + "elif choise == 14:\n", + " ds = sets.fetch_kddcup99() # classification\n", + "elif choise == 15:\n", + " ds = sets.fetch_california_housing() # regression" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 1064, + "status": "ok", + "timestamp": 1615295304765, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "rHDZmzjAiy7N", + "outputId": "160c86a8-b336-429a-b12b-52cf5bb6a14b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".. _iris_dataset:\n", + "\n", + "Iris plants dataset\n", + "--------------------\n", + "\n", + "**Data Set Characteristics:**\n", + "\n", + " :Number of Instances: 150 (50 in each of three classes)\n", + " :Number of Attributes: 4 numeric, predictive attributes and the class\n", + " :Attribute Information:\n", + " - sepal length in cm\n", + " - sepal width in cm\n", + " - petal length in cm\n", + " - petal width in cm\n", + " - class:\n", + " - Iris-Setosa\n", + " - Iris-Versicolour\n", + " - Iris-Virginica\n", + " \n", + " :Summary Statistics:\n", + "\n", + " ============== ==== ==== ======= ===== ====================\n", + " Min Max Mean SD Class Correlation\n", + " ============== ==== ==== ======= ===== ====================\n", + " sepal length: 4.3 7.9 5.84 0.83 0.7826\n", + " sepal width: 2.0 4.4 3.05 0.43 -0.4194\n", + " petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)\n", + " petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)\n", + " ============== ==== ==== ======= ===== ====================\n", + "\n", + " :Missing Attribute Values: 
None\n", + " :Class Distribution: 33.3% for each of 3 classes.\n", + " :Creator: R.A. Fisher\n", + " :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n", + " :Date: July, 1988\n", + "\n", + "The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\n", + "from Fisher's paper. Note that it's the same as in R, but not as in the UCI\n", + "Machine Learning Repository, which has two wrong data points.\n", + "\n", + "This is perhaps the best known database to be found in the\n", + "pattern recognition literature. Fisher's paper is a classic in the field and\n", + "is referenced frequently to this day. (See Duda & Hart, for example.) The\n", + "data set contains 3 classes of 50 instances each, where each class refers to a\n", + "type of iris plant. One class is linearly separable from the other 2; the\n", + "latter are NOT linearly separable from each other.\n", + "\n", + ".. topic:: References\n", + "\n", + " - Fisher, R.A. \"The use of multiple measurements in taxonomic problems\"\n", + " Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\n", + " Mathematical Statistics\" (John Wiley, NY, 1950).\n", + " - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n", + " (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.\n", + " - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\n", + " Structure and Classification Rule for Recognition in Partially Exposed\n", + " Environments\". IEEE Transactions on Pattern Analysis and Machine\n", + " Intelligence, Vol. PAMI-2, No. 1, 67-71.\n", + " - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\". IEEE Transactions\n", + " on Information Theory, May 1972, 431-433.\n", + " - See also: 1988 MLC Proceedings, 54-64. 
Cheeseman et al\"s AUTOCLASS II\n", + " conceptual clustering system finds 3 classes in the data.\n", + " - Many, many more ...\n" + ] + } + ], + "source": [ + "print(ds.DESCR)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 683, + "status": "ok", + "timestamp": 1632404056458, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "59mLor4WoeZg", + "outputId": "3548322c-6765-4349-8dea-66ab12f3f7d9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n", + "['setosa' 'versicolor' 'virginica']\n" + ] + } + ], + "source": [ + "print(ds.feature_names)\n", + "print(ds.target_names)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 420, + "status": "ok", + "timestamp": 1632404071563, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "9Yt4tJ2_otjm", + "outputId": "b471a124-b71b-456d-de41-fe29676b6604" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.ndarray" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = ds.data\n", + "type(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 402, + "status": "ok", + "timestamp": 1632404086557, + "user": { + "displayName": "Александр Аксёнов", + 
"photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ZgxY_56q3YVG", + "outputId": "b4e3ee4f-16b7-4b1e-f5be-34d0e5f4dd31" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[5.1, 3.5, 1.4, 0.2],\n", + " [4.9, 3. , 1.4, 0.2],\n", + " [4.7, 3.2, 1.3, 0.2],\n", + " [4.6, 3.1, 1.5, 0.2],\n", + " [5. , 3.6, 1.4, 0.2],\n", + " [5.4, 3.9, 1.7, 0.4],\n", + " [4.6, 3.4, 1.4, 0.3],\n", + " [5. , 3.4, 1.5, 0.2],\n", + " [4.4, 2.9, 1.4, 0.2],\n", + " [4.9, 3.1, 1.5, 0.1],\n", + " [5.4, 3.7, 1.5, 0.2],\n", + " [4.8, 3.4, 1.6, 0.2],\n", + " [4.8, 3. , 1.4, 0.1],\n", + " [4.3, 3. , 1.1, 0.1],\n", + " [5.8, 4. , 1.2, 0.2],\n", + " [5.7, 4.4, 1.5, 0.4],\n", + " [5.4, 3.9, 1.3, 0.4],\n", + " [5.1, 3.5, 1.4, 0.3],\n", + " [5.7, 3.8, 1.7, 0.3],\n", + " [5.1, 3.8, 1.5, 0.3],\n", + " [5.4, 3.4, 1.7, 0.2],\n", + " [5.1, 3.7, 1.5, 0.4],\n", + " [4.6, 3.6, 1. , 0.2],\n", + " [5.1, 3.3, 1.7, 0.5],\n", + " [4.8, 3.4, 1.9, 0.2],\n", + " [5. , 3. , 1.6, 0.2],\n", + " [5. , 3.4, 1.6, 0.4],\n", + " [5.2, 3.5, 1.5, 0.2],\n", + " [5.2, 3.4, 1.4, 0.2],\n", + " [4.7, 3.2, 1.6, 0.2],\n", + " [4.8, 3.1, 1.6, 0.2],\n", + " [5.4, 3.4, 1.5, 0.4],\n", + " [5.2, 4.1, 1.5, 0.1],\n", + " [5.5, 4.2, 1.4, 0.2],\n", + " [4.9, 3.1, 1.5, 0.2],\n", + " [5. , 3.2, 1.2, 0.2],\n", + " [5.5, 3.5, 1.3, 0.2],\n", + " [4.9, 3.6, 1.4, 0.1],\n", + " [4.4, 3. , 1.3, 0.2],\n", + " [5.1, 3.4, 1.5, 0.2],\n", + " [5. , 3.5, 1.3, 0.3],\n", + " [4.5, 2.3, 1.3, 0.3],\n", + " [4.4, 3.2, 1.3, 0.2],\n", + " [5. , 3.5, 1.6, 0.6],\n", + " [5.1, 3.8, 1.9, 0.4],\n", + " [4.8, 3. , 1.4, 0.3],\n", + " [5.1, 3.8, 1.6, 0.2],\n", + " [4.6, 3.2, 1.4, 0.2],\n", + " [5.3, 3.7, 1.5, 0.2],\n", + " [5. , 3.3, 1.4, 0.2],\n", + " [7. , 3.2, 4.7, 1.4],\n", + " [6.4, 3.2, 4.5, 1.5],\n", + " [6.9, 3.1, 4.9, 1.5],\n", + " [5.5, 2.3, 4. 
, 1.3],\n", + " [6.5, 2.8, 4.6, 1.5],\n", + " [5.7, 2.8, 4.5, 1.3],\n", + " [6.3, 3.3, 4.7, 1.6],\n", + " [4.9, 2.4, 3.3, 1. ],\n", + " [6.6, 2.9, 4.6, 1.3],\n", + " [5.2, 2.7, 3.9, 1.4],\n", + " [5. , 2. , 3.5, 1. ],\n", + " [5.9, 3. , 4.2, 1.5],\n", + " [6. , 2.2, 4. , 1. ],\n", + " [6.1, 2.9, 4.7, 1.4],\n", + " [5.6, 2.9, 3.6, 1.3],\n", + " [6.7, 3.1, 4.4, 1.4],\n", + " [5.6, 3. , 4.5, 1.5],\n", + " [5.8, 2.7, 4.1, 1. ],\n", + " [6.2, 2.2, 4.5, 1.5],\n", + " [5.6, 2.5, 3.9, 1.1],\n", + " [5.9, 3.2, 4.8, 1.8],\n", + " [6.1, 2.8, 4. , 1.3],\n", + " [6.3, 2.5, 4.9, 1.5],\n", + " [6.1, 2.8, 4.7, 1.2],\n", + " [6.4, 2.9, 4.3, 1.3],\n", + " [6.6, 3. , 4.4, 1.4],\n", + " [6.8, 2.8, 4.8, 1.4],\n", + " [6.7, 3. , 5. , 1.7],\n", + " [6. , 2.9, 4.5, 1.5],\n", + " [5.7, 2.6, 3.5, 1. ],\n", + " [5.5, 2.4, 3.8, 1.1],\n", + " [5.5, 2.4, 3.7, 1. ],\n", + " [5.8, 2.7, 3.9, 1.2],\n", + " [6. , 2.7, 5.1, 1.6],\n", + " [5.4, 3. , 4.5, 1.5],\n", + " [6. , 3.4, 4.5, 1.6],\n", + " [6.7, 3.1, 4.7, 1.5],\n", + " [6.3, 2.3, 4.4, 1.3],\n", + " [5.6, 3. , 4.1, 1.3],\n", + " [5.5, 2.5, 4. , 1.3],\n", + " [5.5, 2.6, 4.4, 1.2],\n", + " [6.1, 3. , 4.6, 1.4],\n", + " [5.8, 2.6, 4. , 1.2],\n", + " [5. , 2.3, 3.3, 1. ],\n", + " [5.6, 2.7, 4.2, 1.3],\n", + " [5.7, 3. , 4.2, 1.2],\n", + " [5.7, 2.9, 4.2, 1.3],\n", + " [6.2, 2.9, 4.3, 1.3],\n", + " [5.1, 2.5, 3. , 1.1],\n", + " [5.7, 2.8, 4.1, 1.3],\n", + " [6.3, 3.3, 6. , 2.5],\n", + " [5.8, 2.7, 5.1, 1.9],\n", + " [7.1, 3. , 5.9, 2.1],\n", + " [6.3, 2.9, 5.6, 1.8],\n", + " [6.5, 3. , 5.8, 2.2],\n", + " [7.6, 3. , 6.6, 2.1],\n", + " [4.9, 2.5, 4.5, 1.7],\n", + " [7.3, 2.9, 6.3, 1.8],\n", + " [6.7, 2.5, 5.8, 1.8],\n", + " [7.2, 3.6, 6.1, 2.5],\n", + " [6.5, 3.2, 5.1, 2. ],\n", + " [6.4, 2.7, 5.3, 1.9],\n", + " [6.8, 3. , 5.5, 2.1],\n", + " [5.7, 2.5, 5. , 2. ],\n", + " [5.8, 2.8, 5.1, 2.4],\n", + " [6.4, 3.2, 5.3, 2.3],\n", + " [6.5, 3. , 5.5, 1.8],\n", + " [7.7, 3.8, 6.7, 2.2],\n", + " [7.7, 2.6, 6.9, 2.3],\n", + " [6. , 2.2, 5. 
, 1.5],\n", + " [6.9, 3.2, 5.7, 2.3],\n", + " [5.6, 2.8, 4.9, 2. ],\n", + " [7.7, 2.8, 6.7, 2. ],\n", + " [6.3, 2.7, 4.9, 1.8],\n", + " [6.7, 3.3, 5.7, 2.1],\n", + " [7.2, 3.2, 6. , 1.8],\n", + " [6.2, 2.8, 4.8, 1.8],\n", + " [6.1, 3. , 4.9, 1.8],\n", + " [6.4, 2.8, 5.6, 2.1],\n", + " [7.2, 3. , 5.8, 1.6],\n", + " [7.4, 2.8, 6.1, 1.9],\n", + " [7.9, 3.8, 6.4, 2. ],\n", + " [6.4, 2.8, 5.6, 2.2],\n", + " [6.3, 2.8, 5.1, 1.5],\n", + " [6.1, 2.6, 5.6, 1.4],\n", + " [7.7, 3. , 6.1, 2.3],\n", + " [6.3, 3.4, 5.6, 2.4],\n", + " [6.4, 3.1, 5.5, 1.8],\n", + " [6. , 3. , 4.8, 1.8],\n", + " [6.9, 3.1, 5.4, 2.1],\n", + " [6.7, 3.1, 5.6, 2.4],\n", + " [6.9, 3.1, 5.1, 2.3],\n", + " [5.8, 2.7, 5.1, 1.9],\n", + " [6.8, 3.2, 5.9, 2.3],\n", + " [6.7, 3.3, 5.7, 2.5],\n", + " [6.7, 3. , 5.2, 2.3],\n", + " [6.3, 2.5, 5. , 1.9],\n", + " [6.5, 3. , 5.2, 2. ],\n", + " [6.2, 3.4, 5.4, 2.3],\n", + " [5.9, 3. , 5.1, 1.8]])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 855, + "status": "ok", + "timestamp": 1615295357693, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "-7ejnqmmwr_J", + "outputId": "e22abd6b-c840-4e43-aa62-d9c1a5cdd231" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(150, 4)" + ] + }, + "execution_count": 5, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 420, + "status": "ok", + "timestamp": 1632404107395, + "user": { + 
"displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "RmRL0mZ3o5ri", + "outputId": "bcace884-7ac8-49ce-d14e-05c8f625bb38" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0, 0, 0, 0, 0]), (150,))" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target = ds.target\n", + "target[:5], target.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fpcR7aEBJoGq" + }, + "source": [ + "# Pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "executionInfo": { + "elapsed": 546, + "status": "ok", + "timestamp": 1632404228644, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "FVTPYh-hhvah" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 423 + }, + "executionInfo": { + "elapsed": 20, + "status": "ok", + "timestamp": 1632404365934, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "CZzMZXcyDnCx", + "outputId": "55d262ac-6243-4338-a45e-57217f23a610" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
..................
1456.73.05.22.32
1466.32.55.01.92
1476.53.05.22.02
1486.23.45.42.32
1495.93.05.11.82
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) ... petal width (cm) target\n", + "0 5.1 3.5 ... 0.2 0\n", + "1 4.9 3.0 ... 0.2 0\n", + "2 4.7 3.2 ... 0.2 0\n", + "3 4.6 3.1 ... 0.2 0\n", + "4 5.0 3.6 ... 0.2 0\n", + ".. ... ... ... ... ...\n", + "145 6.7 3.0 ... 2.3 2\n", + "146 6.3 2.5 ... 1.9 2\n", + "147 6.5 3.0 ... 2.0 2\n", + "148 6.2 3.4 ... 2.3 2\n", + "149 5.9 3.0 ... 1.8 2\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(data, columns=ds.feature_names) # data - может быть как лист, так и numpy array\n", + "df['target'] = ds.target\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 + }, + "executionInfo": { + "elapsed": 1482, + "status": "ok", + "timestamp": 1632404401169, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "WMx25DeePe80", + "outputId": "ca1eb41f-18e0-47de-cc77-b8648b89cec5" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.20
14.93.01.40.20
\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) ... petal width (cm) target\n", + "0 5.1 3.5 ... 0.2 0\n", + "1 4.9 3.0 ... 0.2 0\n", + "\n", + "[2 rows x 5 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(2) #tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "executionInfo": { + "elapsed": 580, + "status": "ok", + "timestamp": 1632404414446, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "yY02uqmWhvlj", + "outputId": "f4adccbb-22f7-4192-a8f7-67d00c8ff7c3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
1206.93.25.72.32
75.03.41.50.20
656.73.14.41.41
776.73.05.01.71
985.12.53.01.11
\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) ... petal width (cm) target\n", + "120 6.9 3.2 ... 2.3 2\n", + "7 5.0 3.4 ... 0.2 0\n", + "65 6.7 3.1 ... 1.4 1\n", + "77 6.7 3.0 ... 1.7 1\n", + "98 5.1 2.5 ... 1.1 1\n", + "\n", + "[5 rows x 5 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sample(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 486, + "status": "ok", + "timestamp": 1632404445651, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "L0oDISZyHqUh", + "outputId": "f2586af7-7f30-4106-861b-539f5ed618d6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 433, + "status": "ok", + "timestamp": 1632404485030, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "arTjJfy442ss", + "outputId": "6d630c99-cbed-42e1-d69f-c71e595be995" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df[\"target\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xX_Qut-QR_ia" + }, + "source": [ + "### Индексация и срезы данных" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 3256, + "status": "ok", + "timestamp": 1614783881358, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "jXimDZePWyIp", + "outputId": "00860947-6e2c-484e-90ae-8149d6c2bb45" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 5.1\n", + "1 4.9\n", + "2 4.7\n", + "3 4.6\n", + "4 5.0\n", + " ... \n", + "145 6.7\n", + "146 6.3\n", + "147 6.5\n", + "148 6.2\n", + "149 5.9\n", + "Name: sepal length (cm), Length: 150, dtype: float64" + ] + }, + "execution_count": 96, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df['sepal length (cm)'] # выбор столбца по названию" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 619, + "status": "ok", + "timestamp": 1615295621844, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "rOBV0RUtHxLh", + "outputId": "2e25e363-6fd5-477f-9e38-afe8f91522ac" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 14, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "type(df['sepal length (cm)'])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 523, + "status": "ok", + "timestamp": 1632404667952, + "user": { + "displayName": "Александр 
Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "sq2YmKFr5m-1", + "outputId": "e9f125e0-3f1f-4a4b-d39c-5e6091047c86" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',\n", + " 'petal width (cm)', 'target'],\n", + " dtype='object')" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 1699, + "status": "ok", + "timestamp": 1614783884339, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "o5CI-Ha6P4AX", + "outputId": "ee350cf3-212a-4bdd-daf8-f0decfe313c0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'petal length (cm)': 'petal_length_(cm)',\n", + " 'petal width (cm)': 'petal_width_(cm)',\n", + " 'sepal length (cm)': 'sepal_length_(cm)',\n", + " 'sepal width (cm)': 'sepal_width_(cm)',\n", + " 'target': 'target'}" + ] + }, + "execution_count": 97, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "{name : '_'.join(name.split(' ')) for name in df.columns}" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "executionInfo": { + "elapsed": 585, + "status": "ok", + "timestamp": 1632404857471, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ztRKBaVlxM8d" + }, + "outputs": [], + "source": [ + 
"# df = df.rename(columns={name : '_'.join(name.split(' ')) for name in df.columns}) # смена имен столбцов\n", + "df.rename(columns={name : '_'.join(name.split(' ')) for name in df.columns}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 29, + "status": "ok", + "timestamp": 1632404863328, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "Bryqf6bCxNC5", + "outputId": "2fb81e40-0667-4c5b-9b50-4ba23010385b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['sepal_length_(cm)', 'sepal_width_(cm)', 'petal_length_(cm)',\n", + " 'petal_width_(cm)', 'target'],\n", + " dtype='object')" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 438, + "status": "ok", + "timestamp": 1615295826923, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "uesXOV19QcNX", + "outputId": "6476924c-249d-4876-89be-920b127e125b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "145 2\n", + "146 2\n", + "147 2\n", + "148 2\n", + "149 2\n", + "Name: target, Length: 150, dtype: int64" + ] + }, + "execution_count": 20, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.target" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 566, + "status": "ok", + "timestamp": 1614777840378, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "J2il4fodbWLb", + "outputId": "b6d5c2a4-dc69-497d-997c-8127f174765a" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "140 2\n", + "141 2\n", + "142 2\n", + "143 2\n", + "144 2\n", + "145 2\n", + "146 2\n", + "147 2\n", + "148 2\n", + "149 2\n", + "Name: target, dtype: int64" + ] + }, + "execution_count": 22, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.target[-10:] # возможен такой стиль обращения к столбцам, если его имя не содержит пробелов" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 357 + }, + "executionInfo": { + "elapsed": 607, + "status": "ok", + "timestamp": 1614777891289, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "2IaGUtDoYIAO", + "outputId": "c64f553c-27a2-4f0d-a1e3-aa82ee895acf" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_width_(cm)petal_length_(cm)petal_width_(cm)
1403.15.62.4
1413.15.12.3
1422.75.11.9
1433.25.92.3
1443.35.72.5
1453.05.22.3
1462.55.01.9
1473.05.22.0
1483.45.42.3
1493.05.11.8
\n", + "
" + ], + "text/plain": [ + " sepal_width_(cm) petal_length_(cm) petal_width_(cm)\n", + "140 3.1 5.6 2.4\n", + "141 3.1 5.1 2.3\n", + "142 2.7 5.1 1.9\n", + "143 3.2 5.9 2.3\n", + "144 3.3 5.7 2.5\n", + "145 3.0 5.2 2.3\n", + "146 2.5 5.0 1.9\n", + "147 3.0 5.2 2.0\n", + "148 3.4 5.4 2.3\n", + "149 3.0 5.1 1.8" + ] + }, + "execution_count": 23, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[140: , 'sepal_width_(cm)':'petal_width_(cm)'] # возможность среза данных по ИМЕНАМ строк и столбцов" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 357 + }, + "executionInfo": { + "elapsed": 735, + "status": "ok", + "timestamp": 1614777918498, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "RwTuNV_BxNtH", + "outputId": "89004bbc-fd5d-4bb9-fbdc-6756fa31cb1b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
55.43.91.70.4
64.63.41.40.3
75.03.41.50.2
84.42.91.40.2
94.93.11.50.1
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) petal_length_(cm) petal_width_(cm)\n", + "0 5.1 3.5 1.4 0.2\n", + "1 4.9 3.0 1.4 0.2\n", + "2 4.7 3.2 1.3 0.2\n", + "3 4.6 3.1 1.5 0.2\n", + "4 5.0 3.6 1.4 0.2\n", + "5 5.4 3.9 1.7 0.4\n", + "6 4.6 3.4 1.4 0.3\n", + "7 5.0 3.4 1.5 0.2\n", + "8 4.4 2.9 1.4 0.2\n", + "9 4.9 3.1 1.5 0.1" + ] + }, + "execution_count": 24, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[:10,:4] # возможность среза данных по ПОРЯДКОВЫМ НОМЕРАМ строк и столбцов " + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 413, + "status": "ok", + "timestamp": 1632405184550, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "QSAbGcDbJP9B", + "outputId": "20274561-ff6c-4031-e1a7-a26a2399cea5" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['sepal_length_(cm)', 'sepal_width_(cm)']" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[column for column in df.columns if column.startswith('sepal')]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "executionInfo": { + "elapsed": 1370, + "status": "ok", + "timestamp": 1614784351268, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "pytaw0cAxNp8", + "outputId": "81983e96-8834-40e4-b828-6706a4f3bbb6" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)
05.13.5
14.93.0
24.73.2
34.63.1
45.03.6
.........
1456.73.0
1466.32.5
1476.53.0
1486.23.4
1495.93.0
\n", + "

150 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm)\n", + "0 5.1 3.5\n", + "1 4.9 3.0\n", + "2 4.7 3.2\n", + "3 4.6 3.1\n", + "4 5.0 3.6\n", + ".. ... ...\n", + "145 6.7 3.0\n", + "146 6.3 2.5\n", + "147 6.5 3.0\n", + "148 6.2 3.4\n", + "149 5.9 3.0\n", + "\n", + "[150 rows x 2 columns]" + ] + }, + "execution_count": 102, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df[[column for column in df.columns if column.startswith('sepal')]] # выбор столбцов по условию" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 666, + "status": "ok", + "timestamp": 1632405255702, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "bHiE8tk872bY", + "outputId": "5cab46f0-7d00-4c5a-a435-ecd145b8c82c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + " ... 
\n", + "145 False\n", + "146 False\n", + "147 False\n", + "148 False\n", + "149 False\n", + "Name: target, Length: 150, dtype: bool" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.target==1.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 347 + }, + "executionInfo": { + "elapsed": 815, + "status": "ok", + "timestamp": 1615296046504, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "YNxRaJqqavOz", + "outputId": "2baa1152-611c-43a3-eea6-c9ae07cfea4e" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
507.03.24.71.41
516.43.24.51.51
526.93.14.91.51
535.52.34.01.31
546.52.84.61.51
555.72.84.51.31
566.33.34.71.61
574.92.43.31.01
586.62.94.61.31
595.22.73.91.41
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n", + "50 7.0 3.2 ... 1.4 1\n", + "51 6.4 3.2 ... 1.5 1\n", + "52 6.9 3.1 ... 1.5 1\n", + "53 5.5 2.3 ... 1.3 1\n", + "54 6.5 2.8 ... 1.5 1\n", + "55 5.7 2.8 ... 1.3 1\n", + "56 6.3 3.3 ... 1.6 1\n", + "57 4.9 2.4 ... 1.0 1\n", + "58 6.6 2.9 ... 1.3 1\n", + "59 5.2 2.7 ... 1.4 1\n", + "\n", + "[10 rows x 5 columns]" + ] + }, + "execution_count": 22, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.target==1.0][:10] # выбор данных по условию. В данном случае хотим увидеть данные у которых целевой класс = 1\n", + "# так же можно увидеть что обращаться к столбцу можно" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i4V1_5AOgmB9" + }, + "source": [ + "### Описательная статистика" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "executionInfo": { + "elapsed": 1283, + "status": "ok", + "timestamp": 1614766986724, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "EuwQ-U54xNnA", + "outputId": "5ed73970-f852-49b2-82a7-bfe43b1ad3c3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
count150.000000150.000000150.000000150.000000150.000000
mean5.8433333.0573333.7580001.1993331.000000
std0.8280660.4358661.7652980.7622380.819232
min4.3000002.0000001.0000000.1000000.000000
25%5.1000002.8000001.6000000.3000000.000000
50%5.8000003.0000004.3500001.3000001.000000
75%6.4000003.3000005.1000001.8000002.000000
max7.9000004.4000006.9000002.5000002.000000
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n", + "count 150.000000 150.000000 ... 150.000000 150.000000\n", + "mean 5.843333 3.057333 ... 1.199333 1.000000\n", + "std 0.828066 0.435866 ... 0.762238 0.819232\n", + "min 4.300000 2.000000 ... 0.100000 0.000000\n", + "25% 5.100000 2.800000 ... 0.300000 0.000000\n", + "50% 5.800000 3.000000 ... 1.300000 1.000000\n", + "75% 6.400000 3.300000 ... 1.800000 2.000000\n", + "max 7.900000 4.400000 ... 2.500000 2.000000\n", + "\n", + "[8 rows x 5 columns]" + ] + }, + "execution_count": 19, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe() # статистическое описание набора данных" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 627, + "status": "ok", + "timestamp": 1614778091397, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "X4ykTpKtxNiG", + "outputId": "e62b683d-f476-4422-d691-774ead34e63f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 150 entries, 0 to 149\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 sepal_length_(cm) 150 non-null float64\n", + " 1 sepal_width_(cm) 150 non-null float64\n", + " 2 petal_length_(cm) 150 non-null float64\n", + " 3 petal_width_(cm) 150 non-null float64\n", + " 4 target 150 non-null int64 \n", + "dtypes: float64(4), int64(1)\n", + "memory usage: 6.0 KB\n" + ] + } + ], + "source": [ + "df.info() # информация об индексах, пропусках в данных, типах данных и объеме оперативной памяти занимаемой данными" + ] + }, + { + "cell_type": "code", + 
"execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 51, + "status": "ok", + "timestamp": 1632405484185, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "b7khmMfj8mDB", + "outputId": "8e7ccfa9-cffa-4872-c0a5-d00635211e12" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0, 1, 2]), 3)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.target.unique(), df.target.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 167 + }, + "executionInfo": { + "elapsed": 783, + "status": "ok", + "timestamp": 1615296303195, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "n1XzQbdFRx7Z", + "outputId": "a4acff70-40cf-4462-f2b6-03546318b29b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)
target
05.0063.4281.4620.246
15.9362.7704.2601.326
26.5882.9745.5522.026
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) ... petal_width_(cm)\n", + "target ... \n", + "0 5.006 ... 0.246\n", + "1 5.936 ... 1.326\n", + "2 6.588 ... 2.026\n", + "\n", + "[3 rows x 4 columns]" + ] + }, + "execution_count": 24, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('target').mean() #df.groupby('target')['petal_length_(cm)'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 217 + }, + "executionInfo": { + "elapsed": 724, + "status": "ok", + "timestamp": 1615296321113, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "MRiTYhiixNfC", + "outputId": "3bd6da21-1bde-404e-b9fb-d4e36e94634c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)
minmaxmeanstdsizeminmaxmeanstdsizeminmaxmeanstdsizeminmaxmeanstdsize
target
04.35.85.0060.35249050.02.34.43.4280.37906450.01.01.91.4620.17366450.00.10.60.2460.10538650.0
14.97.05.9360.51617150.02.03.42.7700.31379850.03.05.14.2600.46991150.01.01.81.3260.19775350.0
24.97.96.5880.63588050.02.23.82.9740.32249750.04.56.95.5520.55189550.01.42.52.0260.27465050.0
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) ... petal_width_(cm) \n", + " min max mean std ... max mean std size\n", + "target ... \n", + "0 4.3 5.8 5.006 0.352490 ... 0.6 0.246 0.105386 50.0\n", + "1 4.9 7.0 5.936 0.516171 ... 1.8 1.326 0.197753 50.0\n", + "2 4.9 7.9 6.588 0.635880 ... 2.5 2.026 0.274650 50.0\n", + "\n", + "[3 rows x 20 columns]" + ] + }, + "execution_count": 25, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('target').agg([min, max, np.mean, np.std, np.size]) # применение общих функций группировки для всех столбцов" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 511, + "status": "ok", + "timestamp": 1615296592781, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "w_oHay4KxNdC", + "outputId": "2b52fff3-b9c7-4c74-ea6f-e52b965f4e6b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)petal_width_(cm)
meanstdminmax
target
05.0060.3524900.10.6
15.9360.5161711.01.8
26.5880.6358801.42.5
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) petal_width_(cm) \n", + " mean std min max\n", + "target \n", + "0 5.006 0.352490 0.1 0.6\n", + "1 5.936 0.516171 1.0 1.8\n", + "2 6.588 0.635880 1.4 2.5" + ] + }, + "execution_count": 30, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('target').agg({'sepal_length_(cm)':[np.mean, np.std], 'petal_width_(cm)':[min, max]}) # индивидуальное применение функций группировки" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NCfoXnc41fmW" + }, + "source": [ + "### Полезные функции, которые конкретно сейчас не нужны, но часто применимы" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 747, + "status": "ok", + "timestamp": 1615296494311, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "KV8EM_b41m0m", + "outputId": "b898ccdb-16f0-415b-a629-25b794f42859" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n", + "0 5.1 3.5 ... 0.2 0\n", + "1 4.9 3.0 ... 0.2 0\n", + "2 4.7 3.2 ... 0.2 0\n", + "3 4.6 3.1 ... 0.2 0\n", + "4 5.0 3.6 ... 0.2 0\n", + "\n", + "[5 rows x 5 columns]" + ] + }, + "execution_count": 26, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "d = df.copy()\n", + "d.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 737, + "status": "ok", + "timestamp": 1615296536700, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "pGOooxXo1xqA", + "outputId": "a3a5fe7b-d857-49c8-8d59-ed08472c37e9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{0.0: 'setosa', 1.0: 'versicolor', 2.0: 'virginica'}" + ] + }, + "execution_count": 27, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "targets = {float(i):target for i, target in enumerate(ds.target_names)}\n", + "targets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 474, + "status": "ok", + "timestamp": 1615296574079, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "1qI4cEd81xxK", + "outputId": "7e62a1d9-dc06-4fc5-8270-6da0236d7341" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n", + "0 5.1 3.5 ... 0.2 setosa\n", + "1 4.9 3.0 ... 0.2 setosa\n", + "2 4.7 3.2 ... 0.2 setosa\n", + "3 4.6 3.1 ... 0.2 setosa\n", + "4 5.0 3.6 ... 0.2 setosa\n", + "\n", + "[5 rows x 5 columns]" + ] + }, + "execution_count": 28, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "d.target = d.target.map(targets) # заменим цифровые обозначения классов на буквенные подписи\n", + "d.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 647, + "status": "ok", + "timestamp": 1615296637939, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "q1W6kwXe1xuc", + "outputId": "cbd628aa-1e1b-4a98-e5f5-b9ef80aa9544" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)targetsepal_length_on_width
05.13.51.40.2setosa1.457143
14.93.01.40.2setosa1.633333
24.73.21.30.2setosa1.468750
34.63.11.50.2setosa1.483871
45.03.61.40.2setosa1.388889
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... target sepal_length_on_width\n", + "0 5.1 3.5 ... setosa 1.457143\n", + "1 4.9 3.0 ... setosa 1.633333\n", + "2 4.7 3.2 ... setosa 1.468750\n", + "3 4.6 3.1 ... setosa 1.483871\n", + "4 5.0 3.6 ... setosa 1.388889\n", + "\n", + "[5 rows x 6 columns]" + ] + }, + "execution_count": 31, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "d['sepal_length_on_width'] = d['sepal_length_(cm)'] / d['sepal_width_(cm)'] # операции непосредственно со столбцами много быстрее поэлементных операций \n", + "d.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dRp4-vhV1xmt" + }, + "outputs": [], + "source": [ + "d.sepal_length_on_width = d.sepal_length_on_width.apply(np.sin)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 767, + "status": "ok", + "timestamp": 1615296813029, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "357_A4ny1xjb", + "outputId": "196d0d5d-1883-4552-ec7c-890f592130de" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)targetsepal_length_on_width
05.13.51.40setosa0.993548
14.93.01.40setosa0.998045
24.73.21.30setosa0.994798
34.63.11.50setosa0.996224
45.03.61.40setosa0.983500
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... target sepal_length_on_width\n", + "0 5.1 3.5 ... setosa 0.993548\n", + "1 4.9 3.0 ... setosa 0.998045\n", + "2 4.7 3.2 ... setosa 0.994798\n", + "3 4.6 3.1 ... setosa 0.996224\n", + "4 5.0 3.6 ... setosa 0.983500\n", + "\n", + "[5 rows x 6 columns]" + ] + }, + "execution_count": 33, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "def bias(x):\n", + " if x < 1.0:\n", + " return 0\n", + " return 1\n", + "d['petal_width_(cm)'] = d['petal_width_(cm)'].apply(bias)\n", + "d.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aeUhqZEX1xey" + }, + "outputs": [], + "source": [ + "d.drop([column for column in d.columns if column.endswith('length_(cm)')], axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 684, + "status": "ok", + "timestamp": 1615296912439, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "gQJ6De486fsw", + "outputId": "f39caff3-2866-4a3b-b6ac-8510ddad127f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_width_(cm)petal_width_(cm)targetsepal_length_on_width
03.50setosa0.993548
13.00setosa0.998045
23.20setosa0.994798
33.10setosa0.996224
43.60setosa0.983500
\n", + "
" + ], + "text/plain": [ + " sepal_width_(cm) petal_width_(cm) target sepal_length_on_width\n", + "0 3.5 0 setosa 0.993548\n", + "1 3.0 0 setosa 0.998045\n", + "2 3.2 0 setosa 0.994798\n", + "3 3.1 0 setosa 0.996224\n", + "4 3.6 0 setosa 0.983500" + ] + }, + "execution_count": 36, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "d.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 589, + "status": "ok", + "timestamp": 1615296981297, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "H6wlNTB76hoP", + "outputId": "ef4c98c9-53fc-403d-e5ad-91b96ab8f864" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "((150, 4), (300, 4))" + ] + }, + "execution_count": 37, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "f = pd.concat([d,d], axis=0)\n", + "d.shape, f.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 572, + "status": "ok", + "timestamp": 1615297019618, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "8wvhQgCh6stP", + "outputId": "ed87bb36-d869-49c6-cc1a-516cd9daa65b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "((150, 4), (150, 8))" + ] + }, + "execution_count": 38, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "f = pd.concat([d,d], axis=1)\n", + "d.shape, f.shape" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "executionInfo": { + "elapsed": 476, + "status": "ok", + "timestamp": 1632405950884, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "hYXfdNRds8wc", + "outputId": "2b21ce4b-5d17-4800-ea16-396dc95557c3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
45
112
212
312
\n", + "
" + ], + "text/plain": [ + " 4 5\n", + "1 1 2\n", + "2 1 2\n", + "3 1 2" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_1 = pd.DataFrame(index=[1,2,3], data=[[1,2],[1,2],[1,2]], columns=[4,5])\n", + "df_2 = pd.DataFrame(index=[5,6,3], data=[[1,2],[1,2],[1,2]], columns=[5,7])\n", + "df_1" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "executionInfo": { + "elapsed": 12, + "status": "ok", + "timestamp": 1632405952831, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "-bALOOiOs_xk", + "outputId": "56478aab-30e8-477d-8628-2352f3ed3ac4" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
57
512
612
312
\n", + "
" + ], + "text/plain": [ + " 5 7\n", + "5 1 2\n", + "6 1 2\n", + "3 1 2" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_2" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 237 + }, + "executionInfo": { + "elapsed": 400, + "status": "ok", + "timestamp": 1632405958777, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "nspfyfjMUepW", + "outputId": "946cd0f5-3470-4620-a1ea-52221f5a06b1" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
457
11.02NaN
21.02NaN
31.02NaN
5NaN12.0
6NaN12.0
3NaN12.0
\n", + "
" + ], + "text/plain": [ + " 4 5 7\n", + "1 1.0 2 NaN\n", + "2 1.0 2 NaN\n", + "3 1.0 2 NaN\n", + "5 NaN 1 2.0\n", + "6 NaN 1 2.0\n", + "3 NaN 1 2.0" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_1 = pd.DataFrame(index=[1,2,3], data=[[1,2],[1,2],[1,2]], columns=[4,5])\n", + "df_2 = pd.DataFrame(index=[5,6,3], data=[[1,2],[1,2],[1,2]], columns=[5,7])\n", + "\n", + "pd.concat([df_1,df_2], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 217 + }, + "executionInfo": { + "elapsed": 594, + "status": "ok", + "timestamp": 1615297123302, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "hfsafxqc6wl0", + "outputId": "80ab6214-48dd-4847-9637-c8eda376ce2b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_width_(cm)petal_width_(cm)targetsepal_length_on_widthsepal_width_(cm)petal_width_(cm)targetsepal_length_on_width
03.50setosa0.9935483.50setosa0.993548
13.00setosa0.9980453.00setosa0.998045
23.20setosa0.9947983.20setosa0.994798
33.10setosa0.9962243.10setosa0.996224
43.60setosa0.9835003.60setosa0.983500
\n", + "
" + ], + "text/plain": [ + " sepal_width_(cm) petal_width_(cm) ... target sepal_length_on_width\n", + "0 3.5 0 ... setosa 0.993548\n", + "1 3.0 0 ... setosa 0.998045\n", + "2 3.2 0 ... setosa 0.994798\n", + "3 3.1 0 ... setosa 0.996224\n", + "4 3.6 0 ... setosa 0.983500\n", + "\n", + "[5 rows x 8 columns]" + ] + }, + "execution_count": 44, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "f.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HRY-rDbb8gGk" + }, + "outputs": [], + "source": [ + "g = d.drop(['sepal_width_(cm)', 'petal_width_(cm)'], axis=1)\n", + "h = d.drop(['sepal_length_on_width'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 429, + "status": "ok", + "timestamp": 1615297139175, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "tsgVE2Si8oFG", + "outputId": "dea93f74-7d0d-4030-c81c-84ea655c5f6d" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
targetsepal_length_on_width
0setosa0.993548
1setosa0.998045
2setosa0.994798
3setosa0.996224
4setosa0.983500
\n", + "
" + ], + "text/plain": [ + " target sepal_length_on_width\n", + "0 setosa 0.993548\n", + "1 setosa 0.998045\n", + "2 setosa 0.994798\n", + "3 setosa 0.996224\n", + "4 setosa 0.983500" + ] + }, + "execution_count": 46, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "g.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 628, + "status": "ok", + "timestamp": 1615297148886, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "kny_HFf489cy", + "outputId": "59ae8694-c22e-4118-e25f-f31a2e148c4e" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_width_(cm)petal_width_(cm)target
03.50setosa
13.00setosa
23.20setosa
33.10setosa
43.60setosa
\n", + "
" + ], + "text/plain": [ + " sepal_width_(cm) petal_width_(cm) target\n", + "0 3.5 0 setosa\n", + "1 3.0 0 setosa\n", + "2 3.2 0 setosa\n", + "3 3.1 0 setosa\n", + "4 3.6 0 setosa" + ] + }, + "execution_count": 47, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "h.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 588, + "status": "ok", + "timestamp": 1615297241757, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ZAKyHnni8_wx", + "outputId": "cc83133f-1f83-4c7a-f041-b23d01f14cf4" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
targetsepal_length_on_widthsepal_width_(cm)petal_width_(cm)
0setosa0.9935483.50
1setosa0.9935483.00
2setosa0.9935483.20
3setosa0.9935483.10
4setosa0.9935483.60
\n", + "
" + ], + "text/plain": [ + " target sepal_length_on_width sepal_width_(cm) petal_width_(cm)\n", + "0 setosa 0.993548 3.5 0\n", + "1 setosa 0.993548 3.0 0\n", + "2 setosa 0.993548 3.2 0\n", + "3 setosa 0.993548 3.1 0\n", + "4 setosa 0.993548 3.6 0" + ] + }, + "execution_count": 49, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "d = g.merge(h, on='target')\n", + "d.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "executionInfo": { + "elapsed": 712, + "status": "ok", + "timestamp": 1614767389654, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "m6ec0Exh9K8V", + "outputId": "1c97b950-0ba5-4b63-f8b7-2560f8decceb" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
setosaversicolorvirginica
0100
1100
2100
3100
4100
............
7495001
7496001
7497001
7498001
7499001
\n", + "

7500 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " setosa versicolor virginica\n", + "0 1 0 0\n", + "1 1 0 0\n", + "2 1 0 0\n", + "3 1 0 0\n", + "4 1 0 0\n", + "... ... ... ...\n", + "7495 0 0 1\n", + "7496 0 0 1\n", + "7497 0 0 1\n", + "7498 0 0 1\n", + "7499 0 0 1\n", + "\n", + "[7500 rows x 3 columns]" + ] + }, + "execution_count": 46, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(d.target)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "executionInfo": { + "elapsed": 440, + "status": "ok", + "timestamp": 1615297478580, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "Hrp_HGEb9t4d", + "outputId": "b3b9983b-e598-4288-90ee-7b0d1abe5ff8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_on_widthsepal_width_(cm)petal_width_(cm)target_setosatarget_versicolortarget_virginica
00.9935483.50100
10.9935483.00100
20.9935483.20100
30.9935483.10100
40.9935483.60100
\n", + "
" + ], + "text/plain": [ + " sepal_length_on_width sepal_width_(cm) ... target_versicolor target_virginica\n", + "0 0.993548 3.5 ... 0 0\n", + "1 0.993548 3.0 ... 0 0\n", + "2 0.993548 3.2 ... 0 0\n", + "3 0.993548 3.1 ... 0 0\n", + "4 0.993548 3.6 ... 0 0\n", + "\n", + "[5 rows x 6 columns]" + ] + }, + "execution_count": 50, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "d = pd.get_dummies(data=d, columns=['target'])\n", + "d.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ym2h89BMguk6" + }, + "source": [ + "### Графическое представление" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EB8GRu9XxNaZ" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import seaborn as sns\n", + "from matplotlib import pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hbipgoEZxNOg" + }, + "outputs": [], + "source": [ + "sns.set_style(\"whitegrid\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 122 + }, + "executionInfo": { + "elapsed": 587, + "status": "ok", + "timestamp": 1614779517504, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "VlMb-EWdxNMn", + "outputId": "9907624b-bf04-4f40-f152-94951d92a782" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(0.12156862745098039, 0.4666666666666667, 0.7058823529411765), (1.0, 0.4980392156862745, 0.054901960784313725), (0.17254901960784313, 0.6274509803921569, 0.17254901960784313), (0.8392156862745098, 0.15294117647058825, 0.1568627450980392), (0.5803921568627451, 0.403921568627451, 0.7411764705882353), (0.5490196078431373, 0.33725490196078434, 
0.29411764705882354), (0.8901960784313725, 0.4666666666666667, 0.7607843137254902), (0.4980392156862745, 0.4980392156862745, 0.4980392156862745), (0.7372549019607844, 0.7411764705882353, 0.13333333333333333), (0.09019607843137255, 0.7450980392156863, 0.8117647058823529)]\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAABECAYAAACF4e8fAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAChklEQVR4nO3bMWtTYRiG4a+pqJBQEO1gF4UiIo6ZdWpH/4WLm5s4WKgOrs6CILp1chGnThkEMZuODnWR2hRsaYjRmuMfaDsIX488XNf6Lg8cONxwOHNN0xQAgGSdtgcAANQmeACAeIIHAIgneACAeIIHAIh35qTj+w8fm697h6e15dRd747Lucl22zOq2e4tldHvUdszqlj+1pTZpcXSGe20PaWK6eWrZXowa3tGNWe70zLd/9H2jGoWLiyV+XHu8xt3D8tkMml7RhW93m7pdBbLbJb5bhl3lsv34J+zr82XUb/fXzzqdmLwzJqmPNjcrbPqP/Buda/cGNxre0Y1n1delPUv623PqGLj6WH5+eRxOf9ore0pVew/e1k+vRm3PaOam3cOyvDV87ZnVLNyd61c3Jy2PaOa0eqvMhgM2p5Rxa3br0uv+7QcjB+2PaWKnd5GuT+eb3tGNW8X/mwdd/NJCwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIJ3gAgHiCBwCIN9c0zbHH4XC4U0rZOr05AAD/7Eq/31886nBi8AAAJPBJCwCIJ3gAgHiCBwCIJ3gAgHiCBwCI9xdiZWLdKNW9eAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light", + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "print(sns.color_palette())\n", + "sns.palplot(sns.color_palette())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 622, + "status": "ok", + "timestamp": 1615297767532, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "4umRGJuKqHuO", + "outputId": "49a1d76f-c4ba-4088-817f-e1bdce211bdc" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{0.0: 'setosa', 1.0: 'versicolor', 2.0: 'virginica'}" + ] + }, + "execution_count": 54, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "targets = {float(i):target for i, target in enumerate(ds.target_names)}\n", + "targets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "executionInfo": { + "elapsed": 456, + "status": "ok", + "timestamp": 1615297774179, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "SDeuDnTEXKQk", + "outputId": "53cf3a73-56d9-42cc-f715-9dee1f23fd15" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_length_(cm)sepal_width_(cm)petal_length_(cm)petal_width_(cm)target
507.03.24.71.41
516.43.24.51.51
526.93.14.91.51
535.52.34.01.31
546.52.84.61.51
555.72.84.51.31
566.33.34.71.61
574.92.43.31.01
586.62.94.61.31
595.22.73.91.41
605.02.03.51.01
615.93.04.21.51
626.02.24.01.01
636.12.94.71.41
645.62.93.61.31
656.73.14.41.41
665.63.04.51.51
675.82.74.11.01
686.22.24.51.51
695.62.53.91.11
705.93.24.81.81
716.12.84.01.31
726.32.54.91.51
736.12.84.71.21
746.42.94.31.31
756.63.04.41.41
766.82.84.81.41
776.73.05.01.71
786.02.94.51.51
795.72.63.51.01
805.52.43.81.11
815.52.43.71.01
825.82.73.91.21
836.02.75.11.61
845.43.04.51.51
856.03.44.51.61
866.73.14.71.51
876.32.34.41.31
885.63.04.11.31
895.52.54.01.31
905.52.64.41.21
916.13.04.61.41
925.82.64.01.21
935.02.33.31.01
945.62.74.21.31
955.73.04.21.21
965.72.94.21.31
976.22.94.31.31
985.12.53.01.11
995.72.84.11.31
\n", + "
" + ], + "text/plain": [ + " sepal_length_(cm) sepal_width_(cm) ... petal_width_(cm) target\n", + "50 7.0 3.2 ... 1.4 1\n", + "51 6.4 3.2 ... 1.5 1\n", + "52 6.9 3.1 ... 1.5 1\n", + "53 5.5 2.3 ... 1.3 1\n", + "54 6.5 2.8 ... 1.5 1\n", + "55 5.7 2.8 ... 1.3 1\n", + "56 6.3 3.3 ... 1.6 1\n", + "57 4.9 2.4 ... 1.0 1\n", + "58 6.6 2.9 ... 1.3 1\n", + "59 5.2 2.7 ... 1.4 1\n", + "60 5.0 2.0 ... 1.0 1\n", + "61 5.9 3.0 ... 1.5 1\n", + "62 6.0 2.2 ... 1.0 1\n", + "63 6.1 2.9 ... 1.4 1\n", + "64 5.6 2.9 ... 1.3 1\n", + "65 6.7 3.1 ... 1.4 1\n", + "66 5.6 3.0 ... 1.5 1\n", + "67 5.8 2.7 ... 1.0 1\n", + "68 6.2 2.2 ... 1.5 1\n", + "69 5.6 2.5 ... 1.1 1\n", + "70 5.9 3.2 ... 1.8 1\n", + "71 6.1 2.8 ... 1.3 1\n", + "72 6.3 2.5 ... 1.5 1\n", + "73 6.1 2.8 ... 1.2 1\n", + "74 6.4 2.9 ... 1.3 1\n", + "75 6.6 3.0 ... 1.4 1\n", + "76 6.8 2.8 ... 1.4 1\n", + "77 6.7 3.0 ... 1.7 1\n", + "78 6.0 2.9 ... 1.5 1\n", + "79 5.7 2.6 ... 1.0 1\n", + "80 5.5 2.4 ... 1.1 1\n", + "81 5.5 2.4 ... 1.0 1\n", + "82 5.8 2.7 ... 1.2 1\n", + "83 6.0 2.7 ... 1.6 1\n", + "84 5.4 3.0 ... 1.5 1\n", + "85 6.0 3.4 ... 1.6 1\n", + "86 6.7 3.1 ... 1.5 1\n", + "87 6.3 2.3 ... 1.3 1\n", + "88 5.6 3.0 ... 1.3 1\n", + "89 5.5 2.5 ... 1.3 1\n", + "90 5.5 2.6 ... 1.2 1\n", + "91 6.1 3.0 ... 1.4 1\n", + "92 5.8 2.6 ... 1.2 1\n", + "93 5.0 2.3 ... 1.0 1\n", + "94 5.6 2.7 ... 1.3 1\n", + "95 5.7 3.0 ... 1.2 1\n", + "96 5.7 2.9 ... 1.3 1\n", + "97 6.2 2.9 ... 1.3 1\n", + "98 5.1 2.5 ... 1.1 1\n", + "99 5.7 2.8 ... 
1.3 1\n", + "\n", + "[50 rows x 5 columns]" + ] + }, + "execution_count": 55, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.target==1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rg_HMRSVzGz-" + }, + "source": [ + "Строим гистограммы" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 406 + }, + "executionInfo": { + "elapsed": 1244, + "status": "ok", + "timestamp": 1615297826988, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "mx_PNSF8xNKe", + "outputId": "5d46e25e-fb29-467c-d88f-b3b689306815" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. 
Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "for target in targets:\n", + " sns.distplot(df[df.target==target]['sepal_length_(cm)'],kde=True,kde_kws={\"label\":targets[target]})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 406 + }, + "executionInfo": { + "elapsed": 1136, + "status": "ok", + "timestamp": 1615297848522, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "bNUuVXgzhvz1", + "outputId": "7ef13877-988b-4be0-a9b2-b4983762d161" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. 
Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "for target in targets:\n", + " sns.distplot(df[df.target==target]['sepal_width_(cm)'],kde=True,kde_kws={\"label\":targets[target]})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 408 + }, + "executionInfo": { + "elapsed": 923, + "status": "ok", + "timestamp": 1615297853838, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "6Li1iREOhvts", + "outputId": "56d8c257-b464-465f-c365-a0dbc90b03b6" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. 
Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "for target in targets:\n", + " sns.distplot(df[df.target==target]['petal_length_(cm)'],kde=True,kde_kws={\"label\":targets[target]})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 404 + }, + "executionInfo": { + "elapsed": 1286, + "status": "ok", + "timestamp": 1614779712345, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "oCQEu59thvri", + "outputId": "1e523154-41f1-4e2c-ca8e-0aebf1cee232" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. 
Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light", + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "for target in targets:\n", + " sns.distplot(df[df.target==target]['petal_width_(cm)'],kde=True,kde_kws={\"label\":targets[target]})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "48op7eXwzKb4" + }, + "source": [ + "Строим точечные графики взаимного влияния параметров" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "executionInfo": { + "elapsed": 1461, + "status": "ok", + "timestamp": 1614767506092, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "hKh-KV27whqi", + "outputId": "b6d7f703-3029-4c58-e075-ba67e8307bcf" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "g = sns.FacetGrid(df, hue='target')\n", + "g.map(plt.scatter, 'sepal_length_(cm)', 'sepal_width_(cm)');\n", + "g.add_legend();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "executionInfo": { + "elapsed": 1523, + "status": "ok", + "timestamp": 1614767511831, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ArJjRTF6ySuO", + "outputId": "a857d46d-33d9-417c-8a78-e3b3cf5dcbd9" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "g = sns.FacetGrid(df, hue='target')\n", + "g.map(plt.scatter, 'petal_length_(cm)', 'petal_width_(cm)');\n", + "g.add_legend();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "executionInfo": { + "elapsed": 1157, + "status": "ok", + "timestamp": 1614767516166, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "5KszRYQ0yaaV", + "outputId": "ffcba165-ccb3-4f61-e7ae-c413017c2e8c" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "g = sns.FacetGrid(df, hue='target')\n", + "g.map(plt.scatter, 'petal_length_(cm)', 'sepal_width_(cm)');\n", + "g.add_legend();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "executionInfo": { + "elapsed": 1668, + "status": "ok", + "timestamp": 1614767522475, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "SE3c3sS0yfgl", + "outputId": "ee44eed9-08f4-495a-e122-a0910757f984" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPcAAADQCAYAAADbJffdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO2de1hU5fbHvzPDNUAUjoB3RcUMRcVLmKaCgD6OgqKmHrU0rTRNyn4qaCni/VIntJ5O5ck0jbyhmHiOJOAly0uKkIqGEcZFEEUBEZCZeX9/TIwzw57Zey6b2TO8n+fx0b33e1kzsNx7f9/1riUihBBQKBSbQ2xpAygUCj9Q56ZQbBTq3BSKjUKdm0KxUahzUyg2ilU6d25urqVNUJGfn29pE3RCbTMOIdtmCFbp3DKZzNImqKipqbG0CTqhthmHkG0zBKt0bgqFwg51bgrFRqHOTaHYKLw69927dzFz5kyMGTMGUqkUu3btatTmwoUL6N+/PyIjIxEZGYlPP/2UT5MoDKTkpSD8YDgCdgUg/GA4UvJSmqRvU4zXnLHjc3CJRIKYmBj4+/vj8ePHmDhxIoYMGYJu3bpptBswYAC++OILPk2h6CAlLwVxP8ehVl4LALhbfRdxP8cBAKS+Ut76NsV4zR1e79xeXl7w9/cHALi6usLX1xelpaV8TkkxkIQrCSpnaqBWXouEKwm89m2K8Zo7vN651SksLEROTg769OnT6NrVq1cREREBLy8vLFu2DN27d9c7Vl1dHXJycvgy1SBqa2sFY4s2XGwrqS7ReZ7Pvky2mTKeOVG3rWfPnk02r7lpEueurq7GokWLsHz5cri6umpc8/f3R3p6OlxcXHD69GksWLAAqampesdzdHQUzJeek5MjGFu04WKbz3Uf3K2+2/i8iw+vfZlsM2U8cyLkn6kh8K6W19fXY9GiRRg3bhzCw8MbXXd1dYWLiwsAYPjw4ZDJZCgvL+fbLMrfRAdGw0nipHHOSeKE6MBoXvs2xXjNHV6dmxCCFStWwNfXF7Nnz2ZsU1ZWhoZ8EdnZ2VAoFGjVqhWfZlk95lSUpb5SRHaLhFik/FUQi8SI7BbJScAypa+u8eJeikMblzYQQYQ2Lm0Q91IcFdOMhNfH8suXLyM5ORl+fn6IjIw
EACxevBjFxcUAgGnTpuHEiRNITEyERCKBk5MTPv74Y4hEIj7Nsmr4UKiTbydDQRQAAAVRIPl2Mvp59eOklhvbVxdSXyl1ZjMhssY0S0J6J2pqW8IPhjO+l7ZxaYPUSZpaBRfbDBnPnH2F9DPURsi2GQKNULMy9CnKTT2euW2hmBfq3FaGj4uPQef5HM/ctlDMC3VuK0NICjVVt4VNkwWxUMxDg9iUcCUBJdUl8HHxQXRgtEEiVEpeikb/yG6ROFN4BiXVJXB3dAchBLFnY5FwJUHv2FJfKTLvZeLA7wegIAqIRWL0bd0XCVcSEHs2Fj4uPujk1gkXSy+qrk/2m4wPgj4wy3fBxtrzazVsa8q5hQB1bivEFEWZSW1Pvp2MuJfiAMAgJZ5JLT9fcl51/W71XQ3BTUEU2HdrHwBgovtEo+znytrza1Vzac/dXBycPpY3M/TFbxsa283UngsHfj9gcB9zzdEUcwsFeuduZhijcJtbFW+40/OJrjmaYm6hQO/czQx9Creh6rexqnhDRBuf6JqjKeYWCs3nk1IA6Fe4DVW/mdpzYbLfZIP7mGuOpphbKFDnbmboi99mihVvUL+Z4tiZxprSY4rGcZBPkMZ4U3pMaRJB64OgDxDkE6RxLsgnqNmIaQB9526W6FLbuajf2uq5UGPBU/JScLXsqsa5q2VXkZKXIkh7+YDeuSkquKjf1pIZhWZ1oc5NUYOr+m0NseM07p06N0UNruq3NcSO07h36twUNbio39YSO07j3qmgRlGDKVZ8kPcg3Km6ozOOXTtOfVj7Yao4daZjtv6Gxsmroy9m3tSxrRHq3BQVTGr51bKrOlMdMcWpq8dzMx03qO2+8DVrVhl9MfPNyaHVoY/lFBVNEVuuPp45FW2qjjeGOjdFhaEKs6nZX8w5LlXHG0Odm6KiqWLLG/qZc1yqjjeGOjdFRVPElquPZ05Fm6rjjbF4lU9CCNauXYuwsDCMGzcO169f59Mkih4MzRvOJbZc+1h9PHPmKac5zxkgPFJaWkquXbtGCCGkqqqKhIeHk9zcXI02p06dInPmzCEKhYJkZmaSSZMmsY5748YNXuw1Bj5sOfbHMRJ2IIz0/qY3CTsQRtb8skbj+Ngfx5rMtjW/rCEBuwJIr296kYBdAWTNL2tMHpOQpvkZan+P2t+brutC+v0yBV6Xwry8vODl5QVAs8qnegnftLQ0jB8/HiKRCH379kVlZSXu3bun6tfc4LK81FRlba05VRHbMpu+677wtYTJZqfJ3rl1VfksLS2Fj88z0cPHx6dZl/kV0uYNa05VxLY01hyWzixe5dMYbLmEryGbN4wpk2sI+lIVmfqZ+S59zFYOWN91WsKXI2xVPr29vVFS8uyLLikpgbe3t94xbbmEr64yto3aGVkm1xDEl8SMDi4WiU3+zHyX7GErB6zvupOTk2B+v0zB4lU+Q0JCcOTIERBCcPXqVbi5uTXb921AWJs3rDlVEdvSWHNYOuN0587MzMTRo0fx66+/oqysDE5OTujevTtGjBiBiIgIuLm5MfbjUuVz+PDhOH36NMLCwuDs7Iz169eb6aNZJ0xFB7Q3X3Ry64TlPy1HzNkYsyTbV99w0VCUoPJpJXxcfBDkE2SxogJsturbDMJWvEHfdaG88pkKa5XPuXPnwsvLCyNHjkSvXr3g6emJuro65Ofn48KFC8jIyMCsWbMwcuTIprJZUFUYm9oWbQW7AabcZFxs01aNtXGSOPGyXmzM98ZkKx/2Cen3yxRYnbu8vBweHh56B+HSxpwI6ctvalv67O6j8z0469UsjXOmlPBVh0tJXkMx5nszpWSwIQjp98sUWB/LtZ328ePHkMlkquOWLVs2qWM3d8ydbN+UUr1NDd0cYhic1fLvv/8e27dvh6Ojo+qcSCRCWloaL4ZRmBGLdCvYxuDjwq7OC2XzhS5bhWKf0OD8G/H111/jhx9+QHp6uuoPdeymx9wKNps6LyQFuTko3OaE8527Q4c
OcHZ25tMWCgcaRDONVEjPdcSZG98j4Ob38FEAw7wG4ExtsVIFvs6e+qidSzv8UflHo7mYSvLymSaJaTxtW5t76iRDYBXUGrhx4wZiY2PRp08fODg4qM5/8EHTL4sISfCwtC0ppz5E3J+HUSsWPTtJCCAS6e5kAurqtCnqNdP3xqbcGzK+KVj6Z2ouOD+Wr1y5EkFBQejTpw/8/f1VfyiWJSFPy7EB3hwb4Dc+W0hx9bYA58dymUyG2NhYPm2hGEGJBdJt8JEmyZB+VB3nBudfjWHDhmHfvn24d+8eHj16pPpDsSw+Fig3zUeaJEP6UXWcG5yd+9ixY/jiiy8wdepUREVFISoqChMnTuTTNgoHon0nwEmhJZtwk1GMgs/4bCHF1dsCnB/L09PT+bRD8JhbFTYX0hFrACjfvUvEaKyWcygU0Mmtkyp+HACes3sONbIa1qICUl8p8Nd5jbmjO402+nvhElcvlO/dGuCslu/duxfjxo1DixYtAAAVFRU4duwYpk+fzquBTDS1mqlPFfat8xWsssr795S9H/hhEVBf8+ycvTMwbhsQ8IplbTMBIdtmCJwfy/fv369ybABwd3fHgQPCz8hhDppD1g6jSIvXdGxAeZwWbxl7KBpwdm6FQgH1m7xcLkd9fT0vRgkNGtOsg4pCw85TmhTO79xDhw7Fu+++i6lTpwJQxpq//PLLvBkmJGhMsw7c2wMVBcznKRaH8517yZIlCAoKQmJiIhITEzF48GAsWbKET9sEA41p1sHIlcp3bHXsnZXnKRaH851bLBZj2rRpmDZtGp/2CBKry9pxbDFw+Rs8T+SASAL0nwWM/Vh3++z9yvfkikLlXXfkSlZBDMCzNsb01YFQVyWsEVbnnjdvHl555RW8/PLLsLe317hWUFCApKQktGvXDpMmTeLNSCEg9ZVaxy/ZscXAr/8BAIgAgMhVx4wOrq14VxQojwHuDm6CM6tjzpK+FA5LYWVlZdi5cydSU1Ph7u4ODw8P1NXVobCwEJ06dcL06dMRGhraVPYCENZShZBsAQCs9lA6tDYiCbCqvPH5f/XS8d7cAXjvmvnt+xum762pMq2wIbifqZGw3rlbt26NpUuXYunSpSgsLFQlSOzcuTPdAipEmBxb33kBKd50VcK8GLTtoH379ujXrx969uzZyLGnTJliVsMoRiKSGHZel7JtAcWbluE1L2bbU1RXV2euoSim0H+WYecFpHjTVQnzYraKIyKGPcSxsbE4deoUPD09cezYsUbXL1y4gLfffhvt2yvvEmFhYVi4cKG5TBIubOq0oeq1VvsUn65IsKtBiZ0EPjI5ou19IFUX07Tbt++JBFkJSiQS+MjliK6ogTTpTWWb7uFAbqrxthrwWdhyjVMMg9dyQlFRUZgxYwaWLVums82AAQPwxRdf8GmGsGBTpw1Vr7Xap8geIM7RA7Vi5Y/2rr0d4hRlwKkPlZtMmNorCGrt/m5vZ4e4Vi6AvA7SioJnSrsxtuq7bt+b8euxmlUJK8Bsj+VMovvAgQPh7u5urilsA7Z4bEPjtbXaJ7RqiVqx5o+1VixCQt5hA9qLkdCqJfN8hthKY88titnu3Js3bzaq39WrVxEREQEvLy8sW7YM3bt3Z+1jzVU+n68oBFMSJFJRiJs5OazX2cYrsWMWzkrEyiUezu11nDfEVn3X+a7yaQrNrspnamoqtm7digcPHoAQAkIIRCIRrly5AgDw8/MzeHJ/f3+kp6fDxcUFp0+fxoIFC5Cayr6eadVVPnXEY4vc2yvHYbvOMp6PTI679o1/rD4KMI6vs71Mx9KZIbbquS7kSpq2ss7N+bF8y5Yt+Pzzz3H58mVcuXIFmZmZKsc2FldXV7i4uAAAhg8fDplMhvJyhkALW4JNnTZUvdZqH/3wEZwUmrmXnBQE0b4TDGivQPRDHSm0DLFVQEq8uamsrMTevXt5n+fkyZO4ffu2UX05O7enpye6du1q1CS6KCsrU72rZ2dnQ6FQoFW
rVmadwyJk71dGfsW1VP6dvf/ZtYBXlMkM3DsAECn/Vk9uwHZdG632UjtPxLkFoI2cQEQI2sgJIsVuSPjjIAK+6YXwS6uQ0r6n7vYyBeIqn0JaXaNsM2COpi19/ql8Z45rqfy7/aBna+giifJY/XqffzL2f37f4MbfjRVRWVmJxMREzu0JIVAoDE94Z4pzs4afNjwmX7x4Effv30doaKhG3vLw8HCdfRcvXoyLFy/i4cOH8PT0xDvvvKOqMzZt2jTs2bMHiYmJkEgkcHJyQkxMDAIDA1mNFtJjUyNbTMhOwodtedfjEVeVrSGaOSkUiHMLgHQS919OAMyfjQ31zy6g70YfXH6/3nvvPaSlpaFLly548cUXcevWLVRWVkImkyE6OhqhoaEoLCzEnDlz0KdPH1y/fh1ffvkljhw5gqNHj8LDwwNt2rSBv78/5syZg7/++gurV6/Gw4cP4eTkhDVr1qCiogLz5s2Dq6sr3NzcsH37dnTs2JHz52B9587IyFD929nZGefOndO4rs+5P/5Yz04kADNmzMCMGTPYTLAu9CnEFvgFTqi4qlrmaqBWLEZCxVUYvODE9NnYUP/sAvtuTOH9999Hbm4ukpOTIZPJUFtbC1dXV5SXl2PKlCmqktZ37tzBpk2b0LdvX2RnZyM1NRVHjx5FfX09oqKiVLn/P/zwQ6xevRqdO3dGVlYWVq9ejd27dyMkJAQjRozA6NGjDbaR1bk3bNgAALh8+TL69++vce3y5csGT2jzCChWGwBKJDrUcB3n9WLsZ2joJ7DvxlwQQvDxxx/j0qVLEIvFKC0txf379wEAbdu2Rd++fQEAV65cwciRI+Ho6AhHR0cEBwcDAKqrq5GZmYno6GeReE+fPjXZLs5q+dq1a3H48GHWc80egWUn8ZHLcdeOQQ2X61bDdaLrs3Hpp6+/lWdu+eGHH1BeXo6kpCTY29sjJCREFY793HPPsfYnhKBFixZITk42q12sglpmZia+/vprlJeXY+fOnao/27dvh9yYXxBbR2AKcbR7X2Y13L2v4YMxfTY2bFQ9d3FxQXV1NQCgqqoKnp6esLe3x/nz51FUVMTYJzAwEBkZGairq0N1dTVOnToFQLlq1L59e/z3v/8FoHT2mzdvNprHUFidu76+Hk+ePIFcLkd1dbXqj6urK7Zt22bUpDaNoWo3z0gnJSrVcJnsbzVcZpyYBjB/Nm01XftYx0oAEcB3YwqtWrVCYGAgxo4di5s3b+LatWsYN24ckpOT4evry9gnICAAISEhiIiIwBtvvAE/Pz+4ubkBUC41Hzx4EBEREZBKpTh58iQAYMyYMfjPf/6D8ePH46+//jLIRs55y4uKitCuXTuDBucLQavlxvB3WiQ0pEVy8QIeqyUtcHQH6iqeHXcZDrx2lLtt2ps3tDeDqB87/70UWfPQPJta2GwTIHzaVl1dDRcXF9TU1GD69OlYs2YNbwU1OaVZ0se///1vsxnTLFFLiwRA6eCPtbKRqDs2APx5GtgVwcnBGTdvaG8GUT+uKde8ZsqmFkojVq5cidu3b6Ourg4TJkzgtVIuq3O//vrrAJTr3ffv30dERAQAICUlBZ6enrwZ1my4/I1x/f48za2dMctX6tjoUpal+Oijj5psLlbnHjRoEABg48aNSEpKUp0PCQlBVFQUf5Y1F3SlPzIX5lhmsvGlLFuFc/hpTU0NCgqeLWMUFBSgpsaEOwJFia70R+bCHMtM6ktZfM1BMTuc17ljY2Mxc+ZMdOjQAYQQFBcXY/Xq1Xza1jzoP0vznZcrXYZzazdypeEho+poL2UxhY9a4VJWc4Czcw8bNgypqanIy8sDAPj6+mrEmFOMZOzHwIPbmu/Q2uq4xBmQ12hez/8JiHNnLzoQ8Arw13lNNb7zUKA8z3C1nIciBBT+YHXuX375BYMHD260z7phzU1fbDmFA9n7gcKLmucUT4GorzQVavVXc3XH51J0IOu7Z+/2RK6cz9j1ZTMWIaBw58y
ZM1i3bh0UCgUmT56MN998k7UPq3NfunQJgwcP1thAog51bhNhU6C5qt2Xv2F2bqpwNylHMouw5cQtFD+qQduWzlgyqgfG9zMtPkQulyM+Ph47d+6Et7c3Jk2ahJCQEHTr1k1vP1bnXrRIuY7ZsIGEYmbYFGiuSrQVFB2wdY5kFiE26TfU1Ct/FkWPahCb9BsAmOTg2dnZ6NSpEzp06AAAkEqlSEtLY3Vuzmp5aGgo3n//fSQmJiI3N9doQylasCnQXJVoKyg6YOtsOXFL5dgN1NTLseXELZPGLS0thY/Ps8IM3t7eKC0tZe3H2bmPHz+OqVOn4tGjR9i8eTNCQ0OxYMEC46ylPMOYVEVMWEHRAVun+BHz65Ou83zD2bnFYjHs7OwgkUggFovh6elJI9TU0ZdaSR8BryhTDxmSqqjLcM32A+boV8sFtJHFlmnbkvk/YV3nueLt7Y2Skmf10kpLS+Ht7c3aj/NSWP/+/eHn54fZs2dj8uTJtpHrzFyYEnPNpGarL4tVFCivm+KQVOFuEpaM6qHxzg0AzvYSLBnVw6Rxe/fujfz8fBQUFMDb2xspKSmcwlg5O/dHH32Ey5cv47vvvsOBAwfQr18/DBw4EIMHDzbJcJvAFEWaixpO1W2roEE0M7dabmdnh5UrV2Lu3LmQy+WYOHEip/z+nJ07NDQUoaGh+OOPP3D27Fns2rULO3bsQHZ2tkmG2wSmKNJcVWuqblsF4/u1M9mZmRg+fDiGD+cYlfg3nJ37nXfewc2bN9GxY0cMGDAAmzZtQp8+fQw20iYxJX0Q19RFVN2mGAhn537zzTfxwgsvQKIjsd65c+cwZMgQsxlmVZgSc80l9puq2xQj4OzcvXszV2VsYOvWrY2cm62ELyEE69atw+nTp+Hk5ISNGzfyunndJLQzkHj4Avk/4Xld8dpcY665xn6nxQNJbzJnUtGRLeV5Gv/drDFbIUCmbE1sJXzPnDmD/Px8pKamIisrC3FxcThw4IC5TDIfTGr434/SIuCZwq1vSUrf2Ppiv7lkUtGRLUXEdJ3SbDBbCV+RqHE9R7YSvmlpaRg/fjxEIhH69u2LyspK3Lt3z1wmmQ9D4rvNMTZbGVxtaNlcCgNmu3Mbg3ZYnY+PD0pLS+Hl5aW3X1OX8NVVilYbQuSMZXaNGZutDK6h7XWVALYUtIQv/5jNuZsyM2qTl/DlqGiLRBLD7TKyDK6h7XWWALYQzTX7qTGwaVe6YHVutnrZDVs+P/30U86TNqAdVldSUsIprK7J4ZrNRFd8t6Fjs2U/0YZmSxEOZkr9rA6bdqULgwoBMmHKfu6QkBDs2bMHUqkUWVlZcHNzY30ktwhMGUj+VssJkUPElg3F0LHZsp/oU8vV2pOKQoioWt508JT6eeDAgSgsNDyIiXNRAmNgK+FLCEF8fDzOnj0LZ2dnrF+/nnXJDRDWY5OQbNGG2mYcRtv2r146gpk6AO9dM8mmwsJCzJs3z7yP5eqcOnUKubm5qiJnALBw4UKd7dlK+IpEIqxatcoQEygU4SKwxBicl8JWrlyJ48ePY8+ePQCAEydOoLi4mDfDKBSrQ2CJMTg7d2ZmJjZv3owWLVpg4cKF+P7775Gfn8+jaRSKlSGwxBicndvJyQkA4OzsjNLSUtjb26OsrIw3wygUq4OnxBiLFy/G1KlT8eeff2LYsGGcozg5v3OPGDEClZWVmDNnDqKioiASiTBp0iSjDaZQbBIeEmOwaVe64Ozcb7zxBhwcHDBq1CgEBwejrq4Ojo6ORk1KoVD4h/Nj+ZQpU1T/dnBwgJubm8Y5CoUiLFjv3GVlZSgtLUVtbS1u3Lih2v31+PFjWgiQQhEwrM79008/ISkpCSUlJRqFCVxdXbF48WJejaNQKMbD6twTJkzAhAkTcOLECYwaNaopbKJQKGaA8zt3YGAgli9fjrlz5wIAbt++LczEChQKBYABzh0bG4uhQ4eqkil07twZu3fv5s0wCoWi5O7du5g5cyb
GjBkDqVSKXbt2cerH2bkfPnyIMWPGQCxWdrGzs1P9m0KhKEnJS0H4wXAE7ApA+MFwpOSlmDymRCJBTEwMjh8/jn379uG7777D7du3Wftx9s7nnnsODx8+VKVTunr1Ktzc3Iy3mEKxMVLyUhD3cxzuVt8FAcHd6ruI+znOZAf38vJSJQ51dXWFr68vp0KAnINYYmJiMH/+fBQUFGDq1Kl4+PAhEhISjLeYQrExEq4koFZeq3GuVl6LhCsJkPpKzTJHYWEhcnJyONUM4Ozc3bp1Q1hYGJydneHi4oLQ0FB06dLFJEMpFFuipLrEoPOGUl1djUWLFmH58uVwdXVlbc/5sXzp0qXIy8vDW2+9hRkzZiA/Px9LliwxyVgKxZbwcfEx6Lwh1NfXY9GiRRg3bhzn7Eec79y5ubk4fvy46jgoKAhjxowx3Eob4UhmkVrBt7tmKfhGsW6iA6MR93OcxqO5k8QJ0YHRJo1LCMGKFSvg6+uL2bNnc+7H+c79wgsv4OrVq6rjrKws9OrVyzArbYQjmUWITfoNRY9qQAAUPapBbNJvOJJZZGnTKBZE6itF3EtxaOPSBiKI0MalDeJeijP5ffvy5ctITk7G+fPnERkZicjISJw+fZq1H+c79/Xr1zF16lS0bdsWAFBcXIwuXbpg3LhxAIAffvjBSNOtjy0nbmnUYAaAmno5tpy4Re/ezRypr9Rs4lkDAwYMwK1btwzux9m5d+zYYfDgtkrxI+YNM7rOUyiWgLNzN2XRAaHTtqUzihgcuW1LZ4bWFIploCFmRrBkVA8422uWMna2l2DJqB4WsohCaQzvzn3mzBmMGjUKYWFh+PLLLxtdT0pKQlBQkEoosIbNKOP7tcOGqN5o19IZIgDtWjpjYv922HLiFrrEpGDIxnQqrlEsDq+FAOVyOeLj47Fz5054e3tj0qRJCAkJQbdu3TTajRkzBitXWle5m/H92mF8v3bIycnBrdoWiE36TSWyNajnDe0oFEvA6507OzsbnTp1QocOHeDg4ACpVIq0tDQ+p7QI+tRzCsVS8Hrn1i7R6+3tjezs7EbtUlNTcenSJXTp0gWxsbFo06aN3nGbuoSvPmpra/Wq55a001rK5AoNWsLXTAQHB2Ps2LFwcHDA999/j2XLlrHuE2/yEr56yMnJ0aueW9JOm6zH1QQI2TZD4PWxXLtEb2lpaaMSva1atYKDgwMAYPLkybh+/TqfJvECVc8pQoTXO3fv3r2Rn5+PgoICeHt7IyUlBR999JFGm3v37qnK9qanp6Nr1658mmQ2NGPLndG+lRNy71Wrrgd2dNcppk3/6hec+6NcdTykqwf2vjGY81w0jp3CBV6d287ODitXrsTcuXMhl8sxceJEdO/eHQkJCejVqxdGjhyJb7/9Funp6ZBIJHB3d9fIsCpUGmLL1dVxbc79UY4PjvyGteM1SxJrO3ZD2+lf/cLo4ExzUSWewgVe63PzhaXfiYZsTGd0aG0kIhH+2KC5c65zjO6sHPkbG8ck65qrXUtnnIsJ0Tu/pb8nfVDb+IdGqBkB1xhyuRn+36Rx7BRjoc5tBFxjyCV/55vjYy4ax05hgzq3ETCp40xMe7FDo3NDunowttV1nirxFGOx+Dp3U6GtOAc/3xoZN8t0KtD6FOrx/drh1zvlSLxQADkhkIhEcHEQo7LuWZRady+XRmIaAOx9Y7BBajnTXBP7t6NiGoWVZuHcTIrznvN/qa5rK9BsCvWRzCIculykeqeWE6Lh2ACQe6+aUS0HoHfZi8l27bkOXS7CgE4e1MEpemkWj+VMsd/aqMeCs8WKcxkPABIvFBhp8TNo3DrFWJqFc3NVlhvasSnUVC2nWAPNwrm5KssN7dgUaqqWU6yBZuHcXNRtdQWaTaE2RS03FKqWU4zFZiPUtBXp7l4uePJUoVK/ZXI5Squeqq63cJSg+qlCp/qtfSrTW38AAAr1SURBVF0EAhmHb04iEiHItxXyH9So5u7s6YzzeQ9VY/m
2fg55ZU9Ux9Ne7KAhxBmq9BvyPVkKahv/2KRazhS/nXuvGkO6euBcTAjjdXVHZlK/ta9zRU6IxlxFj2o0wknlhGhsOJETolLyGxy8IesLQGPNKdyxycdybcfVPq/rupDQpbRT9ZzCFZt0bltA19MBVc8pXKHOLVB0Ke1UPadwxSadmy1+W9d1IaFLaafqOYUrNunce98Y3MiB1eO3ma57uzmo7pYSkQjebg56rztJNO+sdjqWtCUiEYZ09dDIcT6kq4fGWN29XDSOZwR1ZAxbBZhzpm+I6k3FNEojbEYtZ9rooS+Gmy2+W3u8zp7OuP/4oer6pIHK5SqmZRP1vj7uTpg8oKNZnU9dPadQdGETzm3u5SGm8bSXrxqWq6b3sGPtS5eqKJbAJh7Lzb08ZMrGELpURREKNuHc5l4eMmVjCF2qoggFm3Bucy8PmbIxhC5VUYSCxat8Pn36FO+++y7CwsIwefJkFBYWGjyHuZeHTNkYQpeqKEKBV+duqPK5Y8cOpKSk4NixY7h9+7ZGmwMHDqBFixb48ccfMWvWLGzdutXgecy9PMQ0nvbyla7lKrpURREKvKrl6lU+AaiqfKqX8E1PT8fChQsBAKNGjUJ8fDwIIRAZuBfa3MtDpoxHl6ooQoDXOzdTlc/S0tJGbRqqetrZ2cHNzQ0PHz4EhUIxDatc5xZaCV+h2KINtc04bKWEr8WrfHp7e+Pu3bsAAJlMhqqqKrRq1YpPs8yKk5OTpU3QCbXNONRty83NtaAlpmHxKp8hISE4fPgw+vXrhxMnTiAoKIj1fbtv3758mk2h2AS8p1k6ffo01q9fr6ryOX/+fI0qn3V1dViyZAlycnLg7u6Of/3rXyoBjkKhGI9V5lCjUCjs2ESEGoVCaQx1bgrFRqHOTaHYKNS5KRQbxSqDWIRASEgIXFxcIBaLIZFIkJSUZGmTVFRWVuKDDz7A77//DpFIhPXr16Nfv36WNgsAkJeXh/fee091XFBQgEWLFmHWrFmWM0qNb775BgcOHIBIJIKfnx82bNgAR0dHS5tlHIRiFMHBweTBgweWNoORpUuXkv379xNCCKmrqyMVFRUWtogZmUxGXnrpJVJYWGhpUwghhJSUlJDg4GBSU1NDCCFk0aJF5NChQxa2ynjoY7mNUVVVhUuXLmHSpEkAAAcHB7Ro0cLCVjHzyy+/oEOHDmjXTjibbORyOWprayGTyVBbWwsvLy9Lm2Q01LlNYM6cOYiKisK+ffssbYqKwsJCeHh4IDY2FuPHj8eKFSvw5MkTS5vFSEpKCsaOHWtpM1R4e3vj9ddfR3BwMIYOHQpXV1cMHTrU0mYZDXVuI0lMTMThw4fx1VdfYe/evbh06ZKlTQKgjM+/ceMGpk2bhiNHjsDZ2ZkxSYalefr0KdLT0zF69GhLm6KioqICaWlpSEtLw9mzZ1FTU4Pk5GRLm2U01LmNpGEDjKenJ8LCwpCdnW1hi5T4+PjAx8cHffr0AQCMHj0aN27csLBVjTlz5gz8/f3xj3/8w9KmqPj555/Rvn17eHh4wN7eHuHh4cjMzLS0WUZDndsInjx5gsePH6v+fe7cOXTv3t3CVilp3bo1fHx8kJeXB0D5Xtu1a1cLW9WYlJQUSKVSS5uhQdu2bZGVlYWamhoQQgT73XGFxpYbQUFBARYsWABAKcCMHTsW8+fPt7BVz8jJycGKFStQX1+PDh06YMOGDXB3d7e0WSqePHmC4OBgnDx5Em5ubpY2R4Nt27bh+PHjsLOzQ8+ePbFu3To4ODiwdxQg1LkpFBuFPpZTKDYKdW4KxUahzk2h2CjUuSkUG4U6N4Vio1DnplBsFOrcAuTChQt46623dF5PSkpCfHy82edNSkrSKBoREhKC8vJyzv1PnjyJTz/91GQ7MjIykJCQYPI4zR3q3BQVhw8fxr1794zuv2PHDvzzn/802Y4RI0YgIyMDNTW07LEp0GQNRvLkyRO8++67KCkpgUKhwNtvv42OHTti48a
NePLkCVq1aoUNGzbAy8sLM2fORI8ePXDp0iXI5XKsX78eAQEByM7Oxrp161BXVwcnJyesX78evr6+BtlRXl6OVatWobi4GACwfPly9O/fH9u3b0dxcTEKCwtRXFyM1157Da+++ioA4LPPPsPRo0fh4eGBNm3awN/fH+3atcO1a9fwf//3f3ByclLtdNuzZw8yMjIgk8nwySef6AzH/PPPP2Fvbw8PDw8AwP3797Fq1SoUFBQAAOLi4uDl5YW5c+eib9++yMzMRK9evTBx4kRs27YN5eXl2Lp1KwICAiASiTBo0CBkZGRgzJgxRv18KKDJGozlf//7H1mxYoXquLKykkyZMkWVwCElJYXExMQQQgiZMWOGqu3FixeJVColhBBSVVVF6uvrCSGEnDt3jixcuJAQQsj58+fJm2++qXPuQ4cOkdWrVxNCCFm8eDG5dOkSIYSQoqIiMnr0aEIIIdu2bSNTpkwhdXV15MGDB2TQoEHk6dOnJCsri0RERJDa2lpSVVVFwsLCyI4dO1R2Zmdnq+YJDg4mu3fvJoQQsmfPHrJ8+XKdNh08eJBs2LBBdRwdHU127txJCFEmZaisrCQFBQWkZ8+e5ObNm0Qul5MJEyaQmJgYolAoyI8//kjmz5+v6p+cnEzi4+N1zkdhh965jcTPzw+bNm3Cli1bEBwcjBYtWuD333/H7NmzAQAKhQKtW7dWtW/YJDFw4EA8fvwYlZWVqK6uxrJly3Dnzh2IRCLU19cbbMfPP/+sURb58ePHqK6uBgAMHz4cDg4O8PDwgIeHBx48eIArV65g5MiRcHR0hKOjI4KDg/WOHx4eDgDo1asXfvzxR53tysrKVHdtADh//jw2b94MAJBIJHBzc0NFRQXat2+PHj2Utcq7deuGwYMHQyQSoUePHigqKlL19/T0NOkVgUIfy42mS5cuSEpKwunTp/HJJ58gKCgI3bt315m4QbtEkkgkQkJCAl588UV89tlnKCwsVD02G4JCocD+/fsZ83ypb3iQSCSQyWQGj29vbw8AEIvFkMvlOts5OTmhqqqKdTx1m8RisepYJBJpjF9XV2e9ucsEAhXUjKS0tBTOzs6IjIzEnDlzkJWVhfLyctX+3/r6eo0icsePHwcA/Prrr3Bzc4ObmxuqqqpU+8IPHz5slB1Dhw7Ft99+qzpmq5wZGBiIjIwM1NXVobq6GqdOnVJdc3FxUd31DcXX1xd37txRHQ8ePBjfffcdAOXOOS6Or05+fj78/PyMsoWihN65jeT333/H5s2bIRaLYWdnh7i4ONjZ2WHt2rWoqqqCXC7Ha6+9ptrn7ejoiPHjx0Mmk2H9+vUAgLlz5yImJgaff/45hg8fbpQdK1asQHx8PMaNGwe5XI4BAwboXSYLCAhASEgIIiIi4OnpCT8/P9W2ywkTJmDVqlUaghpXBg4ciE2bNoEQApFIhBUrVuDDDz/EoUOHIBaLERcXp/GawsaFCxewePFig2ygaGHpl/7mgLZQZWkeP35MCCHkyZMnZMKECeTatWtmGXfNmjXk3LlzJo9TVlZGXn31VTNY1Lyhd+5myMqVK3H79m3U1dVhwoQJ8Pf3N8u48+bNQ1ZWlsnjFBcXIyYmxgwWNW9osgYBc+jQIezevVvjXGBgIFatWmUhi4RpE4UZ6twUio1C1XIKxUahzk2h2CjUuSkUG4U6N4Vio/w/c7frjhEncosAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "g = sns.FacetGrid(df, hue='target')\n", + "g.map(plt.scatter, 'sepal_length_(cm)', 'petal_width_(cm)');\n", + "g.add_legend();" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dnoGFA4MzW9o" + }, + "source": [ + "Можно все предыдущие графики вывести одной строчкой кода" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 743 + }, + "executionInfo": { + "elapsed": 12239, + "status": "ok", + "timestamp": 1614779829439, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "izSb9tJThvhk", + "outputId": "0a4d8076-27df-4520-bac8-004f756b4670" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 69, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light", + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.pairplot(df,hue='target',diag_kind=\"kde\",kind=\"scatter\",palette=\"husl\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 296 + }, + "executionInfo": { + "elapsed": 714, + "status": "ok", + "timestamp": 1614767558267, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "MOtqb-wJhvfD", + "outputId": "bc730c65-b8f4-4417-d1f5-68fcc327dbcb" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 62, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEGCAYAAACHGfl5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAfLklEQVR4nO3df1RUdeL/8eeACvIjVzBm/MFW+KNYLF1X2zxboihKiD83M8oy83SyTCy1zEzFTHTPlq3U6ZRHd5NyaY8bZSb+KFA8W0b6FaQUd+VjrLoGKlioo8Ov+/3DoiYVBp07w4/X4xzPmbk/3vOaAXnN3Dv3XothGAYiItKq+Xg7gIiIeJ/KQEREVAYiIqIyEBERVAYiIgK08XaAq5Gfn4+fn5+3Y4iINCsOh4O+fftedl6zLAM/Pz8iIyO9HUNEpFkpLCy84jzTNxO9/fbbjBw5koSEBGbNmoXD4XCaX1lZyVNPPUVsbCwTJkzg2LFjZkcSEZFfMLUMSktLSUtL4/333+fjjz+mpqaGTZs2OS2zfv16rrvuOj755BMefvhhXn75ZTMjiYjIZZj+yaCmpoYLFy5QXV3NhQsXCAsLc5qfnZ3NuHHjABgxYgS7du1CB0WLiHiWqfsMrFYrjzzyCEOGDMHPz48//OEP3HnnnU7LlJaW0rlz54th2rQhODiY06dPExIScsVxHQ5Hvdu+RESkcUwtg++//56srCyysrIIDg5m5syZbNiwgTFjxlzTuNqBLCLSeF7bgfz555/TrVs3QkJCaNu2LcOHDycvL89pGavVyrfffgtAdXU1Z86coWPHjmbGEhGRXzC1DLp06cK+ffs4f/48hmGwa9cuunfv7rRMTEwMH3zwAQBbt27ljjvuwGKxmBlLRER+wdTNRH369GHEiBGMGzeONm3aEBkZycSJE1m5ciW9e/dm6NCh3HPPPTzzzDPExsbSoUMHXn31VTMjiUgzt2XLFjIzM90+bnl5OUC9+yuvVnx8PHFxcW4f151MP+gsKSmJ
pKQkp2kzZ86su+3n50dqaqrZMURE6lVWVgaYUwbNQbM8AllEWq+4uDhT3mX/+Ka1tb451YnqREREZSAiIioDERFBZSAiIqgMREQElYGIiKAyEBERVAYiIoLKQEREUBmIiAgqAxERQWUgIiKoDEREBJWBiIigMhAREVQGIiKCykBERDD5SmeHDx/m6aefrrt/9OhRkpKSePjhh+um5ebm8sQTT9CtWzcAYmNjefLJJ82MJSIiv2BqGURERLBhwwYAampqGDRoELGxsZcs179/f9566y0zo4iISD08tplo165dhIeH07VrV089pIiIuMjUTwY/t2nTJhISEi47Lz8/n9GjRxMWFsbcuXPp2bNnvWM5HA4KCwvNiCkirZTdbgdotX9bPFIGlZWVZGdnM3v27EvmRUVFkZ2dTWBgIDk5OUyfPp1t27bVO56fnx+RkZFmxRWRViggIACgRf9tqa/oPLKZaOfOnURFRdGpU6dL5gUFBREYGAhAdHQ01dXVlJeXeyKWiIj8wCNlsGnTJkaOHHnZeSdPnsQwDAAKCgqora2lY8eOnoglIiI/MH0zkd1u5/PPP+fFF1+sm5aeng5AYmIiW7duJT09HV9fX/z9/VmxYgUWi8XsWCIi8jOml0FAQAC5ublO0xITE+tuT5o0iUmTJpkdQ0RE6qEjkEVERGUgIiIqAxERQWUgIiKoDEREBJWBiIigMhAREVQGIiKCykBERFAZiIgIKgMREUFlICIiqAxERASVgYiIoDIQERFUBiIigspARERQGYiICCoDERHB5DI4fPgwY8aMqfvXr18/3n77badlDMPgpZdeIjY2llGjRrF//34zI4mIyGW0MXPwiIgINmzYAEBNTQ2DBg0iNjbWaZmdO3dSXFzMtm3b2LdvH8nJyaxfv97MWCIi8gumlsHP7dq1i/DwcLp27eo0PSsri7Fjx2KxWOjbty8VFRWcOHGCsLAwT0WTVmbLli1kZmaaMnZ5eTkAISEhbh87Pj6euLg4t48rAh4sg02bNpGQkHDJ9NLSUmw2W919m81GaWlpvWXgcDgoLCw0Jae0fMePH8dut5sy9smTJwHw9/d3+9jHjx/X772JfvydaK2vsUfKoLKykuzsbGbPnu2W8fz8/IiMjHTLWNL6REZG8sgjj5gydlJSEgCpqammjC/mCQgIAGjRf1vqKzqPfJto586dREVF0alTp0vmWa1WSkpK6u6XlJRgtVo9EUtERH7gkTLYtGkTI0eOvOy8mJgYPvzwQwzDID8/n+DgYO0vEBHxMNM3E9ntdj7//HNefPHFumnp6ekAJCYmEh0dTU5ODrGxsbRv356UlBSzI4mIyVJTUykqKvJ2jEY5dOgQ8NOmvuaiR48ebslsehkEBASQm5vrNC0xMbHutsViYdGiRWbHEBEPKioqIm9/HvzK20ka4YftJHn/y/Nujsb4zn1DeezbRCLSyvwKagfXejtFi+azw31b+l0qA4fDwfbt29mzZw8nTpzA39+fnj17MnjwYHr27Om2MCIi4h0NlkFqaio7duzg9ttvp0+fPoSGhuJwOCguLuaVV17B4XAwd+5cbrnlFk/kFREREzRYBrfddtsVd05MmTKFsrIyjh8/7vZgIiLiOQ2WweDBg+udHxoaSmhoqLvyiIiIF7i8A/mrr77izTff5Pjx41RXV9dN37hxoynBRETEc1wugzlz5vDss8/Sq1cvfHx0GQQRkZbE5TIICQlh6NChZmYREREvcbkMkpKSmD9/PgMHDqRdu3Z104cPH25KMBER8RyXy+D999/n8OHDVFdXO20mUhmIiDR/jdqBvHXrVjOziIiIl7i8J7hfv37N7sRTIiLiGpc/GeTn5zN27Fi6du3qtM9AXy0VEWn+XC6D1atXm5lDRES8yOXNRCdPnqRDhw507dqVrl270qFDB06dOmVmNhER8RCXyyA5OZnAwMC6+wEBASQnJ5uRSUREPMzlMjAMA4vF8tOKPj5Op6UQEZHm
y+UyCA8PJy0tjaqqKqqqqli7di3h4eFmZhMREQ9xuQwWL15MXl4egwYNIjo6moKCApYsWdLgehUVFSQlJREXF8fdd99NXp7zJeVyc3P53e9+x5gxYxgzZgyvv/5645+FiIhcE5e/TRQaGsqrr77a6AdYunQpd911F6mpqVRWVnLhwoVLlunfvz9vvfVWo8cWERH3aLAM3njjDe6//35+9avLX9l6165dXLhwgSFDhlwy78yZM+zevZvly5cD0K5dO6djFESkZSovL4fv3HuNXrmM76C8fblbhmqwDHr16sW0adPw8/PjN7/5DSEhITgcDv773/9y8OBBBg4cyLRp0y677rFjxwgJCWHevHkcPHiQqKgo5s+fT0BAgNNy+fn5jB49mrCwMObOndvgdZUdDgeFhYWNeJoinmG32wFa/e9nZWWltyO0GpWVlW75fWuwDIYNG8awYcMoLi5m7969nDx5kqCgIEaPHs2SJUvw9/e/4rrV1dUcOHCABQsW0KdPH1566SVWrVrFU089VbdMVFQU2dnZBAYGkpOTw/Tp09m2bVu9mfz8/IiMjGzE0xTxjB/f6LT230+bzca3Nd9SO7jW21FaNJ8dPthsNpd/3+orDZf3Gdx4443ceOONV5y/ZMkSFixY4DTNZrNhs9no06cPAHFxcaxatcppmaCgoLrb0dHRLF68mPLyckJCQlyNJiIi18htG/T27t17ybTrr78em83G4cOHgYv7F7p37+60zMmTJzEMA4CCggJqa2vp2LGju2KJiIgLXP5kcLUWLFjAnDlzqKqqIjw8nGXLlpGeng5AYmIiW7duJT09HV9fX/z9/VmxYoXTwW0iImI+08sgMjKSjIwMp2mJiYl1tydNmsSkSZPMjiEiIvVw22aiHzf1iIhI8+O2MnjooYfcNZSIiHiYy5uJvvnmG9asWcPx48edTlCXlpYGwPjx492fTkREPMLlMpg5cyb33Xcf9957Lz4+OqpQRKQlcbkM2rRpw/33329mFhEnqampze6624cOHQIgKSnJy0lc16NHj2aVV8zRYBl89913AAwZMoR169YRGxvrdH6hK52zSORaFRUV8Z+v9/LroBpvR3HZdcbFr0VfKN7t5SSuOXLW19sRpIlosAzGjx+PxWKp+7bQmjVr6uZZLBaysrLMSyet3q+Danih/1lvx2ixXtoT1PBC0io0WAbZ2dnAxZPD+fn5Oc1zOBzmpBIREY9yeZ/BfffdxwcffNDgNBERoPmdwvrHS61c+dybTc93QFf3DNVgGZw8eZLS0lIuXLjAgQMH6jYXnT17lvPnz7snhYi0KD169PB2hEb7ced/z671n0K/Senqvte6wTL417/+RUZGBiUlJSxbtqxuemBgILNmzXJLCBFpWZrjt5N+zJyamurlJN7RYBmMGzeOcePGsXXrVkaMGOGJTCIi4mEu7zM4fvw4f/vb35ymBQUF0bt371Z/IQ8RkebO5TL4+uuv+frrr+uudbx9+3Zuvvlm3nvvPeLi4nj00UdNCykiIuZyuQxKSkrIyMggMDAQgBkzZvDYY4+xbt06xo8frzIQEWnGXP7eV1lZmdORx23btuXUqVP4+/s7TRcRkebH5U8Go0aN4t5772Xo0KHAxYPREhISsNvtl1zKUkREmheXy2D69Oncdddd5OXlAbB48WJuvfVWAF555RVz0omIiEc06rKXUVFRWK1Wamounjjs+PHjdOnSpd51KioqeOGFF/jPf/6DxWIhJSWF3/72t3XzDcNg6dKl5OTk4O/vz/Lly4mKirqKpyIiIlfL5TJ45513eP311+nUqZPT9Qw2btxY73pLly7lrrvuIjU1lcrKSi5cuOA0f+fOnRQXF7Nt2zb27dtHcnIy69evb+TTEBGRa+FyGaSlpbFlyxY6duzo8uBnzpxh9+7dLF++HIB27dpdsrM5KyuLsWPHYrFY6Nu3LxUVFZw4cYKwsDCXH0dERK6Ny98mstlsBAcHN2rwY8eO
ERISwrx58xg7dizz58/Hbrc7LVNaWorNZnN6nNLS0kY9joiIXBuXPxmEh4fz4IMPMnjwYKd391OmTLniOtXV1Rw4cIAFCxbQp08fXnrpJVatWsVTTz11TaEdDgeFhYXXNIY0fXa73fV3K3LV7Ha7/j9B3RvV1vpauFwGXbp0oUuXLlRVVVFVVeXSOjabDZvNRp8+fQCIi4tj1apVTstYrVZKSkrq7peUlGC1Wusd18/PT6fAaAUCAgK40PBico0CAgL0/4mLrwPQol+L+orO5TJ48sknATh//jzt27d3aZ3rr78em83G4cOHiYiIYNeuXZcckxATE8O7777LyJEj2bdvH8HBwdpfICLiYS6XQV5eXt02/x07dnDw4EHee+89kpOT611vwYIFzJkzh6qqKsLDw1m2bBnp6ekAJCYmEh0dTU5ODrGxsbRv356UlJRrekIiItJ4LpdBSkoKa9as4fHHHwfglltuYc+ePQ2uFxkZSUZGhtO0xMTEutsWi4VFixa5GkNakfLyck6e8dV1ek303zO+XF9e7u0Y0gQ0av9c586dnVf20e49EZGWwOVPBp07d2bv3r1YLBaqqqpIS0vTOYnEVCEhIQRU/B8v9D/r7Sgt1kt7gvAPCfF2DGkCXH5rn5yczLp16ygtLWXQoEEUFhaycOFCM7OJiIiHuPzJICQkRCekExFpoRosgyVLlmCxWK44/4UXXnBrIBER8bwGy6B3796eyNHsbNmyhczMTLePW/7DNztCTNiOGx8fT1xcnNvHFZHmr8EyGDdunEsDLVmyhAULFlxzoNaurKwMMKcMRESupFHXM6jP3r173TVUsxAXF2fKu+ykpCQAUlNT3T62iMiV6EABERFRGYiIiBvLwDAMdw0lIiIe5rYyeOihh9w1lIiIeFiDO5CnTZtW7/w333wTgPHjx7snkYiIeFyDZfDII494IoeIiHhRg2Vw++23eyKHiIh4kcvHGRQXF7NixQqKiopwOBx107OyskwJJiIinuNyGcybN4+kpCRSUlJIS0sjIyOD2tpaM7OJcORs87q4zfeVF8/j1aFd8/h23ZGzvvTydohGMutUMIcOHQJ+OvDTnZrDqWBcLgOHw8HAgQMB6Nq1KzNmzGD8+PHMnDnTtHDSuvXo0cPbERrt6A9/UKw39vRyEtf0onm+zmYIDQ31dgSvcrkM2rVrR21tLTfccAPvvvsuVquVc+fOmZlNWjkz3qGZTacTMZ9Zp4Jp7Vwug+eff57z58/zwgsvsHLlSr744gv+9Kc/NbheTEwMgYGB+Pj44Ovre8n1kHNzc3niiSfo1q0bALGxsTz55JONfBoiInItXC6D2267Dbh4pPH8+fMJCnJ9O+7atWvrPQtn//79eeutt1weT0RE3MvlMvjqq694/vnn6zYNBQUFkZKSousdiIi0AI3aTLRo0SL69+8PwJ49e5g3bx4bN25scN2pU6disViYOHEiEydOvGR+fn4+o0ePJiwsjLlz59KzZ/073xwOB4WFha5Gb1bsdjtAi31+LZ1+ftJcuVwGvr6+dUUAFzfttGnT8Orp6elYrVbKysqYMmUKERERDBgwoG5+VFQU2dnZBAYGkpOTw/Tp09m2bVu9Y/r5+REZGelq9GYlICAAoMU+v5ZOPz9pyup7k+LyieoGDBjAwoULyc3N5csvvyQ5OZnbb7+d/fv3s3///iuuZ7VagYtf24qNjaWgoMBpflBQEIGBgQBER0dTXV1dd+lHERHxDJc/GRw8eBCA119/3Wn6gQMHsFgspKWlXbKO3W6ntraWoKAg7HY7n332GU888YTTMidPnqRTp05YLBYKCgqora2lY8eOV/NcRETkKrlcBu+8806jBy8rK2P69OkA1NTUkJCQwKBBg0hPTwcgMTGRrVu3kp6ejq+vL/7+/qxYsQKLxdLoxxIRkavnchmcOnWKFStWcOLECVavXk1RURF5eXlMmDDhiuuEh4fz0UcfXTI9MTGx7vakSZOY
NGlSI2OLiIg7ubzP4LnnnuPOO+/kxIkTANx4442X3TQkIiLNj8tlcPr0aeLj4/HxubhKmzZt6m6LiEjz5vJf84CAAE6fPl23PT8/P5/g4GDTgomIiOe4vM/gueee4/HHH+fIkSPcd999nD59mpUrV5qZTUREPMTlMjhy5AirV6/m22+/ZevWrRQUFFBTU2NmNhER8RCXNxO98cYbBAUF8f3335Obm8v9999PcnKyidFERMRTXC4DX19fAHJycrj33nsZPHgwVVVVpgUTERHPcbkMrFYrCxcuJDMzk+joaCorK3XZSxGRFsLlMvjLX/7CnXfeyZo1a7juuuv47rvvePbZZ83MJiIiHuLyDuT27dszfPjwuvthYWGEhYWZEspdUlNTKSoq8naMRjHzotxm6tGjR7PLLCI/cbkMmqOioiLyvjpAbcCVr7LW1FhqLv5I/t//lXg5iet87DrLrEhz16LLAKA2IIQLv0nwdowWzf/Ax96OICLXSOeTEBERlYGIiKgMREQElYGIiKAyEBERVAYiIoIHvloaExNDYGAgPj4++Pr6kpGR4TTfMAyWLl1KTk4O/v7+LF++nKioKLNjiYjIz3jkOIO1a9cSEnL5A7927txJcXEx27ZtY9++fSQnJ7N+/XpPxBIRkR94/aCzrKwsxo4di8VioW/fvlRUVHDixAm3nOqivLwcH3uZDooymY+9jPLydt6OISLXwCNlMHXqVCwWCxMnTmTixIlO80pLS7HZbHX3bTYbpaWl9ZaBw+GgsLCwwcetrKy8+tDSKJWVlS79TFo6u90OoNdCmh3TyyA9PR2r1UpZWRlTpkwhIiKCAQMGXNOYfn5+REZGNriczWbjf+fQ6ShM5n/gY2w2m0s/k5YuICAAQK+FNEn1vUkx/dtEVqsVgNDQUGJjYykoKLhkfknJTydlKykpqVtHRMRTTp06xYwZMygrK/N2FK8wtQzsdjtnz56tu/3ZZ5/Rs2dPp2ViYmL48MMPMQyD/Px8goODm/ypsUWk5Vm7di0FBQWsXbvW21G8wtTNRGVlZUyfPh2AmpoaEhISGDRoEOnp6QAkJiYSHR1NTk4OsbGxtG/fnpSUFDMjiYhc4tSpU2zevBnDMNi8eTOTJ08mNDTU27E8ytQyCA8P56OPPrpkemJiYt1ti8XCokWLzIwh4mTLli1kZmaaMraZFyeKj48nLi7O7ePKxU8FhmEAUFtby9q1a5k1a5aXU3mWjkAWcaPQ0NBW946yJfjkk0+oqqoCoKqqim3btnk5ked5/TgDEU+Li4vTO2xxEhsbS2ZmJlVVVbRt29bpEr+thT4ZiEirN3nyZCwWCwA+Pj5MnjzZy4k8T2UgIq1ep06duPvuu7FYLNx9992tclNfi99M5GMvb1ano7BUnQfAaNvey0lc52MvB2wNLifSlE2ePJni4uJW+akAWngZ9OjRw9sRGu3Hb6P07N6c/rjamuVrLfJznTp14rXXXvN2DK9p0WVgxtf7zPZj5tTUVC8nEZHWRPsMREREZSAiIioDERFBZSAiIqgMREQElYGIiKAyEBERVAYiIoLKQEREUBmIiAgqAxERQWUgIiJ4qAxqamoYO3Ysjz322CXzMjIyuOOOOxgzZgxjxoxh/fr1nogkIiI/45GzlqalpdG9e3fOnj172fnx8fEsXLjQE1FEROQyTP9kUFJSwo4dO7jnnnvMfigREblKpn8ySElJ4ZlnnuHcuXNXXGbbtm3s3r2bm266iXnz5tG5c+d6x3Q4HBQWFro7apNgt9sBWuzzE5GmydQy2L59OyEhIfTu3Zvc3NzLLjNkyBASEhJo164d7733HnPnziUtLa3ecf38/IiMjDQjstcFBAQAtNjnJyLeU9+bTFM3E+3du5fs7GxiYmKYNWsWX3zxBXPmzHFapmPHjrRr1w6ACRMmsH//fjMjiYjIZZj6yWD27NnMnj0bgNzcXP7617/y8ssvOy1z4sQJwsLCAMjOzqZ79+5mRhIR
kcvwyjWQV65cSe/evRk6dCjvvPMO2dnZ+Pr60qFDB5YtW+aNSCIirZrFMAzD2yEaq7CwsMVuU09KSgIgNTXVy0lEpKWp72+njkAWERGVgYiIqAxERASVgYiI4KVvE7UEW7ZsITMz0+3jHjp0CPhpR7I7xcfHExcX5/ZxRaT5Uxk0MaGhod6OICKtkMrgKsXFxeldtoi0GNpnICIiKgMREVEZiIgIKgMREUFlICIiqAxERASVgYiIoDIQERGa6UFnDodDF4wXEWkkh8NxxXnN8uI2IiLiXtpMJCIiKgMREVEZiIgIKgMREUFlICIiqAxERASVQZOyc+dORowYQWxsLKtWrfJ2HGmEefPmMXDgQBISErwdRa7Ct99+y4MPPkh8fDwjR45k7dq13o7kcSqDJqKmpoYXX3yR1atXs2nTJj7++GOKioq8HUtcNH78eFavXu3tGHKVfH19ee6558jMzOQf//gHf//731vd/z+VQRNRUFDADTfcQHh4OO3atWPkyJFkZWV5O5a4aMCAAXTo0MHbMeQqhYWFERUVBUBQUBARERGUlpZ6OZVnqQyaiNLSUmw2W919q9Xa6n4ZRZqCY8eOUVhYSJ8+fbwdxaNUBiIiPzh37hxJSUk8//zzBAUFeTuOR6kMmgir1UpJSUnd/dLSUqxWqxcTibQuVVVVJCUlMWrUKIYPH+7tOB6nMmgibr31VoqLizl69CiVlZVs2rSJmJgYb8cSaRUMw2D+/PlEREQwZcoUb8fxCp21tAnJyckhJSWFmpoa/vjHP/L44497O5K4aNasWXz55ZecPn2a0NBQZsyYwYQJE7wdS1y0Z88eHnjgAXr16oWPz8X3yLNmzSI6OtrLyTxHZSAiItpMJCIiKgMREUFlICIiqAxERASVgYiIoDIQuayKigrWrVtn+uN8+umnre6EaNI0qQxELqOiooL09HSXlzcMg9ra2kY/jspAmgodZyByGU8//TRZWVncdNNN/P73v+ff//43FRUVVFdXM3PmTIYNG8axY8eYOnUqffr0Yf/+/axatYoPP/yQjz76iJCQEDp37kxUVBRTp07lyJEjLF68mNOnT+Pv78+SJUv4/vvvmTZtGkFBQQQHB/Paa6/x61//2ttPXVorQ0QucfToUWPkyJGGYRhGVVWVcebMGcMwDKOsrMwYNmyYUVtbaxw9etS4+eabjby8PMMwDGPfvn3G6NGjjQsXLhhnzpwxYmNjjdWrVxuGYRgPPfSQ8c033xiGYRj5+fnGgw8+aBiGYcydO9fYvHmzh5+dyKXaeLuMRJo6wzBYsWIFu3fvxsfHh9LSUk6dOgVAly5d6Nu3LwB79+5l6NCh+Pn54efnx5AhQ4CLZ8LMy8tj5syZdWNWVlZ6/omI1ENlINKAjRs3Ul5eTkZGBm3btiUmJgaHwwFAQEBAg+sbhsF1113Hhg0bzI4qctW0A1nkMgIDAzl37hwAZ86cITQ0lLZt2/LFF1/wv//977Lr9OvXj+3bt+NwODh37hw7duwALl45q1u3bmzevBm4WA4HDx685HFEvEllIHIZHTt2pF+/fiQkJHDw4EG+/vprRo0axYYNG4iIiLjsOrfddhsxMTGMHj2aRx99lF69ehEcHAzAn//8Z/75z38yevRoRo4cyaeffgpAfHw8a9asYezYsRw5csRjz0/kl/RtIhE3OnfuHIGBgZw/f54HHniAJUuW1F1bV6Qp0z4DETdauHAhRUVFOBwOxo0bpyKQZkOfDERERPsMREREZSAiIqgMREQElYGIiKAyEBER4P8DoFbm8gVbN0UAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=\"target\", y=\"sepal_length_(cm)\", data=df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 299 + }, + "executionInfo": { + "elapsed": 648, + "status": "ok", + "timestamp": 1614767566285, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "pop0xJy808kv", + "outputId": "0c951aff-d432-4cab-ba5c-fd913d8256c9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 63, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=\"target\", y=\"sepal_width_(cm)\", data=df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 296 + }, + "executionInfo": { + "elapsed": 1021, + "status": "ok", + "timestamp": 1614767569562, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "zL6zrC0108t4", + "outputId": "41a4f4ed-c685-4b02-adec-77ede4cf6761" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 64, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=\"target\", y=\"petal_length_(cm)\", data=df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 296 + }, + "executionInfo": { + "elapsed": 672, + "status": "ok", + "timestamp": 1614767574695, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "gt-CS-v80841", + "outputId": "17e5b682-a469-43a9-fe36-3ad50c2b4449" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 65, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=\"target\", y=\"petal_width_(cm)\", data=df)" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyPGZA72+5Brg/wHtKFk27jK", + "collapsed_sections": [], + "name": "01_Pandas.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} From 29da91805833b7a23cd3305bc318b6f052e915bd Mon Sep 17 00:00:00 2001 From: ooonush Date: Sat, 12 Mar 2022 10:44:29 +0500 Subject: [PATCH 3/5] =?UTF-8?q?pd=20=D0=B4=D0=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Pandas (06.03)/Pandas. Task. Part 1.ipynb | 914 +++++++++++++++++++++- 1 file changed, 913 insertions(+), 1 deletion(-) diff --git a/Pandas (06.03)/Pandas. Task. Part 1.ipynb b/Pandas (06.03)/Pandas. Task. Part 1.ipynb index 5172e85..3760429 100644 --- a/Pandas (06.03)/Pandas. Task. Part 1.ipynb +++ b/Pandas (06.03)/Pandas. Task. 
Part 1.ipynb @@ -1 +1,913 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"anaconda-cloud":{},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.6"},"colab":{"name":"01_task_pandas.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"UTKVH3sMutTM"},"source":["**В задании предлагается с помощью Pandas ответить на несколько вопросов по данным репозитория UCI [Adult](https://archive.ics.uci.edu/ml/datasets/Adult)**"]},{"cell_type":"markdown","metadata":{"id":"3lUT-CqYutTO"},"source":["Уникальные значения признаков (больше информации по ссылке выше):\n","- age: continuous.\n","- workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.\n","- fnlwgt: continuous.\n","- education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.\n","- education-num: continuous.\n","- marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.\n","- occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.\n","- relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.\n","- race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.\n","- sex: Female, Male.\n","- capital-gain: continuous.\n","- capital-loss: continuous.\n","- hours-per-week: continuous.\n","- native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, 
China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. \n","- salary: >50K,<=50K"]},{"cell_type":"code","metadata":{"id":"6GzulHvOutTR"},"source":["import pandas as pd"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"SJ3LbaoiutTT","colab":{"base_uri":"https://localhost:8080/","height":380},"executionInfo":{"status":"ok","timestamp":1626441443051,"user_tz":-300,"elapsed":499,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"eab110b9-0f5f-4bcd-db91-328a0b391379"},"source":["data = pd.read_csv(\"https://raw.githubusercontent.com/aksenov7/Kaggle_competition_group/master/adult.data.csv\")\n","data.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n","
"],"text/plain":[" age workclass fnlwgt ... hours-per-week native-country salary\n","0 39 State-gov 77516 ... 40 United-States <=50K\n","1 50 Self-emp-not-inc 83311 ... 13 United-States <=50K\n","2 38 Private 215646 ... 40 United-States <=50K\n","3 53 Private 234721 ... 40 United-States <=50K\n","4 28 Private 338409 ... 40 Cuba <=50K\n","\n","[5 rows x 15 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"EpQFv8t1ds05"},"source":["# def married(row):\n","# return \"Married\" in row\n","data[\"married\"] = data[\"marital-status\"].apply(lambda row: \"Married\" in row)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":756},"id":"3Bb2mRTEeoJK","executionInfo":{"status":"ok","timestamp":1626441731759,"user_tz":-300,"elapsed":481,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"9dd7d83b-f51a-4e11-f6dc-035a844f81c9"},"source":["data"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalarymarried
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50KFalse
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50KTrue
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50KFalse
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50KTrue
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50KTrue
...................................................
3255627Private257302Assoc-acdm12Married-civ-spouseTech-supportWifeWhiteFemale0038United-States<=50KTrue
3255740Private154374HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>50KTrue
3255858Private151910HS-grad9WidowedAdm-clericalUnmarriedWhiteFemale0040United-States<=50KFalse
3255922Private201490HS-grad9Never-marriedAdm-clericalOwn-childWhiteMale0020United-States<=50KFalse
3256052Self-emp-inc287927HS-grad9Married-civ-spouseExec-managerialWifeWhiteFemale15024040United-States>50KTrue
\n","

32561 rows × 16 columns

\n","
"],"text/plain":[" age workclass fnlwgt ... native-country salary married\n","0 39 State-gov 77516 ... United-States <=50K False\n","1 50 Self-emp-not-inc 83311 ... United-States <=50K True\n","2 38 Private 215646 ... United-States <=50K False\n","3 53 Private 234721 ... United-States <=50K True\n","4 28 Private 338409 ... Cuba <=50K True\n","... ... ... ... ... ... ... ...\n","32556 27 Private 257302 ... United-States <=50K True\n","32557 40 Private 154374 ... United-States >50K True\n","32558 58 Private 151910 ... United-States <=50K False\n","32559 22 Private 201490 ... United-States <=50K False\n","32560 52 Self-emp-inc 287927 ... United-States >50K True\n","\n","[32561 rows x 16 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"markdown","metadata":{"id":"MoK8B5fIutTW"},"source":["**1. Сколько мужчин и женщин (признак *sex*) представлено в этом наборе данных?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"hdzky90TutTY"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"adF8lgVbutTZ"},"source":["**2. Каков средний возраст (признак *age*) женщин?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"K6C2qZ_zutTb"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"-Cz1S7-HutTd"},"source":["**3. Какова доля граждан Германии (признак *native-country*)?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"Y4mmqN6outTf"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Do-rEgaautTg"},"source":["**4-5. Каковы средние значения и среднеквадратичные отклонения возраста тех, кто получает более 50K в год (признак *salary*) и тех, кто получает менее 50K в год? 
**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"eSuk0CAnutTh"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rK9SwvI_utTj"},"source":["**6. Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование? (признак *education – Bachelors, Prof-school, Assoc-acdm, Assoc-voc, Masters* или *Doctorate*)**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"eygYabkdutTj"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4DqPASEsutTk"},"source":["**7. Выведите статистику возраста для каждой расы (признак *race*) и каждого пола. Используйте *groupby* и *describe*. Найдите таким образом максимальный возраст мужчин расы *Amer-Indian-Eskimo*.**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"fYkBDZMdutTl"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"cn-jYXhzutTl"},"source":["**8. Среди кого больше доля зарабатывающих много (>50K): среди женатых или холостых мужчин (признак *marital-status*)? Женатыми считаем тех, у кого *marital-status* начинается с *Married* (Married-civ-spouse, Married-spouse-absent или Married-AF-spouse), остальных считаем холостыми.**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"4hIQXgGAutTm"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Rsh8YvoXutTm"},"source":["**9. Какое максимальное число часов человек работает в неделю (признак *hours-per-week*)? Сколько людей работают такое количество часов и каков среди них процент зарабатывающих много?**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"RK1JQSIZutTn"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"kUXV84AjutTn"},"source":["**10. 
Посчитайте среднее время работы (*hours-per-week*) зарабатывающих мало и много (*salary*) для каждой страны (*native-country*).**"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"3gzYG3CDutTn"},"source":["# Ваш код здесь"],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "UTKVH3sMutTM" + }, + "source": [ + "**В задании предлагается с помощью Pandas ответить на несколько вопросов по данным репозитория UCI [Adult](https://archive.ics.uci.edu/ml/datasets/Adult)**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3lUT-CqYutTO" + }, + "source": [ + "Уникальные значения признаков (больше информации по ссылке выше):\n", + "- age: continuous.\n", + "- workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.\n", + "- fnlwgt: continuous.\n", + "- education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.\n", + "- education-num: continuous.\n", + "- marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.\n", + "- occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.\n", + "- relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.\n", + "- race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.\n", + "- sex: Female, Male.\n", + "- capital-gain: continuous.\n", + "- capital-loss: continuous.\n", + "- hours-per-week: continuous.\n", + "- native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, 
Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. \n", + "- salary: >50K,<=50K" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "6GzulHvOutTR" + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 380 + }, + "executionInfo": { + "elapsed": 499, + "status": "ok", + "timestamp": 1626441443051, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "SJ3LbaoiutTT", + "outputId": "eab110b9-0f5f-4bcd-db91-328a0b391379" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalary
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n", + "
" + ], + "text/plain": [ + " age workclass fnlwgt education education-num \\\n", + "0 39 State-gov 77516 Bachelors 13 \n", + "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", + "2 38 Private 215646 HS-grad 9 \n", + "3 53 Private 234721 11th 7 \n", + "4 28 Private 338409 Bachelors 13 \n", + "\n", + " marital-status occupation relationship race sex \\\n", + "0 Never-married Adm-clerical Not-in-family White Male \n", + "1 Married-civ-spouse Exec-managerial Husband White Male \n", + "2 Divorced Handlers-cleaners Not-in-family White Male \n", + "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", + "4 Married-civ-spouse Prof-specialty Wife Black Female \n", + "\n", + " capital-gain capital-loss hours-per-week native-country salary \n", + "0 2174 0 40 United-States <=50K \n", + "1 0 0 13 United-States <=50K \n", + "2 0 0 40 United-States <=50K \n", + "3 0 0 40 United-States <=50K \n", + "4 0 0 40 Cuba <=50K " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(\"https://raw.githubusercontent.com/aksenov7/Kaggle_competition_group/master/adult.data.csv\")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "EpQFv8t1ds05" + }, + "outputs": [], + "source": [ + "# def married(row):\n", + "# return \"Married\" in row\n", + "data[\"married\"] = data[\"marital-status\"].apply(lambda row: \"Married\" in row)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 756 + }, + "executionInfo": { + "elapsed": 481, + "status": "ok", + "timestamp": 1626441731759, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "3Bb2mRTEeoJK", + "outputId": "9dd7d83b-f51a-4e11-f6dc-035a844f81c9" + }, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countrysalarymarried
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50KFalse
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50KTrue
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50KFalse
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50KTrue
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50KTrue
...................................................
3255627Private257302Assoc-acdm12Married-civ-spouseTech-supportWifeWhiteFemale0038United-States<=50KTrue
3255740Private154374HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States>50KTrue
3255858Private151910HS-grad9WidowedAdm-clericalUnmarriedWhiteFemale0040United-States<=50KFalse
3255922Private201490HS-grad9Never-marriedAdm-clericalOwn-childWhiteMale0020United-States<=50KFalse
3256052Self-emp-inc287927HS-grad9Married-civ-spouseExec-managerialWifeWhiteFemale15024040United-States>50KTrue
\n", + "

32561 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " age workclass fnlwgt education education-num \\\n", + "0 39 State-gov 77516 Bachelors 13 \n", + "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", + "2 38 Private 215646 HS-grad 9 \n", + "3 53 Private 234721 11th 7 \n", + "4 28 Private 338409 Bachelors 13 \n", + "... ... ... ... ... ... \n", + "32556 27 Private 257302 Assoc-acdm 12 \n", + "32557 40 Private 154374 HS-grad 9 \n", + "32558 58 Private 151910 HS-grad 9 \n", + "32559 22 Private 201490 HS-grad 9 \n", + "32560 52 Self-emp-inc 287927 HS-grad 9 \n", + "\n", + " marital-status occupation relationship race sex \\\n", + "0 Never-married Adm-clerical Not-in-family White Male \n", + "1 Married-civ-spouse Exec-managerial Husband White Male \n", + "2 Divorced Handlers-cleaners Not-in-family White Male \n", + "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", + "4 Married-civ-spouse Prof-specialty Wife Black Female \n", + "... ... ... ... ... ... \n", + "32556 Married-civ-spouse Tech-support Wife White Female \n", + "32557 Married-civ-spouse Machine-op-inspct Husband White Male \n", + "32558 Widowed Adm-clerical Unmarried White Female \n", + "32559 Never-married Adm-clerical Own-child White Male \n", + "32560 Married-civ-spouse Exec-managerial Wife White Female \n", + "\n", + " capital-gain capital-loss hours-per-week native-country salary \\\n", + "0 2174 0 40 United-States <=50K \n", + "1 0 0 13 United-States <=50K \n", + "2 0 0 40 United-States <=50K \n", + "3 0 0 40 United-States <=50K \n", + "4 0 0 40 Cuba <=50K \n", + "... ... ... ... ... ... \n", + "32556 0 0 38 United-States <=50K \n", + "32557 0 0 40 United-States >50K \n", + "32558 0 0 40 United-States <=50K \n", + "32559 0 0 20 United-States <=50K \n", + "32560 15024 0 40 United-States >50K \n", + "\n", + " married \n", + "0 False \n", + "1 True \n", + "2 False \n", + "3 True \n", + "4 True \n", + "... ... 
\n", + "32556 True \n", + "32557 True \n", + "32558 False \n", + "32559 False \n", + "32560 True \n", + "\n", + "[32561 rows x 16 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MoK8B5fIutTW" + }, + "source": [ + "**1. Сколько мужчин и женщин (признак *sex*) представлено в этом наборе данных?**" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "hdzky90TutTY" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "21790 10771\n" + ] + } + ], + "source": [ + "women = data[data['sex'] == 'Female']\n", + "men = data[data['sex'] == 'Male']\n", + "print(len(men), len(women))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "adF8lgVbutTZ" + }, + "source": [ + "**2. Каков средний возраст (признак *age*) женщин?**" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "K6C2qZ_zutTb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "36.85823043357163\n" + ] + } + ], + "source": [ + "print(women['age'].mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-Cz1S7-HutTd" + }, + "source": [ + "**3. Какова доля граждан Германии (признак *native-country*)?**" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "Y4mmqN6outTf" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.42074874850281013\n" + ] + } + ], + "source": [ + "df=data.groupby(\"native-country\").size()/len(data)*100\n", + "print(df[\"Germany\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Do-rEgaautTg" + }, + "source": [ + "**4-5. Каковы средние значения и среднеквадратичные отклонения возраста тех, кто получает более 50K в год (признак *salary*) и тех, кто получает менее 50K в год? 
**" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "eSuk0CAnutTh" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "средние <= 50 36.78373786407767\n", + "средние > 50 44.24984058155847\n", + "СКО <= 50df 14.020088490824866\n", + "СКО > 50df 10.519027719851843\n" + ] + } + ], + "source": [ + "df=data.groupby(\"salary\")[\"age\"].mean()\n", + "print('средние <= 50', df[\"<=50K\"])\n", + "print('средние > 50', df[\">50K\"])\n", + "\n", + "df=data.groupby(\"salary\")[\"age\"].std()\n", + "print(\"СКО <= 50df\", df[\"<=50K\"])\n", + "print(\"СКО > 50df\", df[\">50K\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rK9SwvI_utTj" + }, + "source": [ + "**6. Правда ли, что люди, которые получают больше 50k, имеют как минимум высшее образование? (признак *education – Bachelors, Prof-school, Assoc-acdm, Assoc-voc, Masters* или *Doctorate*)**" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "eygYabkdutTj" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] + } + ], + "source": [ + "df=data[data[\"salary\"]==\">50K\"]\n", + "count_people=len(df[df[\"education\"]==\"Bachelors\"])+len(df[df[\"education\"]==\"Prof-school\"])+len(df[df[\"education\"]==\"Assoc-acdm\"])+len(df[df[\"education\"]==\"Assoc-voc\"])+len(df[df[\"education\"]==\"Masters\"])+len(df[df[\"education\"]==\"Doctorate\"])\n", + "print(len(df)==count_people)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4DqPASEsutTk" + }, + "source": [ + "**7. Выведите статистику возраста для каждой расы (признак *race*) и каждого пола. Используйте *groupby* и *describe*. 
Найдите таким образом максимальный возраст мужчин расы *Amer-Indian-Eskimo*.**" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "fYkBDZMdutTl" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "82.0\n" + ] + } + ], + "source": [ + "df=data.groupby([\"race\",\"sex\"]).describe().loc['Amer-Indian-Eskimo'].loc['Male']\n", + "print(df[\"age\"][\"max\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cn-jYXhzutTl" + }, + "source": [ + "**8. Среди кого больше доля зарабатывающих много (>50K): среди женатых или холостых мужчин (признак *marital-status*)? Женатыми считаем тех, у кого *marital-status* начинается с *Married* (Married-civ-spouse, Married-spouse-absent или Married-AF-spouse), остальных считаем холостыми.**" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "4hIQXgGAutTm" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "df=data[data[\"salary\"]==\">50K\"]\n", + "df=df[df[\"sex\"]==\"Male\"]\n", + "count_men=len(df[df[\"marital-status\"]==\"Married-civ-spouse\"])+len(df[df[\"marital-status\"]==\"Married-spouse-absent\"])+len(df[df[\"marital-status\"]==\"Married-AF-spouse\"])\n", + "print(count_men>len(df)-count_men)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rsh8YvoXutTm" + }, + "source": [ + "**9. Какое максимальное число часов человек работает в неделю (признак *hours-per-week*)? 
Сколько людей работают такое количество часов и каков среди них процент зарабатывающих много?**" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "RK1JQSIZutTn" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "99.0\n", + "85\n", + "0.29411764705882354\n" + ] + } + ], + "source": [ + "print(data['hours-per-week'].describe()['max'])\n", + "h = data[data['hours-per-week'] == data['hours-per-week'].describe()['max']]\n", + "print(len(h))\n", + "print(h['salary'].value_counts(normalize=True).loc['>50K'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kUXV84AjutTn" + }, + "source": [ + "**10. Посчитайте среднее время работы (*hours-per-week*) зарабатывающих мало и много (*salary*) для каждой страны (*native-country*).**" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "id": "3gzYG3CDutTn" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "native-country salary\n", + "? <=50K 40.164760\n", + " >50K 45.547945\n", + "Cambodia <=50K 41.416667\n", + " >50K 40.000000\n", + "Canada <=50K 37.914634\n", + " ... 
\n", + "United-States >50K 45.505369\n", + "Vietnam <=50K 37.193548\n", + " >50K 39.200000\n", + "Yugoslavia <=50K 41.600000\n", + " >50K 49.500000\n", + "Name: hours-per-week, Length: 82, dtype: float64\n" + ] + } + ], + "source": [ + "print(data.groupby(['native-country', 'salary'])['hours-per-week'].mean())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "colab": { + "collapsed_sections": [], + "name": "01_task_pandas.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} From 0a3c87d1e685c89f90d466bdc0305d1eedb42547 Mon Sep 17 00:00:00 2001 From: ooonush Date: Sat, 19 Mar 2022 14:49:58 +0500 Subject: [PATCH 4/5] Pandas 2 --- Numpy (26.02)/Numpy_Lecture.ipynb | 34 +- Pandas (06.03)/Pandas. Lecture. Part 2.ipynb | 7354 +++++++++++++++++- Pandas (06.03)/Pandas. Task. 
Part 2.ipynb | 672 +- 3 files changed, 8039 insertions(+), 21 deletions(-) diff --git a/Numpy (26.02)/Numpy_Lecture.ipynb b/Numpy (26.02)/Numpy_Lecture.ipynb index b7b1491..6665dd0 100644 --- a/Numpy (26.02)/Numpy_Lecture.ipynb +++ b/Numpy (26.02)/Numpy_Lecture.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "id": "m8T33eQfSuf6" }, @@ -1237,7 +1237,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1268,13 +1268,13 @@ }, { "ename": "ValueError", - "evalue": "ignored", + "evalue": "operands could not be broadcast together with shapes (2,3) (3,2) ", "output_type": "error", "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0md\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m6\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mc\u001b[0m \u001b[0;34m+\u001b[0m 
\u001b[0md\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (2,3) (3,2) " + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0md\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m4\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m6\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0md\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 6\u001b[1;33m \u001b[0mc\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0md\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mValueError\u001b[0m: operands could not be broadcast together with shapes (2,3) (3,2) " ] } ], @@ -1330,7 +1330,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1357,10 +1357,8 @@ " [24, 25, 26]])" ] }, - "execution_count": 36, - "metadata": { - "tags": [] - }, + "execution_count": 5, + "metadata": {}, "output_type": "execute_result" } ], @@ -1733,7 +1731,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, 
"metadata": { "id": "q7IVVJ4X9q__" }, @@ -1748,7 +1746,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1774,10 +1772,8 @@ "23" ] }, - "execution_count": 115, - "metadata": { - "tags": [] - }, + "execution_count": 7, + "metadata": {}, "output_type": "execute_result" } ], @@ -3504,7 +3500,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.8" } }, "nbformat": 4, diff --git a/Pandas (06.03)/Pandas. Lecture. Part 2.ipynb b/Pandas (06.03)/Pandas. Lecture. Part 2.ipynb index fb80887..e4b09b8 100644 --- a/Pandas (06.03)/Pandas. Lecture. Part 2.ipynb +++ b/Pandas (06.03)/Pandas. Lecture. Part 2.ipynb @@ -1 +1,7353 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"anaconda-cloud":{},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.1"},"name":"seminar02_part2_pandas.ipynb","colab":{"name":"02_Pandas.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"FzQ_ch0ktU7n"},"source":["#
Первичный анализ данных с Pandas
"]},{"cell_type":"code","metadata":{"collapsed":true,"scrolled":true,"id":"Parpx34utU7s","executionInfo":{"status":"ok","timestamp":1633609636856,"user_tz":-300,"elapsed":631,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["import numpy as np\n","import pandas as pd"],"execution_count":5,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"QxIKAzfCtU7u"},"source":["Данные, с которыми работают дата саентисты и аналитики, обычно хранятся в виде табличек — например, в форматах `.csv`, `.tsv` или `.xlsx`. Для того, чтобы считать нужные данные из такого файла, отлично подходит библиотека Pandas.\n","\n","Основными структурами данных в Pandas являются классы `Series` и `DataFrame`. Первый из них представляет собой одномерный индексированный массив данных некоторого фиксированного типа. Второй - это двухмерная структура данных, представляющая собой таблицу, каждый столбец которой содержит данные одного типа. Можно представлять её как словарь объектов типа `Series`. 
Структура `DataFrame` отлично подходит для представления реальных данных: строки соответствуют признаковым описаниям отдельных объектов, а столбцы соответствуют признакам."]},{"cell_type":"markdown","metadata":{"id":"l_Ell72CtU7w"},"source":["---------\n","\n","## Демонстрация основных методов Pandas \n"]},{"cell_type":"markdown","metadata":{"id":"YMu_ER8WtU7y"},"source":["### Чтение из файла и первичный анализ"]},{"cell_type":"markdown","metadata":{"id":"efGYx1kqtU7z"},"source":["Прочитаем данные и посмотрим на первые 5 строк с помощью метода `head`:"]},{"cell_type":"code","metadata":{"collapsed":true,"scrolled":true,"id":"ByXZK9MFtU71","executionInfo":{"status":"ok","timestamp":1633609637892,"user_tz":-300,"elapsed":597,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["df = pd.read_csv(\"https://raw.githubusercontent.com/Yorko/mlcourse.ai/master/data/telecom_churn.csv\")"],"execution_count":6,"outputs":[]},{"cell_type":"code","metadata":{"scrolled":true,"id":"hFaFpz2utU73","colab":{"base_uri":"https://localhost:8080/","height":241},"executionInfo":{"status":"ok","timestamp":1633609637895,"user_tz":-300,"elapsed":77,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"cbd457e9-c2bd-4beb-a1fa-c7ba8a4c5b97"},"source":["df.head()"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
0KS128415NoYes25265.111045.07197.49916.78244.79111.0110.032.701False
1OH107415NoYes26161.612327.47195.510316.62254.410311.4513.733.701False
2NJ137415NoNo0243.411441.38121.211010.30162.61047.3212.253.290False
3OH84408YesNo0299.47150.9061.9885.26196.9898.866.671.782False
4OK75415YesNo0166.711328.34148.312212.61186.91218.4110.132.733False
\n","
"],"text/plain":[" State Account length ... Customer service calls Churn\n","0 KS 128 ... 1 False\n","1 OH 107 ... 1 False\n","2 NJ 137 ... 0 False\n","3 OH 84 ... 2 False\n","4 OK 75 ... 3 False\n","\n","[5 rows x 20 columns]"]},"metadata":{},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"CpV496POtU75"},"source":["В Jupyter-ноутбуках датафреймы `Pandas` выводятся в виде вот таких красивых табличек, и `print(df.head())` выглядит хуже.\n","\n","Кстати, по умолчанию `Pandas` выводит всего 20 столбцов и 60 строк, поэтому если ваш датафрейм больше, воспользуйтесь функцией `set_option`:"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"CYFyCCGGtU77","executionInfo":{"status":"ok","timestamp":1633609637897,"user_tz":-300,"elapsed":68,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["# задание проанализировать все опции и выбрать 3-5 самых полезных по личному мнению \n","# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.set_option.html\n","pd.set_option(\"display.max_columns\", 100)\n","pd.set_option(\"display.max_rows\", 100)"],"execution_count":8,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"CbfNn4a9tU78"},"source":["А также укажем значение параметра `presicion` равным 2, чтобы отображать два знака после запятой (а не 6, как установлено по умолчанию."]},{"cell_type":"code","metadata":{"collapsed":true,"id":"-0MCBxGItU78","executionInfo":{"status":"ok","timestamp":1633609637899,"user_tz":-300,"elapsed":67,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["pd.set_option(\"precision\", 2)"],"execution_count":9,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Cu652IOYtU79"},"source":["**Посмотрим на размер данных, названия признаков и их 
типы**"]},{"cell_type":"code","metadata":{"id":"LQw6THQytU79","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609637901,"user_tz":-300,"elapsed":66,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"b2d6d2f1-a6d1-47c6-e4bb-5c5f33834c4a"},"source":["print(df.shape)"],"execution_count":10,"outputs":[{"output_type":"stream","name":"stdout","text":["(3333, 20)\n"]}]},{"cell_type":"markdown","metadata":{"id":"LJEPKleBtU7-"},"source":["Видим, что в таблице 3333 строки и 20 столбцов. Выведем названия столбцов:"]},{"cell_type":"code","metadata":{"id":"CQArdzC8tU7_","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609637903,"user_tz":-300,"elapsed":57,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"08e4c81f-5a94-4589-c4d3-c6792128de13"},"source":["print(df.columns)"],"execution_count":11,"outputs":[{"output_type":"stream","name":"stdout","text":["Index(['State', 'Account length', 'Area code', 'International plan',\n"," 'Voice mail plan', 'Number vmail messages', 'Total day minutes',\n"," 'Total day calls', 'Total day charge', 'Total eve minutes',\n"," 'Total eve calls', 'Total eve charge', 'Total night minutes',\n"," 'Total night calls', 'Total night charge', 'Total intl minutes',\n"," 'Total intl calls', 'Total intl charge', 'Customer service calls',\n"," 'Churn'],\n"," dtype='object')\n"]}]},{"cell_type":"markdown","metadata":{"id":"RoZn1MpBtU8A"},"source":["Чтобы посмотреть общую информацию по датафрейму и всем признакам, воспользуемся методом 
**`info`**:"]},{"cell_type":"code","metadata":{"scrolled":false,"id":"W_ZF3eM8tU8B","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609637906,"user_tz":-300,"elapsed":54,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"b4d58f04-d867-458f-bb5e-7b91fbdc9cd9"},"source":["print(df.info())"],"execution_count":12,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","RangeIndex: 3333 entries, 0 to 3332\n","Data columns (total 20 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 State 3333 non-null object \n"," 1 Account length 3333 non-null int64 \n"," 2 Area code 3333 non-null int64 \n"," 3 International plan 3333 non-null object \n"," 4 Voice mail plan 3333 non-null object \n"," 5 Number vmail messages 3333 non-null int64 \n"," 6 Total day minutes 3333 non-null float64\n"," 7 Total day calls 3333 non-null int64 \n"," 8 Total day charge 3333 non-null float64\n"," 9 Total eve minutes 3333 non-null float64\n"," 10 Total eve calls 3333 non-null int64 \n"," 11 Total eve charge 3333 non-null float64\n"," 12 Total night minutes 3333 non-null float64\n"," 13 Total night calls 3333 non-null int64 \n"," 14 Total night charge 3333 non-null float64\n"," 15 Total intl minutes 3333 non-null float64\n"," 16 Total intl calls 3333 non-null int64 \n"," 17 Total intl charge 3333 non-null float64\n"," 18 Customer service calls 3333 non-null int64 \n"," 19 Churn 3333 non-null bool \n","dtypes: bool(1), float64(8), int64(8), object(3)\n","memory usage: 498.1+ KB\n","None\n"]}]},{"cell_type":"markdown","metadata":{"id":"FYDNyB6CtU8C"},"source":["`bool`, `int64`, `float64` и `object` — это типы признаков. 
Видим, что 1 признак — логический (`bool`), 3 признака имеют тип `object` и 16 признаков — числовые.\n","\n","**Изменить тип колонки** можно с помощью метода `astype`. Применим этот метод к признаку `Churn` и переведём его в `int64`:"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"byRJQVM5tU8D","executionInfo":{"status":"ok","timestamp":1633609637909,"user_tz":-300,"elapsed":48,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["df[\"Churn\"] = df[\"Churn\"].astype(\"int64\")"],"execution_count":13,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"sBTm0lLYtU8D"},"source":["Метод **`describe`** показывает основные статистические характеристики данных по каждому числовому признаку (типы `int64` и `float64`): число непропущенных значений, среднее, стандартное отклонение, диапазон, медиану, 0.25 и 0.75 квартили."]},{"cell_type":"code","metadata":{"id":"bAsmrRI6tU8D","colab":{"base_uri":"https://localhost:8080/","height":335},"executionInfo":{"status":"ok","timestamp":1633609637911,"user_tz":-300,"elapsed":48,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"32a7192a-b49b-4be7-9b6e-9b7f08f57731"},"source":["df.describe()"],"execution_count":14,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Account lengthArea codeNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
count3333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.00
mean101.06437.188.10179.78100.4430.56200.98100.1117.08200.87100.119.0410.244.482.761.560.14
std39.8242.3713.6954.4720.079.2650.7119.924.3150.5719.572.282.792.460.751.320.35
min1.00408.000.000.000.000.000.000.000.0023.2033.001.040.000.000.000.000.00
25%74.00408.000.00143.7087.0024.43166.6087.0014.16167.0087.007.528.503.002.301.000.00
50%101.00415.000.00179.40101.0030.50201.40100.0017.12201.20100.009.0510.304.002.781.000.00
75%127.00510.0020.00216.40114.0036.79235.30114.0020.00235.30113.0010.5912.106.003.272.000.00
max243.00510.0051.00350.80165.0059.64363.70170.0030.91395.00175.0017.7720.0020.005.409.001.00
\n","
"],"text/plain":[" Account length Area code Number vmail messages Total day minutes \\\n","count 3333.00 3333.00 3333.00 3333.00 \n","mean 101.06 437.18 8.10 179.78 \n","std 39.82 42.37 13.69 54.47 \n","min 1.00 408.00 0.00 0.00 \n","25% 74.00 408.00 0.00 143.70 \n","50% 101.00 415.00 0.00 179.40 \n","75% 127.00 510.00 20.00 216.40 \n","max 243.00 510.00 51.00 350.80 \n","\n"," Total day calls Total day charge Total eve minutes Total eve calls \\\n","count 3333.00 3333.00 3333.00 3333.00 \n","mean 100.44 30.56 200.98 100.11 \n","std 20.07 9.26 50.71 19.92 \n","min 0.00 0.00 0.00 0.00 \n","25% 87.00 24.43 166.60 87.00 \n","50% 101.00 30.50 201.40 100.00 \n","75% 114.00 36.79 235.30 114.00 \n","max 165.00 59.64 363.70 170.00 \n","\n"," Total eve charge Total night minutes Total night calls \\\n","count 3333.00 3333.00 3333.00 \n","mean 17.08 200.87 100.11 \n","std 4.31 50.57 19.57 \n","min 0.00 23.20 33.00 \n","25% 14.16 167.00 87.00 \n","50% 17.12 201.20 100.00 \n","75% 20.00 235.30 113.00 \n","max 30.91 395.00 175.00 \n","\n"," Total night charge Total intl minutes Total intl calls \\\n","count 3333.00 3333.00 3333.00 \n","mean 9.04 10.24 4.48 \n","std 2.28 2.79 2.46 \n","min 1.04 0.00 0.00 \n","25% 7.52 8.50 3.00 \n","50% 9.05 10.30 4.00 \n","75% 10.59 12.10 6.00 \n","max 17.77 20.00 20.00 \n","\n"," Total intl charge Customer service calls Churn \n","count 3333.00 3333.00 3333.00 \n","mean 2.76 1.56 0.14 \n","std 0.75 1.32 0.35 \n","min 0.00 0.00 0.00 \n","25% 2.30 1.00 0.00 \n","50% 2.78 1.00 0.00 \n","75% 3.27 2.00 0.00 \n","max 5.40 9.00 1.00 "]},"metadata":{},"execution_count":14}]},{"cell_type":"markdown","metadata":{"id":"l6MzhnkotU8D"},"source":["Чтобы посмотреть статистику по нечисловым признакам, нужно явно указать интересующие нас типы в параметре `include`. 
Можно также задать `include='all'`, чтобы вывести статистику по всем имеющимся признакам."]},{"cell_type":"code","metadata":{"scrolled":true,"id":"ewJscFGZtU8F","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609638506,"user_tz":-300,"elapsed":639,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"187fb398-e4bf-4c36-f3ff-e395013e994f"},"source":["df.describe(include=[\"object\", \"bool\"])"],"execution_count":15,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateInternational planVoice mail plan
count333333333333
unique5122
topWVNoNo
freq10630102411
\n","
"],"text/plain":[" State International plan Voice mail plan\n","count 3333 3333 3333\n","unique 51 2 2\n","top WV No No\n","freq 106 3010 2411"]},"metadata":{},"execution_count":15}]},{"cell_type":"markdown","metadata":{"id":"1qbs0vug9TCh"},"source":["Тот же принцип работает при выборе столбцов указанного типа."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":424},"id":"tbL3f9OD9Tg7","executionInfo":{"status":"ok","timestamp":1633609638538,"user_tz":-300,"elapsed":120,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"1489c427-200c-45fa-f127-369a97e46ea8"},"source":["df.select_dtypes(include=['object', 'bool']) # exclude"],"execution_count":16,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateInternational planVoice mail plan
0KSNoYes
1OHNoYes
2NJNoNo
3OHYesNo
4OKYesNo
............
3328AZNoYes
3329WVNoNo
3330RINoNo
3331CTYesNo
3332TNNoYes
\n","

3333 rows × 3 columns

\n","
"],"text/plain":[" State International plan Voice mail plan\n","0 KS No Yes\n","1 OH No Yes\n","2 NJ No No\n","3 OH Yes No\n","4 OK Yes No\n","... ... ... ...\n","3328 AZ No Yes\n","3329 WV No No\n","3330 RI No No\n","3331 CT Yes No\n","3332 TN No Yes\n","\n","[3333 rows x 3 columns]"]},"metadata":{},"execution_count":16}]},{"cell_type":"markdown","metadata":{"id":"Ge-uZsFvtU8G"},"source":["Для категориальных (тип `object`) и булевых (тип `bool`) признаков можно воспользоваться методом **`value_counts`**. Посмотрим на распределение нашей целевой переменной — `Churn`:"]},{"cell_type":"code","metadata":{"id":"eeDu-JiYtU8G","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638540,"user_tz":-300,"elapsed":115,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"19761b7d-d89b-49eb-e4bd-371bd68907d7"},"source":["df[\"Churn\"].value_counts()"],"execution_count":17,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 2850\n","1 483\n","Name: Churn, dtype: int64"]},"metadata":{},"execution_count":17}]},{"cell_type":"markdown","metadata":{"id":"KANMt5q2tU8I"},"source":["2850 пользователей из 3333 — лояльные, значение переменной `Churn` у них — `0`.\n","\n","Посмотрим на распределение пользователей по переменной `Area code`. 
Укажем значение параметра `normalize=True`, чтобы посмотреть не абсолютные частоты, а относительные."]},{"cell_type":"code","metadata":{"id":"pMenDSyHtU8I","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638542,"user_tz":-300,"elapsed":109,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"a99c176c-d2b0-45b9-e54f-653c1f060dd0"},"source":["df[\"Area code\"].value_counts(normalize=True)"],"execution_count":18,"outputs":[{"output_type":"execute_result","data":{"text/plain":["415 0.50\n","510 0.25\n","408 0.25\n","Name: Area code, dtype: float64"]},"metadata":{},"execution_count":18}]},{"cell_type":"markdown","metadata":{"id":"l4ikQZaptU8I"},"source":["### Сортировка\n","\n","`DataFrame` можно отсортировать по значению какого-нибудь из признаков. В нашем случае, например, по `Total day charge` (`ascending=False` для сортировки по убыванию):"]},{"cell_type":"code","metadata":{"id":"GrbzIXBQtU8J","colab":{"base_uri":"https://localhost:8080/","height":241},"executionInfo":{"status":"ok","timestamp":1633609638544,"user_tz":-300,"elapsed":102,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"7cf76892-8c0d-42fa-fa98-aa49f8c2ab6e"},"source":["df.sort_values(by=\"Total day charge\", ascending=False).head()"],"execution_count":19,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
365CO154415NoNo0350.87559.64216.59418.40253.910011.4310.192.7311
985NY64415YesNo0346.85558.96249.57921.21275.410212.3913.393.5911
2594OH115510YesNo0345.38158.70203.410617.29217.51079.7911.883.1911
156OH83415NoNo0337.412057.36227.411619.33153.91146.9315.874.2701
605MO112415NoNo0335.57757.04212.510918.06265.013211.9312.783.4321
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","365 CO 154 415 No No \n","985 NY 64 415 Yes No \n","2594 OH 115 510 Yes No \n","156 OH 83 415 No No \n","605 MO 112 415 No No \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","365 0 350.8 75 \n","985 0 346.8 55 \n","2594 0 345.3 81 \n","156 0 337.4 120 \n","605 0 335.5 77 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","365 59.64 216.5 94 18.40 \n","985 58.96 249.5 79 21.21 \n","2594 58.70 203.4 106 17.29 \n","156 57.36 227.4 116 19.33 \n","605 57.04 212.5 109 18.06 \n","\n"," Total night minutes Total night calls Total night charge \\\n","365 253.9 100 11.43 \n","985 275.4 102 12.39 \n","2594 217.5 107 9.79 \n","156 153.9 114 6.93 \n","605 265.0 132 11.93 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","365 10.1 9 2.73 \n","985 13.3 9 3.59 \n","2594 11.8 8 3.19 \n","156 15.8 7 4.27 \n","605 12.7 8 3.43 \n","\n"," Customer service calls Churn \n","365 1 1 \n","985 1 1 \n","2594 1 1 \n","156 0 1 \n","605 2 1 "]},"metadata":{},"execution_count":19}]},{"cell_type":"markdown","metadata":{"id":"apUOhvc_tU8J"},"source":["Сортировать можно и по группе столбцов:"]},{"cell_type":"code","metadata":{"id":"KUU1Xp63tU8K","colab":{"base_uri":"https://localhost:8080/","height":241},"executionInfo":{"status":"ok","timestamp":1633609638545,"user_tz":-300,"elapsed":100,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"0bbacb6a-bbf7-4697-b720-20033f341ff3"},"source":["df.sort_values(by=[\"Churn\", \"Total day charge\"], ascending=[True, False]).head()"],"execution_count":20,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
688MN13510NoYes21315.610553.65208.97117.76260.112311.7012.133.2730
2259NC210415NoYes31313.88753.35147.710312.55192.7978.6710.172.7330
534LA67510NoNo0310.49752.7766.51235.65246.59911.099.2102.4840
575SD114415NoYes36309.99052.68200.38917.03183.51058.2614.223.8310
2858AL141510NoYes28308.012352.36247.812821.06152.91036.887.432.0010
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","688 MN 13 510 No Yes \n","2259 NC 210 415 No Yes \n","534 LA 67 510 No No \n","575 SD 114 415 No Yes \n","2858 AL 141 510 No Yes \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","688 21 315.6 105 \n","2259 31 313.8 87 \n","534 0 310.4 97 \n","575 36 309.9 90 \n","2858 28 308.0 123 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","688 53.65 208.9 71 17.76 \n","2259 53.35 147.7 103 12.55 \n","534 52.77 66.5 123 5.65 \n","575 52.68 200.3 89 17.03 \n","2858 52.36 247.8 128 21.06 \n","\n"," Total night minutes Total night calls Total night charge \\\n","688 260.1 123 11.70 \n","2259 192.7 97 8.67 \n","534 246.5 99 11.09 \n","575 183.5 105 8.26 \n","2858 152.9 103 6.88 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","688 12.1 3 3.27 \n","2259 10.1 7 2.73 \n","534 9.2 10 2.48 \n","575 14.2 2 3.83 \n","2858 7.4 3 2.00 \n","\n"," Customer service calls Churn \n","688 3 0 \n","2259 3 0 \n","534 4 0 \n","575 1 0 \n","2858 1 0 "]},"metadata":{},"execution_count":20}]},{"cell_type":"markdown","metadata":{"id":"VCTKeJUYtU8L"},"source":["### Индексация и извлечение данных"]},{"cell_type":"markdown","metadata":{"id":"lveNXBbztU8L"},"source":["`DataFrame` можно индексировать по-разному. В связи с этим рассмотрим различные способы индексации и извлечения нужных нам данных из датафрейма на примере простых вопросов.\n","\n","Для извлечения отдельного столбца можно использовать конструкцию вида `DataFrame['Name']`. 
Воспользуемся этим для ответа на вопрос: **какова доля нелояльных пользователей в нашем датафрейме?**"]},{"cell_type":"code","metadata":{"id":"FLaA5u1ztU8L","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638547,"user_tz":-300,"elapsed":98,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"d1b61bde-7b0c-45d0-c2e9-32d9bb9539c0"},"source":["df[\"Churn\"].mean()"],"execution_count":21,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.14491449144914492"]},"metadata":{},"execution_count":21}]},{"cell_type":"markdown","metadata":{"id":"QiJUnpEJtU8M"},"source":["14,5% — довольно плохой показатель для компании, с таким процентом оттока можно и разориться."]},{"cell_type":"markdown","metadata":{"id":"2v6CRyJ3tU8M"},"source":["Очень удобной является логическая индексация `DataFrame` по одному столбцу. Выглядит она следующим образом: `df[P(df['Name'])]`, где `P` — это некоторое логическое условие, проверяемое для каждого элемента столбца `Name`. Итогом такой индексации является `DataFrame`, состоящий только из строк, удовлетворяющих условию `P` по столбцу `Name`. 
\n","\n","Воспользуемся этим для ответа на вопрос: **каковы средние значения числовых признаков среди нелояльных пользователей?**"]},{"cell_type":"code","metadata":{"scrolled":true,"id":"0G0_4zPytU8O","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638548,"user_tz":-300,"elapsed":90,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"79d763ca-3a4e-4408-f218-e5996dbd68bb"},"source":["df[df[\"Churn\"] == 1].mean()"],"execution_count":22,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Account length 102.66\n","Area code 437.82\n","Number vmail messages 5.12\n","Total day minutes 206.91\n","Total day calls 101.34\n","Total day charge 35.18\n","Total eve minutes 212.41\n","Total eve calls 100.56\n","Total eve charge 18.05\n","Total night minutes 205.23\n","Total night calls 100.40\n","Total night charge 9.24\n","Total intl minutes 10.70\n","Total intl calls 4.16\n","Total intl charge 2.89\n","Customer service calls 2.23\n","Churn 1.00\n","dtype: float64"]},"metadata":{},"execution_count":22}]},{"cell_type":"markdown","metadata":{"id":"vX7Kv82ztU8O"},"source":["Скомбинировав предыдущие два вида индексации, ответим на вопрос: **сколько в среднем в течение дня разговаривают по телефону нелояльные пользователи**?"]},{"cell_type":"code","metadata":{"id":"ZmpzMz9LtU8O","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638551,"user_tz":-300,"elapsed":87,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"f4ef2f49-5d18-4228-b513-96402e23b1b4"},"source":["df[df[\"Churn\"] == 1][\"Total day 
minutes\"].mean()"],"execution_count":23,"outputs":[{"output_type":"execute_result","data":{"text/plain":["206.91407867494814"]},"metadata":{},"execution_count":23}]},{"cell_type":"markdown","metadata":{"id":"rME2EKe8tU8P"},"source":["**Какова максимальная длина международных звонков среди лояльных пользователей (`Churn == 0`), не пользующихся услугой международного роуминга (`'International plan' == 'No'`)?**"]},{"cell_type":"code","metadata":{"id":"DQ0H-bJttU8Q","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638552,"user_tz":-300,"elapsed":82,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"3c8a6304-7ede-495b-f2cc-dcf70beb252f"},"source":["df[(df[\"Churn\"] == 0) & (df[\"International plan\"] == \"No\")][\"Total intl minutes\"].max()"],"execution_count":24,"outputs":[{"output_type":"execute_result","data":{"text/plain":["18.9"]},"metadata":{},"execution_count":24}]},{"cell_type":"markdown","metadata":{"id":"f6IelrO4tU8Q"},"source":["Датафреймы можно индексировать как по названию столбца или строки, так и по порядковому номеру. Для индексации **по названию** используется индексатор **`loc`**, **по номеру** — **`iloc`**.\n","\n","В первом случае (`df.loc[0:5, 'State':'Area code']`) мы говорим _«передай нам значения для id строк от 0 до 5 и для столбцов от State до Area code»_, а во втором (`df.iloc[0:5, 0:3]`) — _«передай нам значения первых пяти строк в первых трёх столбцах»_. \n","\n","В случае `iloc` срез работает как обычно, однако в случае `loc` учитываются и начало, и конец среза. 
Да, неудобно, да, вызывает путаницу."]},{"cell_type":"code","metadata":{"scrolled":true,"id":"Pp82lj7ktU8R","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1633609638554,"user_tz":-300,"elapsed":78,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"8e2a9392-b3f0-44ee-e383-b19a46f8d708"},"source":["d = df.copy()\n","d = d.drop_duplicates('State')\n","d = d.set_index('State')\n","# d = d.reset_index() # сбрасываем столбец-индекс не удаляя его\n","d = d.reset_index(drop=True) # сбрасываем столбец-индекс удаляя его\n","d\n","# d.loc['KS':'OK','Area code':'Total day minutes']"],"execution_count":25,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Account lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
0128415NoYes25265.111045.07197.49916.78244.79111.0110.032.7010
1107415NoYes26161.612327.47195.510316.62254.410311.4513.733.7010
2137415NoNo0243.411441.38121.211010.30162.61047.3212.253.2900
375415YesNo0166.711328.34148.312212.61186.91218.4110.132.7330
4118510YesNo0223.49837.98220.610118.75203.91189.186.361.7000
5121510NoYes24218.28837.09348.510829.62212.61189.577.572.0330
6147415YesNo0157.07926.69103.1948.76211.8969.537.161.9200
7117408NoNo0184.59731.37351.68029.89215.8909.718.742.3510
8141415YesYes37258.68443.96222.011118.87326.49714.6911.253.0200
965415NoNo0129.113721.95228.58319.42208.81119.4012.763.4341
1074415NoNo0187.712731.91163.414813.89196.0948.829.152.4600
11168408NoNo0128.89621.90104.9718.92141.11286.3511.223.0210
1295510NoNo0156.68826.62247.67521.05192.31158.6512.353.3230
13161415NoNo0332.96756.59317.89727.01160.61287.235.491.4641
1485408NoYes27196.413933.39280.99023.8889.3754.0213.843.7310
1593510NoNo0190.711432.42218.211118.55129.61215.838.132.1930
1676510NoYes33189.76632.25212.86518.09165.71087.4610.052.7010
1773415NoNo0224.49038.15159.58813.56192.8748.6813.023.5110
18147415NoNo0155.111726.37239.79320.37208.81339.4010.642.8600
1977408NoNo062.48910.61169.912114.44209.6649.435.761.5451
20130415NoNo0183.011231.1172.9996.20181.8788.189.5192.5700
21111415NoNo0110.410318.77137.310211.67189.61058.537.762.0820
22174415NoNo0124.37621.13277.111223.55250.711511.2815.554.1930
2357408NoYes39213.011536.21191.111216.24182.71158.229.532.5700
2449510NoNo0119.311720.28215.110918.28178.7908.0411.113.0010
25142415NoNo084.89514.42136.76311.62250.514811.2714.263.8320
2675510NoNo0226.110538.44201.510717.13246.29811.0810.352.7810
2772415NoYes37220.08037.40217.310218.47152.8716.8814.763.9730
2836408NoYes30146.312824.87162.58013.81129.31095.8214.563.9200
29135408YesYes41173.18529.43203.910717.33122.2785.5014.6153.9401
3034510NoNo0124.88221.22282.29823.99311.57814.0210.042.7020
3164510NoNo0154.06726.18225.811819.19265.38611.943.530.9510
3259408NoYes28120.99720.55213.09218.11163.11167.348.552.3020
3365415NoNo0211.312035.92162.612213.82134.71186.0613.253.5630
34142408NoNo0187.013331.79134.67411.44242.212710.907.452.0020
3596415NoNo0160.211727.23267.56722.74228.56810.289.352.5120
36116415NoYes34268.68345.66178.214215.15166.31067.4811.633.1320
3774510NoYes33193.79132.93246.19620.92138.0926.2114.633.9420
38149408NoYes28180.79230.72187.86415.96265.55311.9512.633.4030
3938408NoNo0131.29822.30162.99713.85159.01067.158.262.2120
4040415NoYes41148.17425.18169.58814.41214.11029.636.251.6720
41147510NoNo0248.68342.26148.98512.66172.51097.768.042.1630
4290415NoNo0203.414634.58226.711719.27152.41056.867.341.9710
4382415NoNo0300.310951.05181.010015.39270.17312.1511.743.1601
4474415NoYes35154.110426.20123.48410.49202.1579.0910.992.9420
4578415NoNo0252.99342.99178.411215.16263.910511.889.572.5730
46120408NoNo0212.113136.06209.410417.80167.2967.525.351.4311
4778415NoNo0149.711925.45182.211515.49261.512611.779.782.6200
4882415NoYes24155.213126.38244.510620.78122.4685.5110.732.8910
49199415NoYes34230.612139.20219.49918.65299.39413.478.022.1600
5079408NoNo0205.712334.97214.510818.23226.110610.176.7181.8110
\n","
"],"text/plain":[" Account length Area code International plan Voice mail plan \\\n","0 128 415 No Yes \n","1 107 415 No Yes \n","2 137 415 No No \n","3 75 415 Yes No \n","4 118 510 Yes No \n","5 121 510 No Yes \n","6 147 415 Yes No \n","7 117 408 No No \n","8 141 415 Yes Yes \n","9 65 415 No No \n","10 74 415 No No \n","11 168 408 No No \n","12 95 510 No No \n","13 161 415 No No \n","14 85 408 No Yes \n","15 93 510 No No \n","16 76 510 No Yes \n","17 73 415 No No \n","18 147 415 No No \n","19 77 408 No No \n","20 130 415 No No \n","21 111 415 No No \n","22 174 415 No No \n","23 57 408 No Yes \n","24 49 510 No No \n","25 142 415 No No \n","26 75 510 No No \n","27 72 415 No Yes \n","28 36 408 No Yes \n","29 135 408 Yes Yes \n","30 34 510 No No \n","31 64 510 No No \n","32 59 408 No Yes \n","33 65 415 No No \n","34 142 408 No No \n","35 96 415 No No \n","36 116 415 No Yes \n","37 74 510 No Yes \n","38 149 408 No Yes \n","39 38 408 No No \n","40 40 415 No Yes \n","41 147 510 No No \n","42 90 415 No No \n","43 82 415 No No \n","44 74 415 No Yes \n","45 78 415 No No \n","46 120 408 No No \n","47 78 415 No No \n","48 82 415 No Yes \n","49 199 415 No Yes \n","50 79 408 No No \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","1 26 161.6 123 \n","2 0 243.4 114 \n","3 0 166.7 113 \n","4 0 223.4 98 \n","5 24 218.2 88 \n","6 0 157.0 79 \n","7 0 184.5 97 \n","8 37 258.6 84 \n","9 0 129.1 137 \n","10 0 187.7 127 \n","11 0 128.8 96 \n","12 0 156.6 88 \n","13 0 332.9 67 \n","14 27 196.4 139 \n","15 0 190.7 114 \n","16 33 189.7 66 \n","17 0 224.4 90 \n","18 0 155.1 117 \n","19 0 62.4 89 \n","20 0 183.0 112 \n","21 0 110.4 103 \n","22 0 124.3 76 \n","23 39 213.0 115 \n","24 0 119.3 117 \n","25 0 84.8 95 \n","26 0 226.1 105 \n","27 37 220.0 80 \n","28 30 146.3 128 \n","29 41 173.1 85 \n","30 0 124.8 82 \n","31 0 154.0 67 \n","32 28 120.9 97 \n","33 0 211.3 120 \n","34 0 187.0 133 \n","35 0 160.2 117 \n","36 34 268.6 83 \n","37 33 193.7 91 
\n","38 28 180.7 92 \n","39 0 131.2 98 \n","40 41 148.1 74 \n","41 0 248.6 83 \n","42 0 203.4 146 \n","43 0 300.3 109 \n","44 35 154.1 104 \n","45 0 252.9 93 \n","46 0 212.1 131 \n","47 0 149.7 119 \n","48 24 155.2 131 \n","49 34 230.6 121 \n","50 0 205.7 123 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","1 27.47 195.5 103 16.62 \n","2 41.38 121.2 110 10.30 \n","3 28.34 148.3 122 12.61 \n","4 37.98 220.6 101 18.75 \n","5 37.09 348.5 108 29.62 \n","6 26.69 103.1 94 8.76 \n","7 31.37 351.6 80 29.89 \n","8 43.96 222.0 111 18.87 \n","9 21.95 228.5 83 19.42 \n","10 31.91 163.4 148 13.89 \n","11 21.90 104.9 71 8.92 \n","12 26.62 247.6 75 21.05 \n","13 56.59 317.8 97 27.01 \n","14 33.39 280.9 90 23.88 \n","15 32.42 218.2 111 18.55 \n","16 32.25 212.8 65 18.09 \n","17 38.15 159.5 88 13.56 \n","18 26.37 239.7 93 20.37 \n","19 10.61 169.9 121 14.44 \n","20 31.11 72.9 99 6.20 \n","21 18.77 137.3 102 11.67 \n","22 21.13 277.1 112 23.55 \n","23 36.21 191.1 112 16.24 \n","24 20.28 215.1 109 18.28 \n","25 14.42 136.7 63 11.62 \n","26 38.44 201.5 107 17.13 \n","27 37.40 217.3 102 18.47 \n","28 24.87 162.5 80 13.81 \n","29 29.43 203.9 107 17.33 \n","30 21.22 282.2 98 23.99 \n","31 26.18 225.8 118 19.19 \n","32 20.55 213.0 92 18.11 \n","33 35.92 162.6 122 13.82 \n","34 31.79 134.6 74 11.44 \n","35 27.23 267.5 67 22.74 \n","36 45.66 178.2 142 15.15 \n","37 32.93 246.1 96 20.92 \n","38 30.72 187.8 64 15.96 \n","39 22.30 162.9 97 13.85 \n","40 25.18 169.5 88 14.41 \n","41 42.26 148.9 85 12.66 \n","42 34.58 226.7 117 19.27 \n","43 51.05 181.0 100 15.39 \n","44 26.20 123.4 84 10.49 \n","45 42.99 178.4 112 15.16 \n","46 36.06 209.4 104 17.80 \n","47 25.45 182.2 115 15.49 \n","48 26.38 244.5 106 20.78 \n","49 39.20 219.4 99 18.65 \n","50 34.97 214.5 108 18.23 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","1 254.4 103 11.45 \n","2 162.6 104 7.32 \n","3 186.9 121 8.41 \n","4 
203.9 118 9.18 \n","5 212.6 118 9.57 \n","6 211.8 96 9.53 \n","7 215.8 90 9.71 \n","8 326.4 97 14.69 \n","9 208.8 111 9.40 \n","10 196.0 94 8.82 \n","11 141.1 128 6.35 \n","12 192.3 115 8.65 \n","13 160.6 128 7.23 \n","14 89.3 75 4.02 \n","15 129.6 121 5.83 \n","16 165.7 108 7.46 \n","17 192.8 74 8.68 \n","18 208.8 133 9.40 \n","19 209.6 64 9.43 \n","20 181.8 78 8.18 \n","21 189.6 105 8.53 \n","22 250.7 115 11.28 \n","23 182.7 115 8.22 \n","24 178.7 90 8.04 \n","25 250.5 148 11.27 \n","26 246.2 98 11.08 \n","27 152.8 71 6.88 \n","28 129.3 109 5.82 \n","29 122.2 78 5.50 \n","30 311.5 78 14.02 \n","31 265.3 86 11.94 \n","32 163.1 116 7.34 \n","33 134.7 118 6.06 \n","34 242.2 127 10.90 \n","35 228.5 68 10.28 \n","36 166.3 106 7.48 \n","37 138.0 92 6.21 \n","38 265.5 53 11.95 \n","39 159.0 106 7.15 \n","40 214.1 102 9.63 \n","41 172.5 109 7.76 \n","42 152.4 105 6.86 \n","43 270.1 73 12.15 \n","44 202.1 57 9.09 \n","45 263.9 105 11.88 \n","46 167.2 96 7.52 \n","47 261.5 126 11.77 \n","48 122.4 68 5.51 \n","49 299.3 94 13.47 \n","50 226.1 106 10.17 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","1 13.7 3 3.70 \n","2 12.2 5 3.29 \n","3 10.1 3 2.73 \n","4 6.3 6 1.70 \n","5 7.5 7 2.03 \n","6 7.1 6 1.92 \n","7 8.7 4 2.35 \n","8 11.2 5 3.02 \n","9 12.7 6 3.43 \n","10 9.1 5 2.46 \n","11 11.2 2 3.02 \n","12 12.3 5 3.32 \n","13 5.4 9 1.46 \n","14 13.8 4 3.73 \n","15 8.1 3 2.19 \n","16 10.0 5 2.70 \n","17 13.0 2 3.51 \n","18 10.6 4 2.86 \n","19 5.7 6 1.54 \n","20 9.5 19 2.57 \n","21 7.7 6 2.08 \n","22 15.5 5 4.19 \n","23 9.5 3 2.57 \n","24 11.1 1 3.00 \n","25 14.2 6 3.83 \n","26 10.3 5 2.78 \n","27 14.7 6 3.97 \n","28 14.5 6 3.92 \n","29 14.6 15 3.94 \n","30 10.0 4 2.70 \n","31 3.5 3 0.95 \n","32 8.5 5 2.30 \n","33 13.2 5 3.56 \n","34 7.4 5 2.00 \n","35 9.3 5 2.51 \n","36 11.6 3 3.13 \n","37 14.6 3 3.94 \n","38 12.6 3 3.40 \n","39 8.2 6 2.21 \n","40 6.2 5 1.67 \n","41 8.0 4 2.16 \n","42 7.3 4 1.97 \n","43 11.7 4 3.16 \n","44 10.9 9 2.94 
\n","45 9.5 7 2.57 \n","46 5.3 5 1.43 \n","47 9.7 8 2.62 \n","48 10.7 3 2.89 \n","49 8.0 2 2.16 \n","50 6.7 18 1.81 \n","\n"," Customer service calls Churn \n","0 1 0 \n","1 1 0 \n","2 0 0 \n","3 3 0 \n","4 0 0 \n","5 3 0 \n","6 0 0 \n","7 1 0 \n","8 0 0 \n","9 4 1 \n","10 0 0 \n","11 1 0 \n","12 3 0 \n","13 4 1 \n","14 1 0 \n","15 3 0 \n","16 1 0 \n","17 1 0 \n","18 0 0 \n","19 5 1 \n","20 0 0 \n","21 2 0 \n","22 3 0 \n","23 0 0 \n","24 1 0 \n","25 2 0 \n","26 1 0 \n","27 3 0 \n","28 0 0 \n","29 0 1 \n","30 2 0 \n","31 1 0 \n","32 2 0 \n","33 3 0 \n","34 2 0 \n","35 2 0 \n","36 2 0 \n","37 2 0 \n","38 3 0 \n","39 2 0 \n","40 2 0 \n","41 3 0 \n","42 1 0 \n","43 0 1 \n","44 2 0 \n","45 3 0 \n","46 1 1 \n","47 0 0 \n","48 1 0 \n","49 0 0 \n","50 1 0 "]},"metadata":{},"execution_count":25}]},{"cell_type":"code","metadata":{"scrolled":true,"id":"qGN5gaALtU8R","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1633609638556,"user_tz":-300,"elapsed":75,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"1f803cde-6306-4ebe-cd3a-1cb7ad85010e"},"source":["df.iloc[0:5, 0:3]"],"execution_count":26,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea code
0KS128415
1OH107415
2NJ137415
3OH84408
4OK75415
\n","
"],"text/plain":[" State Account length Area code\n","0 KS 128 415\n","1 OH 107 415\n","2 NJ 137 415\n","3 OH 84 408\n","4 OK 75 415"]},"metadata":{},"execution_count":26}]},{"cell_type":"markdown","metadata":{"id":"UCMKdcx9tU8S"},"source":["Метод `ix` индексирует и по названию, и по номеру, но он вызывает путаницу, и поэтому был объявлен устаревшим (deprecated)."]},{"cell_type":"markdown","metadata":{"id":"HnMAXWTAtU8S"},"source":["Если нам нужна первая или последняя строчка датафрейма, пользуемся конструкцией `df[:1]` или `df[-1:]`:"]},{"cell_type":"code","metadata":{"scrolled":true,"id":"OrwoqAGPtU8U","colab":{"base_uri":"https://localhost:8080/","height":115},"executionInfo":{"status":"ok","timestamp":1633609638558,"user_tz":-300,"elapsed":74,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"53bc6332-8c03-4b98-9335-295812d859cd"},"source":["df[-1:]"],"execution_count":27,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
3332TN74415NoYes25234.411339.85265.98222.6241.47710.8613.743.700
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","3332 TN 74 415 No Yes \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","3332 25 234.4 113 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","3332 39.85 265.9 82 22.6 \n","\n"," Total night minutes Total night calls Total night charge \\\n","3332 241.4 77 10.86 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","3332 13.7 4 3.7 \n","\n"," Customer service calls Churn \n","3332 0 0 "]},"metadata":{},"execution_count":27}]},{"cell_type":"markdown","metadata":{"id":"Ur_--vTVtU8W"},"source":["### Применение функций: `apply`, `map` и др."]},{"cell_type":"markdown","metadata":{"id":"da6UVfVjtU8W"},"source":["**Применение функции к каждому столбцу:**"]},{"cell_type":"code","metadata":{"id":"LIlX4ORVtU8W","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1633609638559,"user_tz":-300,"elapsed":71,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"315e42ff-9efa-4fa9-e41f-cba08a9534d2"},"source":["df.apply(np.max)"],"execution_count":28,"outputs":[{"output_type":"execute_result","data":{"text/plain":["State WY\n","Account length 243\n","Area code 510\n","International plan Yes\n","Voice mail plan Yes\n","Number vmail messages 51\n","Total day minutes 3.5e+02\n","Total day calls 165\n","Total day charge 60\n","Total eve minutes 3.6e+02\n","Total eve calls 170\n","Total eve charge 31\n","Total night minutes 4e+02\n","Total night calls 175\n","Total night charge 18\n","Total intl minutes 20\n","Total intl calls 20\n","Total intl charge 5.4\n","Customer service calls 9\n","Churn 1\n","dtype: 
object"]},"metadata":{},"execution_count":28}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":478},"id":"p-mUIP9HQakx","executionInfo":{"status":"ok","timestamp":1633609638561,"user_tz":-300,"elapsed":66,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"bbe218f1-eddb-4ff4-eb09-6a148e04a29c"},"source":["def make_feature(row):\n"," if row['Voice mail plan'] == 'Yes':\n"," return row['Number vmail messages'] * 4\n"," return row['Number vmail messages'] + 4\n","df['new_Number_vmail_messages'] = df.apply(make_feature, axis=1)\n","df"],"execution_count":29,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415NoYes25265.111045.07197.49916.78244.79111.0110.032.7010100
1OH107415NoYes26161.612327.47195.510316.62254.410311.4513.733.7010104
2NJ137415NoNo0243.411441.38121.211010.30162.61047.3212.253.29004
3OH84408YesNo0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415YesNo0166.711328.34148.312212.61186.91218.4110.132.73304
..................................................................
3328AZ192415NoYes36156.27726.55215.512618.32279.18312.569.962.6720144
3329WV68415NoNo0231.15739.29153.45513.04191.31238.619.642.59304
3330RI28510NoNo0180.810930.74288.85824.55191.9918.6414.163.81204
3331CT184510YesNo0213.810536.35159.68413.57139.21376.265.0101.35204
3332TN74415NoYes25234.411339.85265.98222.60241.47710.8613.743.7000100
\n","

3333 rows × 21 columns

\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","0 KS 128 415 No Yes \n","1 OH 107 415 No Yes \n","2 NJ 137 415 No No \n","3 OH 84 408 Yes No \n","4 OK 75 415 Yes No \n","... ... ... ... ... ... \n","3328 AZ 192 415 No Yes \n","3329 WV 68 415 No No \n","3330 RI 28 510 No No \n","3331 CT 184 510 Yes No \n","3332 TN 74 415 No Yes \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","1 26 161.6 123 \n","2 0 243.4 114 \n","3 0 299.4 71 \n","4 0 166.7 113 \n","... ... ... ... \n","3328 36 156.2 77 \n","3329 0 231.1 57 \n","3330 0 180.8 109 \n","3331 0 213.8 105 \n","3332 25 234.4 113 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","1 27.47 195.5 103 16.62 \n","2 41.38 121.2 110 10.30 \n","3 50.90 61.9 88 5.26 \n","4 28.34 148.3 122 12.61 \n","... ... ... ... ... \n","3328 26.55 215.5 126 18.32 \n","3329 39.29 153.4 55 13.04 \n","3330 30.74 288.8 58 24.55 \n","3331 36.35 159.6 84 13.57 \n","3332 39.85 265.9 82 22.60 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","1 254.4 103 11.45 \n","2 162.6 104 7.32 \n","3 196.9 89 8.86 \n","4 186.9 121 8.41 \n","... ... ... ... \n","3328 279.1 83 12.56 \n","3329 191.3 123 8.61 \n","3330 191.9 91 8.64 \n","3331 139.2 137 6.26 \n","3332 241.4 77 10.86 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","1 13.7 3 3.70 \n","2 12.2 5 3.29 \n","3 6.6 7 1.78 \n","4 10.1 3 2.73 \n","... ... ... ... \n","3328 9.9 6 2.67 \n","3329 9.6 4 2.59 \n","3330 14.1 6 3.81 \n","3331 5.0 10 1.35 \n","3332 13.7 4 3.70 \n","\n"," Customer service calls Churn new_Number_vmail_messages \n","0 1 0 100 \n","1 1 0 104 \n","2 0 0 4 \n","3 2 0 4 \n","4 3 0 4 \n","... ... ... ... 
\n","3328 2 0 144 \n","3329 3 0 4 \n","3330 2 0 4 \n","3331 2 0 4 \n","3332 0 0 100 \n","\n","[3333 rows x 21 columns]"]},"metadata":{},"execution_count":29}]},{"cell_type":"markdown","metadata":{"id":"j2fEIU5ptU8Y"},"source":["Метод `apply` можно использовать и для того, чтобы применить функцию к каждой строке. Для этого нужно указать `axis=1`."]},{"cell_type":"markdown","metadata":{"id":"e-TxyhUttU8Y"},"source":["**Применение функции к каждой ячейке столбца**\n","\n","Допустим, по какой-то причине нас интересуют все люди из штатов, названия которых начинаются на 'W'. В данном случае это можно сделать по-разному, но наибольшую свободу дает связка `apply`-`lambda` – применение функции ко всем значениям в столбце."]},{"cell_type":"code","metadata":{"scrolled":false,"id":"-jnLxPnWtU8Z","colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"status":"ok","timestamp":1633609638563,"user_tz":-300,"elapsed":65,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"bcab7df7-ed63-45c1-816b-ec70b6e4fa7b"},"source":["df[df[\"State\"].apply(lambda state: state[0] == \"W\")].head()"],"execution_count":30,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
9WV141415YesYes37258.68443.96222.011118.87326.49714.6911.253.0200148
26WY57408NoYes39213.011536.21191.111216.24182.71158.229.532.5700156
44WI64510NoNo0154.06726.18225.811819.19265.38611.943.530.95104
49WY97415NoYes24133.213522.64217.25818.4670.6793.1811.032.971096
54WY87415NoNo0151.08325.67219.711618.67203.91279.189.732.62514
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","9 WV 141 415 Yes Yes \n","26 WY 57 408 No Yes \n","44 WI 64 510 No No \n","49 WY 97 415 No Yes \n","54 WY 87 415 No No \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","9 37 258.6 84 \n","26 39 213.0 115 \n","44 0 154.0 67 \n","49 24 133.2 135 \n","54 0 151.0 83 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","9 43.96 222.0 111 18.87 \n","26 36.21 191.1 112 16.24 \n","44 26.18 225.8 118 19.19 \n","49 22.64 217.2 58 18.46 \n","54 25.67 219.7 116 18.67 \n","\n"," Total night minutes Total night calls Total night charge \\\n","9 326.4 97 14.69 \n","26 182.7 115 8.22 \n","44 265.3 86 11.94 \n","49 70.6 79 3.18 \n","54 203.9 127 9.18 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","9 11.2 5 3.02 \n","26 9.5 3 2.57 \n","44 3.5 3 0.95 \n","49 11.0 3 2.97 \n","54 9.7 3 2.62 \n","\n"," Customer service calls Churn new_Number_vmail_messages \n","9 0 0 148 \n","26 0 0 156 \n","44 1 0 4 \n","49 1 0 96 \n","54 5 1 4 "]},"metadata":{},"execution_count":30}]},{"cell_type":"markdown","metadata":{"id":"q6SkeDiJtU8Z"},"source":["Метод `map` можно использовать и для **замены значений в колонке**, передав ему в качестве аргумента словарь вида `{old_value: new_value}`:"]},{"cell_type":"code","metadata":{"id":"q3lbm6XXtU8a","colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"status":"ok","timestamp":1633609638564,"user_tz":-300,"elapsed":63,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"10f505a4-3227-4ff5-b868-1efaadf4a181"},"source":["d = {\"No\": False, \"Yes\": True}\n","df[\"International plan\"] = df[\"International plan\"].map(d)\n","df.head()"],"execution_count":31,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415FalseYes25265.111045.07197.49916.78244.79111.0110.032.7010100
1OH107415FalseYes26161.612327.47195.510316.62254.410311.4513.733.7010104
2NJ137415FalseNo0243.411441.38121.211010.30162.61047.3212.253.29004
3OH84408TrueNo0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415TrueNo0166.711328.34148.312212.61186.91218.4110.132.73304
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","0 KS 128 415 False Yes \n","1 OH 107 415 False Yes \n","2 NJ 137 415 False No \n","3 OH 84 408 True No \n","4 OK 75 415 True No \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","1 26 161.6 123 \n","2 0 243.4 114 \n","3 0 299.4 71 \n","4 0 166.7 113 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","1 27.47 195.5 103 16.62 \n","2 41.38 121.2 110 10.30 \n","3 50.90 61.9 88 5.26 \n","4 28.34 148.3 122 12.61 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","1 254.4 103 11.45 \n","2 162.6 104 7.32 \n","3 196.9 89 8.86 \n","4 186.9 121 8.41 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","1 13.7 3 3.70 \n","2 12.2 5 3.29 \n","3 6.6 7 1.78 \n","4 10.1 3 2.73 \n","\n"," Customer service calls Churn new_Number_vmail_messages \n","0 1 0 100 \n","1 1 0 104 \n","2 0 0 4 \n","3 2 0 4 \n","4 3 0 4 "]},"metadata":{},"execution_count":31}]},{"cell_type":"markdown","metadata":{"id":"YkK8_gEBtU8b"},"source":["Аналогичную операцию можно провернуть с помощью метода `replace`:"]},{"cell_type":"code","metadata":{"id":"xop7OSmZtU8b","colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"status":"ok","timestamp":1633609639156,"user_tz":-300,"elapsed":653,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"16d543f3-5373-45d1-cb55-b69355e6a5cb"},"source":["df = df.replace({\"Voice mail plan\": d})\n","df.head()"],"execution_count":32,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.7010100
1OH107415FalseTrue26161.612327.47195.510316.62254.410311.4513.733.7010104
2NJ137415FalseFalse0243.411441.38121.211010.30162.61047.3212.253.29004
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.73304
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","0 KS 128 415 False True \n","1 OH 107 415 False True \n","2 NJ 137 415 False False \n","3 OH 84 408 True False \n","4 OK 75 415 True False \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","1 26 161.6 123 \n","2 0 243.4 114 \n","3 0 299.4 71 \n","4 0 166.7 113 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","1 27.47 195.5 103 16.62 \n","2 41.38 121.2 110 10.30 \n","3 50.90 61.9 88 5.26 \n","4 28.34 148.3 122 12.61 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","1 254.4 103 11.45 \n","2 162.6 104 7.32 \n","3 196.9 89 8.86 \n","4 186.9 121 8.41 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","1 13.7 3 3.70 \n","2 12.2 5 3.29 \n","3 6.6 7 1.78 \n","4 10.1 3 2.73 \n","\n"," Customer service calls Churn new_Number_vmail_messages \n","0 1 0 100 \n","1 1 0 104 \n","2 0 0 4 \n","3 2 0 4 \n","4 3 0 4 "]},"metadata":{},"execution_count":32}]},{"cell_type":"markdown","metadata":{"id":"sJ9KC2CrtU8d"},"source":["### Группировка данных\n","\n","В общем случае группировка данных в Pandas выглядит следующим образом:\n","\n","```\n","df.groupby(by=grouping_columns)[columns_to_show].function()\n","```\n","\n","1. К датафрейму применяется метод **`groupby`**, который разделяет данные по `grouping_columns` – признаку или набору признаков.\n","3. Индексируем по нужным нам столбцам (`columns_to_show`). \n","2. 
К полученным группам применяется функция или несколько функций."]},{"cell_type":"markdown","metadata":{"id":"wiHvK8LFtU8d"},"source":["**Группирование данных в зависимости от значения признака `Churn` и вывод статистик по трём столбцам в каждой группе.**"]},{"cell_type":"code","metadata":{"id":"pXrstrQgtU8d","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609639159,"user_tz":-300,"elapsed":99,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"42ed5c1c-65c1-457f-cd03-6a26bb60da9f"},"source":["columns_to_show = [\"Total day minutes\", \"Total eve minutes\", \"Total night minutes\"]\n","\n","df.groupby([\"Churn\"])[columns_to_show].describe(percentiles=[])"],"execution_count":33,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Total day minutesTotal eve minutesTotal night minutes
countmeanstdmin50%maxcountmeanstdmin50%maxcountmeanstdmin50%max
Churn
02850.0175.1850.180.0177.2315.62850.0199.0450.290.0199.6361.82850.0200.1351.1123.2200.25395.0
1483.0206.9169.000.0217.6350.8483.0212.4151.7370.9211.3363.7483.0205.2347.1347.4204.80354.9
\n","
"],"text/plain":[" Total day minutes Total eve minutes \\\n"," count mean std min 50% max count \n","Churn \n","0 2850.0 175.18 50.18 0.0 177.2 315.6 2850.0 \n","1 483.0 206.91 69.00 0.0 217.6 350.8 483.0 \n","\n"," Total night minutes \\\n"," mean std min 50% max count mean std \n","Churn \n","0 199.04 50.29 0.0 199.6 361.8 2850.0 200.13 51.11 \n","1 212.41 51.73 70.9 211.3 363.7 483.0 205.23 47.13 \n","\n"," \n"," min 50% max \n","Churn \n","0 23.2 200.25 395.0 \n","1 47.4 204.80 354.9 "]},"metadata":{},"execution_count":33}]},{"cell_type":"markdown","metadata":{"id":"_EZVRvNptU8d"},"source":["Сделаем то же самое, но немного по-другому, передав в `agg` список функций:"]},{"cell_type":"code","metadata":{"id":"9x5emqSwtU8e","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609639161,"user_tz":-300,"elapsed":97,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"9e81a4db-b89a-4e06-d928-70be8f26fdc5"},"source":["columns_to_show = [\"Total day minutes\", \"Total eve minutes\", \"Total night minutes\"]\n","\n","df.groupby([\"Churn\"])[columns_to_show].agg([np.mean, np.std, np.min, np.max])"],"execution_count":34,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Total day minutesTotal eve minutesTotal night minutes
meanstdaminamaxmeanstdaminamaxmeanstdaminamax
Churn
0175.1850.180.0315.6199.0450.290.0361.8200.1351.1123.2395.0
1206.9169.000.0350.8212.4151.7370.9363.7205.2347.1347.4354.9
\n","
"],"text/plain":[" Total day minutes Total eve minutes \\\n"," mean std amin amax mean std amin \n","Churn \n","0 175.18 50.18 0.0 315.6 199.04 50.29 0.0 \n","1 206.91 69.00 0.0 350.8 212.41 51.73 70.9 \n","\n"," Total night minutes \n"," amax mean std amin amax \n","Churn \n","0 361.8 200.13 51.11 23.2 395.0 \n","1 363.7 205.23 47.13 47.4 354.9 "]},"metadata":{},"execution_count":34}]},{"cell_type":"markdown","metadata":{"id":"bMsnErVv_o77"},"source":["Сбрасываем индекс с группирующего поля"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":112},"id":"8HFw5er5_DhM","executionInfo":{"status":"ok","timestamp":1633609639162,"user_tz":-300,"elapsed":93,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"bbdaaf9f-323e-42aa-e086-768f78599e65"},"source":["df.groupby('Churn', as_index=False)['State'].count()"],"execution_count":35,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ChurnState
002850
11483
\n","
"],"text/plain":[" Churn State\n","0 0 2850\n","1 1 483"]},"metadata":{},"execution_count":35}]},{"cell_type":"markdown","metadata":{"id":"mwqgfLAVtU8e"},"source":["### Сводные таблицы"]},{"cell_type":"markdown","metadata":{"id":"gYnSr64ptU8e"},"source":["Допустим, мы хотим посмотреть, как наблюдения в нашей выборке распределены в контексте двух признаков — `Churn` и `International plan`. Для этого мы можем построить **таблицу сопряженности**, воспользовавшись методом **`crosstab`**:"]},{"cell_type":"code","metadata":{"id":"yhgrYerutU8f","colab":{"base_uri":"https://localhost:8080/","height":143},"executionInfo":{"status":"ok","timestamp":1633609639163,"user_tz":-300,"elapsed":91,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"25d6f12a-34f1-4f8b-8f49-d8914548046d"},"source":["pd.crosstab(df[\"Churn\"], df[\"International plan\"])"],"execution_count":36,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
International planFalseTrue
Churn
02664186
1346137
\n","
"],"text/plain":["International plan False True \n","Churn \n","0 2664 186\n","1 346 137"]},"metadata":{},"execution_count":36}]},{"cell_type":"code","metadata":{"scrolled":true,"id":"cR0WankTtU8f","colab":{"base_uri":"https://localhost:8080/","height":143},"executionInfo":{"status":"ok","timestamp":1633609639165,"user_tz":-300,"elapsed":90,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"a949d3c8-88f3-4f4a-8a67-6208d2c44445"},"source":["pd.crosstab(df[\"Churn\"], df[\"Voice mail plan\"], normalize=True)"],"execution_count":37,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Voice mail planFalseTrue
Churn
00.600.25
10.120.02
\n","
"],"text/plain":["Voice mail plan False True \n","Churn \n","0 0.60 0.25\n","1 0.12 0.02"]},"metadata":{},"execution_count":37}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"P_eONQ24W0aU","executionInfo":{"status":"ok","timestamp":1633609639166,"user_tz":-300,"elapsed":87,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"525dd141-f2d7-47a6-c4ea-6bd3be3386a9"},"source":["df[\"Customer service calls\"].unique()"],"execution_count":38,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([1, 0, 2, 3, 4, 5, 7, 9, 6, 8])"]},"metadata":{},"execution_count":38}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"abLbV3cKARwi","executionInfo":{"status":"ok","timestamp":1633609639167,"user_tz":-300,"elapsed":75,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"c5f227e8-5570-4c48-e7bf-48bb31227734"},"source":["df[\"Customer service calls\"].nunique()"],"execution_count":39,"outputs":[{"output_type":"execute_result","data":{"text/plain":["10"]},"metadata":{},"execution_count":39}]},{"cell_type":"markdown","metadata":{"id":"1F8uRUIwtU8h"},"source":["Мы видим, что большинство пользователей — лояльные и не пользуются дополнительными услугами (международного роуминга / голосовой почты)."]},{"cell_type":"markdown","metadata":{"id":"reNYiSlJtU8h"},"source":["Продвинутые пользователи `Excel` наверняка вспомнят о такой фиче, как **сводные таблицы** (`pivot tables`). 
В `Pandas` за сводные таблицы отвечает метод **`pivot_table`**, который принимает в качестве параметров:\n","\n","* `values` – список переменных, по которым требуется рассчитать нужные статистики,\n","* `index` – список переменных, по которым нужно сгруппировать данные,\n","* `aggfunc` — то, что нам, собственно, нужно посчитать по группам — сумму, среднее, максимум, минимум или что-то ещё.\n","\n","Давайте посмотрим среднее число дневных, вечерних и ночных звонков для разных `Area code`:"]},{"cell_type":"code","metadata":{"scrolled":false,"id":"xabiD5fktU8h","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609639168,"user_tz":-300,"elapsed":68,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"a51980ac-bbb9-44b4-c453-1085d9014a7c"},"source":["df.pivot_table(\n"," [\"Total day calls\", \"Total eve calls\", \"Total night calls\"],\n"," [\"Area code\"],\n"," aggfunc=\"mean\",\n",").head(10)"],"execution_count":40,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Total day callsTotal eve callsTotal night calls
Area code
408100.5099.7999.04
415100.58100.50100.40
510100.1099.67100.60
\n","
"],"text/plain":[" Total day calls Total eve calls Total night calls\n","Area code \n","408 100.50 99.79 99.04\n","415 100.58 100.50 100.40\n","510 100.10 99.67 100.60"]},"metadata":{},"execution_count":40}]},{"cell_type":"markdown","metadata":{"id":"0_haYJdjtU8h"},"source":["### Преобразование датафреймов\n","\n","Как и многие другие вещи, добавлять столбцы в `DataFrame` можно несколькими способами."]},{"cell_type":"markdown","metadata":{"id":"35zMtFv8tU8i"},"source":["Например, мы хотим посчитать общее количество звонков для всех пользователей. Создадим объект `total_calls` типа `Series` и вставим его в датафрейм:"]},{"cell_type":"code","metadata":{"id":"z1ktVfD0tU8i","colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"status":"ok","timestamp":1633609639171,"user_tz":-300,"elapsed":67,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"a0c006bf-4504-4c46-af1d-8b8ab8167d79"},"source":["total_calls = (\n"," df[\"Total day calls\"]\n"," + df[\"Total eve calls\"]\n"," + df[\"Total night calls\"]\n"," + df[\"Total intl calls\"]\n",")\n","df.insert(loc=len(df.columns), column=\"Total calls\", value=total_calls)\n","# loc - номер столбца, после которого нужно вставить данный Series\n","# мы указали len(df.columns), чтобы вставить его в самом конце\n","df.head()"],"execution_count":41,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messagesTotal calls
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.7010100303
1OH107415FalseTrue26161.612327.47195.510316.62254.410311.4513.733.7010104332
2NJ137415FalseFalse0243.411441.38121.211010.30162.61047.3212.253.29004333
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.78204255
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.73304359
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","0 KS 128 415 False True \n","1 OH 107 415 False True \n","2 NJ 137 415 False False \n","3 OH 84 408 True False \n","4 OK 75 415 True False \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","1 26 161.6 123 \n","2 0 243.4 114 \n","3 0 299.4 71 \n","4 0 166.7 113 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","1 27.47 195.5 103 16.62 \n","2 41.38 121.2 110 10.30 \n","3 50.90 61.9 88 5.26 \n","4 28.34 148.3 122 12.61 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","1 254.4 103 11.45 \n","2 162.6 104 7.32 \n","3 196.9 89 8.86 \n","4 186.9 121 8.41 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","1 13.7 3 3.70 \n","2 12.2 5 3.29 \n","3 6.6 7 1.78 \n","4 10.1 3 2.73 \n","\n"," Customer service calls Churn new_Number_vmail_messages Total calls \n","0 1 0 100 303 \n","1 1 0 104 332 \n","2 0 0 4 333 \n","3 2 0 4 255 \n","4 3 0 4 359 "]},"metadata":{},"execution_count":41}]},{"cell_type":"markdown","metadata":{"id":"nB0mpCA1tU8j"},"source":["Добавить столбец из имеющихся можно и проще, не создавая промежуточных `Series`:"]},{"cell_type":"code","metadata":{"id":"ZVpdhf1etU8k","colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"status":"ok","timestamp":1633609639173,"user_tz":-300,"elapsed":64,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"93b3fe31-2757-4cb7-afcc-7c10f765bb46"},"source":["df[\"Total charge\"] = (\n"," df[\"Total day charge\"]\n"," + df[\"Total eve charge\"]\n"," + df[\"Total night charge\"]\n"," + df[\"Total intl 
charge\"]\n",")\n","\n","df.head()"],"execution_count":42,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messagesTotal callsTotal charge
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.701010030375.56
1OH107415FalseTrue26161.612327.47195.510316.62254.410311.4513.733.701010433259.24
2NJ137415FalseFalse0243.411441.38121.211010.30162.61047.3212.253.2900433362.29
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.7820425566.80
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.7330435952.09
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","0 KS 128 415 False True \n","1 OH 107 415 False True \n","2 NJ 137 415 False False \n","3 OH 84 408 True False \n","4 OK 75 415 True False \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","1 26 161.6 123 \n","2 0 243.4 114 \n","3 0 299.4 71 \n","4 0 166.7 113 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","1 27.47 195.5 103 16.62 \n","2 41.38 121.2 110 10.30 \n","3 50.90 61.9 88 5.26 \n","4 28.34 148.3 122 12.61 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","1 254.4 103 11.45 \n","2 162.6 104 7.32 \n","3 196.9 89 8.86 \n","4 186.9 121 8.41 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","1 13.7 3 3.70 \n","2 12.2 5 3.29 \n","3 6.6 7 1.78 \n","4 10.1 3 2.73 \n","\n"," Customer service calls Churn new_Number_vmail_messages Total calls \\\n","0 1 0 100 303 \n","1 1 0 104 332 \n","2 0 0 4 333 \n","3 2 0 4 255 \n","4 3 0 4 359 \n","\n"," Total charge \n","0 75.56 \n","1 59.24 \n","2 62.29 \n","3 66.80 \n","4 52.09 "]},"metadata":{},"execution_count":42}]},{"cell_type":"markdown","metadata":{"id":"xrn0pZo1tU8l"},"source":["Чтобы удалить столбцы или строки, воспользуйтесь методом `drop`, передавая в качестве аргумента нужные индексы и требуемое значение параметра `axis` (`1`, если удаляете столбцы, и ничего или `0`, если удаляете строки):"]},{"cell_type":"code","metadata":{"scrolled":false,"id":"oSvOmNv-tU8l","colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"status":"ok","timestamp":1633609639175,"user_tz":-300,"elapsed":62,"user":{"displayName":"Александр 
Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"d0304dcc-8e8a-42c9-8765-1822ae6f5c44"},"source":["# избавляемся от созданных только что столбцов\n","df = df.drop([\"Total charge\", \"Total calls\"], axis=1)\n","\n","df.drop([1, 2]).head() # а вот так можно удалить строчки"],"execution_count":43,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.7010100
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.73304
5AL118510TrueFalse0223.49837.98220.610118.75203.91189.186.361.70004
6MA121510FalseTrue24218.28837.09348.510829.62212.61189.577.572.033096
\n","
"],"text/plain":[" State Account length Area code International plan Voice mail plan \\\n","0 KS 128 415 False True \n","3 OH 84 408 True False \n","4 OK 75 415 True False \n","5 AL 118 510 True False \n","6 MA 121 510 False True \n","\n"," Number vmail messages Total day minutes Total day calls \\\n","0 25 265.1 110 \n","3 0 299.4 71 \n","4 0 166.7 113 \n","5 0 223.4 98 \n","6 24 218.2 88 \n","\n"," Total day charge Total eve minutes Total eve calls Total eve charge \\\n","0 45.07 197.4 99 16.78 \n","3 50.90 61.9 88 5.26 \n","4 28.34 148.3 122 12.61 \n","5 37.98 220.6 101 18.75 \n","6 37.09 348.5 108 29.62 \n","\n"," Total night minutes Total night calls Total night charge \\\n","0 244.7 91 11.01 \n","3 196.9 89 8.86 \n","4 186.9 121 8.41 \n","5 203.9 118 9.18 \n","6 212.6 118 9.57 \n","\n"," Total intl minutes Total intl calls Total intl charge \\\n","0 10.0 3 2.70 \n","3 6.6 7 1.78 \n","4 10.1 3 2.73 \n","5 6.3 6 1.70 \n","6 7.5 7 2.03 \n","\n"," Customer service calls Churn new_Number_vmail_messages \n","0 1 0 100 \n","3 2 0 4 \n","4 3 0 4 \n","5 0 0 4 \n","6 3 0 96 "]},"metadata":{},"execution_count":43}]},{"cell_type":"markdown","metadata":{"id":"JLDUG5hNtU8l"},"source":["--------\n","\n","\n","\n","## Первые попытки прогнозирования оттока\n"]},{"cell_type":"markdown","metadata":{"id":"1sv6q4lNtU8m"},"source":["Посмотрим, как отток связан с признаком *\"Подключение международного роуминга\"* (`International plan`). 
Сделаем это с помощью сводной таблички `crosstab`, а также путем иллюстрации с `Seaborn` (как именно строить такие картинки и анализировать с их помощью графики – материал следующей статьи.)"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"M7cBvVn-tU8m","executionInfo":{"status":"ok","timestamp":1633609639176,"user_tz":-300,"elapsed":57,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}}},"source":["# надо дополнительно установить (команда в терминале)\n","# чтоб картинки рисовались в тетрадке\n","# !conda install seaborn\n","%matplotlib inline\n","import matplotlib.pyplot as plt\n","import seaborn as sns\n","\n","plt.rcParams[\"figure.figsize\"] = (8, 6)"],"execution_count":44,"outputs":[]},{"cell_type":"code","metadata":{"id":"8ZJBwL8NtU8m","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609639177,"user_tz":-300,"elapsed":56,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"334f814d-2c27-4f67-cabd-17159188ca2b"},"source":["pd.crosstab(df[\"Churn\"], df[\"International plan\"], margins=True)"],"execution_count":45,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
International planFalseTrueAll
Churn
026641862850
1346137483
All30103233333
\n","
"],"text/plain":["International plan False True All\n","Churn \n","0 2664 186 2850\n","1 346 137 483\n","All 3010 323 3333"]},"metadata":{},"execution_count":45}]},{"cell_type":"code","metadata":{"id":"BGwuNSretU8n","colab":{"base_uri":"https://localhost:8080/","height":388},"executionInfo":{"status":"ok","timestamp":1633609640633,"user_tz":-300,"elapsed":1509,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"0b7deac2-30bb-4ec9-e84b-ec60b64ceeff"},"source":["sns.countplot(x=\"International plan\", hue=\"Churn\", data=df)\n","plt.savefig(\"int_plan_and_churn.png\", dpi=300);"],"execution_count":46,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"j8CpA17FtU8o"},"source":["Видим, что когда роуминг подключен, доля оттока намного выше – интересное наблюдение! Возможно, большие и плохо контролируемые траты в роуминге очень конфликтогенны и приводят к недовольству клиентов телеком-оператора и, соответственно, к их оттоку. "]},{"cell_type":"markdown","metadata":{"id":"JDm9ePM4tU8o"},"source":["Далее посмотрим на еще один важный признак – *\"Число обращений в сервисный центр\"* (`Customer service calls`). Также построим сводную таблицу и картинку."]},{"cell_type":"code","metadata":{"id":"UKGrw7fbtU8p","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609640635,"user_tz":-300,"elapsed":25,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"7801c2d5-2a6d-4872-cde2-f847b851b3c5"},"source":["pd.crosstab(df[\"Churn\"], df[\"Customer service calls\"], margins=True)"],"execution_count":47,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Customer service calls0123456789All
Churn
06051059672385902684102850
1921228744764014512483
All697118175942916666229223333
\n","
"],"text/plain":["Customer service calls 0 1 2 3 4 5 6 7 8 9 All\n","Churn \n","0 605 1059 672 385 90 26 8 4 1 0 2850\n","1 92 122 87 44 76 40 14 5 1 2 483\n","All 697 1181 759 429 166 66 22 9 2 2 3333"]},"metadata":{},"execution_count":47}]},{"cell_type":"code","metadata":{"id":"sMJh9m1VtU8p","colab":{"base_uri":"https://localhost:8080/","height":388},"executionInfo":{"status":"ok","timestamp":1633609642719,"user_tz":-300,"elapsed":2104,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"43183fcc-f324-4492-acd6-30a6cf0615b5"},"source":["sns.countplot(x=\"Customer service calls\", hue=\"Churn\", data=df)\n","plt.savefig(\"serv_calls__and_churn.png\", dpi=300);"],"execution_count":48,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"x2ZJPe-DtU8q"},"source":["Может быть, по сводной табличке это не так хорошо видно (или скучно ползать взглядом по строчкам с цифрами), а вот картинка красноречиво свидетельствует о том, что доля оттока сильно возрастает начиная с 4 звонков в сервисный центр. "]},{"cell_type":"markdown","metadata":{"id":"Dqj4LVe3tU8q"},"source":["Добавим теперь в наш DataFrame бинарный признак — результат сравнения `Customer service calls > 3`. И еще раз посмотрим, как он связан с оттоком. "]},{"cell_type":"code","metadata":{"scrolled":true,"id":"o9R6NM8ltU8q","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1633609642722,"user_tz":-300,"elapsed":14,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"ab3e7ee2-24f1-455b-c973-e994cf85a4c7"},"source":["df[\"Many_service_calls\"] = (df[\"Customer service calls\"] > 3).astype(\"int\")\n","\n","pd.crosstab(df[\"Many_service_calls\"], df[\"Churn\"], margins=True)"],"execution_count":49,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Churn01All
Many_service_calls
027213453066
1129138267
All28504833333
\n","
"],"text/plain":["Churn 0 1 All\n","Many_service_calls \n","0 2721 345 3066\n","1 129 138 267\n","All 2850 483 3333"]},"metadata":{},"execution_count":49}]},{"cell_type":"code","metadata":{"id":"nUQk7G96tU8r","colab":{"base_uri":"https://localhost:8080/","height":389},"executionInfo":{"status":"ok","timestamp":1633609643487,"user_tz":-300,"elapsed":777,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"ed0370de-9813-45fa-f666-ee36ce5206a1"},"source":["sns.countplot(x=\"Many_service_calls\", hue=\"Churn\", data=df)\n","plt.savefig(\"many_serv_calls__and_churn.png\", dpi=300);"],"execution_count":50,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"XfYD5KqGtU8s"},"source":["Объединим рассмотренные выше условия и построим сводную табличку для этого объединения и оттока."]},{"cell_type":"code","metadata":{"id":"ZuaXCibrtU8s","colab":{"base_uri":"https://localhost:8080/","height":143},"executionInfo":{"status":"ok","timestamp":1633609643489,"user_tz":-300,"elapsed":36,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64","userId":"11145992452404092449"}},"outputId":"dbd40bfd-2dd2-41f1-ba50-cf159b12ddd7"},"source":["pd.crosstab(df[\"Many_service_calls\"] & df[\"International plan\"], df[\"Churn\"])"],"execution_count":51,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Churn01
row_0
False2841464
True919
\n","
"],"text/plain":["Churn 0 1\n","row_0 \n","False 2841 464\n","True 9 19"]},"metadata":{},"execution_count":51}]},{"cell_type":"markdown","metadata":{"id":"VyaMr43HtU8t"},"source":["Значит, прогнозируя отток клиента в случае, когда число звонков в сервисный центр больше 3 и подключен роуминг (и прогнозируя лояльность – в противном случае), можно ожидать около 85.8% правильных попаданий (ошибаемся всего 464 + 9 раз). Эти 85.8%, которые мы получили с помощью очень простых рассуждений – это неплохая отправная точка (*baseline*) для дальнейших моделей машинного обучения, которые мы будем строить. "]},{"cell_type":"markdown","metadata":{"id":"d6_n0ESntU8u"},"source":["В целом до появления машинного обучения процесс анализа данных выглядел примерно так. Прорезюмируем:\n"," \n","- Доля лояльных клиентов в выборке – 85.5%. Самая наивная модель, ответ которой \"Клиент всегда лоялен\", на подобных данных будет угадывать примерно в 85.5% случаев. То есть доли правильных ответов (*accuracy*) последующих моделей должны быть как минимум не меньше, а лучше, значительно выше этой цифры;\n","- С помощью простого прогноза, который условно можно выразить такой формулой: \"International plan = True & Customer Service calls > 3 => Churn = 1, else Churn = 0\", можно ожидать долю угадываний 85.8%, что еще чуть выше 85.5%\n","- Эти два бейзлайна мы получили без всякого машинного обучения, и они служат отправной точкой для наших последующих моделей. Если окажется, что мы громадными усилиями увеличиваем долю правильных ответов всего, скажем, на 0.5%, то возможно, мы что-то делаем не так, и достаточно ограничиться простой моделью из двух условий. \n","- Перед обучением сложных моделей рекомендуется немного покрутить данные и проверить простые предположения. Более того, в бизнес-приложениях машинного обучения чаще всего начинают именно с простых решений, а потом экспериментируют с их усложнением. 
"]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FzQ_ch0ktU7n" + }, + "source": [ + "#
Первичный анализ данных с Pandas
" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true, + "executionInfo": { + "elapsed": 631, + "status": "ok", + "timestamp": 1633609636856, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "Parpx34utU7s", + "scrolled": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QxIKAzfCtU7u" + }, + "source": [ + "Данные, с которыми работают дата саентисты и аналитики, обычно хранятся в виде табличек — например, в форматах `.csv`, `.tsv` или `.xlsx`. Для того, чтобы считать нужные данные из такого файла, отлично подходит библиотека Pandas.\n", + "\n", + "Основными структурами данных в Pandas являются классы `Series` и `DataFrame`. Первый из них представляет собой одномерный индексированный массив данных некоторого фиксированного типа. Второй - это двухмерная структура данных, представляющая собой таблицу, каждый столбец которой содержит данные одного типа. Можно представлять её как словарь объектов типа `Series`. Структура `DataFrame` отлично подходит для представления реальных данных: строки соответствуют признаковым описаниям отдельных объектов, а столбцы соответствуют признакам." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l_Ell72CtU7w" + }, + "source": [ + "---------\n", + "\n", + "## Демонстрация основных методов Pandas \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YMu_ER8WtU7y" + }, + "source": [ + "### Чтение из файла и первичный анализ" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "efGYx1kqtU7z" + }, + "source": [ + "Прочитаем данные и посмотрим на первые 5 строк с помощью метода `head`:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true, + "executionInfo": { + "elapsed": 597, + "status": "ok", + "timestamp": 1633609637892, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ByXZK9MFtU71", + "scrolled": true + }, + "outputs": [], + "source": [ + "df = pd.read_csv(\"https://raw.githubusercontent.com/Yorko/mlcourse.ai/master/data/telecom_churn.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + }, + "executionInfo": { + "elapsed": 77, + "status": "ok", + "timestamp": 1633609637895, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "hFaFpz2utU73", + "outputId": "cbd457e9-c2bd-4beb-a1fa-c7ba8a4c5b97", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
0KS128415NoYes25265.111045.07197.49916.78244.79111.0110.032.701False
1OH107415NoYes26161.612327.47195.510316.62254.410311.4513.733.701False
2NJ137415NoNo0243.411441.38121.211010.30162.61047.3212.253.290False
3OH84408YesNo0299.47150.9061.9885.26196.9898.866.671.782False
4OK75415YesNo0166.711328.34148.312212.61186.91218.4110.132.733False
\n", + "
" + ], + "text/plain": [ + " State Account length ... Customer service calls Churn\n", + "0 KS 128 ... 1 False\n", + "1 OH 107 ... 1 False\n", + "2 NJ 137 ... 0 False\n", + "3 OH 84 ... 2 False\n", + "4 OK 75 ... 3 False\n", + "\n", + "[5 rows x 20 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CpV496POtU75" + }, + "source": [ + "В Jupyter-ноутбуках датафреймы `Pandas` выводятся в виде вот таких красивых табличек, и `print(df.head())` выглядит хуже.\n", + "\n", + "Кстати, по умолчанию `Pandas` выводит всего 20 столбцов и 60 строк, поэтому если ваш датафрейм больше, воспользуйтесь функцией `set_option`:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true, + "executionInfo": { + "elapsed": 68, + "status": "ok", + "timestamp": 1633609637897, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "CYFyCCGGtU77" + }, + "outputs": [], + "source": [ + "# задание проанализировать все опции и выбрать 3-5 самых полезных по личному мнению \n", + "# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.set_option.html\n", + "pd.set_option(\"display.max_columns\", 100)\n", + "pd.set_option(\"display.max_rows\", 100)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CbfNn4a9tU78" + }, + "source": [ + "А также укажем значение параметра `precision` равным 2, чтобы отображать два знака после запятой (а не 6, как установлено по умолчанию)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true, + "executionInfo": { + "elapsed": 67, + "status": "ok", + "timestamp": 1633609637899, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "-0MCBxGItU78" + }, + "outputs": [], + "source": [ + "pd.set_option(\"precision\", 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cu652IOYtU79" + }, + "source": [ + "**Посмотрим на размер данных, названия признаков и их типы**" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 66, + "status": "ok", + "timestamp": 1633609637901, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "LQw6THQytU79", + "outputId": "b2d6d2f1-a6d1-47c6-e4bb-5c5f33834c4a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(3333, 20)\n" + ] + } + ], + "source": [ + "print(df.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LJEPKleBtU7-" + }, + "source": [ + "Видим, что в таблице 3333 строки и 20 столбцов. 
Выведем названия столбцов:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 57, + "status": "ok", + "timestamp": 1633609637903, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "CQArdzC8tU7_", + "outputId": "08e4c81f-5a94-4589-c4d3-c6792128de13" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['State', 'Account length', 'Area code', 'International plan',\n", + " 'Voice mail plan', 'Number vmail messages', 'Total day minutes',\n", + " 'Total day calls', 'Total day charge', 'Total eve minutes',\n", + " 'Total eve calls', 'Total eve charge', 'Total night minutes',\n", + " 'Total night calls', 'Total night charge', 'Total intl minutes',\n", + " 'Total intl calls', 'Total intl charge', 'Customer service calls',\n", + " 'Churn'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "print(df.columns)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RoZn1MpBtU8A" + }, + "source": [ + "Чтобы посмотреть общую информацию по датафрейму и всем признакам, воспользуемся методом **`info`**:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 54, + "status": "ok", + "timestamp": 1633609637906, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "W_ZF3eM8tU8B", + "outputId": "b4d58f04-d867-458f-bb5e-7b91fbdc9cd9", + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 3333 
entries, 0 to 3332\n", + "Data columns (total 20 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 State 3333 non-null object \n", + " 1 Account length 3333 non-null int64 \n", + " 2 Area code 3333 non-null int64 \n", + " 3 International plan 3333 non-null object \n", + " 4 Voice mail plan 3333 non-null object \n", + " 5 Number vmail messages 3333 non-null int64 \n", + " 6 Total day minutes 3333 non-null float64\n", + " 7 Total day calls 3333 non-null int64 \n", + " 8 Total day charge 3333 non-null float64\n", + " 9 Total eve minutes 3333 non-null float64\n", + " 10 Total eve calls 3333 non-null int64 \n", + " 11 Total eve charge 3333 non-null float64\n", + " 12 Total night minutes 3333 non-null float64\n", + " 13 Total night calls 3333 non-null int64 \n", + " 14 Total night charge 3333 non-null float64\n", + " 15 Total intl minutes 3333 non-null float64\n", + " 16 Total intl calls 3333 non-null int64 \n", + " 17 Total intl charge 3333 non-null float64\n", + " 18 Customer service calls 3333 non-null int64 \n", + " 19 Churn 3333 non-null bool \n", + "dtypes: bool(1), float64(8), int64(8), object(3)\n", + "memory usage: 498.1+ KB\n", + "None\n" + ] + } + ], + "source": [ + "print(df.info())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FYDNyB6CtU8C" + }, + "source": [ + "`bool`, `int64`, `float64` и `object` — это типы признаков. Видим, что 1 признак — логический (`bool`), 3 признака имеют тип `object` и 16 признаков — числовые.\n", + "\n", + "**Изменить тип колонки** можно с помощью метода `astype`. 
Применим этот метод к признаку `Churn` и переведём его в `int64`:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": true, + "executionInfo": { + "elapsed": 48, + "status": "ok", + "timestamp": 1633609637909, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "byRJQVM5tU8D" + }, + "outputs": [], + "source": [ + "df[\"Churn\"] = df[\"Churn\"].astype(\"int64\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sBTm0lLYtU8D" + }, + "source": [ + "Метод **`describe`** показывает основные статистические характеристики данных по каждому числовому признаку (типы `int64` и `float64`): число непропущенных значений, среднее, стандартное отклонение, диапазон, медиану, 0.25 и 0.75 квартили." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 335 + }, + "executionInfo": { + "elapsed": 48, + "status": "ok", + "timestamp": 1633609637911, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "bAsmrRI6tU8D", + "outputId": "32a7192a-b49b-4be7-9b6e-9b7f08f57731" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Account lengthArea codeNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
count3333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.003333.00
mean101.06437.188.10179.78100.4430.56200.98100.1117.08200.87100.119.0410.244.482.761.560.14
std39.8242.3713.6954.4720.079.2650.7119.924.3150.5719.572.282.792.460.751.320.35
min1.00408.000.000.000.000.000.000.000.0023.2033.001.040.000.000.000.000.00
25%74.00408.000.00143.7087.0024.43166.6087.0014.16167.0087.007.528.503.002.301.000.00
50%101.00415.000.00179.40101.0030.50201.40100.0017.12201.20100.009.0510.304.002.781.000.00
75%127.00510.0020.00216.40114.0036.79235.30114.0020.00235.30113.0010.5912.106.003.272.000.00
max243.00510.0051.00350.80165.0059.64363.70170.0030.91395.00175.0017.7720.0020.005.409.001.00
\n", + "
" + ], + "text/plain": [ + " Account length Area code Number vmail messages Total day minutes \\\n", + "count 3333.00 3333.00 3333.00 3333.00 \n", + "mean 101.06 437.18 8.10 179.78 \n", + "std 39.82 42.37 13.69 54.47 \n", + "min 1.00 408.00 0.00 0.00 \n", + "25% 74.00 408.00 0.00 143.70 \n", + "50% 101.00 415.00 0.00 179.40 \n", + "75% 127.00 510.00 20.00 216.40 \n", + "max 243.00 510.00 51.00 350.80 \n", + "\n", + " Total day calls Total day charge Total eve minutes Total eve calls \\\n", + "count 3333.00 3333.00 3333.00 3333.00 \n", + "mean 100.44 30.56 200.98 100.11 \n", + "std 20.07 9.26 50.71 19.92 \n", + "min 0.00 0.00 0.00 0.00 \n", + "25% 87.00 24.43 166.60 87.00 \n", + "50% 101.00 30.50 201.40 100.00 \n", + "75% 114.00 36.79 235.30 114.00 \n", + "max 165.00 59.64 363.70 170.00 \n", + "\n", + " Total eve charge Total night minutes Total night calls \\\n", + "count 3333.00 3333.00 3333.00 \n", + "mean 17.08 200.87 100.11 \n", + "std 4.31 50.57 19.57 \n", + "min 0.00 23.20 33.00 \n", + "25% 14.16 167.00 87.00 \n", + "50% 17.12 201.20 100.00 \n", + "75% 20.00 235.30 113.00 \n", + "max 30.91 395.00 175.00 \n", + "\n", + " Total night charge Total intl minutes Total intl calls \\\n", + "count 3333.00 3333.00 3333.00 \n", + "mean 9.04 10.24 4.48 \n", + "std 2.28 2.79 2.46 \n", + "min 1.04 0.00 0.00 \n", + "25% 7.52 8.50 3.00 \n", + "50% 9.05 10.30 4.00 \n", + "75% 10.59 12.10 6.00 \n", + "max 17.77 20.00 20.00 \n", + "\n", + " Total intl charge Customer service calls Churn \n", + "count 3333.00 3333.00 3333.00 \n", + "mean 2.76 1.56 0.14 \n", + "std 0.75 1.32 0.35 \n", + "min 0.00 0.00 0.00 \n", + "25% 2.30 1.00 0.00 \n", + "50% 2.78 1.00 0.00 \n", + "75% 3.27 2.00 0.00 \n", + "max 5.40 9.00 1.00 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l6MzhnkotU8D" + }, + "source": [ + "Чтобы посмотреть статистику по нечисловым 
признакам, нужно явно указать интересующие нас типы в параметре `include`. Можно также задать `include='all'`, чтобы вывести статистику по всем имеющимся признакам." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 639, + "status": "ok", + "timestamp": 1633609638506, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ewJscFGZtU8F", + "outputId": "187fb398-e4bf-4c36-f3ff-e395013e994f", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateInternational planVoice mail plan
count333333333333
unique5122
topWVNoNo
freq10630102411
\n", + "
" + ], + "text/plain": [ + " State International plan Voice mail plan\n", + "count 3333 3333 3333\n", + "unique 51 2 2\n", + "top WV No No\n", + "freq 106 3010 2411" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe(include=[\"object\", \"bool\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1qbs0vug9TCh" + }, + "source": [ + "Тот же принцип работает при выборе столбцов указанного типа." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "executionInfo": { + "elapsed": 120, + "status": "ok", + "timestamp": 1633609638538, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "tbL3f9OD9Tg7", + "outputId": "1489c427-200c-45fa-f127-369a97e46ea8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateInternational planVoice mail plan
0KSNoYes
1OHNoYes
2NJNoNo
3OHYesNo
4OKYesNo
............
3328AZNoYes
3329WVNoNo
3330RINoNo
3331CTYesNo
3332TNNoYes
\n", + "

3333 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " State International plan Voice mail plan\n", + "0 KS No Yes\n", + "1 OH No Yes\n", + "2 NJ No No\n", + "3 OH Yes No\n", + "4 OK Yes No\n", + "... ... ... ...\n", + "3328 AZ No Yes\n", + "3329 WV No No\n", + "3330 RI No No\n", + "3331 CT Yes No\n", + "3332 TN No Yes\n", + "\n", + "[3333 rows x 3 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.select_dtypes(include=['object', 'bool']) # exclude" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ge-uZsFvtU8G" + }, + "source": [ + "Для категориальных (тип `object`) и булевых (тип `bool`) признаков можно воспользоваться методом **`value_counts`**. Посмотрим на распределение нашей целевой переменной — `Churn`:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 115, + "status": "ok", + "timestamp": 1633609638540, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "eeDu-JiYtU8G", + "outputId": "19761b7d-d89b-49eb-e4bd-371bd68907d7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2850\n", + "1 483\n", + "Name: Churn, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Churn\"].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KANMt5q2tU8I" + }, + "source": [ + "2850 пользователей из 3333 — лояльные, значение переменной `Churn` у них — `0`.\n", + "\n", + "Посмотрим на распределение пользователей по переменной `Area code`. Укажем значение параметра `normalize=True`, чтобы посмотреть не абсолютные частоты, а относительные." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 109, + "status": "ok", + "timestamp": 1633609638542, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "pMenDSyHtU8I", + "outputId": "a99c176c-d2b0-45b9-e54f-653c1f060dd0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "415 0.50\n", + "510 0.25\n", + "408 0.25\n", + "Name: Area code, dtype: float64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Area code\"].value_counts(normalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l4ikQZaptU8I" + }, + "source": [ + "### Сортировка\n", + "\n", + "`DataFrame` можно отсортировать по значению какого-нибудь из признаков. В нашем случае, например, по `Total day charge` (`ascending=False` для сортировки по убыванию):" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + }, + "executionInfo": { + "elapsed": 102, + "status": "ok", + "timestamp": 1633609638544, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "GrbzIXBQtU8J", + "outputId": "7cf76892-8c0d-42fa-fa98-aa49f8c2ab6e" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
365CO154415NoNo0350.87559.64216.59418.40253.910011.4310.192.7311
985NY64415YesNo0346.85558.96249.57921.21275.410212.3913.393.5911
2594OH115510YesNo0345.38158.70203.410617.29217.51079.7911.883.1911
156OH83415NoNo0337.412057.36227.411619.33153.91146.9315.874.2701
605MO112415NoNo0335.57757.04212.510918.06265.013211.9312.783.4321
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "365 CO 154 415 No No \n", + "985 NY 64 415 Yes No \n", + "2594 OH 115 510 Yes No \n", + "156 OH 83 415 No No \n", + "605 MO 112 415 No No \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "365 0 350.8 75 \n", + "985 0 346.8 55 \n", + "2594 0 345.3 81 \n", + "156 0 337.4 120 \n", + "605 0 335.5 77 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "365 59.64 216.5 94 18.40 \n", + "985 58.96 249.5 79 21.21 \n", + "2594 58.70 203.4 106 17.29 \n", + "156 57.36 227.4 116 19.33 \n", + "605 57.04 212.5 109 18.06 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "365 253.9 100 11.43 \n", + "985 275.4 102 12.39 \n", + "2594 217.5 107 9.79 \n", + "156 153.9 114 6.93 \n", + "605 265.0 132 11.93 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "365 10.1 9 2.73 \n", + "985 13.3 9 3.59 \n", + "2594 11.8 8 3.19 \n", + "156 15.8 7 4.27 \n", + "605 12.7 8 3.43 \n", + "\n", + " Customer service calls Churn \n", + "365 1 1 \n", + "985 1 1 \n", + "2594 1 1 \n", + "156 0 1 \n", + "605 2 1 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_values(by=\"Total day charge\", ascending=False).head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "apUOhvc_tU8J" + }, + "source": [ + "Сортировать можно и по группе столбцов:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + }, + "executionInfo": { + "elapsed": 100, + "status": "ok", + "timestamp": 1633609638545, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + 
}, + "id": "KUU1Xp63tU8K", + "outputId": "0bbacb6a-bbf7-4697-b720-20033f341ff3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
688MN13510NoYes21315.610553.65208.97117.76260.112311.7012.133.2730
2259NC210415NoYes31313.88753.35147.710312.55192.7978.6710.172.7330
534LA67510NoNo0310.49752.7766.51235.65246.59911.099.2102.4840
575SD114415NoYes36309.99052.68200.38917.03183.51058.2614.223.8310
2858AL141510NoYes28308.012352.36247.812821.06152.91036.887.432.0010
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "688 MN 13 510 No Yes \n", + "2259 NC 210 415 No Yes \n", + "534 LA 67 510 No No \n", + "575 SD 114 415 No Yes \n", + "2858 AL 141 510 No Yes \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "688 21 315.6 105 \n", + "2259 31 313.8 87 \n", + "534 0 310.4 97 \n", + "575 36 309.9 90 \n", + "2858 28 308.0 123 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "688 53.65 208.9 71 17.76 \n", + "2259 53.35 147.7 103 12.55 \n", + "534 52.77 66.5 123 5.65 \n", + "575 52.68 200.3 89 17.03 \n", + "2858 52.36 247.8 128 21.06 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "688 260.1 123 11.70 \n", + "2259 192.7 97 8.67 \n", + "534 246.5 99 11.09 \n", + "575 183.5 105 8.26 \n", + "2858 152.9 103 6.88 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "688 12.1 3 3.27 \n", + "2259 10.1 7 2.73 \n", + "534 9.2 10 2.48 \n", + "575 14.2 2 3.83 \n", + "2858 7.4 3 2.00 \n", + "\n", + " Customer service calls Churn \n", + "688 3 0 \n", + "2259 3 0 \n", + "534 4 0 \n", + "575 1 0 \n", + "2858 1 0 " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_values(by=[\"Churn\", \"Total day charge\"], ascending=[True, False]).head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VCTKeJUYtU8L" + }, + "source": [ + "### Индексация и извлечение данных" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lveNXBbztU8L" + }, + "source": [ + "`DataFrame` можно индексировать по-разному. В связи с этим рассмотрим различные способы индексации и извлечения нужных нам данных из датафрейма на примере простых вопросов.\n", + "\n", + "Для извлечения отдельного столбца можно использовать конструкцию вида `DataFrame['Name']`. 
Воспользуемся этим для ответа на вопрос: **какова доля нелояльных пользователей в нашем датафрейме?**" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 98, + "status": "ok", + "timestamp": 1633609638547, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "FLaA5u1ztU8L", + "outputId": "d1b61bde-7b0c-45d0-c2e9-32d9bb9539c0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.14491449144914492" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Churn\"].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QiJUnpEJtU8M" + }, + "source": [ + "14,5% — довольно плохой показатель для компании, с таким процентом оттока можно и разориться." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2v6CRyJ3tU8M" + }, + "source": [ + "Очень удобной является логическая индексация `DataFrame` по одному столбцу. Выглядит она следующим образом: `df[P(df['Name'])]`, где `P` - это некоторое логическое условие, проверяемое для каждого элемента столбца `Name`. Итогом такой индексации является `DataFrame`, состоящий только из строк, удовлетворяющих условию `P` по столбцу `Name`. 
\n", + "\n", + "Воспользуемся этим для ответа на вопрос: **каковы средние значения числовых признаков среди нелояльных пользователей?**" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 90, + "status": "ok", + "timestamp": 1633609638548, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "0G0_4zPytU8O", + "outputId": "79d763ca-3a4e-4408-f218-e5996dbd68bb", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Account length 102.66\n", + "Area code 437.82\n", + "Number vmail messages 5.12\n", + "Total day minutes 206.91\n", + "Total day calls 101.34\n", + "Total day charge 35.18\n", + "Total eve minutes 212.41\n", + "Total eve calls 100.56\n", + "Total eve charge 18.05\n", + "Total night minutes 205.23\n", + "Total night calls 100.40\n", + "Total night charge 9.24\n", + "Total intl minutes 10.70\n", + "Total intl calls 4.16\n", + "Total intl charge 2.89\n", + "Customer service calls 2.23\n", + "Churn 1.00\n", + "dtype: float64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df[\"Churn\"] == 1].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vX7Kv82ztU8O" + }, + "source": [ + "Скомбинировав предыдущие два вида индексации, ответим на вопрос: **сколько в среднем в течение дня разговаривают по телефону нелояльные пользователи**?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 87, + "status": "ok", + "timestamp": 1633609638551, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ZmpzMz9LtU8O", + "outputId": "f4ef2f49-5d18-4228-b513-96402e23b1b4" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "206.91407867494814" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df[\"Churn\"] == 1][\"Total day minutes\"].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rME2EKe8tU8P" + }, + "source": [ + "**Какова максимальная длина международных звонков среди лояльных пользователей (`Churn == 0`), не пользующихся услугой международного роуминга (`'International plan' == 'No'`)?**" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 82, + "status": "ok", + "timestamp": 1633609638552, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "DQ0H-bJttU8Q", + "outputId": "3c8a6304-7ede-495b-f2cc-dcf70beb252f" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "18.9" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[(df[\"Churn\"] == 0) & (df[\"International plan\"] == \"No\")][\"Total intl minutes\"].max()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f6IelrO4tU8Q" + }, + "source": [ + "Датафреймы можно индексировать как по названию столбца или строки, так и 
по порядковому номеру. Для индексации **по названию** используется метод **`loc`**, **по номеру** — **`iloc`**.\n", + "\n", + "В первом случае мы говорим _«передай нам значения для id строк от 0 до 5 и для столбцов от State до Area code»_, а во втором — _«передай нам значения первых пяти строк в первых трёх столбцах»_. \n", + "\n", + "В случае `iloc` срез работает как обычно, однако в случае `loc` учитываются и начало, и конец среза. Да, неудобно, да, вызывает путаницу." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "executionInfo": { + "elapsed": 78, + "status": "ok", + "timestamp": 1633609638554, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "Pp82lj7ktU8R", + "outputId": "8e2a9392-b3f0-44ee-e383-b19a46f8d708", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Account lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
0128415NoYes25265.111045.07197.49916.78244.79111.0110.032.7010
1107415NoYes26161.612327.47195.510316.62254.410311.4513.733.7010
2137415NoNo0243.411441.38121.211010.30162.61047.3212.253.2900
375415YesNo0166.711328.34148.312212.61186.91218.4110.132.7330
4118510YesNo0223.49837.98220.610118.75203.91189.186.361.7000
5121510NoYes24218.28837.09348.510829.62212.61189.577.572.0330
6147415YesNo0157.07926.69103.1948.76211.8969.537.161.9200
7117408NoNo0184.59731.37351.68029.89215.8909.718.742.3510
8141415YesYes37258.68443.96222.011118.87326.49714.6911.253.0200
965415NoNo0129.113721.95228.58319.42208.81119.4012.763.4341
1074415NoNo0187.712731.91163.414813.89196.0948.829.152.4600
11168408NoNo0128.89621.90104.9718.92141.11286.3511.223.0210
1295510NoNo0156.68826.62247.67521.05192.31158.6512.353.3230
13161415NoNo0332.96756.59317.89727.01160.61287.235.491.4641
1485408NoYes27196.413933.39280.99023.8889.3754.0213.843.7310
1593510NoNo0190.711432.42218.211118.55129.61215.838.132.1930
1676510NoYes33189.76632.25212.86518.09165.71087.4610.052.7010
1773415NoNo0224.49038.15159.58813.56192.8748.6813.023.5110
18147415NoNo0155.111726.37239.79320.37208.81339.4010.642.8600
1977408NoNo062.48910.61169.912114.44209.6649.435.761.5451
20130415NoNo0183.011231.1172.9996.20181.8788.189.5192.5700
21111415NoNo0110.410318.77137.310211.67189.61058.537.762.0820
22174415NoNo0124.37621.13277.111223.55250.711511.2815.554.1930
2357408NoYes39213.011536.21191.111216.24182.71158.229.532.5700
2449510NoNo0119.311720.28215.110918.28178.7908.0411.113.0010
25142415NoNo084.89514.42136.76311.62250.514811.2714.263.8320
2675510NoNo0226.110538.44201.510717.13246.29811.0810.352.7810
2772415NoYes37220.08037.40217.310218.47152.8716.8814.763.9730
2836408NoYes30146.312824.87162.58013.81129.31095.8214.563.9200
29135408YesYes41173.18529.43203.910717.33122.2785.5014.6153.9401
3034510NoNo0124.88221.22282.29823.99311.57814.0210.042.7020
3164510NoNo0154.06726.18225.811819.19265.38611.943.530.9510
3259408NoYes28120.99720.55213.09218.11163.11167.348.552.3020
3365415NoNo0211.312035.92162.612213.82134.71186.0613.253.5630
34142408NoNo0187.013331.79134.67411.44242.212710.907.452.0020
3596415NoNo0160.211727.23267.56722.74228.56810.289.352.5120
36116415NoYes34268.68345.66178.214215.15166.31067.4811.633.1320
3774510NoYes33193.79132.93246.19620.92138.0926.2114.633.9420
38149408NoYes28180.79230.72187.86415.96265.55311.9512.633.4030
3938408NoNo0131.29822.30162.99713.85159.01067.158.262.2120
4040415NoYes41148.17425.18169.58814.41214.11029.636.251.6720
41147510NoNo0248.68342.26148.98512.66172.51097.768.042.1630
4290415NoNo0203.414634.58226.711719.27152.41056.867.341.9710
4382415NoNo0300.310951.05181.010015.39270.17312.1511.743.1601
4474415NoYes35154.110426.20123.48410.49202.1579.0910.992.9420
4578415NoNo0252.99342.99178.411215.16263.910511.889.572.5730
46120408NoNo0212.113136.06209.410417.80167.2967.525.351.4311
4778415NoNo0149.711925.45182.211515.49261.512611.779.782.6200
4882415NoYes24155.213126.38244.510620.78122.4685.5110.732.8910
49199415NoYes34230.612139.20219.49918.65299.39413.478.022.1600
5079408NoNo0205.712334.97214.510818.23226.110610.176.7181.8110
\n", + "
" + ], + "text/plain": [ + " Account length Area code International plan Voice mail plan \\\n", + "0 128 415 No Yes \n", + "1 107 415 No Yes \n", + "2 137 415 No No \n", + "3 75 415 Yes No \n", + "4 118 510 Yes No \n", + "5 121 510 No Yes \n", + "6 147 415 Yes No \n", + "7 117 408 No No \n", + "8 141 415 Yes Yes \n", + "9 65 415 No No \n", + "10 74 415 No No \n", + "11 168 408 No No \n", + "12 95 510 No No \n", + "13 161 415 No No \n", + "14 85 408 No Yes \n", + "15 93 510 No No \n", + "16 76 510 No Yes \n", + "17 73 415 No No \n", + "18 147 415 No No \n", + "19 77 408 No No \n", + "20 130 415 No No \n", + "21 111 415 No No \n", + "22 174 415 No No \n", + "23 57 408 No Yes \n", + "24 49 510 No No \n", + "25 142 415 No No \n", + "26 75 510 No No \n", + "27 72 415 No Yes \n", + "28 36 408 No Yes \n", + "29 135 408 Yes Yes \n", + "30 34 510 No No \n", + "31 64 510 No No \n", + "32 59 408 No Yes \n", + "33 65 415 No No \n", + "34 142 408 No No \n", + "35 96 415 No No \n", + "36 116 415 No Yes \n", + "37 74 510 No Yes \n", + "38 149 408 No Yes \n", + "39 38 408 No No \n", + "40 40 415 No Yes \n", + "41 147 510 No No \n", + "42 90 415 No No \n", + "43 82 415 No No \n", + "44 74 415 No Yes \n", + "45 78 415 No No \n", + "46 120 408 No No \n", + "47 78 415 No No \n", + "48 82 415 No Yes \n", + "49 199 415 No Yes \n", + "50 79 408 No No \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "1 26 161.6 123 \n", + "2 0 243.4 114 \n", + "3 0 166.7 113 \n", + "4 0 223.4 98 \n", + "5 24 218.2 88 \n", + "6 0 157.0 79 \n", + "7 0 184.5 97 \n", + "8 37 258.6 84 \n", + "9 0 129.1 137 \n", + "10 0 187.7 127 \n", + "11 0 128.8 96 \n", + "12 0 156.6 88 \n", + "13 0 332.9 67 \n", + "14 27 196.4 139 \n", + "15 0 190.7 114 \n", + "16 33 189.7 66 \n", + "17 0 224.4 90 \n", + "18 0 155.1 117 \n", + "19 0 62.4 89 \n", + "20 0 183.0 112 \n", + "21 0 110.4 103 \n", + "22 0 124.3 76 \n", + "23 39 213.0 115 \n", + "24 0 119.3 117 \n", + "25 0 
84.8 95 \n", + "26 0 226.1 105 \n", + "27 37 220.0 80 \n", + "28 30 146.3 128 \n", + "29 41 173.1 85 \n", + "30 0 124.8 82 \n", + "31 0 154.0 67 \n", + "32 28 120.9 97 \n", + "33 0 211.3 120 \n", + "34 0 187.0 133 \n", + "35 0 160.2 117 \n", + "36 34 268.6 83 \n", + "37 33 193.7 91 \n", + "38 28 180.7 92 \n", + "39 0 131.2 98 \n", + "40 41 148.1 74 \n", + "41 0 248.6 83 \n", + "42 0 203.4 146 \n", + "43 0 300.3 109 \n", + "44 35 154.1 104 \n", + "45 0 252.9 93 \n", + "46 0 212.1 131 \n", + "47 0 149.7 119 \n", + "48 24 155.2 131 \n", + "49 34 230.6 121 \n", + "50 0 205.7 123 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "1 27.47 195.5 103 16.62 \n", + "2 41.38 121.2 110 10.30 \n", + "3 28.34 148.3 122 12.61 \n", + "4 37.98 220.6 101 18.75 \n", + "5 37.09 348.5 108 29.62 \n", + "6 26.69 103.1 94 8.76 \n", + "7 31.37 351.6 80 29.89 \n", + "8 43.96 222.0 111 18.87 \n", + "9 21.95 228.5 83 19.42 \n", + "10 31.91 163.4 148 13.89 \n", + "11 21.90 104.9 71 8.92 \n", + "12 26.62 247.6 75 21.05 \n", + "13 56.59 317.8 97 27.01 \n", + "14 33.39 280.9 90 23.88 \n", + "15 32.42 218.2 111 18.55 \n", + "16 32.25 212.8 65 18.09 \n", + "17 38.15 159.5 88 13.56 \n", + "18 26.37 239.7 93 20.37 \n", + "19 10.61 169.9 121 14.44 \n", + "20 31.11 72.9 99 6.20 \n", + "21 18.77 137.3 102 11.67 \n", + "22 21.13 277.1 112 23.55 \n", + "23 36.21 191.1 112 16.24 \n", + "24 20.28 215.1 109 18.28 \n", + "25 14.42 136.7 63 11.62 \n", + "26 38.44 201.5 107 17.13 \n", + "27 37.40 217.3 102 18.47 \n", + "28 24.87 162.5 80 13.81 \n", + "29 29.43 203.9 107 17.33 \n", + "30 21.22 282.2 98 23.99 \n", + "31 26.18 225.8 118 19.19 \n", + "32 20.55 213.0 92 18.11 \n", + "33 35.92 162.6 122 13.82 \n", + "34 31.79 134.6 74 11.44 \n", + "35 27.23 267.5 67 22.74 \n", + "36 45.66 178.2 142 15.15 \n", + "37 32.93 246.1 96 20.92 \n", + "38 30.72 187.8 64 15.96 \n", + "39 22.30 162.9 97 13.85 \n", + "40 25.18 169.5 88 14.41 \n", + "41 
42.26 148.9 85 12.66 \n", + "42 34.58 226.7 117 19.27 \n", + "43 51.05 181.0 100 15.39 \n", + "44 26.20 123.4 84 10.49 \n", + "45 42.99 178.4 112 15.16 \n", + "46 36.06 209.4 104 17.80 \n", + "47 25.45 182.2 115 15.49 \n", + "48 26.38 244.5 106 20.78 \n", + "49 39.20 219.4 99 18.65 \n", + "50 34.97 214.5 108 18.23 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "1 254.4 103 11.45 \n", + "2 162.6 104 7.32 \n", + "3 186.9 121 8.41 \n", + "4 203.9 118 9.18 \n", + "5 212.6 118 9.57 \n", + "6 211.8 96 9.53 \n", + "7 215.8 90 9.71 \n", + "8 326.4 97 14.69 \n", + "9 208.8 111 9.40 \n", + "10 196.0 94 8.82 \n", + "11 141.1 128 6.35 \n", + "12 192.3 115 8.65 \n", + "13 160.6 128 7.23 \n", + "14 89.3 75 4.02 \n", + "15 129.6 121 5.83 \n", + "16 165.7 108 7.46 \n", + "17 192.8 74 8.68 \n", + "18 208.8 133 9.40 \n", + "19 209.6 64 9.43 \n", + "20 181.8 78 8.18 \n", + "21 189.6 105 8.53 \n", + "22 250.7 115 11.28 \n", + "23 182.7 115 8.22 \n", + "24 178.7 90 8.04 \n", + "25 250.5 148 11.27 \n", + "26 246.2 98 11.08 \n", + "27 152.8 71 6.88 \n", + "28 129.3 109 5.82 \n", + "29 122.2 78 5.50 \n", + "30 311.5 78 14.02 \n", + "31 265.3 86 11.94 \n", + "32 163.1 116 7.34 \n", + "33 134.7 118 6.06 \n", + "34 242.2 127 10.90 \n", + "35 228.5 68 10.28 \n", + "36 166.3 106 7.48 \n", + "37 138.0 92 6.21 \n", + "38 265.5 53 11.95 \n", + "39 159.0 106 7.15 \n", + "40 214.1 102 9.63 \n", + "41 172.5 109 7.76 \n", + "42 152.4 105 6.86 \n", + "43 270.1 73 12.15 \n", + "44 202.1 57 9.09 \n", + "45 263.9 105 11.88 \n", + "46 167.2 96 7.52 \n", + "47 261.5 126 11.77 \n", + "48 122.4 68 5.51 \n", + "49 299.3 94 13.47 \n", + "50 226.1 106 10.17 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "1 13.7 3 3.70 \n", + "2 12.2 5 3.29 \n", + "3 10.1 3 2.73 \n", + "4 6.3 6 1.70 \n", + "5 7.5 7 2.03 \n", + "6 7.1 6 1.92 \n", + "7 8.7 4 2.35 \n", + "8 11.2 5 3.02 \n", + "9 12.7 6 3.43 \n", + "10 
9.1 5 2.46 \n", + "11 11.2 2 3.02 \n", + "12 12.3 5 3.32 \n", + "13 5.4 9 1.46 \n", + "14 13.8 4 3.73 \n", + "15 8.1 3 2.19 \n", + "16 10.0 5 2.70 \n", + "17 13.0 2 3.51 \n", + "18 10.6 4 2.86 \n", + "19 5.7 6 1.54 \n", + "20 9.5 19 2.57 \n", + "21 7.7 6 2.08 \n", + "22 15.5 5 4.19 \n", + "23 9.5 3 2.57 \n", + "24 11.1 1 3.00 \n", + "25 14.2 6 3.83 \n", + "26 10.3 5 2.78 \n", + "27 14.7 6 3.97 \n", + "28 14.5 6 3.92 \n", + "29 14.6 15 3.94 \n", + "30 10.0 4 2.70 \n", + "31 3.5 3 0.95 \n", + "32 8.5 5 2.30 \n", + "33 13.2 5 3.56 \n", + "34 7.4 5 2.00 \n", + "35 9.3 5 2.51 \n", + "36 11.6 3 3.13 \n", + "37 14.6 3 3.94 \n", + "38 12.6 3 3.40 \n", + "39 8.2 6 2.21 \n", + "40 6.2 5 1.67 \n", + "41 8.0 4 2.16 \n", + "42 7.3 4 1.97 \n", + "43 11.7 4 3.16 \n", + "44 10.9 9 2.94 \n", + "45 9.5 7 2.57 \n", + "46 5.3 5 1.43 \n", + "47 9.7 8 2.62 \n", + "48 10.7 3 2.89 \n", + "49 8.0 2 2.16 \n", + "50 6.7 18 1.81 \n", + "\n", + " Customer service calls Churn \n", + "0 1 0 \n", + "1 1 0 \n", + "2 0 0 \n", + "3 3 0 \n", + "4 0 0 \n", + "5 3 0 \n", + "6 0 0 \n", + "7 1 0 \n", + "8 0 0 \n", + "9 4 1 \n", + "10 0 0 \n", + "11 1 0 \n", + "12 3 0 \n", + "13 4 1 \n", + "14 1 0 \n", + "15 3 0 \n", + "16 1 0 \n", + "17 1 0 \n", + "18 0 0 \n", + "19 5 1 \n", + "20 0 0 \n", + "21 2 0 \n", + "22 3 0 \n", + "23 0 0 \n", + "24 1 0 \n", + "25 2 0 \n", + "26 1 0 \n", + "27 3 0 \n", + "28 0 0 \n", + "29 0 1 \n", + "30 2 0 \n", + "31 1 0 \n", + "32 2 0 \n", + "33 3 0 \n", + "34 2 0 \n", + "35 2 0 \n", + "36 2 0 \n", + "37 2 0 \n", + "38 3 0 \n", + "39 2 0 \n", + "40 2 0 \n", + "41 3 0 \n", + "42 1 0 \n", + "43 0 1 \n", + "44 2 0 \n", + "45 3 0 \n", + "46 1 1 \n", + "47 0 0 \n", + "48 1 0 \n", + "49 0 0 \n", + "50 1 0 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d = df.copy()\n", + "d = d.drop_duplicates('State')\n", + "d = d.set_index('State')\n", + "# d = d.reset_index() # сбрасываем столбец-индекс не удаляя его\n", + "d = 
d.reset_index(drop=True) # сбрасываем столбец-индекс удаляя его\n", + "d\n", + "# d.loc['KS':'OK','Area code':'Total day minutes']" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "executionInfo": { + "elapsed": 75, + "status": "ok", + "timestamp": 1633609638556, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "qGN5gaALtU8R", + "outputId": "1f803cde-6306-4ebe-cd3a-1cb7ad85010e", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea code
0KS128415
1OH107415
2NJ137415
3OH84408
4OK75415
\n", + "
" + ], + "text/plain": [ + " State Account length Area code\n", + "0 KS 128 415\n", + "1 OH 107 415\n", + "2 NJ 137 415\n", + "3 OH 84 408\n", + "4 OK 75 415" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0:5, 0:3]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UCMKdcx9tU8S" + }, + "source": [ + "Метод `ix` индексирует и по названию, и по номеру, но он вызывает путаницу, и поэтому был объявлен устаревшим (deprecated)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HnMAXWTAtU8S" + }, + "source": [ + "Если нам нужна первая или последняя строчка датафрейма, пользуемся конструкцией `df[:1]` или `df[-1:]`:" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 115 + }, + "executionInfo": { + "elapsed": 74, + "status": "ok", + "timestamp": 1633609638558, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "OrwoqAGPtU8U", + "outputId": "53bc6332-8c03-4b98-9335-295812d859cd", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurn
3332TN74415NoYes25234.411339.85265.98222.6241.47710.8613.743.700
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "3332 TN 74 415 No Yes \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "3332 25 234.4 113 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "3332 39.85 265.9 82 22.6 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "3332 241.4 77 10.86 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "3332 13.7 4 3.7 \n", + "\n", + " Customer service calls Churn \n", + "3332 0 0 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[-1:]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ur_--vTVtU8W" + }, + "source": [ + "### Применение функций: `apply`, `map` и др." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "da6UVfVjtU8W" + }, + "source": [ + "**Применение функции к каждому столбцу:**" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 71, + "status": "ok", + "timestamp": 1633609638559, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "LIlX4ORVtU8W", + "outputId": "315e42ff-9efa-4fa9-e41f-cba08a9534d2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "State WY\n", + "Account length 243\n", + "Area code 510\n", + "International plan Yes\n", + "Voice mail plan Yes\n", + "Number vmail messages 51\n", + "Total day minutes 3.5e+02\n", + "Total day calls 165\n", + "Total day charge 60\n", + "Total eve minutes 3.6e+02\n", + "Total eve calls 170\n", + "Total eve charge 31\n", + "Total night minutes 4e+02\n", + "Total night calls 175\n", + "Total night 
charge 18\n", + "Total intl minutes 20\n", + "Total intl calls 20\n", + "Total intl charge 5.4\n", + "Customer service calls 9\n", + "Churn 1\n", + "dtype: object" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.apply(np.max)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 478 + }, + "executionInfo": { + "elapsed": 66, + "status": "ok", + "timestamp": 1633609638561, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "p-mUIP9HQakx", + "outputId": "bbe218f1-eddb-4ff4-eb09-6a148e04a29c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415NoYes25265.111045.07197.49916.78244.79111.0110.032.7010100
1OH107415NoYes26161.612327.47195.510316.62254.410311.4513.733.7010104
2NJ137415NoNo0243.411441.38121.211010.30162.61047.3212.253.29004
3OH84408YesNo0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415YesNo0166.711328.34148.312212.61186.91218.4110.132.73304
..................................................................
3328AZ192415NoYes36156.27726.55215.512618.32279.18312.569.962.6720144
3329WV68415NoNo0231.15739.29153.45513.04191.31238.619.642.59304
3330RI28510NoNo0180.810930.74288.85824.55191.9918.6414.163.81204
3331CT184510YesNo0213.810536.35159.68413.57139.21376.265.0101.35204
3332TN74415NoYes25234.411339.85265.98222.60241.47710.8613.743.7000100
\n", + "

3333 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "0 KS 128 415 No Yes \n", + "1 OH 107 415 No Yes \n", + "2 NJ 137 415 No No \n", + "3 OH 84 408 Yes No \n", + "4 OK 75 415 Yes No \n", + "... ... ... ... ... ... \n", + "3328 AZ 192 415 No Yes \n", + "3329 WV 68 415 No No \n", + "3330 RI 28 510 No No \n", + "3331 CT 184 510 Yes No \n", + "3332 TN 74 415 No Yes \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "1 26 161.6 123 \n", + "2 0 243.4 114 \n", + "3 0 299.4 71 \n", + "4 0 166.7 113 \n", + "... ... ... ... \n", + "3328 36 156.2 77 \n", + "3329 0 231.1 57 \n", + "3330 0 180.8 109 \n", + "3331 0 213.8 105 \n", + "3332 25 234.4 113 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "1 27.47 195.5 103 16.62 \n", + "2 41.38 121.2 110 10.30 \n", + "3 50.90 61.9 88 5.26 \n", + "4 28.34 148.3 122 12.61 \n", + "... ... ... ... ... \n", + "3328 26.55 215.5 126 18.32 \n", + "3329 39.29 153.4 55 13.04 \n", + "3330 30.74 288.8 58 24.55 \n", + "3331 36.35 159.6 84 13.57 \n", + "3332 39.85 265.9 82 22.60 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "1 254.4 103 11.45 \n", + "2 162.6 104 7.32 \n", + "3 196.9 89 8.86 \n", + "4 186.9 121 8.41 \n", + "... ... ... ... \n", + "3328 279.1 83 12.56 \n", + "3329 191.3 123 8.61 \n", + "3330 191.9 91 8.64 \n", + "3331 139.2 137 6.26 \n", + "3332 241.4 77 10.86 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "1 13.7 3 3.70 \n", + "2 12.2 5 3.29 \n", + "3 6.6 7 1.78 \n", + "4 10.1 3 2.73 \n", + "... ... ... ... 
\n", + "3328 9.9 6 2.67 \n", + "3329 9.6 4 2.59 \n", + "3330 14.1 6 3.81 \n", + "3331 5.0 10 1.35 \n", + "3332 13.7 4 3.70 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages \n", + "0 1 0 100 \n", + "1 1 0 104 \n", + "2 0 0 4 \n", + "3 2 0 4 \n", + "4 3 0 4 \n", + "... ... ... ... \n", + "3328 2 0 144 \n", + "3329 3 0 4 \n", + "3330 2 0 4 \n", + "3331 2 0 4 \n", + "3332 0 0 100 \n", + "\n", + "[3333 rows x 21 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def make_feature(row):\n", + " if row['Voice mail plan'] == 'Yes':\n", + " return row['Number vmail messages'] * 4\n", + " return row['Number vmail messages'] + 4\n", + "df['new_Number_vmail_messages'] = df.apply(make_feature, axis=1)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j2fEIU5ptU8Y" + }, + "source": [ + "Метод `apply` можно использовать и для того, чтобы применить функцию к каждой строке. Для этого нужно указать `axis=1`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e-TxyhUttU8Y" + }, + "source": [ + "**Применение функции к каждой ячейке столбца**\n", + "\n", + "Допустим, по какой-то причине нас интересуют все люди из штатов, названия которых начинаются на 'W'. В данном случае это можно сделать по-разному, но наибольшую свободу дает связка `apply`-`lambda` – применение функции ко всем значениям в столбце."
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 261 + }, + "executionInfo": { + "elapsed": 65, + "status": "ok", + "timestamp": 1633609638563, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "-jnLxPnWtU8Z", + "outputId": "bcab7df7-ed63-45c1-816b-ec70b6e4fa7b", + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
9WV141415YesYes37258.68443.96222.011118.87326.49714.6911.253.0200148
26WY57408NoYes39213.011536.21191.111216.24182.71158.229.532.5700156
44WI64510NoNo0154.06726.18225.811819.19265.38611.943.530.95104
49WY97415NoYes24133.213522.64217.25818.4670.6793.1811.032.971096
54WY87415NoNo0151.08325.67219.711618.67203.91279.189.732.62514
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "9 WV 141 415 Yes Yes \n", + "26 WY 57 408 No Yes \n", + "44 WI 64 510 No No \n", + "49 WY 97 415 No Yes \n", + "54 WY 87 415 No No \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "9 37 258.6 84 \n", + "26 39 213.0 115 \n", + "44 0 154.0 67 \n", + "49 24 133.2 135 \n", + "54 0 151.0 83 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "9 43.96 222.0 111 18.87 \n", + "26 36.21 191.1 112 16.24 \n", + "44 26.18 225.8 118 19.19 \n", + "49 22.64 217.2 58 18.46 \n", + "54 25.67 219.7 116 18.67 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "9 326.4 97 14.69 \n", + "26 182.7 115 8.22 \n", + "44 265.3 86 11.94 \n", + "49 70.6 79 3.18 \n", + "54 203.9 127 9.18 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "9 11.2 5 3.02 \n", + "26 9.5 3 2.57 \n", + "44 3.5 3 0.95 \n", + "49 11.0 3 2.97 \n", + "54 9.7 3 2.62 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages \n", + "9 0 0 148 \n", + "26 0 0 156 \n", + "44 1 0 4 \n", + "49 1 0 96 \n", + "54 5 1 4 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df[\"State\"].apply(lambda state: state[0] == \"W\")].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q6SkeDiJtU8Z" + }, + "source": [ + "Метод `map` можно использовать и для **замены значений в колонке**, передав ему в качестве аргумента словарь вида `{old_value: new_value}`:" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 261 + }, + "executionInfo": { + "elapsed": 63, + "status": "ok", + "timestamp": 1633609638564, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": 
"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "q3lbm6XXtU8a", + "outputId": "10f505a4-3227-4ff5-b868-1efaadf4a181" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415FalseYes25265.111045.07197.49916.78244.79111.0110.032.7010100
1OH107415FalseYes26161.612327.47195.510316.62254.410311.4513.733.7010104
2NJ137415FalseNo0243.411441.38121.211010.30162.61047.3212.253.29004
3OH84408TrueNo0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415TrueNo0166.711328.34148.312212.61186.91218.4110.132.73304
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "0 KS 128 415 False Yes \n", + "1 OH 107 415 False Yes \n", + "2 NJ 137 415 False No \n", + "3 OH 84 408 True No \n", + "4 OK 75 415 True No \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "1 26 161.6 123 \n", + "2 0 243.4 114 \n", + "3 0 299.4 71 \n", + "4 0 166.7 113 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "1 27.47 195.5 103 16.62 \n", + "2 41.38 121.2 110 10.30 \n", + "3 50.90 61.9 88 5.26 \n", + "4 28.34 148.3 122 12.61 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "1 254.4 103 11.45 \n", + "2 162.6 104 7.32 \n", + "3 196.9 89 8.86 \n", + "4 186.9 121 8.41 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "1 13.7 3 3.70 \n", + "2 12.2 5 3.29 \n", + "3 6.6 7 1.78 \n", + "4 10.1 3 2.73 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages \n", + "0 1 0 100 \n", + "1 1 0 104 \n", + "2 0 0 4 \n", + "3 2 0 4 \n", + "4 3 0 4 " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d = {\"No\": False, \"Yes\": True}\n", + "df[\"International plan\"] = df[\"International plan\"].map(d)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YkK8_gEBtU8b" + }, + "source": [ + "Аналогичную операцию можно провернуть с помощью метода `replace`:" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 261 + }, + "executionInfo": { + "elapsed": 653, + "status": "ok", + "timestamp": 1633609639156, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": 
"https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "xop7OSmZtU8b", + "outputId": "16d543f3-5373-45d1-cb55-b69355e6a5cb" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.7010100
1OH107415FalseTrue26161.612327.47195.510316.62254.410311.4513.733.7010104
2NJ137415FalseFalse0243.411441.38121.211010.30162.61047.3212.253.29004
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.73304
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "0 KS 128 415 False True \n", + "1 OH 107 415 False True \n", + "2 NJ 137 415 False False \n", + "3 OH 84 408 True False \n", + "4 OK 75 415 True False \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "1 26 161.6 123 \n", + "2 0 243.4 114 \n", + "3 0 299.4 71 \n", + "4 0 166.7 113 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "1 27.47 195.5 103 16.62 \n", + "2 41.38 121.2 110 10.30 \n", + "3 50.90 61.9 88 5.26 \n", + "4 28.34 148.3 122 12.61 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "1 254.4 103 11.45 \n", + "2 162.6 104 7.32 \n", + "3 196.9 89 8.86 \n", + "4 186.9 121 8.41 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "1 13.7 3 3.70 \n", + "2 12.2 5 3.29 \n", + "3 6.6 7 1.78 \n", + "4 10.1 3 2.73 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages \n", + "0 1 0 100 \n", + "1 1 0 104 \n", + "2 0 0 4 \n", + "3 2 0 4 \n", + "4 3 0 4 " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.replace({\"Voice mail plan\": d})\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sJ9KC2CrtU8d" + }, + "source": [ + "### Группировка данных\n", + "\n", + "В общем случае группировка данных в Pandas выглядит следующим образом:\n", + "\n", + "```\n", + "df.groupby(by=grouping_columns)[columns_to_show].function()\n", + "```\n", + "\n", + "1. К датафрейму применяется метод **`groupby`**, который разделяет данные по `grouping_columns` – признаку или набору признаков.\n", + "2. Индексируем по нужным нам столбцам (`columns_to_show`). \n", + "3. К полученным группам применяется функция или несколько функций."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wiHvK8LFtU8d" + }, + "source": [ + "**Группирование данных в зависимости от значения признака `Churn` и вывод статистик по трём столбцам в каждой группе.**" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 99, + "status": "ok", + "timestamp": 1633609639159, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "pXrstrQgtU8d", + "outputId": "42ed5c1c-65c1-457f-cd03-6a26bb60da9f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Total day minutesTotal eve minutesTotal night minutes
countmeanstdmin50%maxcountmeanstdmin50%maxcountmeanstdmin50%max
Churn
02850.0175.1850.180.0177.2315.62850.0199.0450.290.0199.6361.82850.0200.1351.1123.2200.25395.0
1483.0206.9169.000.0217.6350.8483.0212.4151.7370.9211.3363.7483.0205.2347.1347.4204.80354.9
\n", + "
" + ], + "text/plain": [ + " Total day minutes Total eve minutes \\\n", + " count mean std min 50% max count \n", + "Churn \n", + "0 2850.0 175.18 50.18 0.0 177.2 315.6 2850.0 \n", + "1 483.0 206.91 69.00 0.0 217.6 350.8 483.0 \n", + "\n", + " Total night minutes \\\n", + " mean std min 50% max count mean std \n", + "Churn \n", + "0 199.04 50.29 0.0 199.6 361.8 2850.0 200.13 51.11 \n", + "1 212.41 51.73 70.9 211.3 363.7 483.0 205.23 47.13 \n", + "\n", + " \n", + " min 50% max \n", + "Churn \n", + "0 23.2 200.25 395.0 \n", + "1 47.4 204.80 354.9 " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "columns_to_show = [\"Total day minutes\", \"Total eve minutes\", \"Total night minutes\"]\n", + "\n", + "df.groupby([\"Churn\"])[columns_to_show].describe(percentiles=[])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_EZVRvNptU8d" + }, + "source": [ + "Сделаем то же самое, но немного по-другому, передав в `agg` список функций:" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 97, + "status": "ok", + "timestamp": 1633609639161, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "9x5emqSwtU8e", + "outputId": "9e81a4db-b89a-4e06-d928-70be8f26fdc5" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Total day minutesTotal eve minutesTotal night minutes
meanstdaminamaxmeanstdaminamaxmeanstdaminamax
Churn
0175.1850.180.0315.6199.0450.290.0361.8200.1351.1123.2395.0
1206.9169.000.0350.8212.4151.7370.9363.7205.2347.1347.4354.9
\n", + "
" + ], + "text/plain": [ + " Total day minutes Total eve minutes \\\n", + " mean std amin amax mean std amin \n", + "Churn \n", + "0 175.18 50.18 0.0 315.6 199.04 50.29 0.0 \n", + "1 206.91 69.00 0.0 350.8 212.41 51.73 70.9 \n", + "\n", + " Total night minutes \n", + " amax mean std amin amax \n", + "Churn \n", + "0 361.8 200.13 51.11 23.2 395.0 \n", + "1 363.7 205.23 47.13 47.4 354.9 " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "columns_to_show = [\"Total day minutes\", \"Total eve minutes\", \"Total night minutes\"]\n", + "\n", + "df.groupby([\"Churn\"])[columns_to_show].agg([np.mean, np.std, np.min, np.max])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bMsnErVv_o77" + }, + "source": [ + "Сбрасываем индекс с группирующего поля" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 + }, + "executionInfo": { + "elapsed": 93, + "status": "ok", + "timestamp": 1633609639162, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "8HFw5er5_DhM", + "outputId": "bbdaaf9f-323e-42aa-e086-768f78599e65" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ChurnState
002850
11483
\n", + "
" + ], + "text/plain": [ + " Churn State\n", + "0 0 2850\n", + "1 1 483" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('Churn', as_index=False)['State'].count()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mwqgfLAVtU8e" + }, + "source": [ + "### Сводные таблицы" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gYnSr64ptU8e" + }, + "source": [ + "Допустим, мы хотим посмотреть, как наблюдения в нашей выборке распределены в контексте двух признаков — `Churn` и `International plan`. Для этого мы можем построить **таблицу сопряженности**, воспользовавшись методом **`crosstab`**:" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "executionInfo": { + "elapsed": 91, + "status": "ok", + "timestamp": 1633609639163, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "yhgrYerutU8f", + "outputId": "25d6f12a-34f1-4f8b-8f49-d8914548046d" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
International planFalseTrue
Churn
02664186
1346137
\n", + "
" + ], + "text/plain": [ + "International plan False True \n", + "Churn \n", + "0 2664 186\n", + "1 346 137" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(df[\"Churn\"], df[\"International plan\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "executionInfo": { + "elapsed": 90, + "status": "ok", + "timestamp": 1633609639165, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "cR0WankTtU8f", + "outputId": "a949d3c8-88f3-4f4a-8a67-6208d2c44445", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Voice mail planFalseTrue
Churn
00.600.25
10.120.02
\n", + "
" + ], + "text/plain": [ + "Voice mail plan False True \n", + "Churn \n", + "0 0.60 0.25\n", + "1 0.12 0.02" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(df[\"Churn\"], df[\"Voice mail plan\"], normalize=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 87, + "status": "ok", + "timestamp": 1633609639166, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "P_eONQ24W0aU", + "outputId": "525dd141-f2d7-47a6-c4ea-6bd3be3386a9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 0, 2, 3, 4, 5, 7, 9, 6, 8])" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Customer service calls\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 75, + "status": "ok", + "timestamp": 1633609639167, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "abLbV3cKARwi", + "outputId": "c5f227e8-5570-4c48-e7bf-48bb31227734" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Customer service calls\"].nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1F8uRUIwtU8h" + }, + "source": [ + "Мы видим, что большинство пользователей — лояльные и не пользуются дополнительными услугами (международного роуминга /
голосовой почты)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "reNYiSlJtU8h" + }, + "source": [ + "Продвинутые пользователи `Excel` наверняка вспомнят о такой фиче, как **сводные таблицы** (`pivot tables`). В `Pandas` за сводные таблицы отвечает метод **`pivot_table`**, который принимает в качестве параметров:\n", + "\n", + "* `values` – список переменных, по которым требуется рассчитать нужные статистики,\n", + "* `index` – список переменных, по которым нужно сгруппировать данные,\n", + "* `aggfunc` — то, что нам, собственно, нужно посчитать по группам — сумму, среднее, максимум, минимум или что-то ещё.\n", + "\n", + "Давайте посмотрим среднее число дневных, вечерних и ночных звонков для разных `Area code`:" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 68, + "status": "ok", + "timestamp": 1633609639168, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "xabiD5fktU8h", + "outputId": "a51980ac-bbb9-44b4-c453-1085d9014a7c", + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Total day callsTotal eve callsTotal night calls
Area code
408100.5099.7999.04
415100.58100.50100.40
510100.1099.67100.60
\n", + "
" + ], + "text/plain": [ + " Total day calls Total eve calls Total night calls\n", + "Area code \n", + "408 100.50 99.79 99.04\n", + "415 100.58 100.50 100.40\n", + "510 100.10 99.67 100.60" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.pivot_table(\n", + " [\"Total day calls\", \"Total eve calls\", \"Total night calls\"],\n", + " [\"Area code\"],\n", + " aggfunc=\"mean\",\n", + ").head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0_haYJdjtU8h" + }, + "source": [ + "### Преобразование датафреймов\n", + "\n", + "Как и многие другие вещи, добавлять столбцы в `DataFrame` можно несколькими способами." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "35zMtFv8tU8i" + }, + "source": [ + "Например, мы хотим посчитать общее количество звонков для всех пользователей. Создадим объект `total_calls` типа `Series` и вставим его в датафрейм:" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 261 + }, + "executionInfo": { + "elapsed": 67, + "status": "ok", + "timestamp": 1633609639171, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "z1ktVfD0tU8i", + "outputId": "a0c006bf-4504-4c46-af1d-8b8ab8167d79" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messagesTotal calls
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.7010100303
1OH107415FalseTrue26161.612327.47195.510316.62254.410311.4513.733.7010104332
2NJ137415FalseFalse0243.411441.38121.211010.30162.61047.3212.253.29004333
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.78204255
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.73304359
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "0 KS 128 415 False True \n", + "1 OH 107 415 False True \n", + "2 NJ 137 415 False False \n", + "3 OH 84 408 True False \n", + "4 OK 75 415 True False \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "1 26 161.6 123 \n", + "2 0 243.4 114 \n", + "3 0 299.4 71 \n", + "4 0 166.7 113 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "1 27.47 195.5 103 16.62 \n", + "2 41.38 121.2 110 10.30 \n", + "3 50.90 61.9 88 5.26 \n", + "4 28.34 148.3 122 12.61 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "1 254.4 103 11.45 \n", + "2 162.6 104 7.32 \n", + "3 196.9 89 8.86 \n", + "4 186.9 121 8.41 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "1 13.7 3 3.70 \n", + "2 12.2 5 3.29 \n", + "3 6.6 7 1.78 \n", + "4 10.1 3 2.73 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages Total calls \n", + "0 1 0 100 303 \n", + "1 1 0 104 332 \n", + "2 0 0 4 333 \n", + "3 2 0 4 255 \n", + "4 3 0 4 359 " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_calls = (\n", + " df[\"Total day calls\"]\n", + " + df[\"Total eve calls\"]\n", + " + df[\"Total night calls\"]\n", + " + df[\"Total intl calls\"]\n", + ")\n", + "df.insert(loc=len(df.columns), column=\"Total calls\", value=total_calls)\n", + "# loc - номер столбца, после которого нужно вставить данный Series\n", + "# мы указали len(df.columns), чтобы вставить его в самом конце\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nB0mpCA1tU8j" + }, + "source": [ + "Добавить столбец из имеющихся можно и проще, не создавая промежуточных `Series`:" + ] + }, + { + "cell_type": "code", + 
"execution_count": 42, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 261 + }, + "executionInfo": { + "elapsed": 64, + "status": "ok", + "timestamp": 1633609639173, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ZVpdhf1etU8k", + "outputId": "93b3fe31-2757-4cb7-afcc-7c10f765bb46" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messagesTotal callsTotal charge
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.701010030375.56
1OH107415FalseTrue26161.612327.47195.510316.62254.410311.4513.733.701010433259.24
2NJ137415FalseFalse0243.411441.38121.211010.30162.61047.3212.253.2900433362.29
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.7820425566.80
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.7330435952.09
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "0 KS 128 415 False True \n", + "1 OH 107 415 False True \n", + "2 NJ 137 415 False False \n", + "3 OH 84 408 True False \n", + "4 OK 75 415 True False \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "1 26 161.6 123 \n", + "2 0 243.4 114 \n", + "3 0 299.4 71 \n", + "4 0 166.7 113 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "1 27.47 195.5 103 16.62 \n", + "2 41.38 121.2 110 10.30 \n", + "3 50.90 61.9 88 5.26 \n", + "4 28.34 148.3 122 12.61 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "1 254.4 103 11.45 \n", + "2 162.6 104 7.32 \n", + "3 196.9 89 8.86 \n", + "4 186.9 121 8.41 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "1 13.7 3 3.70 \n", + "2 12.2 5 3.29 \n", + "3 6.6 7 1.78 \n", + "4 10.1 3 2.73 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages Total calls \\\n", + "0 1 0 100 303 \n", + "1 1 0 104 332 \n", + "2 0 0 4 333 \n", + "3 2 0 4 255 \n", + "4 3 0 4 359 \n", + "\n", + " Total charge \n", + "0 75.56 \n", + "1 59.24 \n", + "2 62.29 \n", + "3 66.80 \n", + "4 52.09 " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Total charge\"] = (\n", + " df[\"Total day charge\"]\n", + " + df[\"Total eve charge\"]\n", + " + df[\"Total night charge\"]\n", + " + df[\"Total intl charge\"]\n", + ")\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xrn0pZo1tU8l" + }, + "source": [ + "Чтобы удалить столбцы или строки, воспользуйтесь методом `drop`, передавая в качестве аргумента нужные индексы и требуемое значение параметра `axis` (`1`, если удаляете столбцы, и ничего или `0`, если удаляете 
строки):" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 261 + }, + "executionInfo": { + "elapsed": 62, + "status": "ok", + "timestamp": 1633609639175, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "oSvOmNv-tU8l", + "outputId": "d0304dcc-8e8a-42c9-8765-1822ae6f5c44", + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateAccount lengthArea codeInternational planVoice mail planNumber vmail messagesTotal day minutesTotal day callsTotal day chargeTotal eve minutesTotal eve callsTotal eve chargeTotal night minutesTotal night callsTotal night chargeTotal intl minutesTotal intl callsTotal intl chargeCustomer service callsChurnnew_Number_vmail_messages
0KS128415FalseTrue25265.111045.07197.49916.78244.79111.0110.032.7010100
3OH84408TrueFalse0299.47150.9061.9885.26196.9898.866.671.78204
4OK75415TrueFalse0166.711328.34148.312212.61186.91218.4110.132.73304
5AL118510TrueFalse0223.49837.98220.610118.75203.91189.186.361.70004
6MA121510FalseTrue24218.28837.09348.510829.62212.61189.577.572.033096
\n", + "
" + ], + "text/plain": [ + " State Account length Area code International plan Voice mail plan \\\n", + "0 KS 128 415 False True \n", + "3 OH 84 408 True False \n", + "4 OK 75 415 True False \n", + "5 AL 118 510 True False \n", + "6 MA 121 510 False True \n", + "\n", + " Number vmail messages Total day minutes Total day calls \\\n", + "0 25 265.1 110 \n", + "3 0 299.4 71 \n", + "4 0 166.7 113 \n", + "5 0 223.4 98 \n", + "6 24 218.2 88 \n", + "\n", + " Total day charge Total eve minutes Total eve calls Total eve charge \\\n", + "0 45.07 197.4 99 16.78 \n", + "3 50.90 61.9 88 5.26 \n", + "4 28.34 148.3 122 12.61 \n", + "5 37.98 220.6 101 18.75 \n", + "6 37.09 348.5 108 29.62 \n", + "\n", + " Total night minutes Total night calls Total night charge \\\n", + "0 244.7 91 11.01 \n", + "3 196.9 89 8.86 \n", + "4 186.9 121 8.41 \n", + "5 203.9 118 9.18 \n", + "6 212.6 118 9.57 \n", + "\n", + " Total intl minutes Total intl calls Total intl charge \\\n", + "0 10.0 3 2.70 \n", + "3 6.6 7 1.78 \n", + "4 10.1 3 2.73 \n", + "5 6.3 6 1.70 \n", + "6 7.5 7 2.03 \n", + "\n", + " Customer service calls Churn new_Number_vmail_messages \n", + "0 1 0 100 \n", + "3 2 0 4 \n", + "4 3 0 4 \n", + "5 0 0 4 \n", + "6 3 0 96 " + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# избавляемся от созданных только что столбцов\n", + "df = df.drop([\"Total charge\", \"Total calls\"], axis=1)\n", + "\n", + "df.drop([1, 2]).head() # а вот так можно удалить строчки" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JLDUG5hNtU8l" + }, + "source": [ + "--------\n", + "\n", + "\n", + "\n", + "## Первые попытки прогнозирования оттока\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1sv6q4lNtU8m" + }, + "source": [ + "Посмотрим, как отток связан с признаком *\"Подключение международного роуминга\"* (`International plan`). 
Сделаем это с помощью сводной таблички `crosstab`, а также путем иллюстрации с `Seaborn` (как именно строить такие картинки и анализировать с их помощью графики – материал следующей статьи.)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": true, + "executionInfo": { + "elapsed": 57, + "status": "ok", + "timestamp": 1633609639176, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "M7cBvVn-tU8m" + }, + "outputs": [], + "source": [ + "# надо дополнительно установить (команда в терминале)\n", + "# чтоб картинки рисовались в тетрадке\n", + "# !conda install seaborn\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "plt.rcParams[\"figure.figsize\"] = (8, 6)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 56, + "status": "ok", + "timestamp": 1633609639177, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "8ZJBwL8NtU8m", + "outputId": "334f814d-2c27-4f67-cabd-17159188ca2b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
International planFalseTrueAll
Churn
026641862850
1346137483
All30103233333
\n", + "
" + ], + "text/plain": [ + "International plan False True All\n", + "Churn \n", + "0 2664 186 2850\n", + "1 346 137 483\n", + "All 3010 323 3333" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(df[\"Churn\"], df[\"International plan\"], margins=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 388 + }, + "executionInfo": { + "elapsed": 1509, + "status": "ok", + "timestamp": 1633609640633, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "BGwuNSretU8n", + "outputId": "0b7deac2-30bb-4ec9-e84b-ec60b64ceeff" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x=\"International plan\", hue=\"Churn\", data=df)\n", + "plt.savefig(\"int_plan_and_churn.png\", dpi=300);" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j8CpA17FtU8o" + }, + "source": [ + "Видим, что когда роуминг подключен, доля оттока намного выше – интересное наблюдение! Возможно, большие и плохо контролируемые траты в роуминге очень конфликтогенны и приводят к недовольству клиентов телеком-оператора и, соответственно, к их оттоку. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JDm9ePM4tU8o" + }, + "source": [ + "Далее посмотрим на еще один важный признак – *\"Число обращений в сервисный центр\"* (`Customer service calls`). Также построим сводную таблицу и картинку." + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 25, + "status": "ok", + "timestamp": 1633609640635, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "UKGrw7fbtU8p", + "outputId": "7801c2d5-2a6d-4872-cde2-f847b851b3c5" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Customer service calls0123456789All
Churn
06051059672385902684102850
1921228744764014512483
All697118175942916666229223333
\n", + "
" + ], + "text/plain": [ + "Customer service calls 0 1 2 3 4 5 6 7 8 9 All\n", + "Churn \n", + "0 605 1059 672 385 90 26 8 4 1 0 2850\n", + "1 92 122 87 44 76 40 14 5 1 2 483\n", + "All 697 1181 759 429 166 66 22 9 2 2 3333" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(df[\"Churn\"], df[\"Customer service calls\"], margins=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 388 + }, + "executionInfo": { + "elapsed": 2104, + "status": "ok", + "timestamp": 1633609642719, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "sMJh9m1VtU8p", + "outputId": "43183fcc-f324-4492-acd6-30a6cf0615b5" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x=\"Customer service calls\", hue=\"Churn\", data=df)\n", + "plt.savefig(\"serv_calls__and_churn.png\", dpi=300);" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x2ZJPe-DtU8q" + }, + "source": [ + "Может быть, по сводной табличке это не так хорошо видно (или скучно ползать взглядом по строчкам с цифрами), а вот картинка красноречиво свидетельствует о том, что доля оттока сильно возрастает начиная с 4 звонков в сервисный центр. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dqj4LVe3tU8q" + }, + "source": [ + "Добавим теперь в наш DataFrame бинарный признак — результат сравнения `Customer service calls > 3`. И еще раз посмотрим, как он связан с оттоком. " + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 14, + "status": "ok", + "timestamp": 1633609642722, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "o9R6NM8ltU8q", + "outputId": "ab3e7ee2-24f1-455b-c973-e994cf85a4c7", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Churn01All
Many_service_calls
027213453066
1129138267
All28504833333
\n", + "
" + ], + "text/plain": [ + "Churn 0 1 All\n", + "Many_service_calls \n", + "0 2721 345 3066\n", + "1 129 138 267\n", + "All 2850 483 3333" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Many_service_calls\"] = (df[\"Customer service calls\"] > 3).astype(\"int\")\n", + "\n", + "pd.crosstab(df[\"Many_service_calls\"], df[\"Churn\"], margins=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 389 + }, + "executionInfo": { + "elapsed": 777, + "status": "ok", + "timestamp": 1633609643487, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "nUQk7G96tU8r", + "outputId": "ed0370de-9813-45fa-f666-ee36ce5206a1" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x=\"Many_service_calls\", hue=\"Churn\", data=df)\n", + "plt.savefig(\"many_serv_calls__and_churn.png\", dpi=300);" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XfYD5KqGtU8s" + }, + "source": [ + "Объединим рассмотренные выше условия и построим сводную табличку для этого объединения и оттока." + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "executionInfo": { + "elapsed": 36, + "status": "ok", + "timestamp": 1633609643489, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "ZuaXCibrtU8s", + "outputId": "dbd40bfd-2dd2-41f1-ba50-cf159b12ddd7" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Churn01
row_0
False2841464
True919
\n", + "
" + ], + "text/plain": [ + "Churn 0 1\n", + "row_0 \n", + "False 2841 464\n", + "True 9 19" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(df[\"Many_service_calls\"] & df[\"International plan\"], df[\"Churn\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VyaMr43HtU8t" + }, + "source": [ + "Значит, прогнозируя отток клиента в случае, когда число звонков в сервисный центр больше 3 и подключен роуминг (и прогнозируя лояльность – в противном случае), можно ожидать около 85.8% правильных попаданий (ошибаемся всего 464 + 9 раз). Эти 85.8%, которые мы получили с помощью очень простых рассуждений – это неплохая отправная точка (*baseline*) для дальнейших моделей машинного обучения, которые мы будем строить. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d6_n0ESntU8u" + }, + "source": [ + "В целом до появления машинного обучения процесс анализа данных выглядел примерно так. Прорезюмируем:\n", + " \n", + "- Доля лояльных клиентов в выборке – 85.5%. Самая наивная модель, ответ которой \"Клиент всегда лоялен\", на подобных данных будет угадывать примерно в 85.5% случаев. То есть доли правильных ответов (*accuracy*) последующих моделей должны быть как минимум не меньше, а лучше, значительно выше этой цифры;\n", + "- С помощью простого прогноза, который условно можно выразить такой формулой: \"International plan = True & Customer Service calls > 3 => Churn = 1, else Churn = 0\", можно ожидать долю угадываний 85.8%, что еще чуть выше 85.5%\n", + "- Эти два бейзлайна мы получили без всякого машинного обучения, и они служат отправной точкой для наших последующих моделей. Если окажется, что мы громадными усилиями увеличиваем долю правильных ответов всего, скажем, на 0.5%, то возможно, мы что-то делаем не так, и достаточно ограничиться простой моделью из двух условий. 
\n", + "- Перед обучением сложных моделей рекомендуется немного покрутить данные и проверить простые предположения. Более того, в бизнес-приложениях машинного обучения чаще всего начинают именно с простых решений, а потом экспериментируют с их усложнением. " + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "colab": { + "collapsed_sections": [], + "name": "02_Pandas.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "name": "seminar02_part2_pandas.ipynb" + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/Pandas (06.03)/Pandas. Task. Part 2.ipynb b/Pandas (06.03)/Pandas. Task. Part 2.ipynb index bb60a1c..ceb2106 100644 --- a/Pandas (06.03)/Pandas. Task. Part 2.ipynb +++ b/Pandas (06.03)/Pandas. Task. 
Part 2.ipynb @@ -1 +1,671 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.8"},"colab":{"name":"02_pandas_task.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"EmV0s8YY05p7"},"source":["- __ID__ - Unique number for each athlete\n","- __Name__ - Athlete's name\n","- __Sex__ - M or F\n","- __Age__ - Integer\n","- __Height__ - In centimeters\n","- __Weight__ - In kilograms\n","- __Team__ - Team name\n","- __NOC__ - National Olympic Committee 3-letter code\n","- __Games__ - Year and season\n","- __Year__ - Integer\n","- __Season__ - Summer or Winter\n","- __City__ - Host city\n","- __Sport__ - Sport\n","- __Event__ - Event\n","- __Medal__ - Gold, Silver, Bronze, or NA"]},{"cell_type":"code","metadata":{"id":"rVCrMDMh05p_"},"source":["import pandas as pd"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"D5Q4Z-JW05qC"},"source":["# не меняем путь!\n","PATH = 'https://github.com/aksenov7/Kaggle_competition_group/blob/master/athlete_events.csv.zip?raw=true'"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"mI0LtqkY4Kp-"},"source":["__0. 
Откройте файл используя необходимые параметры и не меняя переменную PATH__"]},{"cell_type":"code","metadata":{"id":"h5SQwBLr05qG","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1615627554682,"user_tz":-300,"elapsed":2477,"user":{"displayName":"Александр Аксёнов","photoUrl":"https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg","userId":"11145992452404092449"}},"outputId":"882f9e83-5fd7-4c3b-b005-56917b15a0fd"},"source":["data = \n","data.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
IDNameSexAgeHeightWeightTeamNOCGamesYearSeasonCitySportEventMedal
01A DijiangM24.0180.080.0ChinaCHN1992 Summer1992SummerBarcelonaBasketballBasketball Men's BasketballNaN
12A LamusiM23.0170.060.0ChinaCHN2012 Summer2012SummerLondonJudoJudo Men's Extra-LightweightNaN
23Gunnar Nielsen AabyM24.0NaNNaNDenmarkDEN1920 Summer1920SummerAntwerpenFootballFootball Men's FootballNaN
34Edgar Lindenau AabyeM34.0NaNNaNDenmark/SwedenDEN1900 Summer1900SummerParisTug-Of-WarTug-Of-War Men's Tug-Of-WarGold
45Christine Jacoba AaftinkF21.0185.082.0NetherlandsNED1988 Winter1988WinterCalgarySpeed SkatingSpeed Skating Women's 500 metresNaN
\n","
"],"text/plain":[" ID Name ... Event Medal\n","0 1 A Dijiang ... Basketball Men's Basketball NaN\n","1 2 A Lamusi ... Judo Men's Extra-Lightweight NaN\n","2 3 Gunnar Nielsen Aaby ... Football Men's Football NaN\n","3 4 Edgar Lindenau Aabye ... Tug-Of-War Men's Tug-Of-War Gold\n","4 5 Christine Jacoba Aaftink ... Speed Skating Women's 500 metres NaN\n","\n","[5 rows x 15 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"stYR4EbV05qP"},"source":["__1. Сколько лет было самым молодым мужчинам и женщинам-участникам Олимпийских игр 1992 года ?__\n","- 16 и 15\n","- 14 и 13 \n","- 13 и 11\n","- 11 и 12"]},{"cell_type":"code","metadata":{"id":"HgiqBXtb05qR"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"GQ290dsi05qc"},"source":["__2. Каков был процент баскетболистов-мужчин среди всех мужчин-участников Олимпийских игр 2012 года? Округлите ответ до первого десятичного знака.__\n","\n","Здесь и далее при необходимости отбрасывайте дублированных спортсменов, чтобы считать только уникальных . \n","- 0.2\n","- 1.5 \n","- 2.5\n","- 7.7"]},{"cell_type":"code","metadata":{"id":"-fI5MqWP05qi"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"u5WrTgIC05qv"},"source":["__3. Каковы среднее и стандартное отклонение роста теннисисток, участвовавших в Олимпийских играх 2000 года? Округлите ответ до первого десятичного знака.__\n","\n","- 171.8 и 6.5\n","- 179.4 и 10\n","- 180.7 и 6.7\n","- 182.4 и 9.1 "]},{"cell_type":"code","metadata":{"id":"vsKTqn6405qw"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"xOOEzhNQ05qy"},"source":["__4. Найдите спортсмена, который участвовал в Олимпийских играх 2006 года, с наибольшим весом среди других участников той же Олимпиады. 
Каким спортом он или она занимался?__\n","\n","- Judo\n","- Bobsleigh \n","- Skeleton\n","- Boxing"]},{"cell_type":"code","metadata":{"id":"EkWD1Tnb05qz"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"UQzxZ3HT05q0"},"source":["__5. Сколько раз John Aalberg участвовал в Олимпийских играх в разные годы?__\n","\n","Один год - это один раз. Неважно сколько участий внутри одного года\n","- 0\n","- 1 \n","- 2\n","- 3 "]},{"cell_type":"code","metadata":{"id":"ZSfkdjPO05q0"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"8EnLcNrk05q3"},"source":["__6. Сколько золотых медалей по теннису выиграли спортсмены сборной Switzerland на Олимпиаде-2008? Считайте каждую медаль от каждого спортсмена.__\n","\n","- 0\n","- 1 \n","- 2\n","- 3 "]},{"cell_type":"code","metadata":{"id":"Y754OGI-05q3"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"v3h5sQF805q5"},"source":["__7. Правда ли, что на Олимпийских играх 2016 Spain выиграла меньше медалей, чем Италия?__ \n","\n","- Да\n","- Нет"]},{"cell_type":"code","metadata":{"id":"gqJqDi2605q7"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"kkSYL5mK05q-"},"source":["__8. К какой возрастной категории принадлежало наименьшее и наибольшее количество участников Олимпиады-2008?__\n","\n","- [45-55] и [25-35) соответственно\n","- [45-55] и [15-25) соответственно\n","- [35-45) и [25-35) соответственно\n","- [45-55] и [35-45) соответственно"]},{"cell_type":"code","metadata":{"id":"pMAQtW7i05q_"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"JQmJPiXv05rB"},"source":["__9. Правда ли, что в Atlanta проводились летние Олимпийские игры? Правда ли, что в Squaw Valley проводились зимние Олимпийские игры? 
?__\n","\n","- Да, Да\n","- Да, Нет\n","- Нет, Да \n","- Нет, Нет "]},{"cell_type":"code","metadata":{"id":"UU66wRHC05rB"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4hxR5D-t05rF"},"source":["__10. Какова абсолютная разница между количеством уникальных видов спорта на Олимпиаде 1986 года и Олимпиаде 2002 года?__\n","\n","- 3 \n","- 10\n","- 15\n","- 27 "]},{"cell_type":"code","metadata":{"id":"WKIr-TR105rF"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "EmV0s8YY05p7" + }, + "source": [ + "- __ID__ - Unique number for each athlete\n", + "- __Name__ - Athlete's name\n", + "- __Sex__ - M or F\n", + "- __Age__ - Integer\n", + "- __Height__ - In centimeters\n", + "- __Weight__ - In kilograms\n", + "- __Team__ - Team name\n", + "- __NOC__ - National Olympic Committee 3-letter code\n", + "- __Games__ - Year and season\n", + "- __Year__ - Integer\n", + "- __Season__ - Summer or Winter\n", + "- __City__ - Host city\n", + "- __Sport__ - Sport\n", + "- __Event__ - Event\n", + "- __Medal__ - Gold, Silver, Bronze, or NA" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "rVCrMDMh05p_" + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "D5Q4Z-JW05qC" + }, + "outputs": [], + "source": [ + "# не меняем путь!\n", + "PATH = 'https://github.com/aksenov7/Kaggle_competition_group/blob/master/athlete_events.csv.zip?raw=true'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mI0LtqkY4Kp-" + }, + "source": [ + "__0. 
Откройте файл используя необходимые параметры и не меняя переменную PATH__" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "executionInfo": { + "elapsed": 2477, + "status": "ok", + "timestamp": 1615627554682, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh5.googleusercontent.com/-jOf_oDVHsg8/AAAAAAAAAAI/AAAAAAAAAFM/qwdbG0GW_To/s64/photo.jpg", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "h5SQwBLr05qG", + "outputId": "882f9e83-5fd7-4c3b-b005-56917b15a0fd" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDNameSexAgeHeightWeightTeamNOCGamesYearSeasonCitySportEventMedal
01A DijiangM24.0180.080.0ChinaCHN1992 Summer1992SummerBarcelonaBasketballBasketball Men's BasketballNaN
12A LamusiM23.0170.060.0ChinaCHN2012 Summer2012SummerLondonJudoJudo Men's Extra-LightweightNaN
23Gunnar Nielsen AabyM24.0NaNNaNDenmarkDEN1920 Summer1920SummerAntwerpenFootballFootball Men's FootballNaN
34Edgar Lindenau AabyeM34.0NaNNaNDenmark/SwedenDEN1900 Summer1900SummerParisTug-Of-WarTug-Of-War Men's Tug-Of-WarGold
45Christine Jacoba AaftinkF21.0185.082.0NetherlandsNED1988 Winter1988WinterCalgarySpeed SkatingSpeed Skating Women's 500 metresNaN
\n", + "
" + ], + "text/plain": [ + " ID Name Sex Age Height Weight Team \\\n", + "0 1 A Dijiang M 24.0 180.0 80.0 China \n", + "1 2 A Lamusi M 23.0 170.0 60.0 China \n", + "2 3 Gunnar Nielsen Aaby M 24.0 NaN NaN Denmark \n", + "3 4 Edgar Lindenau Aabye M 34.0 NaN NaN Denmark/Sweden \n", + "4 5 Christine Jacoba Aaftink F 21.0 185.0 82.0 Netherlands \n", + "\n", + " NOC Games Year Season City Sport \\\n", + "0 CHN 1992 Summer 1992 Summer Barcelona Basketball \n", + "1 CHN 2012 Summer 2012 Summer London Judo \n", + "2 DEN 1920 Summer 1920 Summer Antwerpen Football \n", + "3 DEN 1900 Summer 1900 Summer Paris Tug-Of-War \n", + "4 NED 1988 Winter 1988 Winter Calgary Speed Skating \n", + "\n", + " Event Medal \n", + "0 Basketball Men's Basketball NaN \n", + "1 Judo Men's Extra-Lightweight NaN \n", + "2 Football Men's Football NaN \n", + "3 Tug-Of-War Men's Tug-Of-War Gold \n", + "4 Speed Skating Women's 500 metres NaN " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(PATH, compression='zip')\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "stYR4EbV05qP" + }, + "source": [ + "__1. Сколько лет было самым молодым мужчинам и женщинам-участникам Олимпийских игр 1992 года ?__\n", + "- 16 и 15\n", + "- 14 и 13 \n", + "- 13 и 11\n", + "- 11 и 12" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "id": "HgiqBXtb05qR" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Мужчина: 11\n", + "Женщина: 12\n" + ] + } + ], + "source": [ + "people = data[data['Year'] == 1992][['Sex', 'Age']]\n", + "\n", + "print('Мужчина: ', int(people[people['Sex'] == 'M']['Age'].min()))\n", + "print('Женщина: ', int(people[people['Sex'] == 'F']['Age'].min()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GQ290dsi05qc" + }, + "source": [ + "__2. 
Каков был процент баскетболистов-мужчин среди всех мужчин-участников Олимпийских игр 2012 года? Округлите ответ до первого десятичного знака.__\n", + "\n", + "Здесь и далее при необходимости отбрасывайте дублированных спортсменов, чтобы считать только уникальных . \n", + "- 0.2\n", + "- 1.5 \n", + "- 2.5\n", + "- 7.7" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "-fI5MqWP05qi" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.0\n" + ] + } + ], + "source": [ + "people = data[(data['Sex'] == 'M') & (data['Year'] == 2012)].drop_duplicates()\n", + "basket = people[people['Sport'] == 'Basketball']\n", + "print(round(len(basket) / len(people) * 100, 1))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u5WrTgIC05qv" + }, + "source": [ + "__3. Каковы среднее и стандартное отклонение роста теннисисток, участвовавших в Олимпийских играх 2000 года? Округлите ответ до первого десятичного знака.__\n", + "\n", + "- 171.8 и 6.5\n", + "- 179.4 и 10\n", + "- 180.7 и 6.7\n", + "- 182.4 и 9.1 " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "vsKTqn6405qw" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "171.8\n", + "6.5\n" + ] + } + ], + "source": [ + "female = data[(data['Sex'] == 'F') & (data['Year'] == 2000) & (data['Sport'] == 'Tennis')][['Height']]\n", + "print(round(female.mean()['Height'], 1))\n", + "print(round(female.std()['Height'], 1))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xOOEzhNQ05qy" + }, + "source": [ + "__4. Найдите спортсмена, который участвовал в Олимпийских играх 2006 года, с наибольшим весом среди других участников той же Олимпиады. 
Каким спортом он или она занимался?__\n", + "\n", + "- Judo\n", + "- Bobsleigh \n", + "- Skeleton\n", + "- Boxing" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "EkWD1Tnb05qz" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
WeightSexSport
8102127.0MSkeleton
\n", + "
" + ], + "text/plain": [ + " Weight Sex Sport\n", + "8102 127.0 M Skeleton" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "people = data[data['Year'] == 2006][['Weight', 'Sex', 'Sport']]\n", + "max_w = people['Weight'].max()\n", + "people[people['Weight'] == max_w]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UQzxZ3HT05q0" + }, + "source": [ + "__5. Сколько раз John Aalberg участвовал в Олимпийских играх в разные годы?__\n", + "\n", + "Один год - это один раз. Неважно сколько участий внутри одного года\n", + "- 0\n", + "- 1 \n", + "- 2\n", + "- 3 " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "ZSfkdjPO05q0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n" + ] + } + ], + "source": [ + "print(data[data['Name'] == 'John Aalberg']['Year'].drop_duplicates().count())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8EnLcNrk05q3" + }, + "source": [ + "__6. Сколько золотых медалей по теннису выиграли спортсмены сборной Switzerland на Олимпиаде-2008? Считайте каждую медаль от каждого спортсмена.__\n", + "\n", + "- 0\n", + "- 1 \n", + "- 2\n", + "- 3 " + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "id": "Y754OGI-05q3" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n" + ] + } + ], + "source": [ + "people = data[(data['Year'] == 2008) & (data['Team'] == 'Switzerland') & (data['Medal'] == 'Gold') \n", + " & (data['Sport'] == 'Tennis')]\n", + "print(len(people))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v3h5sQF805q5" + }, + "source": [ + "__7. 
Правда ли, что на Олимпийских играх 2016 Spain выиграла меньше медалей, чем Италия?__ \n", + "\n", + "- Да\n", + "- Нет" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "id": "gqJqDi2605q7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "people = data[(data['Year'] == 2016) & (data['Medal'])][['Team', 'Medal']]\n", + "italy_count = people[people['Team'] == 'Italy']['Medal'].count()\n", + "spain_count = people[people['Team'] == 'Spain']['Medal'].count()\n", + "print(italy_count > spain_count)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kkSYL5mK05q-" + }, + "source": [ + "__8. К какой возрастной категории принадлежало наименьшее и наибольшее количество участников Олимпиады-2008?__\n", + "\n", + "- [45-55] и [25-35) соответственно\n", + "- [45-55] и [15-25) соответственно\n", + "- [35-45) и [25-35) соответственно\n", + "- [45-55] и [35-45) соответственно" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "id": "pMAQtW7i05q_" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[45-55] и [15-25) соответственно\n" + ] + } + ], + "source": [ + "people = data[(data['Year'] == 2008)][['Age']]\n", + "categories = {\n", + " \"[45-55]\" : len(people[(people['Age'] >= 45) & (people['Age'] <= 55)]),\n", + " \"[25-35)\" : len(people[(people['Age'] >= 25) & (people['Age'] < 35)]),\n", + " \"[15-25)\" : len(people[(people['Age'] >= 15) & (people['Age'] <= 25)]),\n", + " \"[35-45]\" : len(people[(people['Age'] >= 35) & (people['Age'] < 45)])\n", + "}\n", + "\n", + "print(f'{max(categories)} и {min(categories)} соответственно')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JQmJPiXv05rB" + }, + "source": [ + "__9. Правда ли, что в Atlanta проводились летние Олимпийские игры? Правда ли, что в Squaw Valley проводились зимние Олимпийские игры? 
?__\n", + "\n", + "- Да, Да\n", + "- Да, Нет\n", + "- Нет, Да \n", + "- Нет, Нет " + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "id": "UU66wRHC05rB" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Да, Да\n" + ] + } + ], + "source": [ + "atlanta = len(data[(data['Season'] == 'Summer') & (data['City'] == 'Atlanta')])\n", + "squaw_valley = len(data[(data['Season'] == 'Winter') & (data['City'] == 'Squaw Valley')])\n", + "\n", + "print('Да,' if atlanta > 0 else 'Нет,', 'Да' if squaw_valley > 0 else 'Нет')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4hxR5D-t05rF" + }, + "source": [ + "__10. Какова абсолютная разница между количеством уникальных видов спорта на Олимпиаде 1986 года и Олимпиаде 2002 года?__\n", + "\n", + "- 3 \n", + "- 10\n", + "- 15\n", + "- 27 " + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "id": "WKIr-TR105rF" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "15\n" + ] + } + ], + "source": [ + "a = len(data[data[\"Year\"]==2002].drop_duplicates(subset=['Sport']))\n", + "b = len(data[data[\"Year\"]==1986].drop_duplicates(subset=['Sport']))\n", + "print(a - b)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "02_pandas_task.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} From 89dc2ebee266731e6a2a6569474efdb5fcf9bea5 Mon Sep 17 00:00:00 2001 From: ooonush Date: Wed, 12 Oct 2022 19:50:17 +0500 Subject: [PATCH 5/5] r --- .idea/.gitignore | 8 + .../inspectionProfiles/profiles_settings.xml | 6 + 
.idea/misc.xml | 4 + .idea/modules.xml | 8 + .idea/ooonush_data_analysis_in_python.iml | 8 + .idea/vcs.xml | 6 + ...267\320\260\321\206\320\270\321\217.ipynb" | 105 +- ...206\320\270\321\217 \320\224\320\227.html" | 182 ++++ ...265\321\201\321\201\320\270\321\217.ipynb" | 732 +++++++++++++ ...06\320\270\321\217 \320\224\320\227.ipynb" | 999 ++++++++++++++++++ ...267\320\260\321\206\320\270\321\217.ipynb" | 125 +-- 11 files changed, 2013 insertions(+), 170 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/ooonush_data_analysis_in_python.iml create mode 100644 .idea/vcs.xml create mode 100644 "exportToHTML/\320\222\320\270\320\267\321\203\320\260\320\273\320\270\320\267\320\260\321\206\320\270\321\217 \320\224\320\227.html" create mode 100644 "my/\320\273\320\270\320\275\320\265\320\271\320\275\320\260\321\217 \321\200\320\265\320\263\321\200\320\265\321\201\321\201\320\270\321\217.ipynb" create mode 100644 "\320\222\320\270\320\267\321\203\320\260\320\273\320\270\320\267\320\260\321\206\320\270\321\217 \320\224\320\227.ipynb" diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..dc9ea49 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git 
a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..d961b75 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/ooonush_data_analysis_in_python.iml b/.idea/ooonush_data_analysis_in_python.iml new file mode 100644 index 0000000..d0876a7 --- /dev/null +++ b/.idea/ooonush_data_analysis_in_python.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git "a/Pandas and EDA (12.03)/\320\237\320\265\321\200\320\262\320\270\321\207\320\275\321\213\320\271 \320\260\320\275\320\260\320\273\320\270\320\267 \320\264\320\260\320\275\320\275\321\213\321\205 (EDA). \320\222\320\270\320\267\321\203\320\260\320\273\320\270\320\267\320\260\321\206\320\270\321\217.ipynb" "b/Pandas and EDA (12.03)/\320\237\320\265\321\200\320\262\320\270\321\207\320\275\321\213\320\271 \320\260\320\275\320\260\320\273\320\270\320\267 \320\264\320\260\320\275\320\275\321\213\321\205 (EDA). \320\222\320\270\320\267\321\203\320\260\320\273\320\270\320\267\320\260\321\206\320\270\321\217.ipynb" index 12ac412..c649924 100644 --- "a/Pandas and EDA (12.03)/\320\237\320\265\321\200\320\262\320\270\321\207\320\275\321\213\320\271 \320\260\320\275\320\260\320\273\320\270\320\267 \320\264\320\260\320\275\320\275\321\213\321\205 (EDA). \320\222\320\270\320\267\321\203\320\260\320\273\320\270\320\267\320\260\321\206\320\270\321\217.ipynb" +++ "b/Pandas and EDA (12.03)/\320\237\320\265\321\200\320\262\320\270\321\207\320\275\321\213\320\271 \320\260\320\275\320\260\320\273\320\270\320\267 \320\264\320\260\320\275\320\275\321\213\321\205 (EDA). 
\320\222\320\270\320\267\321\203\320\260\320\273\320\270\320\267\320\260\321\206\320\270\321\217.ipynb" @@ -532,57 +532,6 @@ " plt.show()" ] }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 284 - }, - "executionInfo": { - "elapsed": 507, - "status": "ok", - "timestamp": 1633614688173, - "user": { - "displayName": "Александр Аксёнов", - "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GhmPE3kg2vafh4QNEoLX_DeI08tDxoR8I8MoJZP=s64", - "userId": "11145992452404092449" - }, - "user_tz": -300 - }, - "id": "G_yJAqux4pqE", - "outputId": "28193c74-e4b7-446f-e81f-04b85ae4e477" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD6CAYAAABNu5eFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAARCElEQVR4nO3df6zddX3H8edrpboFTVpEb7q2W1nWZemSieYGWewfV41QmK6YLA1k00ZJ6h/gMHFZ0H9wEhKXDN00jqRCY01U1kxZS9MMu44Txx8ioAwoyLhDCG1KG4eoVxMW2Ht/nG/dSXtv7+39cW57Ps9HcnO+3/f38/2ez7scXufb7/ne01QVkqQ2/NpyT0CSNDyGviQ1xNCXpIYY+pLUEENfkhpi6EtSQ2YN/STrk9yf5Mkkh5Pc1NU/neRokke7n6sH9vlkkskkTye5cqC+patNJrl5aVqSJM0ks92nn2QNsKaqvp/kjcAjwDXANmCqqv72lPGbgG8AlwG/Cfwr8Hvd5v8E3gscAR4CrquqJ2d67osvvrg2bNgwj7b6fvGLX3DhhRfOe//zSUu9gv2OspZ6haXp95FHHvlxVb15um0XzLZzVR0DjnXLP0/yFLD2DLtsBe6uqleAHyWZpP8GADBZVc8CJLm7Gztj6G/YsIGHH354tinOqNfrMTExMe/9zyct9Qr2O8pa6hWWpt8kz8+07ayu6SfZALwNeLAr3ZjksSS7kqzuamuBFwZ2O9LVZqpLkoZk1jP9k5K8Afgm8PGq+lmSO4Bbgeoebwc+stAJJdkB7AAYGxuj1+vN+1hTU1ML2v980lKvYL+jrKVeYfj9zin0k6ykH/hfq6pvAVTV8YHtXwb2d6tHgfUDu6/rapyh/itVtRPYCTA+Pl4L+WtPS39NbKlXsN9R1lKvMPx+53L3ToC7gKeq6nMD9TUDwz4APNEt7wOuTfL6JJcAG4Hv0f/gdmOSS5K8Dri2GytJGpK5nOm/E/gg8HiSR7vap4DrklxK//LOc8BHAarqcJI99D+gfRW4oapeA0hyI3AfsALYVVWHF7EXSdIs5nL3zgNAptl04Az73AbcNk39wJn2kyQtLX8jV5IaYuhLUkMMfUlqyJzv0z8fTZ6Y4vYvP
nBa/d6PbV6G2UjS8vNMX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDVk1tBPsj7J/UmeTHI4yU1d/aIkB5M80z2u7upJ8oUkk0keS/L2gWNt78Y/k2T70rUlSZrOXM70XwU+UVWbgMuBG5JsAm4GDlXVRuBQtw5wFbCx+9kB3AH9NwngFuAdwGXALSffKCRJwzFr6FfVsar6frf8c+ApYC2wFdjdDdsNXNMtbwW+Wn3fBVYlWQNcCRysqpeq6ifAQWDLonYjSTqjs7qmn2QD8DbgQWCsqo51m14ExrrltcALA7sd6Woz1SVJQ3LBXAcmeQPwTeDjVfWzJL/aVlWVpBZjQkl20L8sxNjYGL1eb97HWrXyNbaOvXxafSHHPFdNTU2NZF8zsd/R1VKvMPx+5xT6SVbSD/yvVdW3uvLxJGuq6lh3+eZEVz8KrB/YfV1XOwpMnFLvnfpcVbUT2AkwPj5eExMTpw6Zszv37Gfv8VWn1e/dtnnexzxX9Xo9FvJndb6x39HVUq8w/H7ncvdOgLuAp6rqcwOb9gEn78DZDuwdqH+ou4vncuCn3WWg+4ArkqzuPsC9oqtJkoZkLmf67wQ+CDye5NGu9ings8CeJNcDzwPbum0HgKuBSeCXwIcBquqlJLcCD3XjPlNVLy1KF5KkOZk19KvqASAzbH7PNOMLuGGGY+0Cdp3NBCVJi8ffyJWkhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUkFlDP8muJCeSPDFQ+3SSo0ke7X6uHtj2ySSTSZ5OcuVAfUtXm0xy8+K3IkmazVzO9L8CbJmm/vmqurT7OQCQZBNwLfAH3T7/kGRFkhXAl4CrgE3Add1YSdIQXTDbgKr6TpINczzeVuDuqnoF+FGSSeCybttkVT0LkOTubuyTZz1jSdK8LeSa/o1JHusu/6zuamuBFwbGHOlqM9UlSUM065n+DO4AbgWqe7wd+MhiTCjJDmAHwNjYGL1eb97HWrXyNbaOvXxafSHHPFdNTU2NZF8zsd/R1VKvMPx+5xX6VXX85HKSLwP7u9WjwPqBoeu6Gmeon3rsncBOgPHx8ZqYmJjPFAG4c89+9h5fdVr93m2b533Mc1Wv12Mhf1bnG/sdXS31CsPvd16Xd5KsGVj9AHDyzp59wLVJXp/kEmAj8D3gIWBjkkuSvI7+h7375j9tSdJ8zHqmn+QbwARwcZIjwC3ARJJL6V/eeQ74KEBVHU6yh/4HtK8CN1TVa91xbgTuA1YAu6rq8KJ3I0k6o7ncvXPdNOW7zjD+NuC2aeoHgANnNTtJ0qLyN3IlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNWTW0E+yK8mJJE8M1C5KcjDJM93j6q6eJF9IMpnksSRvH9hnezf+mSTbl6YdSdKZzOVM/yvAllNqNwOHqmojcKhbB7gK2Nj97ADugP6bBHAL8A7gMuCWk28UkqThmTX0q+o7wEunlLcCu7vl3cA1A/WvVt93gVVJ1gBXAger6
qWq+glwkNPfSCRJS2y+1/THqupYt/wiMNYtrwVeGBh3pKvNVJckDdEFCz1AVVWSWozJACTZQf/SEGNjY/R6vXkfa9XK19g69vJp9YUc81w1NTU1kn3NxH5HV0u9wvD7nW/oH0+ypqqOdZdvTnT1o8D6gXHrutpRYOKUem+6A1fVTmAnwPj4eE1MTEw3bE7u3LOfvcdXnVa/d9vmeR/zXNXr9VjIn9X5xn5HV0u9wvD7ne/lnX3AyTtwtgN7B+of6u7iuRz4aXcZ6D7giiSruw9wr+hqkqQhmvVMP8k36J+lX5zkCP27cD4L7ElyPfA8sK0bfgC4GpgEfgl8GKCqXkpyK/BQN+4zVXXqh8OSpCU2a+hX1XUzbHrPNGMLuGGG4+wCdp3V7CRJi8rfyJWkhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUkAWFfpLnkjye5NEkD3e1i5IcTPJM97i6qyfJF5JMJnksydsXowFJ0twtxpn+u6rq0qoa79ZvBg5V1UbgULcOcBWwsfvZAdyxCM8tSToLS3F5Zyuwu1veDVwzUP9q9X0XWJVkzRI8vyRpBgsN/QK+neSRJDu62lhVHeuWXwTGuuW1wAsD+x7papKkIblggftvrqqjSd4CHEzyw8GNVVVJ6mwO2L157AAYGxuj1+vNe3KrVr7G1rGXT6sv5JjnqqmpqZHsayb2O7pa6hWG3++CQr+qjnaPJ5LcA1wGHE+ypqqOdZdvTnTDjwLrB3Zf19VOPeZOYCfA+Ph4TUxMzHt+d+7Zz97jq06r37tt87yPea7q9Xos5M/qfGO/o6ulXmH4/c778k6SC5O88eQycAXwBLAP2N4N2w7s7Zb3AR/q7uK5HPjpwGUgSdIQLORMfwy4J8nJ43y9qv4lyUPAniTXA88D27rxB4CrgUngl8CHF/DckqR5mHfoV9WzwFunqf838J5p6gXcMN/nkyQtnL+RK0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDblguScgteL9X3zgrMbf+7HNZ3WcmcZLg4Ye+km2AH8PrADurKrPDnsO0nTONkzPNsTP1uDxt45Ncfsszzef+fhG0Z6hhn6SFcCXgPcCR4CHkuyrqieHOQ+NjqUO3mE9x3JZ6t7828q5Z9hn+pcBk1X1LECSu4GtgKEvYO4hNJczXy2/s31Tef8XH5j2v61vBotn2KG/FnhhYP0I8I4hz6FJo3y2qtF3Lr5+z9c3onPug9wkO4Ad3epUkqcXcLiLgR+f9hx/sYAjnrum7XVU7bffkXW+9LqIObIU/f72TBuGHfpHgfUD6+u62q9U1U5g52I8WZKHq2p8MY51rmupV7DfUdZSrzD8fod9n/5DwMYklyR5HXAtsG/Ic5CkZg31TL+qXk1yI3Af/Vs2d1XV4WHOQZJaNvRr+lV1ADgwpKdblMtE54mWegX7HWUt9QpD7jdVNcznkyQtI797R5IaMpKhn2RLkqeTTCa5ebnns9iS7EpyIskTA7WLkhxM8kz3uHo557hYkqxPcn+SJ5McTnJTVx/Vfn89yfeS/EfX71939UuSPNi9pv+xuxFiZCRZkeQHSfZ36yPbb5Lnkjye5NEkD3e1ob2eRy70B77q4SpgE3Bdkk3LO6tF9xVgyym1m4FDVbURONStj4JXgU9U1
SbgcuCG7r/nqPb7CvDuqnorcCmwJcnlwN8An6+q3wV+Aly/jHNcCjcBTw2sj3q/76qqSwdu1Rza63nkQp+Br3qoqv8BTn7Vw8ioqu8AL51S3grs7pZ3A9cMdVJLpKqOVdX3u+Wf0w+GtYxuv1VVU93qyu6ngHcD/9TVR6ZfgCTrgD8G7uzWwwj3O4OhvZ5HMfSn+6qHtcs0l2Eaq6pj3fKLwNhyTmYpJNkAvA14kBHut7vU8ShwAjgI/BfwclW92g0Ztdf03wF/Bfxvt/4mRrvfAr6d5JHuGwhgiK/nc+5rGLRwVVVJRuq2rCRvAL4JfLyqftY/GewbtX6r6jXg0iSrgHuA31/mKS2ZJO8DTlTVI0kmlns+Q7K5qo4meQtwMMkPBzcu9et5FM/0Z/2qhxF1PMkagO7xxDLPZ9EkWUk/8L9WVd/qyiPb70lV9TJwP/BHwKokJ0/SRuk1/U7gT5I8R/9S7Lvp/3sbo9ovVXW0ezxB/039Mob4eh7F0G/1qx72Adu75e3A3mWcy6Lpru/eBTxVVZ8b2DSq/b65O8MnyW/Q/7cnnqIf/n/aDRuZfqvqk1W1rqo20P9/9d+q6s8Y0X6TXJjkjSeXgSuAJxji63kkfzkrydX0rxOe/KqH25Z5SosqyTeACfrfznccuAX4Z2AP8FvA88C2qjr1w97zTpLNwL8Dj/P/13w/Rf+6/ij2+4f0P8hbQf+kbE9VfSbJ79A/E74I+AHw51X1yvLNdPF1l3f+sqreN6r9dn3d061eAHy9qm5L8iaG9HoeydCXJE1vFC/vSJJmYOhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktSQ/wP2K3YaZ+1f0gAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# Примеры специальных функций. Гистограмма\n", - "df['Number vmail messages'].hist(bins=50, alpha=0.8)" - ] - }, { "cell_type": "code", "execution_count": 9, @@ -639,7 +588,7 @@ "id": "SqpJpNfy4pqI" }, "source": [ - "Диаграмма рассеяния позволяет анализировать **пары** параметров и выявлять их взаимосвязь " + "Диаграмма рассеяния позволяет анализировать **пары** параметров и выявлять их взаимосвязь" ] }, { @@ -1099,10 +1048,6 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "id": "PGZfPcWz4pqJ", - "outputId": "60091379-8ac7-479c-a43e-c5f5c6026241" - }, "outputs": [ { "data": { @@ -1133,7 +1078,51 @@ "# Диаграмма рассеяния в seaborn\n", "plt.figure(figsize=(15,10))\n", "sns.relplot(x=\"Total day calls\", y=\"Total intl calls\", hue=\"Churn\", size=\"International plan\", sizes=(40, 10), data=df);" - ] + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD6CAYAAABNu5eFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAARCElEQVR4nO3df6zddX3H8edrpboFTVpEb7q2W1nWZemSieYGWewfV41QmK6YLA1k00ZJ6h/gMHFZ0H9wEhKXDN00jqRCY01U1kxZS9MMu44Txx8ioAwoyLhDCG1KG4eoVxMW2Ht/nG/dSXtv7+39cW57Ps9HcnO+3/f38/2ez7scXufb7/ne01QVkqQ2/NpyT0CSNDyGviQ1xNCXpIYY+pLUEENfkhpi6EtSQ2YN/STrk9yf5Mkkh5Pc1NU/neRokke7n6sH9vlkkskkTye5cqC+patNJrl5aVqSJM0ks92nn2QNsKaqvp/kjcAjwDXANmCqqv72lPGbgG8AlwG/Cfwr8Hvd5v8E3gscAR4CrquqJ2d67osvvrg2bNgwj7b6fvGLX3DhhRfOe//zSUu9gv2OspZ6haXp95FHHvlxVb15um0XzLZzVR0DjnXLP0/yFLD2DLtsBe6uqleAHyWZpP8GADBZVc8CJLm7Gztj6G/YsIGHH354tinOqNfrMTExMe/9zyct9Qr2O8pa6hWWpt8kz8+07ayu6SfZALwNeLAr3ZjksSS7kqzuamuBFwZ2O9LVZqpLkoZk1jP9k5K8Afgm8PGq+lmSO4Bbgeoebwc+stAJJdkB7AAYGxuj1+vN+1hTU1ML2v980lKvYL+jrKVeYfj9zin0k6ykH/hfq6pvAVTV8YHtXwb2d6tHgfUDu6/rapyh/itVtRPYCTA+Pl4L+WtPS39NbKlXsN9R1lKvMPx+53L3ToC7gKeq6nMD9TUDwz4APNEt7wOuTfL6JJcAG4Hv0f/gdmOSS5K8Dri2GytJGpK5nOm/E/gg8HiSR7vap4DrklxK//LOc8BHAarqcJI99D+gfRW4oapeA0hyI3AfsALYVVWHF7EXSdIs5nL3zgNAptl04Az73AbcNk39wJn2kyQtLX8jV5IaYuhLUkMMfUlqyJzv0z8fTZ6Y4vYvPnBa/d6PbV6G2UjS8vNMX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDVk1tBPsj7J/UmeTHI4yU1d/aIkB5M80z2u7upJ8oUkk0keS/L2gWNt78Y/k2T70rUlSZrOXM70XwU+UVWbgMuBG5JsAm4GDlXVRuBQtw5wFbCx+9kB3AH9NwngFuAdwGXALSffKCRJwzFr6FfVsar6frf8c+ApYC2wFdjdDdsNXNMtbwW+Wn3fBVYlWQNcCRysqpeq6ifAQWDLonYjSTqjs7qmn2QD8DbgQWCsqo51m14ExrrltcALA7sd6Woz1SVJQ3LBXAcmeQPwTeDjVfWzJL/aVlWVpBZjQkl20L8sxNjYGL1eb97HWrXyNbaOvXxafSHHPFdNTU2NZF8zsd/R1VKvMPx+5xT6SVbSD/yvVdW3uvLxJGuq6lh3+eZEVz8KrB/YfV1XOwpMnFLvnfpcVbUT2AkwPj5eExMTpw6Zszv37Gfv8VWn1e/dtnnexzxX9Xo9FvJndb6x39HVUq8w/H7ncvdOgLuAp6rqcwOb9gEn78DZDuwdqH+ou4vncuCn3WWg+4ArkqzuPsC9oqtJkoZkLmf67wQ+CDye5NGu9ings8CeJNcDzwPbum0HgKuBSeCXwIcBquqlJLcCD3XjPlNVLy1KF5KkOZk19KvqASAzbH7PNOMLuGGGY+0Cdp3NBCV
Ji8ffyJWkhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUkFlDP8muJCeSPDFQ+3SSo0ke7X6uHtj2ySSTSZ5OcuVAfUtXm0xy8+K3IkmazVzO9L8CbJmm/vmqurT7OQCQZBNwLfAH3T7/kGRFkhXAl4CrgE3Add1YSdIQXTDbgKr6TpINczzeVuDuqnoF+FGSSeCybttkVT0LkOTubuyTZz1jSdK8LeSa/o1JHusu/6zuamuBFwbGHOlqM9UlSUM065n+DO4AbgWqe7wd+MhiTCjJDmAHwNjYGL1eb97HWrXyNbaOvXxafSHHPFdNTU2NZF8zsd/R1VKvMPx+5xX6VXX85HKSLwP7u9WjwPqBoeu6Gmeon3rsncBOgPHx8ZqYmJjPFAG4c89+9h5fdVr93m2b533Mc1Wv12Mhf1bnG/sdXS31CsPvd16Xd5KsGVj9AHDyzp59wLVJXp/kEmAj8D3gIWBjkkuSvI7+h7375j9tSdJ8zHqmn+QbwARwcZIjwC3ARJJL6V/eeQ74KEBVHU6yh/4HtK8CN1TVa91xbgTuA1YAu6rq8KJ3I0k6o7ncvXPdNOW7zjD+NuC2aeoHgANnNTtJ0qLyN3IlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNWTW0E+yK8mJJE8M1C5KcjDJM93j6q6eJF9IMpnksSRvH9hnezf+mSTbl6YdSdKZzOVM/yvAllNqNwOHqmojcKhbB7gK2Nj97ADugP6bBHAL8A7gMuCWk28UkqThmTX0q+o7wEunlLcCu7vl3cA1A/WvVt93gVVJ1gBXAger6qWq+glwkNPfSCRJS2y+1/THqupYt/wiMNYtrwVeGBh3pKvNVJckDdEFCz1AVVWSWozJACTZQf/SEGNjY/R6vXkfa9XK19g69vJp9YUc81w1NTU1kn3NxH5HV0u9wvD7nW/oH0+ypqqOdZdvTnT1o8D6gXHrutpRYOKUem+6A1fVTmAnwPj4eE1MTEw3bE7u3LOfvcdXnVa/d9vmeR/zXNXr9VjIn9X5xn5HV0u9wvD7ne/lnX3AyTtwtgN7B+of6u7iuRz4aXcZ6D7giiSruw9wr+hqkqQhmvVMP8k36J+lX5zkCP27cD4L7ElyPfA8sK0bfgC4GpgEfgl8GKCqXkpyK/BQN+4zVXXqh8OSpCU2a+hX1XUzbHrPNGMLuGGG4+wCdp3V7CRJi8rfyJWkhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUkAWFfpLnkjye5NEkD3e1i5IcTPJM97i6qyfJF5JMJnksydsXowFJ0twtxpn+u6rq0qoa79ZvBg5V1UbgULcOcBWwsfvZAdyxCM8tSToLS3F5Zyuwu1veDVwzUP9q9X0XWJVkzRI8vyRpBgsN/QK+neSRJDu62lhVHeuWXwTGuuW1wAsD+x7papKkIblggftvrqqjSd4CHEzyw8GNVVVJ6mwO2L157AAYGxuj1+vNe3KrVr7
G1rGXT6sv5JjnqqmpqZHsayb2O7pa6hWG3++CQr+qjnaPJ5LcA1wGHE+ypqqOdZdvTnTDjwLrB3Zf19VOPeZOYCfA+Ph4TUxMzHt+d+7Zz97jq06r37tt87yPea7q9Xos5M/qfGO/o6ulXmH4/c778k6SC5O88eQycAXwBLAP2N4N2w7s7Zb3AR/q7uK5HPjpwGUgSdIQLORMfwy4J8nJ43y9qv4lyUPAniTXA88D27rxB4CrgUngl8CHF/DckqR5mHfoV9WzwFunqf838J5p6gXcMN/nkyQtnL+RK0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDblguScgteL9X3zgrMbf+7HNZ3WcmcZLg4Ye+km2AH8PrADurKrPDnsO0nTONkzPNsTP1uDxt45Ncfsszzef+fhG0Z6hhn6SFcCXgPcCR4CHkuyrqieHOQ+NjqUO3mE9x3JZ6t7828q5Z9hn+pcBk1X1LECSu4GtgKEvYO4hNJczXy2/s31Tef8XH5j2v61vBotn2KG/FnhhYP0I8I4hz6FJo3y2qtF3Lr5+z9c3onPug9wkO4Ad3epUkqcXcLiLgR+f9hx/sYAjnrum7XVU7bffkXW+9LqIObIU/f72TBuGHfpHgfUD6+u62q9U1U5g52I8WZKHq2p8MY51rmupV7DfUdZSrzD8fod9n/5DwMYklyR5HXAtsG/Ic5CkZg31TL+qXk1yI3Af/Vs2d1XV4WHOQZJaNvRr+lV1ADgwpKdblMtE54mWegX7HWUt9QpD7jdVNcznkyQtI797R5IaMpKhn2RLkqeTTCa5ebnns9iS7EpyIskTA7WLkhxM8kz3uHo557hYkqxPcn+SJ5McTnJTVx/Vfn89yfeS/EfX71939UuSPNi9pv+xuxFiZCRZkeQHSfZ36yPbb5Lnkjye5NEkD3e1ob2eRy70B77q4SpgE3Bdkk3LO6tF9xVgyym1m4FDVbURONStj4JXgU9U1SbgcuCG7r/nqPb7CvDuqnorcCmwJcnlwN8An6+q3wV+Aly/jHNcCjcBTw2sj3q/76qqSwdu1Rza63nkQp+Br3qoqv8BTn7Vw8ioqu8AL51S3grs7pZ3A9cMdVJLpKqOVdX3u+Wf0w+GtYxuv1VVU93qyu6ngHcD/9TVR6ZfgCTrgD8G7uzWwwj3O4OhvZ5HMfSn+6qHtcs0l2Eaq6pj3fKLwNhyTmYpJNkAvA14kBHut7vU8ShwAjgI/BfwclW92g0Ztdf03wF/Bfxvt/4mRrvfAr6d5JHuGwhgiK/nc+5rGLRwVVVJRuq2rCRvAL4JfLyqftY/GewbtX6r6jXg0iSrgHuA31/mKS2ZJO8DTlTVI0kmlns+Q7K5qo4meQtwMMkPBzcu9et5FM/0Z/2qhxF1PMkagO7xxDLPZ9EkWUk/8L9WVd/qyiPb70lV9TJwP/BHwKokJ0/SRuk1/U7gT5I8R/9S7Lvp/3sbo9ovVXW0ezxB/039Mob4eh7F0G/1qx72Adu75e3A3mWcy6Lpru/eBTxVVZ8b2DSq/b65O8MnyW/Q/7cnnqIf/n/aDRuZfqvqk1W1rqo20P9/9d+q6s8Y0X6TXJjkjSeXgSuAJxji63kkfzkrydX0rxOe/KqH25Z5SosqyTeACfrfznccuAX4Z2AP8FvA88C2qjr1w97zTpLNwL8Dj/P/13w/Rf+6/ij2+4f0P8hbQf+kbE9VfSbJ79A/E74I+AHw51X1yvLNdPF1l3f+sqreN6r9dn3d061eAHy9qm5L8iaG9HoeydCXJE1vFC/vSJJmYOhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktS
Q/wP2K3YaZ+1f0gAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Примеры специальных функций. Гистограмма\n", + "df['Number vmail messages'].hist(bins=50, alpha=0.8)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } }, { "cell_type": "markdown", @@ -3613,4 +3602,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} +} \ No newline at end of file diff --git "a/exportToHTML/\320\222\320\270\320\267\321\203\320\260\320\273\320\270\320\267\320\260\321\206\320\270\321\217 \320\224\320\227.html" "b/exportToHTML/\320\222\320\270\320\267\321\203\320\260\320\273\320\270\320\267\320\260\321\206\320\270\321\217 \320\224\320\227.html" new file mode 100644 index 0000000..11f0142 --- /dev/null +++ "b/exportToHTML/\320\222\320\270\320\267\321\203\320\260\320\273\320\270\320\267\320\260\321\206\320\270\321\217 \320\224\320\227.html" @@ -0,0 +1,182 @@ + + +Визуализация ДЗ.ipynb + + + + + +
+ +Визуализация ДЗ.ipynb +
+
#%% md 
+**Импорт всех необходимых библиотек** 
+#%% 
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+pd.set_option("display.max_rows", 20)
+pd.set_option("display.max_columns", 20)
+pd.set_option("display.precision", 4)
+pd.set_option("plotting.backend", "matplotlib")
+
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, RobustScaler
+#%% md 
+# 1. Исследовательский анализ данных (exploratory data analysis - EDA) 
+#%% md 
+## 1.1 Словесное описание признаков 
+#%% md 
+- __ID__ - Уникальный номер спортсмена 
+- __Name__ - ФИО спортсмена 
+- __Sex__ - Пол: M or F - Мужчина или женщина 
+- __Age__ - Возраст спортсмена 
+- __Height__ - Рост в см. 
+- __Weight__ - Вес в кг. 
+- __Team__ - Название команды 
+- __NOC__ - 3-буквенный код Национального олимпийского комитета 
+- __Games__ - Год и сезон проведения олимпиады, напр.: 2000 Summer или 2000 Winter
+- __Year__ - Год проведения олимпиады 
+- __Season__ - Сезон проведения олимпиады: Summer, Winter 
+- __City__ - Город проведения олимпиады 
+- __Sport__ - Название вида спорта 
+- __Event__ - Название мероприятия 
+- __Medal__ - Медаль: Gold, Silver, Bronze, or NA 
+#%% md 
+## 1.2 Загрузка данных и общее описание набора данных
+#%% 
+PATH = 'https://github.com/aksenov7/Kaggle_competition_group/blob/master/athlete_events.csv.zip?raw=true'
+df = pd.read_csv(PATH, compression='zip')
+df.head()
+#%% 
+df.shape
+#%% 
+df.info()
+#%% md 
+- Видим большое кол-во пропусков Medal, но будем считать, что это люди, которые не получили медаль. 
+- Что более существенно, так это пропуски по Weight, Height, Age 
+ 
+#%% 
+df.Sex.value_counts()
+#%% 
+df.Sex.value_counts(normalize=True)
+#%% md 
+Видим преобладание мужского пола 
+#%% 
+df.describe()
+#%% md 
+**Первичные выводы по числовым данным** 
+* Возраст людей в выборке от 10 до 97 лет. В среднем возраст варьируется от 19 до 32 лет. Спортсмены младше 21 года составляют 25%, 21-28-летние - 50%. Остальные 25% - это люди от 28 до 97 лет.
+* Рост от 127 до 226 см. В среднем 175. СКО = 10. 
+* Вес от 25 до 214 кг. В среднем 70.7 кг. 
+* Год проведения лучше рассмотреть отдельно. Но можно сказать, что данные приведены за 1896-2016 гг.
+#%% 
+df.describe(include=object)
+#%% md 
+**Первичные выводы по строковым данным** 
+* 1184 уникальных команд. Самыми активными являются United States - они поучаствовали в 17847 соревнованиях. 
+* Самым же активным спортсменом является Robert Tait McKenzie - 58 участий. 
+* Самой популярной игрой является 2000 Summer - 13821 участников. 
+* В выборке преобладают мужчины - 72.5%. 
+* Больше всего участий приходится на летние игры - 82%. 
+* Самым популярным видом спорта является Athletics - 38624 человека. 
+* Самый популярный Event - Football Men's Football. Всего различных ивентов - 765. 
+#%% md 
+### Медали: 
+Теперь посмотрим на медалистов 
+#%% 
+golds = df[(df['Medal'] == 'Gold')]
+golds.Team.describe(include=object)
+#%% md 
+* United States выигрывали золото чаще других. 
+#%% 
+df.groupby('Medal').describe(include=object)['Name']  # per medal type: count / unique / most frequent athlete name and its frequency
+#%% md 
+* Больше всего Gold получал Michael Fred Phelps, II - 23 раза. 
+* Silver - Mikhail Yakovlevich Voronin - 6 раз. 
+* Bronze - Heikki Ilmari Savolainen - 6 раз. 
+#%% 
+df.groupby('Medal').describe(include=object)['Team']  # per medal type: count / unique / most frequent team and its frequency
+#%% md 
+* United States получали медали чаще остальных. И почти половина из них Gold 
+#%% 
+no_medal = df[df['Medal'].isna()]
+no_medal.describe(include=object)
+#%% md 
+* United States не получали медали чаще остальных. 
+* Robert Tait McKenzie не получал медали чаще остальных - 57 раз. 
+#%% md 
+
+#%% md 
+## 1.3 Визуальный и статистический анализ данных 
+#%% 
+# Number of men and women by age
+plt.figure(figsize=(18,8))
+sns.histplot(data=df, x='Age', hue='Sex')
+#%% md 
+* Распределение близко к нормальному (с длинным правым хвостом - максимальный возраст 97 лет), что и можно было ожидать. 
+* Видно, что более молодых женщин (до 18 лет) больше, чем мужчин. 
+#%% md 
+### По годам 
+#%% 
+#Средний возраст по годам
+gr = df.groupby('Year').mean()
+plt.figure(figsize=(18,8))
+gr['Age'].plot()
+#%% md 
+Из графика видно, что до 1950-х годов в основном преобладали люди с возрастом ~28, к 1980 году средний возраст упал до 24, а потом опять начал расти. 
+#%% 
+#Кол-во мужчин и женщин по годам
+silver = df
+plt.figure(figsize=(18,8))
+sns.histplot(x="Year", hue="Sex", data=df)
+#%% md 
+### По времени года 
+#%% 
+# Number of rows per Olympic year, split by season (Summer / Winter)
+plt.figure(figsize=(18, 8))
+sns.countplot(data=df, x='Year', hue='Season')
+#%% md 
+Зимние виды спорта проводятся реже. А до 1924 они вообще не проводились. 
+#%% md 
+### Корреляции 
+#%% 
+df.select_dtypes(include='number').corr()  # numeric columns only: plain df.corr() raises on string columns in pandas >= 2.0
+#%% 
+# Посмотрим на тепловую карту
+plt.figure(figsize=(8,8))
+sns.heatmap(df.corr(), annot=True, cmap="YlGnBu", cbar=False);
+#%% md 
+* Коэф. корреляции между Height и Weight равен 0.8, что ожидаемо 
+* Также немного коррелируют между собой Age и Height или Weight 
+* Остальные данные вообще не коррелируют между собой 
+#%% 
+df.plot.scatter(x='Height', y='Weight')  # visual check of the Height-Weight relationship
+#%% 
+# Количество медалей по командам
+plt.figure(figsize=(18,9))
+medals = df.groupby('Team')['Medal'].describe(include=object)['count'].sort_values(ascending=False)[:50]
+medals.plot()
+medals
+#%% md 
+Количество полученных медалей 50 лучших команд. 
+Видно, что United States получили больше всех - 5219, а второе место аж в два раза меньше - 2451. 
+#%% md 
+## Интересные факты 
+* Возраст самого старого спортсмена 97 лет 
+* Возраст самого молодого - 10 лет 
+* Наименьший вес спортсмена - 25 кг 
+* Раньше спортсменок почти не было, но к настоящему моменту наблюдается тенденция равного кол-ва мужчин и женщин 
+* До 1992 года летние и зимние олимпиады проводились в один и тот же год, раз в 4 года, но начиная с 1994 года они чередуются, и олимпиады проходят каждые 2 года, причем каждая вторая (зимняя) менее "масштабная". 
+* United States выигрывали медали 5219 раз. Это более чем в два раза больше следующей по счету команды.
+ + \ No newline at end of file diff --git "a/my/\320\273\320\270\320\275\320\265\320\271\320\275\320\260\321\217 \321\200\320\265\320\263\321\200\320\265\321\201\321\201\320\270\321\217.ipynb" "b/my/\320\273\320\270\320\275\320\265\320\271\320\275\320\260\321\217 \321\200\320\265\320\263\321\200\320\265\321\201\321\201\320\270\321\217.ipynb" new file mode 100644 index 0000000..847a5c7 --- /dev/null +++ "b/my/\320\273\320\270\320\275\320\265\320\271\320\275\320\260\321\217 \321\200\320\265\320\263\321\200\320\265\321\201\321\201\320\270\321\217.ipynb" @@ -0,0 +1,732 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 90, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.datasets import load_boston\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import r2_score\n", + "from sklearn import metrics\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\ooonu\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\utils\\deprecation.py:87: FutureWarning: Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.\n", + "\n", + " The Boston housing prices dataset has an ethical problem. 
You can refer to\n", + " the documentation of this function for further details.\n", + "\n", + " The scikit-learn maintainers therefore strongly discourage the use of this\n", + " dataset unless the purpose of the code is to study and educate about\n", + " ethical issues in data science and machine learning.\n", + "\n", + " In this special case, you can fetch the dataset from the original\n", + " source::\n", + "\n", + " import pandas as pd\n", + " import numpy as np\n", + "\n", + "\n", + " data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n", + " raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n", + " data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n", + " target = raw_df.values[1::2, 2]\n", + "\n", + " Alternative datasets include the California housing dataset (i.e.\n", + " :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing\n", + " dataset. You can load the datasets as follows::\n", + "\n", + " from sklearn.datasets import fetch_california_housing\n", + " housing = fetch_california_housing()\n", + "\n", + " for the California housing dataset and::\n", + "\n", + " from sklearn.datasets import fetch_openml\n", + " housing = fetch_openml(name=\"house_prices\", as_frame=True)\n", + "\n", + " for the Ames housing dataset.\n", + " \n", + " warnings.warn(msg, category=FutureWarning)\n" + ] + } + ], + "source": [ + "boston = load_boston()\n", + "features = boston.data\n", + "target = boston.target\n", + "features_name = boston.feature_names" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 282, + "outputs": [], + "source": [ + "def test_linear_regression(X, y):\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + " l_reg = LinearRegression()\n", + " l_reg.fit(X_train, y_train)\n", + " l_reg.score(X_test, y_test)\n", + "\n", + " l_reg = LinearRegression()\n", + " 
l_reg.fit(X_train, y_train)\n", + " l_reg.score(X_test, y_test)\n", + "\n", + " #print('r2_score: ', l_reg.score(X_test, y_test))\n", + " return (l_reg, l_reg.score(X_test, y_test))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 94, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "r2_score: 0.6687594935356278\n" + ] + } + ], + "source": [ + "test_linear_regression(features, target)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "# Нормализуем значения" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 97, + "outputs": [], + "source": [ + "from sklearn import preprocessing" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "# Проведем EDA" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 103, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 303, + "outputs": [ + { + "data": { + "text/plain": " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \n1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n\n PTRATIO B LSTAT TARGET \n0 15.3 396.90 4.98 24.0 \n1 17.8 396.90 9.14 21.6 \n2 17.8 392.83 4.03 34.7 \n3 18.7 394.63 2.94 33.4 \n4 18.7 396.90 5.33 36.2 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATTARGET
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.9824.0
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.1421.6
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.0334.7
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.9433.4
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.3336.2
\n
" + }, + "execution_count": 303, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(np.column_stack((features, target)), columns=np.hstack((features_name, ['TARGET'])))\n", + "df.head()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 101, + "outputs": [ + { + "data": { + "text/plain": " CRIM ZN INDUS CHAS NOX RM \\\ncount 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 \nmean 3.613524 11.363636 11.136779 0.069170 0.554695 6.284634 \nstd 8.601545 23.322453 6.860353 0.253994 0.115878 0.702617 \nmin 0.006320 0.000000 0.460000 0.000000 0.385000 3.561000 \n25% 0.082045 0.000000 5.190000 0.000000 0.449000 5.885500 \n50% 0.256510 0.000000 9.690000 0.000000 0.538000 6.208500 \n75% 3.677083 12.500000 18.100000 0.000000 0.624000 6.623500 \nmax 88.976200 100.000000 27.740000 1.000000 0.871000 8.780000 \n\n AGE DIS RAD TAX PTRATIO B \\\ncount 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 \nmean 68.574901 3.795043 9.549407 408.237154 18.455534 356.674032 \nstd 28.148861 2.105710 8.707259 168.537116 2.164946 91.294864 \nmin 2.900000 1.129600 1.000000 187.000000 12.600000 0.320000 \n25% 45.025000 2.100175 4.000000 279.000000 17.400000 375.377500 \n50% 77.500000 3.207450 5.000000 330.000000 19.050000 391.440000 \n75% 94.075000 5.188425 24.000000 666.000000 20.200000 396.225000 \nmax 100.000000 12.126500 24.000000 711.000000 22.000000 396.900000 \n\n LSTAT TARGET \ncount 506.000000 506.000000 \nmean 12.653063 22.532806 \nstd 7.141062 9.197104 \nmin 1.730000 5.000000 \n25% 6.950000 17.025000 \n50% 11.360000 21.200000 \n75% 16.955000 25.000000 \nmax 37.970000 50.000000 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATTARGET
count506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000
mean3.61352411.36363611.1367790.0691700.5546956.28463468.5749013.7950439.549407408.23715418.455534356.67403212.65306322.532806
std8.60154523.3224536.8603530.2539940.1158780.70261728.1488612.1057108.707259168.5371162.16494691.2948647.1410629.197104
min0.0063200.0000000.4600000.0000000.3850003.5610002.9000001.1296001.000000187.00000012.6000000.3200001.7300005.000000
25%0.0820450.0000005.1900000.0000000.4490005.88550045.0250002.1001754.000000279.00000017.400000375.3775006.95000017.025000
50%0.2565100.0000009.6900000.0000000.5380006.20850077.5000003.2074505.000000330.00000019.050000391.44000011.36000021.200000
75%3.67708312.50000018.1000000.0000000.6240006.62350094.0750005.18842524.000000666.00000020.200000396.22500016.95500025.000000
max88.976200100.00000027.7400001.0000000.8710008.780000100.00000012.12650024.000000711.00000022.000000396.90000037.97000050.000000
\n
" + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 110, + "outputs": [ + { + "data": { + "text/plain": " CRIM ZN INDUS CHAS NOX RM AGE \\\nCRIM 1.000000 -0.200469 0.406583 -0.055892 0.420972 -0.219247 0.352734 \nZN -0.200469 1.000000 -0.533828 -0.042697 -0.516604 0.311991 -0.569537 \nINDUS 0.406583 -0.533828 1.000000 0.062938 0.763651 -0.391676 0.644779 \nCHAS -0.055892 -0.042697 0.062938 1.000000 0.091203 0.091251 0.086518 \nNOX 0.420972 -0.516604 0.763651 0.091203 1.000000 -0.302188 0.731470 \nRM -0.219247 0.311991 -0.391676 0.091251 -0.302188 1.000000 -0.240265 \nAGE 0.352734 -0.569537 0.644779 0.086518 0.731470 -0.240265 1.000000 \nDIS -0.379670 0.664408 -0.708027 -0.099176 -0.769230 0.205246 -0.747881 \nRAD 0.625505 -0.311948 0.595129 -0.007368 0.611441 -0.209847 0.456022 \nTAX 0.582764 -0.314563 0.720760 -0.035587 0.668023 -0.292048 0.506456 \nPTRATIO 0.289946 -0.391679 0.383248 -0.121515 0.188933 -0.355501 0.261515 \nB -0.385064 0.175520 -0.356977 0.048788 -0.380051 0.128069 -0.273534 \nLSTAT 0.455621 -0.412995 0.603800 -0.053929 0.590879 -0.613808 0.602339 \nTARGET -0.388305 0.360445 -0.483725 0.175260 -0.427321 0.695360 -0.376955 \n\n DIS RAD TAX PTRATIO B LSTAT TARGET \nCRIM -0.379670 0.625505 0.582764 0.289946 -0.385064 0.455621 -0.388305 \nZN 0.664408 -0.311948 -0.314563 -0.391679 0.175520 -0.412995 0.360445 \nINDUS -0.708027 0.595129 0.720760 0.383248 -0.356977 0.603800 -0.483725 \nCHAS -0.099176 -0.007368 -0.035587 -0.121515 0.048788 -0.053929 0.175260 \nNOX -0.769230 0.611441 0.668023 0.188933 -0.380051 0.590879 -0.427321 \nRM 0.205246 -0.209847 -0.292048 -0.355501 0.128069 -0.613808 0.695360 \nAGE -0.747881 0.456022 0.506456 0.261515 -0.273534 0.602339 -0.376955 \nDIS 1.000000 -0.494588 -0.534432 -0.232471 0.291512 -0.496996 0.249929 \nRAD 
-0.494588 1.000000 0.910228 0.464741 -0.444413 0.488676 -0.381626 \nTAX -0.534432 0.910228 1.000000 0.460853 -0.441808 0.543993 -0.468536 \nPTRATIO -0.232471 0.464741 0.460853 1.000000 -0.177383 0.374044 -0.507787 \nB 0.291512 -0.444413 -0.441808 -0.177383 1.000000 -0.366087 0.333461 \nLSTAT -0.496996 0.488676 0.543993 0.374044 -0.366087 1.000000 -0.737663 \nTARGET 0.249929 -0.381626 -0.468536 -0.507787 0.333461 -0.737663 1.000000 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATTARGET
CRIM1.000000-0.2004690.406583-0.0558920.420972-0.2192470.352734-0.3796700.6255050.5827640.289946-0.3850640.455621-0.388305
ZN-0.2004691.000000-0.533828-0.042697-0.5166040.311991-0.5695370.664408-0.311948-0.314563-0.3916790.175520-0.4129950.360445
INDUS0.406583-0.5338281.0000000.0629380.763651-0.3916760.644779-0.7080270.5951290.7207600.383248-0.3569770.603800-0.483725
CHAS-0.055892-0.0426970.0629381.0000000.0912030.0912510.086518-0.099176-0.007368-0.035587-0.1215150.048788-0.0539290.175260
NOX0.420972-0.5166040.7636510.0912031.000000-0.3021880.731470-0.7692300.6114410.6680230.188933-0.3800510.590879-0.427321
RM-0.2192470.311991-0.3916760.091251-0.3021881.000000-0.2402650.205246-0.209847-0.292048-0.3555010.128069-0.6138080.695360
AGE0.352734-0.5695370.6447790.0865180.731470-0.2402651.000000-0.7478810.4560220.5064560.261515-0.2735340.602339-0.376955
DIS-0.3796700.664408-0.708027-0.099176-0.7692300.205246-0.7478811.000000-0.494588-0.534432-0.2324710.291512-0.4969960.249929
RAD0.625505-0.3119480.595129-0.0073680.611441-0.2098470.456022-0.4945881.0000000.9102280.464741-0.4444130.488676-0.381626
TAX0.582764-0.3145630.720760-0.0355870.668023-0.2920480.506456-0.5344320.9102281.0000000.460853-0.4418080.543993-0.468536
PTRATIO0.289946-0.3916790.383248-0.1215150.188933-0.3555010.261515-0.2324710.4647410.4608531.000000-0.1773830.374044-0.507787
B-0.3850640.175520-0.3569770.048788-0.3800510.128069-0.2735340.291512-0.444413-0.441808-0.1773831.000000-0.3660870.333461
LSTAT0.455621-0.4129950.603800-0.0539290.590879-0.6138080.602339-0.4969960.4886760.5439930.374044-0.3660871.000000-0.737663
TARGET-0.3883050.360445-0.4837250.175260-0.4273210.695360-0.3769550.249929-0.381626-0.468536-0.5077870.333461-0.7376631.000000
\n
" + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cor = df.corr()\n", + "cor" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 115, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10,8))\n", + "sns.heatmap(cor)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 111, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Посмотрим на тепловую карту\n", + "plt.figure(figsize=(8,8))\n", + "sns.heatmap(df.corr(), annot=True, cmap=\"YlGnBu\", cbar=False);" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 117, + "outputs": [], + "source": [ + "def search_outliers(feature):\n", + " \"\"\"Функция принимает набор значений 1-го признака и\n", + " возвращает массив индексов тех значений, которые являются выбросами\"\"\"\n", + " q1, q3 = np.percentile(feature, [25, 75])\n", + "\n", + " iqr = q3 - q1\n", + " lower_bound = q1 - 1.5 * iqr\n", + " upper_bound = q3 + 1.5 * iqr\n", + " return np.where((feature < lower_bound) | (feature > upper_bound))[0]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 128, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Признак CRIM содержит 66 выбросов из 506 наблюдений\n", + "Признак ZN содержит 68 выбросов из 506 наблюдений\n", + "Признак INDUS содержит 0 выбросов из 506 наблюдений\n", + "Признак CHAS содержит 35 выбросов из 506 наблюдений\n", + "Признак NOX содержит 0 выбросов из 506 наблюдений\n", + "Признак RM содержит 30 выбросов из 506 наблюдений\n", + "Признак AGE содержит 0 выбросов из 506 наблюдений\n", + "Признак DIS содержит 5 выбросов из 506 наблюдений\n", + "Признак RAD содержит 0 выбросов из 506 наблюдений\n", + "Признак TAX содержит 0 выбросов из 506 наблюдений\n", + "Признак PTRATIO содержит 15 выбросов из 506 наблюдений\n", + "Признак B содержит 77 выбросов из 506 наблюдений\n", + "Признак LSTAT содержит 7 выбросов из 506 наблюдений\n", + "Признак TARGET содержит 40 выбросов из 506 наблюдений\n" + ] + } + ], + "source": [ + "for feature in df.columns:\n", + " sum_outliers = 
len(search_outliers(df[feature]))\n", + " print(f\"Признак {feature} содержит {sum_outliers} выбросов из {df[feature].shape[0]} наблюдений\")" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 307, + "outputs": [], + "source": [ + "def drop_outliers(data, inplace=False):\n", + " drop_index = np.array([])\n", + " for feature in df.columns:\n", + " drop_index = np.hstack((drop_index, search_outliers(df[feature])))\n", + " return data.drop(drop_index, inplace=inplace)\n", + "\n", + "def search_outliers_new(old_feature, new_feature):\n", + " \"\"\"Функция принимает набор значений 1-го признака каким он был до удаления выбросов,\n", + " чтобы корректно расчитать границы выбросов\n", + " И набор значений того же признака после удаления выбросов\n", + " Возвращает массив индексов тех значений, которые являются выбросами\"\"\"\n", + " q1, q3 = np.percentile(old_feature, [25, 75])\n", + " iqr = q3 - q1\n", + " lower_bound = q1 - 1.5 * iqr\n", + " upper_bound = q3 + 1.5 * iqr\n", + " return np.where((new_feature < lower_bound) | (new_feature > upper_bound))[0]\n", + "\n", + "def find_best_features(reg, data):\n", + " coef = pd.DataFrame(reg.coef_, index=features_name, columns=['coef'])\n", + " sorted_coef = round(abs(coef).sort_values('coef', ascending=False))\n", + "\n", + " best_score = test_linear_regression(data.drop('TARGET', axis=1), data['TARGET'])[1]\n", + " best_features = list(data.columns.values)\n", + " for i in range(1, data.shape[1] - 1):\n", + " X_best = data[list(sorted_coef[:i].index)]\n", + " y = data.TARGET\n", + " new_score = test_linear_regression(X_best, y)[1]\n", + " if new_score > best_score:\n", + " best_score = new_score\n", + " best_features = X_best\n", + " print('best score:', best_score)\n", + " return best_features" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 256, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Признак CRIM содержит 0 выбросов из 268 наблюдений\n", + "Признак ZN содержит 0 выбросов из 268 наблюдений\n", + "Признак INDUS содержит 0 выбросов из 268 наблюдений\n", + "Признак CHAS содержит 0 выбросов из 268 наблюдений\n", + "Признак NOX содержит 0 выбросов из 268 наблюдений\n", + "Признак RM содержит 0 выбросов из 268 наблюдений\n", + "Признак AGE содержит 0 выбросов из 268 наблюдений\n", + "Признак DIS содержит 0 выбросов из 268 наблюдений\n", + "Признак RAD содержит 0 выбросов из 268 наблюдений\n", + "Признак TAX содержит 0 выбросов из 268 наблюдений\n", + "Признак PTRATIO содержит 0 выбросов из 268 наблюдений\n", + "Признак B содержит 0 выбросов из 268 наблюдений\n", + "Признак LSTAT содержит 0 выбросов из 268 наблюдений\n", + "Признак TARGET содержит 0 выбросов из 268 наблюдений\n" + ] + } + ], + "source": [ + "d = drop_outliers(df)\n", + "for feature in df.columns:\n", + " sum_outliers = len(search_outliers_new(df[feature], d[feature]))\n", + " print(f\"Признак {feature} содержит {sum_outliers} выбросов из {d[feature].shape[0]} наблюдений\")" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 290, + "outputs": [ + { + "data": { + "text/plain": "[-1.053401914044275,\n -0.7109720834359541,\n -0.6569429298489212,\n -0.2992475805890461,\n -0.17398391375182257,\n -0.0441895094140321,\n -0.03386775527943525,\n -0.014740240787574542,\n -0.010817480966202496,\n 5.440092820663267e-15,\n 0.01476202413443139,\n 0.45936692695078507,\n 4.8874632022255335]" + }, + "execution_count": 290, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reg = test_linear_regression(d.drop('TARGET', axis=1), d.TARGET)[0]\n", + "sorted(reg.coef_)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Видим, что 
большинство признаков не очень информативные, попробуем удалить" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 308, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "best score: 0.6754022231974177\n" + ] + }, + { + "data": { + "text/plain": " NOX RM CHAS DIS PTRATIO LSTAT RAD CRIM INDUS ZN\n0 0.538 6.575 0.0 4.0900 15.3 4.98 1.0 0.00632 2.31 18.0\n1 0.469 6.421 0.0 4.9671 17.8 9.14 2.0 0.02731 7.07 0.0\n2 0.469 7.185 0.0 4.9671 17.8 4.03 2.0 0.02729 7.07 0.0\n3 0.458 6.998 0.0 6.0622 18.7 2.94 3.0 0.03237 2.18 0.0\n4 0.458 7.147 0.0 6.0622 18.7 5.33 3.0 0.06905 2.18 0.0\n.. ... ... ... ... ... ... ... ... ... ...\n501 0.573 6.593 0.0 2.4786 21.0 9.67 1.0 0.06263 11.93 0.0\n502 0.573 6.120 0.0 2.2875 21.0 9.08 1.0 0.04527 11.93 0.0\n503 0.573 6.976 0.0 2.1675 21.0 5.64 1.0 0.06076 11.93 0.0\n504 0.573 6.794 0.0 2.3889 21.0 6.48 1.0 0.10959 11.93 0.0\n505 0.573 6.030 0.0 2.5050 21.0 7.88 1.0 0.04741 11.93 0.0\n\n[506 rows x 10 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NOXRMCHASDISPTRATIOLSTATRADCRIMINDUSZN
00.5386.5750.04.090015.34.981.00.006322.3118.0
10.4696.4210.04.967117.89.142.00.027317.070.0
20.4697.1850.04.967117.84.032.00.027297.070.0
30.4586.9980.06.062218.72.943.00.032372.180.0
40.4587.1470.06.062218.75.333.00.069052.180.0
.................................
5010.5736.5930.02.478621.09.671.00.0626311.930.0
5020.5736.1200.02.287521.09.081.00.0452711.930.0
5030.5736.9760.02.167521.05.641.00.0607611.930.0
5040.5736.7940.02.388921.06.481.00.1095911.930.0
5050.5736.0300.02.505021.07.881.00.0474111.930.0
\n

506 rows × 10 columns

\n
" + }, + "execution_count": 308, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reg = test_linear_regression(df.drop('TARGET', axis=1), df.TARGET)[0]\n", + "find_best_features(reg, df)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 309, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "best score: 0.808542297158312\n" + ] + }, + { + "data": { + "text/plain": " RM CRIM PTRATIO DIS RAD NOX LSTAT AGE ZN\n0 6.575 0.00632 15.3 4.0900 1.0 0.538 4.98 65.2 18.0\n1 6.421 0.02731 17.8 4.9671 2.0 0.469 9.14 78.9 0.0\n2 7.185 0.02729 17.8 4.9671 2.0 0.469 4.03 61.1 0.0\n3 6.998 0.03237 18.7 6.0622 3.0 0.458 2.94 45.8 0.0\n4 7.147 0.06905 18.7 6.0622 3.0 0.458 5.33 54.2 0.0\n.. ... ... ... ... ... ... ... ... ...\n501 6.593 0.06263 21.0 2.4786 1.0 0.573 9.67 69.1 0.0\n502 6.120 0.04527 21.0 2.2875 1.0 0.573 9.08 76.7 0.0\n503 6.976 0.06076 21.0 2.1675 1.0 0.573 5.64 91.0 0.0\n504 6.794 0.10959 21.0 2.3889 1.0 0.573 6.48 89.3 0.0\n505 6.030 0.04741 21.0 2.5050 1.0 0.573 7.88 80.8 0.0\n\n[268 rows x 9 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
RMCRIMPTRATIODISRADNOXLSTATAGEZN
06.5750.0063215.34.09001.00.5384.9865.218.0
16.4210.0273117.84.96712.00.4699.1478.90.0
27.1850.0272917.84.96712.00.4694.0361.10.0
36.9980.0323718.76.06223.00.4582.9445.80.0
47.1470.0690518.76.06223.00.4585.3354.20.0
..............................
5016.5930.0626321.02.47861.00.5739.6769.10.0
5026.1200.0452721.02.28751.00.5739.0876.70.0
5036.9760.0607621.02.16751.00.5735.6491.00.0
5046.7940.1095921.02.38891.00.5736.4889.30.0
5056.0300.0474121.02.50501.00.5737.8880.80.0
\n

268 rows × 9 columns

\n
" + }, + "execution_count": 309, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reg = test_linear_regression(d.drop('TARGET', axis=1), d.TARGET)[0]\n", + "find_best_features(reg, d)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Видим, что для данных без выбросов лучше брать 9 фич, а для исходных - все 13" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "# Нормализация" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 305, + "outputs": [], + "source": [ + "min_max_scaler = preprocessing.MinMaxScaler()\n", + "norm_features = min_max_scaler.fit_transform(features)\n", + "nd = pd.DataFrame(np.column_stack((norm_features, target)), columns=np.hstack((features_name, ['TARGET'])))\n", + "reg_and_score = test_linear_regression(nd.drop('TARGET', axis=1), nd.TARGET)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Качество не улучшилось, попробуем оставить только лучшие фичи" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 310, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "best score: 0.6715231799608913\n" + ] + }, + { + "data": { + "text/plain": " RM LSTAT DIS CRIM PTRATIO NOX RAD \\\n0 0.577505 0.089680 0.269203 0.000000 0.287234 0.314815 0.000000 \n1 0.547998 0.204470 0.348962 0.000236 0.553191 0.172840 0.043478 \n2 0.694386 0.063466 0.348962 0.000236 0.553191 0.172840 0.043478 \n3 0.658555 0.033389 0.448545 0.000293 0.648936 0.150206 0.086957 \n4 0.687105 0.099338 0.448545 0.000705 0.648936 0.150206 0.086957 \n.. ... ... ... ... ... ... ... 
\n501 0.580954 0.219095 0.122671 0.000633 0.893617 0.386831 0.000000 \n502 0.490324 0.202815 0.105293 0.000438 0.893617 0.386831 0.000000 \n503 0.654340 0.107892 0.094381 0.000612 0.893617 0.386831 0.000000 \n504 0.619467 0.131071 0.114514 0.001161 0.893617 0.386831 0.000000 \n505 0.473079 0.169702 0.125072 0.000462 0.893617 0.386831 0.000000 \n\n TAX B ZN CHAS \n0 0.208015 1.000000 0.18 0.0 \n1 0.104962 1.000000 0.00 0.0 \n2 0.104962 0.989737 0.00 0.0 \n3 0.066794 0.994276 0.00 0.0 \n4 0.066794 1.000000 0.00 0.0 \n.. ... ... ... ... \n501 0.164122 0.987619 0.00 0.0 \n502 0.164122 1.000000 0.00 0.0 \n503 0.164122 1.000000 0.00 0.0 \n504 0.164122 0.991301 0.00 0.0 \n505 0.164122 1.000000 0.00 0.0 \n\n[506 rows x 11 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
RMLSTATDISCRIMPTRATIONOXRADTAXBZNCHAS
00.5775050.0896800.2692030.0000000.2872340.3148150.0000000.2080151.0000000.180.0
10.5479980.2044700.3489620.0002360.5531910.1728400.0434780.1049621.0000000.000.0
20.6943860.0634660.3489620.0002360.5531910.1728400.0434780.1049620.9897370.000.0
30.6585550.0333890.4485450.0002930.6489360.1502060.0869570.0667940.9942760.000.0
40.6871050.0993380.4485450.0007050.6489360.1502060.0869570.0667941.0000000.000.0
....................................
5010.5809540.2190950.1226710.0006330.8936170.3868310.0000000.1641220.9876190.000.0
5020.4903240.2028150.1052930.0004380.8936170.3868310.0000000.1641221.0000000.000.0
5030.6543400.1078920.0943810.0006120.8936170.3868310.0000000.1641221.0000000.000.0
5040.6194670.1310710.1145140.0011610.8936170.3868310.0000000.1641220.9913010.000.0
5050.4730790.1697020.1250720.0004620.8936170.3868310.0000000.1641221.0000000.000.0
\n

506 rows × 11 columns

\n
" + }, + "execution_count": 310, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "find_best_features(reg_and_score[0], nd)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 320, + "outputs": [ + { + "data": { + "text/plain": " CRIM ZN INDUS CHAS NOX RM AGE \\\n0 -0.419782 0.284830 -1.287909 -0.272599 -0.144217 0.413672 -0.120013 \n1 -0.417339 -0.487722 -0.593381 -0.272599 -0.740262 0.194274 0.367166 \n2 -0.417342 -0.487722 -0.593381 -0.272599 -0.740262 1.282714 -0.265812 \n3 -0.416750 -0.487722 -1.306878 -0.272599 -0.835284 1.016303 -0.809889 \n4 -0.412482 -0.487722 -1.306878 -0.272599 -0.835284 1.228577 -0.511180 \n.. ... ... ... ... ... ... ... \n501 -0.413229 -0.487722 0.115738 -0.272599 0.158124 0.439316 0.018673 \n502 -0.415249 -0.487722 0.115738 -0.272599 0.158124 -0.234548 0.288933 \n503 -0.413447 -0.487722 0.115738 -0.272599 0.158124 0.984960 0.797449 \n504 -0.407764 -0.487722 0.115738 -0.272599 0.158124 0.725672 0.736996 \n505 -0.415000 -0.487722 0.115738 -0.272599 0.158124 -0.362767 0.434732 \n\n DIS RAD TAX PTRATIO B LSTAT TARGET \n0 0.140214 -0.982843 -0.666608 -1.459000 0.441052 -1.075562 24.0 \n1 0.557160 -0.867883 -0.987329 -0.303094 0.441052 -0.492439 21.6 \n2 0.557160 -0.867883 -0.987329 -0.303094 0.396427 -1.208727 34.7 \n3 1.077737 -0.752922 -1.106115 0.113032 0.416163 -1.361517 33.4 \n4 1.077737 -0.752922 -1.106115 0.113032 0.441052 -1.026501 36.2 \n.. ... ... ... ... ... ... ... \n501 -0.625796 -0.982843 -0.803212 1.176466 0.387217 -0.418147 22.4 \n502 -0.716639 -0.982843 -0.803212 1.176466 0.441052 -0.500850 20.6 \n503 -0.773684 -0.982843 -0.803212 1.176466 0.441052 -0.983048 23.9 \n504 -0.668437 -0.982843 -0.803212 1.176466 0.403225 -0.865302 22.0 \n505 -0.613246 -0.982843 -0.803212 1.176466 0.441052 -0.669058 11.9 \n\n[506 rows x 14 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATTARGET
0-0.4197820.284830-1.287909-0.272599-0.1442170.413672-0.1200130.140214-0.982843-0.666608-1.4590000.441052-1.07556224.0
1-0.417339-0.487722-0.593381-0.272599-0.7402620.1942740.3671660.557160-0.867883-0.987329-0.3030940.441052-0.49243921.6
2-0.417342-0.487722-0.593381-0.272599-0.7402621.282714-0.2658120.557160-0.867883-0.987329-0.3030940.396427-1.20872734.7
3-0.416750-0.487722-1.306878-0.272599-0.8352841.016303-0.8098891.077737-0.752922-1.1061150.1130320.416163-1.36151733.4
4-0.412482-0.487722-1.306878-0.272599-0.8352841.228577-0.5111801.077737-0.752922-1.1061150.1130320.441052-1.02650136.2
.............................................
501-0.413229-0.4877220.115738-0.2725990.1581240.4393160.018673-0.625796-0.982843-0.8032121.1764660.387217-0.41814722.4
502-0.415249-0.4877220.115738-0.2725990.158124-0.2345480.288933-0.716639-0.982843-0.8032121.1764660.441052-0.50085020.6
503-0.413447-0.4877220.115738-0.2725990.1581240.9849600.797449-0.773684-0.982843-0.8032121.1764660.441052-0.98304823.9
504-0.407764-0.4877220.115738-0.2725990.1581240.7256720.736996-0.668437-0.982843-0.8032121.1764660.403225-0.86530222.0
505-0.415000-0.4877220.115738-0.2725990.158124-0.3627670.434732-0.613246-0.982843-0.8032121.1764660.441052-0.66905811.9
\n

506 rows × 14 columns

\n
" + }, + "execution_count": 320, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "standart_scaler = preprocessing.StandardScaler()\n", + "standart_feature = standart_scaler.fit_transform(df.drop('TARGET', axis=1))\n", + "sd = pd.DataFrame(np.column_stack((standart_feature, target)), columns=np.hstack((features_name, ['TARGET'])))\n", + "sd" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 321, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "best score: 0.6726543801851019\n" + ] + }, + { + "data": { + "text/plain": " LSTAT RM DIS RAD NOX PTRATIO TAX\n0 -1.075562 0.413672 0.140214 -0.982843 -0.144217 -1.459000 -0.666608\n1 -0.492439 0.194274 0.557160 -0.867883 -0.740262 -0.303094 -0.987329\n2 -1.208727 1.282714 0.557160 -0.867883 -0.740262 -0.303094 -0.987329\n3 -1.361517 1.016303 1.077737 -0.752922 -0.835284 0.113032 -1.106115\n4 -1.026501 1.228577 1.077737 -0.752922 -0.835284 0.113032 -1.106115\n.. ... ... ... ... ... ... ...\n501 -0.418147 0.439316 -0.625796 -0.982843 0.158124 1.176466 -0.803212\n502 -0.500850 -0.234548 -0.716639 -0.982843 0.158124 1.176466 -0.803212\n503 -0.983048 0.984960 -0.773684 -0.982843 0.158124 1.176466 -0.803212\n504 -0.865302 0.725672 -0.668437 -0.982843 0.158124 1.176466 -0.803212\n505 -0.669058 -0.362767 -0.613246 -0.982843 0.158124 1.176466 -0.803212\n\n[506 rows x 7 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
LSTATRMDISRADNOXPTRATIOTAX
0-1.0755620.4136720.140214-0.982843-0.144217-1.459000-0.666608
1-0.4924390.1942740.557160-0.867883-0.740262-0.303094-0.987329
2-1.2087271.2827140.557160-0.867883-0.740262-0.303094-0.987329
3-1.3615171.0163031.077737-0.752922-0.8352840.113032-1.106115
4-1.0265011.2285771.077737-0.752922-0.8352840.113032-1.106115
........................
501-0.4181470.439316-0.625796-0.9828430.1581241.176466-0.803212
502-0.500850-0.234548-0.716639-0.9828430.1581241.176466-0.803212
503-0.9830480.984960-0.773684-0.9828430.1581241.176466-0.803212
504-0.8653020.725672-0.668437-0.9828430.1581241.176466-0.803212
505-0.669058-0.362767-0.613246-0.9828430.1581241.176466-0.803212
\n

506 rows × 7 columns

\n
" + }, + "execution_count": 321, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reg = test_linear_regression(sd.drop('TARGET', axis=1), sd.TARGET)[0]\n", + "find_best_features(reg, sd)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git "a/\320\222\320\270\320\267\321\203\320\260\320\273\320\270\320\267\320\260\321\206\320\270\321\217 \320\224\320\227.ipynb" "b/\320\222\320\270\320\267\321\203\320\260\320\273\320\270\320\267\320\260\321\206\320\270\321\217 \320\224\320\227.ipynb" new file mode 100644 index 0000000..4058c2c --- /dev/null +++ "b/\320\222\320\270\320\267\321\203\320\260\320\273\320\270\320\267\320\260\321\206\320\270\321\217 \320\224\320\227.ipynb" @@ -0,0 +1,999 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "CXsKzXe_x-t4" + }, + "source": [ + "**Импорт всех необходимых библиотек**" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "RrSo2OJzx-Pw" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import missingno as msno\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "pd.set_option(\"display.max_rows\", 20)\n", + "pd.set_option(\"display.max_columns\", 20)\n", + "pd.set_option(\"display.precision\", 4)\n", + 
"pd.set_option(\"plotting.backend\", \"matplotlib\")\n", + "\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler, RobustScaler" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UTKVH3sMutTM" + }, + "source": [ + "# 1. Исследовательский анализ данных (exploratory data analysis - EDA)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tiFgQjEcxnu2" + }, + "source": [ + "## 1.1 Словесное описание признаков" + ] + }, + { + "cell_type": "markdown", + "source": [ + "- __ID__ - Уникальный номер спортсмена\n", + "- __Name__ - ФИО спортсмена\n", + "- __Sex__ - Пол: M or F - Мужчина или женщина\n", + "- __Age__ - Возраст спортсмена\n", + "- __Height__ - Рост в см.\n", + "- __Weight__ - Вес в кг.\n", + "- __Team__ - Название команды\n", + "- __NOC__ - 3-буквенный код Национального олимпийского комитета\n", + "- __Games__ - Год и сезон проведения олимпиады, напр: Summer 2000 или Winter 2000\n", + "- __Year__ - Год проведения олимпиады\n", + "- __Season__ - Сезон проведения олимпиады: Summer, Winter\n", + "- __City__ - Город проведения олимпиады\n", + "- __Sport__ - Название вида спорта\n", + "- __Event__ - Название мероприятия\n", + "- __Medal__ - Медаль: Gold, Silver, Bronze, or NA" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qy4yj--r07RL" + }, + "source": [ + "## 1.2 Загрузка данных общее описание набора данных" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "executionInfo": { + "elapsed": 657, + "status": "ok", + "timestamp": 1636625260411, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GgpkPDdBChJz5khG7PXMg_P3ziSIZzWUDpDAjL7KA=s64", + "userId": 
"11145992452404092449" + }, + "user_tz": -300 + }, + "id": "SJ3LbaoiutTT", + "outputId": "c92018d9-5be4-4b69-8b9a-8abdae205a6f" + }, + "outputs": [ + { + "data": { + "text/plain": " ID Name Sex Age Height Weight Team \\\n0 1 A Dijiang M 24.0 180.0 80.0 China \n1 2 A Lamusi M 23.0 170.0 60.0 China \n2 3 Gunnar Nielsen Aaby M 24.0 NaN NaN Denmark \n3 4 Edgar Lindenau Aabye M 34.0 NaN NaN Denmark/Sweden \n4 5 Christine Jacoba Aaftink F 21.0 185.0 82.0 Netherlands \n\n NOC Games Year Season City Sport \\\n0 CHN 1992 Summer 1992 Summer Barcelona Basketball \n1 CHN 2012 Summer 2012 Summer London Judo \n2 DEN 1920 Summer 1920 Summer Antwerpen Football \n3 DEN 1900 Summer 1900 Summer Paris Tug-Of-War \n4 NED 1988 Winter 1988 Winter Calgary Speed Skating \n\n Event Medal \n0 Basketball Men's Basketball NaN \n1 Judo Men's Extra-Lightweight NaN \n2 Football Men's Football NaN \n3 Tug-Of-War Men's Tug-Of-War Gold \n4 Speed Skating Women's 500 metres NaN ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
IDNameSexAgeHeightWeightTeamNOCGamesYearSeasonCitySportEventMedal
01A DijiangM24.0180.080.0ChinaCHN1992 Summer1992SummerBarcelonaBasketballBasketball Men's BasketballNaN
12A LamusiM23.0170.060.0ChinaCHN2012 Summer2012SummerLondonJudoJudo Men's Extra-LightweightNaN
23Gunnar Nielsen AabyM24.0NaNNaNDenmarkDEN1920 Summer1920SummerAntwerpenFootballFootball Men's FootballNaN
34Edgar Lindenau AabyeM34.0NaNNaNDenmark/SwedenDEN1900 Summer1900SummerParisTug-Of-WarTug-Of-War Men's Tug-Of-WarGold
45Christine Jacoba AaftinkF21.0185.082.0NetherlandsNED1988 Winter1988WinterCalgarySpeed SkatingSpeed Skating Women's 500 metresNaN
\n
" + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "PATH = 'https://github.com/aksenov7/Kaggle_competition_group/blob/master/athlete_events.csv.zip?raw=true'\n", + "df = pd.read_csv(PATH, compression='zip')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 291, + "status": "ok", + "timestamp": 1636625261045, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GgpkPDdBChJz5khG7PXMg_P3ziSIZzWUDpDAjL7KA=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "8YtRZ5qi0epJ", + "outputId": "3f18857b-f4d0-42c4-bee1-d9252110857d" + }, + "outputs": [ + { + "data": { + "text/plain": "(271116, 15)" + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 271116 entries, 0 to 271115\n", + "Data columns (total 15 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 ID 271116 non-null int64 \n", + " 1 Name 271116 non-null object \n", + " 2 Sex 271116 non-null object \n", + " 3 Age 261642 non-null float64\n", + " 4 Height 210945 non-null float64\n", + " 5 Weight 208241 non-null float64\n", + " 6 Team 271116 non-null object \n", + " 7 NOC 271116 non-null object \n", + " 8 Games 271116 non-null object \n", + " 9 Year 271116 non-null int64 \n", + " 10 Season 271116 non-null object \n", + " 11 City 271116 non-null object \n", + " 12 Sport 271116 non-null object \n", + " 13 Event 271116 non-null object \n", + " 14 Medal 39783 non-null object \n", + "dtypes: float64(3), int64(2), object(10)\n", + "memory usage: 31.0+ MB\n" + ] + } + ], + "source": [ + 
"df.info()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "- Видим большое кол-во пропусков Medal, но будем считать, что это люди, которые не получили медаль.\n", + "- Что более существенно, так это пропуски по Weight, Height, Age\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 4, + "status": "ok", + "timestamp": 1636625263002, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GgpkPDdBChJz5khG7PXMg_P3ziSIZzWUDpDAjL7KA=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "asswEHNi0emi", + "outputId": "7839e073-a7a6-4611-b355-943739b277fa" + }, + "outputs": [ + { + "data": { + "text/plain": "M 196594\nF 74522\nName: Sex, dtype: int64" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Sex.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [ + { + "data": { + "text/plain": "M 0.7251\nF 0.2749\nName: Sex, dtype: float64" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Sex.value_counts(normalize=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Видим преобладание мужского пола" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "executionInfo": { + "elapsed": 309, + "status": "ok", + "timestamp": 1636625267072, + "user": { + "displayName": "Александр Аксёнов", + 
"photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GgpkPDdBChJz5khG7PXMg_P3ziSIZzWUDpDAjL7KA=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "EpQFv8t1ds05", + "outputId": "58426d5e-4b8f-45e4-82ff-6278d2db26af" + }, + "outputs": [ + { + "data": { + "text/plain": " ID Age Height Weight Year\ncount 271116.0000 261642.0000 210945.0000 208241.0000 271116.0000\nmean 68248.9544 25.5569 175.3390 70.7024 1978.3785\nstd 39022.2863 6.3936 10.5185 14.3480 29.8776\nmin 1.0000 10.0000 127.0000 25.0000 1896.0000\n25% 34643.0000 21.0000 168.0000 60.0000 1960.0000\n50% 68205.0000 24.0000 175.0000 70.0000 1988.0000\n75% 102097.2500 28.0000 183.0000 79.0000 2002.0000\nmax 135571.0000 97.0000 226.0000 214.0000 2016.0000", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
IDAgeHeightWeightYear
count271116.0000261642.0000210945.0000208241.0000271116.0000
mean68248.954425.5569175.339070.70241978.3785
std39022.28636.393610.518514.348029.8776
min1.000010.0000127.000025.00001896.0000
25%34643.000021.0000168.000060.00001960.0000
50%68205.000024.0000175.000070.00001988.0000
75%102097.250028.0000183.000079.00002002.0000
max135571.000097.0000226.0000214.00002016.0000
\n
" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZGB2K6975SrE" + }, + "source": [ + "**Первичные выводы по числовым данным**\n", + "* Возраст людей в выборке от 10 до 97 лет. В среднем возраст варьируется от 19 до 32 лет. Младшие 21 покрывают 25%, 21-28 летние 50%. Остальные 25% это люди от 28 до 97.\n", + "* Рост от 127 до 226 см. В среднем 175. СКО = 10.\n", + "* Вес от 25 до 214 кг. В среднем 70.7 кг.\n", + "* Год проведения лучше рассмотреть отдельно. Но можно сказать, что данные приведены с 1896-2016г." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "executionInfo": { + "elapsed": 6, + "status": "ok", + "timestamp": 1636625269818, + "user": { + "displayName": "Александр Аксёнов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GgpkPDdBChJz5khG7PXMg_P3ziSIZzWUDpDAjL7KA=s64", + "userId": "11145992452404092449" + }, + "user_tz": -300 + }, + "id": "LMdTGO9C4187", + "outputId": "10bdbd48-35be-4e98-e64a-9daa9db723ef" + }, + "outputs": [ + { + "data": { + "text/plain": " Name Sex Team NOC Games \\\ncount 271116 271116 271116 271116 271116 \nunique 134732 2 1184 230 51 \ntop Robert Tait McKenzie M United States USA 2000 Summer \nfreq 58 196594 17847 18853 13821 \n\n Season City Sport Event Medal \ncount 271116 271116 271116 271116 39783 \nunique 2 42 66 765 3 \ntop Summer London Athletics Football Men's Football Gold \nfreq 222552 22426 38624 5733 13372 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameSexTeamNOCGamesSeasonCitySportEventMedal
count27111627111627111627111627111627111627111627111627111639783
unique1347322118423051242667653
topRobert Tait McKenzieMUnited StatesUSA2000 SummerSummerLondonAthleticsFootball Men's FootballGold
freq581965941784718853138212225522242638624573313372
\n
" + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe(include=object)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "70kXb6Vk9zYK" + }, + "source": [ + "**Первичные выводы по строковым данным**\n", + "* 1184 уникальных команд. Самыми активными являются United States - они поучаствовали в 17847 соревнованиях.\n", + "* Самым же активным спортсменом является Robert Tait McKenzie - 58 участий.\n", + "* Самой популярной игрой является 2000 Summer - 13821 участников.\n", + "* В выборке преобладают мужчины - 72.5%.\n", + "* Как уже говорилось, больше всего игр проводится летом - 82%.\n", + "* Самым популярным видом спорта является Athletics - 38624 человека.\n", + "* Самый популярный Event - Football Men's Football. Всего различных ивентов - 765." + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Медали:\n", + "Теперь посмотрим на медалистов" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "data": { + "text/plain": "count 13372\nunique 242\ntop United States\nfreq 2474\nName: Team, dtype: object" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "golds = df[(df['Medal'] == 'Gold')]\n", + "golds.Team.describe(include=object)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "* United States выигрывали золото чаще других." + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "data": { + "text/plain": " count unique top freq\nMedal \nBronze 13295 11867 Heikki Ilmari Savolainen 6\nGold 13372 10413 Michael Fred Phelps, II 23\nSilver 13116 11430 Mikhail Yakovlevich Voronin 6", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
countuniquetopfreq
Medal
Bronze1329511867Heikki Ilmari Savolainen6
Gold1337210413Michael Fred Phelps, II23
Silver1311611430Mikhail Yakovlevich Voronin6
\n
" + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('Medal').describe(include=object)['Name']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "* Больше всего Gold получал Michael Fred Phelps, II - 23 раза.\n", + "* Silver - Mikhail Yakovlevich Voronin - 6 раз.\n", + "* Bronze - Heikki Ilmari Savolainen - 6 раз." + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [ + { + "data": { + "text/plain": " count unique top freq\nMedal \nBronze 13295 268 United States 1233\nGold 13372 242 United States 2474\nSilver 13116 273 United States 1512", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
countuniquetopfreq
Medal
Bronze13295268United States1233
Gold13372242United States2474
Silver13116273United States1512
\n
" + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('Medal').describe(include=object)['Team']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "* United States получали медали чаще остальных. И почти половина из них Gold" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [ + { + "data": { + "text/plain": " Name Sex Team NOC Games \\\ncount 231333 231333 231333 231333 231333 \nunique 120401 2 932 230 51 \ntop Robert Tait McKenzie M United States USA 1996 Summer \nfreq 57 168064 12628 13216 11938 \n\n Season City Sport Event Medal \ncount 231333 231333 231333 231333 0 \nunique 2 42 60 723 0 \ntop Summer London Athletics Football Men's Football NaN \nfreq 188464 18802 34655 4464 NaN ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
NameSexTeamNOCGamesSeasonCitySportEventMedal
count2313332313332313332313332313332313332313332313332313330
unique120401293223051242607230
topRobert Tait McKenzieMUnited StatesUSA1996 SummerSummerLondonAthleticsFootball Men's FootballNaN
freq5716806412628132161193818846418802346554464NaN
\n
" + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "no_medal = df[df['Medal'].isna()]\n", + "no_medal.describe(include=object)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "* United States не получали медали чаще остальных.\n", + "* Robert Tait McKenzie не получал медали чаще остальных - 57 раз." + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "USQjKMAIETO8" + }, + "source": [ + "## 1.3 Визуальный и статистический анализ данных" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#Кол-во мужчин и женщин по возрасту\n", + "plt.figure(figsize=(18,8))\n", + "sns.histplot(data=df, x='Age', hue='Sex')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "* Распределение нормальное, что и можно было ожидать.\n", + "* Видно, что более молодых женщин (до 18 лет) больше, чем мужчин." + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### По годам" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#Средний возраст по годам\n", + "gr = df.groupby('Year').mean()\n", + "plt.figure(figsize=(18,8))\n", + "gr['Age'].plot()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Из графика видно, что до 1950-х годов в основном преобладали люди с возрастом ~28, к 1980 году средний возраст упал до 24, а потом опять начал расти." + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 251, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 251, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#Кол-во мужчин и женщин по годам\n", + "silver = df\n", + "plt.figure(figsize=(18,8))\n", + "sns.histplot(x=\"Year\", hue=\"Sex\", data=df)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### По времени года" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 16, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "iVBORw0KGgoAAAANSUhEUgAABC8AAAHgCAYAAACFN0f7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAA5N0lEQVR4nO3de7wdZX0v/s9DNpeKyEVSDxAgtKJyCRcTuR5qflIBPadArRVtbYiXQ1sVL23xyPEc8UTxRcUeL61HxUoh1XpDqxRtwSpULYgkiNwiBBQhNEq4avSHEnzOHzPBFdw72StZa69J9vv9eu3XnvXMM/N9nrVnZs3+rplnSq01AAAAAF211agbAAAAALA+khcAAABAp0leAAAAAJ0meQEAAAB0muQFAAAA0GmSFwAAAECnjY26AVNt1113rbNnzx51MwAAAIAeS5cuvbfWOnO8edMueTF79uwsWbJk1M0AAAAAepRSvj/RPLeNAAAAAJ0meQEAAAB0muQFAAAA0GnTbswLAKA/jzzySFasWJGHH3541E3ZbGy33XaZNWtWtt5661E3BQC2CJIXAMB6rVixIjvssENmz56dUsqom9N5tdbcd999WbFiRfbZZ59RNwcAtghuGwEA1uvhhx/Ok5/8ZImLSSql5MlPfrIrVQBggCQvAIANkrjoj/cLAAZL8gIAGImzzz47BxxwQA466KAccsghufrqq0fdJACgo4x5AQBMuauuuiqXXHJJrr322my77ba599578/Of/3zUzQIAOsqVFwDAlFu5cmV23XXXbLvttkmSXXfdNbvvvnuWLl2aZz/72Zk7d26OP/74rFy5Mkny4Q9/OM961rNy8MEH5/d+7/fy05/+NEny6U9/OgceeGAOPvjg/NZv/VaSZoyOl73sZZkzZ04OPfTQXH755UmSCy64IC94wQtywgknZN99980b3/jGEfQcANgYQ0telFLOL6XcU0q5cZx5f15KqaWUXdvXpZTyvlLKbaWU60spz+ype2opZXn7c2pP+dxSyg3tMu8rbi4FgM3Gcccdl7vuuitPe9rT8qpXvSr/9m//lkceeSSnn356LrrooixdujQvf/nL8+Y3vzlJ8oIXvCDXXHNNvv3tb2e//fbLRz7ykSTJokWLcumll+bb3/52Lr744iTJ+9///pRScsMNN+TjH/94Tj311McGz7zuuuvyyU9+MjfccEM++clP5q677hrNGwAA9GWYV15ckOSExxeWUvZMclySO3uKn5dk3/bntCQfaOvukuSsJIcnOSzJWaWUndtlPpDkv/Us9yuxAIBueuITn5ilS5fmvPPOy8yZM3PKKafkQx/6UG688cY897nPzSGHHJK3v/3tWbFiRZLkxhtvzDHHHJM5c+bkYx/7WG666aYkydFHH52FCxfmwx/+cB599NEkyde//vW89KUvTZI84xnPyN57751bb701SXLsscdmxx13zHbbbZf9998/3//+90fQewCgX0Mb86LW+tVSyuxxZr07yRuTfL6n7KQki2utNck3Sik7lVJ2SzI/yZdqrfcnSSnlS0lOKKVckeRJtdZvtOWLk5yc5J+H0xsAYNBmzJiR+fPnZ/78+ZkzZ07e//7354ADDshVV131K3UXLlyYz33uczn44INzwQUX5IorrkiSfPCDH8zVV1+dL3zhC5k7d26WLl263phrb1NZG3/NmjUD7RMAMBxTOuZFKeWkJHfXWr/9uFl7JOm9bnNFW7a+8hXjlE8U97RSypJSypJVq1ZtQg8AgEG45ZZbsnz58sdeX3fdddlvv/2yatWqx5IXjzzyyGNXWPz4xz/ObrvtlkceeSQf+9jHHlvu9ttvz+GHH55FixZl5syZueuuu3LMMcc8VufWW2/NnXfemac//elT2DsAYNCm7GkjpZQnJPkfaW4ZmVK11vOSnJck8+bNq1MdHwBY1+rVq3P66afnwQcfzNjYWJ761KfmvPPOy2mnn
ZbXvva1eeihh7JmzZq8/vWvzwEHHJC3ve1tOfzwwzNz5swcfvjh+fGPf5wkOeOMM7J8+fLUWnPsscfm4IMPzjOe8Yz86Z/+aebMmZOxsbFccMEF61xxAQBsfkpzp8aQVt7cNnJJrfXAUsqcJF9O8tN29qwk/5FmLIv/neSKWuvH2+VuSXPLyPwk82utf9yWfyjJFe3P5bXWZ7TlL+mttz7z5s2rS5YsGUwHAWAaWLZsWfbbb79RN2Oz430DgP6UUpbWWueNN2/Kbhuptd5Qa/31WuvsWuvsNLd6PLPW+oMkFydZ0D515IgkD9VaVya5NMlxpZSd24E6j0tyaTvvR6WUI9qnjCzIumNoAAAAAFuIYT4q9eNJrkry9FLKilLKK9ZT/YtJvpvktiQfTvKqJGkH6nxbkmvan0VrB+9s6/xtu8ztMVgnAAAAbJGG+bSRl2xg/uye6Zrk1RPUOz/J+eOUL0ly4Ka1EgAAAOi6KRuwEwAAYDLmnrF40nWXnrtgiC0BukLyAgAANhP9/FOf+Mce2HJM2YCdAAAAABtD8gIA6Lyzzz47BxxwQA466KAccsghufrqq0fdJABgCrltBADoS7+XrW/Ihi5rv+qqq3LJJZfk2muvzbbbbpt77703P//5zwfahk1Ra02tNVtt5TshALZMXbhlzacsANBpK1euzK677pptt902SbLrrrtm9913z+zZs3PvvfcmSZYsWZL58+cnSd761rfm1FNPzTHHHJO99947n/3sZ/PGN74xc+bMyQknnJBHHnkkSTJ79uyceeaZOeSQQzJv3rxce+21Of744/Obv/mb+eAHP/hY/HPPPTfPetazctBBB+Wss85Kktxxxx15+tOfngULFuTAAw/MXXfdNYXvCABMP668AAA67bjjjsuiRYvytKc9Lb/927+dU045Jc9+9rPXu8ztt9+eyy+/PDfffHOOPPLIfOYzn8k73/nO/O7v/m6+8IUv5OSTT06S7LXXXrnuuuvyhje8IQsXLsy///u/5+GHH86BBx6YP/mTP8lll12W5cuX55vf/GZqrTnxxBPz1a9+NXvttVeWL1+eCy+8MEccccQUvAsAm58ufFvPlkPyAgDotCc+8YlZunRpvva1r+Xyyy/PKaecknPOOWe9yzzvec/L1ltvnTlz5uTRRx/NCSeckCSZM2dO7rjjjsfqnXjiiY+Vr169OjvssEN22GGHbLvttnnwwQdz2WWX5bLLLsuhhx6aJFm9enWWL1+evfbaK3vvvbfEBQBMEckLAKDzZsyYkfnz52f+/PmZM2dOLrzwwoyNjeUXv/hFkuThhx9ep/7aW0y22mqrbL311imlPPZ6zZo149ZbO91br9aaM888M3/8x3+8zvrvuOOObL/99oPvKAAwLmNeAACddsstt2T58uWPvb7uuuuy9957Z/bs2Vm6dGmS5DOf+cxQYh9//PE5//zzs3r16iTJ3XffnXvuuWcosQCAibnyAgDotNWrV+f000/Pgw8+mLGxsTz1qU/Neeedl2XLluUVr3hF/tf/+l+PDdY5aMcdd1yWLVuWI488MklzC8tHP/rRzJgxYyjxAIDxlVrrqNswpebNm1eXLFky6mYAwGZj2bJl2W+//UbdjM2O941hmC4DIPbTz+nQx2Tz7Od06ON0MVV/y1LK0lrrvPHmuW0EAAAA6DTJCwAAAKDTJC8AAACATpO8AAAAADpN8gIAAADoNMkLAAAAoNMkLwCATnvDG96Q97znPY+9Pv744/PKV77ysdd//ud/nkWLFuWcc85Z73ruuOOO/MM//MOwmgkADNHYqBsAAGxe7lw0Z6Dr2+stN6x3/tFHH51PfepTef3rX59f/OIXuffee/OjH/3osflXXnll3v3ud+eII45Y73rWJi/+4A/+YNJtW7NmTcbGnC4BwKi58gIA6LSjjjoqV111VZLkpptuyoEHHpgddtghDzzwQH72s59l2bJluf766/Oa17wmSbJw4cK89rWvz
VFHHZXf+I3fyEUXXZQkedOb3pSvfe1rOeSQQ/Lud787jz76aM4444w861nPykEHHZQPfehDSZIrrrgixxxzTE488cTsv//+o+k0ALAOXyUAAJ22++67Z2xsLHfeeWeuvPLKHHnkkbn77rtz1VVXZccdd8ycOXOyzTbbrLPMypUr8/Wvfz3f+c53cuKJJ+aFL3xhzjnnnLzrXe/KJZdckiQ577zzsuOOO+aaa67Jz372sxx99NE57rjjkiTXXnttbrzxxuyzzz5T3l8A4FdJXgAAnXfUUUflyiuvzJVXXpk/+7M/y913350rr7wyO+64Y44++uhfqX/yySdnq622yv77758f/vCH467zsssuy/XXX//YlRkPPfRQli9fnm222SaHHXaYxAUAdIjkBQDQeUcffXSuvPLK3HDDDTnwwAOz55575q/+6q/ypCc9KS972cty//33r1N/2223fWy61jruOmut+eu//uscf/zx65RfccUV2X777QffCQBgoxnzAgDovKOOOiqXXHJJdtlll8yYMSO77LJLHnzwwVx11VU56qijJrWOHXbYIT/+8Y8fe3388cfnAx/4QB555JEkya233pqf/OQnQ2k/ALBpXHkBAHTenDlzcu+9967zpJA5c+Zk9erV2XXXXSe1joMOOigzZszIwQcfnIULF+Z1r3td7rjjjjzzmc9MrTUzZ87M5z73uSH1gKkw94zFk6679NwFQ2wJAIMmeQEA9GVDjzYdhhkzZqzzeNQkueCCCx6bXrhwYRYuXPgr5UmyevXqJMnWW2+dr3zlK+vMe8c73pF3vOMd65TNnz8/8+fPH0i7AYDBkLwAAACAjeSqr6lhzAsAAACg0yQvAAAAgE6TvAAANmiix40yPu8XAAyW5AUAsF7bbbdd7rvvPv+QT1KtNffdd1+22267UTcFALYYBuwEANZr1qxZWbFiRVatWjXqpmw2tttuu8yaNWvUzQCALYbkBQCwXltvvXX22WefUTcDAJjG3DYCAAAAdJrkBQAAANBpkhcAAABAp0leAAAAAJ0meQEAAAB0muQFAAAA0GmSFwAAAECnSV4AAAAAnSZ5AQAAAHSa5AUAAADQaZIXAAAAQKdJXgAAAACdJnkBAAAAdJrkBQAAANBpkhcAAABAp0leAAAAAJ02tORFKeX8Uso9pZQbe8rOLaV8p5RyfSnlH0spO/XMO7OUclsp5ZZSyvE95Se0ZbeVUt7UU75PKeXqtvyTpZRthtUXAAAAYHSGeeXFBUlOeFzZl5IcWGs9KMmtSc5MklLK/klenOSAdpn/W0qZUUqZkeT9SZ6XZP8kL2nrJslfJnl3rfWpSR5I8ooh9gUAAAAYkaElL2qtX01y/+PKLqu1rmlffiPJrHb6pCSfqLX+rNb6vSS3JTms/bmt1vrdWuvPk3wiyUmllJLkOUkuape/MMnJw+oLAAAAMDpjI4z98iSfbKf3SJPMWGtFW5Ykdz2u/PAkT07yYE8ipLc+AAAjNveMxX3VX3rugiG1BIAtwUgG7CylvDnJmiQfm6J4p5VSlpRSlqxatWoqQgIAAAADMuXJi1LKwiT/Nckf1lprW3x3kj17qs1qyyYqvy/JTqWUsceVj6vWel6tdV6tdd7MmTMH0g8AAABgakxp8qKUckKSNyY5sdb6055ZFyd5cSll21LKPkn2TfLNJNck2bd9ssg2aQb1vLhNelye5IXt8qcm+fxU9QMAAACYOsN8VOrHk1yV5OmllBWllFck+ZskOyT5UinlulLKB5Ok1npTkk8luTnJvyR5da310XZMi9ckuTTJsiSfausmyX9P8mellNvSjIHxkWH1BQAAABidoQ3YWWt9yTjFEyYYaq1nJzl7nPIvJvniOOXfTfM0EgAAAGALNpIBOwEAAAAmS/ICAAAA6DTJCwAAAKDTJC8AAACATpO8AAAAADpN8gIAAADoNMkLAAAAoNMkLwAAAIBOk7wAAAAAOk3yAgAAAOg0yQsAAACg0
yQvAAAAgE6TvAAAAAA6TfICAAAA6DTJCwAAAKDTJC8AAACATpO8AAAAADpN8gIAAADoNMkLAAAAoNMkLwAAAIBOk7wAAAAAOk3yAgAAAOg0yQsAAACg0yQvAAAAgE6TvAAAAAA6TfICAAAA6DTJCwAAAKDTJC8AAACATpO8AAAAADpN8gIAAADoNMkLAAAAoNMkLwAAAIBOk7wAAAAAOk3yAgAAAOg0yQsAAACg0yQvAAAAgE6TvAAAAAA6TfICAAAA6DTJCwAAAKDTJC8AAACATpO8AAAAADpN8gIAAADoNMkLAAAAoNMkLwAAAIBOk7wAAAAAOk3yAgAAAOi0sVE3AABgrblnLO6r/tJzFwypJQBAl7jyAgAAAOg0yQsAAACg0yQvAAAAgE6TvAAAAAA6bWjJi1LK+aWUe0opN/aU7VJK+VIpZXn7e+e2vJRS3ldKua2Ucn0p5Zk9y5za1l9eSjm1p3xuKeWGdpn3lVLKsPoCAAAAjM4wr7y4IMkJjyt7U5Iv11r3TfLl9nWSPC/Jvu3PaUk+kDTJjiRnJTk8yWFJzlqb8Gjr/Lee5R4fCwAAANgCDC15UWv9apL7H1d8UpIL2+kLk5zcU764Nr6RZKdSym5Jjk/ypVrr/bXWB5J8KckJ7bwn1Vq/UWutSRb3rAsAAADYgkz1mBdPqbWubKd/kOQp7fQeSe7qqbeiLVtf+YpxygEAAIAtzMgG7GyvmKhTEauUclopZUkpZcmqVaumIiQAAAAwIFOdvPhhe8tH2t/3tOV3J9mzp96stmx95bPGKR9XrfW8Wuu8Wuu8mTNnbnInAAAAgKkz1cmLi5OsfWLIqUk+31O+oH3qyBFJHmpvL7k0yXGllJ3bgTqPS3JpO+9HpZQj2qeMLOhZFwAAALAFGRvWikspH08yP8mupZQVaZ4ack6ST5VSXpHk+0le1Fb/YpLnJ7ktyU+TvCxJaq33l1LeluSatt6iWuvaQUBfleaJJr+W5J/bHwAAAGALM7TkRa31JRPMOnacujXJqydYz/lJzh+nfEmSAzeljQAAAED3jWzATgAAAIDJkLwAAAAAOk3yAgAAAOg0yQsAAACg0yQvAAAAgE6TvAAAAAA6TfICAAAA6DTJCwAAAKDTJC8AAACATpO8AAAAADpN8gIAAADotLFRNwAAgOGbe8biSdddeu6CIbYEAPrnygsAAACg0yQvAAAAgE6TvAAAAAA6TfICAAAA6DTJCwAAAKDTJC8AAACATpO8AAAAADpN8gIAAADoNMkLAAAAoNMkLwAAAIBOk7wAAAAAOk3yAgAAAOg0yQsAAACg0yQvAAAAgE6TvAAAAAA6TfICAAAA6DTJCwAAAKDTJC8AAACATpO8AAAAADpN8gIAAADoNMkLAAAAoNMkLwAAAIBOk7wAAAAAOk3yAgAAAOi0sVE3AABglOaesbiv+kvPXTCklgAAE5G8AAAm1M8/9v6pBwCGxW0jAAAAQKdJXgAAAACdJnkBAAAAdJrkBQAAANBpkhcAAABAp0leAAAAAJ0meQEAAAB0muQFAAAA0GmSFwAAAECnjY26AQBseeaesXjSdZeeu2CILQEAYEvgygsAAACg0yQvAAAAgE6TvAAAAAA6bSRjXpRS3pDklUlqkhuSvCzJbkk+keTJSZYm+aNa689LKdsmWZxkbpL7kpxSa72jXc+ZSV6R5NEkr621XjrFXQFgmupnXI/E2B4AAJtiyq+8KKXskeS1SebVWg9MMiPJi5P8ZZJ311qfmuSBNEmJtL8faMvf3dZLKWX/drkDkpyQ5P+WUmZMZV8AAACA4RvVbSNjSX6tlDKW5AlJViZ5TpKL2vkXJjm5nT6pfZ12/rGllNKWf6LW+rNa6/eS3JbksKlpPgAAADBVpjx5UWu9O8m7ktyZJmnxUJrbRB6sta5pq61Iskc7vUeSu9pl17T1n9xbPs4yAAAAwBZiFLeN7Jzmqol9kuyeZPs0t30MM+ZppZQlpZQlq1atG
mYoAAAAYMBGcdvIbyf5Xq11Va31kSSfTXJ0kp3a20iSZFaSu9vpu5PsmSTt/B3TDNz5WPk4y6yj1nperXVerXXezJkzB90fAAAAYIhGkby4M8kRpZQntGNXHJvk5iSXJ3lhW+fUJJ9vpy9uX6ed/5Vaa23LX1xK2baUsk+SfZN8c4r6AAAAAEyRKX9Uaq316lLKRUmuTbImybeSnJfkC0k+UUp5e1v2kXaRjyT5+1LKbUnuT/OEkdRabyqlfCpN4mNNklfXWh+d0s4AAAAAQzflyYskqbWeleSsxxV/N+M8LaTW+nCS359gPWcnOXvgDQQAAAA6Y1SPSgUAAACYFMkLAAAAoNMkLwAAAIBOk7wAAAAAOm0kA3YCAP2be8bivuovPXfBkFoCADC1XHkBAAAAdNqkkhellC9PpgwAAABg0NZ720gpZbskT0iyayll5ySlnfWkJHsMuW0AAAAAGxzz4o+TvD7J7kmW5pfJix8l+ZvhNQsAAACgsd7kRa31vUneW0o5vdb611PUJgAAAIDHTOppI7XWvy6lHJVkdu8ytdb+hj0HAAAA6NOkkhellL9P8ptJrkvyaFtck0heAAAAAEM1qeRFknlJ9q+11mE2BgAAAODxJvWo1CQ3JvlPw2wIAAAAwHgme+XFrkluLqV8M8nP1hbWWk8cSqsAAAAAWpNNXrx1mI0AAAAAmMhknzbyb8NuCAAAAMB4Jvu0kR+nebpIkmyTZOskP6m1PmlYDQMAAABIJn/lxQ5rp0spJclJSY4YVqMAAAAA1prsmBePaR+X+rlSyllJ3jT4JgEAbNnmnrF40nWXnrtgiC0BgM3DZG8beUHPy62SzEvy8FBaBAAAANBjslde/E7P9Jokd6S5dQQAAABgqCY75sXLht0QAAAAgPFsNZlKpZRZpZR/LKXc0/58ppQya9iNAwAAAJhU8iLJ3yW5OMnu7c8/tWUAAAAAQzXZ5MXMWuvf1VrXtD8XJJk5xHYBAAAAJJl88uK+UspLSykz2p+XJrlvmA0DAAAASCafvHh5khcl+UGSlUlemGThkNoEAAAA8JjJPip1UZJTa60PJEkpZZck70qT1AAAAAAYmsleeXHQ2sRFktRa709y6HCaBAAAAPBLk01ebFVK2Xnti/bKi8letQEAAACw0SabgPirJFeVUj7dvv79JGcPp0kAAAAAvzSp5EWtdXEpZUmS57RFL6i13jy8ZgEAAAA0Jn3rR5uskLAAAAAAptRkx7wAAAAAGAnJCwAAAKDTJC8AAACATpO8AAAAADpN8gIAAADoNMkLAAAAoNMm/ahUAAZj7hmLJ1136bkLhtgSAADYPLjyAgAAAOg0yQsAAACg09w2ArCF6+c2lcStKgAAdI8rLwAAAIBOk7wAAAAAOk3yAgAAAOg0yQsAAACg0yQvAAAAgE6TvAAAAAA6TfICAAAA6LSRJC9KKTuVUi4qpXynlLKslHJkKWWXUsqXSinL2987t3VLKeV9pZTbSinXl1Ke2bOeU9v6y0spp46iLwAAAMBwjerKi/cm+Zda6zOSHJxkWZI3JflyrXXfJF9uXyfJ85Ls2/6cluQDSVJK2SXJWUkOT3JYkrPWJjwAAACALceUJy9KKTsm+a0kH0mSWuvPa60PJjkpyYVttQuTnNxOn5RkcW18I8lOpZTdkhyf5Eu11vtrrQ8k+VKSE6asIwAAAMCUGMWVF/skWZXk70op3yql/G0pZfskT6m1rmzr/CDJU9rpPZLc1bP8irZsonIAAABgCzKK5MVYkmcm+UCt9dAkP8kvbxFJktRaa5I6qICllNNKKUtKKUtWrVo1qNUCAAAAU2AUyYsVSVbUWq9uX1+UJpnxw/Z2kLS/72nn351kz57lZ7VlE5X/ilrrebXWebXWeTNnzhxYRwAAAIDhm/LkRa31B0nuKqU8vS06NsnNSS5OsvaJIacm+Xw7fXGSBe1TR45I8lB7e8mlSY4rpezcDtR5XFsGAAAAbEHGRhT39
CQfK6Vsk+S7SV6WJpHyqVLKK5J8P8mL2rpfTPL8JLcl+WlbN7XW+0spb0tyTVtvUa31/qnrAgAAADAVRpK8qLVel2TeOLOOHaduTfLqCdZzfpLzB9o4AAAAoFNGMeYFAAAAwKRJXgAAAACdJnkBAAAAdJrkBQAAANBpkhcAAABAp0leAAAAAJ0meQEAAAB0muQFAAAA0GmSFwAAAECnSV4AAAAAnSZ5AQAAAHSa5AUAAADQaZIXAAAAQKdJXgAAAACdJnkBAAAAdJrkBQAAANBpkhcAAABAp0leAAAAAJ02NuoGAN0094zFfdVfeu6CIbUEAACY7lx5AQAAAHSa5AUAAADQaW4bAWCL0M+tTm5zAgDYvLjyAgAAAOg0yQsAAACg0yQvAAAAgE4z5gXQGR7PCgAAjMeVFwAAAECnSV4AAAAAnea2EQAAANhC3bloTl/193rLDUNqyaZx5QUAAADQaZIXAAAAQKdJXgAAAACdJnkBAAAAdJrkBQAAANBpkhcAAABAp0leAAAAAJ0meQEAAAB0muQFAAAA0GmSFwAAAECnSV4AAAAAnSZ5AQAAAHSa5AUAAADQaZIXAAAAQKeNjboBAAAAdMudi+b0VX+vt9wwpJYMVz/93Fz7uKVw5QUAAADQaZIXAAAAQKe5bQQ2E3PPWDzpukvPXTDElgAAAEwtV14AAAAAnSZ5AQAAAHSa5AUAAADQaZIXAAAAQKeNLHlRSplRSvlWKeWS9vU+pZSrSym3lVI+WUrZpi3ftn19Wzt/ds86zmzLbymlHD+irgAAAABDNMqnjbwuybIkT2pf/2WSd9daP1FK+WCSVyT5QPv7gVrrU0spL27rnVJK2T/Ji5MckGT3JP9aSnlarfXRqe4IAAAwfdy5aM6k6+71lhuG2BKYPkZy5UUpZVaS/5Lkb9vXJclzklzUVrkwycnt9Ent67Tzj23rn5TkE7XWn9Vav5fktiSHTUkHAAAAgCkzqisv3pPkjUl2aF8/OcmDtdY17esVSfZop/dIcleS1FrXlFIeauvvkeQbPevsXQaGau4Zi/uqv/TcBUNqCQAAwJZvyq+8KKX81yT31FqXTmHM00opS0opS1atWjVVYQEAAIABGMVtI0cnObGUckeST6S5XeS9SXYqpay9EmRWkrvb6buT7Jkk7fwdk9zXWz7OMuuotZ5Xa51Xa503c+bMwfYGAAAAGKopT17UWs+stc6qtc5OM+DmV2qtf5jk8iQvbKudmuTz7fTF7eu0879Sa61t+Yvbp5Hsk2TfJN+com4AAAAAU2SUTxt5vP+e5BOllLcn+VaSj7TlH0ny96WU25LcnybhkVrrTaWUTyW5OcmaJK/2pBEAAADY8ow0eVFrvSLJFe30dzPO00JqrQ8n+f0Jlj87ydnDayEAAAAwaiN5VCoAAADAZEleAAAAAJ0meQEAAAB0muQFAAAA0GmSFwAAAECnSV4AAAAAnSZ5AQAAAHTa2KgbAAAAbBnuXDSnr/p7veWGIbUE2NK48gIAAADoNMkLAAAAoNMkLwAAAIBOk7wAAAAAOs2AnQAATDsGlgTYvLjyAgAAAOg0yQsAAACg0yQvAAAAgE6TvAAAAAA6TfICAAAA6DTJCwAAAKDTJC8AAACAThsbdQMAAGA6uHPRnL7q7/WWG4bUEoDNjysvAAAAgE5z5QWbvblnLO6r/tJzFwypJQAAAAyDKy8AAACATpO8AAAAADpN8gIAAADoNGNeANNaP2OmGC8FAABGw5UXAAAAQKdJXgAAAACdJnkBAAAAdJrkBQAAANBpkhcAAABAp0leAAAAAJ0meQEAAAB0muQFAAAA0GmSFwAAAECnjY26AQAAcOeiOZOuu9dbbhhiSwDoIldeAAAAAJ0meQEAAAB0muQFAAAA0GmSFwAAAECnSV4AAAAAnSZ5AQAAAHSa5AUAAADQaZIXAAAAQKdJXgAAAACdJnkBAAAAdJrkBQAAANBpkhcAAABAp0leAAAAAJ0me
QEAAAB02thUByyl7JlkcZKnJKlJzqu1vreUskuSTyaZneSOJC+qtT5QSilJ3pvk+Ul+mmRhrfXadl2nJvmf7arfXmu9cCr7AgCwJbpz0ZxJ193rLTcMsSUA0BjFlRdrkvx5rXX/JEckeXUpZf8kb0ry5Vrrvkm+3L5Okucl2bf9OS3JB5KkTXacleTwJIclOauUsvNUdgQAAAAYvilPXtRaV669cqLW+uMky5LskeSkJGuvnLgwycnt9ElJFtfGN5LsVErZLcnxSb5Ua72/1vpAki8lOWHqegIAAABMhZGOeVFKmZ3k0CRXJ3lKrXVlO+sHaW4rSZrExl09i61oyyYqBwAAALYgI0telFKemOQzSV5fa/1R77xaa00zHsagYp1WSllSSlmyatWqQa0WAAAAmAIjSV6UUrZOk7j4WK31s23xD9vbQdL+vqctvzvJnj2Lz2rLJir/FbXW82qt82qt82bOnDm4jgAAAABDN4qnjZQkH0myrNb6f3pmXZzk1CTntL8/31P+mlLKJ9IMzvlQrXVlKeXSJO/oGaTzuCRnTkUfNtbcMxb3VX/puQuG1BIAAADYfEx58iLJ0Un+KMkNpZTr2rL/kSZp8alSyiuSfD/Ji9p5X0zzmNTb0jwq9WVJUmu9v5TytiTXtPUW1Vrvn5IeAAC/op/HayYesQkATN6UJy9qrV9PUiaYfew49WuSV0+wrvOTnD+41gEArF8/SRoJGgAYjJE+bQQAAABgQyQvAAAAgE4bxZgXAMAUcHvDlsFYIgAgecEQ9PNUFU9UAQAAYEPcNgIAAAB0misvAIDNlltjAGB6cOUFAAAA0GmSFwAAAECnSV4AAAAAnSZ5AQAAAHSa5AUAAADQaZIXAAAAQKdJXgAAAACdJnkBAAAAdJrkBQAAANBpkhcAAABAp42NugEM19wzFk+67tJzFwyxJQDdceeiOX3V3+stNwypJQAATIYrLwAAAIBOk7wAAAAAOk3yAgAAAOg0yQsAAACg0wzYCQBToJ9BQg0QCgCwLskLAEbKkz8AANgQyQsA1uEKAQAAusaYFwAAAECnSV4AAAAAnSZ5AQAAAHSa5AUAAADQaQbsBOgwg2cCAIArLwAAAICOk7wAAAAAOk3yAgAAAOg0yQsAAACg0yQvAAAAgE6TvAAAAAA6TfICAAAA6DTJCwAAAKDTxkbdAICNdeeiOZOuu9dbbhhiSwAAgGFy5QUAAADQaZIXAAAAQKdJXgAAAACdZswLYCCMPwEAAAyLKy8AAACATpvWV17MPWPxpOsuPXfBEFsCAAAATGRaJy8A+tHPrTGJ22MAAGBQ3DYCAAAAdJrkBQAAANBpkhcAAABAp0leAAAAAJ0meQEAAAB0muQFAAAA0GmbffKilHJCKeWWUsptpZQ3jbo9AAAAwGBt1smLUsqMJO9P8rwk+yd5SSll/9G2CgAAABikzTp5keSwJLfVWr9ba/15kk8kOWnEbQIAAAAGaHNPXuyR5K6e1yvaMgAAAGALUWqto27DRiulvDDJCbXWV7av/yjJ4bXW1zyu3mlJTmtfPj3JLRsRbtck925CczfGVMecDn2cLjGnQx9HEXM69HEUMadDH6dLzOnQx1HEnA59HEXM6dDHUcScDn2cLjGnQx9HEXM69HFTYu5da5053oyxTWvPyN2dZM+e17PasnXUWs9Lct6mBCqlLKm1ztuUdXQ95nTo43SJOR36OIqY06GPo4g5Hfo4XWJOhz6OIuZ06OMoYk6HPo4i5nTo43SJOR36OIqY06GPw4q5ud82ck2SfUsp+5RStkny4iQXj7hNAAAAwABt1lde1FrXlFJek+TSJDOSnF9rvWnEzQIAAAAGaLNOXiRJrfWLSb44BaE26baTzSTmdOjjdIk5Hfo4ipjToY+jiDkd+jhdYk6HPo4i5nTo4yhiToc+jiLmdOjjdIk5Hfo4ipjToY9DiblZD9gJAAAAbPk29zEvAAAAgC1drXVa/iQ5P8k9SW7sK
TskyTeSXJdkSZLD2vKdk/xjkuuTfDPJgT3L7JTkoiTfSbIsyZF9xjw4yVVJbkjyT0me1DPvzCS3pXm06/E95Se0ZbcledNG9LPvmO28GUm+leSSqYg52fd2gPHekOSmJDcm+XiS7QbRxyRPTnJ5ktVJ/qan/hOSfKHt301JzhnU+5rkuUmWtuVLkzynZ5m5bfltSd6X9gqsYcXrWfbi3nUNuY8vacuvT/IvSXYdUMzD0hwfrkvy7SS/25bv2f6Nb27/lq8bZrxhH3va+Xul2Wb/Ygr2kdlJ/v+evn6w331kEPHaedukuczx1jbu7w3wWHdQO++mdv52/eyTg4w5rP1yA/2c1H7Z59/yD3v+jtcl+UWaz/JhHl/HjTnM7SfJ1kkubMuXJTmzZ5lJnRMMKl47f+DnAxvo47COPdsk+bu2/NtJ5vd77BlUzH72y0HFG+L2ur6Ykz0OjPu5mmSXJF9Ksrz9vXNbXtIcP29r1/3MnnWd2tZfnuTU9fRxIDHTHIPWHgOvT3LKsGP2rO9JSVak53xziO/rXkkuS7O/3pxk9hTEfGe7jmVZ/zlsvzGf0f7Nfpae8546yWPsoOJNtJ5h97Gdv8Hj+oDf150yyfPYdZabTKUt8SfJbyV5ZtY9CF+W5Hnt9POTXNFOn5vkrJ4/wpd7lrkwySvb6W2S7NRnzGuSPLudfnmSt7XT+6c54G+bZJ8kt7cb1Yx2+jfaeN9Osv8wY/Ys92dJ/mF9G/UgY072vR3Q+7pHku8l+bW23qeSLBxQH7dP8p+T/El+NXnx//X072trt78BxDw0ye7t9IFJ7u5Z5ptJjkjz4fDPE8UcVLy27AXttrOhf5I2OWaasXzuSXtClOaD7q0DivmEJGPt9G5tnLF2eu1Jyw5pTgTH3S8HEa+f/aPfmD3zL0ry6bQfNhnuPjJ7vG0jfewjg4jXzvvfSd7eTm+V9Se++ok5luYk7OD29ZPzy2PdpPbJQcYc4n45bsz0sV/2E+9xy81Jcnu/286gYg55+/mDJJ/o6dsdabbjSZ8TDCJez3LDOB+YqI/DPPa8OsnftdO/niYRvtUwt5+JYvazXw4qXoa3vU70vvZzHBj3c7Vd5k1t+ZuS/GU7/fw0x8+S5nh6dVu+S5Lvtr93bqd3HnLMpyXZt53ePcnKTHwOO5CYPet7b7v9TJS8GFi8JFckeW47/cQkTxjy+3pUkn/PL/8fuirjJP82MuavJ3lWkrOzbjJhUsfYAcbr53xyIDF71rfB4/ogY6aP89h1lptMpS31J487gU3z1JJT2umXJPmHdvoLSY7pqXd7kqck2THNB+qE35JNIuZD+eXYI3smubmdPjPrfutwaZIj259Le8rXqTeMmO30rCRfTvKc9W3UA+xnX+/tAOLtkeSuNB9uY0kuSXLcIGL2zF+YCT5M2vnvTfLfBhmzLS9J7k+TsNktyXd65r0kyYeGFa99/cQkX09zcFvvP0kD6uPWSVYl2bst/2CS04YQc58kP0ybTHjcvM+n/UAfRrwM8djTvj45TdL2rVk3eTGUfeTx9TZ2HxlEvLaP2w/6fU1zMvbRcZbva58cRMx23lD2y/X0s6/9sp/ttafOO5KcvTHbzqBiDnH7eUmab7bH0iSEbk2zL/Z1TrCp8dp5wzofmKiPwzz2vD/JH/XU+3Laq26Htf2sL2b62C8HFG9Y2+u4MbMRn8896/h8misub0myW1u2W5Jb2ukPJXlJT/1b2vnrHFMfX28YMcdZz7fTJjOGGTPNVXyfyAbONwf0vu6f5OuT3XYGFPPINImwX0uTYFySZL9BxOyp99asm0zo+/+uTYk30XqG2ce2rK/j+gDe177PY9f+GPNiXa9Pcm4p5a4k70qzgSbNQecFSVJKOSzNQXdWmn8mViX5u1LKt0opf1tK2b7PmDclOamd/v00B//klx/Wa61oyyYqH2bMJHlPkjemuTx2Y/Qbc1Pf277i1VrvTvM3vzNNhvyhWutlfcRbX8wNK
qXslOR30hw4Bh3z95JcW2v9WZr+r+iZ1+/202+8JHlbkr9K8tM+4mx0zFrrI0n+NM1lqf+R5gP2I4OKWUo5vJSy9nL4P6m1ruldsJQyO81VIVcPMd7Qjj2llCcm+e9pvpF7zBTsI/u0ffm3Usoxj19wI/eRvuK1MZLkbaWUa0spny6lPKWPeOuL+bQktZRyabvuN7blm7pPbkzMZHj75bgxB7BfTuY4cEqaWwrWMeTj62Mxh7z9XJTkJ2n2vTuTvKvWen82/Zyg33jJ8M4Hxo055GPPt5OcWEoZK6Xsk+afv3X+zkPYftYXc1P2y77iDXl7HTfmxh4HHve5+pRa68p21g/SfJmYDPi8eRNj9q7nsDTfKt8+zJillK3SbDt/saE4g4iX5lj/YCnls+1n6bmllBnDjFlrvSrNLQsr259La63LBhRzIn1vP5sYb6L19FN3Y2K+J30e1zcx5kafx0perOtPk7yh1rpnmvsr1x5Qz0myUynluiSnp7kf6NE03wA8M8kHaq2HpvnQfVOfMV+e5FWllKVpLr/5+aZ2YtAxSyn/Nck9tdalUxUzm/7e9tvHndN8AO+T5hK/7UspL+0jXt8xe2KPpTn5fV+t9buDjFlKOSDJXyb54z7XO5B4pZRDkvxmrfUfpzDm1mn25UPT/C2vzy8TkZscs9Z6da31gDSXwZ1ZStmupy1PTPKZJK+vtf5oiPGGeex5a5J311pX91Ye8j6yMslebV/+LMk/lFKe1BN7Y/eRfuONpUlMX1lrfWaaS1LfNaA+jqW5fewP29+/W0o5ts91DyTmkPfLiWJu6n65oePA4Ul+Wmu98XHlwzy+Pj7mMLefw9Kcd+yeZh/881LKb/S57k2ON+TzgYliDvPYc36af0aWpDl5v7JtQ5KhbT/jxhzAftlvH4e5vU7Ux76PA+v7XK3NV7i1zzZv0KBillJ2S/L3SV5Wa13vP4UDiPmqJF+sta7YQL1BxRtLckyaZMmz0txWsXCYMUspT02yX5rtdo8kzxnvy45BxuzXALedSZ9PDuB97fu4PqDtZ6POY8cm28hp4tQkr2unP53kb5Ok/aO8LElKKSXNZS7fTXPJ0opa69qM2EXp8x+IWut3khzXrvtpSf5LO+vurJv9n9WWZT3lw4p5YpoM+vOTbJfkSaWUj9ZaJ33ysBExV2QT3tuNiPfbSb5Xa13VLvPZNPfWfXQAMTfkvCTLa63vmWysycQspcxKM9Dsglrr2oz/3Wn6vFZf289GxDsyybxSyh1pjje/Xkq5otY6f4gxD2mXu72t86kMbr/srbOslLI6zXgbS9qTss8k+Vit9bNDjrdJ+8cGYh6e5IWllHemGUzpF6WUh9PcsjKUfaS9Sudn7fTSUsrtab7VWdIuulH7yEbEW5rmG8+1f79PJ3nFIGKm+Zt9tdZ6bzvvi2k+uD+aTdgnNzLm6gxvv5wo5o/a5TZqv5zEPvLijHPVRYZ0fJ0g5n0Z3vbzB0n+pf3m+p5Syr8nmZfmG8GNPifYiHiHZnjnAxPFrBnesWdNmi+r0s67Ms3tKmsNfPtZT8xnZxP2y42IN7TtdT0xD2nnT+o4MMHn6g9LKbvVWle2yYF72vKJzu/uTjL/ceVXDDlm2oT4F5K8udb6jYniDTDmkUmOKaW8Ks3tR9uUUlbXWn/l/R1QvLEk161N6pVSPpdmfIpxr6QZUMyXJvnG2i9XSin/3Pb7awOIOZH1/T82jHgTraefuv3GPDp9HNcHFHOjz2NdebGu/0jz4ZE09/wsT5pLBksp27Tlr0xzYvajWusPktxVSnl6O+/YNKOvTlop5dfb31sl+Z9p7v9LmpGmX1xK2bY0l9ztm2ZQt2uS7FtK2adt04vbukOLWWs9s9Y6q9Y6u433lX5OVDYy5ia9txvxvt6Z5IhSyhPaBNWxaUa+HUQf17fM29Pc9/X6fmJtKGZpLgf9QprBc/59bf3aXNL1o1LKEW0/F6S5X21Y8T5Qa9293Xb+c
5Jb+/kHaWNipvlQ2b+UMrN9/dwM6G/Z7ndj7fTeaQbwvaN9Lz+SZFmt9f/0E2tj4g3z2FNrPabWOrv9m70nyTtqrX+TIe4jpZSZpb3ctDTfJu+bJkG8SftIv/Habwv+Kb880R3kMf3SJHPa928szWfNzZu6T25kzKHtlxPFzCbul+s7vrZlL0pzr3fvMkM5vk4Uc8jbz51pzktSmktrj0gzQvsmnRP0G2/I5wMT9XGYx54ntLFSSnlukjW11pvb18P6fB435qbulxsRb2jb63re10kfB9bzuXpxmi8b0/7+fE/5gtI4Is3tRSvTHJOOK6XsXJqreI5ry4YWs90X/zHJ4lrrReO+eQOOWWv9w1rrXu328xdt7PESF4N6X69Jc1X62r/lczLB9jPAmHcmeXZpbkfaOs3ny6C2n4lM6hg7qHjrWU8/dfuK2c9xfYAxN/48tm7EQCtbwk+ab0tWJnkkTfbnFWk+LJamuVfv6iRz27pHpskY35ImQ71zz3oOSfPt4PVJPpcJRjBeT8zXteu+Nc3tKaWn/pvT3CN3S3pGuU4zINqt7bw3b0Q/+47ZM39+Njy6+KD6Oan3doDx/neaE6Ub01zmt+0A+3hHmgElV7f190+Tva1pDrzXtT+vHETMNCcRP8m6j/P79XbevLaPtyf5m952DiNez/pmZ8MDkA2qj3/Svq/Xpzk5e/KAYv5Rmnt8r0tybZKT2/L/3P4tr+9py/OHFW/Yx56e5d6adQdYGso+kmbMkt5+/k5bPul9ZBDx2nl7J/lq+75+Oc3tJYM6Drw0v3zc4zt7yie1Tw4y5rD2yw30c1L75UbEm5/m27jedQzt+DpRzGFuP2m+Sf10+77enOSMnvVM6pxgUPEe9x4M7HxgA30c1rFndppzgWVJ/jXJ3sPefiaK2c9+Oah4Gd72ur6Ykz0OjPu5mmYw1y+n+YLxX/PLgWRLmoFCb08zpsa8nnW9PM1jLm9LcwvHRH0cSMw0x8BHsu75ySHD7mfPOhdm4qeNDPJ9fW67nhuSXJBkmyG/rzPSDOa5LM0x4v8M8G/5n9Js1z9K8mA7vfbRvxs8xg4q3kTrGXYfJ3tcH/D7ekgmeR7b+7P2IAMAAADQSW4bAQAAADpN8gIAAADoNMkLAAAAoNMkLwAAAIBOk7wAAAAAOk3yAgDolNL4einleT1lv19K+ZdRtgsAGB2PSgUAOqeUcmCSTyc5NMlYkm8lOaHWevtGrGus1rpmwE0EAKaQ5AUA0EmllHcm+UmS7dvfeyc5MMnWSd5aa/18KWV2kr9v6yTJa2qtV5ZS5id5W5IHkjyj1vq0qW09ADBIkhcAQCeVUrZPcm2Snye5JMlNtdaPllJ2SvLNNFdl1CS/qLU+XErZN8nHa63z2uTFF5IcWGv93ijaDwAMztioGwAAMJ5a609KKZ9MsjrJi5L8TinlL9rZ2yXZK8l/JPmbUsohSR5N0nuFxTclLgBgyyB5AQB02S/an5Lk92qtt/TOLKW8NckPkxycZiDyh3tm/2SK2ggADJmnjQAAm4NLk5xeSilJUko5tC3fMcnKWusvkvxRkhkjah8AMESSFwDA5uBtaQbqvL6UclP7Okn+b5JTSynfTvKMuNoCALZIBuwEAAAAOs2VFwAAAECnSV4AAAAAnSZ5AQAAAHSa5AUAAADQaZIXAAAAQKdJXgAAAACdJnkBAAAAdJrkBQAAANBp/w/FFsK2JJrjNAAAAABJRU5ErkJggg==\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#Кол-во мужчин и женщин с Gold по возрасту\n", + "plt.figure(figsize=(18, 8))\n", + "sns.countplot(data=df, x='Year', hue='Season')" + 
], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Зимние виды спорта проводятся реже. А до 1924 они вообще не проводились." + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Корреляции" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [ + { + "data": { + "text/plain": " ID Age Height Weight Year\nID 1.0000 -0.0036 -0.0111 -0.0092 0.0119\nAge -0.0036 1.0000 0.1382 0.2121 -0.1151\nHeight -0.0111 0.1382 1.0000 0.7962 0.0476\nWeight -0.0092 0.2121 0.7962 1.0000 0.0191\nYear 0.0119 -0.1151 0.0476 0.0191 1.0000", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
IDAgeHeightWeightYear
ID1.0000-0.0036-0.0111-0.00920.0119
Age-0.00361.00000.13820.2121-0.1151
Height-0.01110.13821.00000.79620.0476
Weight-0.00920.21210.79621.00000.0191
Year0.0119-0.11510.04760.01911.0000
\n
" + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.corr()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 18, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Посмотрим на тепловую карту\n", + "plt.figure(figsize=(8,8))\n", + "sns.heatmap(df.corr(), annot=True, cmap=\"YlGnBu\", cbar=False);" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "* Коэф. корреляции между Height и Weight равен 0.8, что ожидаемо\n", + "* Также немного коррелируют между собой Age и Height или Weight\n", + "* Остальные данные вообще не коррелируют между собой" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df.plot.scatter(x='Height', y='Weight')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 234, + "outputs": [ + { + "data": { + "text/plain": "Team\nUnited States 5219\nSoviet Union 2451\nGermany 1984\nGreat Britain 1673\nFrance 1550\n ... \nUnited States-1 101\nNigeria 99\nTurkey 95\nSerbia 85\nKazakhstan 77\nName: count, Length: 50, dtype: object" + }, + "execution_count": 234, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Количество медалей по командам\n", + "plt.figure(figsize=(18,9))\n", + "medals = df.groupby('Team')['Medal'].describe(include=object)['count'].sort_values(ascending=False)[:50]\n", + "medals.plot()\n", + "medals" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Количество полученных медалей 50 лучших команд.\n", + "Видно, что United States получили больше всех - 5219, а второе место аж в два раза меньше - 2451." + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Интересные факты\n", + "* Возраст самого старого спортсмена 97 лет\n", + "* Возраст самого молодого - 10 лет\n", + "* Наименьший вес спортсмена - 25 кг\n", + "* Раньше спортсменок почти не было, но к настоящему моменту наблюдается тенденция равного кол-ва мужчин и женщин\n", + "* До 1992 года олимпиады проводились каждые 4 года, но начиная с 1992 стали проводить их каждые 2 года, причем каждая вторая олимпиада была менее \"масштабной\".\n", + "* United States выигрывали медали 5219 раз. Это более чем в два раза больше следующей по счету команды." 
+ ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + } + ], + "metadata": { + "anaconda-cloud": {}, + "colab": { + "collapsed_sections": [ + "UTKVH3sMutTM", + "tiFgQjEcxnu2", + "qy4yj--r07RL", + "USQjKMAIETO8", + "QeOBRH60Wf1F", + "qwyKgedLeIMT", + "u4vIuhNgeNZx", + "nmb5O7vgWrnf", + "v9NMrXW4keP5", + "j_OKzzgAmiW-", + "CYdb_HyBnc-E", + "O1oGo8x3qXN0", + "sH1avRRhrWF2", + "NBLI-3pesJ0m", + "gv3ndpEhssD5", + "cDEsUU-Zu4cS", + "yzb09-GB33KV", + "APcKwPUddmDr", + "QaznDKFNv6jw", + "oWsKclsog0hC", + "mdP3P92kXxsu", + "ZqIWfFVLXRfZ", + "L1jG1C3BX5Cx", + "WTgnPj6raOzL" + ], + "name": "01_Baseline_example.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git "a/\320\237\321\200\320\265\320\264\320\276\320\261\321\200\320\260\320\261\320\276\321\202\320\272\320\260 \320\264\320\260\320\275\320\275\321\213\321\205, \320\277\320\276\321\201\321\202\321\200\320\276\320\265\320\275\320\275\320\270\320\265 \320\277\321\200\320\276\321\201\321\202\320\276\320\271 \320\274\320\276\320\264\320\265\320\273\320\270, \320\272\320\276\320\275\320\262\320\265\320\271\320\265\321\200\320\270\320\267\320\260\321\206\320\270\321\217.ipynb" "b/\320\237\321\200\320\265\320\264\320\276\320\261\321\200\320\260\320\261\320\276\321\202\320\272\320\260 \320\264\320\260\320\275\320\275\321\213\321\205, \320\277\320\276\321\201\321\202\321\200\320\276\320\265\320\275\320\275\320\270\320\265 \320\277\321\200\320\276\321\201\321\202\320\276\320\271 \320\274\320\276\320\264\320\265\320\273\320\270, 
\320\272\320\276\320\275\320\262\320\265\320\271\320\265\321\200\320\270\320\267\320\260\321\206\320\270\321\217.ipynb" index ea3bddb..c6e30a5 100644 --- "a/\320\237\321\200\320\265\320\264\320\276\320\261\321\200\320\260\320\261\320\276\321\202\320\272\320\260 \320\264\320\260\320\275\320\275\321\213\321\205, \320\277\320\276\321\201\321\202\321\200\320\276\320\265\320\275\320\275\320\270\320\265 \320\277\321\200\320\276\321\201\321\202\320\276\320\271 \320\274\320\276\320\264\320\265\320\273\320\270, \320\272\320\276\320\275\320\262\320\265\320\271\320\265\321\200\320\270\320\267\320\260\321\206\320\270\321\217.ipynb" +++ "b/\320\237\321\200\320\265\320\264\320\276\320\261\321\200\320\260\320\261\320\276\321\202\320\272\320\260 \320\264\320\260\320\275\320\275\321\213\321\205, \320\277\320\276\321\201\321\202\321\200\320\276\320\265\320\275\320\275\320\270\320\265 \320\277\321\200\320\276\321\201\321\202\320\276\320\271 \320\274\320\276\320\264\320\265\320\273\320\270, \320\272\320\276\320\275\320\262\320\265\320\271\320\265\321\200\320\270\320\267\320\260\321\206\320\270\321\217.ipynb" @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 1, "metadata": { "executionInfo": { "elapsed": 324, @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 2, "metadata": { "executionInfo": { "elapsed": 585, @@ -96,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "id": "SU42i9kWq3H0" }, @@ -108,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -131,113 +131,16 @@ "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATtarget
5020.045270.011.930.00.5736.12076.72.28751.0273.021.0396.99.0820.6
130.629760.08.140.00.5385.94961.84.70754.0307.021.0396.98.2620.4
820.0365925.04.860.00.4266.30232.25.40074.0281.019.0396.96.7224.8
\n", - "
" - ], - "text/plain": [ - " CRIM ZN INDUS CHAS NOX ... TAX PTRATIO B LSTAT target\n", - "502 0.04527 0.0 11.93 0.0 0.573 ... 273.0 21.0 396.9 9.08 20.6\n", - "13 0.62976 0.0 8.14 0.0 0.538 ... 307.0 21.0 396.9 8.26 20.4\n", - "82 0.03659 25.0 4.86 0.0 0.426 ... 281.0 19.0 396.9 6.72 24.8\n", - "\n", - "[3 rows x 14 columns]" - ] + "text/plain": " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n292 0.03615 80.0 4.95 0.0 0.411 6.630 23.4 5.1167 4.0 245.0 \n444 12.80230 0.0 18.10 0.0 0.740 5.854 96.6 1.8956 24.0 666.0 \n336 0.03427 0.0 5.19 0.0 0.515 5.869 46.3 5.2311 5.0 224.0 \n473 4.64689 0.0 18.10 0.0 0.614 6.980 67.6 2.5329 24.0 666.0 \n467 4.42228 0.0 18.10 0.0 0.584 6.003 94.5 2.5403 24.0 666.0 \n\n PTRATIO B LSTAT target \n292 19.2 396.90 4.70 27.9 \n444 20.2 240.52 23.79 10.8 \n336 20.2 396.90 9.80 19.5 \n473 20.2 374.68 11.66 29.8 \n467 20.2 331.29 21.32 19.1 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATtarget
2920.0361580.04.950.00.4116.63023.45.11674.0245.019.2396.904.7027.9
44412.802300.018.100.00.7405.85496.61.895624.0666.020.2240.5223.7910.8
3360.034270.05.190.00.5155.86946.35.23115.0224.020.2396.909.8019.5
4734.646890.018.100.00.6146.98067.62.532924.0666.020.2374.6811.6629.8
4674.422280.018.100.00.5846.00394.52.540324.0666.020.2331.2921.3219.1
\n
" }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df.sample(3)" + "df.sample(5)" ] }, { @@ -605,7 +508,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -627,11 +530,9 @@ "outputs": [ { "data": { - "text/plain": [ - "(0.0, 1.0, 0.42220830944694204, 0.32131773862477314)" - ] + "text/plain": "(0.0, 1.0, 0.42220830944694204, 0.32131773862477314)" }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -639,7 +540,7 @@ "source": [ "min_max_scaler = preprocessing.MinMaxScaler()\n", "min_max_feature = min_max_scaler.fit_transform(df['TAX'].values.reshape(-1, 1))\n", - "min_max_feature.min(), min_max_feature.max() , min_max_feature.mean() , min_max_feature.std() " + "min_max_feature.min(), min_max_feature.max() , min_max_feature.mean() , min_max_feature.std()" ] }, { @@ -7831,4 +7732,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} +} \ No newline at end of file