diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 8a9fa9e..0b329bb 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,11 +12,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# Import your libraries:\n" + "# Import your libraries:\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.model_selection import train_test_split" ] }, { @@ -37,11 +42,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes = load_diabetes()\n" ] }, { @@ -53,11 +59,91 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': array([[ 0.03807591, 0.05068012, 0.06169621, ..., -0.00259226,\n", + " 0.01990842, -0.01764613],\n", + " [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,\n", + " -0.06832974, -0.09220405],\n", + " [ 0.08529891, 0.05068012, 0.04445121, ..., -0.00259226,\n", + " 0.00286377, -0.02593034],\n", + " ...,\n", + " [ 0.04170844, 0.05068012, -0.01590626, ..., -0.01107952,\n", + " -0.04687948, 0.01549073],\n", + " [-0.04547248, -0.04464164, 0.03906215, ..., 0.02655962,\n", + " 0.04452837, -0.02593034],\n", + " [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,\n", + " -0.00421986, 0.00306441]]),\n", + " 'target': array([151., 75., 141., 206., 135., 97., 138., 63., 110., 310., 101.,\n", + " 69., 179., 185., 118., 171., 166., 144., 97., 168., 68., 49.,\n", + " 68., 245., 184., 202., 137., 85., 131., 283., 129., 59., 341.,\n", + " 87., 65., 102., 265., 276., 252., 90., 100., 55., 61., 92.,\n", + " 259., 53., 190., 142., 75., 142., 155., 225., 59., 104., 182.,\n", + " 128., 52., 37., 
170., 170., 61., 144., 52., 128., 71., 163.,\n", + " 150., 97., 160., 178., 48., 270., 202., 111., 85., 42., 170.,\n", + " 200., 252., 113., 143., 51., 52., 210., 65., 141., 55., 134.,\n", + " 42., 111., 98., 164., 48., 96., 90., 162., 150., 279., 92.,\n", + " 83., 128., 102., 302., 198., 95., 53., 134., 144., 232., 81.,\n", + " 104., 59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,\n", + " 173., 180., 84., 121., 161., 99., 109., 115., 268., 274., 158.,\n", + " 107., 83., 103., 272., 85., 280., 336., 281., 118., 317., 235.,\n", + " 60., 174., 259., 178., 128., 96., 126., 288., 88., 292., 71.,\n", + " 197., 186., 25., 84., 96., 195., 53., 217., 172., 131., 214.,\n", + " 59., 70., 220., 268., 152., 47., 74., 295., 101., 151., 127.,\n", + " 237., 225., 81., 151., 107., 64., 138., 185., 265., 101., 137.,\n", + " 143., 141., 79., 292., 178., 91., 116., 86., 122., 72., 129.,\n", + " 142., 90., 158., 39., 196., 222., 277., 99., 196., 202., 155.,\n", + " 77., 191., 70., 73., 49., 65., 263., 248., 296., 214., 185.,\n", + " 78., 93., 252., 150., 77., 208., 77., 108., 160., 53., 220.,\n", + " 154., 259., 90., 246., 124., 67., 72., 257., 262., 275., 177.,\n", + " 71., 47., 187., 125., 78., 51., 258., 215., 303., 243., 91.,\n", + " 150., 310., 153., 346., 63., 89., 50., 39., 103., 308., 116.,\n", + " 145., 74., 45., 115., 264., 87., 202., 127., 182., 241., 66.,\n", + " 94., 283., 64., 102., 200., 265., 94., 230., 181., 156., 233.,\n", + " 60., 219., 80., 68., 332., 248., 84., 200., 55., 85., 89.,\n", + " 31., 129., 83., 275., 65., 198., 236., 253., 124., 44., 172.,\n", + " 114., 142., 109., 180., 144., 163., 147., 97., 220., 190., 109.,\n", + " 191., 122., 230., 242., 248., 249., 192., 131., 237., 78., 135.,\n", + " 244., 199., 270., 164., 72., 96., 306., 91., 214., 95., 216.,\n", + " 263., 178., 113., 200., 139., 139., 88., 148., 88., 243., 71.,\n", + " 77., 109., 272., 60., 54., 221., 90., 311., 281., 182., 321.,\n", + " 58., 262., 206., 233., 242., 123., 167., 63., 
197., 71., 168.,\n", + " 140., 217., 121., 235., 245., 40., 52., 104., 132., 88., 69.,\n", + " 219., 72., 201., 110., 51., 277., 63., 118., 69., 273., 258.,\n", + " 43., 198., 242., 232., 175., 93., 168., 275., 293., 281., 72.,\n", + " 140., 189., 181., 209., 136., 261., 113., 131., 174., 257., 55.,\n", + " 84., 42., 146., 212., 233., 91., 111., 152., 120., 67., 310.,\n", + " 94., 183., 66., 173., 72., 49., 64., 48., 178., 104., 132.,\n", + " 220., 57.]),\n", + " 'frame': None,\n", + " 'DESCR': '.. _diabetes_dataset:\\n\\nDiabetes dataset\\n----------------\\n\\nTen baseline variables, age, sex, body mass index, average blood\\npressure, and six blood serum measurements were obtained for each of n =\\n442 diabetes patients, as well as the response of interest, a\\nquantitative measure of disease progression one year after baseline.\\n\\n**Data Set Characteristics:**\\n\\n :Number of Instances: 442\\n\\n :Number of Attributes: First 10 columns are numeric predictive values\\n\\n :Target: Column 11 is a quantitative measure of disease progression one year after baseline\\n\\n :Attribute Information:\\n - age age in years\\n - sex\\n - bmi body mass index\\n - bp average blood pressure\\n - s1 tc, total serum cholesterol\\n - s2 ldl, low-density lipoproteins\\n - s3 hdl, high-density lipoproteins\\n - s4 tch, total cholesterol / HDL\\n - s5 ltg, possibly log of serum triglycerides level\\n - s6 glu, blood sugar level\\n\\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. 
the sum of squares of each column totals 1).\\n\\nSource URL:\\nhttps://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\\n\\nFor more information see:\\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\\n(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)',\n", + " 'feature_names': ['age',\n", + " 'sex',\n", + " 'bmi',\n", + " 'bp',\n", + " 's1',\n", + " 's2',\n", + " 's3',\n", + " 's4',\n", + " 's5',\n", + " 's6'],\n", + " 'data_filename': 'diabetes_data.csv.gz',\n", + " 'target_filename': 'diabetes_target.csv.gz',\n", + " 'data_module': 'sklearn.datasets.data'}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "diabetes" ] }, { @@ -73,13 +159,59 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "scrolled": false }, - "outputs": [], - "source": [ - "# Your code here:\n" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".. 
_diabetes_dataset:\n", + "\n", + "Diabetes dataset\n", + "----------------\n", + "\n", + "Ten baseline variables, age, sex, body mass index, average blood\n", + "pressure, and six blood serum measurements were obtained for each of n =\n", + "442 diabetes patients, as well as the response of interest, a\n", + "quantitative measure of disease progression one year after baseline.\n", + "\n", + "**Data Set Characteristics:**\n", + "\n", + " :Number of Instances: 442\n", + "\n", + " :Number of Attributes: First 10 columns are numeric predictive values\n", + "\n", + " :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n", + "\n", + " :Attribute Information:\n", + " - age age in years\n", + " - sex\n", + " - bmi body mass index\n", + " - bp average blood pressure\n", + " - s1 tc, total serum cholesterol\n", + " - s2 ldl, low-density lipoproteins\n", + " - s3 hdl, high-density lipoproteins\n", + " - s4 tch, total cholesterol / HDL\n", + " - s5 ltg, possibly log of serum triglycerides level\n", + " - s6 glu, blood sugar level\n", + "\n", + "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. 
the sum of squares of each column totals 1).\n", + "\n", + "Source URL:\n", + "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n", + "\n", + "For more information see:\n", + "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n", + "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "print(diabetes[\"DESCR\"])\n" ] }, { @@ -97,11 +229,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "# Enter your answer here:\n" + "# Enter your answer here:\n", + "# 10 attributes\n", + "# data holds all the columns (features) in the dataset that influence the target\n", + "# 442 datapoints" ] }, { @@ -115,11 +250,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n", + "(442, 1)\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "features = pd.DataFrame(diabetes[\"data\"])\n", + "labels = pd.DataFrame(diabetes[\"target\"])\n", + "\n", + "print(features.shape)\n", + "print(labels.shape)" ] }, { @@ -156,11 +305,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 108, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "from sklearn import linear_model\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import classification_report, confusion_matrix" ] }, { @@ -172,11 +324,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 109, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "\n", + "\n", + "model = LinearRegression()\n", + "\n" ] }, { @@ 
-190,11 +346,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 110, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "x_train, x_test, y_train, y_test = train_test_split(features, labels)" ] }, { @@ -206,11 +363,75 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 111, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "model.fit(x_train,y_train)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.01516906, -0.04445468, -0.01195425, ..., -0.03952806,\n", + " -0.02144627, -0.03425455],\n", + " [ 0.0030313 , -0.04440727, -0.00971306, ..., -0.0394498 ,\n", + " -0.06458319, -0.05484579],\n", + " [ 0.00980782, 0.00670042, -0.06611101, ..., -0.11478127,\n", + " -0.13227327, 0.01067851],\n", + " ...,\n", + " [ 0.03526416, 0.05040808, 0.10924208, ..., -0.00322748,\n", + " 0.02739389, 0.07282956],\n", + " [ 0.03526328, 0.05031124, 0.12313841, ..., 0.107163 ,\n", + " -0.0004295 , 0.0272642 ],\n", + " [-0.04795658, -0.04460567, 0.15884403, ..., 0.03372178,\n", + " 0.02752319, 0.01094463]])" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "59.878661866524354" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.intercept_" ] }, { @@ -231,11 +452,43 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 117, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([80.28644663, 76.24775398, 75.94342497, 71.24920284, 83.83190315,\n", + " 73.03865226, 74.10635141, 82.82065036, 77.01113304, 75.1250318 ,\n", + " 76.15226139, 80.20834626, 75.1980536 , 73.74698033, 81.27442865,\n", + " 76.1232762 , 79.44519564, 75.11011399, 77.62254583, 72.07568619,\n", + " 78.50739308, 76.32643475, 78.01828843, 75.04845961, 76.74239034,\n", + " 74.66162681, 71.92159576, 73.82414604, 77.10259969, 74.09871317,\n", + " 78.75724739, 79.67262333, 78.4384927 , 73.3741055 , 76.4609047 ,\n", + " 76.39797403, 75.78480016, 75.94117835, 77.41954397, 73.77477612,\n", + " 75.20203033, 75.39317823, 73.51780167, 74.13849327, 80.27009619,\n", + " 73.82509068, 77.34199622, 75.28781048, 72.66567114, 76.33086219,\n", + " 72.73168368, 77.36624577, 75.52243031, 71.54823886, 72.3036273 ,\n", + " 76.71404463, 72.91510167, 75.32069756, 75.5718132 , 78.72895184,\n", + " 75.71382028, 78.87122116, 78.85659141, 74.24352312, 73.95493681,\n", + " 77.64052524, 76.13561755, 78.58729432, 74.60411978, 75.03890064,\n", + " 74.52162772, 75.8939553 , 77.27938514, 73.62434487, 74.44526203,\n", + " 75.18721185, 76.68844159, 74.84410759, 73.29602297, 75.38571926,\n", + " 73.82486715, 75.22650518, 78.34618773, 78.82554997, 77.27009538,\n", + " 75.79374482, 72.42991909, 75.45398028, 83.2747976 , 80.51917031,\n", + " 75.83942444, 76.61833215, 76.6107049 , 75.78285371, 74.12854871,\n", + " 76.40894251, 79.84332333, 74.90664293])" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "pred = model.predict(x_test)\n", + "pred" ] }, { @@ -247,11 +500,34 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "343 81\n", + "224 77\n", + "272 78\n", + "111 73\n", + "326 
80\n", + " ..\n", + "148 74\n", + "261 78\n", + "332 80\n", + "333 80\n", + "334 80\n", + "Name: model_year, Length: 98, dtype: int64" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "y_test" ] }, { @@ -263,11 +539,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ - "# Your explanation here:\n" + "# Your explanation here:\n", + "#no, but seems to be very close to it" ] }, { @@ -351,11 +628,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 79, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "auto = pd.read_csv(\"../auto-mpg.csv\")" ] }, { @@ -367,11 +645,124 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mpgcylindersdisplacementhorse_powerweightaccelerationmodel_yearcar_name
018.08307.0130.0350412.070\\t\"chevrolet chevelle malibu\"
115.08350.0165.0369311.570\\t\"buick skylark 320\"
218.08318.0150.0343611.070\\t\"plymouth satellite\"
316.08304.0150.0343312.070\\t\"amc rebel sst\"
417.08302.0140.0344910.570\\t\"ford torino\"
\n", + "
" + ], + "text/plain": [ + " mpg cylinders displacement horse_power weight acceleration \\\n", + "0 18.0 8 307.0 130.0 3504 12.0 \n", + "1 15.0 8 350.0 165.0 3693 11.5 \n", + "2 18.0 8 318.0 150.0 3436 11.0 \n", + "3 16.0 8 304.0 150.0 3433 12.0 \n", + "4 17.0 8 302.0 140.0 3449 10.5 \n", + "\n", + " model_year car_name \n", + "0 70 \\t\"chevrolet chevelle malibu\" \n", + "1 70 \\t\"buick skylark 320\" \n", + "2 70 \\t\"plymouth satellite\" \n", + "3 70 \\t\"amc rebel sst\" \n", + "4 70 \\t\"ford torino\" " + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "auto.head()" ] }, { @@ -383,11 +774,31 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mpg float64\n", + "cylinders int64\n", + "displacement float64\n", + "horse_power float64\n", + "weight int64\n", + "acceleration float64\n", + "model_year int64\n", + "car_name object\n", + "dtype: object" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "auto.dtypes" ] }, { @@ -399,11 +810,481 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 90, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "70\n", + "82\n" + ] + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "print(auto[\"model_year\"].min())\n", + "print(auto[\"model_year\"].max())" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mpgcylindersdisplacementhorse_powerweightaccelerationmodel_yearcar_name
36728.04112.088.0260519.682\\t\"chevrolet cavalier\"
36827.04112.088.0264018.682\\t\"chevrolet cavalier wagon\"
36934.04112.088.0239518.082\\t\"chevrolet cavalier 2-door\"
37031.04112.085.0257516.282\\t\"pontiac j2000 se hatchback\"
37129.04135.084.0252516.082\\t\"dodge aries se\"
37227.04151.090.0273518.082\\t\"pontiac phoenix\"
37324.04140.092.0286516.482\\t\"ford fairmont futura\"
37423.04151.0NaN303520.582\\t\"amc concord dl\"
37536.04105.074.0198015.382\\t\"volkswagen rabbit l\"
37637.0491.068.0202518.282\\t\"mazda glc custom l\"
37731.0491.068.0197017.682\\t\"mazda glc custom\"
37838.04105.063.0212514.782\\t\"plymouth horizon miser\"
37936.0498.070.0212517.382\\t\"mercury lynx l\"
38036.04120.088.0216014.582\\t\"nissan stanza xe\"
38136.04107.075.0220514.582\\t\"honda accord\"
38234.04108.070.0224516.982\\t\"toyota corolla\"
38338.0491.067.0196515.082\\t\"honda civic\"
38432.0491.067.0196515.782\\t\"honda civic (auto)\"
38538.0491.067.0199516.282\\t\"datsun 310 gx\"
38625.06181.0110.0294516.482\\t\"buick century limited\"
38738.06262.085.0301517.082\\t\"oldsmobile cutlass ciera (diesel)\"
38826.04156.092.0258514.582\\t\"chrysler lebaron medallion\"
38922.06232.0112.0283514.782\\t\"ford granada l\"
39032.04144.096.0266513.982\\t\"toyota celica gt\"
39136.04135.084.0237013.082\\t\"dodge charger 2.2\"
39227.04151.090.0295017.382\\t\"chevrolet camaro\"
39327.04140.086.0279015.682\\t\"ford mustang gl\"
39444.0497.052.0213024.682\\t\"vw pickup\"
39532.04135.084.0229511.682\\t\"dodge rampage\"
39628.04120.079.0262518.682\\t\"ford ranger\"
39731.04119.082.0272019.482\\t\"chevy s-10\"
\n", + "
" + ], + "text/plain": [ + " mpg cylinders displacement horse_power weight acceleration \\\n", + "367 28.0 4 112.0 88.0 2605 19.6 \n", + "368 27.0 4 112.0 88.0 2640 18.6 \n", + "369 34.0 4 112.0 88.0 2395 18.0 \n", + "370 31.0 4 112.0 85.0 2575 16.2 \n", + "371 29.0 4 135.0 84.0 2525 16.0 \n", + "372 27.0 4 151.0 90.0 2735 18.0 \n", + "373 24.0 4 140.0 92.0 2865 16.4 \n", + "374 23.0 4 151.0 NaN 3035 20.5 \n", + "375 36.0 4 105.0 74.0 1980 15.3 \n", + "376 37.0 4 91.0 68.0 2025 18.2 \n", + "377 31.0 4 91.0 68.0 1970 17.6 \n", + "378 38.0 4 105.0 63.0 2125 14.7 \n", + "379 36.0 4 98.0 70.0 2125 17.3 \n", + "380 36.0 4 120.0 88.0 2160 14.5 \n", + "381 36.0 4 107.0 75.0 2205 14.5 \n", + "382 34.0 4 108.0 70.0 2245 16.9 \n", + "383 38.0 4 91.0 67.0 1965 15.0 \n", + "384 32.0 4 91.0 67.0 1965 15.7 \n", + "385 38.0 4 91.0 67.0 1995 16.2 \n", + "386 25.0 6 181.0 110.0 2945 16.4 \n", + "387 38.0 6 262.0 85.0 3015 17.0 \n", + "388 26.0 4 156.0 92.0 2585 14.5 \n", + "389 22.0 6 232.0 112.0 2835 14.7 \n", + "390 32.0 4 144.0 96.0 2665 13.9 \n", + "391 36.0 4 135.0 84.0 2370 13.0 \n", + "392 27.0 4 151.0 90.0 2950 17.3 \n", + "393 27.0 4 140.0 86.0 2790 15.6 \n", + "394 44.0 4 97.0 52.0 2130 24.6 \n", + "395 32.0 4 135.0 84.0 2295 11.6 \n", + "396 28.0 4 120.0 79.0 2625 18.6 \n", + "397 31.0 4 119.0 82.0 2720 19.4 \n", + "\n", + " model_year car_name \n", + "367 82 \\t\"chevrolet cavalier\" \n", + "368 82 \\t\"chevrolet cavalier wagon\" \n", + "369 82 \\t\"chevrolet cavalier 2-door\" \n", + "370 82 \\t\"pontiac j2000 se hatchback\" \n", + "371 82 \\t\"dodge aries se\" \n", + "372 82 \\t\"pontiac phoenix\" \n", + "373 82 \\t\"ford fairmont futura\" \n", + "374 82 \\t\"amc concord dl\" \n", + "375 82 \\t\"volkswagen rabbit l\" \n", + "376 82 \\t\"mazda glc custom l\" \n", + "377 82 \\t\"mazda glc custom\" \n", + "378 82 \\t\"plymouth horizon miser\" \n", + "379 82 \\t\"mercury lynx l\" \n", + "380 82 \\t\"nissan stanza xe\" \n", + "381 82 \\t\"honda accord\" \n", + "382 82 
\\t\"toyota corolla\" \n", + "383 82 \\t\"honda civic\" \n", + "384 82 \\t\"honda civic (auto)\" \n", + "385 82 \\t\"datsun 310 gx\" \n", + "386 82 \\t\"buick century limited\" \n", + "387 82 \\t\"oldsmobile cutlass ciera (diesel)\" \n", + "388 82 \\t\"chrysler lebaron medallion\" \n", + "389 82 \\t\"ford granada l\" \n", + "390 82 \\t\"toyota celica gt\" \n", + "391 82 \\t\"dodge charger 2.2\" \n", + "392 82 \\t\"chevrolet camaro\" \n", + "393 82 \\t\"ford mustang gl\" \n", + "394 82 \\t\"vw pickup\" \n", + "395 82 \\t\"dodge rampage\" \n", + "396 82 \\t\"ford ranger\" \n", + "397 82 \\t\"chevy s-10\" " + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "auto[auto[\"model_year\"]==82]" ] }, { @@ -415,11 +1296,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 94, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "auto_prep = auto.dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mpg 0\n", + "cylinders 0\n", + "displacement 0\n", + "horse_power 0\n", + "weight 0\n", + "acceleration 0\n", + "model_year 0\n", + "car_name 0\n", + "dtype: int64" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "auto_prep.isna().sum()" ] }, { @@ -431,11 +1341,28 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4 199\n", + "8 103\n", + "6 83\n", + "3 4\n", + "5 3\n", + "Name: cylinders, dtype: int64" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "auto_prep[\"cylinders\"].value_counts()" ] }, { @@ -451,11 +1378,36 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Vasco Nicolau\\AppData\\Local\\Temp\\ipykernel_28036\\791769897.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " auto_prep.drop(columns = \"car_name\", inplace = True)\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "auto_prep.drop(columns = \"car_name\", inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 134, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "features = auto_prep.drop(columns = \"model_year\")\n", + "labels = auto_prep[\"model_year\"]\n", + "model = LinearRegression()\n", + "x_train, x_test, y_train, y_test = train_test_split(features, labels)\n" ] }, { @@ -469,11 +1421,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 135, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "auto_model = model.fit(x_train,y_train)" ] }, { @@ -493,11 +1446,28 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.4716051324906728" + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "from sklearn.metrics import r2_score\n", + "y_pred = model.predict(x_train)\n", + "\n", + "r_squared = r2_score(y_train, y_pred)\n", + "\n", + "r_squared" ] }, { @@ -513,11 +1483,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 137, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.4721030493247429" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "y_test_pred = model.predict(x_test)\n", + "\n", + "r_squared_test = r2_score(y_test, y_test_pred)\n", + "\n", + "r_squared_test" ] }, { @@ -542,11 +1528,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 165, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here: \n", + "x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size = 0.10)" ] }, { @@ -558,11 +1545,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 166, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "model = LinearRegression()\n", + "auto_model09 = model.fit(x_train,y_train)" ] }, { @@ -574,11 +1563,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.2730588325889347" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "y_test_pred = model.predict(x_test)\n", + "\n", + "r_squared_test = r2_score(y_test, y_test_pred)\n", + "\n", + "r_squared_test" ] }, { @@ -590,11 +1595,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 168, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "#it decreased, instead of being somewhere arounds 0.4/0.5 it is around 0.30" ] }, { @@ -703,7 +1709,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -717,7 +1723,7 @@ "name": "python", "nbconvert_exporter": "python", 
"pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.9.13" } }, "nbformat": 4,