diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 8a9fa9e..0b329bb 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,11 +12,16 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
- "# Import your libraries:\n"
+ "# Import your libraries:\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "from sklearn.datasets import load_diabetes\n",
+ "from sklearn.model_selection import train_test_split"
]
},
{
@@ -37,11 +42,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "diabetes = load_diabetes()\n"
]
},
{
@@ -53,11 +59,91 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'data': array([[ 0.03807591, 0.05068012, 0.06169621, ..., -0.00259226,\n",
+ " 0.01990842, -0.01764613],\n",
+ " [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,\n",
+ " -0.06832974, -0.09220405],\n",
+ " [ 0.08529891, 0.05068012, 0.04445121, ..., -0.00259226,\n",
+ " 0.00286377, -0.02593034],\n",
+ " ...,\n",
+ " [ 0.04170844, 0.05068012, -0.01590626, ..., -0.01107952,\n",
+ " -0.04687948, 0.01549073],\n",
+ " [-0.04547248, -0.04464164, 0.03906215, ..., 0.02655962,\n",
+ " 0.04452837, -0.02593034],\n",
+ " [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,\n",
+ " -0.00421986, 0.00306441]]),\n",
+ " 'target': array([151., 75., 141., 206., 135., 97., 138., 63., 110., 310., 101.,\n",
+ " 69., 179., 185., 118., 171., 166., 144., 97., 168., 68., 49.,\n",
+ " 68., 245., 184., 202., 137., 85., 131., 283., 129., 59., 341.,\n",
+ " 87., 65., 102., 265., 276., 252., 90., 100., 55., 61., 92.,\n",
+ " 259., 53., 190., 142., 75., 142., 155., 225., 59., 104., 182.,\n",
+ " 128., 52., 37., 170., 170., 61., 144., 52., 128., 71., 163.,\n",
+ " 150., 97., 160., 178., 48., 270., 202., 111., 85., 42., 170.,\n",
+ " 200., 252., 113., 143., 51., 52., 210., 65., 141., 55., 134.,\n",
+ " 42., 111., 98., 164., 48., 96., 90., 162., 150., 279., 92.,\n",
+ " 83., 128., 102., 302., 198., 95., 53., 134., 144., 232., 81.,\n",
+ " 104., 59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,\n",
+ " 173., 180., 84., 121., 161., 99., 109., 115., 268., 274., 158.,\n",
+ " 107., 83., 103., 272., 85., 280., 336., 281., 118., 317., 235.,\n",
+ " 60., 174., 259., 178., 128., 96., 126., 288., 88., 292., 71.,\n",
+ " 197., 186., 25., 84., 96., 195., 53., 217., 172., 131., 214.,\n",
+ " 59., 70., 220., 268., 152., 47., 74., 295., 101., 151., 127.,\n",
+ " 237., 225., 81., 151., 107., 64., 138., 185., 265., 101., 137.,\n",
+ " 143., 141., 79., 292., 178., 91., 116., 86., 122., 72., 129.,\n",
+ " 142., 90., 158., 39., 196., 222., 277., 99., 196., 202., 155.,\n",
+ " 77., 191., 70., 73., 49., 65., 263., 248., 296., 214., 185.,\n",
+ " 78., 93., 252., 150., 77., 208., 77., 108., 160., 53., 220.,\n",
+ " 154., 259., 90., 246., 124., 67., 72., 257., 262., 275., 177.,\n",
+ " 71., 47., 187., 125., 78., 51., 258., 215., 303., 243., 91.,\n",
+ " 150., 310., 153., 346., 63., 89., 50., 39., 103., 308., 116.,\n",
+ " 145., 74., 45., 115., 264., 87., 202., 127., 182., 241., 66.,\n",
+ " 94., 283., 64., 102., 200., 265., 94., 230., 181., 156., 233.,\n",
+ " 60., 219., 80., 68., 332., 248., 84., 200., 55., 85., 89.,\n",
+ " 31., 129., 83., 275., 65., 198., 236., 253., 124., 44., 172.,\n",
+ " 114., 142., 109., 180., 144., 163., 147., 97., 220., 190., 109.,\n",
+ " 191., 122., 230., 242., 248., 249., 192., 131., 237., 78., 135.,\n",
+ " 244., 199., 270., 164., 72., 96., 306., 91., 214., 95., 216.,\n",
+ " 263., 178., 113., 200., 139., 139., 88., 148., 88., 243., 71.,\n",
+ " 77., 109., 272., 60., 54., 221., 90., 311., 281., 182., 321.,\n",
+ " 58., 262., 206., 233., 242., 123., 167., 63., 197., 71., 168.,\n",
+ " 140., 217., 121., 235., 245., 40., 52., 104., 132., 88., 69.,\n",
+ " 219., 72., 201., 110., 51., 277., 63., 118., 69., 273., 258.,\n",
+ " 43., 198., 242., 232., 175., 93., 168., 275., 293., 281., 72.,\n",
+ " 140., 189., 181., 209., 136., 261., 113., 131., 174., 257., 55.,\n",
+ " 84., 42., 146., 212., 233., 91., 111., 152., 120., 67., 310.,\n",
+ " 94., 183., 66., 173., 72., 49., 64., 48., 178., 104., 132.,\n",
+ " 220., 57.]),\n",
+ " 'frame': None,\n",
+ " 'DESCR': '.. _diabetes_dataset:\\n\\nDiabetes dataset\\n----------------\\n\\nTen baseline variables, age, sex, body mass index, average blood\\npressure, and six blood serum measurements were obtained for each of n =\\n442 diabetes patients, as well as the response of interest, a\\nquantitative measure of disease progression one year after baseline.\\n\\n**Data Set Characteristics:**\\n\\n :Number of Instances: 442\\n\\n :Number of Attributes: First 10 columns are numeric predictive values\\n\\n :Target: Column 11 is a quantitative measure of disease progression one year after baseline\\n\\n :Attribute Information:\\n - age age in years\\n - sex\\n - bmi body mass index\\n - bp average blood pressure\\n - s1 tc, total serum cholesterol\\n - s2 ldl, low-density lipoproteins\\n - s3 hdl, high-density lipoproteins\\n - s4 tch, total cholesterol / HDL\\n - s5 ltg, possibly log of serum triglycerides level\\n - s6 glu, blood sugar level\\n\\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\\n\\nSource URL:\\nhttps://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\\n\\nFor more information see:\\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\\n(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)',\n",
+ " 'feature_names': ['age',\n",
+ " 'sex',\n",
+ " 'bmi',\n",
+ " 'bp',\n",
+ " 's1',\n",
+ " 's2',\n",
+ " 's3',\n",
+ " 's4',\n",
+ " 's5',\n",
+ " 's6'],\n",
+ " 'data_filename': 'diabetes_data.csv.gz',\n",
+ " 'target_filename': 'diabetes_target.csv.gz',\n",
+ " 'data_module': 'sklearn.datasets.data'}"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "diabetes"
]
},
{
@@ -73,13 +159,59 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {
"scrolled": false
},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ".. _diabetes_dataset:\n",
+ "\n",
+ "Diabetes dataset\n",
+ "----------------\n",
+ "\n",
+ "Ten baseline variables, age, sex, body mass index, average blood\n",
+ "pressure, and six blood serum measurements were obtained for each of n =\n",
+ "442 diabetes patients, as well as the response of interest, a\n",
+ "quantitative measure of disease progression one year after baseline.\n",
+ "\n",
+ "**Data Set Characteristics:**\n",
+ "\n",
+ " :Number of Instances: 442\n",
+ "\n",
+ " :Number of Attributes: First 10 columns are numeric predictive values\n",
+ "\n",
+ " :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n",
+ "\n",
+ " :Attribute Information:\n",
+ " - age age in years\n",
+ " - sex\n",
+ " - bmi body mass index\n",
+ " - bp average blood pressure\n",
+ " - s1 tc, total serum cholesterol\n",
+ " - s2 ldl, low-density lipoproteins\n",
+ " - s3 hdl, high-density lipoproteins\n",
+ " - s4 tch, total cholesterol / HDL\n",
+ " - s5 ltg, possibly log of serum triglycerides level\n",
+ " - s6 glu, blood sugar level\n",
+ "\n",
+ "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n",
+ "\n",
+ "Source URL:\n",
+ "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n",
+ "\n",
+ "For more information see:\n",
+ "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n",
+ "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "print(diabetes[\"DESCR\"])\n"
]
},
{
@@ -97,11 +229,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
- "# Enter your answer here:\n"
+ "# Enter your answer here:\n",
+ "# 10 attributes\n",
+ "# data holds all the columns (features) in the dataset that influence the target\n",
+ "# 442 data points"
]
},
{
@@ -115,11 +250,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(442, 10)\n",
+ "(442, 1)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "# Wrap the raw arrays in DataFrames: one row per patient in each frame.\n",
+ "features = pd.DataFrame(diabetes[\"data\"])\n",
+ "labels = pd.DataFrame(diabetes[\"target\"])\n",
+ "\n",
+ "# Report the shape of the two frames built above (`labels`, not `target`,\n",
+ "# is the name assigned in this cell).\n",
+ "print(features.shape)\n",
+ "print(labels.shape)"
]
},
{
@@ -156,11 +305,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "from sklearn import linear_model\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.metrics import classification_report, confusion_matrix"
]
},
{
@@ -172,11 +324,15 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "\n",
+ "\n",
+ "model = LinearRegression()\n",
+ "\n"
]
},
{
@@ -190,11 +346,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "# Fix the seed so the split (and every result derived from it) is reproducible\n",
+ "x_train, x_test, y_train, y_test = train_test_split(features, labels, random_state=42)"
]
},
{
@@ -206,11 +363,75 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 111,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "execution_count": 111,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "model.fit(x_train,y_train)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-0.01516906, -0.04445468, -0.01195425, ..., -0.03952806,\n",
+ " -0.02144627, -0.03425455],\n",
+ " [ 0.0030313 , -0.04440727, -0.00971306, ..., -0.0394498 ,\n",
+ " -0.06458319, -0.05484579],\n",
+ " [ 0.00980782, 0.00670042, -0.06611101, ..., -0.11478127,\n",
+ " -0.13227327, 0.01067851],\n",
+ " ...,\n",
+ " [ 0.03526416, 0.05040808, 0.10924208, ..., -0.00322748,\n",
+ " 0.02739389, 0.07282956],\n",
+ " [ 0.03526328, 0.05031124, 0.12313841, ..., 0.107163 ,\n",
+ " -0.0004295 , 0.0272642 ],\n",
+ " [-0.04795658, -0.04460567, 0.15884403, ..., 0.03372178,\n",
+ " 0.02752319, 0.01094463]])"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.coef_"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 112,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "59.878661866524354"
+ ]
+ },
+ "execution_count": 112,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.intercept_"
]
},
{
@@ -231,11 +452,43 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 117,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([80.28644663, 76.24775398, 75.94342497, 71.24920284, 83.83190315,\n",
+ " 73.03865226, 74.10635141, 82.82065036, 77.01113304, 75.1250318 ,\n",
+ " 76.15226139, 80.20834626, 75.1980536 , 73.74698033, 81.27442865,\n",
+ " 76.1232762 , 79.44519564, 75.11011399, 77.62254583, 72.07568619,\n",
+ " 78.50739308, 76.32643475, 78.01828843, 75.04845961, 76.74239034,\n",
+ " 74.66162681, 71.92159576, 73.82414604, 77.10259969, 74.09871317,\n",
+ " 78.75724739, 79.67262333, 78.4384927 , 73.3741055 , 76.4609047 ,\n",
+ " 76.39797403, 75.78480016, 75.94117835, 77.41954397, 73.77477612,\n",
+ " 75.20203033, 75.39317823, 73.51780167, 74.13849327, 80.27009619,\n",
+ " 73.82509068, 77.34199622, 75.28781048, 72.66567114, 76.33086219,\n",
+ " 72.73168368, 77.36624577, 75.52243031, 71.54823886, 72.3036273 ,\n",
+ " 76.71404463, 72.91510167, 75.32069756, 75.5718132 , 78.72895184,\n",
+ " 75.71382028, 78.87122116, 78.85659141, 74.24352312, 73.95493681,\n",
+ " 77.64052524, 76.13561755, 78.58729432, 74.60411978, 75.03890064,\n",
+ " 74.52162772, 75.8939553 , 77.27938514, 73.62434487, 74.44526203,\n",
+ " 75.18721185, 76.68844159, 74.84410759, 73.29602297, 75.38571926,\n",
+ " 73.82486715, 75.22650518, 78.34618773, 78.82554997, 77.27009538,\n",
+ " 75.79374482, 72.42991909, 75.45398028, 83.2747976 , 80.51917031,\n",
+ " 75.83942444, 76.61833215, 76.6107049 , 75.78285371, 74.12854871,\n",
+ " 76.40894251, 79.84332333, 74.90664293])"
+ ]
+ },
+ "execution_count": 117,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "pred = model.predict(x_test)\n",
+ "pred"
]
},
{
@@ -247,11 +500,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 118,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "343 81\n",
+ "224 77\n",
+ "272 78\n",
+ "111 73\n",
+ "326 80\n",
+ " ..\n",
+ "148 74\n",
+ "261 78\n",
+ "332 80\n",
+ "333 80\n",
+ "334 80\n",
+ "Name: model_year, Length: 98, dtype: int64"
+ ]
+ },
+ "execution_count": 118,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "y_test"
]
},
{
@@ -263,11 +539,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 119,
"metadata": {},
"outputs": [],
"source": [
- "# Your explanation here:\n"
+ "# Your explanation here:\n",
+ "#no, but seems to be very close to it"
]
},
{
@@ -351,11 +628,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "auto = pd.read_csv(\"../auto-mpg.csv\")"
]
},
{
@@ -367,11 +645,124 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 80,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mpg | \n",
+ " cylinders | \n",
+ " displacement | \n",
+ " horse_power | \n",
+ " weight | \n",
+ " acceleration | \n",
+ " model_year | \n",
+ " car_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18.0 | \n",
+ " 8 | \n",
+ " 307.0 | \n",
+ " 130.0 | \n",
+ " 3504 | \n",
+ " 12.0 | \n",
+ " 70 | \n",
+ " \\t\"chevrolet chevelle malibu\" | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 15.0 | \n",
+ " 8 | \n",
+ " 350.0 | \n",
+ " 165.0 | \n",
+ " 3693 | \n",
+ " 11.5 | \n",
+ " 70 | \n",
+ " \\t\"buick skylark 320\" | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 18.0 | \n",
+ " 8 | \n",
+ " 318.0 | \n",
+ " 150.0 | \n",
+ " 3436 | \n",
+ " 11.0 | \n",
+ " 70 | \n",
+ " \\t\"plymouth satellite\" | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 16.0 | \n",
+ " 8 | \n",
+ " 304.0 | \n",
+ " 150.0 | \n",
+ " 3433 | \n",
+ " 12.0 | \n",
+ " 70 | \n",
+ " \\t\"amc rebel sst\" | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 17.0 | \n",
+ " 8 | \n",
+ " 302.0 | \n",
+ " 140.0 | \n",
+ " 3449 | \n",
+ " 10.5 | \n",
+ " 70 | \n",
+ " \\t\"ford torino\" | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mpg cylinders displacement horse_power weight acceleration \\\n",
+ "0 18.0 8 307.0 130.0 3504 12.0 \n",
+ "1 15.0 8 350.0 165.0 3693 11.5 \n",
+ "2 18.0 8 318.0 150.0 3436 11.0 \n",
+ "3 16.0 8 304.0 150.0 3433 12.0 \n",
+ "4 17.0 8 302.0 140.0 3449 10.5 \n",
+ "\n",
+ " model_year car_name \n",
+ "0 70 \\t\"chevrolet chevelle malibu\" \n",
+ "1 70 \\t\"buick skylark 320\" \n",
+ "2 70 \\t\"plymouth satellite\" \n",
+ "3 70 \\t\"amc rebel sst\" \n",
+ "4 70 \\t\"ford torino\" "
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "auto.head()"
]
},
{
@@ -383,11 +774,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "mpg float64\n",
+ "cylinders int64\n",
+ "displacement float64\n",
+ "horse_power float64\n",
+ "weight int64\n",
+ "acceleration float64\n",
+ "model_year int64\n",
+ "car_name object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 83,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "auto.dtypes"
]
},
{
@@ -399,11 +810,481 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 90,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "70\n",
+ "82\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "print(auto[\"model_year\"].min())\n",
+ "print(auto[\"model_year\"].max())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 99,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mpg | \n",
+ " cylinders | \n",
+ " displacement | \n",
+ " horse_power | \n",
+ " weight | \n",
+ " acceleration | \n",
+ " model_year | \n",
+ " car_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 367 | \n",
+ " 28.0 | \n",
+ " 4 | \n",
+ " 112.0 | \n",
+ " 88.0 | \n",
+ " 2605 | \n",
+ " 19.6 | \n",
+ " 82 | \n",
+ " \\t\"chevrolet cavalier\" | \n",
+ "
\n",
+ " \n",
+ " | 368 | \n",
+ " 27.0 | \n",
+ " 4 | \n",
+ " 112.0 | \n",
+ " 88.0 | \n",
+ " 2640 | \n",
+ " 18.6 | \n",
+ " 82 | \n",
+ " \\t\"chevrolet cavalier wagon\" | \n",
+ "
\n",
+ " \n",
+ " | 369 | \n",
+ " 34.0 | \n",
+ " 4 | \n",
+ " 112.0 | \n",
+ " 88.0 | \n",
+ " 2395 | \n",
+ " 18.0 | \n",
+ " 82 | \n",
+ " \\t\"chevrolet cavalier 2-door\" | \n",
+ "
\n",
+ " \n",
+ " | 370 | \n",
+ " 31.0 | \n",
+ " 4 | \n",
+ " 112.0 | \n",
+ " 85.0 | \n",
+ " 2575 | \n",
+ " 16.2 | \n",
+ " 82 | \n",
+ " \\t\"pontiac j2000 se hatchback\" | \n",
+ "
\n",
+ " \n",
+ " | 371 | \n",
+ " 29.0 | \n",
+ " 4 | \n",
+ " 135.0 | \n",
+ " 84.0 | \n",
+ " 2525 | \n",
+ " 16.0 | \n",
+ " 82 | \n",
+ " \\t\"dodge aries se\" | \n",
+ "
\n",
+ " \n",
+ " | 372 | \n",
+ " 27.0 | \n",
+ " 4 | \n",
+ " 151.0 | \n",
+ " 90.0 | \n",
+ " 2735 | \n",
+ " 18.0 | \n",
+ " 82 | \n",
+ " \\t\"pontiac phoenix\" | \n",
+ "
\n",
+ " \n",
+ " | 373 | \n",
+ " 24.0 | \n",
+ " 4 | \n",
+ " 140.0 | \n",
+ " 92.0 | \n",
+ " 2865 | \n",
+ " 16.4 | \n",
+ " 82 | \n",
+ " \\t\"ford fairmont futura\" | \n",
+ "
\n",
+ " \n",
+ " | 374 | \n",
+ " 23.0 | \n",
+ " 4 | \n",
+ " 151.0 | \n",
+ " NaN | \n",
+ " 3035 | \n",
+ " 20.5 | \n",
+ " 82 | \n",
+ " \\t\"amc concord dl\" | \n",
+ "
\n",
+ " \n",
+ " | 375 | \n",
+ " 36.0 | \n",
+ " 4 | \n",
+ " 105.0 | \n",
+ " 74.0 | \n",
+ " 1980 | \n",
+ " 15.3 | \n",
+ " 82 | \n",
+ " \\t\"volkswagen rabbit l\" | \n",
+ "
\n",
+ " \n",
+ " | 376 | \n",
+ " 37.0 | \n",
+ " 4 | \n",
+ " 91.0 | \n",
+ " 68.0 | \n",
+ " 2025 | \n",
+ " 18.2 | \n",
+ " 82 | \n",
+ " \\t\"mazda glc custom l\" | \n",
+ "
\n",
+ " \n",
+ " | 377 | \n",
+ " 31.0 | \n",
+ " 4 | \n",
+ " 91.0 | \n",
+ " 68.0 | \n",
+ " 1970 | \n",
+ " 17.6 | \n",
+ " 82 | \n",
+ " \\t\"mazda glc custom\" | \n",
+ "
\n",
+ " \n",
+ " | 378 | \n",
+ " 38.0 | \n",
+ " 4 | \n",
+ " 105.0 | \n",
+ " 63.0 | \n",
+ " 2125 | \n",
+ " 14.7 | \n",
+ " 82 | \n",
+ " \\t\"plymouth horizon miser\" | \n",
+ "
\n",
+ " \n",
+ " | 379 | \n",
+ " 36.0 | \n",
+ " 4 | \n",
+ " 98.0 | \n",
+ " 70.0 | \n",
+ " 2125 | \n",
+ " 17.3 | \n",
+ " 82 | \n",
+ " \\t\"mercury lynx l\" | \n",
+ "
\n",
+ " \n",
+ " | 380 | \n",
+ " 36.0 | \n",
+ " 4 | \n",
+ " 120.0 | \n",
+ " 88.0 | \n",
+ " 2160 | \n",
+ " 14.5 | \n",
+ " 82 | \n",
+ " \\t\"nissan stanza xe\" | \n",
+ "
\n",
+ " \n",
+ " | 381 | \n",
+ " 36.0 | \n",
+ " 4 | \n",
+ " 107.0 | \n",
+ " 75.0 | \n",
+ " 2205 | \n",
+ " 14.5 | \n",
+ " 82 | \n",
+ " \\t\"honda accord\" | \n",
+ "
\n",
+ " \n",
+ " | 382 | \n",
+ " 34.0 | \n",
+ " 4 | \n",
+ " 108.0 | \n",
+ " 70.0 | \n",
+ " 2245 | \n",
+ " 16.9 | \n",
+ " 82 | \n",
+ " \\t\"toyota corolla\" | \n",
+ "
\n",
+ " \n",
+ " | 383 | \n",
+ " 38.0 | \n",
+ " 4 | \n",
+ " 91.0 | \n",
+ " 67.0 | \n",
+ " 1965 | \n",
+ " 15.0 | \n",
+ " 82 | \n",
+ " \\t\"honda civic\" | \n",
+ "
\n",
+ " \n",
+ " | 384 | \n",
+ " 32.0 | \n",
+ " 4 | \n",
+ " 91.0 | \n",
+ " 67.0 | \n",
+ " 1965 | \n",
+ " 15.7 | \n",
+ " 82 | \n",
+ " \\t\"honda civic (auto)\" | \n",
+ "
\n",
+ " \n",
+ " | 385 | \n",
+ " 38.0 | \n",
+ " 4 | \n",
+ " 91.0 | \n",
+ " 67.0 | \n",
+ " 1995 | \n",
+ " 16.2 | \n",
+ " 82 | \n",
+ " \\t\"datsun 310 gx\" | \n",
+ "
\n",
+ " \n",
+ " | 386 | \n",
+ " 25.0 | \n",
+ " 6 | \n",
+ " 181.0 | \n",
+ " 110.0 | \n",
+ " 2945 | \n",
+ " 16.4 | \n",
+ " 82 | \n",
+ " \\t\"buick century limited\" | \n",
+ "
\n",
+ " \n",
+ " | 387 | \n",
+ " 38.0 | \n",
+ " 6 | \n",
+ " 262.0 | \n",
+ " 85.0 | \n",
+ " 3015 | \n",
+ " 17.0 | \n",
+ " 82 | \n",
+ " \\t\"oldsmobile cutlass ciera (diesel)\" | \n",
+ "
\n",
+ " \n",
+ " | 388 | \n",
+ " 26.0 | \n",
+ " 4 | \n",
+ " 156.0 | \n",
+ " 92.0 | \n",
+ " 2585 | \n",
+ " 14.5 | \n",
+ " 82 | \n",
+ " \\t\"chrysler lebaron medallion\" | \n",
+ "
\n",
+ " \n",
+ " | 389 | \n",
+ " 22.0 | \n",
+ " 6 | \n",
+ " 232.0 | \n",
+ " 112.0 | \n",
+ " 2835 | \n",
+ " 14.7 | \n",
+ " 82 | \n",
+ " \\t\"ford granada l\" | \n",
+ "
\n",
+ " \n",
+ " | 390 | \n",
+ " 32.0 | \n",
+ " 4 | \n",
+ " 144.0 | \n",
+ " 96.0 | \n",
+ " 2665 | \n",
+ " 13.9 | \n",
+ " 82 | \n",
+ " \\t\"toyota celica gt\" | \n",
+ "
\n",
+ " \n",
+ " | 391 | \n",
+ " 36.0 | \n",
+ " 4 | \n",
+ " 135.0 | \n",
+ " 84.0 | \n",
+ " 2370 | \n",
+ " 13.0 | \n",
+ " 82 | \n",
+ " \\t\"dodge charger 2.2\" | \n",
+ "
\n",
+ " \n",
+ " | 392 | \n",
+ " 27.0 | \n",
+ " 4 | \n",
+ " 151.0 | \n",
+ " 90.0 | \n",
+ " 2950 | \n",
+ " 17.3 | \n",
+ " 82 | \n",
+ " \\t\"chevrolet camaro\" | \n",
+ "
\n",
+ " \n",
+ " | 393 | \n",
+ " 27.0 | \n",
+ " 4 | \n",
+ " 140.0 | \n",
+ " 86.0 | \n",
+ " 2790 | \n",
+ " 15.6 | \n",
+ " 82 | \n",
+ " \\t\"ford mustang gl\" | \n",
+ "
\n",
+ " \n",
+ " | 394 | \n",
+ " 44.0 | \n",
+ " 4 | \n",
+ " 97.0 | \n",
+ " 52.0 | \n",
+ " 2130 | \n",
+ " 24.6 | \n",
+ " 82 | \n",
+ " \\t\"vw pickup\" | \n",
+ "
\n",
+ " \n",
+ " | 395 | \n",
+ " 32.0 | \n",
+ " 4 | \n",
+ " 135.0 | \n",
+ " 84.0 | \n",
+ " 2295 | \n",
+ " 11.6 | \n",
+ " 82 | \n",
+ " \\t\"dodge rampage\" | \n",
+ "
\n",
+ " \n",
+ " | 396 | \n",
+ " 28.0 | \n",
+ " 4 | \n",
+ " 120.0 | \n",
+ " 79.0 | \n",
+ " 2625 | \n",
+ " 18.6 | \n",
+ " 82 | \n",
+ " \\t\"ford ranger\" | \n",
+ "
\n",
+ " \n",
+ " | 397 | \n",
+ " 31.0 | \n",
+ " 4 | \n",
+ " 119.0 | \n",
+ " 82.0 | \n",
+ " 2720 | \n",
+ " 19.4 | \n",
+ " 82 | \n",
+ " \\t\"chevy s-10\" | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mpg cylinders displacement horse_power weight acceleration \\\n",
+ "367 28.0 4 112.0 88.0 2605 19.6 \n",
+ "368 27.0 4 112.0 88.0 2640 18.6 \n",
+ "369 34.0 4 112.0 88.0 2395 18.0 \n",
+ "370 31.0 4 112.0 85.0 2575 16.2 \n",
+ "371 29.0 4 135.0 84.0 2525 16.0 \n",
+ "372 27.0 4 151.0 90.0 2735 18.0 \n",
+ "373 24.0 4 140.0 92.0 2865 16.4 \n",
+ "374 23.0 4 151.0 NaN 3035 20.5 \n",
+ "375 36.0 4 105.0 74.0 1980 15.3 \n",
+ "376 37.0 4 91.0 68.0 2025 18.2 \n",
+ "377 31.0 4 91.0 68.0 1970 17.6 \n",
+ "378 38.0 4 105.0 63.0 2125 14.7 \n",
+ "379 36.0 4 98.0 70.0 2125 17.3 \n",
+ "380 36.0 4 120.0 88.0 2160 14.5 \n",
+ "381 36.0 4 107.0 75.0 2205 14.5 \n",
+ "382 34.0 4 108.0 70.0 2245 16.9 \n",
+ "383 38.0 4 91.0 67.0 1965 15.0 \n",
+ "384 32.0 4 91.0 67.0 1965 15.7 \n",
+ "385 38.0 4 91.0 67.0 1995 16.2 \n",
+ "386 25.0 6 181.0 110.0 2945 16.4 \n",
+ "387 38.0 6 262.0 85.0 3015 17.0 \n",
+ "388 26.0 4 156.0 92.0 2585 14.5 \n",
+ "389 22.0 6 232.0 112.0 2835 14.7 \n",
+ "390 32.0 4 144.0 96.0 2665 13.9 \n",
+ "391 36.0 4 135.0 84.0 2370 13.0 \n",
+ "392 27.0 4 151.0 90.0 2950 17.3 \n",
+ "393 27.0 4 140.0 86.0 2790 15.6 \n",
+ "394 44.0 4 97.0 52.0 2130 24.6 \n",
+ "395 32.0 4 135.0 84.0 2295 11.6 \n",
+ "396 28.0 4 120.0 79.0 2625 18.6 \n",
+ "397 31.0 4 119.0 82.0 2720 19.4 \n",
+ "\n",
+ " model_year car_name \n",
+ "367 82 \\t\"chevrolet cavalier\" \n",
+ "368 82 \\t\"chevrolet cavalier wagon\" \n",
+ "369 82 \\t\"chevrolet cavalier 2-door\" \n",
+ "370 82 \\t\"pontiac j2000 se hatchback\" \n",
+ "371 82 \\t\"dodge aries se\" \n",
+ "372 82 \\t\"pontiac phoenix\" \n",
+ "373 82 \\t\"ford fairmont futura\" \n",
+ "374 82 \\t\"amc concord dl\" \n",
+ "375 82 \\t\"volkswagen rabbit l\" \n",
+ "376 82 \\t\"mazda glc custom l\" \n",
+ "377 82 \\t\"mazda glc custom\" \n",
+ "378 82 \\t\"plymouth horizon miser\" \n",
+ "379 82 \\t\"mercury lynx l\" \n",
+ "380 82 \\t\"nissan stanza xe\" \n",
+ "381 82 \\t\"honda accord\" \n",
+ "382 82 \\t\"toyota corolla\" \n",
+ "383 82 \\t\"honda civic\" \n",
+ "384 82 \\t\"honda civic (auto)\" \n",
+ "385 82 \\t\"datsun 310 gx\" \n",
+ "386 82 \\t\"buick century limited\" \n",
+ "387 82 \\t\"oldsmobile cutlass ciera (diesel)\" \n",
+ "388 82 \\t\"chrysler lebaron medallion\" \n",
+ "389 82 \\t\"ford granada l\" \n",
+ "390 82 \\t\"toyota celica gt\" \n",
+ "391 82 \\t\"dodge charger 2.2\" \n",
+ "392 82 \\t\"chevrolet camaro\" \n",
+ "393 82 \\t\"ford mustang gl\" \n",
+ "394 82 \\t\"vw pickup\" \n",
+ "395 82 \\t\"dodge rampage\" \n",
+ "396 82 \\t\"ford ranger\" \n",
+ "397 82 \\t\"chevy s-10\" "
+ ]
+ },
+ "execution_count": 99,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "auto[auto[\"model_year\"]==82]"
]
},
{
@@ -415,11 +1296,40 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 94,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "auto_prep = auto.dropna()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 95,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "mpg 0\n",
+ "cylinders 0\n",
+ "displacement 0\n",
+ "horse_power 0\n",
+ "weight 0\n",
+ "acceleration 0\n",
+ "model_year 0\n",
+ "car_name 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 95,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "auto_prep.isna().sum()"
]
},
{
@@ -431,11 +1341,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 96,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4 199\n",
+ "8 103\n",
+ "6 83\n",
+ "3 4\n",
+ "5 3\n",
+ "Name: cylinders, dtype: int64"
+ ]
+ },
+ "execution_count": 96,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "auto_prep[\"cylinders\"].value_counts()"
]
},
{
@@ -451,11 +1378,36 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 97,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\Vasco Nicolau\\AppData\\Local\\Temp\\ipykernel_28036\\791769897.py:2: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " auto_prep.drop(columns = \"car_name\", inplace = True)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "# Reassign instead of dropping in place: `auto_prep` was derived from `auto`\n",
+ "# via dropna(), and an in-place drop on it raises SettingWithCopyWarning.\n",
+ "# errors=\"ignore\" keeps the cell safe to re-run once the column is gone.\n",
+ "auto_prep = auto_prep.drop(columns=\"car_name\", errors=\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 134,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Predictors are every remaining column except the response, `model_year`.\n",
+ "features = auto_prep.drop(columns=\"model_year\")\n",
+ "labels = auto_prep[\"model_year\"]\n",
+ "\n",
+ "# Fresh estimator for the auto data (this rebinds the notebook-level `model`).\n",
+ "model = LinearRegression()\n",
+ "\n",
+ "# Fix the seed so the train/test scores below are reproducible.\n",
+ "x_train, x_test, y_train, y_test = train_test_split(features, labels, random_state=42)\n"
]
},
{
@@ -469,11 +1421,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 135,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "auto_model = model.fit(x_train,y_train)"
]
},
{
@@ -493,11 +1446,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 136,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.4716051324906728"
+ ]
+ },
+ "execution_count": 136,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "from sklearn.metrics import r2_score\n",
+ "y_pred = model.predict(x_train)\n",
+ "\n",
+ "r_squared = r2_score(y_train, y_pred)\n",
+ "\n",
+ "r_squared"
]
},
{
@@ -513,11 +1483,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 137,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.4721030493247429"
+ ]
+ },
+ "execution_count": 137,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "y_test_pred = model.predict(x_test)\n",
+ "\n",
+ "r_squared_test = r2_score(y_test, y_test_pred)\n",
+ "\n",
+ "r_squared_test"
]
},
{
@@ -542,11 +1528,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 165,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here: \n",
+ "# Hold out 10% of the rows for testing; the fixed seed keeps this split reproducible\n",
+ "x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.10, random_state=42)"
]
},
{
@@ -558,11 +1545,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 166,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "model = LinearRegression()\n",
+ "auto_model09 = model.fit(x_train,y_train)"
]
},
{
@@ -574,11 +1563,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 167,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.2730588325889347"
+ ]
+ },
+ "execution_count": 167,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "y_test_pred = model.predict(x_test)\n",
+ "\n",
+ "r_squared_test = r2_score(y_test, y_test_pred)\n",
+ "\n",
+ "r_squared_test"
]
},
{
@@ -590,11 +1595,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 168,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "# It decreased: instead of being somewhere around 0.4/0.5, it is around 0.30"
]
},
{
@@ -703,7 +1709,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -717,7 +1723,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.9"
+ "version": "3.9.13"
}
},
"nbformat": 4,