From e2d743b4026ffc3f9bcf860550967deab43847cd Mon Sep 17 00:00:00 2001
From: Jasper Tielmann <jaspertielmann@web.de>
Date: Sun, 26 Nov 2023 22:46:14 +0000
Subject: [PATCH] Lab done

---
 auto-mpg.csv => your-code/auto-mpg.csv |   0
 your-code/main.ipynb                   | 881 ++++++++++++++++++++++---
 2 files changed, 784 insertions(+), 97 deletions(-)
 rename auto-mpg.csv => your-code/auto-mpg.csv (100%)

diff --git a/auto-mpg.csv b/your-code/auto-mpg.csv
similarity index 100%
rename from auto-mpg.csv
rename to your-code/auto-mpg.csv
diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 8a9fa9e..388b417 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,11 +12,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Import your libraries:\n"
+    "import pyforest\n",
+    "from sklearn.metrics import confusion_matrix, accuracy_score, mean_squared_error"
    ]
   },
   {
@@ -37,11 +38,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "from sklearn.datasets import load_diabetes\n",
+    "diabetes = load_diabetes()"
    ]
   },
   {
@@ -53,11 +55,90 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,\n",
+       "          0.01990749, -0.01764613],\n",
+       "        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,\n",
+       "         -0.06833155, -0.09220405],\n",
+       "        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,\n",
+       "          0.00286131, -0.02593034],\n",
+       "        ...,\n",
+       "        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,\n",
+       "         -0.04688253,  0.01549073],\n",
+       "        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,\n",
+       "          0.04452873, -0.02593034],\n",
+       "        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,\n",
+       "         -0.00422151,  0.00306441]]),\n",
+       " 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,\n",
+       "         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,\n",
+       "         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,\n",
+       "         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,\n",
+       "        259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,\n",
+       "        128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,\n",
+       "        150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,\n",
+       "        200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,\n",
+       "         42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,\n",
+       "         83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,\n",
+       "        104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,\n",
+       "        173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,\n",
+       "        107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,\n",
+       "         60., 174., 259., 178., 128.,  96., 126., 288.,  88., 292.,  71.,\n",
+       "        197., 186.,  25.,  84.,  96., 195.,  53., 217., 172., 131., 214.,\n",
+       "         59.,  70., 220., 268., 152.,  47.,  74., 295., 101., 151., 127.,\n",
+       "        237., 225.,  81., 151., 107.,  64., 138., 185., 265., 101., 137.,\n",
+       "        143., 141.,  79., 292., 178.,  91., 116.,  86., 122.,  72., 129.,\n",
+       "        142.,  90., 158.,  39., 196., 222., 277.,  99., 196., 202., 155.,\n",
+       "         77., 191.,  70.,  73.,  49.,  65., 263., 248., 296., 214., 185.,\n",
+       "         78.,  93., 252., 150.,  77., 208.,  77., 108., 160.,  53., 220.,\n",
+       "        154., 259.,  90., 246., 124.,  67.,  72., 257., 262., 275., 177.,\n",
+       "         71.,  47., 187., 125.,  78.,  51., 258., 215., 303., 243.,  91.,\n",
+       "        150., 310., 153., 346.,  63.,  89.,  50.,  39., 103., 308., 116.,\n",
+       "        145.,  74.,  45., 115., 264.,  87., 202., 127., 182., 241.,  66.,\n",
+       "         94., 283.,  64., 102., 200., 265.,  94., 230., 181., 156., 233.,\n",
+       "         60., 219.,  80.,  68., 332., 248.,  84., 200.,  55.,  85.,  89.,\n",
+       "         31., 129.,  83., 275.,  65., 198., 236., 253., 124.,  44., 172.,\n",
+       "        114., 142., 109., 180., 144., 163., 147.,  97., 220., 190., 109.,\n",
+       "        191., 122., 230., 242., 248., 249., 192., 131., 237.,  78., 135.,\n",
+       "        244., 199., 270., 164.,  72.,  96., 306.,  91., 214.,  95., 216.,\n",
+       "        263., 178., 113., 200., 139., 139.,  88., 148.,  88., 243.,  71.,\n",
+       "         77., 109., 272.,  60.,  54., 221.,  90., 311., 281., 182., 321.,\n",
+       "         58., 262., 206., 233., 242., 123., 167.,  63., 197.,  71., 168.,\n",
+       "        140., 217., 121., 235., 245.,  40.,  52., 104., 132.,  88.,  69.,\n",
+       "        219.,  72., 201., 110.,  51., 277.,  63., 118.,  69., 273., 258.,\n",
+       "         43., 198., 242., 232., 175.,  93., 168., 275., 293., 281.,  72.,\n",
+       "        140., 189., 181., 209., 136., 261., 113., 131., 174., 257.,  55.,\n",
+       "         84.,  42., 146., 212., 233.,  91., 111., 152., 120.,  67., 310.,\n",
+       "         94., 183.,  66., 173.,  72.,  49.,  64.,  48., 178., 104., 132.,\n",
+       "        220.,  57.]),\n",
+       " 'frame': None,\n",
+       " 'DESCR': '.. _diabetes_dataset:\\n\\nDiabetes dataset\\n----------------\\n\\nTen baseline variables, age, sex, body mass index, average blood\\npressure, and six blood serum measurements were obtained for each of n =\\n442 diabetes patients, as well as the response of interest, a\\nquantitative measure of disease progression one year after baseline.\\n\\n**Data Set Characteristics:**\\n\\n  :Number of Instances: 442\\n\\n  :Number of Attributes: First 10 columns are numeric predictive values\\n\\n  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\\n\\n  :Attribute Information:\\n      - age     age in years\\n      - sex\\n      - bmi     body mass index\\n      - bp      average blood pressure\\n      - s1      tc, total serum cholesterol\\n      - s2      ldl, low-density lipoproteins\\n      - s3      hdl, high-density lipoproteins\\n      - s4      tch, total cholesterol / HDL\\n      - s5      ltg, possibly log of serum triglycerides level\\n      - s6      glu, blood sugar level\\n\\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times the square root of `n_samples` (i.e. the sum of squares of each column totals 1).\\n\\nSource URL:\\nhttps://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\\n\\nFor more information see:\\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\\n(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\\n',\n",
+       " 'feature_names': ['age',\n",
+       "  'sex',\n",
+       "  'bmi',\n",
+       "  'bp',\n",
+       "  's1',\n",
+       "  's2',\n",
+       "  's3',\n",
+       "  's4',\n",
+       "  's5',\n",
+       "  's6'],\n",
+       " 'data_filename': 'diabetes_data_raw.csv.gz',\n",
+       " 'target_filename': 'diabetes_target.csv.gz',\n",
+       " 'data_module': 'sklearn.datasets.data'}"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "diabetes"
    ]
   },
   {
@@ -73,13 +154,60 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {
     "scrolled": false
    },
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ".. _diabetes_dataset:\n",
+      "\n",
+      "Diabetes dataset\n",
+      "----------------\n",
+      "\n",
+      "Ten baseline variables, age, sex, body mass index, average blood\n",
+      "pressure, and six blood serum measurements were obtained for each of n =\n",
+      "442 diabetes patients, as well as the response of interest, a\n",
+      "quantitative measure of disease progression one year after baseline.\n",
+      "\n",
+      "**Data Set Characteristics:**\n",
+      "\n",
+      "  :Number of Instances: 442\n",
+      "\n",
+      "  :Number of Attributes: First 10 columns are numeric predictive values\n",
+      "\n",
+      "  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n",
+      "\n",
+      "  :Attribute Information:\n",
+      "      - age     age in years\n",
+      "      - sex\n",
+      "      - bmi     body mass index\n",
+      "      - bp      average blood pressure\n",
+      "      - s1      tc, total serum cholesterol\n",
+      "      - s2      ldl, low-density lipoproteins\n",
+      "      - s3      hdl, high-density lipoproteins\n",
+      "      - s4      tch, total cholesterol / HDL\n",
+      "      - s5      ltg, possibly log of serum triglycerides level\n",
+      "      - s6      glu, blood sugar level\n",
+      "\n",
+      "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times the square root of `n_samples` (i.e. the sum of squares of each column totals 1).\n",
+      "\n",
+      "Source URL:\n",
+      "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n",
+      "\n",
+      "For more information see:\n",
+      "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n",
+      "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "print(diabetes[\"DESCR\"])"
    ]
   },
   {
@@ -97,11 +225,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Enter your answer here:\n"
+    "# Enter your answer here:\n",
+    "# there are 10 attributes in the data. They're numeric and needed to predict the target.\n",
+    "# data holds the features; target holds the values the features are supposed to predict\n",
+    "# there are 442 instances in the data"
    ]
   },
   {
@@ -115,11 +246,53 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/javascript": [
+       "\n",
+       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import pandas as pd'); }\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.Javascript object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/javascript": [
+       "\n",
+       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import pandas as pd'); }\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.Javascript object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(442,)\n",
+      "(442, 10)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "features = pd.DataFrame(diabetes[\"data\"], columns=diabetes[\"feature_names\"])\n",
+    "target = pd.Series(diabetes[\"target\"], name=\"target\")\n",
+    "\n",
+    "print(target.shape)\n",
+    "print(features.shape)"
    ]
   },
   {
@@ -156,11 +329,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "from sklearn.linear_model import LinearRegression"
    ]
   },
   {
@@ -172,11 +345,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "diabetes_model = LinearRegression()"
    ]
   },
   {
@@ -190,11 +363,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/javascript": [
+       "\n",
+       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import pandas as pd\\nfrom sklearn.model_selection import train_test_split'); }\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.Javascript object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "diabetes_data_train, diabetes_data_test, diabetes_target_train, diabetes_target_test = train_test_split(features, target)"
    ]
   },
   {
@@ -206,11 +394,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Interception: 153.29025971764074\n",
+      "Coefficents:  [   14.19144904  -287.81159219   485.90819878   370.64689156\n",
+      " -1230.15768889   752.49586282   213.31951642   324.96723948\n",
+      "   882.76864042    44.73301878]\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes_model.fit(diabetes_data_train, diabetes_target_train)\n",
+    "print(\"Interception:\", diabetes_model.intercept_)\n",
+    "print(\"Coefficents: \", diabetes_model.coef_)"
    ]
   },
   {
@@ -231,11 +433,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "pred = diabetes_model.predict(diabetes_data_test)"
    ]
   },
   {
@@ -247,11 +450,168 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/javascript": [
+       "\n",
+       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import pandas as pd\\nfrom sklearn.model_selection import train_test_split'); }\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.Javascript object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>target</th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>131.0</th>\n",
+       "      <td>165.9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>55.0</th>\n",
+       "      <td>41.8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>341.0</th>\n",
+       "      <td>282.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>90.0</th>\n",
+       "      <td>50.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>103.0</th>\n",
+       "      <td>124.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>262.0</th>\n",
+       "      <td>170.7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>77.0</th>\n",
+       "      <td>57.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>264.0</th>\n",
+       "      <td>253.2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>151.0</th>\n",
+       "      <td>173.2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>150.0</th>\n",
+       "      <td>206.5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>111 rows × 1 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            0\n",
+       "target       \n",
+       "131.0   165.9\n",
+       "55.0     41.8\n",
+       "341.0   282.1\n",
+       "90.0     50.6\n",
+       "103.0   124.5\n",
+       "...       ...\n",
+       "262.0   170.7\n",
+       "77.0     57.1\n",
+       "264.0   253.2\n",
+       "151.0   173.2\n",
+       "150.0   206.5\n",
+       "\n",
+       "[111 rows x 1 columns]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "pred = [round(i,1) for i in pred]\n",
+    "display(pd.DataFrame(pred, diabetes_target_test))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "r^2:  0.3626947436331116\n"
+     ]
+    },
+    {
+     "data": {
+      "application/javascript": [
+       "\n",
+       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import numpy as np\\nimport pandas as pd\\nfrom sklearn.model_selection import train_test_split'); }\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.Javascript object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "rmse:  61.253116853955476\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.metrics import mean_squared_error\n",
+    "\n",
+    "print(\"r^2: \", diabetes_model.score(diabetes_data_test, diabetes_target_test))\n",
+    "print(\"rmse: \", np.sqrt(mean_squared_error(pred, diabetes_target_test)))"
    ]
   },
   {
@@ -263,11 +623,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your explanation here:\n"
+    "# Your explanation here:\n",
+    "# No, it is not; there is actually quite a big variation between the predictions and the true values.\n",
+    "# It is very rare that LinearRegression predicts the data exactly, because a linear fit only approximates it.\n",
+    "# The goal is to get as close as possible in order to make meaningful predictions and gain insights."
    ]
   },
   {
@@ -351,11 +714,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/javascript": [
+       "\n",
+       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import numpy as np\\nimport pandas as pd\\nfrom sklearn.model_selection import train_test_split'); }\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.Javascript object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto = pd.read_csv(\"auto-mpg.csv\")"
    ]
   },
   {
@@ -367,11 +746,124 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mpg</th>\n",
+       "      <th>cylinders</th>\n",
+       "      <th>displacement</th>\n",
+       "      <th>horse_power</th>\n",
+       "      <th>weight</th>\n",
+       "      <th>acceleration</th>\n",
+       "      <th>model_year</th>\n",
+       "      <th>car_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>307.0</td>\n",
+       "      <td>130.0</td>\n",
+       "      <td>3504</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"chevrolet chevelle malibu\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>15.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>350.0</td>\n",
+       "      <td>165.0</td>\n",
+       "      <td>3693</td>\n",
+       "      <td>11.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"buick skylark 320\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>318.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3436</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"plymouth satellite\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>16.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>304.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3433</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"amc rebel sst\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>17.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>302.0</td>\n",
+       "      <td>140.0</td>\n",
+       "      <td>3449</td>\n",
+       "      <td>10.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"ford torino\"</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    mpg  cylinders  displacement  horse_power  weight  acceleration  \\\n",
+       "0  18.0          8         307.0        130.0    3504          12.0   \n",
+       "1  15.0          8         350.0        165.0    3693          11.5   \n",
+       "2  18.0          8         318.0        150.0    3436          11.0   \n",
+       "3  16.0          8         304.0        150.0    3433          12.0   \n",
+       "4  17.0          8         302.0        140.0    3449          10.5   \n",
+       "\n",
+       "   model_year                       car_name  \n",
+       "0          70  \\t\"chevrolet chevelle malibu\"  \n",
+       "1          70          \\t\"buick skylark 320\"  \n",
+       "2          70         \\t\"plymouth satellite\"  \n",
+       "3          70              \\t\"amc rebel sst\"  \n",
+       "4          70                \\t\"ford torino\"  "
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.head()"
    ]
   },
   {
@@ -383,11 +875,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             float64\n",
+       "cylinders         int64\n",
+       "displacement    float64\n",
+       "horse_power     float64\n",
+       "weight            int64\n",
+       "acceleration    float64\n",
+       "model_year        int64\n",
+       "car_name         object\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.dtypes"
    ]
   },
   {
@@ -399,11 +911,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 41,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "oldest:  70\n",
+      "newest:  82\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "print(\"oldest: \", auto[\"model_year\"].min())\n",
+    "print(\"newest: \", auto[\"model_year\"].max())"
    ]
   },
   {
@@ -415,11 +938,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             0\n",
+       "cylinders       0\n",
+       "displacement    0\n",
+       "horse_power     0\n",
+       "weight          0\n",
+       "acceleration    0\n",
+       "model_year      0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 56,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.dropna(inplace=True)\n",
+    "auto.isnull().sum()"
    ]
   },
   {
@@ -431,11 +974,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4    204\n",
+       "8    103\n",
+       "6     84\n",
+       "3      4\n",
+       "5      3\n",
+       "Name: cylinders, dtype: int64"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto[\"cylinders\"].value_counts()"
    ]
   },
   {
@@ -451,11 +1011,39 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto.drop(\"car_name\", axis=1, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/javascript": [
+       "\n",
+       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import numpy as np\\nimport pandas as pd\\nfrom sklearn.model_selection import train_test_split'); }\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.Javascript object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "features = auto.drop(\"mpg\", axis=1)\n",
+    "label = auto[\"mpg\"]\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.80)"
    ]
   },
   {
@@ -469,11 +1057,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style>#sk-container-id-4 {color: black;background-color: white;}#sk-container-id-4 pre{padding: 0;}#sk-container-id-4 div.sk-toggleable {background-color: white;}#sk-container-id-4 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-4 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-4 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-4 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-4 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-4 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-4 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-4 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-4 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-4 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-4 div.sk-item {position: relative;z-index: 1;}#sk-container-id-4 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-4 div.sk-item::before, #sk-container-id-4 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-4 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-4 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-4 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-4 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-4 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-4 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-4 div.sk-label-container {text-align: center;}#sk-container-id-4 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-4 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LinearRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" checked><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div>"
+      ],
+      "text/plain": [
+       "LinearRegression()"
+      ]
+     },
+     "execution_count": 63,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto_model = LinearRegression()\n",
+    "\n",
+    "auto_model.fit(X_train, y_train)"
    ]
   },
   {
@@ -493,11 +1098,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 69,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "r^2:  0.8322920653782885\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# R^2 on the training split; r2_score takes (y_true, y_pred) and is not symmetric.\n",
+    "from sklearn.metrics import r2_score\n",
+    "y_pred = auto_model.predict(X_train)\n",
+    "print(\"r^2: \", r2_score(y_train, y_pred))"
    ]
   },
   {
@@ -513,11 +1129,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 60,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "r^2:  0.7992202382383298\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# R^2 on the held-out test split; ground truth goes first in r2_score(y_true, y_pred).\n",
+    "y_test_data = auto_model.predict(X_test)\n",
+    "print(\"r^2: \", r2_score(y_test, y_test_data))"
    ]
   },
   {
@@ -542,11 +1168,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/javascript": [
+       "\n",
+       "        if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import numpy as np\\nimport pandas as pd\\nfrom sklearn.model_selection import train_test_split'); }\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.Javascript object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "X_train09, X_test09, y_train09, y_test09 = train_test_split(features, label, test_size=0.10)"
    ]
   },
   {
@@ -558,11 +1200,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 78,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style>#sk-container-id-6 {color: black;background-color: white;}#sk-container-id-6 pre{padding: 0;}#sk-container-id-6 div.sk-toggleable {background-color: white;}#sk-container-id-6 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-6 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-6 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-6 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-6 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-6 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-6 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-6 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-6 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-6 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-6 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-6 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-6 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-6 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-6 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-6 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-6 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-6 div.sk-item {position: relative;z-index: 1;}#sk-container-id-6 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-6 div.sk-item::before, #sk-container-id-6 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-6 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-6 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-6 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-6 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-6 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-6 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-6 div.sk-label-container {text-align: center;}#sk-container-id-6 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-6 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-6\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LinearRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" checked><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div>"
+      ],
+      "text/plain": [
+       "LinearRegression()"
+      ]
+     },
+     "execution_count": 78,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto_model09 = LinearRegression()\n",
+    "auto_model09.fit(X_train09, y_train09)"
    ]
   },
   {
@@ -574,11 +1232,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 79,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "r^2:  0.7538710984983046\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# R^2 of auto_model09 on its training split; r2_score expects (y_true, y_pred).\n",
+    "y_pred = auto_model09.predict(X_train09)\n",
+    "print(\"r^2: \", r2_score(y_train09, y_pred))"
    ]
   },
   {
@@ -590,11 +1258,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "r^2:  0.8718352161114574\n"
+     ]
+    }
+   ],
+   "source": [
+    "# R^2 of auto_model09 on its test split; r2_score expects (y_true, y_pred).\n",
+    "y_pred = auto_model09.predict(X_test09)\n",
+    "print(\"r^2: \", r2_score(y_test09, y_pred))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# While the R^2 on the training data actually got worse, the R^2 on the test data improved noticeably, to 0.87."
    ]
   },
   {
@@ -703,7 +1390,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -717,7 +1404,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.9"
+   "version": "3.11.3"
   }
  },
  "nbformat": 4,