diff --git a/auto-mpg.csv b/your-code/auto-mpg.csv
similarity index 100%
rename from auto-mpg.csv
rename to your-code/auto-mpg.csv
diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 8a9fa9e..3effe9b 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,11 +12,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
- "# Import your libraries:\n"
+ "import pandas as pd"
]
},
{
@@ -37,11 +37,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "from sklearn.datasets import load_diabetes"
]
},
{
@@ -53,11 +53,90 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'data': array([[ 0.03807591, 0.05068012, 0.06169621, ..., -0.00259226,\n",
+ " 0.01990749, -0.01764613],\n",
+ " [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,\n",
+ " -0.06833155, -0.09220405],\n",
+ " [ 0.08529891, 0.05068012, 0.04445121, ..., -0.00259226,\n",
+ " 0.00286131, -0.02593034],\n",
+ " ...,\n",
+ " [ 0.04170844, 0.05068012, -0.01590626, ..., -0.01107952,\n",
+ " -0.04688253, 0.01549073],\n",
+ " [-0.04547248, -0.04464164, 0.03906215, ..., 0.02655962,\n",
+ " 0.04452873, -0.02593034],\n",
+ " [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,\n",
+ " -0.00422151, 0.00306441]]),\n",
+ " 'target': array([151., 75., 141., 206., 135., 97., 138., 63., 110., 310., 101.,\n",
+ " 69., 179., 185., 118., 171., 166., 144., 97., 168., 68., 49.,\n",
+ " 68., 245., 184., 202., 137., 85., 131., 283., 129., 59., 341.,\n",
+ " 87., 65., 102., 265., 276., 252., 90., 100., 55., 61., 92.,\n",
+ " 259., 53., 190., 142., 75., 142., 155., 225., 59., 104., 182.,\n",
+ " 128., 52., 37., 170., 170., 61., 144., 52., 128., 71., 163.,\n",
+ " 150., 97., 160., 178., 48., 270., 202., 111., 85., 42., 170.,\n",
+ " 200., 252., 113., 143., 51., 52., 210., 65., 141., 55., 134.,\n",
+ " 42., 111., 98., 164., 48., 96., 90., 162., 150., 279., 92.,\n",
+ " 83., 128., 102., 302., 198., 95., 53., 134., 144., 232., 81.,\n",
+ " 104., 59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,\n",
+ " 173., 180., 84., 121., 161., 99., 109., 115., 268., 274., 158.,\n",
+ " 107., 83., 103., 272., 85., 280., 336., 281., 118., 317., 235.,\n",
+ " 60., 174., 259., 178., 128., 96., 126., 288., 88., 292., 71.,\n",
+ " 197., 186., 25., 84., 96., 195., 53., 217., 172., 131., 214.,\n",
+ " 59., 70., 220., 268., 152., 47., 74., 295., 101., 151., 127.,\n",
+ " 237., 225., 81., 151., 107., 64., 138., 185., 265., 101., 137.,\n",
+ " 143., 141., 79., 292., 178., 91., 116., 86., 122., 72., 129.,\n",
+ " 142., 90., 158., 39., 196., 222., 277., 99., 196., 202., 155.,\n",
+ " 77., 191., 70., 73., 49., 65., 263., 248., 296., 214., 185.,\n",
+ " 78., 93., 252., 150., 77., 208., 77., 108., 160., 53., 220.,\n",
+ " 154., 259., 90., 246., 124., 67., 72., 257., 262., 275., 177.,\n",
+ " 71., 47., 187., 125., 78., 51., 258., 215., 303., 243., 91.,\n",
+ " 150., 310., 153., 346., 63., 89., 50., 39., 103., 308., 116.,\n",
+ " 145., 74., 45., 115., 264., 87., 202., 127., 182., 241., 66.,\n",
+ " 94., 283., 64., 102., 200., 265., 94., 230., 181., 156., 233.,\n",
+ " 60., 219., 80., 68., 332., 248., 84., 200., 55., 85., 89.,\n",
+ " 31., 129., 83., 275., 65., 198., 236., 253., 124., 44., 172.,\n",
+ " 114., 142., 109., 180., 144., 163., 147., 97., 220., 190., 109.,\n",
+ " 191., 122., 230., 242., 248., 249., 192., 131., 237., 78., 135.,\n",
+ " 244., 199., 270., 164., 72., 96., 306., 91., 214., 95., 216.,\n",
+ " 263., 178., 113., 200., 139., 139., 88., 148., 88., 243., 71.,\n",
+ " 77., 109., 272., 60., 54., 221., 90., 311., 281., 182., 321.,\n",
+ " 58., 262., 206., 233., 242., 123., 167., 63., 197., 71., 168.,\n",
+ " 140., 217., 121., 235., 245., 40., 52., 104., 132., 88., 69.,\n",
+ " 219., 72., 201., 110., 51., 277., 63., 118., 69., 273., 258.,\n",
+ " 43., 198., 242., 232., 175., 93., 168., 275., 293., 281., 72.,\n",
+ " 140., 189., 181., 209., 136., 261., 113., 131., 174., 257., 55.,\n",
+ " 84., 42., 146., 212., 233., 91., 111., 152., 120., 67., 310.,\n",
+ " 94., 183., 66., 173., 72., 49., 64., 48., 178., 104., 132.,\n",
+ " 220., 57.]),\n",
+ " 'frame': None,\n",
+ " 'DESCR': '.. _diabetes_dataset:\\n\\nDiabetes dataset\\n----------------\\n\\nTen baseline variables, age, sex, body mass index, average blood\\npressure, and six blood serum measurements were obtained for each of n =\\n442 diabetes patients, as well as the response of interest, a\\nquantitative measure of disease progression one year after baseline.\\n\\n**Data Set Characteristics:**\\n\\n :Number of Instances: 442\\n\\n :Number of Attributes: First 10 columns are numeric predictive values\\n\\n :Target: Column 11 is a quantitative measure of disease progression one year after baseline\\n\\n :Attribute Information:\\n - age age in years\\n - sex\\n - bmi body mass index\\n - bp average blood pressure\\n - s1 tc, total serum cholesterol\\n - s2 ldl, low-density lipoproteins\\n - s3 hdl, high-density lipoproteins\\n - s4 tch, total cholesterol / HDL\\n - s5 ltg, possibly log of serum triglycerides level\\n - s6 glu, blood sugar level\\n\\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times the square root of `n_samples` (i.e. the sum of squares of each column totals 1).\\n\\nSource URL:\\nhttps://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\\n\\nFor more information see:\\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\\n(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\\n',\n",
+ " 'feature_names': ['age',\n",
+ " 'sex',\n",
+ " 'bmi',\n",
+ " 'bp',\n",
+ " 's1',\n",
+ " 's2',\n",
+ " 's3',\n",
+ " 's4',\n",
+ " 's5',\n",
+ " 's6'],\n",
+ " 'data_filename': 'diabetes_data_raw.csv.gz',\n",
+ " 'target_filename': 'diabetes_target.csv.gz',\n",
+ " 'data_module': 'sklearn.datasets.data'}"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "load_diabetes()"
]
},
{
@@ -73,13 +152,59 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {
"scrolled": false
},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ".. _diabetes_dataset:\n",
+ "\n",
+ "Diabetes dataset\n",
+ "----------------\n",
+ "\n",
+ "Ten baseline variables, age, sex, body mass index, average blood\n",
+ "pressure, and six blood serum measurements were obtained for each of n =\n",
+ "442 diabetes patients, as well as the response of interest, a\n",
+ "quantitative measure of disease progression one year after baseline.\n",
+ "\n",
+ "**Data Set Characteristics:**\n",
+ "\n",
+ " :Number of Instances: 442\n",
+ "\n",
+ " :Number of Attributes: First 10 columns are numeric predictive values\n",
+ "\n",
+ " :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n",
+ "\n",
+ " :Attribute Information:\n",
+ " - age age in years\n",
+ " - sex\n",
+ " - bmi body mass index\n",
+ " - bp average blood pressure\n",
+ " - s1 tc, total serum cholesterol\n",
+ " - s2 ldl, low-density lipoproteins\n",
+ " - s3 hdl, high-density lipoproteins\n",
+ " - s4 tch, total cholesterol / HDL\n",
+ " - s5 ltg, possibly log of serum triglycerides level\n",
+ " - s6 glu, blood sugar level\n",
+ "\n",
+ "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times the square root of `n_samples` (i.e. the sum of squares of each column totals 1).\n",
+ "\n",
+ "Source URL:\n",
+ "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n",
+ "\n",
+ "For more information see:\n",
+ "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n",
+ "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(load_diabetes()['DESCR'])"
]
},
{
@@ -97,11 +222,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
- "# Enter your answer here:\n"
+ "# 10 attributes\n",
+ "# 'data' are the features we'll use to predict values like 'target'\n",
+ "# 442 records"
]
},
{
@@ -115,11 +242,45 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(442, 10)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Load the dataset once and reuse it instead of calling load_diabetes() three times.\n",
+ "diabetes = load_diabetes()\n",
+ "diabetes['data'] = pd.DataFrame(diabetes['data'], columns = diabetes['feature_names'])\n",
+ "diabetes['data'].shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(442, 1)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Reuse the Bunch loaded earlier rather than reading the dataset from disk again.\n",
+ "diabetes['target'] = pd.DataFrame(diabetes['target'], columns = ['target'])\n",
+ "diabetes['target'].shape"
]
},
{
@@ -156,11 +317,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "from sklearn.linear_model import LinearRegression"
]
},
{
@@ -172,11 +333,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "diabetes_model = LinearRegression()"
]
},
{
@@ -190,11 +351,17 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# test_size = 0.05 holds out ceil(0.05 * 442) = 23 rows for testing (20 rows would be 20/442 = 4.5%)\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "features = diabetes['data']\n",
+ "target = diabetes['target']\n",
+ "\n",
+ "x_train, x_test, y_train, y_test= train_test_split(features, target, random_state = 0, test_size = 0.05)"
]
},
{
@@ -206,11 +373,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
+ ],
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "diabetes_model.fit(x_train, y_train)"
]
},
{
@@ -231,11 +412,51 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[152.29381843]\n",
+ "[[ -31.7605229 -252.18207172 506.10080931 354.47865753 -738.54800934\n",
+ " 437.84133511 70.77548804 178.0337923 718.26704056 77.64177897]]\n",
+ "[[235.21250016]\n",
+ " [246.65791356]\n",
+ " [164.58380407]\n",
+ " [119.92994211]\n",
+ " [189.79266424]\n",
+ " [250.63216638]\n",
+ " [114.88799329]\n",
+ " [188.32456819]\n",
+ " [143.43371148]\n",
+ " [228.33472904]\n",
+ " [165.94552439]\n",
+ " [177.11682099]\n",
+ " [104.83770607]\n",
+ " [ 86.90065901]\n",
+ " [244.35550346]\n",
+ " [ 88.25595435]\n",
+ " [158.81322019]\n",
+ " [ 68.07326136]\n",
+ " [101.33827656]\n",
+ " [225.9101069 ]\n",
+ " [193.22405692]\n",
+ " [155.67587861]\n",
+ " [162.63078436]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "interc = diabetes_model.intercept_\n",
+ "print(interc)\n",
+ "\n",
+ "coef = diabetes_model.coef_\n",
+ "print(coef)\n",
+ "\n",
+ "pred = diabetes_model.predict(x_test)\n",
+ "print(pred)"
]
},
{
@@ -247,11 +468,166 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 13,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 362 | \n",
+ " 321.0 | \n",
+ "
\n",
+ " \n",
+ " | 249 | \n",
+ " 215.0 | \n",
+ "
\n",
+ " \n",
+ " | 271 | \n",
+ " 127.0 | \n",
+ "
\n",
+ " \n",
+ " | 435 | \n",
+ " 64.0 | \n",
+ "
\n",
+ " \n",
+ " | 400 | \n",
+ " 175.0 | \n",
+ "
\n",
+ " \n",
+ " | 403 | \n",
+ " 275.0 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 179.0 | \n",
+ "
\n",
+ " \n",
+ " | 399 | \n",
+ " 232.0 | \n",
+ "
\n",
+ " \n",
+ " | 198 | \n",
+ " 142.0 | \n",
+ "
\n",
+ " \n",
+ " | 205 | \n",
+ " 99.0 | \n",
+ "
\n",
+ " \n",
+ " | 78 | \n",
+ " 252.0 | \n",
+ "
\n",
+ " \n",
+ " | 144 | \n",
+ " 174.0 | \n",
+ "
\n",
+ " \n",
+ " | 298 | \n",
+ " 129.0 | \n",
+ "
\n",
+ " \n",
+ " | 171 | \n",
+ " 74.0 | \n",
+ "
\n",
+ " \n",
+ " | 268 | \n",
+ " 264.0 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 49.0 | \n",
+ "
\n",
+ " \n",
+ " | 194 | \n",
+ " 86.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 75.0 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 101.0 | \n",
+ "
\n",
+ " \n",
+ " | 208 | \n",
+ " 155.0 | \n",
+ "
\n",
+ " \n",
+ " | 76 | \n",
+ " 170.0 | \n",
+ "
\n",
+ " \n",
+ " | 37 | \n",
+ " 276.0 | \n",
+ "
\n",
+ " \n",
+ " | 388 | \n",
+ " 110.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " target\n",
+ "362 321.0\n",
+ "249 215.0\n",
+ "271 127.0\n",
+ "435 64.0\n",
+ "400 175.0\n",
+ "403 275.0\n",
+ "12 179.0\n",
+ "399 232.0\n",
+ "198 142.0\n",
+ "205 99.0\n",
+ "78 252.0\n",
+ "144 174.0\n",
+ "298 129.0\n",
+ "171 74.0\n",
+ "268 264.0\n",
+ "21 49.0\n",
+ "194 86.0\n",
+ "1 75.0\n",
+ "10 101.0\n",
+ "208 155.0\n",
+ "76 170.0\n",
+ "37 276.0\n",
+ "388 110.0"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_test"
]
},
{
@@ -263,11 +639,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "target 0.984705\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your explanation here:\n"
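+ "# The mean of y_test/pred is close to 1 (it would be exactly 1 if y_test == pred), but that only\n",
+ "# shows that over- and under-predictions cancel out on average. Row by row the gaps are large\n",
+ "# (e.g. actual 321 vs predicted ~235, actual 64 vs predicted ~120), so the values are not the same.\n",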
+ "(y_test/pred).mean()\n"
]
},
{
@@ -302,7 +691,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@@ -326,7 +715,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
@@ -351,11 +740,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "auto = pd.read_csv('auto-mpg.csv')"
]
},
{
@@ -367,11 +756,123 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mpg | \n",
+ " cylinders | \n",
+ " displacement | \n",
+ " horse_power | \n",
+ " weight | \n",
+ " acceleration | \n",
+ " model_year | \n",
+ " car_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18.0 | \n",
+ " 8 | \n",
+ " 307.0 | \n",
+ " 130.0 | \n",
+ " 3504 | \n",
+ " 12.0 | \n",
+ " 70 | \n",
+ " \\t\"chevrolet chevelle malibu\" | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 15.0 | \n",
+ " 8 | \n",
+ " 350.0 | \n",
+ " 165.0 | \n",
+ " 3693 | \n",
+ " 11.5 | \n",
+ " 70 | \n",
+ " \\t\"buick skylark 320\" | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 18.0 | \n",
+ " 8 | \n",
+ " 318.0 | \n",
+ " 150.0 | \n",
+ " 3436 | \n",
+ " 11.0 | \n",
+ " 70 | \n",
+ " \\t\"plymouth satellite\" | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 16.0 | \n",
+ " 8 | \n",
+ " 304.0 | \n",
+ " 150.0 | \n",
+ " 3433 | \n",
+ " 12.0 | \n",
+ " 70 | \n",
+ " \\t\"amc rebel sst\" | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 17.0 | \n",
+ " 8 | \n",
+ " 302.0 | \n",
+ " 140.0 | \n",
+ " 3449 | \n",
+ " 10.5 | \n",
+ " 70 | \n",
+ " \\t\"ford torino\" | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mpg cylinders displacement horse_power weight acceleration \\\n",
+ "0 18.0 8 307.0 130.0 3504 12.0 \n",
+ "1 15.0 8 350.0 165.0 3693 11.5 \n",
+ "2 18.0 8 318.0 150.0 3436 11.0 \n",
+ "3 16.0 8 304.0 150.0 3433 12.0 \n",
+ "4 17.0 8 302.0 140.0 3449 10.5 \n",
+ "\n",
+ " model_year car_name \n",
+ "0 70 \\t\"chevrolet chevelle malibu\" \n",
+ "1 70 \\t\"buick skylark 320\" \n",
+ "2 70 \\t\"plymouth satellite\" \n",
+ "3 70 \\t\"amc rebel sst\" \n",
+ "4 70 \\t\"ford torino\" "
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "auto.head()"
]
},
{
@@ -383,11 +884,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "mpg float64\n",
+ "cylinders int64\n",
+ "displacement float64\n",
+ "horse_power float64\n",
+ "weight int64\n",
+ "acceleration float64\n",
+ "model_year int64\n",
+ "car_name object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "auto.dtypes\n",
+ "#all types ok"
]
},
{
@@ -399,11 +920,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "oldest model year: 70\n",
+ "newest model year: 82\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# min()/max() do not depend on the row order of the CSV, unlike unique()[0] / unique()[-1].\n",
+ "print('oldest model year: ', auto['model_year'].min())\n",
+ "print('newest model year: ', auto['model_year'].max())\n"
]
},
{
@@ -415,11 +946,67 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "mpg 0\n",
+ "cylinders 0\n",
+ "displacement 0\n",
+ "horse_power 6\n",
+ "weight 0\n",
+ "acceleration 0\n",
+ "model_year 0\n",
+ "car_name 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "auto.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "auto = auto.dropna()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "mpg 0\n",
+ "cylinders 0\n",
+ "displacement 0\n",
+ "horse_power 0\n",
+ "weight 0\n",
+ "acceleration 0\n",
+ "model_year 0\n",
+ "car_name 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "auto.isna().sum()"
]
},
{
@@ -431,11 +1018,45 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "cylinders\n",
+ "4 199\n",
+ "8 103\n",
+ "6 83\n",
+ "3 4\n",
+ "5 3\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "auto['cylinders'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "there are 5 values for cylinders\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "print('there are', auto['cylinders'].nunique(), 'values for cylinders')"
]
},
{
@@ -451,11 +1072,16 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Reassign instead of dropping in place so the cell can be re-run without a KeyError\n",
+ "# (errors = 'ignore' makes the drop a no-op once 'car_name' is already gone).\n",
+ "auto = auto.drop(columns = ['car_name'], errors = 'ignore')\n",
+ "\n",
+ "features = auto.drop('mpg',axis=1)\n",
+ "target = auto['mpg']\n",
+ "\n",
+ "x_train, x_test, y_train, y_test = train_test_split(features,target,random_state = 1, test_size = 0.2)"
]
},
{
@@ -469,11 +1095,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
+ ],
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import LinearRegression\n",
+ "\n",
+ "automodel=LinearRegression()\n",
+ "automodel.fit(x_train,y_train)"
]
},
{
@@ -493,11 +1136,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.8144902542572598\n",
+ "0.8144902542572598\n"
+ ]
+ }
+ ],
+ "source": [
+ "#also can be done as\n",
+ "\n",
+ "print(automodel.score(x_test,y_test))\n",
+ "\n",
+ "#but\n",
+ "from sklearn.metrics import r2_score\n",
+ "y_pred = automodel.predict(x_test)\n",
+ "print(r2_score(y_test, y_pred))"
]
},
{
@@ -513,11 +1172,19 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 44,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.805894855213757\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "print(automodel.score(x_train,y_train))"
]
},
{
@@ -542,11 +1209,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "x_train2, x_test2, y_train2, y_test2 = train_test_split(features,target, random_state = 0, test_size = 0.1)"
]
},
{
@@ -558,11 +1225,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 48,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
+ ],
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Train the second model on the 90% split (x_train2 / y_train2), not on the earlier 80% split.\n",
+ "automodel09=LinearRegression()\n",
+ "automodel09.fit(x_train2,y_train2)"
]
},
{
@@ -574,11 +1256,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 49,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.7940489105129045\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Evaluate automodel09 (the second model), not the earlier automodel.\n",
+ "y_pred = automodel09.predict(x_test2)\n",
+ "print(r2_score(y_test2, y_pred))"
]
},
{
@@ -590,7 +1281,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@@ -610,7 +1301,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
@@ -626,7 +1317,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
@@ -642,7 +1333,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
@@ -660,7 +1351,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
@@ -676,7 +1367,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
@@ -703,7 +1394,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -717,7 +1408,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.9"
+ "version": "3.10.9"
}
},
"nbformat": 4,