diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 8a9fa9e..7c8d48f 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,11 +12,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 175, "metadata": {}, "outputs": [], "source": [ - "# Import your libraries:\n" + "# Import your libraries:\n", + "import pandas as pd\n", + "import numpy as np" ] }, { @@ -37,11 +39,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 176, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "from sklearn.datasets import load_diabetes\n", + "diabetes=load_diabetes()" ] }, { @@ -53,11 +57,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 177, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename', 'data_module'])" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes.keys()" ] }, { @@ -73,13 +89,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 178, "metadata": { "scrolled": false }, - "outputs": [], - "source": [ - "# Your code here:\n" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".. _diabetes_dataset:\n", + "\n", + "Diabetes dataset\n", + "----------------\n", + "\n", + "Ten baseline variables, age, sex, body mass index, average blood\n", + "pressure, and six blood serum measurements were obtained for each of n =\n", + "442 diabetes patients, as well as the response of interest, a\n", + "quantitative measure of disease progression one year after baseline.\n", + "\n", + "**Data Set Characteristics:**\n", + "\n", + " :Number of Instances: 442\n", + "\n", + " :Number of Attributes: First 10 columns are numeric predictive values\n", + "\n", + " :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n", + "\n", + " :Attribute Information:\n", + " - age age in years\n", + " - sex\n", + " - bmi body mass index\n", + " - bp average blood pressure\n", + " - s1 tc, total serum cholesterol\n", + " - s2 ldl, low-density lipoproteins\n", + " - s3 hdl, high-density lipoproteins\n", + " - s4 tch, total cholesterol / HDL\n", + " - s5 ltg, possibly log of serum triglycerides level\n", + " - s6 glu, blood sugar level\n", + "\n", + "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times the square root of `n_samples` (i.e. the sum of squares of each column totals 1).\n", + "\n", + "Source URL:\n", + "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n", + "\n", + "For more information see:\n", + "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n", + "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n", + "\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "print(diabetes['DESCR'])" ] }, { @@ -97,11 +160,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 179, "metadata": {}, "outputs": [], "source": [ - "# Enter your answer here:\n" + "# Enter your answer here:\n", + "#1-> 10, information about the people who took exams\n", + "#2-> diabetes['target'] is a parameter that calculates the progression of the disease after a year\n", + "#3-> 442" ] }, { @@ -115,11 +181,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 180, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n", + "(442,)\n" + ] + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "print(diabetes['data'].shape)\n", + "print(diabetes['target'].shape)" ] }, { @@ -156,11 +233,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 181, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.model_selection import train_test_split" ] }, { @@ -172,11 +251,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 182, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes_model=LinearRegression()" ] }, { @@ -190,11 +270,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 183, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes_data_train=diabetes['data'][:-20]\n", + "diabetes_data_test=diabetes['data'][-20:]\n", + "diabetes_target_train=diabetes['target'][:-20]\n", + "diabetes_target_test=diabetes['target'][-20:]\n", + "\n", + "# diabetes_data_train, diabetes_data_test, diabetes_target_train, diabetes_target_test = train_test_split(diabetes['data'], diabetes['target'], test_size= 20)" ] }, { @@ -206,11 +292,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 184, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Intercept: 152.76429169049118\n", + "Coefficients: [ 3.06094248e-01 -2.37635570e+02 5.10538048e+02 3.27729878e+02\n", + " -8.14111926e+02 4.92799595e+02 1.02841240e+02 1.84603496e+02\n", + " 7.43509388e+02 7.60966464e+01]\n" + ] + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes_model.fit(diabetes_data_train,diabetes_target_train)\n", + "print(\"Intercept:\",diabetes_model.intercept_)\n", + "print(\"Coefficients:\", diabetes_model.coef_)" ] }, { @@ -231,11 +331,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([197.61898486, 155.44031962, 172.88875144, 111.53270645,\n", + " 164.79397301, 131.06765869, 259.12441219, 100.47873746,\n", + " 117.06005372, 124.30261597, 218.36868146, 61.19581944,\n", + " 132.24837933, 120.33293546, 52.54513009, 194.03746764,\n", + " 102.5756431 , 123.56778709, 211.03465323, 52.60221696])" + ] + }, + "execution_count": 185, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "diabetes_model.predict(diabetes_data_test)" ] }, { @@ -247,11 +363,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 186, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([233., 91., 111., 152., 120., 67., 310., 94., 183., 66., 173.,\n", + " 72., 49., 64., 48., 178., 104., 132., 220., 57.])" + ] + }, + "execution_count": 186, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes_target_test" ] }, { @@ -263,11 +392,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 187, "metadata": {}, "outputs": [], "source": [ - "# Your explanation here:\n" + "# Your explanation here:\n", + "#No we need to train the model better" ] }, { @@ -302,11 +432,61 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 188, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: y R-squared: 0.512\n", + "Model: OLS Adj. R-squared: 0.500\n", + "Method: Least Squares F-statistic: 43.16\n", + "Date: Thu, 24 Aug 2023 Prob (F-statistic): 4.65e-58\n", + "Time: 15:54:15 Log-Likelihood: -2281.1\n", + "No. Observations: 422 AIC: 4584.\n", + "Df Residuals: 411 BIC: 4629.\n", + "Df Model: 10 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const 152.7643 2.658 57.468 0.000 147.539 157.990\n", + "x1 0.3061 61.286 0.005 0.996 -120.167 120.779\n", + "x2 -237.6356 62.837 -3.782 0.000 -361.158 -114.113\n", + "x3 510.5380 68.156 7.491 0.000 376.561 644.515\n", + "x4 327.7299 66.876 4.901 0.000 196.267 459.192\n", + "x5 -814.1119 424.040 -1.920 0.056 -1647.669 19.445\n", + "x6 492.7996 344.223 1.432 0.153 -183.857 1169.457\n", + "x7 102.8412 219.462 0.469 0.640 -328.566 534.248\n", + "x8 184.6035 167.336 1.103 0.271 -144.338 513.545\n", + "x9 743.5094 175.357 4.240 0.000 398.801 1088.218\n", + "x10 76.0966 68.293 1.114 0.266 -58.151 210.344\n", + "==============================================================================\n", + "Omnibus: 1.544 Durbin-Watson: 2.026\n", + "Prob(Omnibus): 0.462 Jarque-Bera (JB): 1.421\n", + "Skew: 0.004 Prob(JB): 0.491\n", + "Kurtosis: 2.716 Cond. No. 224.\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "import statsmodels.api as sm\n", + "\n", + "diabetes_data_train_const = sm.add_constant(diabetes_data_train)\n", + "\n", + "mod = sm.OLS( diabetes_target_train, diabetes_data_train_const)\n", + "\n", + "res = mod.fit()\n", + "\n", + "print(res.summary())" ] }, { @@ -326,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 189, "metadata": {}, "outputs": [], "source": [ @@ -351,11 +531,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 190, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "auto=pd.read_csv('../auto-mpg.csv')" ] }, { @@ -367,11 +548,124 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 191, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mpgcylindersdisplacementhorse_powerweightaccelerationmodel_yearcar_name
018.08307.0130.0350412.070\\t\"chevrolet chevelle malibu\"
115.08350.0165.0369311.570\\t\"buick skylark 320\"
218.08318.0150.0343611.070\\t\"plymouth satellite\"
316.08304.0150.0343312.070\\t\"amc rebel sst\"
417.08302.0140.0344910.570\\t\"ford torino\"
\n", + "
" + ], + "text/plain": [ + " mpg cylinders displacement horse_power weight acceleration \\\n", + "0 18.0 8 307.0 130.0 3504 12.0 \n", + "1 15.0 8 350.0 165.0 3693 11.5 \n", + "2 18.0 8 318.0 150.0 3436 11.0 \n", + "3 16.0 8 304.0 150.0 3433 12.0 \n", + "4 17.0 8 302.0 140.0 3449 10.5 \n", + "\n", + " model_year car_name \n", + "0 70 \\t\"chevrolet chevelle malibu\" \n", + "1 70 \\t\"buick skylark 320\" \n", + "2 70 \\t\"plymouth satellite\" \n", + "3 70 \\t\"amc rebel sst\" \n", + "4 70 \\t\"ford torino\" " + ] + }, + "execution_count": 191, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "auto.head()" ] }, { @@ -383,11 +677,34 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 192, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 398 entries, 0 to 397\n", + "Data columns (total 8 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 mpg 398 non-null float64\n", + " 1 cylinders 398 non-null int64 \n", + " 2 displacement 398 non-null float64\n", + " 3 horse_power 392 non-null float64\n", + " 4 weight 398 non-null int64 \n", + " 5 acceleration 398 non-null float64\n", + " 6 model_year 398 non-null int64 \n", + " 7 car_name 398 non-null object \n", + "dtypes: float64(4), int64(3), object(1)\n", + "memory usage: 25.0+ KB\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "auto.info()" ] }, { @@ -399,11 +716,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 193, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "70" + ] + }, + "execution_count": 193, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "auto['model_year'].min()" + ] + }, + { + "cell_type": "code", + "execution_count": 194, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The oldest model year is 70\n", + "The newest model year is 82\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "print(f'''The oldest model year is {auto['model_year'].min()}\n", + "The newest model year is {auto['model_year'].max()}''')" ] }, { @@ -415,11 +763,32 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 195, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mpg 0\n", + "cylinders 0\n", + "displacement 0\n", + "horse_power 0\n", + "weight 0\n", + "acceleration 0\n", + "model_year 0\n", + "car_name 0\n", + "dtype: int64" + ] + }, + "execution_count": 195, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "auto.dropna(inplace=True)\n", + "auto.isna().sum()\n" ] }, { @@ -431,11 +800,30 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 196, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "cylinders\n", + "4 199\n", + "8 103\n", + "6 83\n", + "3 4\n", + "5 3\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 196, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "auto['cylinders'].value_counts()\n", + "#there are 5 cylinders values possible" ] }, { @@ -451,11 +839,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 197, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "auto.drop('car_name',axis=1,inplace=True)\n", + "feature=auto.drop('mpg',axis=1)\n", + "target=auto['mpg']\n", + "X_train,X_test,y_train,y_test=train_test_split(feature,target,test_size=0.2)\n" ] }, { @@ -469,11 +861,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 216, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 216, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "auto_model=LinearRegression()\n", + "auto_model.fit(X_train,y_train)\n" ] }, { @@ -493,11 +901,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 220, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.815045703503711" + ] + }, + "execution_count": 220, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "from sklearn.metrics import r2_score\n", + "y_pred=auto_model.predict(X_train)\n", + "r2_score(y_train,y_pred)" ] }, { @@ -513,11 +935,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 221, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.7754354118636195" + ] + }, + "execution_count": 221, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "y_test_pred = model.predict(X_test)\n", + "r2_score(y_test,y_test_pred)" ] }, { @@ -542,11 +977,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 222, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "X_train09,X_test09,y_train09,y_test09=train_test_split(feature,target,test_size=0.1)" ] }, { @@ -558,11 +994,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 223, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 223, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "auto_model09=LinearRegression()\n", + "auto_model09.fit(X_train09,y_train09)" ] }, { @@ -574,11 +1026,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 226, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.8080536802305803" + ] + }, + "execution_count": 226, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "y_pred09=auto_model.predict(X_train09)\n", + "r2_score(y_train09,y_pred09)" ] }, { @@ -590,11 +1055,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 227, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.8135694691090268" + ] + }, + "execution_count": 227, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "y_test_pred09 = model.predict(X_test09)\n", + "r2_score(y_test09,y_test_pred09)" ] }, { @@ -610,7 +1088,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 205, "metadata": {}, "outputs": [], "source": [ @@ -626,7 +1104,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 206, "metadata": {}, "outputs": [], "source": [ @@ -642,7 +1120,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 207, "metadata": {}, "outputs": [], "source": [ @@ -660,7 +1138,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 208, "metadata": {}, "outputs": [], "source": [ @@ -676,7 +1154,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 209, "metadata": {}, "outputs": [], "source": [ @@ -717,7 +1195,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.11.4" } }, "nbformat": 4,