diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 8a9fa9e..7c8d48f 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,11 +12,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 175, "metadata": {}, "outputs": [], "source": [ - "# Import your libraries:\n" + "# Import your libraries:\n", + "import pandas as pd\n", + "import numpy as np" ] }, { @@ -37,11 +39,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 176, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "from sklearn.datasets import load_diabetes\n", + "diabetes=load_diabetes()" ] }, { @@ -53,11 +57,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 177, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename', 'data_module'])" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes.keys()" ] }, { @@ -73,13 +89,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 178, "metadata": { "scrolled": false }, - "outputs": [], - "source": [ - "# Your code here:\n" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".. _diabetes_dataset:\n", + "\n", + "Diabetes dataset\n", + "----------------\n", + "\n", + "Ten baseline variables, age, sex, body mass index, average blood\n", + "pressure, and six blood serum measurements were obtained for each of n =\n", + "442 diabetes patients, as well as the response of interest, a\n", + "quantitative measure of disease progression one year after baseline.\n", + "\n", + "**Data Set Characteristics:**\n", + "\n", + " :Number of Instances: 442\n", + "\n", + " :Number of Attributes: First 10 columns are numeric predictive values\n", + "\n", + " :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n", + "\n", + " :Attribute Information:\n", + " - age age in years\n", + " - sex\n", + " - bmi body mass index\n", + " - bp average blood pressure\n", + " - s1 tc, total serum cholesterol\n", + " - s2 ldl, low-density lipoproteins\n", + " - s3 hdl, high-density lipoproteins\n", + " - s4 tch, total cholesterol / HDL\n", + " - s5 ltg, possibly log of serum triglycerides level\n", + " - s6 glu, blood sugar level\n", + "\n", + "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times the square root of `n_samples` (i.e. the sum of squares of each column totals 1).\n", + "\n", + "Source URL:\n", + "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n", + "\n", + "For more information see:\n", + "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n", + "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n", + "\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "print(diabetes['DESCR'])" ] }, { @@ -97,11 +160,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 179, "metadata": {}, "outputs": [], "source": [ - "# Enter your answer here:\n" + "# Enter your answer here:\n", + "#1-> 10, information about the people who took exams\n", + "#2-> diabetes['target'] is a parameter that calculates the progression of the disease after a year\n", + "#3-> 442" ] }, { @@ -115,11 +181,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 180, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n", + "(442,)\n" + ] + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "print(diabetes['data'].shape)\n", + "print(diabetes['target'].shape)" ] }, { @@ -156,11 +233,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 181, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.model_selection import train_test_split" ] }, { @@ -172,11 +251,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 182, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes_model=LinearRegression()" ] }, { @@ -190,11 +270,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 183, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes_data_train=diabetes['data'][:-20]\n", + "diabetes_data_test=diabetes['data'][-20:]\n", + "diabetes_target_train=diabetes['target'][:-20]\n", + "diabetes_target_test=diabetes['target'][-20:]\n", + "\n", + "# diabetes_data_train, diabetes_data_test, diabetes_target_train, diabetes_target_test = train_test_split(diabetes['data'], diabetes['target'], test_size= 20)" ] }, { @@ -206,11 +292,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 184, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Intercept: 152.76429169049118\n", + "Coefficients: [ 3.06094248e-01 -2.37635570e+02 5.10538048e+02 3.27729878e+02\n", + " -8.14111926e+02 4.92799595e+02 1.02841240e+02 1.84603496e+02\n", + " 7.43509388e+02 7.60966464e+01]\n" + ] + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes_model.fit(diabetes_data_train,diabetes_target_train)\n", + "print(\"Intercept:\",diabetes_model.intercept_)\n", + "print(\"Coefficients:\", diabetes_model.coef_)" ] }, { @@ -231,11 +331,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([197.61898486, 155.44031962, 172.88875144, 111.53270645,\n", + " 164.79397301, 131.06765869, 259.12441219, 100.47873746,\n", + " 117.06005372, 124.30261597, 218.36868146, 61.19581944,\n", + " 132.24837933, 120.33293546, 52.54513009, 194.03746764,\n", + " 102.5756431 , 123.56778709, 211.03465323, 52.60221696])" + ] + }, + "execution_count": 185, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "diabetes_model.predict(diabetes_data_test)" ] }, { @@ -247,11 +363,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 186, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([233., 91., 111., 152., 120., 67., 310., 94., 183., 66., 173.,\n", + " 72., 49., 64., 48., 178., 104., 132., 220., 57.])" + ] + }, + "execution_count": 186, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes_target_test" ] }, { @@ -263,11 +392,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 187, "metadata": {}, "outputs": [], "source": [ - "# Your explanation here:\n" + "# Your explanation here:\n", + "#No we need to train the model better" ] }, { @@ -302,11 +432,61 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 188, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: y R-squared: 0.512\n", + "Model: OLS Adj. R-squared: 0.500\n", + "Method: Least Squares F-statistic: 43.16\n", + "Date: Thu, 24 Aug 2023 Prob (F-statistic): 4.65e-58\n", + "Time: 15:54:15 Log-Likelihood: -2281.1\n", + "No. Observations: 422 AIC: 4584.\n", + "Df Residuals: 411 BIC: 4629.\n", + "Df Model: 10 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const 152.7643 2.658 57.468 0.000 147.539 157.990\n", + "x1 0.3061 61.286 0.005 0.996 -120.167 120.779\n", + "x2 -237.6356 62.837 -3.782 0.000 -361.158 -114.113\n", + "x3 510.5380 68.156 7.491 0.000 376.561 644.515\n", + "x4 327.7299 66.876 4.901 0.000 196.267 459.192\n", + "x5 -814.1119 424.040 -1.920 0.056 -1647.669 19.445\n", + "x6 492.7996 344.223 1.432 0.153 -183.857 1169.457\n", + "x7 102.8412 219.462 0.469 0.640 -328.566 534.248\n", + "x8 184.6035 167.336 1.103 0.271 -144.338 513.545\n", + "x9 743.5094 175.357 4.240 0.000 398.801 1088.218\n", + "x10 76.0966 68.293 1.114 0.266 -58.151 210.344\n", + "==============================================================================\n", + "Omnibus: 1.544 Durbin-Watson: 2.026\n", + "Prob(Omnibus): 0.462 Jarque-Bera (JB): 1.421\n", + "Skew: 0.004 Prob(JB): 0.491\n", + "Kurtosis: 2.716 Cond. No. 224.\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "import statsmodels.api as sm\n", + "\n", + "diabetes_data_train_const = sm.add_constant(diabetes_data_train)\n", + "\n", + "mod = sm.OLS( diabetes_target_train, diabetes_data_train_const)\n", + "\n", + "res = mod.fit()\n", + "\n", + "print(res.summary())" ] }, { @@ -326,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 189, "metadata": {}, "outputs": [], "source": [ @@ -351,11 +531,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 190, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "auto=pd.read_csv('../auto-mpg.csv')" ] }, { @@ -367,11 +548,124 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 191, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | mpg | \n", + "cylinders | \n", + "displacement | \n", + "horse_power | \n", + "weight | \n", + "acceleration | \n", + "model_year | \n", + "car_name | \n", + "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "18.0 | \n", + "8 | \n", + "307.0 | \n", + "130.0 | \n", + "3504 | \n", + "12.0 | \n", + "70 | \n", + "\\t\"chevrolet chevelle malibu\" | \n", + "
| 1 | \n", + "15.0 | \n", + "8 | \n", + "350.0 | \n", + "165.0 | \n", + "3693 | \n", + "11.5 | \n", + "70 | \n", + "\\t\"buick skylark 320\" | \n", + "
| 2 | \n", + "18.0 | \n", + "8 | \n", + "318.0 | \n", + "150.0 | \n", + "3436 | \n", + "11.0 | \n", + "70 | \n", + "\\t\"plymouth satellite\" | \n", + "
| 3 | \n", + "16.0 | \n", + "8 | \n", + "304.0 | \n", + "150.0 | \n", + "3433 | \n", + "12.0 | \n", + "70 | \n", + "\\t\"amc rebel sst\" | \n", + "
| 4 | \n", + "17.0 | \n", + "8 | \n", + "302.0 | \n", + "140.0 | \n", + "3449 | \n", + "10.5 | \n", + "70 | \n", + "\\t\"ford torino\" | \n", + "
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()