diff --git a/.ipynb_checkpoints/work_2018.01.17.1-checkpoint.ipynb b/.ipynb_checkpoints/work_2018.01.17.1-checkpoint.ipynb new file mode 100644 index 0000000..bb39268 --- /dev/null +++ b/.ipynb_checkpoints/work_2018.01.17.1-checkpoint.ipynb @@ -0,0 +1,52 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Imports for data handling, plotting and scikit-learn modelling\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.model_selection import train_test_split\n", + "from pandas.plotting import scatter_matrix\n", + "from itertools import combinations\n", + "\n", + "# (disabled) from sklearn.linear_model import LinearRegression\n", + "from sklearn.model_selection import cross_val_score\n", + "%matplotlib inline\n", + "\n", + "# Read the CSV file into a pandas DataFrame, using its first column as the index\n", + "df = pd.read_csv('loan.na.csv', index_col=0)\n", + "df.info() # quick check that every expected column is present and populated\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/work_2018.01.17.1.ipynb b/work_2018.01.17.1.ipynb new file mode 100644 index 0000000..ed4211c --- /dev/null +++ b/work_2018.01.17.1.ipynb @@ -0,0 +1,673 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 400 entries, 0 to 399\n", + "Data columns (total 4 columns):\n", + "admit 400 non-null int64\n", + "gre 
400 non-null int64\n", + "gpa 400 non-null float64\n", + "rank 400 non-null int64\n", + "dtypes: float64(1), int64(3)\n", + "memory usage: 12.6 KB\n" + ] + } + ], + "source": [ + "# Imports for data handling, plotting and scikit-learn modelling\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.model_selection import train_test_split\n", + "from pandas.plotting import scatter_matrix\n", + "from itertools import combinations\n", + "\n", + "# (disabled) from sklearn.linear_model import LinearRegression\n", + "from sklearn.model_selection import cross_val_score\n", + "%matplotlib inline\n", + "\n", + "# Read the CSV file into a pandas DataFrame (default integer index)\n", + "df = pd.read_csv('grad.csv')\n", + "# (disabled alternative) df = pd.read_csv('grad.csv', index_col=0)\n", + "df.info() # quick check that every expected column is present and populated\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
admitgregparank
003803.613
116603.673
218004.001
316403.194
405202.934
\n", + "
" + ], + "text/plain": [ + " admit gre gpa rank\n", + "0 0 380 3.61 3\n", + "1 1 660 3.67 3\n", + "2 1 800 4.00 1\n", + "3 1 640 3.19 4\n", + "4 0 520 2.93 4" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Preview the first five rows to sanity-check the loaded table\n", + "df.head(n=5)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
admitgregparank
count400.000000400.000000400.000000400.00000
mean0.317500587.7000003.3899002.48500
std0.466087115.5165360.3805670.94446
min0.000000220.0000002.2600001.00000
25%0.000000520.0000003.1300002.00000
50%0.000000580.0000003.3950002.00000
75%1.000000660.0000003.6700003.00000
max1.000000800.0000004.0000004.00000
\n", + "
" + ], + "text/plain": [ + " admit gre gpa rank\n", + "count 400.000000 400.000000 400.000000 400.00000\n", + "mean 0.317500 587.700000 3.389900 2.48500\n", + "std 0.466087 115.516536 0.380567 0.94446\n", + "min 0.000000 220.000000 2.260000 1.00000\n", + "25% 0.000000 520.000000 3.130000 2.00000\n", + "50% 0.000000 580.000000 3.395000 2.00000\n", + "75% 1.000000 660.000000 3.670000 3.00000\n", + "max 1.000000 800.000000 4.000000 4.00000" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe(percentiles=[0.25, 0.5, 0.75]) # count, mean, std, min, quartiles and max per column" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gpa2.262.422.482.522.552.562.622.632.652.67...3.93.913.923.933.943.953.973.983.994.0
admit
01111111101...21214110215
10100001011...10001401113
\n", + "

2 rows × 132 columns

\n", + "
" + ], + "text/plain": [ + "gpa 2.26 2.42 2.48 2.52 2.55 2.56 2.62 2.63 2.65 2.67 ... 3.90 \\\n", + "admit ... \n", + "0 1 1 1 1 1 1 1 1 0 1 ... 2 \n", + "1 0 1 0 0 0 0 1 0 1 1 ... 1 \n", + "\n", + "gpa 3.91 3.92 3.93 3.94 3.95 3.97 3.98 3.99 4.00 \n", + "admit \n", + "0 1 2 1 4 1 1 0 2 15 \n", + "1 0 0 0 1 4 0 1 1 13 \n", + "\n", + "[2 rows x 132 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(index=df['admit'], columns=df['gpa']) # admit counts for each distinct gpa value" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gre220300340360380400420440460480...620640660680700720740760780800
admit
0123489781010...1816121117871114
10110020246...1251295344411
\n", + "

2 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + "gre 220 300 340 360 380 400 420 440 460 480 ... 620 640 660 \\\n", + "admit ... \n", + "0 1 2 3 4 8 9 7 8 10 10 ... 18 16 12 \n", + "1 0 1 1 0 0 2 0 2 4 6 ... 12 5 12 \n", + "\n", + "gre 680 700 720 740 760 780 800 \n", + "admit \n", + "0 11 17 8 7 1 1 14 \n", + "1 9 5 3 4 4 4 11 \n", + "\n", + "[2 rows x 26 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.crosstab(index=df['admit'], columns=df['gre']) # admit counts for each distinct gre score" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['rank'] # bracket access is required: df.rank resolves to the DataFrame.rank method, not the rank column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}