diff --git a/.ipynb_checkpoints/Decision Trees - Titanic-checkpoint.ipynb b/.ipynb_checkpoints/Decision Trees - Titanic-checkpoint.ipynb
new file mode 100644
index 0000000..a3d88b6
--- /dev/null
+++ b/.ipynb_checkpoints/Decision Trees - Titanic-checkpoint.ipynb
@@ -0,0 +1,414 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# You are given data from the Titanic, in data.csv.\n",
+ "\n",
+ "# For this challenge we need to predict whether each individual in the dataset survived. Use the provided features, and modify, delete, or add new features based on the existing ones. This is a core part of being a data scientist.\n",
+ "\n",
+ "# After you have massaged the data into a form that makes you happy, use a DecisionTreeClassifier from sklearn and try to get the highest accuracy you can. Try adjusting the depth of the tree to see how the accuracy varies.\n",
+ "\n",
+ "# Finally try to perform cross validation.\n",
+ "\n",
+ "# Note the data-dictionary.txt provides information on the fields in the CSV file.\n",
+ "\n",
+ "# - http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.metrics import confusion_matrix\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.model_selection import cross_val_score\n",
+ "from sklearn import linear_model\n",
+ "from pandas.plotting import scatter_matrix\n",
+ "from itertools import combinations\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv('data.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PassengerId | \n",
+ " Survived | \n",
+ " Pclass | \n",
+ " Name | \n",
+ " Sex | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Ticket | \n",
+ " Fare | \n",
+ " Cabin | \n",
+ " Embarked | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Braund, Mr. Owen Harris | \n",
+ " male | \n",
+ " 22.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " A/5 21171 | \n",
+ " 7.2500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
+ " female | \n",
+ " 38.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " PC 17599 | \n",
+ " 71.2833 | \n",
+ " C85 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " Heikkinen, Miss. Laina | \n",
+ " female | \n",
+ " 26.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " STON/O2. 3101282 | \n",
+ " 7.9250 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
+ " female | \n",
+ " 35.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 113803 | \n",
+ " 53.1000 | \n",
+ " C123 | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Allen, Mr. William Henry | \n",
+ " male | \n",
+ " 35.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 373450 | \n",
+ " 8.0500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PassengerId Survived Pclass \\\n",
+ "0 1 0 3 \n",
+ "1 2 1 1 \n",
+ "2 3 1 3 \n",
+ "3 4 1 1 \n",
+ "4 5 0 3 \n",
+ "\n",
+ " Name Sex Age SibSp \\\n",
+ "0 Braund, Mr. Owen Harris male 22.0 1 \n",
+ "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
+ "2 Heikkinen, Miss. Laina female 26.0 0 \n",
+ "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
+ "4 Allen, Mr. William Henry male 35.0 0 \n",
+ "\n",
+ " Parch Ticket Fare Cabin Embarked \n",
+ "0 0 A/5 21171 7.2500 NaN S \n",
+ "1 0 PC 17599 71.2833 C85 C \n",
+ "2 0 STON/O2. 3101282 7.9250 NaN S \n",
+ "3 0 113803 53.1000 C123 S \n",
+ "4 0 373450 8.0500 NaN S "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PassengerId | \n",
+ " Survived | \n",
+ " Pclass | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Fare | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 891.000000 | \n",
+ " 891.000000 | \n",
+ " 891.000000 | \n",
+ " 714.000000 | \n",
+ " 891.000000 | \n",
+ " 891.000000 | \n",
+ " 891.000000 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 446.000000 | \n",
+ " 0.383838 | \n",
+ " 2.308642 | \n",
+ " 29.699118 | \n",
+ " 0.523008 | \n",
+ " 0.381594 | \n",
+ " 32.204208 | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 257.353842 | \n",
+ " 0.486592 | \n",
+ " 0.836071 | \n",
+ " 14.526497 | \n",
+ " 1.102743 | \n",
+ " 0.806057 | \n",
+ " 49.693429 | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 0.420000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 223.500000 | \n",
+ " 0.000000 | \n",
+ " 2.000000 | \n",
+ " 20.125000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 7.910400 | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 446.000000 | \n",
+ " 0.000000 | \n",
+ " 3.000000 | \n",
+ " 28.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 14.454200 | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 668.500000 | \n",
+ " 1.000000 | \n",
+ " 3.000000 | \n",
+ " 38.000000 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 31.000000 | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 891.000000 | \n",
+ " 1.000000 | \n",
+ " 3.000000 | \n",
+ " 80.000000 | \n",
+ " 8.000000 | \n",
+ " 6.000000 | \n",
+ " 512.329200 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PassengerId Survived Pclass Age SibSp \\\n",
+ "count 891.000000 891.000000 891.000000 714.000000 891.000000 \n",
+ "mean 446.000000 0.383838 2.308642 29.699118 0.523008 \n",
+ "std 257.353842 0.486592 0.836071 14.526497 1.102743 \n",
+ "min 1.000000 0.000000 1.000000 0.420000 0.000000 \n",
+ "25% 223.500000 0.000000 2.000000 20.125000 0.000000 \n",
+ "50% 446.000000 0.000000 3.000000 28.000000 0.000000 \n",
+ "75% 668.500000 1.000000 3.000000 38.000000 1.000000 \n",
+ "max 891.000000 1.000000 3.000000 80.000000 8.000000 \n",
+ "\n",
+ " Parch Fare \n",
+ "count 891.000000 891.000000 \n",
+ "mean 0.381594 32.204208 \n",
+ "std 0.806057 49.693429 \n",
+ "min 0.000000 0.000000 \n",
+ "25% 0.000000 7.910400 \n",
+ "50% 0.000000 14.454200 \n",
+ "75% 0.000000 31.000000 \n",
+ "max 6.000000 512.329200 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 891 entries, 0 to 890\n",
+ "Data columns (total 12 columns):\n",
+ "PassengerId 891 non-null int64\n",
+ "Survived 891 non-null int64\n",
+ "Pclass 891 non-null int64\n",
+ "Name 891 non-null object\n",
+ "Sex 891 non-null object\n",
+ "Age 714 non-null float64\n",
+ "SibSp 891 non-null int64\n",
+ "Parch 891 non-null int64\n",
+ "Ticket 891 non-null object\n",
+ "Fare 891 non-null float64\n",
+ "Cabin 204 non-null object\n",
+ "Embarked 889 non-null object\n",
+ "dtypes: float64(2), int64(5), object(5)\n",
+ "memory usage: 83.6+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Decision Trees - Titanic.ipynb b/Decision Trees - Titanic.ipynb
new file mode 100644
index 0000000..c36a2f0
--- /dev/null
+++ b/Decision Trees - Titanic.ipynb
@@ -0,0 +1,751 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# You are given data from the Titanic, in data.csv.\n",
+ "\n",
+ "# For this challenge we need to predict whether each individual in the dataset survived. Use the provided features, and modify, delete, or add new features based on the existing ones. This is a core part of being a data scientist.\n",
+ "\n",
+ "# After you have massaged the data into a form that makes you happy, use a DecisionTreeClassifier from sklearn and try to get the highest accuracy you can. Try adjusting the depth of the tree to see how the accuracy varies.\n",
+ "\n",
+ "# Finally try to perform cross validation.\n",
+ "\n",
+ "# Note the data-dictionary.txt provides information on the fields in the CSV file.\n",
+ "\n",
+ "# - http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from sklearn import tree\n",
+ "from sklearn.datasets import load_iris\n",
+ "from sklearn.metrics import confusion_matrix\n",
+ "from sklearn.model_selection import train_test_split"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv('data.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PassengerId | \n",
+ " Survived | \n",
+ " Pclass | \n",
+ " Name | \n",
+ " Sex | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Ticket | \n",
+ " Fare | \n",
+ " Cabin | \n",
+ " Embarked | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Braund, Mr. Owen Harris | \n",
+ " male | \n",
+ " 22.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " A/5 21171 | \n",
+ " 7.2500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
+ " female | \n",
+ " 38.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " PC 17599 | \n",
+ " 71.2833 | \n",
+ " C85 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " Heikkinen, Miss. Laina | \n",
+ " female | \n",
+ " 26.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " STON/O2. 3101282 | \n",
+ " 7.9250 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
+ " female | \n",
+ " 35.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 113803 | \n",
+ " 53.1000 | \n",
+ " C123 | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Allen, Mr. William Henry | \n",
+ " male | \n",
+ " 35.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 373450 | \n",
+ " 8.0500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PassengerId Survived Pclass \\\n",
+ "0 1 0 3 \n",
+ "1 2 1 1 \n",
+ "2 3 1 3 \n",
+ "3 4 1 1 \n",
+ "4 5 0 3 \n",
+ "\n",
+ " Name Sex Age SibSp \\\n",
+ "0 Braund, Mr. Owen Harris male 22.0 1 \n",
+ "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
+ "2 Heikkinen, Miss. Laina female 26.0 0 \n",
+ "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
+ "4 Allen, Mr. William Henry male 35.0 0 \n",
+ "\n",
+ " Parch Ticket Fare Cabin Embarked \n",
+ "0 0 A/5 21171 7.2500 NaN S \n",
+ "1 0 PC 17599 71.2833 C85 C \n",
+ "2 0 STON/O2. 3101282 7.9250 NaN S \n",
+ "3 0 113803 53.1000 C123 S \n",
+ "4 0 373450 8.0500 NaN S "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PassengerId | \n",
+ " Survived | \n",
+ " Pclass | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Fare | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 891.000000 | \n",
+ " 891.000000 | \n",
+ " 891.000000 | \n",
+ " 714.000000 | \n",
+ " 891.000000 | \n",
+ " 891.000000 | \n",
+ " 891.000000 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 446.000000 | \n",
+ " 0.383838 | \n",
+ " 2.308642 | \n",
+ " 29.699118 | \n",
+ " 0.523008 | \n",
+ " 0.381594 | \n",
+ " 32.204208 | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 257.353842 | \n",
+ " 0.486592 | \n",
+ " 0.836071 | \n",
+ " 14.526497 | \n",
+ " 1.102743 | \n",
+ " 0.806057 | \n",
+ " 49.693429 | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 0.420000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 223.500000 | \n",
+ " 0.000000 | \n",
+ " 2.000000 | \n",
+ " 20.125000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 7.910400 | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 446.000000 | \n",
+ " 0.000000 | \n",
+ " 3.000000 | \n",
+ " 28.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 14.454200 | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 668.500000 | \n",
+ " 1.000000 | \n",
+ " 3.000000 | \n",
+ " 38.000000 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 31.000000 | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 891.000000 | \n",
+ " 1.000000 | \n",
+ " 3.000000 | \n",
+ " 80.000000 | \n",
+ " 8.000000 | \n",
+ " 6.000000 | \n",
+ " 512.329200 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PassengerId Survived Pclass Age SibSp \\\n",
+ "count 891.000000 891.000000 891.000000 714.000000 891.000000 \n",
+ "mean 446.000000 0.383838 2.308642 29.699118 0.523008 \n",
+ "std 257.353842 0.486592 0.836071 14.526497 1.102743 \n",
+ "min 1.000000 0.000000 1.000000 0.420000 0.000000 \n",
+ "25% 223.500000 0.000000 2.000000 20.125000 0.000000 \n",
+ "50% 446.000000 0.000000 3.000000 28.000000 0.000000 \n",
+ "75% 668.500000 1.000000 3.000000 38.000000 1.000000 \n",
+ "max 891.000000 1.000000 3.000000 80.000000 8.000000 \n",
+ "\n",
+ " Parch Fare \n",
+ "count 891.000000 891.000000 \n",
+ "mean 0.381594 32.204208 \n",
+ "std 0.806057 49.693429 \n",
+ "min 0.000000 0.000000 \n",
+ "25% 0.000000 7.910400 \n",
+ "50% 0.000000 14.454200 \n",
+ "75% 0.000000 31.000000 \n",
+ "max 6.000000 512.329200 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 891 entries, 0 to 890\n",
+ "Data columns (total 12 columns):\n",
+ "PassengerId 891 non-null int64\n",
+ "Survived 891 non-null int64\n",
+ "Pclass 891 non-null int64\n",
+ "Name 891 non-null object\n",
+ "Sex 891 non-null object\n",
+ "Age 714 non-null float64\n",
+ "SibSp 891 non-null int64\n",
+ "Parch 891 non-null int64\n",
+ "Ticket 891 non-null object\n",
+ "Fare 891 non-null float64\n",
+ "Cabin 204 non-null object\n",
+ "Embarked 889 non-null object\n",
+ "dtypes: float64(2), int64(5), object(5)\n",
+ "memory usage: 83.6+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Baseline inputs: four columns that are already numeric; the target is Survived.\n",
+ "y = df['Survived']\n",
+ "X = df.loc[:, ['Pclass', 'Fare', 'SibSp', 'Parch']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hold out 33% of the rows for evaluation; the fixed seed makes the split repeatable.\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)\n",
+ "# Seed the tree as well: scikit-learn permutes the features at every split, so\n",
+ "# without random_state equally good splits can be chosen differently on each run\n",
+ "# and the scores below would not be reproducible.\n",
+ "model = tree.DecisionTreeClassifier(random_state=42).fit(X_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.84395973154362414"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.score(X_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.68474576271186438"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.score(X_test, y_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Encode Sex as 0 (male) / 1 (female) so the tree can consume it.\n",
+ "# The identity entries (0 -> 0, 1 -> 1) make this cell safe to re-run: Series.map\n",
+ "# turns every value missing from the mapping into NaN, so without them a second\n",
+ "# execution would wipe out the already-encoded column.\n",
+ "df['Sex'] = df['Sex'].map({'male': 0, 'female': 1, 0: 0, 1: 1})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PassengerId | \n",
+ " Survived | \n",
+ " Pclass | \n",
+ " Name | \n",
+ " Sex | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Ticket | \n",
+ " Fare | \n",
+ " Cabin | \n",
+ " Embarked | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Braund, Mr. Owen Harris | \n",
+ " 0 | \n",
+ " 22.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " A/5 21171 | \n",
+ " 7.2500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
+ " 1 | \n",
+ " 38.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " PC 17599 | \n",
+ " 71.2833 | \n",
+ " C85 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " Heikkinen, Miss. Laina | \n",
+ " 1 | \n",
+ " 26.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " STON/O2. 3101282 | \n",
+ " 7.9250 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
+ " 1 | \n",
+ " 35.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 113803 | \n",
+ " 53.1000 | \n",
+ " C123 | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Allen, Mr. William Henry | \n",
+ " 0 | \n",
+ " 35.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 373450 | \n",
+ " 8.0500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PassengerId Survived Pclass \\\n",
+ "0 1 0 3 \n",
+ "1 2 1 1 \n",
+ "2 3 1 3 \n",
+ "3 4 1 1 \n",
+ "4 5 0 3 \n",
+ "\n",
+ " Name Sex Age SibSp Parch \\\n",
+ "0 Braund, Mr. Owen Harris 0 22.0 1 0 \n",
+ "1 Cumings, Mrs. John Bradley (Florence Briggs Th... 1 38.0 1 0 \n",
+ "2 Heikkinen, Miss. Laina 1 26.0 0 0 \n",
+ "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 35.0 1 0 \n",
+ "4 Allen, Mr. William Henry 0 35.0 0 0 \n",
+ "\n",
+ " Ticket Fare Cabin Embarked \n",
+ "0 A/5 21171 7.2500 NaN S \n",
+ "1 PC 17599 71.2833 C85 C \n",
+ "2 STON/O2. 3101282 7.9250 NaN S \n",
+ "3 113803 53.1000 C123 S \n",
+ "4 373450 8.0500 NaN S "
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Second feature set: Parch is dropped and the numerically encoded Sex column is added.\n",
+ "y = df['Survived']\n",
+ "X = df.loc[:, ['Pclass', 'Fare', 'SibSp', 'Sex']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Same 67/33 split and seed as the baseline, so the two feature sets are scored on the same rows.\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)\n",
+ "# Seed the tree as well: scikit-learn permutes the features at every split, so\n",
+ "# without random_state equally good splits can be chosen differently on each run\n",
+ "# and the scores below would not be reproducible.\n",
+ "model = tree.DecisionTreeClassifier(random_state=42).fit(X_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.9261744966442953"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.score(X_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.78983050847457625"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.score(X_test, y_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "y_hat = model.predict(X_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([152, 23, 39, 81], dtype=int64)"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "confusion_matrix(y_test,y_hat).ravel()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Accuracy : 0.789830508475\n",
+ "Recall : 0.675\n",
+ "Precision : 0.778846153846\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Derive accuracy / recall / precision by hand from the binary confusion matrix.\n",
+ "# labels=[0, 1] pins the matrix to 2x2 in (TN, FP, FN, TP) order, so the unpacking\n",
+ "# below cannot fail when one class happens to be absent from y_test and y_hat.\n",
+ "TN, FP, FN, TP = confusion_matrix(y_test, y_hat, labels=[0, 1]).ravel()\n",
+ "TOTAL = TP + TN + FP + FN\n",
+ "accuracy = (TP + TN) / TOTAL  # share of all predictions that are correct\n",
+ "recall = TP / (TP + FN)  # share of actual positives (Survived == 1) that were found\n",
+ "precision = TP / (TP + FP)  # share of predicted positives that are correct\n",
+ "print(\"Accuracy:\", accuracy)\n",
+ "print(\"Recall:\", recall)\n",
+ "print(\"Precision:\", precision)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}