diff --git a/Titanicsurvivors-JLC.ipynb b/Titanicsurvivors-JLC.ipynb new file mode 100644 index 0000000..6e2206a --- /dev/null +++ b/Titanicsurvivors-JLC.ipynb @@ -0,0 +1,6113 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2018-01-24-morning\n", + "\n", + "### Decision Trees\n", + "\n", + "You are given data from the Titanic, in `data.csv`.\n", + "\n", + "For this challenge we need to predict whether each individual in the dataset survived or not. Use the provided features and either modify, delete or add new features based on existing ones. This is a core part of being a data scientist.\n", + "\n", + "After you have massaged the data into a form that makes you happy, use a DecisionTreeClassifier from sklearn and try to get the highest accuracy you can. Try adjusting the depth of the tree to see how the accuracy varies.\n", + "\n", + "Finally, try to perform cross-validation.\n", + "\n", + "*Note:* `data-dictionary.txt` provides information on the fields in the CSV file.\n", + "\n", + "### Pull Request\n", + "\n", + "Send me a pull request after the analysis is complete.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn import linear_model\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.model_selection import train_test_split\n", + "from pandas.plotting import scatter_matrix\n", + "from itertools import combinations\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn import tree\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
5603Moran, Mr. JamesmaleNaN003308778.4583NaNQ
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S
7803Palsson, Master. Gosta Leonardmale2.03134990921.0750NaNS
8913Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female27.00234774211.1333NaNS
91012Nasser, Mrs. Nicholas (Adele Achem)female14.01023773630.0708NaNC
101113Sandstrom, Miss. Marguerite Rutfemale4.011PP 954916.7000G6S
111211Bonnell, Miss. Elizabethfemale58.00011378326.5500C103S
121303Saundercock, Mr. William Henrymale20.000A/5. 21518.0500NaNS
131403Andersson, Mr. Anders Johanmale39.01534708231.2750NaNS
141503Vestrom, Miss. Hulda Amanda Adolfinafemale14.0003504067.8542NaNS
151612Hewlett, Mrs. (Mary D Kingcome)female55.00024870616.0000NaNS
161703Rice, Master. Eugenemale2.04138265229.1250NaNQ
171812Williams, Mr. Charles EugenemaleNaN0024437313.0000NaNS
181903Vander Planke, Mrs. Julius (Emelia Maria Vande...female31.01034576318.0000NaNS
192013Masselmani, Mrs. FatimafemaleNaN0026497.2250NaNC
202102Fynney, Mr. Joseph Jmale35.00023986526.0000NaNS
212212Beesley, Mr. Lawrencemale34.00024869813.0000D56S
222313McGowan, Miss. Anna \"Annie\"female15.0003309238.0292NaNQ
232411Sloper, Mr. William Thompsonmale28.00011378835.5000A6S
242503Palsson, Miss. Torborg Danirafemale8.03134990921.0750NaNS
252613Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...female38.01534707731.3875NaNS
262703Emir, Mr. Farred ChehabmaleNaN0026317.2250NaNC
272801Fortune, Mr. Charles Alexandermale19.03219950263.0000C23 C25 C27S
282913O'Dwyer, Miss. Ellen \"Nellie\"femaleNaN003309597.8792NaNQ
293003Todoroff, Mr. LaliomaleNaN003492167.8958NaNS
.......................................
86186202Giles, Mr. Frederick Edwardmale21.0102813411.5000NaNS
86286311Swift, Mrs. Frederick Joel (Margaret Welles Ba...female48.0001746625.9292D17S
86386403Sage, Miss. Dorothy Edith \"Dolly\"femaleNaN82CA. 234369.5500NaNS
86486502Gill, Mr. John Williammale24.00023386613.0000NaNS
86586612Bystrom, Mrs. (Karolina)female42.00023685213.0000NaNS
86686712Duran y More, Miss. Asuncionfemale27.010SC/PARIS 214913.8583NaNC
86786801Roebling, Mr. Washington Augustus IImale31.000PC 1759050.4958A24S
86886903van Melkebeke, Mr. PhilemonmaleNaN003457779.5000NaNS
86987013Johnson, Master. Harold Theodormale4.01134774211.1333NaNS
87087103Balkic, Mr. Cerinmale26.0003492487.8958NaNS
87187211Beckwith, Mrs. Richard Leonard (Sallie Monypeny)female47.0111175152.5542D35S
87287301Carlsson, Mr. Frans Olofmale33.0006955.0000B51 B53 B55S
87387403Vander Cruyssen, Mr. Victormale47.0003457659.0000NaNS
87487512Abelson, Mrs. Samuel (Hannah Wizosky)female28.010P/PP 338124.0000NaNC
87587613Najib, Miss. Adele Kiamie \"Jane\"female15.00026677.2250NaNC
87687703Gustafsson, Mr. Alfred Ossianmale20.00075349.8458NaNS
87787803Petroff, Mr. Nedeliomale19.0003492127.8958NaNS
87887903Laleff, Mr. KristomaleNaN003492177.8958NaNS
87988011Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)female56.0011176783.1583C50C
88088112Shelley, Mrs. William (Imanita Parrish Hall)female25.00123043326.0000NaNS
88188203Markun, Mr. Johannmale33.0003492577.8958NaNS
88288303Dahlberg, Miss. Gerda Ulrikafemale22.000755210.5167NaNS
88388402Banfield, Mr. Frederick Jamesmale28.000C.A./SOTON 3406810.5000NaNS
88488503Sutehall, Mr. Henry Jrmale25.000SOTON/OQ 3920767.0500NaNS
88588603Rice, Mrs. William (Margaret Norton)female39.00538265229.1250NaNQ
88688702Montvila, Rev. Juozasmale27.00021153613.0000NaNS
88788811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42S
88888903Johnston, Miss. Catherine Helen \"Carrie\"femaleNaN12W./C. 660723.4500NaNS
88989011Behr, Mr. Karl Howellmale26.00011136930.0000C148C
89089103Dooley, Mr. Patrickmale32.0003703767.7500NaNQ
\n", + "

891 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "5 6 0 3 \n", + "6 7 0 1 \n", + "7 8 0 3 \n", + "8 9 1 3 \n", + "9 10 1 2 \n", + "10 11 1 3 \n", + "11 12 1 1 \n", + "12 13 0 3 \n", + "13 14 0 3 \n", + "14 15 0 3 \n", + "15 16 1 2 \n", + "16 17 0 3 \n", + "17 18 1 2 \n", + "18 19 0 3 \n", + "19 20 1 3 \n", + "20 21 0 2 \n", + "21 22 1 2 \n", + "22 23 1 3 \n", + "23 24 1 1 \n", + "24 25 0 3 \n", + "25 26 1 3 \n", + "26 27 0 3 \n", + "27 28 0 1 \n", + "28 29 1 3 \n", + "29 30 0 3 \n", + ".. ... ... ... \n", + "861 862 0 2 \n", + "862 863 1 1 \n", + "863 864 0 3 \n", + "864 865 0 2 \n", + "865 866 1 2 \n", + "866 867 1 2 \n", + "867 868 0 1 \n", + "868 869 0 3 \n", + "869 870 1 3 \n", + "870 871 0 3 \n", + "871 872 1 1 \n", + "872 873 0 1 \n", + "873 874 0 3 \n", + "874 875 1 2 \n", + "875 876 1 3 \n", + "876 877 0 3 \n", + "877 878 0 3 \n", + "878 879 0 3 \n", + "879 880 1 1 \n", + "880 881 1 2 \n", + "881 882 0 3 \n", + "882 883 0 3 \n", + "883 884 0 2 \n", + "884 885 0 3 \n", + "885 886 0 3 \n", + "886 887 0 2 \n", + "887 888 1 1 \n", + "888 889 0 3 \n", + "889 890 1 1 \n", + "890 891 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "5 Moran, Mr. James male NaN 0 \n", + "6 McCarthy, Mr. Timothy J male 54.0 0 \n", + "7 Palsson, Master. Gosta Leonard male 2.0 3 \n", + "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n", + "9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n", + "10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n", + "11 Bonnell, Miss. Elizabeth female 58.0 0 \n", + "12 Saundercock, Mr. William Henry male 20.0 0 \n", + "13 Andersson, Mr. 
Anders Johan male 39.0 1 \n", + "14 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 \n", + "15 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 \n", + "16 Rice, Master. Eugene male 2.0 4 \n", + "17 Williams, Mr. Charles Eugene male NaN 0 \n", + "18 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 \n", + "19 Masselmani, Mrs. Fatima female NaN 0 \n", + "20 Fynney, Mr. Joseph J male 35.0 0 \n", + "21 Beesley, Mr. Lawrence male 34.0 0 \n", + "22 McGowan, Miss. Anna \"Annie\" female 15.0 0 \n", + "23 Sloper, Mr. William Thompson male 28.0 0 \n", + "24 Palsson, Miss. Torborg Danira female 8.0 3 \n", + "25 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 \n", + "26 Emir, Mr. Farred Chehab male NaN 0 \n", + "27 Fortune, Mr. Charles Alexander male 19.0 3 \n", + "28 O'Dwyer, Miss. Ellen \"Nellie\" female NaN 0 \n", + "29 Todoroff, Mr. Lalio male NaN 0 \n", + ".. ... ... ... ... \n", + "861 Giles, Mr. Frederick Edward male 21.0 1 \n", + "862 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 \n", + "863 Sage, Miss. Dorothy Edith \"Dolly\" female NaN 8 \n", + "864 Gill, Mr. John William male 24.0 0 \n", + "865 Bystrom, Mrs. (Karolina) female 42.0 0 \n", + "866 Duran y More, Miss. Asuncion female 27.0 1 \n", + "867 Roebling, Mr. Washington Augustus II male 31.0 0 \n", + "868 van Melkebeke, Mr. Philemon male NaN 0 \n", + "869 Johnson, Master. Harold Theodor male 4.0 1 \n", + "870 Balkic, Mr. Cerin male 26.0 0 \n", + "871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 \n", + "872 Carlsson, Mr. Frans Olof male 33.0 0 \n", + "873 Vander Cruyssen, Mr. Victor male 47.0 0 \n", + "874 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 \n", + "875 Najib, Miss. Adele Kiamie \"Jane\" female 15.0 0 \n", + "876 Gustafsson, Mr. Alfred Ossian male 20.0 0 \n", + "877 Petroff, Mr. Nedelio male 19.0 0 \n", + "878 Laleff, Mr. Kristo male NaN 0 \n", + "879 Potter, Mrs. 
Thomas Jr (Lily Alexenia Wilson) female 56.0 0 \n", + "880 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 \n", + "881 Markun, Mr. Johann male 33.0 0 \n", + "882 Dahlberg, Miss. Gerda Ulrika female 22.0 0 \n", + "883 Banfield, Mr. Frederick James male 28.0 0 \n", + "884 Sutehall, Mr. Henry Jr male 25.0 0 \n", + "885 Rice, Mrs. William (Margaret Norton) female 39.0 0 \n", + "886 Montvila, Rev. Juozas male 27.0 0 \n", + "887 Graham, Miss. Margaret Edith female 19.0 0 \n", + "888 Johnston, Miss. Catherine Helen \"Carrie\" female NaN 1 \n", + "889 Behr, Mr. Karl Howell male 26.0 0 \n", + "890 Dooley, Mr. Patrick male 32.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S \n", + "5 0 330877 8.4583 NaN Q \n", + "6 0 17463 51.8625 E46 S \n", + "7 1 349909 21.0750 NaN S \n", + "8 2 347742 11.1333 NaN S \n", + "9 0 237736 30.0708 NaN C \n", + "10 1 PP 9549 16.7000 G6 S \n", + "11 0 113783 26.5500 C103 S \n", + "12 0 A/5. 2151 8.0500 NaN S \n", + "13 5 347082 31.2750 NaN S \n", + "14 0 350406 7.8542 NaN S \n", + "15 0 248706 16.0000 NaN S \n", + "16 1 382652 29.1250 NaN Q \n", + "17 0 244373 13.0000 NaN S \n", + "18 0 345763 18.0000 NaN S \n", + "19 0 2649 7.2250 NaN C \n", + "20 0 239865 26.0000 NaN S \n", + "21 0 248698 13.0000 D56 S \n", + "22 0 330923 8.0292 NaN Q \n", + "23 0 113788 35.5000 A6 S \n", + "24 1 349909 21.0750 NaN S \n", + "25 5 347077 31.3875 NaN S \n", + "26 0 2631 7.2250 NaN C \n", + "27 2 19950 263.0000 C23 C25 C27 S \n", + "28 0 330959 7.8792 NaN Q \n", + "29 0 349216 7.8958 NaN S \n", + ".. ... ... ... ... ... \n", + "861 0 28134 11.5000 NaN S \n", + "862 0 17466 25.9292 D17 S \n", + "863 2 CA. 
2343 69.5500 NaN S \n", + "864 0 233866 13.0000 NaN S \n", + "865 0 236852 13.0000 NaN S \n", + "866 0 SC/PARIS 2149 13.8583 NaN C \n", + "867 0 PC 17590 50.4958 A24 S \n", + "868 0 345777 9.5000 NaN S \n", + "869 1 347742 11.1333 NaN S \n", + "870 0 349248 7.8958 NaN S \n", + "871 1 11751 52.5542 D35 S \n", + "872 0 695 5.0000 B51 B53 B55 S \n", + "873 0 345765 9.0000 NaN S \n", + "874 0 P/PP 3381 24.0000 NaN C \n", + "875 0 2667 7.2250 NaN C \n", + "876 0 7534 9.8458 NaN S \n", + "877 0 349212 7.8958 NaN S \n", + "878 0 349217 7.8958 NaN S \n", + "879 1 11767 83.1583 C50 C \n", + "880 1 230433 26.0000 NaN S \n", + "881 0 349257 7.8958 NaN S \n", + "882 0 7552 10.5167 NaN S \n", + "883 0 C.A./SOTON 34068 10.5000 NaN S \n", + "884 0 SOTON/OQ 392076 7.0500 NaN S \n", + "885 5 382652 29.1250 NaN Q \n", + "886 0 211536 13.0000 NaN S \n", + "887 0 112053 30.0000 B42 S \n", + "888 2 W./C. 6607 23.4500 NaN S \n", + "889 0 111369 30.0000 C148 C \n", + "890 0 370376 7.7500 NaN Q \n", + "\n", + "[891 rows x 12 columns]" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('data.csv')\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I replaced all of the NaNs with zeros and created a numeric dummy variable from the Sex column.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df.replace(np.nan, 0, inplace=True)\n", + "#df.dropna(axis=0, how='any', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def sex_to_numeric(s):\n", + " if s=='Male':\n", + " return 1\n", + " else:\n", + " return 0" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedmale
0103Braund, Mr. Owen Harrismale22.010A/5 211717.25000S0
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C0
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.92500S0
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S0
4503Allen, Mr. William Henrymale35.0003734508.05000S0
5603Moran, Mr. Jamesmale0.0003308778.45830Q0
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S0
7803Palsson, Master. Gosta Leonardmale2.03134990921.07500S0
8913Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female27.00234774211.13330S0
91012Nasser, Mrs. Nicholas (Adele Achem)female14.01023773630.07080C0
101113Sandstrom, Miss. Marguerite Rutfemale4.011PP 954916.7000G6S0
111211Bonnell, Miss. Elizabethfemale58.00011378326.5500C103S0
121303Saundercock, Mr. William Henrymale20.000A/5. 21518.05000S0
131403Andersson, Mr. Anders Johanmale39.01534708231.27500S0
141503Vestrom, Miss. Hulda Amanda Adolfinafemale14.0003504067.85420S0
151612Hewlett, Mrs. (Mary D Kingcome)female55.00024870616.00000S0
161703Rice, Master. Eugenemale2.04138265229.12500Q0
171812Williams, Mr. Charles Eugenemale0.00024437313.00000S0
181903Vander Planke, Mrs. Julius (Emelia Maria Vande...female31.01034576318.00000S0
192013Masselmani, Mrs. Fatimafemale0.00026497.22500C0
202102Fynney, Mr. Joseph Jmale35.00023986526.00000S0
212212Beesley, Mr. Lawrencemale34.00024869813.0000D56S0
222313McGowan, Miss. Anna \"Annie\"female15.0003309238.02920Q0
232411Sloper, Mr. William Thompsonmale28.00011378835.5000A6S0
242503Palsson, Miss. Torborg Danirafemale8.03134990921.07500S0
252613Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...female38.01534707731.38750S0
262703Emir, Mr. Farred Chehabmale0.00026317.22500C0
272801Fortune, Mr. Charles Alexandermale19.03219950263.0000C23 C25 C27S0
282913O'Dwyer, Miss. Ellen \"Nellie\"female0.0003309597.87920Q0
293003Todoroff, Mr. Laliomale0.0003492167.89580S0
..........................................
86186202Giles, Mr. Frederick Edwardmale21.0102813411.50000S0
86286311Swift, Mrs. Frederick Joel (Margaret Welles Ba...female48.0001746625.9292D17S0
86386403Sage, Miss. Dorothy Edith \"Dolly\"female0.082CA. 234369.55000S0
86486502Gill, Mr. John Williammale24.00023386613.00000S0
86586612Bystrom, Mrs. (Karolina)female42.00023685213.00000S0
86686712Duran y More, Miss. Asuncionfemale27.010SC/PARIS 214913.85830C0
86786801Roebling, Mr. Washington Augustus IImale31.000PC 1759050.4958A24S0
86886903van Melkebeke, Mr. Philemonmale0.0003457779.50000S0
86987013Johnson, Master. Harold Theodormale4.01134774211.13330S0
87087103Balkic, Mr. Cerinmale26.0003492487.89580S0
87187211Beckwith, Mrs. Richard Leonard (Sallie Monypeny)female47.0111175152.5542D35S0
87287301Carlsson, Mr. Frans Olofmale33.0006955.0000B51 B53 B55S0
87387403Vander Cruyssen, Mr. Victormale47.0003457659.00000S0
87487512Abelson, Mrs. Samuel (Hannah Wizosky)female28.010P/PP 338124.00000C0
87587613Najib, Miss. Adele Kiamie \"Jane\"female15.00026677.22500C0
87687703Gustafsson, Mr. Alfred Ossianmale20.00075349.84580S0
87787803Petroff, Mr. Nedeliomale19.0003492127.89580S0
87887903Laleff, Mr. Kristomale0.0003492177.89580S0
87988011Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)female56.0011176783.1583C50C0
88088112Shelley, Mrs. William (Imanita Parrish Hall)female25.00123043326.00000S0
88188203Markun, Mr. Johannmale33.0003492577.89580S0
88288303Dahlberg, Miss. Gerda Ulrikafemale22.000755210.51670S0
88388402Banfield, Mr. Frederick Jamesmale28.000C.A./SOTON 3406810.50000S0
88488503Sutehall, Mr. Henry Jrmale25.000SOTON/OQ 3920767.05000S0
88588603Rice, Mrs. William (Margaret Norton)female39.00538265229.12500Q0
88688702Montvila, Rev. Juozasmale27.00021153613.00000S0
88788811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42S0
88888903Johnston, Miss. Catherine Helen \"Carrie\"female0.012W./C. 660723.45000S0
88989011Behr, Mr. Karl Howellmale26.00011136930.0000C148C0
89089103Dooley, Mr. Patrickmale32.0003703767.75000Q0
\n", + "

891 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "5 6 0 3 \n", + "6 7 0 1 \n", + "7 8 0 3 \n", + "8 9 1 3 \n", + "9 10 1 2 \n", + "10 11 1 3 \n", + "11 12 1 1 \n", + "12 13 0 3 \n", + "13 14 0 3 \n", + "14 15 0 3 \n", + "15 16 1 2 \n", + "16 17 0 3 \n", + "17 18 1 2 \n", + "18 19 0 3 \n", + "19 20 1 3 \n", + "20 21 0 2 \n", + "21 22 1 2 \n", + "22 23 1 3 \n", + "23 24 1 1 \n", + "24 25 0 3 \n", + "25 26 1 3 \n", + "26 27 0 3 \n", + "27 28 0 1 \n", + "28 29 1 3 \n", + "29 30 0 3 \n", + ".. ... ... ... \n", + "861 862 0 2 \n", + "862 863 1 1 \n", + "863 864 0 3 \n", + "864 865 0 2 \n", + "865 866 1 2 \n", + "866 867 1 2 \n", + "867 868 0 1 \n", + "868 869 0 3 \n", + "869 870 1 3 \n", + "870 871 0 3 \n", + "871 872 1 1 \n", + "872 873 0 1 \n", + "873 874 0 3 \n", + "874 875 1 2 \n", + "875 876 1 3 \n", + "876 877 0 3 \n", + "877 878 0 3 \n", + "878 879 0 3 \n", + "879 880 1 1 \n", + "880 881 1 2 \n", + "881 882 0 3 \n", + "882 883 0 3 \n", + "883 884 0 2 \n", + "884 885 0 3 \n", + "885 886 0 3 \n", + "886 887 0 2 \n", + "887 888 1 1 \n", + "888 889 0 3 \n", + "889 890 1 1 \n", + "890 891 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "5 Moran, Mr. James male 0.0 0 \n", + "6 McCarthy, Mr. Timothy J male 54.0 0 \n", + "7 Palsson, Master. Gosta Leonard male 2.0 3 \n", + "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n", + "9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n", + "10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n", + "11 Bonnell, Miss. Elizabeth female 58.0 0 \n", + "12 Saundercock, Mr. William Henry male 20.0 0 \n", + "13 Andersson, Mr. 
Anders Johan male 39.0 1 \n", + "14 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 \n", + "15 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 \n", + "16 Rice, Master. Eugene male 2.0 4 \n", + "17 Williams, Mr. Charles Eugene male 0.0 0 \n", + "18 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 \n", + "19 Masselmani, Mrs. Fatima female 0.0 0 \n", + "20 Fynney, Mr. Joseph J male 35.0 0 \n", + "21 Beesley, Mr. Lawrence male 34.0 0 \n", + "22 McGowan, Miss. Anna \"Annie\" female 15.0 0 \n", + "23 Sloper, Mr. William Thompson male 28.0 0 \n", + "24 Palsson, Miss. Torborg Danira female 8.0 3 \n", + "25 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 \n", + "26 Emir, Mr. Farred Chehab male 0.0 0 \n", + "27 Fortune, Mr. Charles Alexander male 19.0 3 \n", + "28 O'Dwyer, Miss. Ellen \"Nellie\" female 0.0 0 \n", + "29 Todoroff, Mr. Lalio male 0.0 0 \n", + ".. ... ... ... ... \n", + "861 Giles, Mr. Frederick Edward male 21.0 1 \n", + "862 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 \n", + "863 Sage, Miss. Dorothy Edith \"Dolly\" female 0.0 8 \n", + "864 Gill, Mr. John William male 24.0 0 \n", + "865 Bystrom, Mrs. (Karolina) female 42.0 0 \n", + "866 Duran y More, Miss. Asuncion female 27.0 1 \n", + "867 Roebling, Mr. Washington Augustus II male 31.0 0 \n", + "868 van Melkebeke, Mr. Philemon male 0.0 0 \n", + "869 Johnson, Master. Harold Theodor male 4.0 1 \n", + "870 Balkic, Mr. Cerin male 26.0 0 \n", + "871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 \n", + "872 Carlsson, Mr. Frans Olof male 33.0 0 \n", + "873 Vander Cruyssen, Mr. Victor male 47.0 0 \n", + "874 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 \n", + "875 Najib, Miss. Adele Kiamie \"Jane\" female 15.0 0 \n", + "876 Gustafsson, Mr. Alfred Ossian male 20.0 0 \n", + "877 Petroff, Mr. Nedelio male 19.0 0 \n", + "878 Laleff, Mr. Kristo male 0.0 0 \n", + "879 Potter, Mrs. 
Thomas Jr (Lily Alexenia Wilson) female 56.0 0 \n", + "880 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 \n", + "881 Markun, Mr. Johann male 33.0 0 \n", + "882 Dahlberg, Miss. Gerda Ulrika female 22.0 0 \n", + "883 Banfield, Mr. Frederick James male 28.0 0 \n", + "884 Sutehall, Mr. Henry Jr male 25.0 0 \n", + "885 Rice, Mrs. William (Margaret Norton) female 39.0 0 \n", + "886 Montvila, Rev. Juozas male 27.0 0 \n", + "887 Graham, Miss. Margaret Edith female 19.0 0 \n", + "888 Johnston, Miss. Catherine Helen \"Carrie\" female 0.0 1 \n", + "889 Behr, Mr. Karl Howell male 26.0 0 \n", + "890 Dooley, Mr. Patrick male 32.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked male \n", + "0 0 A/5 21171 7.2500 0 S 0 \n", + "1 0 PC 17599 71.2833 C85 C 0 \n", + "2 0 STON/O2. 3101282 7.9250 0 S 0 \n", + "3 0 113803 53.1000 C123 S 0 \n", + "4 0 373450 8.0500 0 S 0 \n", + "5 0 330877 8.4583 0 Q 0 \n", + "6 0 17463 51.8625 E46 S 0 \n", + "7 1 349909 21.0750 0 S 0 \n", + "8 2 347742 11.1333 0 S 0 \n", + "9 0 237736 30.0708 0 C 0 \n", + "10 1 PP 9549 16.7000 G6 S 0 \n", + "11 0 113783 26.5500 C103 S 0 \n", + "12 0 A/5. 2151 8.0500 0 S 0 \n", + "13 5 347082 31.2750 0 S 0 \n", + "14 0 350406 7.8542 0 S 0 \n", + "15 0 248706 16.0000 0 S 0 \n", + "16 1 382652 29.1250 0 Q 0 \n", + "17 0 244373 13.0000 0 S 0 \n", + "18 0 345763 18.0000 0 S 0 \n", + "19 0 2649 7.2250 0 C 0 \n", + "20 0 239865 26.0000 0 S 0 \n", + "21 0 248698 13.0000 D56 S 0 \n", + "22 0 330923 8.0292 0 Q 0 \n", + "23 0 113788 35.5000 A6 S 0 \n", + "24 1 349909 21.0750 0 S 0 \n", + "25 5 347077 31.3875 0 S 0 \n", + "26 0 2631 7.2250 0 C 0 \n", + "27 2 19950 263.0000 C23 C25 C27 S 0 \n", + "28 0 330959 7.8792 0 Q 0 \n", + "29 0 349216 7.8958 0 S 0 \n", + ".. ... ... ... ... ... ... \n", + "861 0 28134 11.5000 0 S 0 \n", + "862 0 17466 25.9292 D17 S 0 \n", + "863 2 CA. 
2343 69.5500 0 S 0 \n", + "864 0 233866 13.0000 0 S 0 \n", + "865 0 236852 13.0000 0 S 0 \n", + "866 0 SC/PARIS 2149 13.8583 0 C 0 \n", + "867 0 PC 17590 50.4958 A24 S 0 \n", + "868 0 345777 9.5000 0 S 0 \n", + "869 1 347742 11.1333 0 S 0 \n", + "870 0 349248 7.8958 0 S 0 \n", + "871 1 11751 52.5542 D35 S 0 \n", + "872 0 695 5.0000 B51 B53 B55 S 0 \n", + "873 0 345765 9.0000 0 S 0 \n", + "874 0 P/PP 3381 24.0000 0 C 0 \n", + "875 0 2667 7.2250 0 C 0 \n", + "876 0 7534 9.8458 0 S 0 \n", + "877 0 349212 7.8958 0 S 0 \n", + "878 0 349217 7.8958 0 S 0 \n", + "879 1 11767 83.1583 C50 C 0 \n", + "880 1 230433 26.0000 0 S 0 \n", + "881 0 349257 7.8958 0 S 0 \n", + "882 0 7552 10.5167 0 S 0 \n", + "883 0 C.A./SOTON 34068 10.5000 0 S 0 \n", + "884 0 SOTON/OQ 392076 7.0500 0 S 0 \n", + "885 5 382652 29.1250 0 Q 0 \n", + "886 0 211536 13.0000 0 S 0 \n", + "887 0 112053 30.0000 B42 S 0 \n", + "888 2 W./C. 6607 23.4500 0 S 0 \n", + "889 0 111369 30.0000 C148 C 0 \n", + "890 0 370376 7.7500 0 Q 0 \n", + "\n", + "[891 rows x 13 columns]" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['male'] = df['Sex'].apply(sex_to_numeric)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After I ran this model once, I decided to go back and split out the name column to get just the prefix so we could generate a column that was either \"adult or child\" to see if we could get a better score. that was a no on the better score." + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df['Last'], df['beginning'] = df['Name'].str.split(',', 1).str" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedmaleLastfirstbeginningPrefix
0103Braund, Mr. Owen Harrismale22.010A/5 211717.25000S0BraundOwen HarrisMr. Owen HarrisMr
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C0CumingsJohn Bradley (Florence Briggs Thayer)Mrs. John Bradley (Florence Briggs Thayer)Mrs
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.92500S0HeikkinenLainaMiss. LainaMiss
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S0FutrelleJacques Heath (Lily May Peel)Mrs. Jacques Heath (Lily May Peel)Mrs
4503Allen, Mr. William Henrymale35.0003734508.05000S0AllenWilliam HenryMr. William HenryMr
5603Moran, Mr. Jamesmale0.0003308778.45830Q0MoranJamesMr. JamesMr
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S0McCarthyTimothy JMr. Timothy JMr
7803Palsson, Master. Gosta Leonardmale2.03134990921.07500S0PalssonGosta LeonardMaster. Gosta LeonardMaster
8913Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female27.00234774211.13330S0JohnsonOscar W (Elisabeth Vilhelmina Berg)Mrs. Oscar W (Elisabeth Vilhelmina Berg)Mrs
91012Nasser, Mrs. Nicholas (Adele Achem)female14.01023773630.07080C0NasserNicholas (Adele Achem)Mrs. Nicholas (Adele Achem)Mrs
101113Sandstrom, Miss. Marguerite Rutfemale4.011PP 954916.7000G6S0SandstromMarguerite RutMiss. Marguerite RutMiss
111211Bonnell, Miss. Elizabethfemale58.00011378326.5500C103S0BonnellElizabethMiss. ElizabethMiss
121303Saundercock, Mr. William Henrymale20.000A/5. 21518.05000S0SaundercockWilliam HenryMr. William HenryMr
131403Andersson, Mr. Anders Johanmale39.01534708231.27500S0AnderssonAnders JohanMr. Anders JohanMr
141503Vestrom, Miss. Hulda Amanda Adolfinafemale14.0003504067.85420S0VestromHulda Amanda AdolfinaMiss. Hulda Amanda AdolfinaMiss
151612Hewlett, Mrs. (Mary D Kingcome)female55.00024870616.00000S0Hewlett(Mary D Kingcome)Mrs. (Mary D Kingcome)Mrs
161703Rice, Master. Eugenemale2.04138265229.12500Q0RiceEugeneMaster. EugeneMaster
171812Williams, Mr. Charles Eugenemale0.00024437313.00000S0WilliamsCharles EugeneMr. Charles EugeneMr
181903Vander Planke, Mrs. Julius (Emelia Maria Vande...female31.01034576318.00000S0Vander PlankeJulius (Emelia Maria Vandemoortele)Mrs. Julius (Emelia Maria Vandemoortele)Mrs
192013Masselmani, Mrs. Fatimafemale0.00026497.22500C0MasselmaniFatimaMrs. FatimaMrs
202102Fynney, Mr. Joseph Jmale35.00023986526.00000S0FynneyJoseph JMr. Joseph JMr
212212Beesley, Mr. Lawrencemale34.00024869813.0000D56S0BeesleyLawrenceMr. LawrenceMr
222313McGowan, Miss. Anna \"Annie\"female15.0003309238.02920Q0McGowanAnna \"Annie\"Miss. Anna \"Annie\"Miss
232411Sloper, Mr. William Thompsonmale28.00011378835.5000A6S0SloperWilliam ThompsonMr. William ThompsonMr
242503Palsson, Miss. Torborg Danirafemale8.03134990921.07500S0PalssonTorborg DaniraMiss. Torborg DaniraMiss
252613Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...female38.01534707731.38750S0AsplundCarl Oscar (Selma Augusta Emilia Johansson)Mrs. Carl Oscar (Selma Augusta Emilia Johansson)Mrs
262703Emir, Mr. Farred Chehabmale0.00026317.22500C0EmirFarred ChehabMr. Farred ChehabMr
272801Fortune, Mr. Charles Alexandermale19.03219950263.0000C23 C25 C27S0FortuneCharles AlexanderMr. Charles AlexanderMr
282913O'Dwyer, Miss. Ellen \"Nellie\"female0.0003309597.87920Q0O'DwyerEllen \"Nellie\"Miss. Ellen \"Nellie\"Miss
293003Todoroff, Mr. Laliomale0.0003492167.89580S0TodoroffLalioMr. LalioMr
......................................................
86186202Giles, Mr. Frederick Edwardmale21.0102813411.50000S0GilesFrederick EdwardMr. Frederick EdwardMr
86286311Swift, Mrs. Frederick Joel (Margaret Welles Ba...female48.0001746625.9292D17S0SwiftFrederick Joel (Margaret Welles Barron)Mrs. Frederick Joel (Margaret Welles Barron)Mrs
86386403Sage, Miss. Dorothy Edith \"Dolly\"female0.082CA. 234369.55000S0SageDorothy Edith \"Dolly\"Miss. Dorothy Edith \"Dolly\"Miss
86486502Gill, Mr. John Williammale24.00023386613.00000S0GillJohn WilliamMr. John WilliamMr
86586612Bystrom, Mrs. (Karolina)female42.00023685213.00000S0Bystrom(Karolina)Mrs. (Karolina)Mrs
86686712Duran y More, Miss. Asuncionfemale27.010SC/PARIS 214913.85830C0Duran y MoreAsuncionMiss. AsuncionMiss
86786801Roebling, Mr. Washington Augustus IImale31.000PC 1759050.4958A24S0RoeblingWashington Augustus IIMr. Washington Augustus IIMr
86886903van Melkebeke, Mr. Philemonmale0.0003457779.50000S0van MelkebekePhilemonMr. PhilemonMr
86987013Johnson, Master. Harold Theodormale4.01134774211.13330S0JohnsonHarold TheodorMaster. Harold TheodorMaster
87087103Balkic, Mr. Cerinmale26.0003492487.89580S0BalkicCerinMr. CerinMr
87187211Beckwith, Mrs. Richard Leonard (Sallie Monypeny)female47.0111175152.5542D35S0BeckwithRichard Leonard (Sallie Monypeny)Mrs. Richard Leonard (Sallie Monypeny)Mrs
87287301Carlsson, Mr. Frans Olofmale33.0006955.0000B51 B53 B55S0CarlssonFrans OlofMr. Frans OlofMr
87387403Vander Cruyssen, Mr. Victormale47.0003457659.00000S0Vander CruyssenVictorMr. VictorMr
87487512Abelson, Mrs. Samuel (Hannah Wizosky)female28.010P/PP 338124.00000C0AbelsonSamuel (Hannah Wizosky)Mrs. Samuel (Hannah Wizosky)Mrs
87587613Najib, Miss. Adele Kiamie \"Jane\"female15.00026677.22500C0NajibAdele Kiamie \"Jane\"Miss. Adele Kiamie \"Jane\"Miss
87687703Gustafsson, Mr. Alfred Ossianmale20.00075349.84580S0GustafssonAlfred OssianMr. Alfred OssianMr
87787803Petroff, Mr. Nedeliomale19.0003492127.89580S0PetroffNedelioMr. NedelioMr
87887903Laleff, Mr. Kristomale0.0003492177.89580S0LaleffKristoMr. KristoMr
87988011Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)female56.0011176783.1583C50C0PotterThomas Jr (Lily Alexenia Wilson)Mrs. Thomas Jr (Lily Alexenia Wilson)Mrs
88088112Shelley, Mrs. William (Imanita Parrish Hall)female25.00123043326.00000S0ShelleyWilliam (Imanita Parrish Hall)Mrs. William (Imanita Parrish Hall)Mrs
88188203Markun, Mr. Johannmale33.0003492577.89580S0MarkunJohannMr. JohannMr
88288303Dahlberg, Miss. Gerda Ulrikafemale22.000755210.51670S0DahlbergGerda UlrikaMiss. Gerda UlrikaMiss
88388402Banfield, Mr. Frederick Jamesmale28.000C.A./SOTON 3406810.50000S0BanfieldFrederick JamesMr. Frederick JamesMr
88488503Sutehall, Mr. Henry Jrmale25.000SOTON/OQ 3920767.05000S0SutehallHenry JrMr. Henry JrMr
88588603Rice, Mrs. William (Margaret Norton)female39.00538265229.12500Q0RiceWilliam (Margaret Norton)Mrs. William (Margaret Norton)Mrs
88688702Montvila, Rev. Juozasmale27.00021153613.00000S0MontvilaJuozasRev. JuozasRev
88788811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42S0GrahamMargaret EdithMiss. Margaret EdithMiss
88888903Johnston, Miss. Catherine Helen \"Carrie\"female0.012W./C. 660723.45000S0JohnstonCatherine Helen \"Carrie\"Miss. Catherine Helen \"Carrie\"Miss
88989011Behr, Mr. Karl Howellmale26.00011136930.0000C148C0BehrKarl HowellMr. Karl HowellMr
89089103Dooley, Mr. Patrickmale32.0003703767.75000Q0DooleyPatrickMr. PatrickMr
\n", + "

891 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "5 6 0 3 \n", + "6 7 0 1 \n", + "7 8 0 3 \n", + "8 9 1 3 \n", + "9 10 1 2 \n", + "10 11 1 3 \n", + "11 12 1 1 \n", + "12 13 0 3 \n", + "13 14 0 3 \n", + "14 15 0 3 \n", + "15 16 1 2 \n", + "16 17 0 3 \n", + "17 18 1 2 \n", + "18 19 0 3 \n", + "19 20 1 3 \n", + "20 21 0 2 \n", + "21 22 1 2 \n", + "22 23 1 3 \n", + "23 24 1 1 \n", + "24 25 0 3 \n", + "25 26 1 3 \n", + "26 27 0 3 \n", + "27 28 0 1 \n", + "28 29 1 3 \n", + "29 30 0 3 \n", + ".. ... ... ... \n", + "861 862 0 2 \n", + "862 863 1 1 \n", + "863 864 0 3 \n", + "864 865 0 2 \n", + "865 866 1 2 \n", + "866 867 1 2 \n", + "867 868 0 1 \n", + "868 869 0 3 \n", + "869 870 1 3 \n", + "870 871 0 3 \n", + "871 872 1 1 \n", + "872 873 0 1 \n", + "873 874 0 3 \n", + "874 875 1 2 \n", + "875 876 1 3 \n", + "876 877 0 3 \n", + "877 878 0 3 \n", + "878 879 0 3 \n", + "879 880 1 1 \n", + "880 881 1 2 \n", + "881 882 0 3 \n", + "882 883 0 3 \n", + "883 884 0 2 \n", + "884 885 0 3 \n", + "885 886 0 3 \n", + "886 887 0 2 \n", + "887 888 1 1 \n", + "888 889 0 3 \n", + "889 890 1 1 \n", + "890 891 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "5 Moran, Mr. James male 0.0 0 \n", + "6 McCarthy, Mr. Timothy J male 54.0 0 \n", + "7 Palsson, Master. Gosta Leonard male 2.0 3 \n", + "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n", + "9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n", + "10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n", + "11 Bonnell, Miss. Elizabeth female 58.0 0 \n", + "12 Saundercock, Mr. William Henry male 20.0 0 \n", + "13 Andersson, Mr. 
Anders Johan male 39.0 1 \n", + "14 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 \n", + "15 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 \n", + "16 Rice, Master. Eugene male 2.0 4 \n", + "17 Williams, Mr. Charles Eugene male 0.0 0 \n", + "18 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 \n", + "19 Masselmani, Mrs. Fatima female 0.0 0 \n", + "20 Fynney, Mr. Joseph J male 35.0 0 \n", + "21 Beesley, Mr. Lawrence male 34.0 0 \n", + "22 McGowan, Miss. Anna \"Annie\" female 15.0 0 \n", + "23 Sloper, Mr. William Thompson male 28.0 0 \n", + "24 Palsson, Miss. Torborg Danira female 8.0 3 \n", + "25 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 \n", + "26 Emir, Mr. Farred Chehab male 0.0 0 \n", + "27 Fortune, Mr. Charles Alexander male 19.0 3 \n", + "28 O'Dwyer, Miss. Ellen \"Nellie\" female 0.0 0 \n", + "29 Todoroff, Mr. Lalio male 0.0 0 \n", + ".. ... ... ... ... \n", + "861 Giles, Mr. Frederick Edward male 21.0 1 \n", + "862 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 \n", + "863 Sage, Miss. Dorothy Edith \"Dolly\" female 0.0 8 \n", + "864 Gill, Mr. John William male 24.0 0 \n", + "865 Bystrom, Mrs. (Karolina) female 42.0 0 \n", + "866 Duran y More, Miss. Asuncion female 27.0 1 \n", + "867 Roebling, Mr. Washington Augustus II male 31.0 0 \n", + "868 van Melkebeke, Mr. Philemon male 0.0 0 \n", + "869 Johnson, Master. Harold Theodor male 4.0 1 \n", + "870 Balkic, Mr. Cerin male 26.0 0 \n", + "871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 \n", + "872 Carlsson, Mr. Frans Olof male 33.0 0 \n", + "873 Vander Cruyssen, Mr. Victor male 47.0 0 \n", + "874 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 \n", + "875 Najib, Miss. Adele Kiamie \"Jane\" female 15.0 0 \n", + "876 Gustafsson, Mr. Alfred Ossian male 20.0 0 \n", + "877 Petroff, Mr. Nedelio male 19.0 0 \n", + "878 Laleff, Mr. Kristo male 0.0 0 \n", + "879 Potter, Mrs. 
Thomas Jr (Lily Alexenia Wilson) female 56.0 0 \n", + "880 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 \n", + "881 Markun, Mr. Johann male 33.0 0 \n", + "882 Dahlberg, Miss. Gerda Ulrika female 22.0 0 \n", + "883 Banfield, Mr. Frederick James male 28.0 0 \n", + "884 Sutehall, Mr. Henry Jr male 25.0 0 \n", + "885 Rice, Mrs. William (Margaret Norton) female 39.0 0 \n", + "886 Montvila, Rev. Juozas male 27.0 0 \n", + "887 Graham, Miss. Margaret Edith female 19.0 0 \n", + "888 Johnston, Miss. Catherine Helen \"Carrie\" female 0.0 1 \n", + "889 Behr, Mr. Karl Howell male 26.0 0 \n", + "890 Dooley, Mr. Patrick male 32.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked male \\\n", + "0 0 A/5 21171 7.2500 0 S 0 \n", + "1 0 PC 17599 71.2833 C85 C 0 \n", + "2 0 STON/O2. 3101282 7.9250 0 S 0 \n", + "3 0 113803 53.1000 C123 S 0 \n", + "4 0 373450 8.0500 0 S 0 \n", + "5 0 330877 8.4583 0 Q 0 \n", + "6 0 17463 51.8625 E46 S 0 \n", + "7 1 349909 21.0750 0 S 0 \n", + "8 2 347742 11.1333 0 S 0 \n", + "9 0 237736 30.0708 0 C 0 \n", + "10 1 PP 9549 16.7000 G6 S 0 \n", + "11 0 113783 26.5500 C103 S 0 \n", + "12 0 A/5. 2151 8.0500 0 S 0 \n", + "13 5 347082 31.2750 0 S 0 \n", + "14 0 350406 7.8542 0 S 0 \n", + "15 0 248706 16.0000 0 S 0 \n", + "16 1 382652 29.1250 0 Q 0 \n", + "17 0 244373 13.0000 0 S 0 \n", + "18 0 345763 18.0000 0 S 0 \n", + "19 0 2649 7.2250 0 C 0 \n", + "20 0 239865 26.0000 0 S 0 \n", + "21 0 248698 13.0000 D56 S 0 \n", + "22 0 330923 8.0292 0 Q 0 \n", + "23 0 113788 35.5000 A6 S 0 \n", + "24 1 349909 21.0750 0 S 0 \n", + "25 5 347077 31.3875 0 S 0 \n", + "26 0 2631 7.2250 0 C 0 \n", + "27 2 19950 263.0000 C23 C25 C27 S 0 \n", + "28 0 330959 7.8792 0 Q 0 \n", + "29 0 349216 7.8958 0 S 0 \n", + ".. ... ... ... ... ... ... \n", + "861 0 28134 11.5000 0 S 0 \n", + "862 0 17466 25.9292 D17 S 0 \n", + "863 2 CA. 
2343 69.5500 0 S 0 \n", + "864 0 233866 13.0000 0 S 0 \n", + "865 0 236852 13.0000 0 S 0 \n", + "866 0 SC/PARIS 2149 13.8583 0 C 0 \n", + "867 0 PC 17590 50.4958 A24 S 0 \n", + "868 0 345777 9.5000 0 S 0 \n", + "869 1 347742 11.1333 0 S 0 \n", + "870 0 349248 7.8958 0 S 0 \n", + "871 1 11751 52.5542 D35 S 0 \n", + "872 0 695 5.0000 B51 B53 B55 S 0 \n", + "873 0 345765 9.0000 0 S 0 \n", + "874 0 P/PP 3381 24.0000 0 C 0 \n", + "875 0 2667 7.2250 0 C 0 \n", + "876 0 7534 9.8458 0 S 0 \n", + "877 0 349212 7.8958 0 S 0 \n", + "878 0 349217 7.8958 0 S 0 \n", + "879 1 11767 83.1583 C50 C 0 \n", + "880 1 230433 26.0000 0 S 0 \n", + "881 0 349257 7.8958 0 S 0 \n", + "882 0 7552 10.5167 0 S 0 \n", + "883 0 C.A./SOTON 34068 10.5000 0 S 0 \n", + "884 0 SOTON/OQ 392076 7.0500 0 S 0 \n", + "885 5 382652 29.1250 0 Q 0 \n", + "886 0 211536 13.0000 0 S 0 \n", + "887 0 112053 30.0000 B42 S 0 \n", + "888 2 W./C. 6607 23.4500 0 S 0 \n", + "889 0 111369 30.0000 C148 C 0 \n", + "890 0 370376 7.7500 0 Q 0 \n", + "\n", + " Last first \\\n", + "0 Braund Owen Harris \n", + "1 Cumings John Bradley (Florence Briggs Thayer) \n", + "2 Heikkinen Laina \n", + "3 Futrelle Jacques Heath (Lily May Peel) \n", + "4 Allen William Henry \n", + "5 Moran James \n", + "6 McCarthy Timothy J \n", + "7 Palsson Gosta Leonard \n", + "8 Johnson Oscar W (Elisabeth Vilhelmina Berg) \n", + "9 Nasser Nicholas (Adele Achem) \n", + "10 Sandstrom Marguerite Rut \n", + "11 Bonnell Elizabeth \n", + "12 Saundercock William Henry \n", + "13 Andersson Anders Johan \n", + "14 Vestrom Hulda Amanda Adolfina \n", + "15 Hewlett (Mary D Kingcome) \n", + "16 Rice Eugene \n", + "17 Williams Charles Eugene \n", + "18 Vander Planke Julius (Emelia Maria Vandemoortele) \n", + "19 Masselmani Fatima \n", + "20 Fynney Joseph J \n", + "21 Beesley Lawrence \n", + "22 McGowan Anna \"Annie\" \n", + "23 Sloper William Thompson \n", + "24 Palsson Torborg Danira \n", + "25 Asplund Carl Oscar (Selma Augusta Emilia Johansson) \n", + "26 Emir 
Farred Chehab \n", + "27 Fortune Charles Alexander \n", + "28 O'Dwyer Ellen \"Nellie\" \n", + "29 Todoroff Lalio \n", + ".. ... ... \n", + "861 Giles Frederick Edward \n", + "862 Swift Frederick Joel (Margaret Welles Barron) \n", + "863 Sage Dorothy Edith \"Dolly\" \n", + "864 Gill John William \n", + "865 Bystrom (Karolina) \n", + "866 Duran y More Asuncion \n", + "867 Roebling Washington Augustus II \n", + "868 van Melkebeke Philemon \n", + "869 Johnson Harold Theodor \n", + "870 Balkic Cerin \n", + "871 Beckwith Richard Leonard (Sallie Monypeny) \n", + "872 Carlsson Frans Olof \n", + "873 Vander Cruyssen Victor \n", + "874 Abelson Samuel (Hannah Wizosky) \n", + "875 Najib Adele Kiamie \"Jane\" \n", + "876 Gustafsson Alfred Ossian \n", + "877 Petroff Nedelio \n", + "878 Laleff Kristo \n", + "879 Potter Thomas Jr (Lily Alexenia Wilson) \n", + "880 Shelley William (Imanita Parrish Hall) \n", + "881 Markun Johann \n", + "882 Dahlberg Gerda Ulrika \n", + "883 Banfield Frederick James \n", + "884 Sutehall Henry Jr \n", + "885 Rice William (Margaret Norton) \n", + "886 Montvila Juozas \n", + "887 Graham Margaret Edith \n", + "888 Johnston Catherine Helen \"Carrie\" \n", + "889 Behr Karl Howell \n", + "890 Dooley Patrick \n", + "\n", + " beginning Prefix \n", + "0 Mr. Owen Harris Mr \n", + "1 Mrs. John Bradley (Florence Briggs Thayer) Mrs \n", + "2 Miss. Laina Miss \n", + "3 Mrs. Jacques Heath (Lily May Peel) Mrs \n", + "4 Mr. William Henry Mr \n", + "5 Mr. James Mr \n", + "6 Mr. Timothy J Mr \n", + "7 Master. Gosta Leonard Master \n", + "8 Mrs. Oscar W (Elisabeth Vilhelmina Berg) Mrs \n", + "9 Mrs. Nicholas (Adele Achem) Mrs \n", + "10 Miss. Marguerite Rut Miss \n", + "11 Miss. Elizabeth Miss \n", + "12 Mr. William Henry Mr \n", + "13 Mr. Anders Johan Mr \n", + "14 Miss. Hulda Amanda Adolfina Miss \n", + "15 Mrs. (Mary D Kingcome) Mrs \n", + "16 Master. Eugene Master \n", + "17 Mr. Charles Eugene Mr \n", + "18 Mrs. 
Julius (Emelia Maria Vandemoortele) Mrs \n", + "19 Mrs. Fatima Mrs \n", + "20 Mr. Joseph J Mr \n", + "21 Mr. Lawrence Mr \n", + "22 Miss. Anna \"Annie\" Miss \n", + "23 Mr. William Thompson Mr \n", + "24 Miss. Torborg Danira Miss \n", + "25 Mrs. Carl Oscar (Selma Augusta Emilia Johansson) Mrs \n", + "26 Mr. Farred Chehab Mr \n", + "27 Mr. Charles Alexander Mr \n", + "28 Miss. Ellen \"Nellie\" Miss \n", + "29 Mr. Lalio Mr \n", + ".. ... ... \n", + "861 Mr. Frederick Edward Mr \n", + "862 Mrs. Frederick Joel (Margaret Welles Barron) Mrs \n", + "863 Miss. Dorothy Edith \"Dolly\" Miss \n", + "864 Mr. John William Mr \n", + "865 Mrs. (Karolina) Mrs \n", + "866 Miss. Asuncion Miss \n", + "867 Mr. Washington Augustus II Mr \n", + "868 Mr. Philemon Mr \n", + "869 Master. Harold Theodor Master \n", + "870 Mr. Cerin Mr \n", + "871 Mrs. Richard Leonard (Sallie Monypeny) Mrs \n", + "872 Mr. Frans Olof Mr \n", + "873 Mr. Victor Mr \n", + "874 Mrs. Samuel (Hannah Wizosky) Mrs \n", + "875 Miss. Adele Kiamie \"Jane\" Miss \n", + "876 Mr. Alfred Ossian Mr \n", + "877 Mr. Nedelio Mr \n", + "878 Mr. Kristo Mr \n", + "879 Mrs. Thomas Jr (Lily Alexenia Wilson) Mrs \n", + "880 Mrs. William (Imanita Parrish Hall) Mrs \n", + "881 Mr. Johann Mr \n", + "882 Miss. Gerda Ulrika Miss \n", + "883 Mr. Frederick James Mr \n", + "884 Mr. Henry Jr Mr \n", + "885 Mrs. William (Margaret Norton) Mrs \n", + "886 Rev. Juozas Rev \n", + "887 Miss. Margaret Edith Miss \n", + "888 Miss. Catherine Helen \"Carrie\" Miss \n", + "889 Mr. Karl Howell Mr \n", + "890 Mr. 
Patrick Mr \n", + "\n", + "[891 rows x 17 columns]" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Prefix'], df['first'] = df['beginning'].str.split('.', 1).str\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def Prefix_to_numeric(w):\n", + " if w==' Mr':\n", + " return 1\n", + " if w==' Mrs':\n", + " return 1\n", + " if w==' Miss':\n", + " return 0\n", + " if w==' Master':\n", + " return 0\n", + " else:\n", + " return 1" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedmaleLastfirstbeginningPrefixPrefixscoreAdult
0103Braund, Mr. Owen Harrismale22.010A/5 211717.25000S0BraundOwen HarrisMr. Owen HarrisMr11
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C0CumingsJohn Bradley (Florence Briggs Thayer)Mrs. John Bradley (Florence Briggs Thayer)Mrs11
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.92500S0HeikkinenLainaMiss. LainaMiss20
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S0FutrelleJacques Heath (Lily May Peel)Mrs. Jacques Heath (Lily May Peel)Mrs11
4503Allen, Mr. William Henrymale35.0003734508.05000S0AllenWilliam HenryMr. William HenryMr11
5603Moran, Mr. Jamesmale0.0003308778.45830Q0MoranJamesMr. JamesMr11
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S0McCarthyTimothy JMr. Timothy JMr11
7803Palsson, Master. Gosta Leonardmale2.03134990921.07500S0PalssonGosta LeonardMaster. Gosta LeonardMaster20
8913Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female27.00234774211.13330S0JohnsonOscar W (Elisabeth Vilhelmina Berg)Mrs. Oscar W (Elisabeth Vilhelmina Berg)Mrs11
91012Nasser, Mrs. Nicholas (Adele Achem)female14.01023773630.07080C0NasserNicholas (Adele Achem)Mrs. Nicholas (Adele Achem)Mrs11
101113Sandstrom, Miss. Marguerite Rutfemale4.011PP 954916.7000G6S0SandstromMarguerite RutMiss. Marguerite RutMiss20
111211Bonnell, Miss. Elizabethfemale58.00011378326.5500C103S0BonnellElizabethMiss. ElizabethMiss20
121303Saundercock, Mr. William Henrymale20.000A/5. 21518.05000S0SaundercockWilliam HenryMr. William HenryMr11
131403Andersson, Mr. Anders Johanmale39.01534708231.27500S0AnderssonAnders JohanMr. Anders JohanMr11
141503Vestrom, Miss. Hulda Amanda Adolfinafemale14.0003504067.85420S0VestromHulda Amanda AdolfinaMiss. Hulda Amanda AdolfinaMiss20
151612Hewlett, Mrs. (Mary D Kingcome)female55.00024870616.00000S0Hewlett(Mary D Kingcome)Mrs. (Mary D Kingcome)Mrs11
161703Rice, Master. Eugenemale2.04138265229.12500Q0RiceEugeneMaster. EugeneMaster20
171812Williams, Mr. Charles Eugenemale0.00024437313.00000S0WilliamsCharles EugeneMr. Charles EugeneMr11
181903Vander Planke, Mrs. Julius (Emelia Maria Vande...female31.01034576318.00000S0Vander PlankeJulius (Emelia Maria Vandemoortele)Mrs. Julius (Emelia Maria Vandemoortele)Mrs11
192013Masselmani, Mrs. Fatimafemale0.00026497.22500C0MasselmaniFatimaMrs. FatimaMrs11
202102Fynney, Mr. Joseph Jmale35.00023986526.00000S0FynneyJoseph JMr. Joseph JMr11
212212Beesley, Mr. Lawrencemale34.00024869813.0000D56S0BeesleyLawrenceMr. LawrenceMr11
222313McGowan, Miss. Anna \"Annie\"female15.0003309238.02920Q0McGowanAnna \"Annie\"Miss. Anna \"Annie\"Miss20
232411Sloper, Mr. William Thompsonmale28.00011378835.5000A6S0SloperWilliam ThompsonMr. William ThompsonMr11
242503Palsson, Miss. Torborg Danirafemale8.03134990921.07500S0PalssonTorborg DaniraMiss. Torborg DaniraMiss20
252613Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...female38.01534707731.38750S0AsplundCarl Oscar (Selma Augusta Emilia Johansson)Mrs. Carl Oscar (Selma Augusta Emilia Johansson)Mrs11
262703Emir, Mr. Farred Chehabmale0.00026317.22500C0EmirFarred ChehabMr. Farred ChehabMr11
272801Fortune, Mr. Charles Alexandermale19.03219950263.0000C23 C25 C27S0FortuneCharles AlexanderMr. Charles AlexanderMr11
282913O'Dwyer, Miss. Ellen \"Nellie\"female0.0003309597.87920Q0O'DwyerEllen \"Nellie\"Miss. Ellen \"Nellie\"Miss20
293003Todoroff, Mr. Laliomale0.0003492167.89580S0TodoroffLalioMr. LalioMr11
............................................................
86186202Giles, Mr. Frederick Edwardmale21.0102813411.50000S0GilesFrederick EdwardMr. Frederick EdwardMr11
86286311Swift, Mrs. Frederick Joel (Margaret Welles Ba...female48.0001746625.9292D17S0SwiftFrederick Joel (Margaret Welles Barron)Mrs. Frederick Joel (Margaret Welles Barron)Mrs11
86386403Sage, Miss. Dorothy Edith \"Dolly\"female0.082CA. 234369.55000S0SageDorothy Edith \"Dolly\"Miss. Dorothy Edith \"Dolly\"Miss20
86486502Gill, Mr. John Williammale24.00023386613.00000S0GillJohn WilliamMr. John WilliamMr11
86586612Bystrom, Mrs. (Karolina)female42.00023685213.00000S0Bystrom(Karolina)Mrs. (Karolina)Mrs11
86686712Duran y More, Miss. Asuncionfemale27.010SC/PARIS 214913.85830C0Duran y MoreAsuncionMiss. AsuncionMiss20
86786801Roebling, Mr. Washington Augustus IImale31.000PC 1759050.4958A24S0RoeblingWashington Augustus IIMr. Washington Augustus IIMr11
86886903van Melkebeke, Mr. Philemonmale0.0003457779.50000S0van MelkebekePhilemonMr. PhilemonMr11
86987013Johnson, Master. Harold Theodormale4.01134774211.13330S0JohnsonHarold TheodorMaster. Harold TheodorMaster20
87087103Balkic, Mr. Cerinmale26.0003492487.89580S0BalkicCerinMr. CerinMr11
87187211Beckwith, Mrs. Richard Leonard (Sallie Monypeny)female47.0111175152.5542D35S0BeckwithRichard Leonard (Sallie Monypeny)Mrs. Richard Leonard (Sallie Monypeny)Mrs11
87287301Carlsson, Mr. Frans Olofmale33.0006955.0000B51 B53 B55S0CarlssonFrans OlofMr. Frans OlofMr11
87387403Vander Cruyssen, Mr. Victormale47.0003457659.00000S0Vander CruyssenVictorMr. VictorMr11
87487512Abelson, Mrs. Samuel (Hannah Wizosky)female28.010P/PP 338124.00000C0AbelsonSamuel (Hannah Wizosky)Mrs. Samuel (Hannah Wizosky)Mrs11
87587613Najib, Miss. Adele Kiamie \"Jane\"female15.00026677.22500C0NajibAdele Kiamie \"Jane\"Miss. Adele Kiamie \"Jane\"Miss20
87687703Gustafsson, Mr. Alfred Ossianmale20.00075349.84580S0GustafssonAlfred OssianMr. Alfred OssianMr11
87787803Petroff, Mr. Nedeliomale19.0003492127.89580S0PetroffNedelioMr. NedelioMr11
87887903Laleff, Mr. Kristomale0.0003492177.89580S0LaleffKristoMr. KristoMr11
87988011Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)female56.0011176783.1583C50C0PotterThomas Jr (Lily Alexenia Wilson)Mrs. Thomas Jr (Lily Alexenia Wilson)Mrs11
88088112Shelley, Mrs. William (Imanita Parrish Hall)female25.00123043326.00000S0ShelleyWilliam (Imanita Parrish Hall)Mrs. William (Imanita Parrish Hall)Mrs11
88188203Markun, Mr. Johannmale33.0003492577.89580S0MarkunJohannMr. JohannMr11
88288303Dahlberg, Miss. Gerda Ulrikafemale22.000755210.51670S0DahlbergGerda UlrikaMiss. Gerda UlrikaMiss20
88388402Banfield, Mr. Frederick Jamesmale28.000C.A./SOTON 3406810.50000S0BanfieldFrederick JamesMr. Frederick JamesMr11
88488503Sutehall, Mr. Henry Jrmale25.000SOTON/OQ 3920767.05000S0SutehallHenry JrMr. Henry JrMr11
88588603Rice, Mrs. William (Margaret Norton)female39.00538265229.12500Q0RiceWilliam (Margaret Norton)Mrs. William (Margaret Norton)Mrs11
88688702Montvila, Rev. Juozasmale27.00021153613.00000S0MontvilaJuozasRev. JuozasRev11
88788811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42S0GrahamMargaret EdithMiss. Margaret EdithMiss20
88888903Johnston, Miss. Catherine Helen \"Carrie\"female0.012W./C. 660723.45000S0JohnstonCatherine Helen \"Carrie\"Miss. Catherine Helen \"Carrie\"Miss20
88989011Behr, Mr. Karl Howellmale26.00011136930.0000C148C0BehrKarl HowellMr. Karl HowellMr11
89089103Dooley, Mr. Patrickmale32.0003703767.75000Q0DooleyPatrickMr. PatrickMr11
\n", + "

891 rows × 19 columns

\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "5 6 0 3 \n", + "6 7 0 1 \n", + "7 8 0 3 \n", + "8 9 1 3 \n", + "9 10 1 2 \n", + "10 11 1 3 \n", + "11 12 1 1 \n", + "12 13 0 3 \n", + "13 14 0 3 \n", + "14 15 0 3 \n", + "15 16 1 2 \n", + "16 17 0 3 \n", + "17 18 1 2 \n", + "18 19 0 3 \n", + "19 20 1 3 \n", + "20 21 0 2 \n", + "21 22 1 2 \n", + "22 23 1 3 \n", + "23 24 1 1 \n", + "24 25 0 3 \n", + "25 26 1 3 \n", + "26 27 0 3 \n", + "27 28 0 1 \n", + "28 29 1 3 \n", + "29 30 0 3 \n", + ".. ... ... ... \n", + "861 862 0 2 \n", + "862 863 1 1 \n", + "863 864 0 3 \n", + "864 865 0 2 \n", + "865 866 1 2 \n", + "866 867 1 2 \n", + "867 868 0 1 \n", + "868 869 0 3 \n", + "869 870 1 3 \n", + "870 871 0 3 \n", + "871 872 1 1 \n", + "872 873 0 1 \n", + "873 874 0 3 \n", + "874 875 1 2 \n", + "875 876 1 3 \n", + "876 877 0 3 \n", + "877 878 0 3 \n", + "878 879 0 3 \n", + "879 880 1 1 \n", + "880 881 1 2 \n", + "881 882 0 3 \n", + "882 883 0 3 \n", + "883 884 0 2 \n", + "884 885 0 3 \n", + "885 886 0 3 \n", + "886 887 0 2 \n", + "887 888 1 1 \n", + "888 889 0 3 \n", + "889 890 1 1 \n", + "890 891 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "5 Moran, Mr. James male 0.0 0 \n", + "6 McCarthy, Mr. Timothy J male 54.0 0 \n", + "7 Palsson, Master. Gosta Leonard male 2.0 3 \n", + "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n", + "9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n", + "10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n", + "11 Bonnell, Miss. Elizabeth female 58.0 0 \n", + "12 Saundercock, Mr. William Henry male 20.0 0 \n", + "13 Andersson, Mr. 
Anders Johan male 39.0 1 \n", + "14 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 \n", + "15 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 \n", + "16 Rice, Master. Eugene male 2.0 4 \n", + "17 Williams, Mr. Charles Eugene male 0.0 0 \n", + "18 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 \n", + "19 Masselmani, Mrs. Fatima female 0.0 0 \n", + "20 Fynney, Mr. Joseph J male 35.0 0 \n", + "21 Beesley, Mr. Lawrence male 34.0 0 \n", + "22 McGowan, Miss. Anna \"Annie\" female 15.0 0 \n", + "23 Sloper, Mr. William Thompson male 28.0 0 \n", + "24 Palsson, Miss. Torborg Danira female 8.0 3 \n", + "25 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 \n", + "26 Emir, Mr. Farred Chehab male 0.0 0 \n", + "27 Fortune, Mr. Charles Alexander male 19.0 3 \n", + "28 O'Dwyer, Miss. Ellen \"Nellie\" female 0.0 0 \n", + "29 Todoroff, Mr. Lalio male 0.0 0 \n", + ".. ... ... ... ... \n", + "861 Giles, Mr. Frederick Edward male 21.0 1 \n", + "862 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 \n", + "863 Sage, Miss. Dorothy Edith \"Dolly\" female 0.0 8 \n", + "864 Gill, Mr. John William male 24.0 0 \n", + "865 Bystrom, Mrs. (Karolina) female 42.0 0 \n", + "866 Duran y More, Miss. Asuncion female 27.0 1 \n", + "867 Roebling, Mr. Washington Augustus II male 31.0 0 \n", + "868 van Melkebeke, Mr. Philemon male 0.0 0 \n", + "869 Johnson, Master. Harold Theodor male 4.0 1 \n", + "870 Balkic, Mr. Cerin male 26.0 0 \n", + "871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 \n", + "872 Carlsson, Mr. Frans Olof male 33.0 0 \n", + "873 Vander Cruyssen, Mr. Victor male 47.0 0 \n", + "874 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 \n", + "875 Najib, Miss. Adele Kiamie \"Jane\" female 15.0 0 \n", + "876 Gustafsson, Mr. Alfred Ossian male 20.0 0 \n", + "877 Petroff, Mr. Nedelio male 19.0 0 \n", + "878 Laleff, Mr. Kristo male 0.0 0 \n", + "879 Potter, Mrs. 
Thomas Jr (Lily Alexenia Wilson) female 56.0 0 \n", + "880 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 \n", + "881 Markun, Mr. Johann male 33.0 0 \n", + "882 Dahlberg, Miss. Gerda Ulrika female 22.0 0 \n", + "883 Banfield, Mr. Frederick James male 28.0 0 \n", + "884 Sutehall, Mr. Henry Jr male 25.0 0 \n", + "885 Rice, Mrs. William (Margaret Norton) female 39.0 0 \n", + "886 Montvila, Rev. Juozas male 27.0 0 \n", + "887 Graham, Miss. Margaret Edith female 19.0 0 \n", + "888 Johnston, Miss. Catherine Helen \"Carrie\" female 0.0 1 \n", + "889 Behr, Mr. Karl Howell male 26.0 0 \n", + "890 Dooley, Mr. Patrick male 32.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked male \\\n", + "0 0 A/5 21171 7.2500 0 S 0 \n", + "1 0 PC 17599 71.2833 C85 C 0 \n", + "2 0 STON/O2. 3101282 7.9250 0 S 0 \n", + "3 0 113803 53.1000 C123 S 0 \n", + "4 0 373450 8.0500 0 S 0 \n", + "5 0 330877 8.4583 0 Q 0 \n", + "6 0 17463 51.8625 E46 S 0 \n", + "7 1 349909 21.0750 0 S 0 \n", + "8 2 347742 11.1333 0 S 0 \n", + "9 0 237736 30.0708 0 C 0 \n", + "10 1 PP 9549 16.7000 G6 S 0 \n", + "11 0 113783 26.5500 C103 S 0 \n", + "12 0 A/5. 2151 8.0500 0 S 0 \n", + "13 5 347082 31.2750 0 S 0 \n", + "14 0 350406 7.8542 0 S 0 \n", + "15 0 248706 16.0000 0 S 0 \n", + "16 1 382652 29.1250 0 Q 0 \n", + "17 0 244373 13.0000 0 S 0 \n", + "18 0 345763 18.0000 0 S 0 \n", + "19 0 2649 7.2250 0 C 0 \n", + "20 0 239865 26.0000 0 S 0 \n", + "21 0 248698 13.0000 D56 S 0 \n", + "22 0 330923 8.0292 0 Q 0 \n", + "23 0 113788 35.5000 A6 S 0 \n", + "24 1 349909 21.0750 0 S 0 \n", + "25 5 347077 31.3875 0 S 0 \n", + "26 0 2631 7.2250 0 C 0 \n", + "27 2 19950 263.0000 C23 C25 C27 S 0 \n", + "28 0 330959 7.8792 0 Q 0 \n", + "29 0 349216 7.8958 0 S 0 \n", + ".. ... ... ... ... ... ... \n", + "861 0 28134 11.5000 0 S 0 \n", + "862 0 17466 25.9292 D17 S 0 \n", + "863 2 CA. 
2343 69.5500 0 S 0 \n", + "864 0 233866 13.0000 0 S 0 \n", + "865 0 236852 13.0000 0 S 0 \n", + "866 0 SC/PARIS 2149 13.8583 0 C 0 \n", + "867 0 PC 17590 50.4958 A24 S 0 \n", + "868 0 345777 9.5000 0 S 0 \n", + "869 1 347742 11.1333 0 S 0 \n", + "870 0 349248 7.8958 0 S 0 \n", + "871 1 11751 52.5542 D35 S 0 \n", + "872 0 695 5.0000 B51 B53 B55 S 0 \n", + "873 0 345765 9.0000 0 S 0 \n", + "874 0 P/PP 3381 24.0000 0 C 0 \n", + "875 0 2667 7.2250 0 C 0 \n", + "876 0 7534 9.8458 0 S 0 \n", + "877 0 349212 7.8958 0 S 0 \n", + "878 0 349217 7.8958 0 S 0 \n", + "879 1 11767 83.1583 C50 C 0 \n", + "880 1 230433 26.0000 0 S 0 \n", + "881 0 349257 7.8958 0 S 0 \n", + "882 0 7552 10.5167 0 S 0 \n", + "883 0 C.A./SOTON 34068 10.5000 0 S 0 \n", + "884 0 SOTON/OQ 392076 7.0500 0 S 0 \n", + "885 5 382652 29.1250 0 Q 0 \n", + "886 0 211536 13.0000 0 S 0 \n", + "887 0 112053 30.0000 B42 S 0 \n", + "888 2 W./C. 6607 23.4500 0 S 0 \n", + "889 0 111369 30.0000 C148 C 0 \n", + "890 0 370376 7.7500 0 Q 0 \n", + "\n", + " Last first \\\n", + "0 Braund Owen Harris \n", + "1 Cumings John Bradley (Florence Briggs Thayer) \n", + "2 Heikkinen Laina \n", + "3 Futrelle Jacques Heath (Lily May Peel) \n", + "4 Allen William Henry \n", + "5 Moran James \n", + "6 McCarthy Timothy J \n", + "7 Palsson Gosta Leonard \n", + "8 Johnson Oscar W (Elisabeth Vilhelmina Berg) \n", + "9 Nasser Nicholas (Adele Achem) \n", + "10 Sandstrom Marguerite Rut \n", + "11 Bonnell Elizabeth \n", + "12 Saundercock William Henry \n", + "13 Andersson Anders Johan \n", + "14 Vestrom Hulda Amanda Adolfina \n", + "15 Hewlett (Mary D Kingcome) \n", + "16 Rice Eugene \n", + "17 Williams Charles Eugene \n", + "18 Vander Planke Julius (Emelia Maria Vandemoortele) \n", + "19 Masselmani Fatima \n", + "20 Fynney Joseph J \n", + "21 Beesley Lawrence \n", + "22 McGowan Anna \"Annie\" \n", + "23 Sloper William Thompson \n", + "24 Palsson Torborg Danira \n", + "25 Asplund Carl Oscar (Selma Augusta Emilia Johansson) \n", + "26 Emir 
Farred Chehab \n", + "27 Fortune Charles Alexander \n", + "28 O'Dwyer Ellen \"Nellie\" \n", + "29 Todoroff Lalio \n", + ".. ... ... \n", + "861 Giles Frederick Edward \n", + "862 Swift Frederick Joel (Margaret Welles Barron) \n", + "863 Sage Dorothy Edith \"Dolly\" \n", + "864 Gill John William \n", + "865 Bystrom (Karolina) \n", + "866 Duran y More Asuncion \n", + "867 Roebling Washington Augustus II \n", + "868 van Melkebeke Philemon \n", + "869 Johnson Harold Theodor \n", + "870 Balkic Cerin \n", + "871 Beckwith Richard Leonard (Sallie Monypeny) \n", + "872 Carlsson Frans Olof \n", + "873 Vander Cruyssen Victor \n", + "874 Abelson Samuel (Hannah Wizosky) \n", + "875 Najib Adele Kiamie \"Jane\" \n", + "876 Gustafsson Alfred Ossian \n", + "877 Petroff Nedelio \n", + "878 Laleff Kristo \n", + "879 Potter Thomas Jr (Lily Alexenia Wilson) \n", + "880 Shelley William (Imanita Parrish Hall) \n", + "881 Markun Johann \n", + "882 Dahlberg Gerda Ulrika \n", + "883 Banfield Frederick James \n", + "884 Sutehall Henry Jr \n", + "885 Rice William (Margaret Norton) \n", + "886 Montvila Juozas \n", + "887 Graham Margaret Edith \n", + "888 Johnston Catherine Helen \"Carrie\" \n", + "889 Behr Karl Howell \n", + "890 Dooley Patrick \n", + "\n", + " beginning Prefix Prefixscore \\\n", + "0 Mr. Owen Harris Mr 1 \n", + "1 Mrs. John Bradley (Florence Briggs Thayer) Mrs 1 \n", + "2 Miss. Laina Miss 2 \n", + "3 Mrs. Jacques Heath (Lily May Peel) Mrs 1 \n", + "4 Mr. William Henry Mr 1 \n", + "5 Mr. James Mr 1 \n", + "6 Mr. Timothy J Mr 1 \n", + "7 Master. Gosta Leonard Master 2 \n", + "8 Mrs. Oscar W (Elisabeth Vilhelmina Berg) Mrs 1 \n", + "9 Mrs. Nicholas (Adele Achem) Mrs 1 \n", + "10 Miss. Marguerite Rut Miss 2 \n", + "11 Miss. Elizabeth Miss 2 \n", + "12 Mr. William Henry Mr 1 \n", + "13 Mr. Anders Johan Mr 1 \n", + "14 Miss. Hulda Amanda Adolfina Miss 2 \n", + "15 Mrs. (Mary D Kingcome) Mrs 1 \n", + "16 Master. Eugene Master 2 \n", + "17 Mr. Charles Eugene Mr 1 \n", + "18 Mrs. 
Julius (Emelia Maria Vandemoortele) Mrs 1 \n", + "19 Mrs. Fatima Mrs 1 \n", + "20 Mr. Joseph J Mr 1 \n", + "21 Mr. Lawrence Mr 1 \n", + "22 Miss. Anna \"Annie\" Miss 2 \n", + "23 Mr. William Thompson Mr 1 \n", + "24 Miss. Torborg Danira Miss 2 \n", + "25 Mrs. Carl Oscar (Selma Augusta Emilia Johansson) Mrs 1 \n", + "26 Mr. Farred Chehab Mr 1 \n", + "27 Mr. Charles Alexander Mr 1 \n", + "28 Miss. Ellen \"Nellie\" Miss 2 \n", + "29 Mr. Lalio Mr 1 \n", + ".. ... ... ... \n", + "861 Mr. Frederick Edward Mr 1 \n", + "862 Mrs. Frederick Joel (Margaret Welles Barron) Mrs 1 \n", + "863 Miss. Dorothy Edith \"Dolly\" Miss 2 \n", + "864 Mr. John William Mr 1 \n", + "865 Mrs. (Karolina) Mrs 1 \n", + "866 Miss. Asuncion Miss 2 \n", + "867 Mr. Washington Augustus II Mr 1 \n", + "868 Mr. Philemon Mr 1 \n", + "869 Master. Harold Theodor Master 2 \n", + "870 Mr. Cerin Mr 1 \n", + "871 Mrs. Richard Leonard (Sallie Monypeny) Mrs 1 \n", + "872 Mr. Frans Olof Mr 1 \n", + "873 Mr. Victor Mr 1 \n", + "874 Mrs. Samuel (Hannah Wizosky) Mrs 1 \n", + "875 Miss. Adele Kiamie \"Jane\" Miss 2 \n", + "876 Mr. Alfred Ossian Mr 1 \n", + "877 Mr. Nedelio Mr 1 \n", + "878 Mr. Kristo Mr 1 \n", + "879 Mrs. Thomas Jr (Lily Alexenia Wilson) Mrs 1 \n", + "880 Mrs. William (Imanita Parrish Hall) Mrs 1 \n", + "881 Mr. Johann Mr 1 \n", + "882 Miss. Gerda Ulrika Miss 2 \n", + "883 Mr. Frederick James Mr 1 \n", + "884 Mr. Henry Jr Mr 1 \n", + "885 Mrs. William (Margaret Norton) Mrs 1 \n", + "886 Rev. Juozas Rev 1 \n", + "887 Miss. Margaret Edith Miss 2 \n", + "888 Miss. Catherine Helen \"Carrie\" Miss 2 \n", + "889 Mr. Karl Howell Mr 1 \n", + "890 Mr. 
Patrick Mr 1 \n", + "\n", + " Adult \n", + "0 1 \n", + "1 1 \n", + "2 0 \n", + "3 1 \n", + "4 1 \n", + "5 1 \n", + "6 1 \n", + "7 0 \n", + "8 1 \n", + "9 1 \n", + "10 0 \n", + "11 0 \n", + "12 1 \n", + "13 1 \n", + "14 0 \n", + "15 1 \n", + "16 0 \n", + "17 1 \n", + "18 1 \n", + "19 1 \n", + "20 1 \n", + "21 1 \n", + "22 0 \n", + "23 1 \n", + "24 0 \n", + "25 1 \n", + "26 1 \n", + "27 1 \n", + "28 0 \n", + "29 1 \n", + ".. ... \n", + "861 1 \n", + "862 1 \n", + "863 0 \n", + "864 1 \n", + "865 1 \n", + "866 0 \n", + "867 1 \n", + "868 1 \n", + "869 0 \n", + "870 1 \n", + "871 1 \n", + "872 1 \n", + "873 1 \n", + "874 1 \n", + "875 0 \n", + "876 1 \n", + "877 1 \n", + "878 1 \n", + "879 1 \n", + "880 1 \n", + "881 1 \n", + "882 0 \n", + "883 1 \n", + "884 1 \n", + "885 1 \n", + "886 1 \n", + "887 0 \n", + "888 0 \n", + "889 1 \n", + "890 1 \n", + "\n", + "[891 rows x 19 columns]" + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Adult'] = df['Prefix'].apply(Prefix_to_numeric)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then I ran the test train split for decision trees to train the model and score it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "dfsubset = df.filter([\"Survived\", \"Pclass\", \"Age\", \"Fare\", \"male\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X = dfsubset[[\"Pclass\", \"Age\", \"Fare\", \"male\"]]\n", + "y = dfsubset[[\"Survived\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "model = tree.DecisionTreeClassifier().fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", + " max_features=None, max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best')" + ] + }, + "execution_count": 142, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Score on the held-out rows only. X and y still contain the rows the tree\n", + "# was fitted on, so model.score(X, y) would overstate the accuracy.\n", + "# Re-run this cell to record the held-out accuracy.\n", + "model.score(X_test, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "Trying to make a picture, I think I have to install something else first. 
Says I don't have pydotplus" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", + " max_features=None, max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best')" + ] + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "dtree=DecisionTreeClassifier()\n", + "dtree.fit(X,y)" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'pydotplus'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mIPython\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdisplay\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mImage\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtree\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mexport_graphviz\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mpydotplus\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[0mdot_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mStringIO\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m export_graphviz(dtree, out_file=dot_data, \n", + 
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'pydotplus'" + ] + } + ], + "source": [ + "from sklearn.externals.six import StringIO \n", + "from IPython.display import Image \n", + "from sklearn.tree import export_graphviz\n", + "import pydotplus\n", + "dot_data = StringIO()\n", + "export_graphviz(dtree, out_file=dot_data, \n", + " filled=True, rounded=True,\n", + " special_characters=True)\n", + "graph = pydotplus.graph_from_dot_data(dot_data.getvalue()) \n", + "Image(graph.create_png())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0], dtype=int64)" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict([[1,.67, 14.5, 1]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}