# Exploratory walk-through of the Titanic passenger dataset with pandas.
#
# Fetch the data file into the working directory before running:
#   wget https://raw.githubusercontent.com/lakshya90/DataScience101/master/titanic.csv
#   (on macOS: curl -O https://raw.githubusercontent.com/lakshya90/DataScience101/master/titanic.csv)
#
# The bare expressions below are meant for an interactive session or notebook,
# where each result is echoed. Run as a plain script they are evaluated and
# discarded; only df.info() (which prints) and df.to_csv() have visible effects.
import pandas as pd

# --- Load and take a first look -------------------------------------------
df = pd.read_csv('titanic.csv')
df.shape  # Output: (891, 12)
df.head(5)
df.tail(5)
df.info()  # prints the per-column summary
df.describe()

# --- Frequency counts of categorical features -----------------------------
df['Sex'].value_counts()
df['Survived'].value_counts()
df['Pclass'].value_counts()

# --- Selecting a column and a row ------------------------------------------
d = df['PassengerId']
type(d)  # <class 'pandas.core.series.Series'>
df.iloc[654, :]
df.describe()  # Check count of all features

# --- Impute missing ages with the mean age ---------------------------------
# NOTE: df_a is another name for the same DataFrame, not a copy, so the
# imputation below is also visible through df (and ends up in output.csv).
df_a = df
df_a['Age'] = df_a['Age'].fillna(df_a['Age'].mean())
df_a['Age'].count()  # 891 from previous 714

# --- Dropping columns and locating missing values --------------------------
# drop() returns a new DataFrame; the result is not assigned, so df keeps
# these columns (df['Cabin'] is still used below).
df.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)
pd.isnull(df['Cabin'])
df[df['Embarked'].isnull()]

# --- Filtering, sorting and grouping ---------------------------------------
df['Pclass'].value_counts()
s = df['Pclass'] < 2  # boolean Series: True where Pclass is below 2
s.value_counts()
df.sort_values('Age')  # returns a sorted frame; df itself is not reordered
df.groupby('Sex')['Survived'].value_counts()
df.groupby('Sex')['Survived'].mean()

# --- Summary statistics -----------------------------------------------------
df.describe(include=['O'])  # describe the object-dtype columns
df['Age'].mean()  # Mean of all values of the feature 'Age'
df['Cabin'].count()  # Count of all valid 'Cabin' values
df['Fare'].max()  # Maximum fare paid for the ticket

# --- Persist the frame -------------------------------------------------------
df.to_csv('output.csv')