Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions PythonAssignment2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
# Select matplotlib's 'ggplot' style sheet.
style.use('ggplot')

# Load the hospital records into a DataFrame.
# NOTE(review): the absolute Windows path is machine-specific; the script only
# runs where this exact file location exists.
hospitaldata = pd.read_csv(r'D:\DIH\Assignments\FarooqUrRehman_KCI_Python_Assignment2\hospitaldata.csv')

#Answer 1
# Remove every '.' from the column labels and show the resulting labels.
clean_labels = {label: label.replace('.', '') for label in hospitaldata.columns}
hospitaldata = hospitaldata.rename(columns=clean_labels)
print(hospitaldata.columns)

#Answer 2
# Weekday with the most visits.
# `.dt.weekday_name` was removed in pandas 1.0; `.dt.day_name()` is the
# supported accessor and returns the same English day names.
hospitaldata['WeekDay'] = pd.to_datetime(hospitaldata['Date']).dt.day_name()
WeekDay_Counts = hospitaldata[['WeekDay', 'id']].groupby('WeekDay').agg('count')
WeekDay_Max = WeekDay_Counts['id'].max()
# Keep every weekday that ties for the maximum, not just the first one.
print(WeekDay_Counts[WeekDay_Counts['id'] == WeekDay_Max])

#Answer 3
# Average patient age in years.
# Rows with a missing Age or the '-' placeholder are excluded.
Average_Age = hospitaldata.dropna(subset=['Age'])
# .copy() so the column assignments below modify an independent frame instead
# of a slice of hospitaldata (avoids SettingWithCopyWarning).
Average_Age = Average_Age[Average_Age['Age'] != '-'].copy()
# Raw string keeps '\d' a regex escape; expand=False returns a Series rather
# than a one-column DataFrame.
Average_Age['Age_Years'] = Average_Age['Age'].str.extract(r'(\d+)', expand=False).astype(int)
# Ages containing 'M' are treated as months and converted to years.
age_in_months = Average_Age['Age'].str.contains('M', na=False)
Average_Age['Age_Years'] = np.where(age_in_months, Average_Age['Age_Years'] / 12.0, Average_Age['Age_Years'])
print(Average_Age['Age_Years'].mean())

#Answer 4
# Number of patients aged 1 to 12 years (both bounds inclusive).
is_child = Average_Age['Age_Years'].between(1, 12)
Children_Count = Average_Age[is_child]
print(Children_Count['Age_Years'].count())

#Answer 5
# Most frequent procedure for each sex.
# Rows lacking Sex or Procedure, or carrying the '-' placeholder, are excluded.
Gender_Procedure = hospitaldata.dropna(subset=['Sex', 'Procedure'])
Gender_Procedure = Gender_Procedure[Gender_Procedure['Sex'] != '-'].copy()
# Fold 'm'/'f' into 'M'/'F' so each sex forms a single group.
Gender_Procedure['Sex'] = Gender_Procedure['Sex'].str.upper()
# value_counts() sorts counts in descending order within each Sex group, so the
# first row of every group is that group's most common procedure. Taking
# nlargest(2) over the whole Series could return two rows for the same sex.
Procedure_Counts = Gender_Procedure.groupby('Sex')['Procedure'].value_counts()
print(Procedure_Counts.groupby(level='Sex').head(1))

#Answer 6
# Doctor with the highest total amount received.
# Only rows whose ConsultingDoctor contains "Dr" are considered; missing names
# are treated as non-matches.
Doctor_Earnings = hospitaldata[hospitaldata['ConsultingDoctor'].str.contains("Dr", na=False)]
# Sum per doctor: .max() would rank doctors by their single largest payment
# rather than by what they earned overall.
print(Doctor_Earnings.groupby("ConsultingDoctor")['AmountReceived'].sum().nlargest(1))

#Answer 7
# Procedure with the highest total amount received.
# Drop rows missing either column in one step; previously the second dropna
# restarted from hospitaldata and silently discarded the Procedure filter.
Procedure_Earnings = hospitaldata.dropna(subset=['Procedure', 'AmountReceived'])
print(Procedure_Earnings.groupby("Procedure")['AmountReceived'].sum().nlargest(1))

#Answer 8
# Hour of the day with the most visits.
# Rows with a missing Time or the '-' placeholder are excluded.
VF = hospitaldata.dropna(subset=['Time'])
# .copy() so the new columns are added to an independent frame rather than a
# slice of hospitaldata.
VF = VF[VF['Time'] != '-'].copy()
VF['VisitHour'] = pd.to_datetime(VF['Time']).dt.hour
VF1 = VF[['VisitHour', 'id']]
# A bare expression prints nothing when the file is run as a script.
print(VF1.groupby('VisitHour')['id'].count().nlargest(1))

#Answer 9
# Label each visit by part of day, based on its 24-hour VisitHour:
#   06-11 Morning, 12-15 Afternoon, 16-18 Evening, everything else Night.
visit_hour = VF['VisitHour']
VF['TimeBracket'] = np.select(
    [
        (visit_hour >= 6) & (visit_hour < 12),
        (visit_hour >= 12) & (visit_hour < 16),
        (visit_hour >= 16) & (visit_hour < 19),
    ],
    ["Morning", "Afternoon", "Evening"],
    default="Night",
)
# A bare expression prints nothing when the file is run as a script.
print(VF[['TimeBracket', 'VisitHour']])

#Answer 10 & 11
# Patients (by id) that appear on more than one row of VF, with their row count.
VF2 = VF[['id', 'Date']].groupby('id').count()
# A bare expression prints nothing when the file is run as a script.
print(VF2[VF2['Date'] > 1])

# Answer 12
# Patients that had the same procedure recorded more than once.
Gender_Procedure2 = Gender_Procedure[['id', 'Procedure', 'Date']].groupby(['id', 'Procedure']).count()
# A bare expression prints nothing when the file is run as a script.
print(Gender_Procedure2[Gender_Procedure2['Date'] > 1])

# Answer 13
# Mean age in years for each sex.
# Fold 'm'/'f' into 'M'/'F' so each sex forms a single group.
Average_Age['Sex'] = Average_Age['Sex'].str.upper()
# Restrict the report to the two real categories so the '-' placeholder does
# not show up as a third "sex"; Average_Age itself keeps all of its rows.
Known_Sex = Average_Age[Average_Age['Sex'].isin(['M', 'F'])]
# A bare expression prints nothing when the file is run as a script.
print(Known_Sex.groupby("Sex")['Age_Years'].mean())

# Answer 14
# Total outstanding balance across all records.
# Computed from hospitaldata rather than Gender_Procedure: the latter has
# already dropped every row with a missing Sex or Procedure, which would
# silently leave those balances out of the total.
Amount_Balance = hospitaldata['AmountBalance'].dropna().astype(str)
# Remove thousands separators and surrounding blanks, then convert; entries
# that are not numbers (such as the ' - ' placeholder) become NaN.
Amount_Balance = pd.to_numeric(
    Amount_Balance.str.replace(',', '').str.strip(),
    errors='coerce',
)
# Series.sum() skips NaN, and print() makes the result visible in a script run.
print(Amount_Balance.sum())

# Answer 15
# Total amount received for rows whose procedure is exactly 'Consultation'.
is_consultation = Procedure_Earnings['Procedure'].eq('Consultation')
Consultation_Earnings = Procedure_Earnings[is_consultation]
print(sum(Consultation_Earnings['AmountReceived']))

# Answer 16
# Pearson correlation between patient age (years) and total charges.
Age_Charges_Cor = Average_Age.dropna(subset=['TotalCharges'])
Age_Charges_Cor = Age_Charges_Cor[Age_Charges_Cor['TotalCharges'] != 'Cancelled']
# Strip thousands separators before converting: extracting the first run of
# digits would turn "1,500" into 1. Unparseable entries become NaN.
Total_Charges = pd.to_numeric(
    Age_Charges_Cor['TotalCharges'].astype(str).str.replace(',', '').str.strip(),
    errors='coerce',
)
# Series.corr returns the correlation coefficient and ignores NaN pairs;
# np.correlate computes a cross-correlation (a sum of products) instead.
print(Age_Charges_Cor['Age_Years'].corr(Total_Charges))

# Answer 17
# Bucket every patient into an age band and count the records in each band.
# np.select takes the first matching condition, so each upper bound only needs
# to be tested once; anything matching none of them is "Above 45 Yrs".
age_years = Average_Age['Age_Years']
Average_Age['Age_Group'] = np.select(
    [age_years <= 15, age_years <= 30, age_years <= 45],
    ["1-15 Yrs", "16-30 Yrs", "31-45 Yrs"],
    default="Above 45 Yrs",
)
print(Average_Age.groupby('Age_Group')['id'].count())

# Answer 18
# Total amount received for the 'X Ray' and 'Scalling' procedures combined.
is_xray_or_scalling = Procedure_Earnings['Procedure'].isin(['X Ray', 'Scalling'])
XRay_Scalling_Earnings = Procedure_Earnings[is_xray_or_scalling]
print(sum(XRay_Scalling_Earnings['AmountReceived']))
158 changes: 158 additions & 0 deletions RAssignment_2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
library(tidyr)
library(readr)      # provides parse_number()
library(lubridate)
library(dplyr)
# NOTE: plyr is attached after dplyr, so plyr's count(), summarise() and
# mutate() mask the dplyr versions. Call dplyr::summarise() explicitly
# wherever a group_by() must be honoured.
library(plyr)

#hospitaldata <- read.csv("D://DIH//Assignments//FarooqUrRehman_KCI_R_Assignment2//hospitaldata.csv")

## Answer 1
# Remove every literal dot from the column names.
names(hospitaldata) <- gsub(".", "", names(hospitaldata), fixed = TRUE)

## Answer 2
# Weekday(s) with the most visits. plyr::count() on a vector returns a data
# frame with the value in `x` and its frequency in `freq`.
visit_weekday <- wday(mdy(hospitaldata$Date), label = TRUE)
day_count <- plyr::count(visit_weekday)
day_max <- max(day_count$freq)
day_count %>%
  filter(freq == day_max) %>%
  select(x)

## Answer 3
# Average patient age in years.
# Age is converted to a numeric column in one pass:
#   * "-" placeholders become NA,
#   * values containing "M" are months and are divided by 12,
#   * everything else is parsed as a number of years.
# as.character() keeps this correct when Age was read as a factor, and parsing
# both branches avoids the numeric/character mix the nested ifelse() produced.
hospitaldata <- hospitaldata %>%
  dplyr::mutate(
    Age = as.character(Age),
    Age = ifelse(trimws(Age) == "-", NA_character_, Age),
    Age = ifelse(
      grepl("M", Age),
      readr::parse_number(Age) / 12,
      readr::parse_number(Age)
    )
  )

print(mean(hospitaldata$Age, na.rm = TRUE))

## Answer 4
# Number of patients aged 1 to 12 years (both bounds inclusive).
hospitaldata <- hospitaldata %>%
  dplyr::mutate(Age = as.numeric(Age))

# filter() drops rows whose Age is NA, so nrow() is the number of children.
# This replaces select() %>% count() %>% sum(), which only produced a row
# count as a side effect of how count() treats a zero-column data frame.
children_data <- hospitaldata %>%
  filter(Age >= 1, Age <= 12) %>%
  nrow()
print(children_data)

# Answer 5
# Most frequent procedure for female patients, then for male patients.

# Procedure frequencies (columns `Procedure` and `freq`) for one sex.
# Sex is collapsed to "M"/"F" first; any other value becomes NA and is dropped.
procedure_counts_for <- function(target_sex) {
  hospitaldata %>%
    mutate(Sex = ifelse(Sex %in% c("M", "m"), "M",
                        ifelse(Sex %in% c("f", "F"), "F", NA))) %>%
    filter(!is.na(Sex), !is.na(Procedure), Sex == target_sex) %>%
    select(Procedure) %>%
    plyr::count()
}

gen_proc <- procedure_counts_for("F")
gen_proc_1 <- max(gen_proc$freq)
gen_proc %>%
  filter(freq == gen_proc_1) %>%
  select(Procedure, freq)

gen_proc <- procedure_counts_for("M")
gen_proc_1 <- max(gen_proc$freq)
gen_proc %>%
  filter(freq == gen_proc_1) %>%
  select(Procedure, freq)


# Answer 6
# Doctor with the highest total amount received.
# dplyr::summarise() is called explicitly: plyr is attached after dplyr, and
# plyr's summarise() ignores group_by(), which would collapse every doctor
# into a single grand total.
doctors_earnings <- hospitaldata %>%
  filter(!is.na(AmountReceived), grepl("^Dr", ConsultingDoctor)) %>%
  group_by(ConsultingDoctor) %>%
  dplyr::summarise(total_received = sum(AmountReceived))

# Keep every doctor that ties for the maximum.
max_earning <- max(doctors_earnings$total_received)
print(filter(doctors_earnings, total_received == max_earning))

# Answer 7
# Procedure with the highest total amount received.
# dplyr::summarise() is called explicitly: plyr is attached after dplyr, and
# plyr's summarise() ignores group_by(), which would collapse every procedure
# into a single grand total.
proc_earnings <- hospitaldata %>%
  filter(!is.na(AmountReceived), !is.na(Procedure)) %>%
  group_by(Procedure) %>%
  dplyr::summarise(total_received = sum(AmountReceived))

# Keep every procedure that ties for the maximum.
max_proc_earning <- max(proc_earnings$total_received)
print(filter(proc_earnings, total_received == max_proc_earning))

# Answer 8
# Busiest consultation hour, labelled like "10:00 AM".
# A time counts as PM only when it mentions "PM" and not "AM"; every other
# time (including those with no marker) is labelled AM.
cons_time <- hospitaldata %>%
  filter(!is.na(Time), !grepl("-", Time)) %>%
  mutate(am_pm = ifelse(!grepl("AM", Time) & grepl("PM", Time), "PM", "AM")) %>%
  group_by(cons_time = paste0(hour(hm(Time)), ":00 ", am_pm)) %>%
  tally()

cons_max_count <- max(cons_time$n)
cons_time %>%
  filter(n == cons_max_count) %>%
  print()

# Answer 9
# Label each visit by part of day.
# CHour is the hour on a 24-hour clock: 12 AM maps to 0 and PM hours below 12
# are shifted by 12. Brackets: 06-11 Morning, 12-15 Afternoon, 16-18 Evening,
# everything else Night; an unparseable time stays NA.
Time_Bracket <- hospitaldata %>%
  filter(!is.na(Time), !grepl("-", Time)) %>%
  dplyr::mutate(
    CHour = as.numeric(hour(hm(Time))),
    CHour = ifelse(
      grepl("AM", Time) & CHour == 12,
      0,
      ifelse(grepl("PM", Time) & CHour < 12, 12 + CHour, CHour)
    ),
    TimeBracket = dplyr::case_when(
      is.na(CHour) ~ NA_character_,
      CHour >= 6 & CHour < 12 ~ "Morning",
      CHour >= 12 & CHour < 16 ~ "Afternoon",
      CHour >= 16 & CHour < 19 ~ "Evening",
      TRUE ~ "Night"
    )
  )

# Select the two columns before printing; piping into print(Time, TimeBracket)
# hands them to print() as extra arguments instead of choosing columns.
print(select(Time_Bracket, Time, TimeBracket))

# Answer 10 and 11
# Patients (by id) with more than one row whose consulting doctor starts with
# "Dr", together with their visit count `n`.
doctor_visits <- filter(hospitaldata, grepl("^Dr", ConsultingDoctor))
visits_per_patient <- tally(group_by(doctor_visits, id))
Repeated_Visitors <- print(filter(visits_per_patient, n > 1))

# Answer 12
# Patients that had the same procedure more than once, counting only rows
# whose consulting doctor starts with "Dr".
doctor_visits <- filter(hospitaldata, grepl("^Dr", ConsultingDoctor))
visits_per_procedure <- tally(group_by(doctor_visits, id, Procedure))
Rep_with_same_problem <- print(filter(visits_per_procedure, n > 1))

# Answer 13
# Median age for each sex.
# Sex is upper-cased so "m"/"M" and "f"/"F" fall into one group each, and only
# the two real categories are kept (this also removes NA and "-" entries).
# dplyr::summarise() replaces the deprecated summarise_each(funs(...)) and is
# namespaced because plyr's summarise() would ignore the grouping.
median_age <- hospitaldata %>%
  dplyr::mutate(Sex = toupper(as.character(Sex))) %>%
  filter(Sex %in% c("M", "F"), !is.na(Age)) %>%
  group_by(Sex) %>%
  dplyr::summarise(Age = median(Age)) %>%
  print()

# Answer 14
# Total outstanding balance.
# Rows with a missing balance or a "-" placeholder are excluded.
# parse_number() comes from readr, so it is namespaced; as.character() keeps
# it working when the column was read as a factor, and na.rm guards the sum
# against any value that fails to parse.
balance_amount <- hospitaldata %>%
  filter(!is.na(AmountBalance), !grepl("-", AmountBalance)) %>%
  dplyr::mutate(
    AmountBalance = readr::parse_number(as.character(AmountBalance))
  ) %>%
  dplyr::summarise(total_balance = sum(AmountBalance, na.rm = TRUE)) %>%
  print()

# Answer 15
# Total amount received for rows whose procedure is exactly "Consultation".
# (This block used to appear twice verbatim, computing and printing the same
# figure two times.)
Consultation_amount <- hospitaldata %>%
  filter(!is.na(AmountReceived), Procedure == "Consultation") %>%
  summarise(sum(AmountReceived)) %>%
  print()

# Answer 16
# Correlation matrix between total charges and patient age.
# Cancelled visits and rows missing either value are excluded.
# parse_number() comes from readr, so it is namespaced; as.character() keeps
# it working when the column was read as a factor. "complete.obs" stops a
# charge that fails to parse from turning the whole matrix into NA.
Age_n_Charges_Relation <- hospitaldata %>%
  filter(
    !is.na(Age),
    !is.na(TotalCharges),
    !grepl("Cancelled", TotalCharges)
  ) %>%
  dplyr::mutate(
    TotalCharges = readr::parse_number(as.character(TotalCharges))
  ) %>%
  select(TotalCharges, Age) %>%
  cor(use = "complete.obs") %>%
  print()

# Answer 17
# Number of visits per age group.
# case_when() takes the first matching condition, so the bands are contiguous:
# (..10], (10..30], (30..50], (50..). The earlier test `Age > 31` left ages in
# (30, 31] in the "51 yrs and onwards" band.
AgeGroup_Visits <- hospitaldata %>%
  filter(!is.na(Age)) %>%
  dplyr::mutate(
    Age_Group = dplyr::case_when(
      Age <= 10 ~ "0 - 10 yrs",
      Age <= 30 ~ "11 - 30 yrs",
      Age <= 50 ~ "31 - 50 yrs",
      TRUE ~ "51 yrs and onwards"
    )
  ) %>%
  group_by(Age_Group) %>%
  tally() %>%
  print()

# Answer 18
# Total amount received for procedures whose name contains "X Ray" or
# "Scalling".
XRay_Scalling_Cost <- hospitaldata %>%
  filter(!is.na(AmountReceived), grepl("X Ray|Scalling", Procedure)) %>%
  summarise(sum(AmountReceived)) %>%
  print()

# 2nd answer
# Same total, matching the procedure names exactly instead of by substring.
XRay_Scalling_Cost <- hospitaldata %>%
  filter(!is.na(AmountReceived), Procedure %in% c("X Ray", "Scalling")) %>%
  summarise(sum(AmountReceived)) %>%
  print()

Loading