Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ tutorials/.DS_Store
problemSets/example/.Rhistory
problemSets/.DS_Store
.DS_Store
.Rproj.user
Binary file added problemSets/.DS_Store
Binary file not shown.
Empty file.
200 changes: 200 additions & 0 deletions problemSets/PS01/my_answers/PS01_DP.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
#####################
# load libraries
# set wd
# clear global .envir
#####################

# remove objects
# Deletes every non-hidden object in the current environment so the script
# starts from a clean slate. It does not detach packages (handled separately
# below) and does not touch objects whose names start with a dot.
rm(list=ls())
# detach all libraries
#
# detachAllPackages(): detach every attached package except the ones listed
# in `basic.packages`, so that packages left attached by earlier code cannot
# mask functions used further down.
#   arguments: none
#   returns:   NULL (invisibly); called for its side effect on the search path
detachAllPackages <- function() {
  basic.packages <- c(
    "package:stats", "package:graphics", "package:grDevices",
    "package:utils", "package:datasets", "package:methods", "package:base"
  )
  # keep only the search-path entries that are packages; other entries such
  # as ".GlobalEnv" do not start with "package:"
  attached <- search()
  package.list <- attached[startsWith(attached, "package:")]
  package.list <- setdiff(package.list, basic.packages)
  for (package in package.list) {
    detach(package, character.only = TRUE)
  }
  invisible(NULL)
}
detachAllPackages()

# load libraries
# pkgTest(): make sure every package named in `pkg` is installed, then attach
# it.
#   pkg      character vector of package names
#   returns  logical vector named by `pkg`; TRUE where the package could be
#            attached, FALSE where require() failed
pkgTest <- function(pkg) {
  # packages that are requested but not yet present in the library
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg) > 0) {
    install.packages(new.pkg, dependencies = TRUE)
  }
  # require() reports a failure as FALSE in the result instead of aborting;
  # vapply() pins the result to a logical vector even when `pkg` is empty
  # (sapply() would return an empty list in that case)
  vapply(pkg, require, logical(1), character.only = TRUE)
}

# here is where you load any necessary packages
# ex: stringr
# lapply(c("stringr"), pkgTest)

# The package names must be the vector that lapply() iterates over: with an
# empty c() as the first argument pkgTest() is never called and nothing is
# installed or attached.
lapply(c("ggplot2"), pkgTest)

#####################
# Problem 1
#####################

# observed sample
y <- c(
  105, 69, 86, 100, 82, 111, 104, 110, 87, 108, 87, 90, 94,
  113, 112, 98, 80, 97, 95, 111, 114, 89, 95, 126, 98
)

# sample size and degrees of freedom are derived from the data so the
# analysis stays correct if `y` changes
n_obs <- length(y)
deg_free <- n_obs - 1

# calculate the mean
mean_y <- mean(y)
mean_y

# calculate the standard deviation (sample formula: divide by n - 1)
demeanedSum <- y - mean_y
squaredError <- demeanedSum^2
variance <- sum(squaredError) / deg_free
stdev <- sqrt(variance)
stdev

# calculate the standard error
sterror <- stdev / sqrt(n_obs)
sterror

# find the t-score for the desired confidence level
# (1 - conf_level) / 2 is the probability left in each tail
conf_level <- 0.9
# note: the name `t` is kept for compatibility; it shadows base::t() as a
# variable, but calls such as t(x) still resolve to the function
t <- qt(p = (1 - conf_level) / 2, df = deg_free, lower.tail = FALSE)
t

# construct the confidence interval
lowerlimit <- mean_y - t * sterror
upperlimit <- mean_y + t * sterror
lowerlimit
upperlimit

# hypothesis testing
# step 1 - assumptions: data is quantitative, sampling method is random,
# sample size is smaller than 30 -> t-score instead of z-score

# step 2 - null hypothesis: mean is lower than or equal to 100
# alternative hypothesis: mean is higher than 100

# step 3 - calculate a test statistic
ts <- (mean_y - 100) / sterror
ts

# step 4 - calculate the p-value
# upper tail, because the alternative hypothesis is "mean > 100"
p <- pt(ts, df = deg_free, lower.tail = FALSE)
p

# step 5 - conclusion
# p-value is higher than alpha -> cannot reject the null hypothesis that the
# mean is lower than or equal to 100


#####################
# Problem 2
#####################
# attach the plotting packages; pkgTest() installs a package only when it is
# missing, so ggpubr is not re-installed on every run of the script
lapply(c("ggplot2", "ggpubr"), pkgTest)

# read the expenditure data; header = TRUE takes the column names from the
# first line of the file
expenditure <- read.table("https://raw.githubusercontent.com/ASDS-TCD/StatsI_2025/main/datasets/expenditure.txt", header = TRUE)
head(expenditure)

# plot the relationships between all the variables
# add the correlation coefficient and hide the p-value
# save the plots as a png to add to the latex file

# axis labels shared by several plots
lab_y <- "expenditure on housing assistance (USD per capita)"
lab_x1 <- "personal income (USD per capita)"
lab_x2 <- "number of financially insecure residents (per 100,000)"
lab_x3 <- "number of urban residents (per 1000)"

# cor_scatter(): scatter plot annotated with the correlation coefficient.
#   data     data frame holding the plotted columns
#   mapping  aes() mapping that selects the x and y columns
#   title    plot title
#   x_lab    x-axis label
#   y_lab    y-axis label
#   returns  the ggplot object (not yet drawn)
cor_scatter <- function(data, mapping, title, x_lab, y_lab) {
  ggplot(data, mapping) +
    geom_point() +
    labs(title = title, x = x_lab, y = y_lab) +
    # label with r.label only, so the p-value is left out of the annotation
    stat_cor(aes(label = after_stat(r.label))) +
    theme_minimal()
}

# save_png(): draw `plot` into the PNG file `file`.
#   plot     a ggplot object
#   file     output path; should end in ".png" because the png() device is used
#   returns  `file` (invisibly)
save_png <- function(plot, file) {
  png(file = file)
  # close the device even if drawing fails, so no graphics device is leaked
  on.exit(dev.off(), add = TRUE)
  # explicit print(): ggplot objects are only drawn when printed, which does
  # not happen automatically inside a function or under source()
  print(plot)
  invisible(file)
}

save_png(
  cor_scatter(
    expenditure, aes(x = X1, y = Y),
    title = "Relationship between personal income\nand housing assistance expenditure",
    x_lab = lab_x1, y_lab = lab_y
  ),
  file = "scatter_x1_y.png"
)

save_png(
  cor_scatter(
    expenditure, aes(x = X2, y = Y),
    title = "Relationship between financial insecurity\nand housing assistance expenditure",
    x_lab = lab_x2, y_lab = lab_y
  ),
  file = "scatter_x2_y.png"
)

save_png(
  cor_scatter(
    expenditure, aes(x = X3, y = Y),
    title = "Relationship between urban residency and\nexpenditure on housing assistance (in state)",
    x_lab = lab_x3, y_lab = lab_y
  ),
  file = "scatter_x3_y.png"
)

save_png(
  cor_scatter(
    expenditure, aes(x = X1, y = X2),
    title = "Relationship between personal income\nand financial insecurity",
    x_lab = lab_x1, y_lab = lab_x2
  ),
  file = "scatter_x1_x2.png"
)

save_png(
  cor_scatter(
    expenditure, aes(x = X1, y = X3),
    title = "Relationship between personal income\nand urban residency",
    x_lab = lab_x1, y_lab = lab_x3
  ),
  file = "scatter_x1_x3.png"
)

save_png(
  cor_scatter(
    expenditure, aes(x = X2, y = X3),
    title = "Relationship between financial insecurity\nand urban residency",
    x_lab = lab_x2, y_lab = lab_x3
  ),
  file = "scatter_x2_x3.png"
)


# Recode Region from its numeric codes 1-4 into a labelled factor, so plots
# display the region names rather than the codes.
expenditure[["Region"]] <- factor(
  expenditure[["Region"]],
  levels = 1:4,
  labels = c("Northeast", "North Central", "South", "West")
)

# make a boxplot to compare the per capita housing expenditure in different regions
boxplot_reg_y <- ggplot(expenditure, aes(x = Region, y = Y, group = Region)) +
  geom_boxplot() +
  labs(
    title = "Boxplot for housing assistance expenditure for each region",
    x = "Region",
    y = "expenditure on housing assistance (USD per capita)"
  ) +
  # theme_minimal() is a complete theme and replaces every theme setting made
  # before it, so it has to come first and the individual tweaks after it
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  )

png(file = "boxplot_reg_y.png")
# explicit print(): a ggplot object is only drawn when printed, which does not
# happen automatically when the script is run through source()
print(boxplot_reg_y)
dev.off()

# plot the relationship between x1, y and region
# each region is represented by a different shape and colour
# (Region is already a factor at this point, so it is mapped directly)
scatter_x1_y_reg <- ggplot(
  expenditure,
  aes(x = X1, y = Y, shape = Region, color = Region)
) +
  geom_point(size = 3) + # increase size to distinguish between the shapes
  scale_color_manual(values = c("black", "red", "green", "purple")) +
  scale_shape_manual(values = c(21, 17, 18, 19)) + # each value corresponds to a shape
  labs(
    # a single "\n" gives one line break; a line break inside the string plus
    # "\n" produced an empty line and a stray leading space in the title
    title = "Relationship between personal income and\nhousing assistance expenditure per region",
    x = "personal income (USD per capita)",
    y = "expenditure on housing assistance (USD per capita)",
    # same legend title for both aesthetics so they share one legend
    shape = "Region",
    color = "Region"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) # center title

png(file = "scatter_x1_y_reg.png")
# explicit print(): a ggplot object is only drawn when printed, which does not
# happen automatically when the script is run through source()
print(scatter_x1_y_reg)
dev.off()




38 changes: 38 additions & 0 deletions problemSets/PS01/my_answers/PS01_DP.aux
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{1}{lstlisting.-1}\protected@file@percent }
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{2}{lstlisting.-2}\protected@file@percent }
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{2}{lstlisting.-3}\protected@file@percent }
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{2}{lstlisting.-4}\protected@file@percent }
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{2}{lstlisting.-5}\protected@file@percent }
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{3}{lstlisting.-6}\protected@file@percent }
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{3}{lstlisting.-7}\protected@file@percent }
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{4}{lstlisting.-8}\protected@file@percent }
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{5}{lstlisting.-9}\protected@file@percent }
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{6}{lstlisting.-10}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces \footnotesize Scatterplot of X1 and Y}}{6}{figure.1}\protected@file@percent }
\newlabel{fig:plot_1}{{1}{6}{\footnotesize Scatterplot of X1 and Y}{figure.1}{}}
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{7}{lstlisting.-11}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces \footnotesize Scatterplot of X2 and Y}}{7}{figure.2}\protected@file@percent }
\newlabel{fig:plot_2}{{2}{7}{\footnotesize Scatterplot of X2 and Y}{figure.2}{}}
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{8}{lstlisting.-12}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces \footnotesize Scatterplot of X3 and Y}}{8}{figure.3}\protected@file@percent }
\newlabel{fig:plot_3}{{3}{8}{\footnotesize Scatterplot of X3 and Y}{figure.3}{}}
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{9}{lstlisting.-13}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces \footnotesize Scatterplot of X1 and X2}}{9}{figure.4}\protected@file@percent }
\newlabel{fig:plot_4}{{4}{9}{\footnotesize Scatterplot of X1 and X2}{figure.4}{}}
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{10}{lstlisting.-14}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces \footnotesize Scatterplot of X1 and X3}}{10}{figure.5}\protected@file@percent }
\newlabel{fig:plot_5}{{5}{10}{\footnotesize Scatterplot of X1 and X3}{figure.5}{}}
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{11}{lstlisting.-15}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces \footnotesize Scatterplot of X2 and X3}}{11}{figure.6}\protected@file@percent }
\newlabel{fig:plot_6}{{6}{11}{\footnotesize Scatterplot of X2 and X3}{figure.6}{}}
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{12}{lstlisting.-16}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces \footnotesize Boxplot of Y and Region}}{12}{figure.7}\protected@file@percent }
\newlabel{fig:plot_7}{{7}{12}{\footnotesize Boxplot of Y and Region}{figure.7}{}}
\@writefile{lol}{\contentsline {lstlisting}{PS01\textunderscore DP.R}{13}{lstlisting.-17}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces \footnotesize Scatterplot of X1, Y and Region}}{13}{figure.8}\protected@file@percent }
\newlabel{fig:plot_8}{{8}{13}{\footnotesize Scatterplot of X1, Y and Region}{figure.8}{}}
\gdef \@abspage@last{14}
Loading