From bc1c7fd9bd438bfe3879881be100e3904d7dc75e Mon Sep 17 00:00:00 2001
From: zee-zhijun
Date: Sun, 18 Dec 2016 15:53:32 -0500
Subject: [PATCH] Submit twitter_data visualizations.

---
 .gitignore         |  3 +++
 twitter-data.Rproj | 13 +++++++++++++
 twitter.Rmd        | 31 +++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 twitter-data.Rproj
 create mode 100644 twitter.Rmd

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..807ea25
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.Rproj.user
+.Rhistory
+.RData
diff --git a/twitter-data.Rproj b/twitter-data.Rproj
new file mode 100644
index 0000000..8e3c2eb
--- /dev/null
+++ b/twitter-data.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/twitter.Rmd b/twitter.Rmd
new file mode 100644
index 0000000..16e727c
--- /dev/null
+++ b/twitter.Rmd
@@ -0,0 +1,31 @@
+---
+title: "Twitter"
+author: "Zee"
+date: "11 October, 2016"
+output: html_document
+---
+
+```{r}
+# No setwd(): knitr runs chunks from this file's directory, so the
+# relative path below resolves inside the project on any machine.
+
+# Load the tweet export as twitter_data
+twitter_data <- read.csv("HUDK4050-twitter-10-06-16.csv", header = TRUE, sep = ",")
+head(twitter_data)
+
+# Plot favorite counts against row index
+plot(twitter_data$favoriteCount)
+
+# Histogram (frequency of tweets with respect to time)
+hist(twitter_data$time, breaks = 100)
+
+# Scatter plot of favorite count (y) versus time (x)
+plot(twitter_data$time, twitter_data$favoriteCount)
+
+# Boxplot of tweeting time by screenName
+boxplot(time ~ screenName, data = twitter_data, las = 2)
+
+# Boxplot of favoriteCount by screenName
+boxplot(favoriteCount ~ screenName, data = twitter_data, las = 2)
+```
+