Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions R/Super Bowl boxes/q1.probs.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"0","1","2","3","4","5","6","7","8","9"
0.0457446808510638,0.025531914893617,0.025531914893617,0.0372340425531915,0.0276595744680851,0.025531914893617,0.0276595744680851,0.0382978723404255,0.025531914893617,0.0265957446808511
0.0202127659574468,0,0,0.0117021276595745,0.00212765957446809,0,0.00212765957446809,0.0127659574468085,0,0.00106382978723404
0.0202127659574468,0,0,0.0117021276595745,0.00212765957446809,0,0.00212765957446809,0.0127659574468085,0,0.00106382978723404
0.0319148936170213,0.0117021276595745,0.0117021276595745,0.0234042553191489,0.0138297872340426,0.0117021276595745,0.0138297872340426,0.024468085106383,0.0117021276595745,0.0127659574468085
0.025531914893617,0.00531914893617021,0.00531914893617021,0.0170212765957447,0.0074468085106383,0.00531914893617021,0.0074468085106383,0.0180851063829787,0.00531914893617021,0.00638297872340425
0.0202127659574468,0,0,0.0117021276595745,0.00212765957446809,0,0.00212765957446809,0.0127659574468085,0,0.00106382978723404
0.0202127659574468,0,0,0.0117021276595745,0.00212765957446809,0,0.00212765957446809,0.0127659574468085,0,0.00106382978723404
0.0276595744680851,0.0074468085106383,0.0074468085106383,0.0191489361702128,0.00957446808510638,0.0074468085106383,0.00957446808510638,0.0202127659574468,0.0074468085106383,0.00851063829787234
0.0202127659574468,0,0,0.0117021276595745,0.00212765957446809,0,0.00212765957446809,0.0127659574468085,0,0.00106382978723404
0.0202127659574468,0,0,0.0117021276595745,0.00212765957446809,0,0.00212765957446809,0.0127659574468085,0,0.00106382978723404
11 changes: 11 additions & 0 deletions R/Super Bowl boxes/q2.probs.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"0","1","2","3","4","5","6","7","8","9"
0.0297872340425532,0.0191489361702128,0.0191489361702128,0.0276595744680851,0.0212765957446809,0.0180851063829787,0.024468085106383,0.024468085106383,0.0170212765957447,0.0191489361702128
0.0148936170212766,0.00425531914893617,0.00425531914893617,0.0127659574468085,0.00638297872340425,0.00319148936170213,0.00957446808510638,0.00957446808510638,0.00212765957446809,0.00425531914893617
0.0127659574468085,0.00212765957446809,0.00212765957446809,0.0106382978723404,0.00425531914893617,0.00106382978723404,0.0074468085106383,0.0074468085106383,0,0.00212765957446809
0.0202127659574468,0.00957446808510638,0.00957446808510638,0.0180851063829787,0.0117021276595745,0.00851063829787234,0.0148936170212766,0.0148936170212766,0.0074468085106383,0.00957446808510638
0.0180851063829787,0.0074468085106383,0.0074468085106383,0.0159574468085106,0.00957446808510638,0.00638297872340425,0.0127659574468085,0.0127659574468085,0.00531914893617021,0.0074468085106383
0.0127659574468085,0.00212765957446809,0.00212765957446809,0.0106382978723404,0.00425531914893617,0.00106382978723404,0.0074468085106383,0.0074468085106383,0,0.00212765957446809
0.0159574468085106,0.00531914893617021,0.00531914893617021,0.0138297872340426,0.0074468085106383,0.00425531914893617,0.0106382978723404,0.0106382978723404,0.00319148936170213,0.00531914893617021
0.0234042553191489,0.0127659574468085,0.0127659574468085,0.0212765957446809,0.0148936170212766,0.0117021276595745,0.0180851063829787,0.0180851063829787,0.0106382978723404,0.0127659574468085
0.0159574468085106,0.00531914893617021,0.00531914893617021,0.0138297872340426,0.0074468085106383,0.00425531914893617,0.0106382978723404,0.0106382978723404,0.00319148936170213,0.00531914893617021
0.0138297872340426,0.00319148936170213,0.00319148936170213,0.0117021276595745,0.00531914893617021,0.00212765957446809,0.00851063829787234,0.00851063829787234,0.00106382978723404,0.00319148936170213
11 changes: 11 additions & 0 deletions R/Super Bowl boxes/q3.probs.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"0","1","2","3","4","5","6","7","8","9"
0.0191489361702128,0.0117021276595745,0.00957446808510638,0.0117021276595745,0.0127659574468085,0.00957446808510638,0.0127659574468085,0.0148936170212766,0.00957446808510638,0.0127659574468085
0.0138297872340426,0.00638297872340425,0.00425531914893617,0.00638297872340425,0.0074468085106383,0.00425531914893617,0.0074468085106383,0.00957446808510638,0.00425531914893617,0.0074468085106383
0.0127659574468085,0.00531914893617021,0.00319148936170213,0.00531914893617021,0.00638297872340425,0.00319148936170213,0.00638297872340425,0.00851063829787234,0.00319148936170213,0.00638297872340425
0.0202127659574468,0.0127659574468085,0.0106382978723404,0.0127659574468085,0.0138297872340426,0.0106382978723404,0.0138297872340426,0.0159574468085106,0.0106382978723404,0.0138297872340426
0.0180851063829787,0.0106382978723404,0.00851063829787234,0.0106382978723404,0.0117021276595745,0.00851063829787234,0.0117021276595745,0.0138297872340426,0.00851063829787234,0.0117021276595745
0.0138297872340426,0.00638297872340425,0.00425531914893617,0.00638297872340425,0.0074468085106383,0.00425531914893617,0.0074468085106383,0.00957446808510638,0.00425531914893617,0.0074468085106383
0.0159574468085106,0.00851063829787234,0.00638297872340425,0.00851063829787234,0.00957446808510638,0.00638297872340425,0.00957446808510638,0.0117021276595745,0.00638297872340425,0.00957446808510638
0.0276595744680851,0.0202127659574468,0.0180851063829787,0.0202127659574468,0.0212765957446809,0.0180851063829787,0.0212765957446809,0.0234042553191489,0.0180851063829787,0.0212765957446809
0.0138297872340426,0.00638297872340425,0.00425531914893617,0.00638297872340425,0.0074468085106383,0.00425531914893617,0.0074468085106383,0.00957446808510638,0.00425531914893617,0.0074468085106383
0.0117021276595745,0.00425531914893617,0.00212765957446809,0.00425531914893617,0.00531914893617021,0.00212765957446809,0.00531914893617021,0.0074468085106383,0.00212765957446809,0.00531914893617021
11 changes: 11 additions & 0 deletions R/Super Bowl boxes/q4.probs.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"0","1","2","3","4","5","6","7","8","9"
0.0127659574468085,0.0127659574468085,0.0106382978723404,0.0106382978723404,0.0159574468085106,0.0117021276595745,0.0170212765957447,0.0212765957446809,0.00851063829787234,0.0138297872340426
0.0138297872340426,0.0138297872340426,0.0117021276595745,0.0117021276595745,0.0170212765957447,0.0127659574468085,0.0180851063829787,0.0223404255319149,0.00957446808510638,0.0148936170212766
0.00638297872340425,0.00638297872340425,0.00425531914893617,0.00425531914893617,0.00957446808510638,0.00531914893617021,0.0106382978723404,0.0148936170212766,0.00212765957446809,0.0074468085106383
0.00957446808510638,0.00957446808510638,0.0074468085106383,0.0074468085106383,0.0127659574468085,0.00851063829787234,0.0138297872340426,0.0180851063829787,0.00531914893617021,0.0106382978723404
0.00957446808510638,0.00957446808510638,0.0074468085106383,0.0074468085106383,0.0127659574468085,0.00851063829787234,0.0138297872340426,0.0180851063829787,0.00531914893617021,0.0106382978723404
0.00638297872340425,0.00638297872340425,0.00425531914893617,0.00425531914893617,0.00957446808510638,0.00531914893617021,0.0106382978723404,0.0148936170212766,0.00212765957446809,0.0074468085106383
0.00638297872340425,0.00638297872340425,0.00425531914893617,0.00425531914893617,0.00957446808510638,0.00531914893617021,0.0106382978723404,0.0148936170212766,0.00212765957446809,0.0074468085106383
0.0127659574468085,0.0127659574468085,0.0106382978723404,0.0106382978723404,0.0159574468085106,0.0117021276595745,0.0170212765957447,0.0212765957446809,0.00851063829787234,0.0138297872340426
0.0074468085106383,0.0074468085106383,0.00531914893617021,0.00531914893617021,0.0106382978723404,0.00638297872340425,0.0117021276595745,0.0159574468085106,0.00319148936170213,0.00851063829787234
0.0074468085106383,0.0074468085106383,0.00531914893617021,0.00531914893617021,0.0106382978723404,0.00638297872340425,0.0117021276595745,0.0159574468085106,0.00319148936170213,0.00851063829787234
36 changes: 21 additions & 15 deletions R/Super Bowl boxes/super_bowl.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Purpose: Gather and manipulate historic Super Bowl score data
# Data Used: Wikipedia Super Bowl data
# Packages Used: ggplot2, RCurl, XML
# Output File:
# Output File: q1probs.csv, q2probs.csv, q3probs.csv, q4probs.csv
# Data Output:
# Machine: Drew Conway's MacBook Pro

Expand All @@ -17,6 +17,8 @@
library(ggplot2)
library(RCurl)
library(XML)
library(plyr)
library(reshape)

# A function that converts a given integer into its Roman Numeral equivalent
to.RomanNumeral<-function(x) {
Expand All @@ -39,7 +41,7 @@ to.RomanNumeral<-function(x) {
}
}

# Function returns quater scores from Wikipedia Super Bown pages
# Function returns quater scores from Wikipedia Super Bowl pages
get.scores<-function(numeral) {
# Base URL for Wikipedia
wp.url<-getURL(paste("http://en.wikipedia.org/wiki/Super_Bowl_",numeral,sep=""))
Expand Down Expand Up @@ -72,8 +74,9 @@ get.probs<-function(score.df) {
return(prob.df)
}

# There have been 45 Super Bowls
bowls<-lapply(1:45, to.RomanNumeral)
# There have been 47 Super Bowls
bowl.number<-47
bowls<-lapply(1:bowl.number, to.RomanNumeral)

# Create data frame of all Super Bowl scores
scores.list<-lapply(bowls, get.scores)
Expand All @@ -91,7 +94,7 @@ quarters.list<-lapply(1:nrow(scores.df), function(i) c(scores.df[i,2],sum(scores
quarters.df<-as.data.frame(do.call(rbind, quarters.list))

# Final data set
super.df<-cbind(scores.df, quarters.df,rep(as.factor(c("Home","Away")),45))
super.df<-cbind(scores.df, quarters.df,rep(as.factor(c("Home","Away")),bowl.number))
names(super.df)<-c("Team","Q1","Q2","Q3","Q4","Total","SB","Q1T","Q2T","Q3T","Type")

# Get digit count totals in workable data frame for visualization
Expand Down Expand Up @@ -132,31 +135,34 @@ boxes$Q4<-melt(q4.probs)$value
# Create a heatmap of probability of winning given different digit combinations by quarter
q1.heatmap<-ggplot(boxes, aes(xmin=x,xmax=x+1,ymin=y,ymax=y+1))+geom_rect(aes(color="white", fill=Q1))+
scale_fill_gradient(limits=c(0,.047), low="lightgrey", high="darkred", name="Pr(Winning)")+
scale_color_manual(values=c("white"="white"), legend=FALSE)+theme_bw()+
scale_color_manual(values=c("white"="white"), guide="none")+theme_bw()+
scale_x_continuous(breaks=.5:9.5,labels=0:9)+scale_y_continuous(breaks=.5:9.5,labels=0:9)+
xlab("Home Team")+ylab("Away Team")+opts(title="Heat Map of Win Probabilties -- First Quater")
xlab("Home Team")+ylab("Away Team")+labs(title="Heat Map of Win Probabilities -- First Quarter")
ggsave(plot=q1.heatmap, filename="images/q1_heatmap.png", height=12, width=12)

q2.heatmap<-ggplot(boxes, aes(xmin=x,xmax=x+1,ymin=y,ymax=y+1))+geom_rect(aes(color="white", fill=Q2))+
scale_fill_gradient(limits=c(0,.047), low="lightgrey", high="darkred", name="Pr(Winning)")+
scale_color_manual(values=c("white"="white"), legend=FALSE)+theme_bw()+
scale_color_manual(values=c("white"="white"), guide="none")+theme_bw()+
scale_x_continuous(breaks=.5:9.5,labels=0:9)+scale_y_continuous(breaks=.5:9.5,labels=0:9)+
xlab("Home Team")+ylab("Away Team")+opts(title="Heat Map of Win Probabilties -- Half Time")
xlab("Home Team")+ylab("Away Team")+labs(title="Heat Map of Win Probabilities -- Half Time")
ggsave(plot=q2.heatmap, filename="images/q2_heatmap.png", height=12, width=12)

q3.heatmap<-ggplot(boxes, aes(xmin=x,xmax=x+1,ymin=y,ymax=y+1))+geom_rect(aes(color="white", fill=Q3))+
scale_fill_gradient(limits=c(0,.047), low="lightgrey", high="darkred", name="Pr(Winning)")+
scale_color_manual(values=c("white"="white"), legend=FALSE)+theme_bw()+
scale_color_manual(values=c("white"="white"), guide="none")+theme_bw()+
scale_x_continuous(breaks=.5:9.5,labels=0:9)+scale_y_continuous(breaks=.5:9.5,labels=0:9)+
xlab("Home Team")+ylab("Away Team")+opts(title="Heat Map of Win Probabilties -- Third Quarter")
xlab("Home Team")+ylab("Away Team")+labs(title="Heat Map of Win Probabilities -- Third Quarter")
ggsave(plot=q3.heatmap, filename="images/q3_heatmap.png", height=12, width=12)

q4.heatmap<-ggplot(boxes, aes(xmin=x,xmax=x+1,ymin=y,ymax=y+1))+geom_rect(aes(color="white", fill=Q4))+
scale_fill_gradient(limits=c(0,.047), low="lightgrey", high="darkred", name="Pr(Winning)")+
scale_color_manual(values=c("white"="white"), legend=FALSE)+theme_bw()+
scale_color_manual(values=c("white"="white"), guide="none")+theme_bw()+
scale_x_continuous(breaks=.5:9.5,labels=0:9)+scale_y_continuous(breaks=.5:9.5,labels=0:9)+
xlab("Home Team")+ylab("Away Team")+opts(title="Heat Map of Win Probabilties -- Final")
xlab("Home Team")+ylab("Away Team")+labs(title="Heat Map of Win Probabilities -- Final")
ggsave(plot=q4.heatmap, filename="images/q4_heatmap.png", height=12, width=12)



### Store quarterly probabilities as local CSV so they can be used again without having to re-download from the web.
write.csv(q1.probs,"q1.probs.csv",row.names=F)
write.csv(q2.probs,"q2.probs.csv",row.names=F)
write.csv(q3.probs,"q3.probs.csv",row.names=F)
write.csv(q4.probs,"q4.probs.csv",row.names=F)
83 changes: 83 additions & 0 deletions R/Super Bowl boxes/super_bowl_simulation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# File-Name: super_bowl_simulation.R
# Date: 2013-01-05
# Author: Will Townes
# Email: will.townes@gmail.com
# Purpose: Simulating the expected profits as a function of number of squares chosen in a super bowl digits matrix based on historic Super Bowl score data.
# Data Used: Wikipedia Super Bowl data
# Packages Used: super_bowl.R
# Output File:

# Copyright (c) 2013, under the Simplified BSD License.
# For more information on FreeBSD see: http://www.opensource.org/licenses/bsd-license.php
# All rights reserved.

###Read in quarterly probabilities from local CSV files, if they exist. Otherwise call super_bowl.R to download the data from wikipedia and write the CSV files.
fnames<-c("q1.probs.csv","q2.probs.csv","q3.probs.csv","q4.probs.csv")
if(all(file.exists(fnames))){
for(fname in fnames){
vname<-strtrim(fname,8)
assign(vname,read.csv(fname))
}
}else{
print('files not found')
source('super_bowl.R')
}
#we don't care at this point about the identities of the squares (only care about the chance of winning as a function of the number of squares randomly chosen), so we can flatten quarterly probabilities into vectors and stack in data frame
probs<-as.data.frame(lapply(list(q1.probs,q2.probs,q3.probs,q4.probs),function(q){as.numeric(c(q,recursive=T))}))
colnames(probs)<-c('q1','q2','q3','q4')
cdfs<-cumsum(probs)
rq<-function(n,cdf){
#pseudo random variate based on the quarterly probability of "winning" as defined by the empirical "cdf" vector. Returns a vector of length n. Each item in the vector is an index of a single square in the super bowl card
cdf<-sort(cdf) #cdf must be sorted in ascending order
u<-runif(n) #get uniform variates
res<-sapply(u,function(val){min(which(cdf>val))})
#res is a vector of length n. Each element is the value of the cdf whose probability is closest to the uniform variate but still greater than it (this is basically an inverse transformation from the uniform distribution into the empirical discrete probability distribution)
return(res)
}
rchoice<-function(){
#generates a list of samples of size 1,2,3,...100 chosen from the integers 1:100. Simulates choosing a certain number of squares from the board on a single super bowl.
res<-list()
rng<-1:100
for(j in rng){
res[[j]]<-sample(rng,j)
}
return(res)
}
getwinnings<-function(qwinners,choices,rewards){
#qwinners is a 4-vector of quarterly scores, choices is a 100-list of square choices, and rewards is a 4-vector assigning the revenue value of winning a particular quarter. Returns a 100-vector with the amount of revenue gained by each group of "k" square choices
winnings<-rep(0,100)
for(i in 1:4){
winners<-which(sapply(choices,function(squares){qwinners[i] %in% squares}))
winnings[winners]<-winnings[winners]+rewards[i]
}
return(winnings)
}
run.sim<-function(nsims,squarecost=10,rewards=c(20,20,20,40)){
#runs the specified number of super bowl simulations and returns the expected (average for a single game) profits for each choice of square count
winners<-apply(cdfs,2,FUN=function(cdf){rq(nsims,cdf)}) #simulate a bunch of games and see the predicted scores at the end of each quarter of each game. A data frame of same dimension as cdfs
rewards<-squarecost*rewards
profit<-rep(0,100)
costs<-seq(1,100)*squarecost
for(sim in 1:nsims){
choices<-rchoice()
profit<-profit-costs+getwinnings(winners[sim,],choices,rewards)
}
return(profit/nsims)
}
nsims<-c(10,100,1000,10000)
results<-matrix(nrow=100,ncol=length(nsims),dimnames=list(as.character(1:100),as.character(nsims)))
colnames(results)<-as.character(nsims)
for(i in 1:length(nsims)){
results[,i]<-run.sim(nsims[i])
}
profits<-results[,length(nsims)]
#roi<-profits/(10*1:100)
#plot of expected profits for the 10,000 simulations experiment shows high variability:
barplot(profits,xlab='Number of Squares Chosen',ylab='Average Profit ($)',main="Unpredictable variation in Profits")
scatter.smooth(1:100,profits,xlab="Number of Squares Purchased",ylab="Average Profit ($)",main="Unpredictable variation in Profits")

#but note that as the number of experiments run increases, the range of variation between maximum and minimum profits tends toward zero:

plot(log10(nsims),apply(results,2,max),type='b',ylim=c(min(results),max(results)),ylab="Maximum and Minimum Average Profits",xlab="Number of simulations run, log scale",main="Expected Profits squeezed to zero")
lines(log10(nsims),apply(results,2,min),type='b')
abline(0,0)