22# specifically in the data-processed folder
33# to get the rds, run
44#
5- # full_results <- readr::read_csv("../covid19 -forecast-hub/data-processed/covid19-2023season-results.csv")
6- # aws.s3::s3save(full_results, object = "covid19_forecast_hub_2023 .rds", bucket = "forecasting-team-data")
5+ # full_results <- readr::read_csv("../OLDcovid19 -forecast-hub/data-processed/covid19-2023season-results.csv")
6+ # aws.s3::s3save(full_results, object = "covid19_forecast_hub_2023_full_summed .rds", bucket = "forecasting-team-data")
77#
8+ using Base: floatrange
89using CSV
910using DataFrames
1011using DataFramesMeta
1112using Dates
1213using RData
14+ import Base. lowercase
# Evaluate the current working directory; the value is not stored here.
1315pwd ()
# Removed side of the change (`-`): ad-hoc read of a single COVIDhub ensemble
# file, with the directory and file name also kept as separate globals.
14- res = CSV. read (" COVIDhub -ensemble/2023-10-02-COVIDhub -ensemble.csv" , DataFrame)
15- pathname = " COVIDhub -ensemble/"
16- filename = " 2023-10-02-COVIDhub -ensemble.csv"
# Added side of the change (`+`): the same three statements, now pointing at
# the COVIDhub_CDC ensemble directory and file.
# NOTE(review): within the visible lines, `res` is only read by a removed
# exploratory `@rsubset`, and `pathname`/`filename` only reappear as
# `format_file` parameters — these look like interactive scratch; confirm
# before relying on them.
16+ res = CSV. read (" COVIDhub_CDC -ensemble/2023-10-02-COVIDhub_CDC -ensemble.csv" , DataFrame)
17+ pathname = " COVIDhub_CDC -ensemble/"
18+ filename = " 2023-10-02-COVIDhub_CDC -ensemble.csv"
# Location lookup table, read from a path relative to the working directory.
1719state_names = CSV. read (" ../data-locations/locations.csv" , DataFrame)
# Adds a `lowercase` method that returns a `Missing` argument unchanged.
# NOTE(review): this extends a Base function on a Base type (type piracy),
# and `@passmissing` on the next line appears to cover the same case —
# confirm whether this method is still needed.
1820lowercase (m:: Missing ) = m
# Lower-case the `abbreviation` column row by row, modifying `state_names`
# in place.
1921@rtransform! state_names @passmissing :abbreviation = lowercase (:abbreviation )
# Keep only `abbreviation` and `location`: `format_file` joins on `location`
# and then selects `abbreviation` as `geo_value`.
2022@select! state_names :abbreviation :location
21-
2223function format_file (pathname, filename, state_names)
2324 if length (filename) < 10 ||
2425 match (r" [0-9]{4}-[0-9]{2}-[0-9]{2}" , filename[1 : 10 ]) == nothing ||
2526 Date (filename[1 : 10 ]) < Date (2023 , 1 , 1 )
2627 return DataFrame ()
2728 end
2829 println (joinpath (pathname, filename))
29-
30- res = CSV. read (joinpath (pathname, filename), DataFrame, missingstring= " NA" )
31-
30+ res = CSV. read (joinpath (pathname, filename), DataFrame, missingstring= " NA" , types= Dict (" value" => Float64))
3231 if ! (" forecast_date" in names (res)) ||
3332 res[! , :forecast_date ] |> minimum < Date (2023 , 1 , 1 )
3433 return DataFrame ()
@@ -41,7 +40,11 @@ function format_file(pathname, filename, state_names)
4140 end
4241 res = leftjoin (res, state_names, on= :location )
4342 @select! res :forecaster :geo_value = :abbreviation :forecast_date :target_end_date :ahead = :target :quantile :value
44- res
43+ @chain res begin
44+ @rtransform :week_ahead = div (:ahead , 7 )
45+ @groupby :forecaster :geo_value :forecast_date :week_ahead :quantile
46+ @combine :value = sum (:value )
47+ end
4548end
# Empty vector with element type `DataFrame`; its contents are later splatted
# into `vcat` to build `full_results`.
4649results = DataFrame[]
4750for (root, dirs, files) in walkdir (" ." )
@@ -50,11 +53,4 @@ for (root, dirs, files) in walkdir(".")
5053 end
5154end
# Concatenate every DataFrame held in `results` into a single table.
5255full_results = vcat (results... )
# Removed side of the change (`-`): the CSV write used to run here, followed
# by exploratory statements — listing distinct forecasters, keeping rows whose
# `ahead` is divisible by 7, keeping `forecaster` from index 3 onward, two
# scratch expressions, and two spot-check row subsets.
# NOTE(review): the last removed line passes day 0 to `Date`, which is not a
# valid day of month — consistent with these being scratch lines; confirm.
53- CSV. write (" covid19-2023season-results.csv" , full_results)
54- full_results[! , :forecaster ] |> unique
55- @rsubset! full_results :ahead % 7 == 0
56- @rtransform! full_results :forecaster = :forecaster [3 : end ]
57- " ./fqfae" [3 : end ]
58- 3 % 7
59- @rsubset full_results ! ismissing (:geo_value ) :forecast_date == Date (2023 ,11 ,13 )
60- @rsubset res :forecast_date == Date (2023 ,11 ,0 )
# Added side of the change (`+`): writing `full_results` to CSV is now the
# final statement of the script.
56+ CSV. write (" covid19-2023season-results.csv" , full_results)
0 commit comments