|
| 1 | +# this was run from within the https://github.com/reichlab/covid19-forecast-hub repo, |
| 2 | +# specifically in the data-processed folder |
| 3 | +# To turn the CSV written by this script into the .rds file, run the following in R: |
| 4 | +# |
| 5 | +# full_results <- readr::read_csv("../covid19-forecast-hub/data-processed/covid19-2023season-results.csv") |
| 6 | +# aws.s3::s3save(full_results, object = "covid19_forecast_hub_2023.rds", bucket = "forecasting-team-data") |
| 7 | +# |
| 8 | +using CSV |
| 9 | +using DataFrames |
| 10 | +using DataFramesMeta |
| 11 | +using Dates |
| 12 | +using RData |
# Sample forecast file, kept as globals for interactive inspection and for
# trying out `format_file` on a single input.
pathname = "COVIDhub-ensemble/"
filename = "2023-10-02-COVIDhub-ensemble.csv"
res = CSV.read(joinpath(pathname, filename), DataFrame)

# Lookup table used to translate each `location` value into an abbreviation.
state_names = CSV.read("../data-locations/locations.csv", DataFrame)
# `@passmissing` leaves missing abbreviations untouched, so no extra
# `lowercase(::Missing)` method is required. Defining one unqualified would
# create a new `Main.lowercase` that shadows `Base.lowercase` and has no
# method for strings.
@rtransform! state_names @passmissing :abbreviation = lowercase(:abbreviation)
@select! state_names :abbreviation :location
| 21 | + |
"""
    format_file(pathname, filename, state_names; season_start=Date(2023, 1, 1))

Read one forecast CSV and reshape it into a long table of quantile forecasts.

An empty `DataFrame()` is returned (the file is skipped) when any of these hold:
- `filename` does not start with a valid `yyyy-mm-dd` date;
- that date is earlier than `season_start`;
- the file has no `forecast_date` column;
- the file has no non-missing `forecast_date`, or its earliest one is before
  `season_start`.

# Arguments
- `pathname`: directory containing the file; stored verbatim in the `forecaster` column.
- `filename`: name of the file inside `pathname`.
- `state_names`: table with `:location` and `:abbreviation` columns, left-joined onto
  the forecasts by `:location`.

# Keywords
- `season_start`: earliest date (from the file name and from `forecast_date`) to keep.

# Returns
A `DataFrame` with columns `forecaster`, `geo_value`, `forecast_date`,
`target_end_date`, `ahead`, `quantile` and `value`, restricted to rows whose `type`
is `"quantile"`.
"""
function format_file(pathname, filename, state_names; season_start=Date(2023, 1, 1))
    # `first` counts characters, so it is safe for short and non-ASCII file names.
    prefix = first(filename, 10)
    occursin(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", prefix) || return DataFrame()
    # `tryparse` yields `nothing` for digit patterns that are not real dates
    # (e.g. month 13) instead of throwing.
    file_date = tryparse(Date, prefix)
    if file_date === nothing || file_date < season_start
        return DataFrame()
    end
    println(joinpath(pathname, filename))

    res = CSV.read(joinpath(pathname, filename), DataFrame, missingstring="NA")

    "forecast_date" in names(res) || return DataFrame()
    # Ignore missing dates; a file with no usable date at all is skipped.
    forecast_dates = skipmissing(res[!, :forecast_date])
    if isempty(forecast_dates) || minimum(forecast_dates) < season_start
        return DataFrame()
    end

    res = @chain res begin
        # Keep only the leading run of digits of `target` as an integer.
        @rtransform :target = parse(Int64, match(r"[0-9]*", :target).match)
        @transform :forecaster = pathname
        @rsubset :type == "quantile"
    end
    # NOTE(review): the join assumes `location` is parsed with the same type and
    # formatting in both tables; confirm CSV type inference does not turn
    # all-numeric codes into integers for some files.
    res = leftjoin(res, state_names, on=:location)
    @select! res :forecaster :geo_value = :abbreviation :forecast_date :target_end_date :ahead = :target :quantile :value
    return res
end
# Build one formatted table per file found anywhere under the current directory;
# files that `format_file` rejects contribute an empty table.
results = DataFrame[]
for (root, _, files) in walkdir(".")
    for file in files
        push!(results, format_file(root, file, state_names))
    end
end
# `reduce(vcat, ...)` concatenates the whole vector without splatting a
# potentially very long argument list.
full_results = reduce(vcat, results)
CSV.write("covid19-2023season-results.csv", full_results)

# Everything below runs after the CSV has been written, so it only affects the
# in-memory `full_results` used for interactive inspection.
full_results[!, :forecaster] |> unique
@rsubset! full_results :ahead % 7 == 0
# Drop the first two characters, i.e. the "./" that `walkdir(".")` prepends to
# every sub-directory name.
@rtransform! full_results :forecaster = :forecaster[3:end]
@rsubset full_results !ismissing(:geo_value) :forecast_date == Date(2023, 11, 13)
# NOTE(review): this previously used day 0, which is not a valid date and made the
# script end with an error. The date of the sample file loaded into `res` is used
# instead; confirm this is the intended check.
@rsubset res :forecast_date == Date(2023, 10, 2)
0 commit comments