@@ -6,10 +6,21 @@ submission_directory <- Sys.getenv("COVID_SUBMISSION_DIRECTORY", "cache")
66insufficient_data_geos <- c(" as" , " mp" , " vi" , " gu" )
77# date to cut the truth data off at, so we don't have too much of the past
88truth_data_date <- " 2023-09-01"
9- # Generically set the generation date to the next Wednesday (or today if it's Wednesday)
9+
10+ # This is the as_of for the forecast. If run on our typical schedule, it's
11+ # today, which is a Wednesday. Sometimes, if we're doing a delayed forecast,
12+ # it's a Thursday. It's used for stamping the data and for determining the
13+ # appropriate as_of when creating the forecast.
1014forecast_generation_date <- Sys.Date()
11- # forecast_date <- seq.Date(as.Date("2024-11-20"), Sys.Date(), by = 7L)
12- forecast_date <- Sys.Date()
15+ # Usually, the forecast_date is the same as the generation date, but you can
16+ # override this. It should be a Wednesday.
17+ forecast_date <- round_date(forecast_generation_date , " weeks" , week_start = 3 )
18+ # If doing backfill, you can set the forecast_date to a sequence of dates.
19+ # forecast_date <- seq.Date(as.Date("2024-11-20"), Sys.Date(), by = 7L)
20+ # forecast_generation_date needs to follow suit, but it's more complicated
21+ # because sometimes we forecast on Thursday.
22+ # forecast_generation_date <- c(as.Date(c("2024-11-21", "2024-11-27", "2024-12-04", "2024-12-11", "2024-12-18", "2024-12-26", "2025-01-02")), seq.Date(as.Date("2025-01-08"), Sys.Date(), by = 7L))
23+
1324forecaster_fns <- list2(
1425 linear = function (... ) {
1526 forecaster_baseline_linear(... , residual_tail = 0.97 , residual_center = 0.097 , no_intercept = TRUE )
@@ -59,16 +70,17 @@ rlang::list2(
5970 tar_map(
6071 values = tidyr :: expand_grid(
6172 tibble(
62- forecast_date = forecast_date
73+ forecast_date_int = forecast_date ,
74+ forecast_generation_date_int = forecast_generation_date
6375 )
6476 ),
6577 names = " forecast_date" ,
6678 tar_target(
6779 name = geo_forecasters_weights ,
6880 command = {
69- geo_forecasters_weights <- parse_prod_weights(here :: here(" covid_geo_exclusions.csv" ), forecast_date )
70- if (nrow(geo_forecasters_weights %> % filter(forecast_date == forecast_date )) == 0 ) {
71- cli_abort(" there are no weights for the forecast date {forecast_date}" )
81+ geo_forecasters_weights <- parse_prod_weights(here :: here(" covid_geo_exclusions.csv" ), forecast_date_int )
82+ if (nrow(geo_forecasters_weights %> % filter(forecast_date == forecast_date_int )) == 0 ) {
83+ cli_abort(" there are no weights for the forecast date {forecast_date}" )
7284 }
7385 geo_forecasters_weights
7486 },
@@ -83,10 +95,9 @@ rlang::list2(
8395 tar_target(
8496 forecast_res ,
8597 command = {
86- forecast_date <- as.Date(forecast_date )
87- if (forecast_date < Sys.Date()) {
98+ if (as.Date(forecast_generation_date_int ) < Sys.Date()) {
8899 train_data <- nhsn_archive_data %> %
89- epix_as_of(forecast_date ) %> %
100+ epix_as_of(as.Date( forecast_generation_date_int ) ) %> %
90101 mutate(
91102 geo_value = ifelse(geo_value == " usa" , " us" , geo_value ),
92103 time_value = time_value - 3
@@ -96,9 +107,9 @@ rlang::list2(
96107 train_data <-
97108 nhsn_latest_data %> %
98109 data_substitutions(disease = " covid" ) %> %
99- as_epi_df(as_of = as.Date(forecast_date ))
110+ as_epi_df(as_of = as.Date(forecast_date_int ))
100111 }
101- attributes(train_data )$ metadata $ as_of <- round_date( forecast_date , " weeks " , week_start = 3 )
112+ attributes(train_data )$ metadata $ as_of <- as.Date( forecast_date_int )
102113 train_data %> %
103114 forecaster_fns [[forecasters ]](ahead = aheads ) %> %
104115 mutate(
@@ -112,7 +123,6 @@ rlang::list2(
112123 tar_target(
113124 name = ensemble_res ,
114125 command = {
115- forecasts <- forecast_res
116126 forecasts %> %
117127 mutate(quantile = round(quantile , digits = 3 )) %> %
118128 left_join(geo_forecasters_weights , by = join_by(forecast_date , forecaster , geo_value )) %> %
@@ -127,15 +137,20 @@ rlang::list2(
127137 name = ensemble_mixture_res ,
128138 command = {
129139 forecast_res %> %
130- ensemble_linear_climate(aheads , other_weights = geo_forecasters_weights , max_climate_ahead_weight = 0.6 , max_climate_quantile_weight = 0.6 ) %> %
140+ ensemble_linear_climate(
141+ aheads ,
142+ other_weights = geo_forecasters_weights ,
143+ max_climate_ahead_weight = 0.6 ,
144+ max_climate_quantile_weight = 0.6
145+ ) %> %
131146 filter(geo_value %nin % geo_exclusions ) %> %
132147 ungroup()
133148 },
134149 ),
135150 tar_target(
136151 name = make_submission_csv ,
137152 command = {
138- forecast_reference_date <- get_forecast_reference_date(as.Date( forecast_date ) )
153+ forecast_reference_date <- get_forecast_reference_date(forecast_date_int )
139154 ensemble_mixture_res %> %
140155 format_flusight(disease = " covid" ) %> %
141156 write_submission_file(forecast_reference_date , file.path(submission_directory , " model-output/CMU-TimeSeries" ))
@@ -154,7 +169,7 @@ rlang::list2(
154169 ungroup() %> %
155170 format_flusight(disease = " covid" ) %> %
156171 write_submission_file(
157- get_forecast_reference_date(as.Date( forecast_date ) ),
172+ get_forecast_reference_date(forecast_date_int ),
158173 submission_directory = file.path(submission_directory , " model-output/CMU-climatological-baseline" ),
159174 file_name = " CMU-climatological-baseline"
160175 )
@@ -170,7 +185,7 @@ rlang::list2(
170185 if (submission_directory != " cache" ) {
171186 validation <- validate_submission(
172187 submission_directory ,
173- file_path = sprintf(" CMU-TimeSeries/%s-CMU-TimeSeries.csv" , get_forecast_reference_date(as.Date( forecast_date ) ))
188+ file_path = sprintf(" CMU-TimeSeries/%s-CMU-TimeSeries.csv" , get_forecast_reference_date(forecast_date_int ))
174189 )
175190 } else {
176191 validation <- " not validating when there is no hub (set submission_directory)"
@@ -187,7 +202,7 @@ rlang::list2(
187202 if (submission_directory != " cache" && submit_climatological ) {
188203 validation <- validate_submission(
189204 submission_directory ,
190- file_path = sprintf(" CMU-climatological-baseline/%s-CMU-climatological-baseline.csv" , get_forecast_reference_date(as.Date( forecast_date ) ))
205+ file_path = sprintf(" CMU-climatological-baseline/%s-CMU-climatological-baseline.csv" , get_forecast_reference_date(forecast_date_int ))
191206 )
192207 } else {
193208 validation <- " not validating when there is no hub (set submission_directory)"
@@ -243,13 +258,13 @@ rlang::list2(
243258 " scripts/reports/forecast_report.Rmd" ,
244259 output_file = here :: here(
245260 " reports" ,
246- sprintf(" %s_covid_prod_on_%s.html" , as.Date(forecast_date ), as.Date(Sys.Date() ))
261+ sprintf(" %s_covid_prod_on_%s.html" , as.Date(forecast_date_int ), as.Date(forecast_generation_date_int ))
247262 ),
248263 params = list (
249264 disease = " covid" ,
250265 forecast_res = forecast_res %> % bind_rows(ensemble_mixture_res %> % mutate(forecaster = " ensemble_mix" )),
251266 ensemble_res = ensemble_res ,
252- forecast_date = as.Date(forecast_date ),
267+ forecast_date = as.Date(forecast_date_int ),
253268 truth_data = truth_data
254269 )
255270 )
0 commit comments