@@ -15,35 +15,83 @@ forecast_generation_date <- Sys.Date()
1515# Usually, the forecast_date is the same as the generation date, but you can
1616# override this. It should be a Wednesday.
1717forecast_date <- round_date(forecast_generation_date , " weeks" , week_start = 3 )
18- # If doing backfill, you can set the forecast_date to a sequence of dates.
19- # forecast_date <- seq.Date(as.Date("2024-11-20"), Sys.Date(), by = 7L)
2018# forecast_generation_date needs to follow suit, but it's more complicated
2119# because sometimes we forecast on Thursday.
22- # forecast_generation_date <- c(as.Date(c("2024-11-21", "2024-11-27", "2024-12-04", "2024-12-11", "2024-12-18", "2024-12-26", "2025-01-02")), seq.Date(as.Date("2025-01-08"), Sys.Date(), by = 7L))
20+ # forecast_generation_date <- c(as.Date(c("2024-11-20", "2024-11-27", "2024-12-04", "2024-12-11", "2024-12-18", "2024-12-26", "2025-01-02")), seq.Date(as.Date("2025-01-08"), Sys.Date(), by = 7L))
21+ # If doing backfill, you can set the forecast_date to a sequence of dates.
22+ # forecast_date <- seq.Date(as.Date("2024-11-20"), Sys.Date(), by = 7L)
2323
2424forecaster_fns <- list2(
25- linear = function (... ) {
26- forecaster_baseline_linear(... , residual_tail = 0.97 , residual_center = 0.097 , no_intercept = TRUE )
25+ linear = function (epi_data , ahead , extra_data , ... ) {
26+ forecaster_baseline_linear(epi_data , ahead , ... , residual_tail = 0.97 , residual_center = 0.097 , no_intercept = TRUE )
2727 },
28- # linearlog = function(...) {
28+ # linearlog = function(epi_data, ahead, extra_data, ...) {
2929 # forecaster_baseline_linear(..., log = TRUE)
3030 # },
31- climate_base = function (... ) {
31+ climate_base = function (epi_data , ahead , extra_data , ... ) {
3232 climatological_model(
33- ... ,
33+ epi_data , ahead , ... ,
3434 )
3535 },
36- climate_geo_agged = function (... ) {
36+ climate_geo_agged = function (epi_data , ahead , extra_data , ... ) {
3737 climatological_model(
38- ... ,
38+ epi_data , ahead , ... ,
3939 geo_agg = TRUE
4040 )
4141 },
42+ windowed_seasonal_extra_sources = function (epi_data , ahead , extra_data , ... ) {
43+ fcst <-
44+ epi_data %> %
45+ left_join(extra_data , by = join_by(geo_value , time_value )) %> %
46+ scaled_pop_seasonal(
47+ outcome = " value" ,
48+ ahead = ahead * 7 ,
49+ extra_sources = " nssp" ,
50+ ... ,
51+ seasonal_method = " window" ,
52+ trainer = epipredict :: quantile_reg(),
53+ drop_non_seasons = TRUE ,
54+ pop_scaling = FALSE ,
55+ lags = list (c(0 , 7 ), c(0 , 7 ))
56+ ) %> %
57+ mutate(target_end_date = target_end_date + 3 )
58+ fcst
59+ }
4260)
4361
4462rlang :: list2(
4563 tar_target(aheads , command = - 1 : 3 ),
4664 tar_target(forecasters , command = seq_along(forecaster_fns )),
65+ tar_target(
66+ download_latest ,
67+ command = {
68+ if (wday(Sys.Date()) < 6 & wday(Sys.Date()) > 3 ) {
69+ # download from the preliminary data source from Wednesday to Friday
70+ most_recent_result <- readr :: read_csv(" https://data.cdc.gov/resource/mpgq-jmmr.csv?$limit=20000&$select=weekendingdate,jurisdiction,totalconfc19newadm,totalconfflunewadm" )
71+ } else {
72+ most_recent_result <- readr :: read_csv(" https://data.cdc.gov/resource/ua7e-t2fy.csv?$limit=20000&$select=weekendingdate,jurisdiction,totalconfc19newadm,totalconfflunewadm" )
73+ }
74+ most_recent_result <-
75+ most_recent_result %> %
76+ process_nhsn_data() %> %
77+ filter(disease == " nhsn_covid" ) %> %
78+ select(- disease ) %> %
79+ filter(geo_value %nin % insufficient_data_geos )
80+ # if there's not already a result we need to save it no matter what
81+ if (file.exists(here :: here(" .nhsn_covid_cache.parquet" ))) {
82+ previous_result <- qs :: qread(here :: here(" .nhsn_covid_cache.parquet" ))
83+ } else
84+ # if something is different, update the file
85+ if (any(previous_result != most_recent_result )) {
86+ qs :: qsave(most_recent_result , here :: here(" .nhsn_covid_cache.parquet" ))
87+ } else {
88+ qs :: qsave(most_recent_result , here :: here(" .nhsn_covid_cache.parquet" ))
89+ }
90+ NULL
91+ },
92+ description = " Download the result, and update the file only if it's actually different" ,
93+ priority = 1
94+ ),
4795 tar_target(
4896 nhsn_latest_data ,
4997 command = {
@@ -59,14 +107,19 @@ rlang::list2(
59107 select(- disease ) %> %
60108 filter(geo_value %nin % insufficient_data_geos )
61109 },
62- cue = tar_cue(mode = " always" )
63110 ),
64111 tar_target(
65112 name = nhsn_archive_data ,
66113 command = {
67114 create_nhsn_data_archive(disease = " nhsn_covid" )
68115 }
69116 ),
117+ tar_target(
118+ current_nssp_archive ,
119+ command = {
120+ up_to_date_nssp_state_archive(" covid" )
121+ }
122+ ),
70123 tar_map(
71124 values = tidyr :: expand_grid(
72125 tibble(
@@ -84,7 +137,6 @@ rlang::list2(
84137 }
85138 geo_forecasters_weights
86139 },
87- cue = tar_cue(mode = " always" )
88140 ),
89141 tar_target(
90142 name = geo_exclusions ,
@@ -109,29 +161,31 @@ rlang::list2(
109161 data_substitutions(disease = " covid" ) %> %
110162 as_epi_df(as_of = as.Date(forecast_date_int ))
111163 }
164+ nssp <- current_nssp_archive %> % epix_as_of(min(forecast_date , current_nssp_archive $ versions_end ))
112165 attributes(train_data )$ metadata $ as_of <- as.Date(forecast_date_int )
113166 train_data %> %
114- forecaster_fns [[forecasters ]](ahead = aheads ) %> %
167+ forecaster_fns [[forecasters ]](ahead = aheads , extra_data = nssp ) %> %
115168 mutate(
116169 forecaster = names(forecaster_fns [forecasters ]),
117170 geo_value = as.factor(geo_value )
118171 )
119172 },
120- pattern = cross(aheads , forecasters ),
121- cue = tar_cue(mode = " always" )
173+ pattern = cross(aheads , forecasters )
122174 ),
123175 tar_target(
124176 name = ensemble_res ,
125177 command = {
126178 forecast_res %> %
127- mutate(quantile = round(quantile , digits = 3 )) %> %
128- left_join(geo_forecasters_weights , by = join_by(forecast_date , forecaster , geo_value )) %> %
129- mutate(value = value * weight ) %> %
130- group_by(forecast_date , geo_value , target_end_date , quantile ) %> %
131- summarize(value = sum(value , na.rm = TRUE ), .groups = " drop" ) %> %
132- filter(geo_value %nin % geo_exclusions )
179+ ensemble_linear_climate(
180+ aheads ,
181+ other_weights = geo_forecasters_weights ,
182+ max_climate_ahead_weight = 0.6 ,
183+ max_climate_quantile_weight = 0.6
184+ ) %> %
185+ filter(geo_value %nin % geo_exclusions ) %> %
186+ ungroup() %> %
187+ sort_by_quantile()
133188 },
134- cue = tar_cue(mode = " always" )
135189 ),
136190 tar_target(
137191 name = ensemble_mixture_res ,
@@ -144,7 +198,11 @@ rlang::list2(
144198 max_climate_quantile_weight = 0.6
145199 ) %> %
146200 filter(geo_value %nin % geo_exclusions ) %> %
147- ungroup()
201+ ungroup() %> %
202+ bind_rows(forecast_res %> % filter(forecaster == " windowed_seasonal_extra_sources" )) %> %
203+ group_by(geo_value , forecast_date , target_end_date , quantile ) %> %
204+ summarize(value = mean(value , na.rm = TRUE ), .groups = " drop" ) %> %
205+ sort_by_quantile()
148206 },
149207 ),
150208 tar_target(
@@ -168,7 +226,6 @@ rlang::list2(
168226 format_flusight(disease = " covid" ) %> %
169227 write_submission_file(forecast_reference_date , file.path(submission_directory , " model-output/CMU-TimeSeries" ))
170228 },
171- cue = tar_cue(mode = " always" )
172229 ),
173230 tar_target(
174231 name = make_climate_submission_csv ,
@@ -183,12 +240,11 @@ rlang::list2(
183240 format_flusight(disease = " covid" ) %> %
184241 write_submission_file(
185242 get_forecast_reference_date(forecast_date_int ),
186- submission_directory = file.path(submission_directory , " model-output/CMU-climatological-baseline " ),
187- file_name = " CMU-climatological-baseline "
243+ submission_directory = file.path(submission_directory , " model-output/CMU-climate_baseline " ),
244+ file_name = " CMU-climate_baseline "
188245 )
189246 }
190247 },
191- cue = tar_cue(mode = " always" )
192248 ),
193249 tar_target(
194250 name = validate_result ,
@@ -205,7 +261,6 @@ rlang::list2(
205261 }
206262 validation
207263 },
208- cue = tar_cue(mode = " always" )
209264 ),
210265 tar_target(
211266 name = validate_climate_result ,
@@ -215,14 +270,13 @@ rlang::list2(
215270 if (submission_directory != " cache" && submit_climatological ) {
216271 validation <- validate_submission(
217272 submission_directory ,
218- file_path = sprintf(" CMU-climatological-baseline /%s-CMU-climatological-baseline .csv" , get_forecast_reference_date(forecast_date_int ))
273+ file_path = sprintf(" CMU-climate_baseline /%s-CMU-climate_baseline .csv" , get_forecast_reference_date(forecast_date_int ))
219274 )
220275 } else {
221276 validation <- " not validating when there is no hub (set submission_directory)"
222277 }
223278 validation
224279 },
225- cue = tar_cue(mode = " always" )
226280 ),
227281 tar_target(
228282 name = truth_data ,
@@ -261,7 +315,6 @@ rlang::list2(
261315 select(- rel_max_value )
262316 truth_data %> % bind_rows(nssp_renormalized )
263317 },
264- cue = tar_cue(mode = " always" )
265318 ),
266319 tar_target(
267320 notebook ,
@@ -271,7 +324,7 @@ rlang::list2(
271324 " scripts/reports/forecast_report.Rmd" ,
272325 output_file = here :: here(
273326 " reports" ,
274- sprintf(" %s_covid_prod_on_%s.html" , as.Date(forecast_date_int ), as.Date(forecast_generation_date_int ))
327+ sprintf(" %s_covid_prod_on_%s.html" , as.Date(forecast_date_int ), as.Date(Sys.Date() ))
275328 ),
276329 params = list (
277330 disease = " covid" ,
@@ -281,7 +334,6 @@ rlang::list2(
281334 )
282335 )
283336 },
284- cue = tar_cue(mode = " always" )
285337 )
286338 ),
287339)
0 commit comments