@@ -19,8 +19,8 @@ fetch_args <- epidatr::fetch_args_list(return_empty = FALSE, timeout_seconds = 4
1919# with prototyping the pipeline.
2020dummy_mode <- as.logical(Sys.getenv(" DUMMY_MODE" , FALSE ))
2121# For testing, reduce dates
22- forecast_generation_dates <- forecast_generation_dates [1 : 10 ]
23- forecast_dates <- forecast_dates [1 : 10 ]
22+ # forecast_generation_dates <- forecast_generation_dates[1:10]
23+ # forecast_dates <- forecast_dates[1:10]
2424
2525
2626# ================================ FORECASTER PARAMETERS ====================
@@ -199,6 +199,7 @@ get_partially_applied_forecaster <- function(id) {
199199 }
200200}
201201
202+
202203# ================================ TARGETS =================================
203204# ================================ PARAMETERS TARGETS ======================
204205parameter_targets <- list2(
@@ -232,7 +233,7 @@ data_targets <- list2(
232233 }
233234 ),
234235 tar_target(
235- name = hhs_latest_data ,
236+ name = hhs_evaluation_data ,
236237 command = {
237238 epidatr :: pub_covidcast(
238239 source = " hhs" ,
@@ -242,20 +243,20 @@ data_targets <- list2(
242243 geo_values = " *" ,
243244 time_values = " *" ,
244245 fetch_args = fetch_args
245- )
246+ ) %> %
247+ select(signal , geo_value , time_value , value ) %> %
248+ daily_to_weekly(keys = c(" geo_value" , " signal" )) %> %
249+ select(signal , geo_value , target_end_date = time_value , true_value = value ) %> %
250+ # Correction for timing offsets
251+ mutate(target_end_date = target_end_date + 3 )
246252 }
247253 ),
248254 tar_target(
249- name = hhs_evaluation_data ,
255+ name = state_geo_values ,
250256 command = {
251- hhs_latest_data %> %
252- select(signal , geo_value , time_value , value ) %> %
253- daily_to_weekly(keys = c(" geo_value" , " signal" )) %> %
254- rename(
255- true_value = value ,
256- target_end_date = time_value
257- ) %> %
258- select(signal , geo_value , target_end_date , true_value )
257+ hhs_evaluation_data %> %
258+ pull(geo_value ) %> %
259+ unique()
259260 }
260261 ),
261262 tar_target(
@@ -490,13 +491,7 @@ forecasts_and_scores <- tar_map(
490491 forecast_scaled <- forecast
491492 actual_eval_data <- hhs_evaluation_data
492493 }
493- # Fix for timing offsets
494- actual_eval_data <- actual_eval_data %> % mutate(target_end_date = target_end_date + 3 )
495- state_geo_values <- actual_eval_data %> %
496- pull(geo_value ) %> %
497- unique()
498494 forecast_scaled <- forecast_scaled %> %
499- filter(geo_value %in% state_geo_values ) %> %
500495 mutate(forecast_date = forecast_date + 3 , target_end_date = target_end_date + 3 ) %> %
501496 rename(" model" = " id" )
502497
@@ -512,34 +507,52 @@ combined_forecasts_and_scores <- rlang::list2(
512507 tar_combine(
513508 delphi_forecasts ,
514509 forecasts_and_scores [[" forecast" ]],
515- command = dplyr :: bind_rows(!!! .x ) %> % rename(forecaster = id )
510+ command = {
511+ dplyr :: bind_rows(!!! .x ) %> %
512+ rename(forecaster = id ) %> %
513+ filter(geo_value %in% state_geo_values ) %> %
514+ mutate(forecast_date = forecast_date + 3 , target_end_date = target_end_date + 3 )
515+ }
516516 ),
517517 tar_combine(
518518 delphi_scores ,
519519 forecasts_and_scores [[" score" ]],
520- command = dplyr :: bind_rows(!!! .x ) %> % rename(forecaster = id )
520+ command = {
521+ dplyr :: bind_rows(!!! .x ) %> %
522+ rename(forecaster = id ) %> %
523+ filter(geo_value %in% state_geo_values )
524+ }
521525 ),
522526)
523527external_forecasts_and_scores <- rlang :: list2(
524528 tar_target(
525- external_forecasts ,
529+ outside_forecaster_subset ,
530+ command = c(" COVIDhub-baseline" , " COVIDhub-trained_ensemble" , " COVIDhub_CDC-ensemble" )
531+ ),
532+ tar_target(
533+ external_forecasts_file ,
526534 command = {
527535 s3load(" covid19_forecast_hub_2023.rds" , bucket = " forecasting-team-data" , verbose = FALSE )
528536 full_results
529537 }
530538 ),
531539 tar_target(
532- external_scores ,
540+ external_forecasts ,
533541 command = {
534- actual_eval_data <- hhs_evaluation_data %> %
535- mutate(target_end_date = target_end_date + 3 )
536- cmu_forecast_dates <- ref_time_values + 3
537- filtered_forecasts <- external_forecasts %> %
542+ external_forecasts_file %> %
543+ filter(geo_value %in% state_geo_values , forecaster %in% outside_forecaster_subset ) %> %
538544 mutate(forecast_date = forecast_date + 5 , target_end_date = target_end_date + 5 ) %> %
539- filter(forecast_date %in% cmu_forecast_dates ) %> %
540- rename(model = forecaster ) %> %
545+ filter(forecast_date %in% (ref_time_values + 3 )) %> %
541546 rename(prediction = value ) %> %
542- filter(! is.na(geo_value ))
547+ mutate(prediction = prediction * 7 )
548+ }
549+ ),
550+ tar_target(
551+ external_scores ,
552+ command = {
553+ actual_eval_data <- hhs_evaluation_data
554+ filtered_forecasts <- external_forecasts %> %
555+ rename(model = forecaster )
543556 evaluate_predictions(forecasts = filtered_forecasts , truth_data = actual_eval_data ) %> %
544557 rename(forecaster = model )
545558 }
@@ -551,14 +564,28 @@ joined_forecasts_and_scores <- rlang::list2(
551564 tar_target(
552565 family_notebooks ,
553566 command = {
554- actual_eval_data <- hhs_evaluation_data %> %
555- mutate(target_end_date = target_end_date + 3 )
567+ actual_eval_data <- hhs_evaluation_data
556568 delphi_forecaster_subset <- forecaster_parameter_combinations [[forecaster_families ]]$ id
557- outside_forecaster_subset <- c( " COVIDhub-baseline " , " COVIDhub-ensemble " )
569+
558570 filtered_forecasts <- joined_forecasts %> %
559571 filter(forecaster %in% c(delphi_forecaster_subset , outside_forecaster_subset ))
560572 filtered_scores <- joined_scores %> %
561573 filter(forecaster %in% c(delphi_forecaster_subset , outside_forecaster_subset ))
574+
575+ # TODO: Write an assert to make sure that these dates are similar. It's a bit tricky.
576+ # actual_eval_data %>%
577+ # filter(target_end_date > "2023-09-01") %>%
578+ # distinct(target_end_date) %>%
579+ # pull(target_end_date) %>%
580+ # sort()
581+ # filtered_forecasts %>%
582+ # distinct(target_end_date) %>%
583+ # pull(target_end_date) %>%
584+ # sort()
585+ # filtered_scores %>%
586+ # distinct(target_end_date) %>%
587+ # pull(target_end_date) %>%
588+ # sort()
562589 forecaster_parameters <- forecaster_parameter_combinations [[forecaster_families ]]
563590 rmarkdown :: render(
564591 " scripts/reports/comparison-notebook.Rmd" ,
0 commit comments