@@ -6,8 +6,9 @@ source("scripts/targets-exploration-common.R")
66hhs_signal <- " confirmed_admissions_covid_1d"
77if (! exists(" ref_time_values_" )) {
88 # Alternatively you can let slide_forecaster figure out ref_time_values
9- start_date <- as.Date(" 2023-10-04 " )
9+ start_date <- as.Date(" 2023-11-08 " )
1010 end_date <- as.Date(" 2024-04-24" )
11+ # end_date <- start_date + 7
1112 date_step <- 7L
1213 ref_time_values_ <- seq.Date(start_date , end_date , by = date_step )
1314}
@@ -62,12 +63,7 @@ forecaster_parameter_combinations_ <- rlang::list2(
6263 ),
6364 pop_scaling = FALSE ,
6465 scale_method = " quantile" ,
65- center_method = " median" ,
66- nonlin_method = " quart_root" ,
67- filter_source = " " ,
68- filter_agg_level = " " ,
69- n_training = Inf ,
70- drop_non_seasons = FALSE ,
66+ n_training = Inf
7167 ),
7268 expand_grid(
7369 forecaster = " scaled_pop" ,
@@ -93,12 +89,7 @@ forecaster_parameter_combinations_ <- rlang::list2(
9389 ),
9490 pop_scaling = FALSE ,
9591 scale_method = " quantile" ,
96- center_method = " median" ,
97- nonlin_method = " quart_root" ,
98- filter_source = " " ,
99- filter_agg_level = " " ,
100- n_training = Inf ,
101- drop_non_seasons = FALSE ,
92+ n_training = Inf
10293 ),
10394 expand_grid(
10495 forecaster = " scaled_pop" ,
@@ -124,12 +115,7 @@ forecaster_parameter_combinations_ <- rlang::list2(
124115 ),
125116 pop_scaling = FALSE ,
126117 scale_method = " quantile" ,
127- center_method = " median" ,
128- nonlin_method = " quart_root" ,
129- filter_source = " " ,
130- filter_agg_level = " " ,
131- n_training = Inf ,
132- drop_non_seasons = FALSE ,
118+ n_training = Inf
133119 )
134120 ),
135121 scled_pop_season = tidyr :: expand_grid(
@@ -141,7 +127,13 @@ forecaster_parameter_combinations_ <- rlang::list2(
141127 ),
142128 pop_scaling = FALSE ,
143129 n_training = Inf ,
144- seasonal_method = list (c(" covid" ), c(" window" ), c(" covid" , " window" ), c(" climatological" ), c(" climatological" , " window" ))
130+ seasonal_method = list (
131+ c(" covid" ),
132+ c(" window" ),
133+ c(" covid" , " window" ),
134+ c(" climatological" ),
135+ c(" climatological" , " window" )
136+ )
145137 )
146138) %> %
147139 map(function (x ) {
@@ -178,16 +170,16 @@ scaled_pop_scaled <- list(
178170smooth_scaled <- list (
179171 forecaster = " smoothed_scaled" ,
180172 trainer = " quantreg" ,
181- lags =
182- # list(smoothed, sd)
183- list (c(0 , 7 , 14 , 21 , 28 ), c(0 )),
173+ # lags = list(smoothed, sd)
174+ lags = list (c(0 , 7 , 14 , 21 , 28 ), c(0 )),
184175 smooth_width = as.difftime(2 , units = " weeks" ),
185176 sd_width = as.difftime(4 , units = " weeks" ),
186177 sd_mean_width = as.difftime(2 , units = " weeks" ),
187178 pop_scaling = TRUE ,
188179 n_training = Inf
189180)
190181# Human-readable object to be used for inspecting the ensembles in the pipeline.
182+ # fmt: skip
191183ensemble_parameter_combinations_ <- tribble(
192184 ~ ensemble , ~ ensemble_args , ~ forecasters ,
193185 # mean forecaster
@@ -240,7 +232,12 @@ ensemble_parameter_combinations_ <- tribble(
240232 ) %> %
241233 add_id(exclude = " forecasters" )
242234# spoofing ensembles for right now
243- ensemble_parameter_combinations_ <- tibble :: tibble(id = character (), ensemble = character (), ensemble_args = character (), children_ids = character ())
235+ ensemble_parameter_combinations_ <- tibble :: tibble(
236+ id = character (),
237+ ensemble = character (),
238+ ensemble_args = character (),
239+ children_ids = character ()
240+ )
244241# Check that every ensemble dependent is actually included.
245242missing_forecasters <- setdiff(
246243 ensemble_parameter_combinations_ %> % pull(children_ids ) %> % unlist() %> % unique(),
@@ -272,7 +269,7 @@ rlang::list2(
272269 tar_target(
273270 name = hhs_archive_data_asof ,
274271 command = {
275- get_health_data(as.Date(ref_time_values )) %> %
272+ get_health_data(as.Date(ref_time_values ), disease = " covid " ) %> %
276273 mutate(version = as.Date(ref_time_values )) %> %
277274 relocate(geo_value , time_value , version , hhs )
278275 },
@@ -348,6 +345,9 @@ rlang::list2(
348345 # weekly data is indexed from the start of the week
349346 mutate(time_value = time_value + 6 - time_value_adjust ) %> %
350347 mutate(version = time_value ) %> %
348+ # Always convert to data.frame after dplyr operations on data.table.
349+ # https://github.com/cmu-delphi/epiprocess/issues/618
350+ as.data.frame() %> %
351351 as_epi_archive(compactify = TRUE )
352352 nssp_archive
353353 }
@@ -380,39 +380,52 @@ rlang::list2(
380380 geo_type = " hhs" ,
381381 geo_values = " *"
382382 )
383- google_symptoms_archive_min <-
384- google_symptoms_state_archive %> %
383+ google_symptoms_archive_min <- google_symptoms_state_archive %> %
385384 bind_rows(google_symptoms_hhs_archive ) %> %
386385 select(geo_value , time_value , value ) %> %
387386 daily_to_weekly() %> %
388387 mutate(version = time_value ) %> %
389- as_epi_archive(compactify = TRUE )
390- google_symptoms_archive_min $ DT %> %
391388 filter(! is.na(value )) %> %
392389 relocate(geo_value , time_value , version , value ) %> %
390+ as.data.frame() %> %
393391 as_epi_archive(compactify = TRUE )
394392 })
395- all_of_them [[1 ]]$ DT %<> % rename(google_symptoms_4_bronchitis = value )
396- all_of_them [[2 ]]$ DT %<> % rename(google_symptoms_5_ageusia = value )
393+ all_of_them [[1 ]] <- all_of_them [[1 ]]$ DT %> %
394+ rename(google_symptoms_4_bronchitis = value ) %> %
395+ # Always convert to data.frame after dplyr operations on data.table.
396+ # https://github.com/cmu-delphi/epiprocess/issues/618
397+ as.data.frame() %> %
398+ as_epi_archive(compactify = TRUE )
399+ all_of_them [[2 ]] <- all_of_them [[2 ]]$ DT %> %
400+ rename(google_symptoms_5_ageusia = value ) %> %
401+ # Always convert to data.frame after dplyr operations on data.table.
402+ # https://github.com/cmu-delphi/epiprocess/issues/618
403+ as.data.frame() %> %
404+ as_epi_archive(compactify = TRUE )
397405 google_symptoms_archive <- epix_merge(all_of_them [[1 ]], all_of_them [[2 ]])
398406 google_symptoms_archive <- google_symptoms_archive $ DT %> %
399407 mutate(google_symptoms = google_symptoms_4_bronchitis + google_symptoms_5_ageusia ) %> %
408+ # Always convert to data.frame after dplyr operations on data.table.
409+ # https://github.com/cmu-delphi/epiprocess/issues/618
410+ as.data.frame() %> %
400411 as_epi_archive(compactify = TRUE )
401- # not just using dplyr to allow for na.rm
402- google_symptoms_archive $ DT $ google_symptoms <-
403- rowSums(google_symptoms_archive $ DT [, c(" google_symptoms_4_bronchitis" , " google_symptoms_5_ageusia" )],
404- na.rm = TRUE
405- )
406412 pre_pipeline <- google_symptoms_archive %> %
407413 epix_as_of(as.Date(" 2023-10-04" )) %> %
408414 mutate(source = " none" )
409- colnames <- c(" google_symptoms_4_bronchitis" , " google_symptoms_5_ageusia" , " google_symptoms " )
415+ colnames <- c(" google_symptoms_4_bronchitis" , " google_symptoms_5_ageusia" )
410416 for (colname in colnames ) {
411417 learned_params <- calculate_whitening_params(pre_pipeline , colname = colname )
412418 google_symptoms_archive $ DT %<> % data_whitening(colname = colname , learned_params , join_cols = " geo_value" )
413419 }
414420 google_symptoms_archive $ DT %> %
421+ mutate(
422+ google_symptoms = ifelse(is.na(google_symptoms_4_bronchitis ), 0 , google_symptoms_4_bronchitis ) +
423+ ifelse(is.na(google_symptoms_5_ageusia ), 0 , google_symptoms_5_ageusia )
424+ ) %> %
415425 select(- starts_with(" source" )) %> %
426+ # Always convert to data.frame after dplyr operations on data.table
427+ # https://github.com/cmu-delphi/epiprocess/issues/618
428+ as.data.frame() %> %
416429 as_epi_archive(compactify = TRUE )
417430 }
418431 ),
@@ -479,8 +492,14 @@ rlang::list2(
479492 nwss <- readr :: read_csv(most_recent ) %> %
480493 rename(value = state_med_conc ) %> %
481494 arrange(geo_value , time_value )
482- state_code <- readr :: read_csv(here :: here(" aux_data" , " flusion_data" , " state_codes_table.csv" ), show_col_types = FALSE )
483- hhs_codes <- readr :: read_csv(here :: here(" aux_data" , " flusion_data" , " state_code_hhs_table.csv" ), show_col_types = FALSE )
495+ state_code <- readr :: read_csv(
496+ here :: here(" aux_data" , " flusion_data" , " state_codes_table.csv" ),
497+ show_col_types = FALSE
498+ )
499+ hhs_codes <- readr :: read_csv(
500+ here :: here(" aux_data" , " flusion_data" , " state_code_hhs_table.csv" ),
501+ show_col_types = FALSE
502+ )
484503 state_to_hhs <- hhs_codes %> %
485504 left_join(state_code , by = " state_code" ) %> %
486505 select(hhs_region = hhs , geo_value = state_id )
@@ -489,8 +508,7 @@ rlang::list2(
489508 drop_na() %> %
490509 select(- agg_level , - year , - agg_level , - population , - density )
491510 pop_data <- gen_pop_and_density_data()
492- nwss_hhs_region <-
493- nwss %> %
511+ nwss_hhs_region <- nwss %> %
494512 left_join(state_to_hhs , by = " geo_value" ) %> %
495513 mutate(year = year(time_value )) %> %
496514 left_join(pop_data , by = join_by(geo_value , year )) %> %
@@ -517,8 +535,12 @@ rlang::list2(
517535 tar_target(
518536 name = hhs_region ,
519537 command = {
520- hhs_region <- readr :: read_csv(" https://raw.githubusercontent.com/cmu-delphi/covidcast-indicators/refs/heads/main/_delphi_utils_python/delphi_utils/data/2020/state_code_hhs_table.csv" )
521- state_id <- readr :: read_csv(" https://raw.githubusercontent.com/cmu-delphi/covidcast-indicators/refs/heads/main/_delphi_utils_python/delphi_utils/data/2020/state_codes_table.csv" )
538+ hhs_region <- readr :: read_csv(
539+ " https://raw.githubusercontent.com/cmu-delphi/covidcast-indicators/refs/heads/main/_delphi_utils_python/delphi_utils/data/2020/state_code_hhs_table.csv"
540+ )
541+ state_id <- readr :: read_csv(
542+ " https://raw.githubusercontent.com/cmu-delphi/covidcast-indicators/refs/heads/main/_delphi_utils_python/delphi_utils/data/2020/state_codes_table.csv"
543+ )
522544 hhs_region %> %
523545 left_join(state_id , by = " state_code" ) %> %
524546 select(hhs_region = hhs , geo_value = state_id ) %> %
@@ -534,22 +556,22 @@ rlang::list2(
534556 rename(" hhs" : = value ) %> %
535557 add_hhs_region_sum(hhs_region ) %> %
536558 filter(geo_value != " us" ) %> %
537- as_epi_archive(
538- compactify = TRUE
539- )
559+ # Always convert to data.frame after dplyr operations on data.table
560+ # https://github.com/cmu-delphi/epiprocess/issues/618
561+ as.data.frame() %> %
562+ as_epi_archive(compactify = TRUE )
540563 joined_archive_data $ geo_type <- " custom"
541564 # drop aggregated geo_values
542- joined_archive_data <- joined_archive_data %> %
543- epix_merge(nwss_coarse , sync = " locf" )
544- joined_archive_data $ geo_type <- " custom"
545- # TODO: Maybe bring these back
546- # epix_merge(doctor_visits_weekly_archive, sync = "locf") %>%
547- joined_archive_data %<> %
548- epix_merge(nssp_archive , sync = " locf" )
565+ joined_archive_data <- joined_archive_data %> % epix_merge(nwss_coarse , sync = " locf" )
566+ joined_archive_data %<> % epix_merge(nssp_archive , sync = " locf" )
549567 joined_archive_data $ geo_type <- " custom"
550- joined_archive_data %<> %
551- epix_merge(google_symptoms_archive , sync = " locf" )
552- joined_archive_data $ DT %<> % filter(grepl(" [a-z]{2}" , geo_value ), ! (geo_value %in% c(" as" , " pr" , " vi" , " gu" , " mp" )))
568+ joined_archive_data %<> % epix_merge(google_symptoms_archive , sync = " locf" )
569+ joined_archive_data <- joined_archive_data $ DT %> %
570+ filter(grepl(" [a-z]{2}" , geo_value ), ! (geo_value %in% c(" as" , " pr" , " vi" , " gu" , " mp" ))) %> %
571+ # Always convert to data.frame after dplyr operations on data.table
572+ # https://github.com/cmu-delphi/epiprocess/issues/618
573+ as.data.frame() %> %
574+ as_epi_archive(compactify = TRUE )
553575 joined_archive_data $ geo_type <- " state"
554576 slide_forecaster(
555577 epi_archive = joined_archive_data ,
@@ -591,7 +613,7 @@ rlang::list2(
591613 rename(model = forecaster ) %> %
592614 rename(prediction = value ) %> %
593615 filter(! is.na(geo_value ))
594- evaluate_predictions(predictions_cards = filtered_forecasts , truth_data = actual_eval_data ) %> %
616+ evaluate_predictions(forecasts = filtered_forecasts , truth_data = actual_eval_data ) %> %
595617 rename(forecaster = model )
596618 }
597619 ),
0 commit comments