Skip to content

Commit a6de31a

Browse files
committed
climatological-baseline submission process
1 parent 65b4094 commit a6de31a

File tree

5 files changed

+106
-19
lines changed

5 files changed

+106
-19
lines changed

R/forecasters/formatters.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ format_flusight <- function(pred, disease = c("flu", "covid")) {
6464
mutate(
6565
reference_date = get_forecast_reference_date(forecast_date),
6666
target = glue::glue("wk inc {disease} hosp"),
67-
horizon = floor((target_end_date - reference_date) / 7),
67+
horizon = as.integer(floor((target_end_date - reference_date) / 7)),
6868
output_type = "quantile",
6969
output_type_id = quantile,
7070
value = value

R/utils.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,11 +252,11 @@ filter_forecast_geos <- function(forecasts, truth_data) {
252252
}
253253

254254
#' Write a submission file. pred is assumed to be in the correct submission format.
255-
write_submission_file <- function(pred, forecast_reference_date, submission_directory) {
255+
write_submission_file <- function(pred, forecast_reference_date, submission_directory, file_name = "CMU-TimeSeries") {
256256
if (!file.exists(submission_directory)) {
257257
cli::cli_abort("Submission directory does not exist.", call = rlang::current_call())
258258
}
259-
file_path <- file.path(submission_directory, sprintf("%s-CMU-TimeSeries.csv", forecast_reference_date))
259+
file_path <- file.path(submission_directory, sprintf("%s-%s.csv", forecast_reference_date, file_name))
260260
if (file.exists(file_path)) {
261261
cli::cli_warn(c("Overwriting existing file in", file_path), call = rlang::current_call())
262262
file.remove(file_path)

scripts/covid_hosp_explore.R

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,12 @@ source("scripts/targets-exploration-common.R")
44
# These globals are needed by make_forecasts_and_scores (and they need to persist
55
# during the actual targets run, since the commands are frozen as expressions).
66
hhs_signal <- "confirmed_admissions_covid_1d"
7-
ref_time_values_ <- as.Date(c("2023-11-08", "2023-11-22"))
87
if (!exists("ref_time_values_")) {
98
# Alternatively you can let slide_forecaster figure out ref_time_values
109
start_date <- as.Date("2023-10-04")
1110
end_date <- as.Date("2024-04-24")
1211
date_step <- 7L
13-
ref_time_values_ <- NULL
12+
#ref_time_values_ <- as.Date(c("2023-11-08", "2023-11-22"))
1413
}
1514
time_value_adjust <- 3 # this moves the week marker from Saturday to Wednesday
1615

scripts/covid_hosp_prod.R

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ insufficient_data_geos <- c("as", "mp", "vi", "gu")
88
# date to cut the truth data off at, so we don't have too much of the past
99
truth_data_date <- "2023-09-01"
1010
# Generically set the generation date to the next Wednesday (or today if it's Wednesday)
11-
forecast_generation_date <- Sys.Date()
11+
forecast_generation_date <- seq.Date(as.Date("2024-11-20"), Sys.Date(), by = 7L)
1212

1313
forecaster_fns <- list2(
1414
linear = function(...) {
@@ -29,12 +29,6 @@ forecaster_fns <- list2(
2929
)
3030
},
3131
)
32-
geo_forecasters_weights <- parse_prod_weights(here::here("covid_geo_exclusions.csv"), forecast_generation_date)
33-
geo_exclusions <- exclude_geos(geo_forecasters_weights)
34-
if (nrow(geo_forecasters_weights %>% filter(forecast_date == forecast_generation_date)) == 0) {
35-
cli_abort("there are no weights for the forecast date {forecast_generation_date}")
36-
}
37-
3832

3933
rlang::list2(
4034
tar_target(
@@ -56,6 +50,23 @@ rlang::list2(
5650
)
5751
),
5852
names = "forecast_generation_date",
53+
tar_target(
54+
name = geo_forecasters_weights,
55+
command = {
56+
geo_forecasters_weights <- parse_prod_weights(here::here("covid_geo_exclusions.csv"), forecast_generation_date)
57+
if (nrow(geo_forecasters_weights %>% filter(forecast_date == forecast_generation_date)) == 0) {
58+
cli_abort("there are no weights for the forecast date {forecast_generation_date}")
59+
}
60+
geo_forecasters_weights
61+
},
62+
cue = tar_cue(mode = "always")
63+
),
64+
tar_target(
65+
name = geo_exclusions,
66+
command = {
67+
exclude_geos(geo_forecasters_weights)
68+
}
69+
),
5970
tar_target(
6071
nhsn_latest_data,
6172
command = {
@@ -123,6 +134,23 @@ rlang::list2(
123134
},
124135
cue = tar_cue(mode = "always")
125136
),
137+
tar_target(
138+
name = make_climate_submission_csv,
139+
command = {
140+
forecasts <- forecast_res
141+
forecasts %>%
142+
filter(forecaster %in% c("climate_base", "climate_geo_agged")) %>%
143+
group_by(geo_value, target_end_date, quantile) %>%
144+
summarize(forecast_date = first(forecast_date), value = mean(value, na.rm = TRUE), .groups = "drop") %>%
145+
ungroup() %>%
146+
format_flusight(disease = "covid") %>%
147+
write_submission_file(
148+
get_forecast_reference_date(as.Date(forecast_generation_date)),
149+
file.path(submission_directory, "model-output/CMU-climatological-baseline")
150+
)
151+
},
152+
cue = tar_cue(mode = "always")
153+
),
126154
tar_target(
127155
name = validate_result,
128156
command = {
@@ -139,6 +167,22 @@ rlang::list2(
139167
},
140168
cue = tar_cue(mode = "always")
141169
),
170+
tar_target(
171+
name = validate_climate_result,
172+
command = {
173+
make_climate_submission_csv
174+
# only validate if we're saving the result to a hub
175+
if (submission_directory != "cache") {
176+
validation <- validate_submission(
177+
submission_directory,
178+
file_path = sprintf("CMU-climatological-baseline/%s-CMU-climatological-baseline.csv", get_forecast_reference_date(as.Date(forecast_generation_date))))
179+
} else {
180+
validation <- "not validating when there is no hub (set submission_directory)"
181+
}
182+
validation
183+
},
184+
cue = tar_cue(mode = "always")
185+
),
142186
tar_target(
143187
name = truth_data,
144188
command = {

scripts/flu_hosp_prod.R

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ truth_data_date <- "2023-09-01"
99
# needed to create the aux data targets
1010
end_date <- Sys.Date()
1111
# Generically set the generation date to the next Wednesday (or today if it's Wednesday)
12-
forecast_generation_date <- Sys.Date()
12+
forecast_generation_date <- seq.Date(as.Date("2024-11-20"), Sys.Date(), by = 7L)
1313
very_latent_locations <- list(list(
1414
c("source"),
1515
c("flusurv", "ILI+")
@@ -50,12 +50,6 @@ forecaster_fns <- list2(
5050
mutate(target_end_date = target_end_date + 3)
5151
},
5252
)
53-
geo_forecasters_weights <- parse_prod_weights(here::here("flu_geo_exclusions.csv"), forecast_generation_date)
54-
geo_exclusions <- exclude_geos(geo_forecasters_weights)
55-
if (nrow(geo_forecasters_weights %>% filter(forecast_date == forecast_generation_date)) == 0) {
56-
geo_forecasters_weights
57-
cli_abort("there are no weights for the forecast date {forecast_generation_date}")
58-
}
5953

6054
# This is needed to build the data archive
6155
ref_time_values_ <- seq.Date(as.Date("2023-10-04"), as.Date("2024-04-24"), by = 7L)
@@ -107,6 +101,23 @@ rlang::list2(
107101
tar_map(
108102
values = tidyr::expand_grid(tibble(forecast_generation_date = forecast_generation_date)),
109103
names = "forecast_generation_date",
104+
tar_target(
105+
name = geo_forecasters_weights,
106+
command = {
107+
geo_forecasters_weights <- parse_prod_weights(here::here("flu_geo_exclusions.csv"), forecast_generation_date)
108+
if (nrow(geo_forecasters_weights %>% filter(forecast_date == forecast_generation_date)) == 0) {
109+
cli_abort("there are no weights for the forecast date {forecast_generation_date}")
110+
}
111+
geo_forecasters_weights
112+
},
113+
cue = tar_cue(mode = "always")
114+
),
115+
tar_target(
116+
name = geo_exclusions,
117+
command = {
118+
exclude_geos(geo_forecasters_weights)
119+
}
120+
),
110121
tar_target(
111122
forecast_res,
112123
command = {
@@ -162,6 +173,23 @@ rlang::list2(
162173
},
163174
cue = tar_cue(mode = "always")
164175
),
176+
tar_target(
177+
name = make_climate_submission_csv,
178+
command = {
179+
forecasts <- forecast_res
180+
forecasts %>%
181+
filter(forecaster %in% c("climate_base", "climate_geo_agged")) %>%
182+
group_by(geo_value, target_end_date, quantile) %>%
183+
summarize(forecast_date = first(forecast_date), value = mean(value, na.rm = TRUE), .groups = "drop") %>%
184+
ungroup() %>%
185+
format_flusight(disease = "flu") %>%
186+
write_submission_file(
187+
get_forecast_reference_date(as.Date(forecast_generation_date)),
188+
file.path(submission_directory, "model-output/CMU-climatological-baseline")
189+
)
190+
},
191+
cue = tar_cue(mode = "always")
192+
),
165193
tar_target(
166194
name = validate_result,
167195
command = {
@@ -179,6 +207,22 @@ rlang::list2(
179207
},
180208
cue = tar_cue(mode = "always")
181209
),
210+
tar_target(
211+
name = validate_climate_result,
212+
command = {
213+
make_climate_submission_csv
214+
# only validate if we're saving the result to a hub
215+
if (submission_directory != "cache") {
216+
validation <- validate_submission(
217+
submission_directory,
218+
file_path = sprintf("CMU-climatological-baseline/%s-CMU-climatological-baseline.csv", get_forecast_reference_date(as.Date(forecast_generation_date))))
219+
} else {
220+
validation <- "not validating when there is no hub (set submission_directory)"
221+
}
222+
validation
223+
},
224+
cue = tar_cue(mode = "always")
225+
),
182226
tar_target(
183227
name = truth_data,
184228
command = {

0 commit comments

Comments
 (0)