Skip to content

Commit 65e9b09

Browse files
committed
fix: add same thing to covid_hosp_prod
1 parent 1105511 commit 65e9b09

File tree

2 files changed

+37
-20
lines changed

2 files changed

+37
-20
lines changed

scripts/covid_hosp_prod.R

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,21 @@ submission_directory <- Sys.getenv("COVID_SUBMISSION_DIRECTORY", "cache")
66
insufficient_data_geos <- c("as", "mp", "vi", "gu")
77
# date to cut the truth data off at, so we don't have too much of the past
88
truth_data_date <- "2023-09-01"
9-
# Generically set the generation date to the next Wednesday (or today if it's Wednesday)
9+
10+
# This is the as_of for the forecast. If run on our typical schedule, it's
11+
# today, which is a Wednesday. Sometimes, if we're doing a delayed forecast,
12+
# it's a Thursday. It's used for stamping the data and for determining the
13+
# appropriate as_of when creating the forecast.
1014
forecast_generation_date <- Sys.Date()
11-
#forecast_date <- seq.Date(as.Date("2024-11-20"), Sys.Date(), by = 7L)
12-
forecast_date <- Sys.Date()
15+
# By default, forecast_date is the generation date rounded to the nearest
16+
# Wednesday, so the two usually coincide. You can override it, but it should be a Wednesday.
17+
forecast_date <- round_date(forecast_generation_date, "weeks", week_start = 3)
18+
# If doing backfill, you can set the forecast_date to a sequence of dates.
19+
# forecast_date <- seq.Date(as.Date("2024-11-20"), Sys.Date(), by = 7L)
20+
# forecast_generation_date needs to follow suit, but it's more complicated
21+
# because sometimes we forecast on Thursday.
22+
# forecast_generation_date <- c(as.Date(c("2024-11-21", "2024-11-27", "2024-12-04", "2024-12-11", "2024-12-18", "2024-12-26", "2025-01-02")), seq.Date(as.Date("2025-01-08"), Sys.Date(), by = 7L))
23+
1324
forecaster_fns <- list2(
1425
linear = function(...) {
1526
forecaster_baseline_linear(..., residual_tail = 0.97, residual_center = 0.097, no_intercept = TRUE)
@@ -59,16 +70,17 @@ rlang::list2(
5970
tar_map(
6071
values = tidyr::expand_grid(
6172
tibble(
62-
forecast_date = forecast_date
73+
forecast_date_int = forecast_date,
74+
forecast_generation_date_int = forecast_generation_date
6375
)
6476
),
6577
names = "forecast_date",
6678
tar_target(
6779
name = geo_forecasters_weights,
6880
command = {
69-
geo_forecasters_weights <- parse_prod_weights(here::here("covid_geo_exclusions.csv"), forecast_date)
70-
if (nrow(geo_forecasters_weights %>% filter(forecast_date == forecast_date)) == 0) {
71-
cli_abort("there are no weights for the forecast date {forecast_date}")
81+
geo_forecasters_weights <- parse_prod_weights(here::here("covid_geo_exclusions.csv"), forecast_date_int)
82+
if (nrow(geo_forecasters_weights %>% filter(forecast_date == forecast_date_int)) == 0) {
83+
cli_abort("there are no weights for the forecast date {forecast_date}")
7284
}
7385
geo_forecasters_weights
7486
},
@@ -83,10 +95,9 @@ rlang::list2(
8395
tar_target(
8496
forecast_res,
8597
command = {
86-
forecast_date <- as.Date(forecast_date)
87-
if (forecast_date < Sys.Date()) {
98+
if (as.Date(forecast_generation_date_int) < Sys.Date()) {
8899
train_data <- nhsn_archive_data %>%
89-
epix_as_of(forecast_date) %>%
100+
epix_as_of(as.Date(forecast_generation_date_int)) %>%
90101
mutate(
91102
geo_value = ifelse(geo_value == "usa", "us", geo_value),
92103
time_value = time_value - 3
@@ -96,9 +107,9 @@ rlang::list2(
96107
train_data <-
97108
nhsn_latest_data %>%
98109
data_substitutions(disease = "covid") %>%
99-
as_epi_df(as_of = as.Date(forecast_date))
110+
as_epi_df(as_of = as.Date(forecast_date_int))
100111
}
101-
attributes(train_data)$metadata$as_of <- round_date(forecast_date, "weeks", week_start = 3)
112+
attributes(train_data)$metadata$as_of <- as.Date(forecast_date_int)
102113
train_data %>%
103114
forecaster_fns[[forecasters]](ahead = aheads) %>%
104115
mutate(
@@ -112,7 +123,6 @@ rlang::list2(
112123
tar_target(
113124
name = ensemble_res,
114125
command = {
115-
forecasts <- forecast_res
116126
forecasts %>%
117127
mutate(quantile = round(quantile, digits = 3)) %>%
118128
left_join(geo_forecasters_weights, by = join_by(forecast_date, forecaster, geo_value)) %>%
@@ -127,15 +137,20 @@ rlang::list2(
127137
name = ensemble_mixture_res,
128138
command = {
129139
forecast_res %>%
130-
ensemble_linear_climate(aheads, other_weights = geo_forecasters_weights, max_climate_ahead_weight = 0.6, max_climate_quantile_weight = 0.6) %>%
140+
ensemble_linear_climate(
141+
aheads,
142+
other_weights = geo_forecasters_weights,
143+
max_climate_ahead_weight = 0.6,
144+
max_climate_quantile_weight = 0.6
145+
) %>%
131146
filter(geo_value %nin% geo_exclusions) %>%
132147
ungroup()
133148
},
134149
),
135150
tar_target(
136151
name = make_submission_csv,
137152
command = {
138-
forecast_reference_date <- get_forecast_reference_date(as.Date(forecast_date))
153+
forecast_reference_date <- get_forecast_reference_date(forecast_date_int)
139154
ensemble_mixture_res %>%
140155
format_flusight(disease = "covid") %>%
141156
write_submission_file(forecast_reference_date, file.path(submission_directory, "model-output/CMU-TimeSeries"))
@@ -154,7 +169,7 @@ rlang::list2(
154169
ungroup() %>%
155170
format_flusight(disease = "covid") %>%
156171
write_submission_file(
157-
get_forecast_reference_date(as.Date(forecast_date)),
172+
get_forecast_reference_date(forecast_date_int),
158173
submission_directory = file.path(submission_directory, "model-output/CMU-climatological-baseline"),
159174
file_name = "CMU-climatological-baseline"
160175
)
@@ -170,7 +185,7 @@ rlang::list2(
170185
if (submission_directory != "cache") {
171186
validation <- validate_submission(
172187
submission_directory,
173-
file_path = sprintf("CMU-TimeSeries/%s-CMU-TimeSeries.csv", get_forecast_reference_date(as.Date(forecast_date)))
188+
file_path = sprintf("CMU-TimeSeries/%s-CMU-TimeSeries.csv", get_forecast_reference_date(forecast_date_int))
174189
)
175190
} else {
176191
validation <- "not validating when there is no hub (set submission_directory)"
@@ -187,7 +202,7 @@ rlang::list2(
187202
if (submission_directory != "cache" && submit_climatological) {
188203
validation <- validate_submission(
189204
submission_directory,
190-
file_path = sprintf("CMU-climatological-baseline/%s-CMU-climatological-baseline.csv", get_forecast_reference_date(as.Date(forecast_date)))
205+
file_path = sprintf("CMU-climatological-baseline/%s-CMU-climatological-baseline.csv", get_forecast_reference_date(forecast_date_int))
191206
)
192207
} else {
193208
validation <- "not validating when there is no hub (set submission_directory)"
@@ -243,13 +258,13 @@ rlang::list2(
243258
"scripts/reports/forecast_report.Rmd",
244259
output_file = here::here(
245260
"reports",
246-
sprintf("%s_covid_prod_on_%s.html", as.Date(forecast_date), as.Date(Sys.Date()))
261+
sprintf("%s_covid_prod_on_%s.html", as.Date(forecast_date_int), as.Date(forecast_generation_date_int))
247262
),
248263
params = list(
249264
disease = "covid",
250265
forecast_res = forecast_res %>% bind_rows(ensemble_mixture_res %>% mutate(forecaster = "ensemble_mix")),
251266
ensemble_res = ensemble_res,
252-
forecast_date = as.Date(forecast_date),
267+
forecast_date = as.Date(forecast_date_int),
253268
truth_data = truth_data
254269
)
255270
)

scripts/flu_hosp_prod.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ insufficient_data_geos <- c("as", "mp", "vi", "gu")
99
truth_data_date <- "2023-09-01"
1010
# needed to create the aux data targets
1111
end_date <- Sys.Date()
12+
1213
# This is the as_of for the forecast. If run on our typical schedule, it's
1314
# today, which is a Wednesday. Sometimes, if we're doing a delayed forecast,
1415
# it's a Thursday. It's used for stamping the data and for determining the
@@ -22,6 +23,7 @@ forecast_date <- round_date(forecast_generation_date, "weeks", week_start = 3)
2223
# forecast_generation_date needs to follow suit, but it's more complicated
2324
# because sometimes we forecast on Thursday.
2425
# forecast_generation_date <- c(as.Date(c("2024-11-21", "2024-11-27", "2024-12-04", "2024-12-11", "2024-12-18", "2024-12-26", "2025-01-02")), seq.Date(as.Date("2025-01-08"), Sys.Date(), by = 7L))
26+
2527
very_latent_locations <- list(list(
2628
c("source"),
2729
c("flusurv", "ILI+")

0 commit comments

Comments
 (0)