cmu-delphi
diff --git a/‎Makefile‎
Lines changed: 4 additions & 4 deletions b/‎Makefile‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎R/aux_data_utils.R‎
Lines changed: 18 additions & 1 deletion b/‎R/aux_data_utils.R‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎R/forecasters/data_transforms.R‎
Lines changed: 1 addition & 2 deletions b/‎R/forecasters/data_transforms.R‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎R/forecasters/data_validation.R‎
Lines changed: 1 addition & 1 deletion b/‎R/forecasters/data_validation.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/forecasters/ensemble_linear_climate.R‎
Lines changed: 1 addition & 1 deletion b/‎R/forecasters/ensemble_linear_climate.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/forecasters/forecaster_climatological.R‎
Lines changed: 1 addition & 1 deletion b/‎R/forecasters/forecaster_climatological.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/forecasters/forecaster_no_recent_outcome.R‎
Lines changed: 3 additions & 3 deletions b/‎R/forecasters/forecaster_no_recent_outcome.R‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎R/forecasters/forecaster_scaled_pop_seasonal.R‎
Lines changed: 1 addition & 0 deletions b/‎R/forecasters/forecaster_scaled_pop_seasonal.R‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/looping.R‎
Lines changed: 7 additions & 4 deletions b/‎R/looping.R‎
Lines changed: 7 additions & 4 deletions
diff --git a/‎R/scoring.R‎
Lines changed: 17 additions & 2 deletions b/‎R/scoring.R‎
Lines changed: 17 additions & 2 deletions
@@ -13,12 +13,12 @@ run:
 	Rscript scripts/run.R
 
 prod-covid:
-	export TAR_RUN_PROJECT=covid_hosp_prod; \
-	Rscript scripts/run_prod.R
+	export TAR_PROJECT=covid_hosp_prod; \
+	Rscript scripts/run.R
 
 prod-flu:
-	export TAR_RUN_PROJECT=flu_hosp_prod; \
-	Rscript scripts/run_prod.R
+	export TAR_PROJECT=flu_hosp_prod; \
+	Rscript scripts/run.R
 
 prod: prod-covid prod-flu update_site netlify
 
 
@@ -176,7 +176,17 @@ gen_pop_and_density_data <-
       )
   }
 
-daily_to_weekly <- function(epi_df, agg_method = c("sum", "mean"), day_of_week = 4L, day_of_week_end = 7L, keys = "geo_value", values = c("value")) {
+#' Aggregate daily data to weekly data.
+#'
+#' Rows are grouped by `keys` together with the epiweek and epiyear of
+#' `time_value`. By default, aggregates from Sunday to Saturday and labels
+#' with the Wednesday of that week.
+#'
+#' @param epi_df the daily data to aggregate; must have a `time_value` column.
+#' @param agg_method the method to use to aggregate the data, one of "sum" or "mean".
+#' @param keys the columns to group by.
+#' @param values the columns to aggregate.
+daily_to_weekly <- function(epi_df, agg_method = c("sum", "mean"), keys = "geo_value", values = c("value")) {
+  agg_method <- arg_match(agg_method)
   epi_df %>%
     mutate(epiweek = epiweek(time_value), year = epiyear(time_value)) %>%
     group_by(across(any_of(c(keys, "epiweek", "year")))) %>%
@@ -299,6 +309,13 @@ drop_non_seasons <- function(epi_data, min_window = 12) {
     )
 }
 
+#' Read the most recently modified coarse NWSS file for a disease from S3.
+#'
+#' Lists the objects in the `forecasting-team-data` bucket whose keys start
+#' with `exploration/aux_data/nwss_{disease}_data`, picks the single object
+#' with the largest `LastModified` value, and reads it with
+#' `readr::read_csv()`.
+#'
+#' @param disease which dataset to fetch, one of "covid" or "flu".
+#' @return the result of `readr::read_csv()` on the selected object.
+get_nwss_coarse_data <- function(disease = c("covid", "flu")) {
+  disease <- arg_match(disease)
+  bucket <- "forecasting-team-data"
+  prefix <- glue::glue("exploration/aux_data/nwss_{disease}_data")
+  candidates <- aws.s3::get_bucket_df(prefix = prefix, bucket = bucket)
+  # Fail with a readable message instead of a column-not-found error further down.
+  if (nrow(candidates) == 0L) {
+    stop("no objects found in bucket '", bucket, "' with prefix '", prefix, "'", call. = FALSE)
+  }
+  # with_ties = FALSE guarantees exactly one key even if several objects share a timestamp.
+  latest_key <- candidates %>%
+    slice_max(LastModified, n = 1, with_ties = FALSE) %>%
+    pull(Key)
+  aws.s3::s3read_using(FUN = readr::read_csv, object = latest_key, bucket = bucket)
+}
 
 #' add a column summing the values in the hhs region
 #' @param hhs_region_table the region table
 
@@ -234,8 +234,7 @@ data_whitening <- function(epi_data, colname, learned_params, nonlin_method = c(
     join_cols <- key_colnames(epi_data, exclude = "time_value")
   }
   nonlin_method <- arg_match(nonlin_method)
-  res <- epi_data %>%
-    left_join(learned_params, by = join_cols)
+  res <- epi_data %>% left_join(learned_params, by = join_cols)
   if (nonlin_method == "quart_root") {
     res %<>% mutate(across(all_of(colname), ~ (.x + 0.01)^(1 / 4)))
   }
 
@@ -69,7 +69,7 @@ confirm_sufficient_data <- function(epi_data, ahead, args_input, outcome, extra_
   # TODO: Buffer should probably be 2 * n(lags) * n(predictors). But honestly,
   # this needs to be fixed in epipredict itself, see
   # https://github.com/cmu-delphi/epipredict/issues/106.
-  if (extra_sources == c("")) {
+  if (identical(extra_sources, "")) {
     extra_sources <- character(0L)
   }
   has_no_last_nas <- epi_data %>%
 
@@ -14,7 +14,7 @@
 #' @param other_weights if non null, it should be a tibble giving a list of weights by forecaster and geo_value
 #' @importFrom rlang %||%
 #' @export
-ensemble_linear_climate <- function(forecasts,
+ensemble_climate_linear <- function(forecasts,
                                     aheads,
                                     other_weights = NULL,
                                     probs = covidhub_probs(),
 
@@ -59,7 +59,7 @@ climate_linear_ensembled <- function(epi_data,
   pred_geo_climate <- climatological_model(epi_data, ahead, geo_agg = FALSE) %>% mutate(forecaster = "climate_geo")
   pred_linear <- forecaster_baseline_linear(epi_data, ahead, residual_tail = residual_tail, residual_center = residual_center) %>% mutate(forecaster = "linear")
   pred <- bind_rows(pred_climate, pred_linear, pred_geo_climate) %>%
-    ensemble_linear_climate((args_list$aheads[[1]]) / 7) %>%
+    ensemble_climate_linear((args_list$aheads[[1]]) / 7) %>%
     ungroup()
   # undo whitening
   pred_final <- pred %>%
 
@@ -62,13 +62,13 @@ no_recent_outcome <- function(epi_data,
   args_input[["quantile_levels"]] <- quantile_levels
   args_list <- do.call(default_args_list, args_input)
   # if you want to hardcode particular predictors in a particular forecaster
-  predictors <- c(outcome, extra_sources[[1]])
-  c(args_list, tmp_pred, trainer) %<-% sanitize_args_predictors_trainer(epi_data, outcome, predictors, trainer, args_list)
-  if (extra_sources[[1]] == "") {
+  if (identical(extra_sources[[1]], "")) {
     predictors <- character()
   } else {
     predictors <- extra_sources[[1]]
   }
+  c(args_list, tmp_pred, trainer) %<-% sanitize_args_predictors_trainer(epi_data, outcome, predictors, trainer, args_list)
+
   # end of the copypasta
   # finally, any other pre-processing (e.g. smoothing) that isn't performed by
   # epipredict
 
@@ -59,6 +59,7 @@ scaled_pop_seasonal <- function(epi_data,
 
   epi_data <- validate_epi_data(epi_data)
 
+  # TODO: handle this when creating param grid?
   if (typeof(seasonal_method) == "list") {
     seasonal_method <- seasonal_method[[1]]
   }
 
@@ -84,24 +84,27 @@ slide_forecaster <- function(epi_archive,
   )
 }
 
-epix_slide_simple <- function(epi_archive, forecaster, ref_time_values, before, cache_key = NULL) {
+
+epix_slide_simple <- function(epi_archive, forecaster, ref_time_values, before = Inf, cache_key = NULL) { # for each ref_time_value: take an as-of snapshot of epi_archive (cached on disk when cache_key is given), run forecaster on it, and row-bind the results
   # this is so that changing the object without changing the name doesn't result in pulling the wrong cache
   cache_hash <- rlang::hash(epi_archive)
   dir.create(".exploration_cache/slide_cache", showWarnings = FALSE, recursive = TRUE)
-  purrr::map(ref_time_values, function(tv) {
+  out <- purrr::map(ref_time_values, function(tv) { # result is kept in `out` so gc() can run before returning
     if (is.null(cache_key)) {
       epi_df <- epi_archive %>%
-        epix_as_of(tv, min_time_value = tv - before)
+        epix_as_of(min(tv, .$versions_end), min_time_value = tv - before) # `.` is the piped epi_archive; the requested version is capped at its versions_end
     } else {
       file_path <- glue::glue(".exploration_cache/slide_cache/{cache_key}_{cache_hash}_{before}_{tv}.parquet")
       if (file.exists(file_path)) {
         epi_df <- qs::qread(file_path)
       } else {
         epi_df <- epi_archive %>%
-          epix_as_of(tv, min_time_value = tv - before)
+          epix_as_of(min(tv, .$versions_end), min_time_value = tv - before) # same capped snapshot as the uncached branch; saved below for reuse
         qs::qsave(epi_df, file_path)
       }
     }
     epi_df %>% forecaster()
   }) %>% bind_rows()
+  gc() # explicit garbage collection after the slide; presumably to release the per-snapshot data -- TODO confirm it is needed
+  return(out) # combined forecaster output for all ref_time_values
 }
@@ -22,9 +22,24 @@ evaluate_predictions <- function(forecasts, truth_data) {
 
   scores <- forecast_obj %>%
     scoringutils::score(metrics = get_metrics(.)) %>%
-    as_tibble() %>%
+    as_tibble()
+  missing_metrics <- setdiff(
+    c("model", "geo_value", "forecast_date", "target_end_date", "wis", "ae_median", "interval_coverage_50", "interval_coverage_90"),
+    names(scores)
+  )
+  if (length(missing_metrics) > 0) {
+    cli::cli_abort(c(
+      "scoring error",
+      "i" = "missing metrics: {missing_metrics}",
+      "i" = "if wis is missing, then likely quantile monotonicity was violated"
+    ))
+  }
+  scores %>%
     select(
-      model, geo_value, forecast_date, target_end_date,
+      model,
+      geo_value,
+      forecast_date,
+      target_end_date,
       wis,
       ae = ae_median,
       coverage_50 = interval_coverage_50,
Original file line number	Diff line number	Diff line change
`@@ -234,8 +234,7 @@ data_whitening <- function(epi_data, colname, learned_params, nonlin_method = c(`
`234`	`234`	`join_cols <- key_colnames(epi_data, exclude = "time_value")`
`235`	`235`	`}`
`236`	`236`	`nonlin_method <- arg_match(nonlin_method)`
`237`		`- res <- epi_data %>%`
`238`		`- left_join(learned_params, by = join_cols)`
	`237`	`+ res <- epi_data %>% left_join(learned_params, by = join_cols)`
`239`	`238`	`if (nonlin_method == "quart_root") {`
`240`	`239`	`res %<>% mutate(across(all_of(colname), ~ (.x + 0.01)^(1 / 4)))`
`241`	`240`	`}`
Original file line number	Diff line number	Diff line change
`@@ -69,7 +69,7 @@ confirm_sufficient_data <- function(epi_data, ahead, args_input, outcome, extra_`
`69`	`69`	`# TODO: Buffer should probably be 2 * n(lags) * n(predictors). But honestly,`
`70`	`70`	`# this needs to be fixed in epipredict itself, see`
`71`	`71`	`# https://github.com/cmu-delphi/epipredict/issues/106.`
`72`		`- if (extra_sources == c("")) {`
	`72`	`+ if (identical(extra_sources, "")) {`
`73`	`73`	`extra_sources <- character(0L)`
`74`	`74`	`}`
`75`	`75`	`has_no_last_nas <- epi_data %>%`
Original file line number	Diff line number	Diff line change
`@@ -59,6 +59,7 @@ scaled_pop_seasonal <- function(epi_data,`
`59`	`59`
`60`	`60`	`epi_data <- validate_epi_data(epi_data)`
`61`	`61`
	`62`	`+ # TODO: handle this when creating param grid?`
`62`	`63`	`if (typeof(seasonal_method) == "list") {`
`63`	`64`	`seasonal_method <- seasonal_method[[1]]`
`64`	`65`	`}`