|
2 | 2 | #' |
3 | 3 | #' A recipe is a description of the steps to be applied to a data set in |
4 | 4 | #' order to prepare it for data analysis. This is a loose wrapper |
5 | | -#' around `recipes::recipe()` to properly handle the additional |
| 5 | +#' around [recipes::recipe()] to properly handle the additional |
6 | 6 | #' columns present in an `epi_df` |
7 | 7 | #' |
8 | 8 | #' @aliases epi_recipe epi_recipe.default epi_recipe.formula |
@@ -51,9 +51,27 @@ epi_recipe.default <- function(x, ...) { |
51 | 51 | #' as the data given in the `data` argument but can be different after |
52 | 52 | #' the recipe is trained.} |
53 | 53 | #' |
54 | | -# @includeRmd man/rmd/recipes.Rmd details |
55 | 54 | #' |
56 | 55 | #' @export |
| 56 | +#' @examples |
| 57 | +#' library(epiprocess) |
| 58 | +#' library(dplyr) |
| 59 | +#' library(recipes) |
| 60 | +#' |
| 61 | +#' jhu <- jhu_csse_daily_subset %>% |
| 62 | +#' filter(time_value > "2021-08-01") %>% |
| 63 | +#' select(geo_value:death_rate_7d_av) %>% |
| 64 | +#' rename(case_rate = case_rate_7d_av, death_rate = death_rate_7d_av) |
| 65 | +#' |
| 66 | +#' r <- epi_recipe(jhu) %>% |
| 67 | +#' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% |
| 68 | +#' step_epi_ahead(death_rate, ahead = 7) %>% |
| 69 | +#' step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% |
| 70 | +#' step_naomit(all_predictors()) %>% |
| 71 | +#' # below, `skip` means we don't do this at predict time |
| 72 | +#' step_naomit(all_outcomes(), skip = TRUE) |
| 73 | +#' |
| 74 | +#' r |
57 | 75 | epi_recipe.epi_df <- |
58 | 76 | function(x, |
59 | 77 | formula = NULL, |
@@ -137,7 +155,7 @@ epi_recipe.epi_df <- |
137 | 155 | levels = NULL, |
138 | 156 | retained = NA |
139 | 157 | ) |
140 | | - class(out) <- "recipe" |
| 158 | + class(out) <- c("epi_recipe", "recipe") |
141 | 159 | out |
142 | 160 | } |
143 | 161 |
|
@@ -210,3 +228,91 @@ epi_form2args <- function(formula, data, ...) { |
210 | 228 | list(x = data, vars = vars, roles = roles) |
211 | 229 | } |
212 | 230 |
|
| 231 | + |
| 232 | + |
#' Check whether an object is an `epi_recipe`
#'
#' A predicate based on S3 class inheritance: it reports whether
#' `"epi_recipe"` is among the classes of `x`.
#'
#' @param x Any R object.
#' @return A single logical value: `TRUE` if `x` inherits from `epi_recipe`,
#'   otherwise `FALSE`.
#'
#' @export
is_epi_recipe <- function(x) {
  target_class <- "epi_recipe"
  inherits(x, what = target_class)
}
| 242 | + |
| 243 | + |
| 244 | + |
#' Add an `epi_recipe` to a workflow
#'
#' A thin wrapper around [workflows::add_recipe()] whose default `blueprint`
#' is [default_epi_recipe_blueprint()].
#'
#' @details
#' Has the same behaviour as [workflows::add_recipe()] but sets a different
#' default blueprint to automatically handle [epiprocess::epi_df] data.
#'
#' In the underlying workflows interface:
#'
#' - `add_recipe()` specifies the terms of the model and any preprocessing
#'   that is required through the usage of a recipe.
#'
#' - `remove_recipe()` removes the recipe as well as any downstream objects.
#'
#' @param x A workflow or epi_workflow.
#'
#' @param recipe A recipe, for example one created using [epi_recipe()] or
#'   [recipes::recipe()].
#'
#' @param ... Forwarded unchanged to [workflows::add_recipe()]; not otherwise
#'   used.
#'
#' @param blueprint A hardhat blueprint used for fine tuning the
#'   preprocessing. By default, [default_epi_recipe_blueprint()] is used.
#'
#'   Note that preprocessing done here is separate from preprocessing that
#'   might be done automatically by the underlying model.
#'
#' @return
#' `x`, updated with a new recipe preprocessor.
#'
#' @seealso [workflows::add_recipe()]
#'
#' @export
#' @examples
#' library(epiprocess)
#' library(dplyr)
#' library(recipes)
#'
#' jhu <- jhu_csse_daily_subset %>%
#'   filter(time_value > "2021-08-01") %>%
#'   select(geo_value:death_rate_7d_av) %>%
#'   rename(case_rate = case_rate_7d_av, death_rate = death_rate_7d_av)
#'
#' r <- epi_recipe(jhu) %>%
#'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
#'   step_epi_ahead(death_rate, ahead = 7) %>%
#'   step_epi_lag(case_rate, lag = c(0, 7, 14)) %>%
#'   step_naomit(all_predictors()) %>%
#'   step_naomit(all_outcomes(), skip = TRUE)
#'
#' wf <- epi_workflow() %>%
#'   add_epi_recipe(r)
#'
#' wf
add_epi_recipe <- function(x,
                           recipe,
                           ...,
                           blueprint = default_epi_recipe_blueprint()) {
  # Delegate everything to workflows; only the default blueprint differs.
  workflows::add_recipe(x, recipe, ..., blueprint = blueprint)
}
| 299 | + |
| 300 | + |
| 301 | + |
#' Recipe blueprint that accounts for `epi_df` panel data
#'
#' Used for simplicity. See [hardhat::default_recipe_blueprint()] for more
#' details.
#'
#' @inheritParams hardhat::default_recipe_blueprint
#'
#' @details The `bake_dependent_roles` are automatically set to `epi_df`
#'   defaults: `"time_value"`, `"geo_value"`, `"key"` and `"raw"`. Every
#'   argument is forwarded to [hardhat::default_recipe_blueprint()] by name.
#' @return A recipe blueprint.
#' @export
default_epi_recipe_blueprint <-
  function(intercept = FALSE, allow_novel_levels = FALSE, fresh = TRUE,
           bake_dependent_roles = c("time_value", "geo_value", "key", "raw"),
           composition = "tibble") {
    # Forward by name rather than by position so each value reaches the
    # intended hardhat parameter regardless of hardhat's argument order.
    hardhat::default_recipe_blueprint(
      intercept = intercept,
      allow_novel_levels = allow_novel_levels,
      fresh = fresh,
      bake_dependent_roles = bake_dependent_roles,
      composition = composition
    )
  }
0 commit comments