Skip to content

Commit 4b1a96b

Browse files
committed
epi_ahead works
1 parent 29f744e commit 4b1a96b

File tree

4 files changed

+290
-36
lines changed

4 files changed

+290
-36
lines changed

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
S3method(bake,step_epi_ahead)
34
S3method(bake,step_epi_lag)
45
S3method(epi_keys,default)
56
S3method(epi_keys,epi_df)
67
S3method(epi_keys,recipe)
78
S3method(epi_recipe,default)
89
S3method(epi_recipe,epi_df)
910
S3method(epi_recipe,formula)
11+
S3method(prep,step_epi_ahead)
1012
S3method(prep,step_epi_lag)
13+
S3method(print,step_epi_ahead)
1114
S3method(print,step_epi_lag)
1215
export("%>%")
1316
export(arx_args_list)
@@ -20,6 +23,7 @@ export(get_precision)
2023
export(grab_names)
2124
export(smooth_arx_args_list)
2225
export(smooth_arx_forecaster)
26+
export(step_epi_ahead)
2327
export(step_epi_lag)
2428
import(recipes)
2529
importFrom(magrittr,"%>%")

R/epi_ahead.R

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
#' Create a leading outcome
2+
#'
3+
#' `step_epi_ahead` creates a *specification* of a recipe step that
4+
#' will add new columns of leading data. Leading data will
5+
#' by default include NA values where the lag was induced.
6+
#' These can be removed with [step_naomit()], or you may
7+
#' specify an alternative filler value with the `default`
8+
#' argument.
9+
#'
10+
#' @param recipe A recipe object. The step will be added to the
11+
#' sequence of operations for this recipe.
12+
#' @param ... One or more selector functions to choose variables
13+
#' for this step. See [selections()] for more details.
14+
#' @param role For model terms created by this step, what analysis role should
15+
#' they be assigned?
16+
#' @param trained A logical to indicate if the quantities for
17+
#' preprocessing have been estimated.
18+
#' @param ahead A vector of positive integers. Each specified column will be
19+
#' lead for each value in the vector.
20+
#' @param prefix A prefix for generated column names, default to "ahead_".
21+
#' @param default Determines what fills empty rows
22+
#' left by leading/lagging (defaults to NA).
23+
#' @param keys A character vector of the keys in an epi_df
24+
#' @param columns A character string of variable names that will
25+
#' be populated (eventually) by the `terms` argument.
26+
#' @param skip A logical. Should the step be skipped when the
27+
#' recipe is baked by [bake()]? While all operations are baked
28+
#' when [prep()] is run, some operations may not be able to be
29+
#' conducted on new data (e.g. processing the outcome variable(s)).
30+
#' Care should be taken when using `skip = TRUE` as it may affect
31+
#' the computations for subsequent operations.
32+
#' @param id A character string that is unique to this step to identify it.
33+
#' @template step-return
34+
#'
35+
#' @details The step assumes that the data are already _in the proper sequential
36+
#' order_ for leading.
37+
#'
38+
#' @family row operation steps
39+
#' @export
40+
#'
41+
#' @examples
42+
#' tib <- tibble(
43+
#' x = 1:5, y = 1:5,
44+
#' time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5),
45+
#' geo_value = "ca"
46+
#' ) %>% epiprocess::as_epi_df()
47+
#'
48+
#' library(recipes)
49+
#' recipe(y ~ x., data = tib) %>%
50+
#' step_epi_lag(x, lag = 2:3) %>%
51+
#' step_epi_ahead(y, ahead = 1) %>%
52+
#' prep(df) %>%
53+
#' bake(df)
54+
step_epi_ahead <-
55+
function(recipe,
56+
...,
57+
role = "outcome",
58+
trained = FALSE,
59+
ahead = 1,
60+
prefix = "ahead_",
61+
default = NA,
62+
keys = epi_keys(recipe),
63+
columns = NULL,
64+
skip = FALSE,
65+
id = rand_id("epi_ahead")) {
66+
add_step(
67+
recipe,
68+
step_epi_ahead_new(
69+
terms = dplyr::enquos(...),
70+
role = role,
71+
trained = trained,
72+
ahead = ahead,
73+
prefix = prefix,
74+
default = default,
75+
keys = keys,
76+
columns = columns,
77+
skip = skip,
78+
id = id
79+
)
80+
)
81+
}
82+
83+
step_epi_ahead_new <-
84+
function(terms, role, trained, ahead, prefix, default, keys,
85+
columns, skip, id) {
86+
step(
87+
subclass = "epi_ahead",
88+
terms = terms,
89+
role = role,
90+
trained = trained,
91+
ahead = ahead,
92+
prefix = prefix,
93+
default = default,
94+
keys = keys,
95+
columns = columns,
96+
skip = skip,
97+
id = id
98+
)
99+
}
100+
101+
#' @export
102+
prep.step_epi_ahead <- function(x, training, info = NULL, ...) {
103+
step_epi_ahead_new(
104+
terms = x$terms,
105+
role = x$role,
106+
trained = TRUE,
107+
ahead = x$ahead,
108+
prefix = x$prefix,
109+
default = x$default,
110+
keys = x$keys,
111+
columns = recipes_eval_select(x$terms, training, info),
112+
skip = x$skip,
113+
id = x$id
114+
)
115+
}
116+
117+
#' @export
118+
bake.step_epi_ahead <- function(object, new_data, ...) {
119+
if (!all(object$ahead == as.integer(object$ahead))) {
120+
rlang::abort("step_epi_ahead requires 'lag' argument to be integer valued.")
121+
}
122+
123+
grid <- tidyr::expand_grid(
124+
col = object$columns, lag_val = -object$ahead, ahead_val = object$ahead) %>%
125+
dplyr::mutate(newname = glue::glue("{object$prefix}{ahead_val}_{col}")) %>%
126+
dplyr::select(-ahead_val)
127+
128+
## ensure no name clashes
129+
new_data_names <- colnames(new_data)
130+
intersection <- new_data_names %in% grid$newname
131+
if (any(intersection)) {
132+
rlang::abort(
133+
paste0("Name collision occured in `", class(object)[1],
134+
"`. The following variable names already exists: ",
135+
paste0(new_data_names[intersection], collapse = ", "),
136+
"."))
137+
}
138+
139+
ok <- object$keys
140+
lagged <- purrr::reduce(
141+
purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok),
142+
dplyr::full_join,
143+
by = ok
144+
)
145+
146+
dplyr::full_join(new_data, lagged, by = object$keys) %>%
147+
dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>%
148+
dplyr::arrange(time_value) %>%
149+
dplyr::ungroup()
150+
151+
}
152+
153+
#' @export
154+
print.step_epi_ahead <-
155+
function(x, width = max(20, options()$width - 30), ...) {
156+
## TODO add printing of the lags
157+
title <- "Leading "
158+
recipes::print_step(x$columns, x$terms, x$trained, title, width)
159+
invisible(x)
160+
}

R/epi_lag.R

Lines changed: 9 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -7,47 +7,16 @@
77
#' specify an alternative filler value with the `default`
88
#' argument.
99
#'
10-
#' @param recipe A recipe object. The step will be added to the
11-
#' sequence of operations for this recipe.
12-
#' @param ... One or more selector functions to choose variables
13-
#' for this step. See [selections()] for more details.
14-
#' @param role For model terms created by this step, what analysis role should
15-
#' they be assigned? By default, the new columns created by this step from
16-
#' the original variables will be used as _predictors_ in a model.
17-
#' @param trained A logical to indicate if the quantities for
18-
#' preprocessing have been estimated.
1910
#' @param lag A vector of positive integers. Each specified column will be
2011
#' lagged for each value in the vector.
21-
#' @param prefix A prefix for generated column names, default to "lag_".
22-
#' @param columns A character string of variable names that will
23-
#' be populated (eventually) by the `terms` argument.
24-
#' @param default Determines what fills empty rows
25-
#' left by lagging (defaults to NA).
2612
#' @template step-return
2713
#'
2814
#' @details The step assumes that the data are already _in the proper sequential
2915
#' order_ for lagging.
3016
#'
3117
#' @family row operation steps
3218
#' @export
33-
#' @rdname step_epi_lag
34-
#'
35-
#' @examples
36-
#' n <- 10
37-
#' start <- as.Date("1999/01/01")
38-
#' end <- as.Date("1999/01/10")
39-
#'
40-
#' df <- data.frame(
41-
#' x = runif(n),
42-
#' index = 1:n,
43-
#' day = seq(start, end, by = "day")
44-
#' )
45-
#'
46-
#' library(recipes)
47-
#' recipe(~., data = df) %>%
48-
#' step_lag(index, day, lag = 2:3) %>%
49-
#' prep(df) %>%
50-
#' bake(df)
19+
#' @rdname step_epi_ahead
5120
step_epi_lag <-
5221
function(recipe,
5322
...,
@@ -130,14 +99,18 @@ bake.step_epi_lag <- function(object, new_data, ...) {
13099
paste0(new_data_names[intersection], collapse = ", "),
131100
"."))
132101
}
133-
102+
ok <- object$keys
134103
lagged <- purrr::reduce(
135-
purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = object$keys),
104+
purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok),
136105
dplyr::full_join,
137-
by = object$keys
106+
by = ok
138107
)
139108

140-
dplyr::full_join(new_data, lagged, by = object$keys)
109+
dplyr::full_join(new_data, lagged, by = object$keys) %>%
110+
dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>%
111+
dplyr::arrange(time_value) %>%
112+
dplyr::ungroup()
113+
141114
}
142115

143116
#' @export

man/step_epi_ahead.Rd

Lines changed: 117 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)