Skip to content

Commit 54b744c

Browse files
committed
deduplicate function/formula and quosure slides
In `as_slide_computation`, call `eval_tidy` on the quosure `x` passed directly to `as_slide_computation`, rather than generating an `f_wrapper` computation function that takes the quosure as an argument. The computation function is regenerated each time `slide` is called, with a new quosure, so the computation function doesn't need to be flexible enough to run with different `quo`s. This change makes the function/formula and quosure forks more similar, since `group_modify`, `slide_one_grp` in the `epi_slide` case, and `comp_one_grp` in the `epix_slide` case no longer need a `quo` argument in the quosure fork. To make the two forks fully identical, the quosure fork was changed to pass an empty set of dots to the computation functions. The `as_slide_computation` call and `group_modify` call can now be pulled out of the if/else block.
1 parent e188e2b commit 54b744c

File tree

3 files changed

+84
-159
lines changed

3 files changed

+84
-159
lines changed

R/grouped_epi_archive.R

Lines changed: 62 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,8 @@ grouped_epi_archive =
186186
#' object. See the documentation for the wrapper function [`epix_slide()`] for
187187
#' details.
188188
#' @importFrom data.table key address
189-
#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms env
189+
#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms
190+
#' env missing_arg
190191
slide = function(f, ..., before, ref_time_values,
191192
time_step, new_col_name = "slide_value",
192193
as_list_col = FALSE, names_sep = "_",
@@ -291,71 +292,8 @@ grouped_epi_archive =
291292
!!new_col := .env$comp_value))
292293
}
293294

294-
# If f is not missing, then just go ahead, slide by group
295-
if (!missing(f)) {
296-
f = as_slide_computation(f, calc_ref_time_value = FALSE, ...)
297-
x = purrr::map_dfr(ref_time_values, function(ref_time_value) {
298-
# Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`,
299-
# `epi_archive` if `all_versions` is `TRUE`:
300-
as_of_raw = private$ungrouped$as_of(ref_time_value, min_time_value = ref_time_value - before, all_versions = all_versions)
301-
302-
# Set:
303-
# * `as_of_df`, the data.frame/tibble/epi_df/etc. that we will
304-
# `group_modify` as the `.data` argument. Might or might not
305-
# include version column.
306-
# * `group_modify_fn`, the corresponding `.f` argument
307-
if (!all_versions) {
308-
as_of_df = as_of_raw
309-
group_modify_fn = comp_one_grp
310-
} else {
311-
as_of_archive = as_of_raw
312-
# We essentially want to `group_modify` the archive, but
313-
# haven't implemented this method yet. Next best would be
314-
# `group_modify` on its `$DT`, but that has different
315-
# behavior based on whether or not `dtplyr` is loaded.
316-
# Instead, go through an ordinary data frame, trying to avoid
317-
# copies.
318-
if (address(as_of_archive$DT) == address(private$ungrouped$DT)) {
319-
# `as_of` aliased its the full `$DT`; copy before mutating:
320-
as_of_archive$DT <- copy(as_of_archive$DT)
321-
}
322-
dt_key = data.table::key(as_of_archive$DT)
323-
as_of_df = as_of_archive$DT
324-
data.table::setDF(as_of_df)
325-
326-
# Convert each subgroup chunk to an archive before running the calculation.
327-
group_modify_fn = function(.data_group, .group_key,
328-
f, ...,
329-
ref_time_value,
330-
new_col) {
331-
# .data_group is coming from as_of_df as a tibble, but we
332-
# want to feed `comp_one_grp` an `epi_archive` backed by a
333-
# DT; convert and wrap:
334-
data.table::setattr(.data_group, "sorted", dt_key)
335-
data.table::setDT(.data_group, key=dt_key)
336-
.data_group_archive = as_of_archive$clone()
337-
.data_group_archive$DT = .data_group
338-
comp_one_grp(.data_group_archive, .group_key, f = f, ...,
339-
ref_time_value = ref_time_value,
340-
new_col = new_col
341-
)
342-
}
343-
}
344-
345-
return(
346-
dplyr::group_by(as_of_df, dplyr::across(tidyselect::all_of(private$vars)),
347-
.drop=private$drop) %>%
348-
dplyr::group_modify(group_modify_fn,
349-
f = f, ...,
350-
ref_time_value = ref_time_value,
351-
new_col = new_col,
352-
.keep = TRUE)
353-
)
354-
})
355-
}
356-
357-
# Else interpret ... as an expression for tidy evaluation
358-
else {
295+
# Interpret ... as an expression for tidy evaluation
296+
if (missing(f)) {
359297
quos = enquos(...)
360298
if (length(quos) == 0) {
361299
Abort("If `f` is missing then a computation must be specified via `...`.")
@@ -364,69 +302,70 @@ grouped_epi_archive =
364302
Abort("If `f` is missing then only a single computation can be specified via `...`.")
365303
}
366304

367-
quo = quos[[1]]
368-
f = as_slide_computation(quo, calc_ref_time_value = FALSE, ...)
305+
f = quos[[1]]
369306
new_col = sym(names(rlang::quos_auto_name(quos)))
307+
... = missing_arg()
308+
}
370309

371-
x = purrr::map_dfr(ref_time_values, function(ref_time_value) {
372-
# Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`,
373-
# `epi_archive` if `all_versions` is `TRUE`:
374-
as_of_raw = private$ungrouped$as_of(ref_time_value, min_time_value = ref_time_value - before, all_versions = all_versions)
310+
f = as_slide_computation(f, calc_ref_time_value = FALSE, ...)
311+
x = purrr::map_dfr(ref_time_values, function(ref_time_value) {
312+
# Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`,
313+
# `epi_archive` if `all_versions` is `TRUE`:
314+
as_of_raw = private$ungrouped$as_of(ref_time_value, min_time_value = ref_time_value - before, all_versions = all_versions)
375315

376-
# Set:
377-
# * `as_of_df`, the data.frame/tibble/epi_df/etc. that we will
378-
# `group_modify` as the `.data` argument. Might or might not
379-
# include version column.
380-
# * `group_modify_fn`, the corresponding `.f` argument
381-
if (!all_versions) {
382-
as_of_df = as_of_raw
383-
group_modify_fn = comp_one_grp
384-
} else {
385-
as_of_archive = as_of_raw
386-
# We essentially want to `group_modify` the archive, but don't
387-
# provide an implementation yet. Next best would be
388-
# `group_modify` on its `$DT`, but that has different behavior
389-
# based on whether or not `dtplyr` is loaded. Instead, go
390-
# through an ordinary data frame, trying to avoid copies.
391-
if (address(as_of_archive$DT) == address(private$ungrouped$DT)) {
392-
# `as_of` aliased its the full `$DT`; copy before mutating:
393-
as_of_archive$DT <- copy(as_of_archive$DT)
394-
}
395-
dt_key = data.table::key(as_of_archive$DT)
396-
as_of_df = as_of_archive$DT
397-
data.table::setDF(as_of_df)
316+
# Set:
317+
# * `as_of_df`, the data.frame/tibble/epi_df/etc. that we will
318+
# `group_modify` as the `.data` argument. Might or might not
319+
# include version column.
320+
# * `group_modify_fn`, the corresponding `.f` argument
321+
if (!all_versions) {
322+
as_of_df = as_of_raw
323+
group_modify_fn = comp_one_grp
324+
} else {
325+
as_of_archive = as_of_raw
326+
# We essentially want to `group_modify` the archive, but
327+
# haven't implemented this method yet. Next best would be
328+
# `group_modify` on its `$DT`, but that has different
329+
# behavior based on whether or not `dtplyr` is loaded.
330+
# Instead, go through an ordinary data frame, trying to avoid
331+
# copies.
332+
if (address(as_of_archive$DT) == address(private$ungrouped$DT)) {
333+
# `as_of` aliased its the full `$DT`; copy before mutating:
334+
as_of_archive$DT <- copy(as_of_archive$DT)
335+
}
336+
dt_key = data.table::key(as_of_archive$DT)
337+
as_of_df = as_of_archive$DT
338+
data.table::setDF(as_of_df)
398339

399-
# Convert each subgroup chunk to an archive before running the calculation.
400-
group_modify_fn = function(.data_group, .group_key,
401-
f, ...,
402-
ref_time_value,
403-
new_col) {
404-
# .data_group is coming from as_of_df as a tibble, but we
405-
# want to feed `comp_one_grp` an `epi_archive` backed by a
406-
# DT; convert and wrap:
407-
data.table::setattr(.data_group, "sorted", dt_key)
408-
data.table::setDT(.data_group, key=dt_key)
409-
.data_group_archive = as_of_archive$clone()
410-
.data_group_archive$DT = .data_group
411-
comp_one_grp(.data_group_archive, .group_key, f = f, quo = quo,
412-
ref_time_value = ref_time_value,
413-
new_col = new_col
414-
)
415-
}
340+
# Convert each subgroup chunk to an archive before running the calculation.
341+
group_modify_fn = function(.data_group, .group_key,
342+
f, ...,
343+
ref_time_value,
344+
new_col) {
345+
# .data_group is coming from as_of_df as a tibble, but we
346+
# want to feed `comp_one_grp` an `epi_archive` backed by a
347+
# DT; convert and wrap:
348+
data.table::setattr(.data_group, "sorted", dt_key)
349+
data.table::setDT(.data_group, key=dt_key)
350+
.data_group_archive = as_of_archive$clone()
351+
.data_group_archive$DT = .data_group
352+
comp_one_grp(.data_group_archive, .group_key, f = f, ...,
353+
ref_time_value = ref_time_value,
354+
new_col = new_col
355+
)
416356
}
357+
}
417358

418-
return(
419-
dplyr::group_by(as_of_df, dplyr::across(tidyselect::all_of(private$vars)),
420-
.drop=private$drop) %>%
421-
dplyr::group_modify(group_modify_fn,
422-
f = f, quo = quo,
423-
ref_time_value = ref_time_value,
424-
comp_effective_key_vars = comp_effective_key_vars,
425-
new_col = new_col,
426-
.keep = TRUE)
427-
)
428-
})
429-
}
359+
return(
360+
dplyr::group_by(as_of_df, dplyr::across(tidyselect::all_of(private$vars)),
361+
.drop=private$drop) %>%
362+
dplyr::group_modify(group_modify_fn,
363+
f = f, ...,
364+
ref_time_value = ref_time_value,
365+
new_col = new_col,
366+
.keep = TRUE)
367+
)
368+
})
430369

431370
# Unchop/unnest if we need to
432371
if (!as_list_col) {

R/slide.R

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@
123123
#'
124124
#' @importFrom lubridate days weeks
125125
#' @importFrom dplyr bind_rows group_vars filter select
126-
#' @importFrom rlang .data .env !! enquo enquos sym env
126+
#' @importFrom rlang .data .env !! enquo enquos sym env missing_arg
127127
#' @export
128128
#' @examples
129129
#' # slide a 7-day trailing average formula on cases
@@ -351,22 +351,8 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
351351
return(mutate(.data_group, !!new_col := slide_values))
352352
}
353353

354-
# If f is not missing, then just go ahead, slide by group
355-
if (!missing(f)) {
356-
f = as_slide_computation(f, calc_ref_time_value = TRUE, before = before, ...)
357-
x = x %>%
358-
group_modify(slide_one_grp,
359-
f = f, ...,
360-
starts = starts,
361-
stops = stops,
362-
time_values = ref_time_values,
363-
all_rows = all_rows,
364-
new_col = new_col,
365-
.keep = FALSE)
366-
}
367-
368-
# Else interpret ... as an expression for tidy evaluation
369-
else {
354+
# Interpret ... as an expression for tidy evaluation
355+
if (missing(f)) {
370356
quos = enquos(...)
371357
if (length(quos) == 0) {
372358
Abort("If `f` is missing then a computation must be specified via `...`.")
@@ -375,20 +361,21 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
375361
Abort("If `f` is missing then only a single computation can be specified via `...`.")
376362
}
377363

378-
quo = quos[[1]]
364+
f = quos[[1]]
379365
new_col = sym(names(rlang::quos_auto_name(quos)))
380-
381-
f = as_slide_computation(quo, calc_ref_time_value = TRUE, before = before, ...)
382-
x = x %>%
383-
group_modify(slide_one_grp,
384-
f = f, quo = quo,
385-
starts = starts,
386-
stops = stops,
387-
time_values = ref_time_values,
388-
all_rows = all_rows,
389-
new_col = new_col,
390-
.keep = FALSE)
366+
... = missing_arg()
391367
}
368+
369+
f = as_slide_computation(f, calc_ref_time_value = TRUE, before = before, ...)
370+
x = x %>%
371+
group_modify(slide_one_grp,
372+
f = f, ...,
373+
starts = starts,
374+
stops = stops,
375+
time_values = ref_time_values,
376+
all_rows = all_rows,
377+
new_col = new_col,
378+
.keep = FALSE)
392379

393380
# Unnest if we need to, and return
394381
if (!as_list_col) {

R/utils.R

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ as_slide_computation <- function(x,
253253
# A quosure is a type of formula, so be careful with `if` logic here.
254254
if (is_quosure(x)) {
255255
if (calc_ref_time_value) {
256-
f_wrapper = function(.x, .group_key, quo, ...) {
256+
f_wrapper = function(.x, .group_key, ...) {
257257
.ref_time_value = min(.x$time_value) + before
258258
.x <- .x[.x$.real,]
259259
.x$.real <- NULL
@@ -266,25 +266,24 @@ as_slide_computation <- function(x,
266266
data_mask$.x = .x
267267
data_mask$.group_key = .group_key
268268
data_mask$.ref_time_value = .ref_time_value
269-
rlang::eval_tidy(quo, data_mask)
269+
rlang::eval_tidy(x, data_mask)
270270
}
271271
return(f_wrapper)
272272
}
273273

274-
f_wrapper = function(.x, .group_key, .ref_time_value, quo, ...) {
274+
f_wrapper = function(.x, .group_key, .ref_time_value, ...) {
275275
# Convert to environment to standardize between tibble and R6
276276
# based inputs. In both cases, we should get a simple
277277
# environment with the empty environment as its parent.
278278
data_env = rlang::as_environment(.x)
279279
data_mask = rlang::new_data_mask(bottom = data_env, top = data_env)
280280
data_mask$.data <- rlang::as_data_pronoun(data_mask)
281-
# We'll also install `.x` directly, not as an
282-
# `rlang_data_pronoun`, so that we can, e.g., use more dplyr and
283-
# epiprocess operations.
281+
# We'll also install `.x` directly, not as an `rlang_data_pronoun`, so
282+
# that we can, e.g., use more dplyr and epiprocess operations.
284283
data_mask$.x = .x
285284
data_mask$.group_key = .group_key
286285
data_mask$.ref_time_value = .ref_time_value
287-
rlang::eval_tidy(quo, data_mask)
286+
rlang::eval_tidy(x, data_mask)
288287
}
289288
return(f_wrapper)
290289
}

0 commit comments

Comments
 (0)