Skip to content

Commit 245448a

Browse files
authored
Merge pull request #216 from dajmcdon/km-fix-epix_slide-good
Replace `n` with `before` (no `after`) in `epix_slide`
2 parents 94d5ca5 + d648910 commit 245448a

File tree

11 files changed

+171
-60
lines changed

11 files changed

+171
-60
lines changed

R/archive.R

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,7 @@ epi_archive =
584584
#' details.
585585
#' @importFrom data.table key
586586
#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms
587-
slide = function(f, ..., n, group_by, ref_time_values,
587+
slide = function(f, ..., before, group_by, ref_time_values,
588588
time_step, new_col_name = "slide_value",
589589
as_list_col = FALSE, names_sep = "_",
590590
all_rows = FALSE) {
@@ -597,10 +597,22 @@ epi_archive =
597597
ref_time_values = ref_time_values[ref_time_values %in%
598598
unique(self$DT$time_value)]
599599
}
600-
600+
601+
# Validate and pre-process `before`:
602+
if (missing(before)) {
603+
Abort("`before` is required (and must be passed by name);
604+
if you did not want to apply a sliding window but rather
605+
to map `as_of` and `f` across various `ref_time_values`,
606+
pass a large `before` value (e.g., if time steps are days,
607+
`before=365000`).")
608+
}
609+
before <- vctrs::vec_cast(before, integer())
610+
if (length(before) != 1L || is.na(before) || before < 0L) {
611+
Abort("`before` must be length-1, non-NA, non-negative")
612+
}
613+
601614
# If a custom time step is specified, then redefine units
602-
before_num = n-1
603-
if (!missing(time_step)) before_num = time_step(n-1)
615+
if (!missing(time_step)) before <- time_step(before)
604616

605617
# What to group by? If missing, set according to internal keys;
606618
# otherwise, tidyselect.
@@ -673,12 +685,13 @@ epi_archive =
673685
if (!missing(f)) {
674686
if (rlang::is_formula(f)) f = rlang::as_function(f)
675687

676-
x = purrr::map_dfr(ref_time_values, function(t) {
677-
self$as_of(t, min_time_value = t - before_num) %>%
688+
x = purrr::map_dfr(ref_time_values, function(ref_time_value) {
689+
self$as_of(ref_time_value,
690+
min_time_value = ref_time_value - before) %>%
678691
dplyr::group_by(!!!group_by) %>%
679692
dplyr::group_modify(comp_one_grp,
680693
f = f, ...,
681-
time_value = t,
694+
time_value = ref_time_value,
682695
key_vars = key_vars,
683696
new_col = new_col,
684697
.keep = TRUE) %>%
@@ -700,12 +713,13 @@ epi_archive =
700713
f = function(x, quo, ...) rlang::eval_tidy(quo, x)
701714
new_col = sym(names(rlang::quos_auto_name(quos)))
702715

703-
x = purrr::map_dfr(ref_time_values, function(t) {
704-
self$as_of(t, min_time_value = t - before_num) %>%
716+
x = purrr::map_dfr(ref_time_values, function(ref_time_value) {
717+
self$as_of(ref_time_value,
718+
min_time_value = ref_time_value - before) %>%
705719
dplyr::group_by(!!!group_by) %>%
706720
dplyr::group_modify(comp_one_grp,
707721
f = f, quo = quo,
708-
time_value = t,
722+
time_value = ref_time_value,
709723
key_vars = key_vars,
710724
new_col = new_col,
711725
.keep = TRUE) %>%

R/methods-epi_archive.R

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -376,10 +376,21 @@ epix_merge = function(x, y,
376376
#' @param ... Additional arguments to pass to the function or formula specified
377377
#' via `f`. Alternatively, if `f` is missing, then the current argument is
378378
#' interpreted as an expression for tidy evaluation.
379-
#' @param n Number of time steps to use in the running window. For example, if
380-
#' `n = 7`, and one time step is one day, then to produce a value on January 7
381-
#' we apply the given function or formula to data in between January 1 and
382-
#' 7.
379+
#' @param before How far `before` each `ref_time_value` should the sliding
380+
#' window extend? If provided, should be a single, non-NA,
381+
#' [integer-compatible][vctrs::vec_cast] number of time steps. This window
382+
#' endpoint is inclusive. For example, if `before = 7`, and one time step is
383+
#' one day, then to produce a value for a `ref_time_value` of January 8, we
384+
#' apply the given function or formula to data (for each group present) with
385+
#' `time_value`s from January 1 onward, as they were reported on January 8.
386+
#' For typical disease surveillance sources, this will not include any data
387+
#' with a `time_value` of January 8, and, depending on the amount of reporting
388+
#' latency, may not include January 7 or even earlier `time_value`s. (If
389+
#' instead the archive were to hold nowcasts instead of regular surveillance
390+
#' data, then we would indeed expect data for `time_value` January 8. If it
391+
#' were to hold forecasts, then we would expect data for `time_value`s after
392+
#' January 8, and the sliding window would extend as far after each
393+
#' `ref_time_value` as needed to include all such `time_value`s.)
383394
#' @param group_by The variable(s) to group by before slide computation. If
384395
#' missing, then the keys in the underlying data table, excluding `time_value`
385396
#' and `version`, will be used for grouping. To omit a grouping entirely, use
@@ -414,10 +425,14 @@ epix_merge = function(x, y,
414425
#' values.
415426
#'
416427
#' @details Two key distinctions between inputs to the current function and
417-
#' `epi_slide()`:
418-
#' 1. `epix_slide()` uses windows that are **always right-aligned** (in
419-
#' `epi_slide()`, custom alignments could be specified using the `align` or
420-
#' `before` arguments).
428+
#' [`epi_slide()`]:
429+
#' 1. `epix_slide()` doesn't accept an `after` argument; its windows extend
430+
#' from `before` time steps before a given `ref_time_value` through the last
431+
#' `time_value` available as of version `ref_time_value` (typically, this
432+
#' won't include `ref_time_value` itself, as observations about a particular
433+
#' time interval (e.g., day) are only published after that time interval ends);
434+
#' `epi_slide` windows extend from `before` time steps before a
435+
#' `ref_time_value` through `after` time steps after `ref_time_value`.
421436
#' 2. `epix_slide()` uses a `group_by` to specify the grouping upfront (in
422437
#' `epi_slide()`, this would be accomplished by a preceding function call to
423438
#' `dplyr::group_by()`).
@@ -440,11 +455,11 @@ epix_merge = function(x, y,
440455
#' Finally, this is simply a wrapper around the `slide()` method of the
441456
#' `epi_archive` class, so if `x` is an `epi_archive` object, then:
442457
#' ```
443-
#' epix_slide(x, new_var = comp(old_var), n = 120)
458+
#' epix_slide(x, new_var = comp(old_var), before = 119)
444459
#' ```
445460
#' is equivalent to:
446461
#' ```
447-
#' x$slide(x, new_var = comp(old_var), n = 120)
462+
#' x$slide(new_var = comp(old_var), before = 119)
448463
#' ```
449464
#'
450465
#' @importFrom rlang enquo
@@ -462,15 +477,15 @@ epix_merge = function(x, y,
462477
#' by = "1 day")
463478
#' epix_slide(x = archive_cases_dv_subset,
464479
#' f = ~ mean(.x$case_rate_7d_av),
465-
#' n = 3,
480+
#' before = 2,
466481
#' group_by = geo_value,
467482
#' ref_time_values = time_values,
468483
#' new_col_name = 'case_rate_3d_av')
469-
epix_slide = function(x, f, ..., n, group_by, ref_time_values,
484+
epix_slide = function(x, f, ..., before, group_by, ref_time_values,
470485
time_step, new_col_name = "slide_value",
471486
as_list_col = FALSE, names_sep = "_", all_rows = FALSE) {
472487
if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.")
473-
return(x$slide(f, ..., n = n,
488+
return(x$slide(f, ..., before = before,
474489
group_by = {{group_by}},
475490
ref_time_values = ref_time_values,
476491
time_step = time_step,

man/epi_archive.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/epix_slide.Rd

Lines changed: 27 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-epix_fill_through_version.R

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ test_that("epix_fill_through_version mirrors input when it is sufficiently up to
1111
# edition 3, which is based on `waldo::compare` rather than `base::identical`;
1212
# `waldo::compare` in waldo >=0.3.1 appears (as of 0.4.0) to compare R6
1313
# objects by contents rather than address (in a way that is tested but maybe
14-
# not guaranteed via user docs). Use `local_edition` to ensure we use edition
15-
# 3 here.
16-
local_edition(3)
14+
# not guaranteed via user docs). Use `testthat::local_edition` to ensure we
15+
# use testthat edition 3 here (use `testthat::` to prevent ambiguity with
16+
# `readr`).
17+
testthat::local_edition(3)
1718
expect_identical(ea_orig, ea_trivial_fill_na1)
1819
expect_identical(ea_orig, ea_trivial_fill_na2)
1920
expect_identical(ea_orig, ea_trivial_fill_locf)
@@ -30,9 +31,9 @@ test_that("epix_fill_through_version can extend observed versions, gives expecte
3031
ea_fill_na = epix_fill_through_version(ea_orig, later_unobserved_version, "na")
3132
ea_fill_locf = epix_fill_through_version(ea_orig, later_unobserved_version, "locf")
3233

33-
# We use edition 3 features here, passing `ignore_attr` to `waldo::compare`.
34-
# Ensure we are using edition 3:
35-
local_edition(3)
34+
# We use testthat edition 3 features here, passing `ignore_attr` to
35+
# `waldo::compare`. Ensure we are using edition 3:
36+
testthat::local_edition(3)
3637
withCallingHandlers({
3738
expect_identical(ea_fill_na$versions_end, later_unobserved_version)
3839
expect_identical(tibble::as_tibble(ea_fill_na$as_of(first_unobserved_version)),

tests/testthat/test-epix_merge.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ test_that("epix_merge merges and carries forward updates properly", {
5858
)
5959
# We rely on testthat edition 3 expect_identical using waldo, not identical. See
6060
# test-epix_fill_through_version.R comments for details.
61-
local_edition(3)
61+
testthat::local_edition(3)
6262
expect_identical(xy, xy_expected)
6363
})
6464

tests/testthat/test-epix_slide.R

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
library(dplyr)
2+
3+
test_that("epix_slide only works on an epi_archive",{
4+
expect_error(epix_slide(data.frame(x=1)))
5+
})
6+
7+
x <- tibble::tribble(~version, ~time_value, ~binary,
8+
4, c(1:3), 2^(1:3),
9+
5, c(1:2,4), 2^(4:6),
10+
6, c(1:2,4:5), 2^(7:10),
11+
7, 2:6, 2^(11:15)) %>%
12+
tidyr::unnest(c(time_value,binary))
13+
14+
xx <- bind_cols(geo_value = rep("x",15), x) %>%
15+
as_epi_archive()
16+
17+
test_that("epix_slide works as intended",{
18+
xx1 <- epix_slide(x = xx,
19+
f = ~ sum(.x$binary),
20+
before = 2,
21+
group_by = geo_value,
22+
new_col_name = "sum_binary")
23+
24+
xx2 <- tibble(geo_value = rep("x",3),
25+
# 7 should also be there below; this is a bug on issue #153
26+
time_value = c(4,5,6),
27+
sum_binary = c(2^3+2^2,
28+
2^6+2^3,
29+
2^10+2^9)) %>%
30+
as_epi_df(as_of = 1) # Also a bug (issue #213)
31+
32+
expect_identical(xx1,xx2) # *
33+
34+
xx3 <- xx$slide(f = ~ sum(.x$binary),
35+
before = 2,
36+
group_by = "geo_value",
37+
new_col_name = 'sum_binary')
38+
39+
expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical
40+
})
41+
42+
test_that("epix_slide `before` validation works", {
43+
expect_error(xx$slide(f = ~ sum(.x$binary)),
44+
"`before` is required")
45+
expect_error(xx$slide(f = ~ sum(.x$binary), before=NA),
46+
"`before`.*NA")
47+
expect_error(xx$slide(f = ~ sum(.x$binary), before=-1),
48+
"`before`.*negative")
49+
expect_error(xx$slide(f = ~ sum(.x$binary), before=1.5),
50+
regexp="before",
51+
class="vctrs_error_incompatible_type")
52+
# We might want to allow this at some point (issue #219):
53+
expect_error(xx$slide(f = ~ sum(.x$binary), before=Inf),
54+
regexp="before",
55+
class="vctrs_error_incompatible_type")
56+
# (wrapper shouldn't introduce a value:)
57+
expect_error(epix_slide(xx, f = ~ sum(.x$binary)), "`before` is required")
58+
# These `before` values should be accepted:
59+
expect_error(xx$slide(f = ~ sum(.x$binary), before=0),
60+
NA)
61+
expect_error(xx$slide(f = ~ sum(.x$binary), before=2L),
62+
NA)
63+
expect_error(xx$slide(f = ~ sum(.x$binary), before=365000),
64+
NA)
65+
})

0 commit comments

Comments
 (0)