cmu-delphi
diff --git a/‎NAMESPACE‎
Lines changed: 3 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎NEWS.md‎
Lines changed: 20 additions & 10 deletions b/‎NEWS.md‎
Lines changed: 20 additions & 10 deletions
diff --git a/‎R/epiprocess.R‎
Lines changed: 1 addition & 0 deletions b/‎R/epiprocess.R‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/grouped_epi_archive.R‎
Lines changed: 16 additions & 2 deletions b/‎R/grouped_epi_archive.R‎
Lines changed: 16 additions & 2 deletions
diff --git a/‎R/methods-epi_archive.R‎
Lines changed: 4 additions & 1 deletion b/‎R/methods-epi_archive.R‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎R/slide.R‎
Lines changed: 71 additions & 10 deletions b/‎R/slide.R‎
Lines changed: 71 additions & 10 deletions
diff --git a/‎man/epi_slide.Rd‎
Lines changed: 6 additions & 3 deletions b/‎man/epi_slide.Rd‎
Lines changed: 6 additions & 3 deletions
diff --git a/‎man/epix_slide.Rd‎
Lines changed: 6 additions & 3 deletions b/‎man/epix_slide.Rd‎
Lines changed: 6 additions & 3 deletions
diff --git a/‎man/reexports.Rd‎
Lines changed: 1 addition & 1 deletion b/‎man/reexports.Rd‎
Lines changed: 1 addition & 1 deletion
@@ -69,13 +69,15 @@ importFrom(data.table,key)
 importFrom(data.table,set)
 importFrom(data.table,setkeyv)
 importFrom(dplyr,arrange)
+importFrom(dplyr,bind_rows)
 importFrom(dplyr,dplyr_col_modify)
 importFrom(dplyr,dplyr_reconstruct)
 importFrom(dplyr,dplyr_row_slice)
 importFrom(dplyr,filter)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_by_drop_default)
 importFrom(dplyr,group_modify)
+importFrom(dplyr,group_vars)
 importFrom(dplyr,groups)
 importFrom(dplyr,mutate)
 importFrom(dplyr,relocate)
@@ -97,6 +99,7 @@ importFrom(rlang,caller_env)
 importFrom(rlang,check_dots_empty0)
 importFrom(rlang,enquo)
 importFrom(rlang,enquos)
+importFrom(rlang,env)
 importFrom(rlang,f_env)
 importFrom(rlang,f_rhs)
 importFrom(rlang,global_env)
 
@@ -6,16 +6,31 @@ inter-release development versions will include an additional ".9999" suffix.
 
 ## Breaking changes:
 
-* Changes to `epix_slide`:
-  * The `f` computation is now required to take at least three arguments. `f`
-    must take an `epi_df` with the same column names as the archive's `DT`,
+* Changes to `epi_slide` and `epix_slide`:
+  * If `f` is a function, it is now required to take at least three arguments.
+    `f` must take an `epi_df` with the same column names as the archive's `DT`,
     minus the `version` column; followed by a one-row tibble containing the
     values of the grouping variables for the associated group; followed by a
-    reference time value, usually as a `Date` object; followed by any number
-    of named arguments.
+    reference time value, usually as a `Date` object. Optionally, it can take
+    any number of additional arguments after that, and forward values for those
+    arguments through `epi[x]_slide`'s `...` args.
+    * To make your existing slide computations work, add a third argument to
+      your `f` function to accept this new input: e.g., change `f = function(x,
+      g, <any other arguments>) { <body> }` to `f = function(x, g, rt, <any
+      other arguments>) { <body> }`.
 
 ## New features:
 
+* `epi_slide` and `epix_slide` also make the window data, group key and reference
+  time value available to slide computations specified as formulas or tidy
+  evaluation expressions, in additional or completely new ways.
+  * If `f` is a formula, it can now access the reference time value via `.z` or
+    `.ref_time_value`.
+  * If `f` is missing, the tidy evaluation expression in `...` can now refer to
+    the window data as an `epi_df` or `tibble` with `.x`, the group key with
+    `.group_key`, and the reference time value with `.ref_time_value`. The usual
+    `.data` and `.env` pronouns also work, but `pick()` and `cur_data()` do not;
+    work off of `.x` instead.
 * `epix_slide` has been made more like `dplyr::group_modify`. It will no longer
   perform element/row recycling for size stability, accepts slide computation
   outputs containing any number of rows, and no longer supports `all_rows`.
@@ -29,11 +44,6 @@ inter-release development versions will include an additional ".9999" suffix.
   more closely whether/when/how to output an `epi_df`.
   * To keep the old behavior, convert the output of `epix_slide()` to `epi_df`
     when desired and set the metadata appropriately.
-* `epix_slide` `f` computations passed as functions or formulas now have
-  access to the reference time value. If `f` is a function, it is passed a
-  Date containing the reference time value as the third argument. If a
-  formula, `f` can access the reference time value via `.z` or
-  `.ref_time_value`.
 
 ## Improvements:
 
 
@@ -7,3 +7,4 @@
 #' @docType package
 #' @name epiprocess
 NULL
+utils::globalVariables(c(".x", ".group_key", ".ref_time_value"))
@@ -186,7 +186,7 @@ grouped_epi_archive =
 #'   object. See the documentation for the wrapper function [`epix_slide()`] for
 #'   details.
 #' @importFrom data.table key address
-#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms
+#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms env
           slide = function(f, ..., before, ref_time_values,
                            time_step, new_col_name = "slide_value",
                            as_list_col = FALSE, names_sep = "_",
@@ -370,7 +370,21 @@ grouped_epi_archive =
               }
 
               quo = quos[[1]]
-              f = function(x, quo, ...) rlang::eval_tidy(quo, x)
+              f = function(.x, .group_key, .ref_time_value, quo, ...) {
+                # Convert to environment to standardize between tibble and R6
+                # based inputs. In both cases, we should get a simple
+                # environment with the empty environment as its parent.
+                data_env = rlang::as_environment(.x)
+                data_mask = rlang::new_data_mask(bottom = data_env, top = data_env)
+                data_mask$.data <- rlang::as_data_pronoun(data_mask)
+                # We'll also install `.x` directly, not as an
+                # `rlang_data_pronoun`, so that we can, e.g., use more dplyr and
+                # epiprocess operations.
+                data_mask$.x = .x
+                data_mask$.group_key = .group_key
+                data_mask$.ref_time_value = .ref_time_value
+                rlang::eval_tidy(quo, data_mask)
+              }
               new_col = sym(names(rlang::quos_auto_name(quos)))
 
               x = purrr::map_dfr(ref_time_values, function(ref_time_value) {
 
@@ -678,7 +678,10 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #'   computation.
 #' @param ... Additional arguments to pass to the function or formula specified
 #'   via `f`. Alternatively, if `f` is missing, then `...` is interpreted as an
-#'   expression for tidy evaluation. See details of [`epi_slide`].
+#'   expression for tidy evaluation; in addition to referring to columns
+#'   directly by name, the expression has access to `.data` and `.env` pronouns
+#'   as in `dplyr` verbs, and can also refer to `.x`, `.group_key`, and
+#'   `.ref_time_value`. See details of [`epi_slide`].
 #' @param before How far `before` each `ref_time_value` should the sliding
 #'   window extend? If provided, should be a single, non-NA,
 #'   [integer-compatible][vctrs::vec_cast] number of time steps. This window
 
@@ -23,7 +23,10 @@
 #'   If `f` is missing, then `...` will specify the computation.
 #' @param ... Additional arguments to pass to the function or formula specified
 #'   via `f`. Alternatively, if `f` is missing, then the `...` is interpreted as
-#'   an expression for tidy evaluation. See details.
+#'   an expression for tidy evaluation; in addition to referring to columns
+#'   directly by name, the expression has access to `.data` and `.env` pronouns
+#'   as in `dplyr` verbs, and can also refer to `.x`, `.group_key`, and
+#'   `.ref_time_value`. See details.
 #' @param before,after How far `before` and `after` each `ref_time_value` should
 #'   the sliding window extend? At least one of these two arguments must be
 #'   provided; the other's default will be 0. Any value provided for either
@@ -119,7 +122,8 @@
 #'   through the `new_col_name` argument.
 #'   
 #' @importFrom lubridate days weeks
-#' @importFrom rlang .data .env !! enquo enquos sym
+#' @importFrom dplyr bind_rows group_vars filter select
+#' @importFrom rlang .data .env !! enquo enquos sym env
 #' @export
 #' @examples 
 #' # slide a 7-day trailing average formula on cases
@@ -166,11 +170,8 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
 
   # Check that `f` takes enough args
   if (!missing(f) && is.function(f)) {
-    assert_sufficient_f_args(f, ...)
+    assert_sufficient_f_args(f, ..., n_mandatory_f_args = 3L)
   }
-
-  # Arrange by increasing time_value
-  x = arrange(x, time_value)
 
   if (missing(ref_time_values)) {
     ref_time_values = unique(x$time_value)
@@ -231,6 +232,35 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
     after <- time_step(after)
   }
 
+  min_ref_time_values = ref_time_values - before
+  min_ref_time_values_not_in_x <- min_ref_time_values[!(min_ref_time_values %in% unique(x$time_value))]
+
+  # Do set up to let us recover `ref_time_value`s later.
+  # A helper column marking real observations.
+  x$.real = TRUE
+
+  # Create df containing phony data. Df has the same columns and attributes as
+  # `x`, but filled with `NA`s aside from grouping columns. Number of rows is
+  # equal to the number of `min_ref_time_values_not_in_x` we have * the
+  # number of unique levels seen in the grouping columns.
+  before_time_values_df = data.frame(time_value=min_ref_time_values_not_in_x)
+  if (length(group_vars(x)) != 0) {
+    before_time_values_df = dplyr::cross_join(
+      # Get unique combinations of grouping columns seen in real data.
+      unique(x[, group_vars(x)]),
+      before_time_values_df
+    )
+  }
+  # Automatically fill in all other columns from `x` with `NA`s, and carry
+  # attributes over to new df.
+  before_time_values_df <- bind_rows(x[0,], before_time_values_df)
+  before_time_values_df$.real <- FALSE
+
+  x <- bind_rows(before_time_values_df, x)
+
+  # Arrange by increasing time_value
+  x = arrange(x, time_value)
+
   # Now set up starts and stops for sliding/hopping
   time_range = range(unique(x$time_value))
   starts = in_range(ref_time_values - before, time_range)
@@ -272,7 +302,9 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
     o = .data_group$time_value %in% time_values
     num_ref_rows = sum(o)
 
-    # Count the number of appearances of each reference time value
+    # Count the number of appearances of each reference time value (these
+    # appearances should all be real for now, but that may no longer hold if we
+    # allow ref time values outside of .data_group's time values):
     counts = .data_group %>%
       dplyr::filter(.data$time_value %in% time_values) %>%
       dplyr::count(.data$time_value) %>%
@@ -282,7 +314,7 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
           !all(purrr::map_lgl(slide_values_list, is.data.frame))) {
       Abort("The slide computations must return always atomic vectors or data frames (and not a mix of these two structures).")
     }
-    
+
     # Unlist if appropriate:
     slide_values =
       if (as_list_col) {
@@ -318,16 +350,24 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
       # fills with NA equivalent.
       vctrs::vec_slice(slide_values, o) = orig_values
     } else {
+      # This implicitly removes phony (`.real` == FALSE) observations.
       .data_group = filter(.data_group, o)
     }
     return(mutate(.data_group, !!new_col := slide_values))
   }
 
   # If f is not missing, then just go ahead, slide by group
   if (!missing(f)) {
+    if (rlang::is_formula(f)) f = as_slide_computation(f)
+    f_rtv_wrapper = function(x, g, ...) {
+      ref_time_value = min(x$time_value) + before
+      x <- x[x$.real,]
+      x$.real <- NULL
+      f(x, g, ref_time_value, ...)
+    }
     x = x %>%  
       group_modify(slide_one_grp,
-                   f = f, ...,
+                   f = f_rtv_wrapper, ...,
                    starts = starts,
                    stops = stops,
                    time_values = ref_time_values, 
@@ -347,7 +387,18 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
     }
 
     quo = quos[[1]]
-    f = function(x, quo, ...) rlang::eval_tidy(quo, x)
+    f = function(.x, .group_key, quo, ...) {
+      .ref_time_value = min(.x$time_value) + before
+      .x <- .x[.x$.real,]
+      .x$.real <- NULL
+      data_mask = rlang::as_data_mask(.x)
+      # We'll also install `.x` directly, not as an `rlang_data_pronoun`, so
+      # that we can, e.g., use more dplyr and epiprocess operations.
+      data_mask$.x = .x
+      data_mask$.group_key = .group_key
+      data_mask$.ref_time_value = .ref_time_value
+      rlang::eval_tidy(quo, data_mask)
+    }
     new_col = sym(names(rlang::quos_auto_name(quos)))
 
     x = x %>%  
@@ -365,5 +416,15 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
   if (!as_list_col) {
     x = unnest(x, !!new_col, names_sep = names_sep)
   }
+
+  # Remove any remaining phony observations. When `all_rows` is TRUE, phony
+  # observations aren't necessarily removed in `slide_one_grp`.
+  if (all_rows) {
+    x <- x[x$.real,]
+  }
+
+  # Drop helper column `.real`.
+  x$.real <- NULL
+
   return(x)
 }