Skip to content

Commit ae02a87

Browse files
committed
Add some epi_archive group_by-related docs and examples
1 parent 55e14eb commit ae02a87

File tree

5 files changed

+260
-26
lines changed

5 files changed

+260
-26
lines changed

DESCRIPTION

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,18 @@ RoxygenNote: 7.2.1
6464
Depends:
6565
R (>= 2.10)
6666
URL: https://cmu-delphi.github.io/epiprocess/
67+
Collate:
68+
'archive.R'
69+
'correlation.R'
70+
'data.R'
71+
'epi_df.R'
72+
'epiprocess.R'
73+
'methods-epi_archive.R'
74+
'grouped_epi_archive.R'
75+
'growth_rate.R'
76+
'methods-epi_df.R'
77+
'outliers.R'
78+
'reexports.R'
79+
'slide.R'
80+
'utils.R'
81+
'utils_pipe.R'

R/grouped_epi_archive.R

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,6 @@ nse_dots_names2 = function(...) {
3838
rlang::names2(rlang::call_match())
3939
}
4040

41-
#' @rdname group_by.epi_archive
42-
#'
43-
#' @export
44-
group_by_drop_default.grouped_epi_archive = function(.tbl) {
45-
.tbl$group_by_drop_default()
46-
}
47-
4841
#' @importFrom dplyr group_by_drop_default
4942
#' @noRd
5043
grouped_epi_archive =
@@ -132,9 +125,6 @@ grouped_epi_archive =
132125
# Return self invisibly for convenience in `$`-"pipe":
133126
invisible(self)
134127
},
135-
group_by_drop_default = function() {
136-
private$drop
137-
},
138128
group_by = function(..., .add = FALSE, .drop = dplyr::group_by_drop_default(self)) {
139129
if (!rlang::is_bool(.add)) {
140130
Abort("`.add` must be a Boolean")
@@ -156,6 +146,9 @@ grouped_epi_archive =
156146
grouped_epi_archive$new(private$ungrouped, vars, .drop)
157147
}
158148
},
149+
group_by_drop_default = function() {
150+
private$drop
151+
},
159152
groups = function() {
160153
rlang::syms(private$vars)
161154
},
@@ -274,10 +267,11 @@ grouped_epi_archive =
274267
if (length(comp_effective_key_vars) != 0L) {
275268
sum(!duplicated(.data_group[, comp_effective_key_vars]))
276269
} else {
277-
# Same idea as above, but accounting for `duplicated` not
278-
# working as we want on 0 columns. (Should be the same as if
279-
# we were counting distinct values of a column defined as
280-
# `rep(val, target_n_rows)`.)
270+
# Same idea as above, but accounting for `duplicated` working
271+
# differently (outputting `logical(0)`) on 0-column inputs
272+
# rather than matching the number of rows. (Instead, we use
273+
# the same count we would get if we were counting distinct
274+
# values of a column defined as `rep(val, target_n_rows)`.)
281275
if (nrow(.data_group) == 0L) {
282276
0L
283277
} else {
@@ -380,6 +374,13 @@ grouped_epi_archive =
380374
)
381375
)
382376

377+
# At time of writing, roxygen parses content in collation order, impacting the
378+
# presentation of .Rd files that document multiple functions (see
379+
# https://github.com/r-lib/roxygen2/pull/324). Use @include tags (determining
380+
# `Collate:`) and ordering of functions within each file in order to get the
381+
# desired ordering.
382+
383+
#' @include methods-epi_archive.R
383384
#' @rdname group_by.epi_archive
384385
#'
385386
#' @importFrom dplyr group_by
@@ -388,6 +389,7 @@ group_by.grouped_epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::gro
388389
.data$group_by(..., .add=.add, .drop=.drop)
389390
}
390391

392+
#' @include methods-epi_archive.R
391393
#' @rdname group_by.epi_archive
392394
#'
393395
#' @importFrom dplyr groups
@@ -396,6 +398,7 @@ groups.grouped_epi_archive = function(x) {
396398
x$groups()
397399
}
398400

401+
#' @include methods-epi_archive.R
399402
#' @rdname group_by.epi_archive
400403
#'
401404
#' @importFrom dplyr ungroup
@@ -404,7 +407,18 @@ ungroup.grouped_epi_archive = function(x, ...) {
404407
x$ungroup(...)
405408
}
406409

410+
#' @include methods-epi_archive.R
411+
#' @rdname group_by.epi_archive
412+
#'
407413
#' @export
408414
is_grouped_epi_archive = function(x) {
409415
inherits(x, "grouped_epi_archive")
410416
}
417+
418+
#' @include methods-epi_archive.R
419+
#' @rdname group_by.epi_archive
420+
#'
421+
#' @export
422+
group_by_drop_default.grouped_epi_archive = function(.tbl) {
423+
.tbl$group_by_drop_default()
424+
}

R/methods-epi_archive.R

Lines changed: 99 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,29 @@ epix_detailed_restricted_mutate = function(.data, ...) {
504504

505505
#' `group_by` and related methods for `epi_archive`, `grouped_epi_archive`
506506
#'
507-
#' @aliases grouped_epi_archive
507+
#' @param .data An `epi_archive` or `grouped_epi_archive`
508+
#' @param ... Similar to [`dplyr::group_by`] (see "Details:" for edge cases):
509+
#' * In `group_by`: unquoted variable name(s) or other ["data
510+
#' masking"][dplyr::dplyr_data_masking] expression(s). It's possible to use
511+
#' [`dplyr::mutate`]-like syntax here to calculate new columns on which to
512+
#' perform grouping, but note that, if you are regrouping an already-grouped
513+
#' `.data` object, the calculations will be carried out ignoring such grouping
514+
#' (same as [in \code{dplyr}][dplyr::group_by]).
515+
#' * In `ungroup`: either
516+
#' * empty, in order to remove the grouping and output an `epi_archive`; or
517+
#' * variable name(s) or other ["tidy-select"][dplyr::dplyr_tidy_select]
518+
#' expression(s), in order to remove the matching variables from the list of
519+
#' grouping variables, and output another `grouped_epi_archive`.
520+
#' @param .add Boolean. If `FALSE`, the default, the output will be grouped by
521+
#' the variable selection from `...` only; if `TRUE`, the output will be
522+
#' grouped by the current grouping variables plus the variable selection from
523+
#' `...`.
524+
#' @param .drop As in [`dplyr::group_by`]; determines treatment of factor
525+
#' columns.
526+
#' @param x A `grouped_epi_archive`, or, in `is_grouped_epi_archive`, any object
527+
#' @param .tbl An `epi_archive` or `grouped_epi_archive` (`epi_archive`
528+
#' dispatches to the S3 default method, and `grouped_epi_archive` dispatches
529+
#' to its own S3 method)
508530
#'
509531
#' @details
510532
#'
@@ -513,6 +535,16 @@ epix_detailed_restricted_mutate = function(.data, ...) {
513535
#' to `mutate`. Note that replacing or removing key columns with these
514536
#' expressions is disabled.
515537
#'
538+
#' `archive %>% group_by()` and other expressions that group or regroup by zero
539+
#' columns (indicating that all rows should be treated as part of one large
540+
#' group) will output a `grouped_epi_archive`, in order to enable the use of
541+
#' `grouped_epi_archive` methods on the result. This is in slight contrast to
542+
#' the same operations on tibbles and grouped tibbles, which will *not* output a
543+
#' `grouped_df` in these circumstances.
544+
#'
545+
#' Using `group_by` with `.add=FALSE` to override the existing grouping is
546+
#' disabled; instead, `ungroup` first, then `group_by`.
547+
#'
516548
#' Mutation and aliasing: `group_by` tries to use a shallow copy of the `DT`,
517549
#' introducing column-level aliasing between its input and its result. This
518550
#' doesn't follow the general model for most `data.table` operations, which
@@ -528,8 +560,72 @@ epix_detailed_restricted_mutate = function(.data, ...) {
528560
#' to `group_by_drop_default.default` (but there is a dedicated method for
529561
#' `grouped_epi_archive`s).
530562
#'
563+
#' @examples
564+
#'
565+
#' grouped_archive = archive_cases_dv_subset %>% group_by(geo_value)
566+
#'
567+
#' # `print` for metadata and method listing:
568+
#' grouped_archive %>% print()
569+
#'
570+
#' # The primary use for grouping is to perform a grouped `epix_slide`:
571+
#'
572+
#' archive_cases_dv_subset %>%
573+
#' group_by(geo_value) %>%
574+
#' epix_slide(f = ~ mean(.x$case_rate_7d_av),
575+
#' before = 2,
576+
#' ref_time_values = as.Date("2020-06-11") + 0:2,
577+
#' new_col_name = 'case_rate_3d_av') %>%
578+
#' ungroup()
579+
#'
580+
#' # -----------------------------------------------------------------
581+
#'
582+
#' # Advanced: some other features of dplyr grouping are implemented:
583+
#'
584+
#' library(dplyr)
585+
#' toy_archive =
586+
#' tribble(
587+
#' ~geo_value, ~age_group, ~time_value, ~version, ~value,
588+
#' "us", "adult", "2000-01-01", "2000-01-02", 121,
589+
#' "us", "pediatric", "2000-01-02", "2000-01-03", 5, # (addition)
590+
#' "us", "adult", "2000-01-01", "2000-01-03", 125, # (revision)
591+
#' "us", "adult", "2000-01-02", "2000-01-03", 130 # (addition)
592+
#' ) %>%
593+
#' mutate(age_group = ordered(age_group, c("pediatric", "adult")),
594+
#' time_value = as.Date(time_value),
595+
#' version = as.Date(version)) %>%
596+
#' as_epi_archive(other_keys = "age_group")
597+
#'
598+
#' # The following are equivalent:
599+
#' toy_archive %>% group_by(geo_value, age_group)
600+
#' toy_archive %>% group_by(geo_value) %>% group_by(age_group, .add=TRUE)
601+
#' grouping_cols = c("geo_value", "age_group")
602+
#' toy_archive %>% group_by(across(all_of(grouping_cols)))
603+
#'
604+
#' # And these are equivalent:
605+
#' toy_archive %>% group_by(geo_value)
606+
#' toy_archive %>% group_by(geo_value, age_group) %>% ungroup(age_group)
607+
#'
608+
#' # To get the grouping variable names as a `list` of `name`s (a.k.a. symbols):
609+
#' toy_archive %>% group_by(geo_value) %>% groups()
610+
#'
611+
#' # `.drop = FALSE` is supported in a sense; `f` is called on 0-row inputs for
612+
#' # the missing groups identified by `dplyr`, but the row-recycling rules will
613+
#' # exclude the corresponding outputs of `f` from the output of the slide:
614+
#' all.equal(
615+
#' toy_archive %>%
616+
#' group_by(geo_value, age_group, .drop=FALSE) %>%
617+
#' epix_slide(f = ~ sum(.x$value), before = 20) %>%
618+
#' ungroup(),
619+
#' toy_archive %>%
620+
#' group_by(geo_value, age_group, .drop=TRUE) %>%
621+
#' epix_slide(f = ~ sum(.x$value), before = 20) %>%
622+
#' ungroup()
623+
#' )
624+
#'
531625
#' @importFrom dplyr group_by
532626
#' @export
627+
#'
628+
#' @aliases grouped_epi_archive
533629
group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_drop_default(.data)) {
534630
# `.add` makes no difference; this is an ungrouped `epi_archive`.
535631
detailed_mutate = epix_detailed_restricted_mutate(.data, ...)
@@ -693,8 +789,8 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
693789
#' epix_slide(f = ~ mean(.x$case_rate_7d_av),
694790
#' before = 2,
695791
#' ref_time_values = ref_time_values,
696-
#' new_col_name = 'case_rate_3d_av',
697-
#' groups = "drop")
792+
#' new_col_name = 'case_rate_3d_av') %>%
793+
#' ungroup()
698794
#'
699795
#' @importFrom rlang enquo
700796
#' @export

man/epix_slide.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)