@@ -504,7 +504,29 @@ epix_detailed_restricted_mutate = function(.data, ...) {
504504
505505# ' `group_by` and related methods for `epi_archive`, `grouped_epi_archive`
506506# '
507- # ' @aliases grouped_epi_archive
507+ # ' @param .data An `epi_archive` or `grouped_epi_archive`
508+ # ' @param ... Similar to [`dplyr::group_by`] (see "Details:" for edge cases):
509+ # ' * In `group_by`: unquoted variable name(s) or other ["data
510+ # ' masking"][dplyr::dplyr_data_masking] expression(s). It's possible to use
511+ # ' [`dplyr::mutate`]-like syntax here to calculate new columns on which to
512+ # ' perform grouping, but note that, if you are regrouping an already-grouped
513+ # ' `.data` object, the calculations will be carried out ignoring such grouping
514+ # ' (same as [in \code{dplyr}][dplyr::group_by]).
515+ # ' * In `ungroup`: either
516+ # ' * empty, in order to remove the grouping and output an `epi_archive`; or
517+ # ' * variable name(s) or other ["tidy-select"][dplyr::dplyr_tidy_select]
518+ # ' expression(s), in order to remove the matching variables from the list of
519+ # ' grouping variables, and output another `grouped_epi_archive`.
520+ # ' @param .add Boolean. If `FALSE`, the default, the output will be grouped by
521+ # ' the variable selection from `...` only; if `TRUE`, the output will be
522+ # ' grouped by the current grouping variables plus the variable selection from
523+ # ' `...`.
524+ # ' @param .drop As in [`dplyr::group_by`]; determines treatment of factor
525+ # ' columns.
526+ # ' @param x A `grouped_epi_archive`, or, in `is_grouped_epi_archive`, any object
527+ # ' @param .tbl An `epi_archive` or `grouped_epi_archive` (`epi_archive`
528+ # ' dispatches to the S3 default method, and `grouped_epi_archive` dispatches
529+ # ' to its own S3 method)
508530# '
509531# ' @details
510532# '
@@ -513,6 +535,16 @@ epix_detailed_restricted_mutate = function(.data, ...) {
513535# ' to `mutate`. Note that replacing or removing key columns with these
514536# ' expressions is disabled.
515537# '
538+ # ' `archive %>% group_by()` and other expressions that group or regroup by zero
539+ # ' columns (indicating that all rows should be treated as part of one large
540+ # ' group) will output a `grouped_epi_archive`, in order to enable the use of
541+ # ' `grouped_epi_archive` methods on the result. This is in slight contrast to
542+ # ' the same operations on tibbles and grouped tibbles, which will *not* output a
543+ # ' `grouped_df` in these circumstances.
544+ # '
545+ # ' Using `group_by` with `.add=FALSE` to override the existing grouping is
546+ # ' disabled; instead, `ungroup` first then `group_by`.
547+ # '
516548# ' Mutation and aliasing: `group_by` tries to use a shallow copy of the `DT`,
517549# ' introducing column-level aliasing between its input and its result. This
518550# ' doesn't follow the general model for most `data.table` operations, which
@@ -528,8 +560,72 @@ epix_detailed_restricted_mutate = function(.data, ...) {
528560# ' to `group_by_drop_default.default` (but there is a dedicated method for
529561# ' `grouped_epi_archive`s).
530562# '
563+ # ' @examples
564+ # '
565+ # ' grouped_archive = archive_cases_dv_subset %>% group_by(geo_value)
566+ # '
567+ # ' # `print` for metadata and method listing:
568+ # ' grouped_archive %>% print()
569+ # '
570+ # ' # The primary use for grouping is to perform a grouped `epix_slide`:
571+ # '
572+ # ' archive_cases_dv_subset %>%
573+ # ' group_by(geo_value) %>%
574+ # ' epix_slide(f = ~ mean(.x$case_rate_7d_av),
575+ # ' before = 2,
576+ # ' ref_time_values = as.Date("2020-06-11") + 0:2,
577+ # ' new_col_name = 'case_rate_3d_av') %>%
578+ # ' ungroup()
579+ # '
580+ # ' # -----------------------------------------------------------------
581+ # '
582+ # ' # Advanced: some other features of dplyr grouping are implemented:
583+ # '
584+ # ' library(dplyr)
585+ # ' toy_archive =
586+ # ' tribble(
587+ # ' ~geo_value, ~age_group, ~time_value, ~version, ~value,
588+ # ' "us", "adult", "2000-01-01", "2000-01-02", 121,
589+ # ' "us", "pediatric", "2000-01-02", "2000-01-03", 5, # (addition)
590+ # ' "us", "adult", "2000-01-01", "2000-01-03", 125, # (revision)
591+ # ' "us", "adult", "2000-01-02", "2000-01-03", 130 # (addition)
592+ # ' ) %>%
593+ # ' mutate(age_group = ordered(age_group, c("pediatric", "adult")),
594+ # ' time_value = as.Date(time_value),
595+ # ' version = as.Date(version)) %>%
596+ # ' as_epi_archive(other_keys = "age_group")
597+ # '
598+ # ' # The following are equivalent:
599+ # ' toy_archive %>% group_by(geo_value, age_group)
600+ # ' toy_archive %>% group_by(geo_value) %>% group_by(age_group, .add=TRUE)
601+ # ' grouping_cols = c("geo_value", "age_group")
602+ # ' toy_archive %>% group_by(across(all_of(grouping_cols)))
603+ # '
604+ # ' # And these are equivalent:
605+ # ' toy_archive %>% group_by(geo_value)
606+ # ' toy_archive %>% group_by(geo_value, age_group) %>% ungroup(age_group)
607+ # '
608+ # ' # To get the grouping variable names as a `list` of `name`s (a.k.a. symbols):
609+ # ' toy_archive %>% group_by(geo_value) %>% groups()
610+ # '
611+ # ' # `.drop = FALSE` is supported in a sense; `f` is called on 0-row inputs for
612+ # ' # the missing groups identified by `dplyr`, but the row-recycling rules will
613+ # ' # exclude the corresponding outputs of `f` from the output of the slide:
614+ # ' all.equal(
615+ # ' toy_archive %>%
616+ # ' group_by(geo_value, age_group, .drop=FALSE) %>%
617+ # ' epix_slide(f = ~ sum(.x$value), before = 20) %>%
618+ # ' ungroup(),
619+ # ' toy_archive %>%
620+ # ' group_by(geo_value, age_group, .drop=TRUE) %>%
621+ # ' epix_slide(f = ~ sum(.x$value), before = 20) %>%
622+ # ' ungroup()
623+ # ' )
624+ # '
531625# ' @importFrom dplyr group_by
532626# ' @export
627+ # '
628+ # ' @aliases grouped_epi_archive
533629group_by.epi_archive = function (.data , ... , .add = FALSE , .drop = dplyr :: group_by_drop_default(.data )) {
534630 # `add` makes no difference; this is an ungrouped `epi_archive`.
535631 detailed_mutate = epix_detailed_restricted_mutate(.data , ... )
@@ -693,8 +789,8 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
693789# ' epix_slide(f = ~ mean(.x$case_rate_7d_av),
694790# ' before = 2,
695791# ' ref_time_values = ref_time_values,
696- # ' new_col_name = 'case_rate_3d_av',
697- # ' groups = "drop" )
792+ # ' new_col_name = 'case_rate_3d_av') %>%
793+ # ' ungroup( )
698794# '
699795# ' @importFrom rlang enquo
700796# ' @export
0 commit comments