Skip to content

Commit f94030a

Browse files
committed
perf: avoid full edf group reconstruction when grouping is known valid
1 parent 64cf7d3 commit f94030a

File tree

2 files changed

+63
-13
lines changed

2 files changed

+63
-13
lines changed

R/methods-epi_df.R

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,30 @@ dplyr_reconstruct.epi_df <- function(data, template) {
164164
# keep any grouping that has been applied:
165165
res <- NextMethod()
166166

167-
col_names <- names(res)
167+
reconstruct_light_edf(res, template)
168+
}
169+
170+
#' Like `dplyr_reconstruct.epi_df` but not recomputing any grouping
171+
#'
172+
#' In the move to our current not-quite-proper/effective "implementation" of
173+
#' [`dplyr::dplyr_extending`] for `epi_df`s, we moved a lot of checks in
174+
#' `dplyr_reconstruct` and used it instead of [`reclass()`] in various
175+
#' operations to prevent operations from outputting invalid metadata/classes,
176+
#' instead of more careful tailored and relevant checks. However, this actually
177+
#' introduced extra overhead due to `dplyr_reconstruct.epi_df()` passing off to
178+
#' `dplyr_reconstruct.grouped_df()` when grouped, which assumes that it will
179+
#' need to / should for safety recompute the groups, even when it'd be safe for
180+
#' it not to do so. In many operations, we're using `NextMethod()` to dispatch
181+
#' to `grouped_df` behavior if needed, and it should output something with valid
182+
#' groupings.
183+
#'
184+
#' This function serves the original purpose of performing `epi_df`-centric
185+
#' checks rather than just throwing on potentially-incorrect metadata like
186+
#' `reclass()`, but without unnecessary `dplyr_reconstruct()` delegation.
187+
#'
188+
#' @keywords internal
189+
reconstruct_light_edf <- function(data, template) {
190+
col_names <- names(data)
168191

169192
# Duplicate columns, cli_abort
170193
dup_col_names <- col_names[duplicated(col_names)]
@@ -182,23 +205,23 @@ dplyr_reconstruct.epi_df <- function(data, template) {
182205
if (not_epi_df) {
183206
# If we're calling on an `epi_df` from one of our own functions, we need to
184207
# decay to a non-`epi_df` result. If `dplyr` is calling, `x` is a tibble,
185-
# `res` is not an `epi_df` yet (but might, e.g., be a `grouped_df`), and we
208+
# `data` is not an `epi_df` yet (but might, e.g., be a `grouped_df`), and we
186209
# simply need to skip adding the metadata & class. Current `decay_epi_df`
187210
# should work in both cases.
188-
return(decay_epi_df(res))
211+
return(decay_epi_df(data))
189212
}
190213

191-
res <- reclass(res, attr(template, "metadata"))
214+
data <- reclass(data, attr(template, "metadata"))
192215

193216
# XXX we may want verify the `geo_type` and `time_type` here. If it's
194217
# significant overhead, we may also want to keep this less strict version
195218
# around and implement some extra S3 methods that use it, when appropriate.
196219

197220
# Amend additional metadata if some other_keys cols are dropped in the subset
198221
old_other_keys <- attr(template, "metadata")$other_keys
199-
attr(res, "metadata")$other_keys <- old_other_keys[old_other_keys %in% col_names]
222+
attr(data, "metadata")$other_keys <- old_other_keys[old_other_keys %in% col_names]
200223

201-
res
224+
data
202225
}
203226

204227
#' @export
@@ -209,19 +232,19 @@ dplyr_reconstruct.epi_df <- function(data, template) {
209232
return(res)
210233
}
211234

212-
dplyr::dplyr_reconstruct(res, x)
235+
reconstruct_light_edf(res, x)
213236
}
214237

215238
#' @importFrom dplyr dplyr_col_modify
216239
#' @export
217240
dplyr_col_modify.epi_df <- function(data, cols) {
218-
dplyr::dplyr_reconstruct(NextMethod(), data)
241+
reconstruct_light_edf(NextMethod(), data)
219242
}
220243

221244
#' @importFrom dplyr dplyr_row_slice
222245
#' @export
223246
dplyr_row_slice.epi_df <- function(data, i, ...) {
224-
dplyr::dplyr_reconstruct(NextMethod(), data)
247+
reconstruct_light_edf(NextMethod(), data)
225248
}
226249

227250
#' @export
@@ -235,7 +258,7 @@ dplyr_row_slice.epi_df <- function(data, i, ...) {
235258
new_metadata[["other_keys"]] <- new_other_keys
236259
}
237260
result <- reclass(NextMethod(), new_metadata)
238-
dplyr::dplyr_reconstruct(result, result)
261+
reconstruct_light_edf(result, result)
239262
}
240263

241264
#' @method group_by epi_df
@@ -264,7 +287,7 @@ ungroup.epi_df <- function(x, ...) {
264287
#' @param .keep Boolean; see [`dplyr::group_modify`]
265288
#' @export
266289
group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
267-
dplyr::dplyr_reconstruct(NextMethod(), .data)
290+
reconstruct_light_edf(NextMethod(), .data)
268291
}
269292

270293
#' "Complete" an `epi_df`, adding missing rows and/or replacing `NA`s
@@ -344,7 +367,7 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
344367
#' )
345368
#' @export
346369
complete.epi_df <- function(data, ..., fill = list(), explicit = TRUE) {
347-
result <- dplyr::dplyr_reconstruct(NextMethod(), data)
370+
result <- reconstruct_light_edf(NextMethod(), data)
348371
if ("time_value" %in% names(rlang::call_match(dots_expand = FALSE)[["..."]])) {
349372
attr(result, "metadata")$time_type <- guess_time_type(result$time_value)
350373
}
@@ -356,7 +379,7 @@ complete.epi_df <- function(data, ..., fill = list(), explicit = TRUE) {
356379
#' @param data an `epi_df`
357380
#' @export
358381
unnest.epi_df <- function(data, ...) {
359-
dplyr::dplyr_reconstruct(NextMethod(), data)
382+
reconstruct_light_edf(NextMethod(), data)
360383
}
361384

362385
# Simple reclass function

man/reconstruct_light_edf.Rd

Lines changed: 27 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)