From f391498fe0a5e1da2d1793e6b0683ca5861bc389 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Tue, 28 Oct 2025 13:02:58 -0400 Subject: [PATCH 1/9] Add Claude Code files --- .Rbuildignore | 2 + .claude/settings.local.json | 14 +++++++ CLAUDE.md | 75 +++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 .claude/settings.local.json create mode 100644 CLAUDE.md diff --git a/.Rbuildignore b/.Rbuildignore index 1606fb44c2..566845bf86 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -44,3 +44,5 @@ ^\.cache$ ^\.vscode$ ^[\.]?air\.toml$ +^\.claude$ +^CLAUDE\.md$ diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000000..6965e66ce4 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,14 @@ +{ + "$schema": "https://json.schemastore.org/claude-code-settings.json", + "permissions": { + "allow": [ + "Bash(find:*)", + "Bash(R:*)", + "Bash(air format:*)", + "Edit(R/**)", + "Edit(tests/**)", + "Edit(vignettes/**)" + ], + "deny": [] + } +} diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000000..b3def8fe79 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,75 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Package Overview + +dplyr is a tidyverse R package providing a grammar of data manipulation. + +## Key development commands + +### General + +- When running R from the console, always run it with `--quiet --vanilla` +- Always run `air format .` after generating code + +### Testing + +- Tests for `R/{name}.R` go in `tests/testthat/test-{name}.R` +- Use `devtools::test(reporter = "check")` to run all tests +- Use `devtools::test(filter = "{name}", reporter = "check")` to run tests for `R/{name}.R` +- DO NOT USE `devtools::test_active_file()` +- All testing functions automatically load code; you don't need to +- All new code should have an accompanying test +- If there are existing tests, place new tests next to similar existing tests + +### Documentation + +- Run `devtools::document()` after changing any roxygen2 docs +- Every user facing function should be exported and have roxygen2 documentation +- Whenever you add a new documentation file, make sure to also add the topic name to `_pkgdown.yml` +- Run `pkgdown::check_pkgdown()` to check that all topics are included in the reference index +- Use sentence case for all headings +- Any user facing changes should be briefly described in a bullet point at the top of `NEWS.md`, following the tidyverse style guide (https://style.tidyverse.org/news.html) + +### Code style + +- Use newspaper style/high-level first function organisation. Main logic at the top and helper functions should come below +- Don't define functions inside of functions unless they are very brief +- Error messages should use `cli::cli_abort()` and follow the tidyverse style guide (https://style.tidyverse.org/errors.html) + +## Architecture + +### Key Design Patterns + +#### Standalone Imports +- Self-contained utility functions +- `import-standalone-*.R` files reduce dependencies +- Imported from other tidyverse packages + +## Key Files + +### Core Implementation +- `R/` - R implementation files +- `src/` - C/C++ implementation files + +### Testing and Quality +- `tests/testthat/` - Test suite +- `vignettes/` - Documentation and examples +- `.github/workflows/` - CI/CD with R CMD check + +## Development Notes + +### Testing Strategy +- Snapshot testing for output validation +- Separate test files for each major component + +### Code Organization +- Utility functions grouped by purpose (`utils-*.R`) +- Standalone imports minimize external dependencies + +### Documentation +- Roxygen2 comments for all exported functions +- Vignettes demonstrate key use cases +- pkgdown site provides comprehensive documentation +- Examples use realistic but safe API interactions From c9ff2ed11e8a81e758bb45b6e66d2f04f40c5cfc Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Tue, 28 Oct 2025 13:16:08 -0400 Subject: [PATCH 2/9] More tweaks --- .claude/settings.local.json | 4 +++- CLAUDE.md | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 6965e66ce4..7e08bd20c4 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -4,10 +4,12 @@ "allow": [ "Bash(find:*)", "Bash(R:*)", + "Bash(Rscript:*)", "Bash(air format:*)", "Edit(R/**)", "Edit(tests/**)", - "Edit(vignettes/**)" + "Edit(vignettes/**)", + "Edit(NEWS.md)" ], "deny": [] } diff --git a/CLAUDE.md b/CLAUDE.md index b3def8fe79..c950254360 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -31,6 +31,7 @@ dplyr is a tidyverse R package providing a grammar of data manipulation. - Run `pkgdown::check_pkgdown()` to check that all topics are included in the reference index - Use sentence case for all headings - Any user facing changes should be briefly described in a bullet point at the top of `NEWS.md`, following the tidyverse style guide (https://style.tidyverse.org/news.html) +- Any bullets added to `NEWS.md` should always be placed at the top, just under `# {package} (development version)` ### Code style From 16ba69dc3ac37b7a031f6edfab36e2a8842af479 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Tue, 28 Oct 2025 13:16:57 -0400 Subject: [PATCH 3/9] Accept exactly what Claude Code gave us --- NEWS.md | 4 ++++ R/across.R | 16 ++++++++++++- tests/testthat/test-across.R | 44 ++++++++++++++++++++++++++++++++++-- 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 190a926d36..a86d51feda 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,10 @@ * `case_when()` now throws correctly indexed errors when `NULL`s are supplied in `...` (#7739). +* Fixed inconsistent behavior in `if_any()` when called with no inputs: it now correctly returns `FALSE` in all contexts, matching the behavior of `any()` (#7077). + +* Fixed behavior of `if_any()` and `if_all()` when called with a single column input: they now properly return logical vectors rather than the column itself (#7746). + * `case_when()` has gained a new `.unmatched` argument. For extra safety, set `.unmatched = "error"` rather than providing a `.default` when you believe that you've handled every possible case, and it will error if a case is left unhandled. The new `recode_values()` also has this argument (#7653). * New `rbind()` method for `rowwise_df` to avoid creating corrupt rowwise data frames (r-lib/vctrs#1935). diff --git a/R/across.R b/R/across.R index 48f765e8fe..8f9c659eb5 100644 --- a/R/across.R +++ b/R/across.R @@ -26,6 +26,10 @@ #' - `if_all()` will return `TRUE`, consistent with the behavior of #' `all()` when called without inputs. #' +#' When a single column is provided, both `if_any()` and `if_all()` will +#' convert the column to a logical vector, ensuring consistent behavior with +#' the multiple-column case. +#' #' @param .cols <[`tidy-select`][dplyr_tidy_select]> Columns to transform. #' You can't select grouping columns because they are already automatically #' handled by the verb (i.e. [summarise()] or [mutate()]). @@ -363,7 +367,17 @@ if_across <- function(op, df) { n <- nrow(df) if (!length(df)) { - return(TRUE) + # Return FALSE for if_any() (which uses `|`) + # Return TRUE for if_all() (which uses `&`) + return(identical(op, `&`)) + } + + if (length(df) == 1) { + # For a single column, ensure we convert to a logical value + # Apply the identity function with the appropriate operator + # For if_any(), this is equivalent to `||(col, FALSE)` + # For if_all(), this is equivalent to `&&(col, TRUE)` + return(op(df[[1]], identical(op, `&`))) } combine <- function(x, y) { diff --git a/tests/testthat/test-across.R b/tests/testthat/test-across.R index 83b7b8f082..e827249822 100644 --- a/tests/testthat/test-across.R +++ b/tests/testthat/test-across.R @@ -633,6 +633,38 @@ test_that("if_any() and if_all() respect filter()-like NA handling", { ) }) +test_that("if_any() and if_all() work correctly with single column inputs (#7746)", { + # Test data + df <- tibble( + col_a = c(5, 12, 8), + col_b = c(7, 3, 15) + ) + + # Test with a single column and no function - should return a logical vector, not the column itself + result1 <- mutate(df, single_any = if_any(col_a)) + expect_type(result1$single_any, "logical") + expect_equal(result1$single_any, df$col_a > 0) + + result2 <- mutate(df, single_all = if_all(col_a)) + expect_type(result2$single_all, "logical") + expect_equal(result2$single_all, df$col_a > 0) + + # Test with a single column and a function + result3 <- mutate(df, big_any = if_any(col_a, ~ .x > 10)) + expect_equal(result3$big_any, c(FALSE, TRUE, FALSE)) + + result4 <- mutate(df, big_all = if_all(col_a, ~ .x > 10)) + expect_equal(result4$big_all, c(FALSE, TRUE, FALSE)) + + # Compare with multiple columns behavior for consistency + result_multi_any <- mutate(df, multi_any = if_any(c(col_a, col_b), ~ .x > 10)) + expect_equal(result_multi_any$multi_any, c(FALSE, TRUE, TRUE)) + + result_multi_all <- mutate(df, multi_all = if_all(c(col_a, col_b), ~ .x > 10)) + # The third row has col_a=8, col_b=15, so only col_b > 10 + expect_equal(result_multi_all$multi_all, c(FALSE, FALSE, FALSE)) +}) + test_that("if_any() and if_all() aborts when predicate mistakingly used in .cols= (#5732)", { df <- data.frame(x = 1:10, y = 1:10) expect_snapshot({ @@ -1005,17 +1037,25 @@ test_that("if_any() and if_all() expansions deal with no inputs or single inputs ) }) -test_that("if_any() on zero-column selection behaves like any() (#7059)", { +test_that("if_any() on zero-column selection behaves like any() (#7059, #7077)", { tbl <- tibble( x1 = 1:5, x2 = c(-1, 4, 5, 4, 1), y = c(1, 4, 2, 4, 9), ) + # Test behavior in filter() - should return no rows expect_equal( filter(tbl, if_any(c(), ~ is.na(.x))), tbl[0, ] ) + + # Test behavior in mutate() - should return all FALSE + result <- mutate(tbl, z = if_any(c(), ~ is.na(.x))) + expect_equal( + result$z, + rep(FALSE, nrow(tbl)) + ) }) test_that("if_all() on zero-column selection behaves like all() (#7059)", { @@ -1037,7 +1077,7 @@ test_that("if_any() and if_all() wrapped deal with no inputs or single inputs", # No inputs expect_equal( filter(d, (if_any(starts_with("c"), ~FALSE))), - filter(d) + filter(d, FALSE) # Changed from filter(d) to filter(d, FALSE) ) expect_equal( filter(d, (if_all(starts_with("c"), ~FALSE))), From 3aecb7c4922239516ddf8254568462e72d281ed2 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Tue, 28 Oct 2025 13:52:06 -0400 Subject: [PATCH 4/9] Rework Claude's attempt --- NEWS.md | 8 +++--- R/across.R | 46 ++++++++++++-------------------- tests/testthat/test-across.R | 51 ++++++++++++++++++++++++++++-------- 3 files changed, 61 insertions(+), 44 deletions(-) diff --git a/NEWS.md b/NEWS.md index a86d51feda..cb21376acb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # dplyr (development version) +* `if_any()` now correctly returns `FALSE` when called without inputs, matching the behavior of `any()` (#7077). + +* `if_any()` and `if_all()` now properly return logical vectors rather than the column itself when called with a single input (#7746). + * Empty `rowwise()` list-column elements now resolve to `logical()` rather than a random logical of length 1 (#7710). * `last_dplyr_warnings()` no longer prevents objects from being garbage collected (#7649). @@ -8,10 +12,6 @@ * `case_when()` now throws correctly indexed errors when `NULL`s are supplied in `...` (#7739). -* Fixed inconsistent behavior in `if_any()` when called with no inputs: it now correctly returns `FALSE` in all contexts, matching the behavior of `any()` (#7077). - -* Fixed behavior of `if_any()` and `if_all()` when called with a single column input: they now properly return logical vectors rather than the column itself (#7746). - * `case_when()` has gained a new `.unmatched` argument. For extra safety, set `.unmatched = "error"` rather than providing a `.default` when you believe that you've handled every possible case, and it will error if a case is left unhandled. The new `recode_values()` also has this argument (#7653). * New `rbind()` method for `rowwise_df` to avoid creating corrupt rowwise data frames (r-lib/vctrs#1935). diff --git a/R/across.R b/R/across.R index 8f9c659eb5..80551d7310 100644 --- a/R/across.R +++ b/R/across.R @@ -26,10 +26,6 @@ #' - `if_all()` will return `TRUE`, consistent with the behavior of #' `all()` when called without inputs. #' -#' When a single column is provided, both `if_any()` and `if_all()` will -#' convert the column to a logical vector, ensuring consistent behavior with -#' the multiple-column case. -#' #' @param .cols <[`tidy-select`][dplyr_tidy_select]> Columns to transform. #' You can't select grouping columns because they are already automatically #' handled by the verb (i.e. [summarise()] or [mutate()]). @@ -353,41 +349,33 @@ across <- function(.cols, .fns, ..., .names = NULL, .unpack = FALSE) { if_any <- function(.cols, .fns, ..., .names = NULL) { context_local("across_if_fn", "if_any") context_local("across_frame", current_env()) - if_across(`|`, across({{ .cols }}, .fns, ..., .names = .names)) + if_across("any", across({{ .cols }}, .fns, ..., .names = .names)) } #' @rdname across #' @export if_all <- function(.cols, .fns, ..., .names = NULL) { context_local("across_if_fn", "if_all") context_local("across_frame", current_env()) - if_across(`&`, across({{ .cols }}, .fns, ..., .names = .names)) + if_across("all", across({{ .cols }}, .fns, ..., .names = .names)) } -if_across <- function(op, df) { - n <- nrow(df) - - if (!length(df)) { - # Return FALSE for if_any() (which uses `|`) - # Return TRUE for if_all() (which uses `&`) - return(identical(op, `&`)) - } +if_across <- function(variant, df) { + switch( + variant, + any = { + op <- `|` + init <- FALSE + }, + all = { + op <- `&` + init <- TRUE + }, + abort("Unreachable", .internal = TRUE) + ) - if (length(df) == 1) { - # For a single column, ensure we convert to a logical value - # Apply the identity function with the appropriate operator - # For if_any(), this is equivalent to `||(col, FALSE)` - # For if_all(), this is equivalent to `&&(col, TRUE)` - return(op(df[[1]], identical(op, `&`))) - } + init <- vec_rep(init, times = vec_size(df)) - combine <- function(x, y) { - if (is_null(x)) { - y - } else { - op(x, y) - } - } - reduce(df, combine, .init = NULL) + reduce(df, op, .init = init) } #' Combine values from multiple columns diff --git a/tests/testthat/test-across.R b/tests/testthat/test-across.R index e827249822..31ee06d930 100644 --- a/tests/testthat/test-across.R +++ b/tests/testthat/test-across.R @@ -634,6 +634,9 @@ test_that("if_any() and if_all() respect filter()-like NA handling", { }) test_that("if_any() and if_all() work correctly with single column inputs (#7746)", { + # TODO: Still need to fix up this one + # TODO: Still need to compare to `expand_if_across()` inlining behavior + # Test data df <- tibble( col_a = c(5, 12, 8), @@ -1044,21 +1047,26 @@ test_that("if_any() on zero-column selection behaves like any() (#7059, #7077)", y = c(1, 4, 2, 4, 9), ) - # Test behavior in filter() - should return no rows expect_equal( - filter(tbl, if_any(c(), ~ is.na(.x))), - tbl[0, ] + filter(tbl, if_any(c(), ~FALSE)), + filter(tbl, FALSE) + ) + expect_equal( + filter(tbl, if_any(c(), ~TRUE)), + filter(tbl, FALSE) ) - # Test behavior in mutate() - should return all FALSE - result <- mutate(tbl, z = if_any(c(), ~ is.na(.x))) expect_equal( - result$z, + pull(mutate(tbl, z = if_any(c(), ~FALSE)), z), + rep(FALSE, nrow(tbl)) + ) + expect_equal( + pull(mutate(tbl, z = if_any(c(), ~TRUE)), z), rep(FALSE, nrow(tbl)) ) }) -test_that("if_all() on zero-column selection behaves like all() (#7059)", { +test_that("if_all() on zero-column selection behaves like all() (#7059, #7077)", { tbl <- tibble( x1 = 1:5, x2 = c(-1, 4, 5, 4, 1), @@ -1066,8 +1074,21 @@ test_that("if_all() on zero-column selection behaves like all() (#7059)", { ) expect_equal( - filter(tbl, if_all(c(), ~ is.na(.x))), - tbl + filter(tbl, if_all(c(), ~FALSE)), + filter(tbl, TRUE) + ) + expect_equal( + filter(tbl, if_all(c(), ~TRUE)), + filter(tbl, TRUE) + ) + + expect_equal( + pull(mutate(tbl, z = if_all(c(), ~FALSE)), z), + rep(TRUE, nrow(tbl)) + ) + expect_equal( + pull(mutate(tbl, z = if_all(c(), ~TRUE)), z), + rep(TRUE, nrow(tbl)) ) }) @@ -1077,11 +1098,11 @@ test_that("if_any() and if_all() wrapped deal with no inputs or single inputs", # No inputs expect_equal( filter(d, (if_any(starts_with("c"), ~FALSE))), - filter(d, FALSE) # Changed from filter(d) to filter(d, FALSE) + filter(d, FALSE) ) expect_equal( filter(d, (if_all(starts_with("c"), ~FALSE))), - filter(d) + filter(d, TRUE) ) # Single inputs @@ -1093,6 +1114,14 @@ test_that("if_any() and if_all() wrapped deal with no inputs or single inputs", filter(d, (if_all(x, ~FALSE))), filter(d, FALSE) ) + expect_equal( + filter(d, (if_any(x, ~TRUE))), + filter(d, TRUE) + ) + expect_equal( + filter(d, (if_all(x, ~TRUE))), + filter(d, TRUE) + ) }) test_that("expanded if_any() finds local data", { From c22e0f6354d03cfdf2506aef58d78cae7ce24d00 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Wed, 29 Oct 2025 15:36:57 -0400 Subject: [PATCH 5/9] Make evaluation and expansion cases more consistent And add a battery of tests to ensure we don't regress on this consistency --- NEWS.md | 12 +- R/across.R | 151 ++++++---- R/filter.R | 21 +- R/vctrs.R | 25 ++ tests/testthat/_snaps/across.md | 466 ++++++++++++++++++++++++++++++ tests/testthat/test-across.R | 484 ++++++++++++++++++++++++++++---- 6 files changed, 1038 insertions(+), 121 deletions(-) diff --git a/NEWS.md b/NEWS.md index cb21376acb..59c957a902 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,12 @@ # dplyr (development version) -* `if_any()` now correctly returns `FALSE` when called without inputs, matching the behavior of `any()` (#7077). +* `if_any()` and `if_all()` are now more consistent in all use cases (#7059, #7077, #7746, @jrwinget). In particular: -* `if_any()` and `if_all()` now properly return logical vectors rather than the column itself when called with a single input (#7746). + * When called with zero inputs, `if_any()` returns `FALSE` and `if_all()` returns `TRUE`. + + * When called with one input, both now return logical vectors rather than the original column. + + * The result of applying `.fns` now must be a logical vector. * Empty `rowwise()` list-column elements now resolve to `logical()` rather than a random logical of length 1 (#7710). @@ -101,10 +105,6 @@ * Fixed an issue where duckplyr's ALTREP data frames were being materialized early due to internal usage of `ncol()` (#7049). -* `if_any()` and `if_all()` are now fully consistent with `any()` and `all()`. - In particular, when called with empty inputs `if_any()` returns `FALSE` and - `if_all()` returns `TRUE` (#7059, @jrwinget). - # dplyr 1.1.4 * `join_by()` now allows its helper functions to be namespaced with `dplyr::`, diff --git a/R/across.R b/R/across.R index 80551d7310..e44d324cc4 100644 --- a/R/across.R +++ b/R/across.R @@ -349,17 +349,71 @@ across <- function(.cols, .fns, ..., .names = NULL, .unpack = FALSE) { if_any <- function(.cols, .fns, ..., .names = NULL) { context_local("across_if_fn", "if_any") context_local("across_frame", current_env()) - if_across("any", across({{ .cols }}, .fns, ..., .names = .names)) + df <- across({{ .cols }}, .fns, ..., .names = .names) + x <- dplyr_new_list(df) + size <- vec_size(df) + dplyr_list_pany(x, size = size) } + #' @rdname across #' @export if_all <- function(.cols, .fns, ..., .names = NULL) { context_local("across_if_fn", "if_all") context_local("across_frame", current_env()) - if_across("all", across({{ .cols }}, .fns, ..., .names = .names)) + df <- across({{ .cols }}, .fns, ..., .names = .names) + x <- dplyr_new_list(df) + size <- vec_size(df) + dplyr_list_pall(x, size = size) } -if_across <- function(variant, df) { +dplyr_list_pall <- function( + x, + ..., + size = NULL, + error_call = caller_env() +) { + dplyr_list_pany_pall(x, "all", ..., size = size, error_call = error_call) +} + +dplyr_list_pany <- function( + x, + ..., + size = NULL, + error_call = caller_env() +) { + dplyr_list_pany_pall(x, "any", ..., size = size, error_call = error_call) +} + +dplyr_list_pany_pall <- function( + x, + variant, + ..., + size = NULL, + error_call = caller_env() +) { + check_dots_empty0(...) + + obj_check_list(x, arg = "", call = error_call) + + # Doesn't allow `NULL`, doesn't do type casting + for (i in seq_along(x)) { + check_logical( + x[[i]], + arg = (vec_names(x) %||% paste0("..", seq_len(vec_size(x))))[[i]], + call = error_call + ) + } + + # Doesn't do recycling, asserts that all inputs are the same size + if (is.null(size)) { + if (length(x) == 0L) { + size <- 0L + } else { + size <- vec_size(x[[1L]]) + } + } + list_check_all_size(x, size, arg = "", call = error_call) + switch( variant, any = { @@ -373,9 +427,9 @@ if_across <- function(variant, df) { abort("Unreachable", .internal = TRUE) ) - init <- vec_rep(init, times = vec_size(df)) + init <- vec_rep(init, times = size) - reduce(df, op, .init = init) + reduce(x, op, .init = init) } #' Combine values from multiple columns @@ -610,79 +664,68 @@ dplyr_quosures <- function(...) { quosures } -# When mutate() or summarise() have an unnamed call to across() at the top level, e.g. -# summarise(across(<...>)) or mutate(across(<...>)) -# -# a call to top_across(<...>) is evaluated instead. -# top_across() returns a flattened list of expressions along with some -# information about the "current column" for each expression -# in the "columns" attribute: -# -# For example with: summarise(across(c(x, y), mean, .names = "mean_{.col}")) top_across() will return -# something like: -# -# structure( -# list(mean_x = expr(mean(x)), mean_y = expr(mean(y))) -# columns = c("x", "y") -# ) - -# Technically this always returns a single quosure but we wrap it in a -# list to follow the pattern in `expand_across()` +# Always guaranteed to be 1 quosure in, 1 quosure out, unlike `expand_across()` expand_if_across <- function(quo) { - quo_data <- attr(quo, "dplyr:::data") - if (!quo_is_call(quo, c("if_any", "if_all"), ns = c("", "dplyr"))) { - return(list(quo)) + if (quo_is_call(quo, "if_any", ns = c("", "dplyr"))) { + variant <- "any" + } else if (quo_is_call(quo, "if_all", ns = c("", "dplyr"))) { + variant <- "all" + } else { + # Refuse to expand + return(quo) } + # `definition` is the same between the two for the purposes of `match.call()` + definition <- if_any + call <- match.call( - definition = if_any, + definition = definition, call = quo_get_expr(quo), expand.dots = FALSE, envir = quo_get_env(quo) ) + if (!is_null(call$...)) { - return(list(quo)) + # Refuse to expand + return(quo) } - if (is_call(call, "if_any")) { - op <- "|" + if (variant == "any") { if_fn <- "if_any" - empty <- FALSE + dplyr_fn <- "dplyr_list_pany" } else { - op <- "&" if_fn <- "if_all" - empty <- TRUE + dplyr_fn <- "dplyr_list_pall" } + # `expand_across()` will always expand at this point given that we bailed on + # `...` usage early on, which is the only case that would stop expansion. + # + # Set frame here for backtrace truncation. But override error call via + # `local_error_call()` so it refers to the function we're expanding, e.g. + # `if_any()` and not `expand_if_across()`. context_local("across_if_fn", if_fn) - - # Set frame here for backtrace truncation. But override error call - # via `local_error_call()` so it refers to the function we're - # expanding, e.g. `if_any()` and not `expand_if_across()`. context_local("across_frame", current_env()) local_error_call(call(if_fn)) - call[[1]] <- quote(across) quos <- expand_across(quo_set_expr(quo, call)) - # Select all rows if there are no inputs for if_all(), - # but select no rows if there are no inputs for if_any(). - if (!length(quos)) { - return(list(quo(!!empty))) - } + expr <- expr({ + ns <- asNamespace("dplyr") - combine <- function(x, y) { - if (is_null(x)) { - y - } else { - call(op, x, y) - } - } - expr <- reduce(quos, combine, .init = NULL) + x <- list(!!!quos) + + # In the evaluation path, `across()` automatically recycles to common size, + # so we must here as well for compatibility. `across()` also returns a 0 + # col, 1 row data frame in the case of no inputs so that it will recycle to + # the group size, which we also do here. + size <- ns[["dplyr_list_size_common"]](x, absent = 1L, call = call(!!if_fn)) + x <- ns[["dplyr_list_recycle_common"]](x, size = size, call = call(!!if_fn)) + + ns[[!!dplyr_fn]](x, size = size, error_call = call(!!if_fn)) + }) - # Use `as_quosure()` instead of `new_quosure()` to avoid rewrapping - # quosure in case of single input - list(as_quosure(expr, env = baseenv())) + new_quosure(expr, env = baseenv()) } expand_across <- function(quo) { diff --git a/R/filter.R b/R/filter.R index a381a1a790..5566f53b8a 100644 --- a/R/filter.R +++ b/R/filter.R @@ -147,8 +147,16 @@ filter_rows <- function( mask <- DataMask$new(data, by, "filter", error_call = error_call) on.exit(mask$forget(), add = TRUE) - dots <- filter_expand(dots, mask = mask, error_call = error_call) - filter_eval(dots, mask = mask, error_call = error_call, user_env = user_env) + # 1:1 mapping between `dots` and `dots_expanded` + dots_expanded <- filter_expand(dots, mask = mask, error_call = error_call) + + filter_eval( + dots = dots, + dots_expanded = dots_expanded, + mask = mask, + error_call = error_call, + user_env = user_env + ) } check_filter <- function(dots, error_call = caller_env()) { @@ -174,6 +182,7 @@ check_filter <- function(dots, error_call = caller_env()) { filter_expand <- function(dots, mask, error_call = caller_env()) { env_filter <- env() + filter_expand_one <- function(dot, index) { env_filter$current_expression <- index dot <- expand_pick(dot, mask) @@ -190,13 +199,15 @@ filter_expand <- function(dots, mask, error_call = caller_env()) { } ) - dots <- list_flatten(dots) - new_quosures(dots) } +# We evaluate `dots_expanded` but report errors relative to `dots` so that +# we show "In argument: `if_any(c(x, y), is.na)`" rather than its expanded form. +# This works because `dots` and `dots_expanded` have a 1:1 mapping. filter_eval <- function( dots, + dots_expanded, mask, error_call = caller_env(), user_env = caller_env(2) @@ -218,7 +229,7 @@ filter_eval <- function( ) out <- withCallingHandlers( - mask$eval_all_filter(dots, env_filter), + mask$eval_all_filter(dots_expanded, env_filter), error = dplyr_error_handler( dots = dots, mask = mask, diff --git a/R/vctrs.R b/R/vctrs.R index 4927ed0a07..39b7bf975a 100644 --- a/R/vctrs.R +++ b/R/vctrs.R @@ -21,3 +21,28 @@ dplyr_vec_ptype_common <- function(chunks, name) { error = common_handler(name) ) } + +# Version of `vec_size_common()` that takes a list. +# Useful for delaying `!!!` when used within an `expr()` call. +dplyr_list_size_common <- function( + x, + ..., + size = NULL, + absent = 0L, + call = caller_env() +) { + check_dots_empty0(...) + vec_size_common(!!!x, .size = size, .absent = absent, .call = call) +} + +# Version of `vec_recycle_common()` that takes a list. +# Useful for delaying `!!!` when used within an `expr()` call. +dplyr_list_recycle_common <- function( + x, + ..., + size = NULL, + call = caller_env() +) { + check_dots_empty0(...) + vec_recycle_common(!!!x, .size = size, .call = call) +} diff --git a/tests/testthat/_snaps/across.md b/tests/testthat/_snaps/across.md index 0fd45398c6..b48918d1d3 100644 --- a/tests/testthat/_snaps/across.md +++ b/tests/testthat/_snaps/across.md @@ -295,6 +295,472 @@ Caused by error: ! Can't subset `.data` outside of a data mask context. +# expand_if_across() expands lambdas + + Code + quo_squash(expand_if_across(quo)) + Output + { + ns <- asNamespace("dplyr") + x <- list(cyl = cyl > 4, am = am > 4) + size <- ns[["dplyr_list_size_common"]](x, absent = 1L, call = call("if_any")) + x <- ns[["dplyr_list_recycle_common"]](x, size = size, call = call("if_any")) + ns[["dplyr_list_pany"]](x, size = size, error_call = call("if_any")) + } + +# `across()` recycle `.fns` results to common size + + Code + mutate(df, across(c(x, y), fn)) + Condition + Error in `mutate()`: + i In argument: `across(c(x, y), fn)`. + Caused by error in `across()`: + ! Can't compute column `x`. + Caused by error in `dplyr_internal_error()`: + +--- + + Code + mutate(df, (across(c(x, y), fn))) + Condition + Error in `mutate()`: + i In argument: `(across(c(x, y), fn))`. + Caused by error: + ! `(across(c(x, y), fn))` must be size 3 or 1, not 2. + +# `if_any()` and `if_all()` have consistent behavior across `filter()` and `mutate()` + + Code + filter(df, if_any(y)) + Condition + Error in `filter()`: + i In argument: `if_any(y)`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, (if_any(y))) + Condition + Error in `filter()`: + i In argument: `(if_any(y))`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + mutate(df, a = if_any(y)) + Condition + Error in `mutate()`: + i In argument: `a = if_any(y)`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, if_any(y, identity)) + Condition + Error in `filter()`: + i In argument: `if_any(y, identity)`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, (if_any(y, identity))) + Condition + Error in `filter()`: + i In argument: `(if_any(y, identity))`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + mutate(df, a = if_any(y, identity)) + Condition + Error in `mutate()`: + i In argument: `a = if_any(y, identity)`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, if_all(y)) + Condition + Error in `filter()`: + i In argument: `if_all(y)`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, (if_all(y))) + Condition + Error in `filter()`: + i In argument: `(if_all(y))`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + mutate(df, a = if_all(y)) + Condition + Error in `mutate()`: + i In argument: `a = if_all(y)`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, if_all(y, identity)) + Condition + Error in `filter()`: + i In argument: `if_all(y, identity)`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, (if_all(y, identity))) + Condition + Error in `filter()`: + i In argument: `(if_all(y, identity))`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + mutate(df, a = if_all(y, identity)) + Condition + Error in `mutate()`: + i In argument: `a = if_all(y, identity)`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, if_any(c(y, z))) + Condition + Error in `filter()`: + i In argument: `if_any(c(y, z))`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, (if_any(c(y, z)))) + Condition + Error in `filter()`: + i In argument: `(if_any(c(y, z)))`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + mutate(df, a = if_any(c(y, z))) + Condition + Error in `mutate()`: + i In argument: `a = if_any(c(y, z))`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, if_any(c(y, z), identity)) + Condition + Error in `filter()`: + i In argument: `if_any(c(y, z), identity)`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, (if_any(c(y, z), identity))) + Condition + Error in `filter()`: + i In argument: `(if_any(c(y, z), identity))`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + mutate(df, a = if_any(c(y, z), identity)) + Condition + Error in `mutate()`: + i In argument: `a = if_any(c(y, z), identity)`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, if_all(c(y, z))) + Condition + Error in `filter()`: + i In argument: `if_all(c(y, z))`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, (if_all(c(y, z)))) + Condition + Error in `filter()`: + i In argument: `(if_all(c(y, z)))`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + mutate(df, a = if_all(c(y, z))) + Condition + Error in `mutate()`: + i In argument: `a = if_all(c(y, z))`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, if_all(c(y, z), identity)) + Condition + Error in `filter()`: + i In argument: `if_all(c(y, z), identity)`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, (if_all(c(y, z), identity))) + Condition + Error in `filter()`: + i In argument: `(if_all(c(y, z), identity))`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + mutate(df, a = if_all(c(y, z), identity)) + Condition + Error in `mutate()`: + i In argument: `a = if_all(c(y, z), identity)`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, if_any(c(y, z)), .by = g) + Condition + Error in `filter()`: + i In argument: `if_any(c(y, z))`. + i In group 1: `g = "a"`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, (if_any(c(y, z))), .by = g) + Condition + Error in `filter()`: + i In argument: `(if_any(c(y, z)))`. + i In group 1: `g = "a"`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + mutate(df, a = if_any(c(y, z)), .by = g) + Condition + Error in `mutate()`: + i In argument: `a = if_any(c(y, z))`. + i In group 1: `g = "a"`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, if_any(c(y, z), identity), .by = g) + Condition + Error in `filter()`: + i In argument: `if_any(c(y, z), identity)`. + i In group 1: `g = "a"`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, (if_any(c(y, z), identity)), .by = g) + Condition + Error in `filter()`: + i In argument: `(if_any(c(y, z), identity))`. + i In group 1: `g = "a"`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + mutate(df, a = if_any(c(y, z), identity), .by = g) + Condition + Error in `mutate()`: + i In argument: `a = if_any(c(y, z), identity)`. + i In group 1: `g = "a"`. + Caused by error in `if_any()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, if_all(c(y, z)), .by = g) + Condition + Error in `filter()`: + i In argument: `if_all(c(y, z))`. + i In group 1: `g = "a"`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, (if_all(c(y, z))), .by = g) + Condition + Error in `filter()`: + i In argument: `(if_all(c(y, z)))`. + i In group 1: `g = "a"`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + mutate(df, a = if_all(c(y, z)), .by = g) + Condition + Error in `mutate()`: + i In argument: `a = if_all(c(y, z))`. + i In group 1: `g = "a"`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, if_all(c(y, z), identity), .by = g) + Condition + Error in `filter()`: + i In argument: `if_all(c(y, z), identity)`. + i In group 1: `g = "a"`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + filter(df, (if_all(c(y, z), identity)), .by = g) + Condition + Error in `filter()`: + i In argument: `(if_all(c(y, z), identity))`. + i In group 1: `g = "a"`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +--- + + Code + mutate(df, a = if_all(c(y, z), identity), .by = g) + Condition + Error in `mutate()`: + i In argument: `a = if_all(c(y, z), identity)`. + i In group 1: `g = "a"`. + Caused by error in `if_all()`: + ! `y` must be a logical vector, not an integer vector. + +# `if_any()` and `if_all()` recycle `.fns` results to common size + + Code + filter(df, if_any(c(x, y), fn)) + Condition + Error in `filter()`: + i In argument: `if_any(c(x, y), fn)`. + Caused by error: + ! `..1` must be of size 3 or 1, not size 2. + +--- + + Code + filter(df, (if_any(c(x, y), fn))) + Condition + Error in `filter()`: + i In argument: `(if_any(c(x, y), fn))`. + Caused by error: + ! `..1` must be of size 3 or 1, not size 2. + +--- + + Code + mutate(df, a = if_any(c(x, y), fn)) + Condition + Error in `mutate()`: + i In argument: `a = if_any(c(x, y), fn)`. + Caused by error: + ! `a` must be size 3 or 1, not 2. + +--- + + Code + filter(df, if_all(c(x, y), fn)) + Condition + Error in `filter()`: + i In argument: `if_all(c(x, y), fn)`. + Caused by error: + ! `..1` must be of size 3 or 1, not size 2. + +--- + + Code + filter(df, (if_all(c(x, y), fn))) + Condition + Error in `filter()`: + i In argument: `(if_all(c(x, y), fn))`. + Caused by error: + ! `..1` must be of size 3 or 1, not size 2. + +--- + + Code + mutate(df, a = if_all(c(x, y), fn)) + Condition + Error in `mutate()`: + i In argument: `a = if_all(c(x, y), fn)`. + Caused by error: + ! `a` must be size 3 or 1, not 2. + # can't rename during selection (#6522) Code diff --git a/tests/testthat/test-across.R b/tests/testthat/test-across.R index 31ee06d930..35bed76a41 100644 --- a/tests/testthat/test-across.R +++ b/tests/testthat/test-across.R @@ -580,23 +580,6 @@ test_that("functions defined inline can use columns (#5734)", { ) }) -test_that("if_any() and if_all() do not enforce logical", { - # We used to coerce to logical using vctrs. Now we use base - # semantics because we expand `if_all(x:y)` to `x & y`. - d <- data.frame(x = 10, y = 10) - expect_equal(filter(d, if_all(x:y, identity)), d) - expect_equal(filter(d, if_any(x:y, identity)), d) - - expect_equal( - mutate(d, ok = if_any(x:y, identity)), - mutate(d, ok = TRUE) - ) - expect_equal( - mutate(d, ok = if_all(x:y, identity)), - mutate(d, ok = TRUE) - ) -}) - test_that("if_any() and if_all() can be used in mutate() (#5709)", { d <- data.frame(x = c(1, 5, 10, 10), y = c(0, 0, 0, 10), z = c(10, 5, 1, 10)) res <- d |> @@ -633,41 +616,6 @@ test_that("if_any() and if_all() respect filter()-like NA handling", { ) }) -test_that("if_any() and if_all() work correctly with single column inputs (#7746)", { - # TODO: Still need to fix up this one - # TODO: Still need to compare to `expand_if_across()` inlining behavior - - # Test data - df <- tibble( - col_a = c(5, 12, 8), - col_b = c(7, 3, 15) - ) - - # Test with a single column and no function - should return a logical vector, not the column itself - result1 <- mutate(df, single_any = if_any(col_a)) - expect_type(result1$single_any, "logical") - expect_equal(result1$single_any, df$col_a > 0) - - result2 <- mutate(df, single_all = if_all(col_a)) - expect_type(result2$single_all, "logical") - expect_equal(result2$single_all, df$col_a > 0) - - # Test with a single column and a function - result3 <- mutate(df, big_any = if_any(col_a, ~ .x > 10)) - expect_equal(result3$big_any, c(FALSE, TRUE, FALSE)) - - result4 <- mutate(df, big_all = if_all(col_a, ~ .x > 10)) - expect_equal(result4$big_all, c(FALSE, TRUE, FALSE)) - - # Compare with multiple columns behavior for consistency - result_multi_any <- mutate(df, multi_any = if_any(c(col_a, col_b), ~ .x > 10)) - expect_equal(result_multi_any$multi_any, c(FALSE, TRUE, TRUE)) - - result_multi_all <- mutate(df, multi_all = if_all(c(col_a, col_b), ~ .x > 10)) - # The third row has col_a=8, col_b=15, so only col_b > 10 - expect_equal(result_multi_all$multi_all, c(FALSE, FALSE, FALSE)) -}) - test_that("if_any() and if_all() aborts when predicate mistakingly used in .cols= (#5732)", { df <- data.frame(x = 1:10, y = 1:10) expect_snapshot({ @@ -1298,10 +1246,9 @@ test_that("expand_if_across() expands lambdas", { by <- compute_by(by = NULL, data = mtcars, error_call = call("caller")) DataMask$new(mtcars, by, "mutate", call("caller")) - expect_equal( - map(expand_if_across(quo), quo_squash), - alist(`|`(cyl > 4, am > 4)) - ) + expect_snapshot({ + quo_squash(expand_if_across(quo)) + }) }) test_that("rowwise() preserves list-cols iff no `.fns` (#5951, #6264)", { @@ -1325,6 +1272,431 @@ test_that("rowwise() preserves list-cols iff no `.fns` (#5951, #6264)", { ) }) +test_that("`across()` recycle `.fns` results to common size", { + df <- tibble( + x = c(TRUE, FALSE, TRUE), + y = c(1L, 2L, 3L) + ) + + # The `.fns` results are recycled within just the `across()` inputs first, not + # immediately to the whole group size. The returned data frame from `across()` + # is what is then recycled to the whole group size. + + fn <- function(x) { + if (is.logical(x)) { + x + } else { + TRUE + } + } + + expect_identical( + mutate(df, across(c(x, y), fn)), + tibble(x = df$x, y = rep(TRUE, times = nrow(df))) + ) + expect_identical( + mutate(df, (across(c(x, y), fn))), + tibble(x = df$x, y = rep(TRUE, times = nrow(df))) + ) + + # Not forcing the result of `.fns` to immediately recycle to the group size is + # useful for niche cases where you want to compute something with `across()` + # but it isn't actually what you return + + fn <- function(x) { + c(mean(x), median(x)) + } + + expect_identical( + mutate(df, { + # Maybe your `across()` call returns something of length 2 + values <- across(c(x, y), fn) + # But then you manipulate it to return something compatible with the group size + new_tibble(map(values, max)) + }), + tibble(x = c(1, 1, 1), y = c(2, 2, 2)) + ) + + # Unrecyclable + expect_snapshot(error = TRUE, { + # TODO: This error is bad + mutate(df, across(c(x, y), fn)) + }) + expect_snapshot(error = TRUE, { + mutate(df, (across(c(x, y), fn))) + }) +}) + +test_that("`if_any()` and `if_all()` have consistent behavior across `filter()` and `mutate()`", { + # Tests a full suite comparing: + # - `filter()` vs `mutate()` + # - `filter()`'s evaluation vs expansion models + # - With and without `.fns` + + # `w` and `x` cover all combinations of `|` and `&` + df <- data.frame( + w = c(TRUE, FALSE, NA, TRUE, FALSE, TRUE, FALSE, NA, NA), + x = c(TRUE, FALSE, NA, FALSE, TRUE, NA, NA, TRUE, FALSE), + y = 1:9, + z = 10:18, + g = c("a", "b", "a", "b", "b", "a", "c", "a", "a") + ) + + # Zero inputs + + expect_identical( + filter(df, if_any(c())), + filter(df, FALSE) + ) + expect_identical( + filter(df, (if_any(c()))), + filter(df, FALSE) + ) + expect_identical( + mutate(df, a = if_any(c())), + mutate(df, a = FALSE) + ) + + expect_identical( + filter(df, if_any(c(), identity)), + filter(df, FALSE) + ) + expect_identical( + filter(df, (if_any(c(), identity))), + filter(df, FALSE) + ) + expect_identical( + mutate(df, a = if_any(c(), identity)), + mutate(df, a = FALSE) + ) + + expect_identical( + filter(df, if_all(c())), + filter(df, TRUE) + ) + expect_identical( + filter(df, (if_all(c()))), + filter(df, TRUE) + ) + expect_identical( + mutate(df, a = if_all(c())), + mutate(df, a = TRUE) + ) + + expect_identical( + filter(df, if_all(c(), identity)), + filter(df, TRUE) + ) + expect_identical( + filter(df, (if_all(c(), identity))), + filter(df, TRUE) + ) + expect_identical( + mutate(df, a = if_all(c(), identity)), + mutate(df, a = TRUE) + ) + + # One input + + expect_identical( + filter(df, if_any(w)), + filter(df, w) + ) + expect_identical( + filter(df, (if_any(w))), + filter(df, w) + ) + expect_identical( + mutate(df, a = if_any(w)), + mutate(df, a = w) + ) + + expect_identical( + filter(df, if_any(w, identity)), + filter(df, w) + ) + expect_identical( + filter(df, (if_any(w, identity))), + filter(df, w) + ) + expect_identical( + mutate(df, a = if_any(w, identity)), + mutate(df, a = w) + ) + + expect_identical( + filter(df, if_all(w)), + filter(df, w) + ) + expect_identical( + filter(df, (if_all(w))), + filter(df, w) + ) + expect_identical( + mutate(df, a = if_all(w)), + mutate(df, a = w) + ) + + expect_identical( + filter(df, if_all(w, identity)), + filter(df, w) + ) + expect_identical( + filter(df, (if_all(w, identity))), + filter(df, w) + ) + expect_identical( + mutate(df, a = if_all(w, identity)), + mutate(df, a = w) + ) + + # Two inputs + + expect_identical( + filter(df, if_any(c(w, x))), + filter(df, w | x) + ) + expect_identical( + filter(df, (if_any(c(w, x)))), + filter(df, w | x) + ) + expect_identical( + mutate(df, a = if_any(c(w, x))), + mutate(df, a = w | x) + ) + + expect_identical( + filter(df, if_any(c(w, x), identity)), + filter(df, w | x) + ) + expect_identical( + filter(df, (if_any(c(w, x), identity))), + filter(df, w | x) + ) + expect_identical( + mutate(df, a = if_any(c(w, x), identity)), + mutate(df, a = w | x) + ) + + expect_identical( + filter(df, if_all(c(w, x))), + filter(df, w & x) + ) + expect_identical( + filter(df, (if_all(c(w, x)))), + filter(df, w & x) + ) + expect_identical( + mutate(df, a = if_all(c(w, x))), + mutate(df, a = w & x) + ) + + expect_identical( + filter(df, if_all(c(w, x), identity)), + filter(df, w & x) + ) + expect_identical( + filter(df, (if_all(c(w, x), identity))), + filter(df, w & x) + ) + expect_identical( + mutate(df, a = if_all(c(w, x), identity)), + mutate(df, a = w & x) + ) + + # Two inputs (grouped) + + expect_identical( + filter(df, if_any(c(w, x)), .by = g), + filter(df, w | x, .by = g) + ) + expect_identical( + filter(df, (if_any(c(w, x))), .by = g), + filter(df, w | x, .by = g) + ) + expect_identical( + mutate(df, a = if_any(c(w, x)), .by = g), + mutate(df, a = w | x, .by = g) + ) + + expect_identical( + filter(df, if_any(c(w, x), identity), .by = g), + filter(df, w | x, .by = g) + ) + expect_identical( + filter(df, (if_any(c(w, x), identity)), .by = g), + filter(df, w | x, .by = g) + ) + expect_identical( + mutate(df, a = if_any(c(w, x), identity), .by = g), + mutate(df, a = w | x, .by = g) + ) + + expect_identical( + filter(df, if_all(c(w, x)), .by = g), + filter(df, w & x, .by = g) + ) + expect_identical( + filter(df, (if_all(c(w, x))), .by = g), + filter(df, w & x, .by = g) + ) + expect_identical( + mutate(df, a = if_all(c(w, x)), .by = g), + mutate(df, a = w & x, .by = g) + ) + + expect_identical( + filter(df, if_all(c(w, x), identity), .by = g), + filter(df, w & x, .by = g) + ) + expect_identical( + filter(df, (if_all(c(w, x), identity)), .by = g), + filter(df, w & x, .by = g) + ) + expect_identical( + mutate(df, a = if_all(c(w, x), identity), .by = g), + mutate(df, a = w & x, .by = g) + ) + + # One non-logical input (all error) + + expect_snapshot(error = TRUE, filter(df, if_any(y))) + expect_snapshot(error = TRUE, filter(df, (if_any(y)))) + expect_snapshot(error = TRUE, mutate(df, a = if_any(y))) + + expect_snapshot(error = TRUE, filter(df, if_any(y, identity))) + expect_snapshot(error = TRUE, filter(df, (if_any(y, identity)))) + expect_snapshot(error = TRUE, mutate(df, a = if_any(y, identity))) + + expect_snapshot(error = TRUE, filter(df, if_all(y))) + expect_snapshot(error = TRUE, filter(df, (if_all(y)))) + expect_snapshot(error = TRUE, mutate(df, a = if_all(y))) + + expect_snapshot(error = TRUE, filter(df, if_all(y, identity))) + expect_snapshot(error = TRUE, filter(df, (if_all(y, identity)))) + expect_snapshot(error = TRUE, mutate(df, a = if_all(y, identity))) + + # Two non-logical inputs (all error) + + expect_snapshot(error = TRUE, filter(df, if_any(c(y, z)))) + expect_snapshot(error = TRUE, filter(df, (if_any(c(y, z))))) + expect_snapshot(error = TRUE, mutate(df, a = if_any(c(y, z)))) + + expect_snapshot(error = TRUE, filter(df, if_any(c(y, z), identity))) + expect_snapshot(error = TRUE, filter(df, (if_any(c(y, z), identity)))) + expect_snapshot(error = TRUE, mutate(df, a = if_any(c(y, z), identity))) + + expect_snapshot(error = TRUE, filter(df, if_all(c(y, z)))) + expect_snapshot(error = TRUE, filter(df, (if_all(c(y, z))))) + expect_snapshot(error = TRUE, mutate(df, a = if_all(c(y, z)))) + + expect_snapshot(error = TRUE, filter(df, if_all(c(y, z), identity))) + expect_snapshot(error = TRUE, filter(df, (if_all(c(y, z), identity)))) + expect_snapshot(error = TRUE, mutate(df, a = if_all(c(y, z), identity))) + + # Two non-logical inputs (grouped) (all error) + + expect_snapshot(error = TRUE, { + filter(df, if_any(c(y, z)), .by = g) + }) + expect_snapshot(error = TRUE, { + filter(df, (if_any(c(y, z))), .by = g) + }) + expect_snapshot(error = TRUE, { + mutate(df, a = if_any(c(y, z)), .by = g) + }) + + expect_snapshot(error = TRUE, { + filter(df, if_any(c(y, z), identity), .by = g) + }) + expect_snapshot(error = TRUE, { + filter(df, (if_any(c(y, z), identity)), .by = g) + }) + expect_snapshot(error = TRUE, { + mutate(df, a = if_any(c(y, z), identity), .by = g) + }) + + expect_snapshot(error = TRUE, { + filter(df, if_all(c(y, z)), .by = g) + }) + expect_snapshot(error = TRUE, { + filter(df, (if_all(c(y, z))), .by = g) + }) + expect_snapshot(error = TRUE, { + mutate(df, a = if_all(c(y, z)), .by = g) + }) + + expect_snapshot(error = TRUE, { + filter(df, if_all(c(y, z), identity), .by = g) + }) + expect_snapshot(error = TRUE, { + filter(df, (if_all(c(y, z), identity)), .by = g) + }) + expect_snapshot(error = TRUE, { + mutate(df, a = if_all(c(y, z), identity), .by = g) + }) +}) + +test_that("`if_any()` and `if_all()` recycle `.fns` results to common size", { + df <- data.frame( + x = c(TRUE, FALSE, NA), + y = c(1L, 2L, 3L) + ) + + # `.fns` results recycle. Both `across()` and `if_any()`/`if_all()` recycle to + # a common size amongst their inputs (here, size 1), then that data frame is + # recycled to the group size. + + fn <- function(x) { + if (is.logical(x)) { + c(TRUE, FALSE, TRUE) + } else { + TRUE + } + } + + expect_identical( + filter(df, if_any(c(x, y), fn)), + filter(df, TRUE) + ) + expect_identical( + filter(df, (if_any(c(x, y), fn))), + filter(df, TRUE) + ) + expect_identical( + mutate(df, a = if_any(c(x, y), fn)), + mutate(df, a = TRUE) + ) + + expect_identical( + filter(df, if_all(c(x, y), fn)), + filter(df, c(TRUE, FALSE, TRUE)) + ) + expect_identical( + filter(df, (if_all(c(x, y), fn))), + filter(df, c(TRUE, FALSE, TRUE)) + ) + expect_identical( + mutate(df, a = if_all(c(x, y), fn)), + mutate(df, a = c(TRUE, FALSE, TRUE)) + ) + + # Unrecyclable (all error, can't recycle to group size) + # It is correct that these show `..1` in the error for `filter()`. The error + # is about recycling of the result of `if_any()`, i.e. the data frame in the + # 1st argument slot. + + fn <- function(x) c(TRUE, FALSE) + + expect_snapshot(error = TRUE, filter(df, if_any(c(x, y), fn))) + expect_snapshot(error = TRUE, filter(df, (if_any(c(x, y), fn)))) + expect_snapshot(error = TRUE, mutate(df, a = if_any(c(x, y), fn))) + + expect_snapshot(error = TRUE, filter(df, if_all(c(x, y), fn))) + expect_snapshot(error = TRUE, filter(df, (if_all(c(x, y), fn)))) + expect_snapshot(error = TRUE, mutate(df, a = if_all(c(x, y), fn))) +}) + # c_across ---------------------------------------------------------------- test_that("selects and combines columns", { From 1ed3235061681e2ea01e1b2da5b74fa5efef4e15 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Wed, 29 Oct 2025 15:52:14 -0400 Subject: [PATCH 6/9] Remove a TODO and update a snapshot test! --- tests/testthat/_snaps/pick.md | 2 +- tests/testthat/test-pick.R | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/testthat/_snaps/pick.md b/tests/testthat/_snaps/pick.md index b722988da6..7ba36b24ee 100644 --- a/tests/testthat/_snaps/pick.md +++ b/tests/testthat/_snaps/pick.md @@ -213,7 +213,7 @@ filter(df, pick(x, y)$x) Condition Error in `filter()`: - i In argument: `asNamespace("dplyr")$dplyr_pick_tibble(x = x, y = y)$x`. + i In argument: `pick(x, y)$x`. Caused by error: ! `..1` must be a logical vector, not a double vector. diff --git a/tests/testthat/test-pick.R b/tests/testthat/test-pick.R index 3e063b1840..de370056cb 100644 --- a/tests/testthat/test-pick.R +++ b/tests/testthat/test-pick.R @@ -492,7 +492,6 @@ test_that("`filter()` with `pick()` that uses invalid tidy-selection errors", { test_that("`filter()` that doesn't use `pick()` result correctly errors", { df <- tibble(x = c(1, 2, NA, 3), y = c(2, NA, 5, 3)) - # TODO: Can we improve on the `In argument:` expression in the expansion case? expect_snapshot(error = TRUE, { filter(df, pick(x, y)$x) }) From a7afead209fc3870545afe2de7ee6b63fa977e59 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Wed, 29 Oct 2025 16:06:47 -0400 Subject: [PATCH 7/9] Collect `quos` first in case the user has a column named `ns` --- R/across.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/across.R b/R/across.R index e44d324cc4..acbf778c2a 100644 --- a/R/across.R +++ b/R/across.R @@ -711,9 +711,8 @@ expand_if_across <- function(quo) { quos <- expand_across(quo_set_expr(quo, call)) expr <- expr({ - ns <- asNamespace("dplyr") - x <- list(!!!quos) + ns <- asNamespace("dplyr") # In the evaluation path, `across()` automatically recycles to common size, # so we must here as well for compatibility. `across()` also returns a 0 From 29dab9465eeb631f592fd8be047e0b3a5d02c2b4 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Wed, 29 Oct 2025 16:14:06 -0400 Subject: [PATCH 8/9] Update snapshot test --- tests/testthat/_snaps/across.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/_snaps/across.md b/tests/testthat/_snaps/across.md index b48918d1d3..1cce076728 100644 --- a/tests/testthat/_snaps/across.md +++ b/tests/testthat/_snaps/across.md @@ -301,8 +301,8 @@ quo_squash(expand_if_across(quo)) Output { - ns <- asNamespace("dplyr") x <- list(cyl = cyl > 4, am = am > 4) + ns <- asNamespace("dplyr") size <- ns[["dplyr_list_size_common"]](x, absent = 1L, call = call("if_any")) x <- ns[["dplyr_list_recycle_common"]](x, size = size, call = call("if_any")) ns[["dplyr_list_pany"]](x, size = size, error_call = call("if_any")) From 21023cb317db719c69c0faaa442cedbf863c001a Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Thu, 30 Oct 2025 08:46:02 -0400 Subject: [PATCH 9/9] Switch to a non snapshot based test --- tests/testthat/_snaps/across.md | 13 ------------- tests/testthat/test-across.R | 14 +++++++++++--- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/tests/testthat/_snaps/across.md b/tests/testthat/_snaps/across.md index 1cce076728..10f7eed676 100644 --- a/tests/testthat/_snaps/across.md +++ b/tests/testthat/_snaps/across.md @@ -295,19 +295,6 @@ Caused by error: ! Can't subset `.data` outside of a data mask context. -# expand_if_across() expands lambdas - - Code - quo_squash(expand_if_across(quo)) - Output - { - x <- list(cyl = cyl > 4, am = am > 4) - ns <- asNamespace("dplyr") - size <- ns[["dplyr_list_size_common"]](x, absent = 1L, call = call("if_any")) - x <- ns[["dplyr_list_recycle_common"]](x, size = size, call = call("if_any")) - ns[["dplyr_list_pany"]](x, size = size, error_call = call("if_any")) - } - # `across()` recycle `.fns` results to common size Code diff --git a/tests/testthat/test-across.R b/tests/testthat/test-across.R index 35bed76a41..abdaea3435 100644 --- a/tests/testthat/test-across.R +++ b/tests/testthat/test-across.R @@ -1246,9 +1246,17 @@ test_that("expand_if_across() expands lambdas", { by <- compute_by(by = NULL, data = mtcars, error_call = call("caller")) DataMask$new(mtcars, by, "mutate", call("caller")) - expect_snapshot({ - quo_squash(expand_if_across(quo)) - }) + quo <- expand_if_across(quo) + + # We just need to look for something we know we insert into the expression. + # `expect_snapshot()` doesn't seem to play nicely with covr on CI here, the + # expression captured seems to contain `covr:::count()` calls. + expect_true( + grepl( + "asNamespace", + paste0(expr_deparse(quo_squash(quo)), collapse = " ") + ) + ) }) test_that("rowwise() preserves list-cols iff no `.fns` (#5951, #6264)", {