diff --git a/NEWS.md b/NEWS.md index 8b14a183d6..b20ae582b3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # ggplot2 (development version) +* New `stat_boxplot(min.group.n)` argument, which skips drawing the box and + whiskers for small groups (@teunbrand based on code by @dicook, #6776) * The `arrow` and `arrow.fill` arguments are now available in `geom_linerange()` and `geom_pointrange()` layers (@teunbrand, #6481). * (internal) `zeroGrob()` now returns a `grid::nullGrob()` (#6390). diff --git a/R/geom-boxplot.R b/R/geom-boxplot.R index 69aad3046c..b0850a7fd3 100644 --- a/R/geom-boxplot.R +++ b/R/geom-boxplot.R @@ -269,8 +269,8 @@ GeomBoxplot <- ggproto("GeomBoxplot", Geom, out_max <- vapply(data$outliers, max, numeric(1)) }) - data$ymin_final <- pmin(out_min, data$ymin) - data$ymax_final <- pmax(out_max, data$ymax) + data$ymin_final <- pmin(out_min, data$ymin, na.rm = TRUE) + data$ymax_final <- pmax(out_max, data$ymax, na.rm = TRUE) } # if `varwidth` not requested or not available, don't use it @@ -304,6 +304,27 @@ GeomBoxplot <- ggproto("GeomBoxplot", Geom, )) } + outliers_grob <- NULL + if (!is.null(data$outliers) && length(data$outliers[[1]]) >= 1) { + outliers <- data_frame0( + y = data$outliers[[1]], + x = data$x[1], + colour = outlier_gp$colour %||% data$colour[1], + fill = outlier_gp$fill %||% data$fill[1], + shape = outlier_gp$shape %||% data$shape[1] %||% 19, + size = outlier_gp$size %||% data$size[1] %||% 1.5, + stroke = outlier_gp$stroke %||% data$stroke[1] %||% 0.5, + fill = NA, + alpha = outlier_gp$alpha %||% data$alpha[1], + .size = length(data$outliers[[1]]) + ) + outliers <- flip_data(outliers, flipped_aes) + outliers_grob <- GeomPoint$draw_panel(outliers, panel_params, coord) + if (is.na(data$middle[1]) && is.na(data$lower[1]) && is.na(data$upper[1])) { + return(ggname("geom_boxplot", grobTree(outliers_grob))) + } + } + common <- list(fill = fill_alpha(data$fill, data$alpha), group = data$group) whiskers <- data_frame0( @@ -331,26 +352,6 @@ GeomBoxplot <- ggproto("GeomBoxplot", Geom, ) box <- flip_data(box, flipped_aes) - if (!is.null(data$outliers) && length(data$outliers[[1]]) >= 1) { - outliers <- data_frame0( - y = data$outliers[[1]], - x = data$x[1], - colour = outlier_gp$colour %||% data$colour[1], - fill = outlier_gp$fill %||% data$fill[1], - shape = outlier_gp$shape %||% data$shape[1] %||% 19, - size = outlier_gp$size %||% data$size[1] %||% 1.5, - stroke = outlier_gp$stroke %||% data$stroke[1] %||% 0.5, - fill = NA, - alpha = outlier_gp$alpha %||% data$alpha[1], - .size = length(data$outliers[[1]]) - ) - outliers <- flip_data(outliers, flipped_aes) - - outliers_grob <- GeomPoint$draw_panel(outliers, panel_params, coord) - } else { - outliers_grob <- NULL - } - if (staplewidth != 0) { staples <- data_frame0( x = rep((data$xmin - data$x) * staplewidth + data$x, 2), diff --git a/R/stat-boxplot.R b/R/stat-boxplot.R index cfc137d4c3..f8c6e22446 100644 --- a/R/stat-boxplot.R +++ b/R/stat-boxplot.R @@ -40,6 +40,11 @@ StatBoxplot <- ggproto("StatBoxplot", Stat, } params$width <- params$width %||% (resolution(data$x %||% 0, discrete = TRUE) * 0.75) + check_number_whole( + params$min.group.n %||% 1L, + min = 1, allow_infinite = TRUE, + arg = "min.group.n" + ) if (!is_mapped_discrete(data$x) && is.double(data$x) && !has_groups(data) && any(data$x != data$x[1L])) { cli::cli_warn(c( @@ -53,7 +58,7 @@ StatBoxplot <- ggproto("StatBoxplot", Stat, extra_params = c("na.rm", "orientation"), - compute_group = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5, flipped_aes = FALSE) { + compute_group = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5, min.group.n = 1L, flipped_aes = FALSE) { data <- flip_data(data, flipped_aes) qs <- c(0, 0.25, 0.5, 0.75, 1) @@ -66,9 +71,14 @@ StatBoxplot <- ggproto("StatBoxplot", Stat, names(stats) <- c("ymin", "lower", "middle", "upper", "ymax") iqr <- diff(stats[c(2, 4)]) - outliers <- data$y < (stats[2] - coef * iqr) | data$y > (stats[4] + coef * iqr) - if (any(outliers)) { - stats[c(1, 5)] <- range(c(stats[2:4], data$y[!outliers]), na.rm = TRUE) + if (nrow(data) >= min.group.n) { + outliers <- data$y < (stats[2] - coef * iqr) | data$y > (stats[4] + coef * iqr) + if (any(outliers)) { + stats[c(1, 5)] <- range(c(stats[2:4], data$y[!outliers]), na.rm = TRUE) + } + } else { + stats[] <- NA + outliers <- rep(TRUE, nrow(data)) } if (length(data$width) > 0L) { width <- data$width[1L] @@ -99,6 +109,10 @@ StatBoxplot <- ggproto("StatBoxplot", Stat, #' @rdname geom_boxplot #' @param coef Length of the whiskers as multiple of IQR. Defaults to 1.5. +#' @param min.group.n An integer setting the minimum size of a group to draw +#' the box and whiskers. Groups with less observations will be displayed as +#' points styled like outliers without box and whiskers. The default (1) draws +#' box and whiskers for all groups. #' @inheritParams shared_layer_parameters #' @export #' @eval rd_computed_vars( diff --git a/man/geom_boxplot.Rd b/man/geom_boxplot.Rd index 5f9962f6a0..d0a5edc5a0 100644 --- a/man/geom_boxplot.Rd +++ b/man/geom_boxplot.Rd @@ -53,6 +53,7 @@ stat_boxplot( ..., orientation = NA, coef = 1.5, + min.group.n = 1L, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE @@ -180,6 +181,11 @@ overriding these connections, see how the \link[=layer_stats]{stat} and \link[=layer_geoms]{geom} arguments work.} \item{coef}{Length of the whiskers as multiple of IQR. Defaults to 1.5.} + +\item{min.group.n}{An integer setting the minimum size of a group to draw +the box and whiskers. Groups with less observations will be displayed as +points styled like outliers without box and whiskers. The default (1) draws +box and whiskers for all groups.} } \description{ The boxplot compactly displays the distribution of a continuous variable. diff --git a/tests/testthat/test-stat-boxplot.R b/tests/testthat/test-stat-boxplot.R index 7878a9eb34..40cf04e841 100644 --- a/tests/testthat/test-stat-boxplot.R +++ b/tests/testthat/test-stat-boxplot.R @@ -25,3 +25,12 @@ test_that("stat_boxplot errors with missing x/y aesthetics", { geom_boxplot() expect_snapshot_error(ggplot_build(p)) }) + +test_that("stat_boxplot respects the `min.group.n` setting", { + df <- data.frame(x = rep(c("A", "B"), c(3, 7)), y = c(1:10)) + ld <- layer_data( + ggplot(df, aes(x, y)) + geom_boxplot(min.group.n = 5) + ) + expect_equal(lengths(ld$outliers), c(3, 0)) + expect_equal(ld$middle, c(NA, 7)) +})