From 0e1e44682a5b169ae8a24adb0475d1912ecdab46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= <teemu.sailynoja@gmail.com>
Date: Tue, 29 Apr 2025 15:49:32 +0300
Subject: [PATCH 1/7] .ppc_calibration_overlay_data, and
 ppc_calibration_overlay(_grouped)

---
 R/ppc-calibration.R | 96 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 R/ppc-calibration.R

diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R
new file mode 100644
index 00000000..9a8ca7a1
--- /dev/null
+++ b/R/ppc-calibration.R
@@ -0,0 +1,96 @@
+#' PPC calibration
+#'
+#' Assess the calibration of the predictive distributions `yrep` in relation to
+#' the data `y'.
+#' See the **Plot Descriptions** section, below, for details.
+#'
+#' @name PPC-calibration
+#' @family PPCs
+#'
+#' @template args-y-rep
+#' @template args-group
+#'
+#' @template return-ggplot-or-data
+#'
+#' @section Plot Descriptions:
+#' \describe{
+#'   \item{`ppc_calibration_overlay()`,`ppc_calibration_overlay_grouped()`}{
+#'
+#'   }
+#' }
+#'
+NULL
+
+#' @rdname PPC-calibration
+#' @export
+.ppc_calibration_data <- function(y, prep, group = NULL) {
+  y <- validate_y(y)
+  n_obs <- length(y)
+  prep <- validate_predictions(prep, n_obs)
+  if (any(prep > 1 | prep < 0)) {
+    stop("Values of ´prep´ should be predictive probabilities between 0 and 1.")
+  }
+  if (!is.null(group)) {
+    group <- validate_group(group, n_obs)
+  } else {
+    group <- rep(1, n_obs * nrow(prep))
+  }
+
+  if (requireNamespace("monotone", quietly = TRUE)) {
+    monotone <- monotone::monotone
+  } else {
+    monotone <- function(y) {
+      stats::isoreg(y)$yf
+    }
+  }
+  .ppd_data(prep, group = group) %>%
+    group_by(group, rep_id) %>%
+    mutate(
+      ord = order(value),
+      value = value[ord],
+      cep = monotone(y[ord])
+    ) |>
+    ungroup()
+}
+
+#' @rdname PPC-calibration
+#' @export
+ppc_calibration_overlay <- function(
+    y, prep, ..., linewidth = 0.25, alpha = 0.7) {
+  check_ignored_arguments(...)
+  data <- .ppc_calibration_data(y, prep)
+  ggplot(data) +
+    geom_abline(color = "black", linetype = 2) +
+    geom_line(
+      aes(value, cep, group = rep_id, color = "yrep"),
+      linewidth = linewidth, alpha = alpha
+    ) +
+    scale_color_ppc() +
+    bayesplot_theme_get() +
+    legend_none() +
+    coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
+    xlab("Predicted probability") +
+    ylab("Conditional event probability") +
+    NULL
+}
+
+#' @rdname PPC-calibration
+#' @export
+ppc_calibration_overlay_grouped <- function(
+    y, prep, group, ..., linewidth = 0.25, alpha = 0.7) {
+  check_ignored_arguments(...)
+  data <- .ppc_calibration_data(y, prep, group)
+  ggplot(data) +
+    geom_abline(color = "black", linetype = 2) +
+    geom_line(aes(value, cep, group = rep_id, color = "yrep"),
+      linewidth = linewidth, alpha = alpha
+    ) +
+    facet_wrap(vars(group)) +
+    scale_color_ppc() +
+    bayesplot_theme_get() +
+    legend_none() +
+    coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
+    xlab("Predicted probability") +
+    ylab("Conditional event probability") +
+    NULL
+}

From d784405a547a2787ab0a50e13b01edfe72eb5140 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= <teemu.sailynoja@gmail.com>
Date: Mon, 19 May 2025 18:20:12 +0300
Subject: [PATCH 2/7] draft of ppc_calibration plots

---
 R/ppc-calibration.R | 170 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 138 insertions(+), 32 deletions(-)

diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R
index 9a8ca7a1..2786aa5c 100644
--- a/R/ppc-calibration.R
+++ b/R/ppc-calibration.R
@@ -1,4 +1,4 @@
-#' PPC calibration
+# x' PPC calibration
 #'
 #' Assess the calibration of the predictive distributions `yrep` in relation to
 #' the data `y'.
@@ -14,49 +14,119 @@
 #'
 #' @section Plot Descriptions:
 #' \describe{
+#'   \item{`ppc_calibration()`,`ppc_calibration_grouped()`}{
+#'
+#'   },
 #'   \item{`ppc_calibration_overlay()`,`ppc_calibration_overlay_grouped()`}{
 #'
+#'   },
+#'   \item{`ppc_loo_calibration()`,`ppc_loo_calibration_grouped()`}{
+#'
 #'   }
 #' }
 #'
 NULL
 
+
 #' @rdname PPC-calibration
 #' @export
-.ppc_calibration_data <- function(y, prep, group = NULL) {
-  y <- validate_y(y)
-  n_obs <- length(y)
-  prep <- validate_predictions(prep, n_obs)
-  if (any(prep > 1 | prep < 0)) {
-    stop("Values of ´prep´ should be predictive probabilities between 0 and 1.")
-  }
-  if (!is.null(group)) {
-    group <- validate_group(group, n_obs)
-  } else {
-    group <- rep(1, n_obs * nrow(prep))
-  }
+ppc_calibration_overlay <- function(
+    y, prep, ..., linewidth = 0.25, alpha = 0.5) {
+  check_ignored_arguments(...)
+  data <- .ppc_calibration_data(y, prep)
+  ggplot(data) +
+    geom_abline(color = "black", linetype = 2) +
+    geom_line(
+      aes(value, cep, group = rep_id, color = "yrep"),
+      linewidth = linewidth, alpha = alpha
+    ) +
+    scale_color_ppc() +
+    bayesplot_theme_get() +
+    legend_none() +
+    coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
+    xlab("Predicted probability") +
+    ylab("Conditional event probability") +
+    NULL
+}
 
-  if (requireNamespace("monotone", quietly = TRUE)) {
-    monotone <- monotone::monotone
-  } else {
-    monotone <- function(y) {
-      stats::isoreg(y)$yf
-    }
-  }
-  .ppd_data(prep, group = group) %>%
-    group_by(group, rep_id) %>%
-    mutate(
-      ord = order(value),
-      value = value[ord],
-      cep = monotone(y[ord])
-    ) |>
-    ungroup()
+#' @rdname PPC-calibration
+#' @export
+ppc_calibration_overlay_grouped <- function(
+    y, prep, group, ..., linewidth = 0.25, alpha = 0.7) {
+  check_ignored_arguments(...)
+  data <- .ppc_calibration_data(y, prep, group)
+  ggplot(data) +
+    geom_abline(color = "black", linetype = 2) +
+    geom_line(aes(value, cep, group = rep_id, color = "yrep"),
+      linewidth = linewidth, alpha = alpha
+    ) +
+    facet_wrap(vars(group)) +
+    scale_color_ppc() +
+    bayesplot_theme_get() +
+    legend_none() +
+    coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
+    xlab("Predicted probability") +
+    ylab("Conditional event probability") +
+    NULL
 }
 
 #' @rdname PPC-calibration
 #' @export
-ppc_calibration_overlay <- function(
-    y, prep, ..., linewidth = 0.25, alpha = 0.7) {
+ppc_calibration <- function(
+    y, prep, prob = .95, show_mean = TRUE, ..., linewidth = 0.25, alpha = 0.7) {
+  check_ignored_arguments(...)
+  data <- .ppc_calibration_data(y, prep) %>%
+    group_by(y_id) %>%
+    summarise(
+      value = median(value),
+      lb = quantile(cep, .5 - .5 * prob),
+      ub = quantile(cep, .5 + .5 * prob),
+      cep = median(cep)
+    )
+
+  ggplot(data) +
+    aes(value, cep) +
+    geom_abline(color = "black", linetype = 2) +
+    geom_ribbon(aes(ymin = lb, ymax = ub, fill = "yrep"), alpha = alpha) +
+    geom_line(
+      aes(color = "y"),
+      linewidth = linewidth
+    ) +
+    scale_color_ppc() +
+    scale_fill_ppc() +
+    bayesplot_theme_get() +
+    # legend_none() +
+    coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
+    xlab("Predicted probability") +
+    ylab("Conditional event probability") +
+    NULL
+}
+
+#' @rdname PPC-calibration
+#' @export
+ppc_calibration_grouped <- function(
+    y, prep, show_mean, ..., linewidth = 0.25, alpha = 0.7) {
+  check_ignored_arguments(...)
+  data <- .ppc_calibration_data(y, prep, group)
+  ggplot(data) +
+    geom_abline(color = "black", linetype = 2) +
+    geom_line(aes(value, cep, group = rep_id, color = "yrep"),
+      linewidth = linewidth, alpha = alpha
+    ) +
+    facet_wrap(vars(group)) +
+    scale_color_ppc() +
+    bayesplot_theme_get() +
+    legend_none() +
+    coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
+    xlab("Predicted probability") +
+    ylab("Conditional event probability") +
+    NULL
+}
+
+#' @rdname PPC-calibration
+#' @export
+ppc_loo_calibration <- function(
+    y, prep, lw, ..., linewidth = 0.25, alpha = 0.7) {
   check_ignored_arguments(...)
   data <- .ppc_calibration_data(y, prep)
   ggplot(data) +
@@ -76,8 +146,8 @@ ppc_calibration_overlay <- function(
 
 #' @rdname PPC-calibration
 #' @export
-ppc_calibration_overlay_grouped <- function(
-    y, prep, group, ..., linewidth = 0.25, alpha = 0.7) {
+ppc_loo_calibration_grouped <- function(
+    y, prep, lw, ..., linewidth = 0.25, alpha = 0.7) {
   check_ignored_arguments(...)
   data <- .ppc_calibration_data(y, prep, group)
   ggplot(data) +
@@ -94,3 +164,39 @@ ppc_calibration_overlay_grouped <- function(
     ylab("Conditional event probability") +
     NULL
 }
+
+.ppc_calibration_data <- function(y, prep, group = NULL) {
+  y <- validate_y(y)
+  n_obs <- length(y)
+  prep <- validate_predictions(prep, n_obs)
+  if (any(prep > 1 | prep < 0)) {
+    stop("Values of ´prep´ should be predictive probabilities between 0 and 1.")
+  }
+  if (!is.null(group)) {
+    group <- validate_group(group, n_obs)
+  } else {
+    group <- rep(1, n_obs * nrow(prep))
+  }
+
+  if (requireNamespace("monotone", quietly = TRUE)) {
+    monotone <- monotone::monotone
+  } else {
+    monotone <- function(y) {
+      stats::isoreg(y)$yf
+    }
+  }
+  .ppd_data(prep, group = group) %>%
+    group_by(group, rep_id) %>%
+    mutate(
+      ord = order(value),
+      value = value[ord],
+      cep = monotone(y[ord])
+    ) |>
+    ungroup()
+}
+
+.loo_resample_data <- function(prep, lw, psis_object) {
+  lw <- .get_lw(lw, psis_object)
+  stopifnot(identical(dim(prep), dim(lw)))
+  # Work in progress here...
+}

From 5cf62f96847440ebad0bcff59a23a718da5353af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= <teemu.sailynoja@gmail.com>
Date: Thu, 22 May 2025 14:59:14 +0300
Subject: [PATCH 3/7] Add example for ppc_calibration_overlay()

---
 R/ppc-calibration.R | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R
index 2786aa5c..e706b2f9 100644
--- a/R/ppc-calibration.R
+++ b/R/ppc-calibration.R
@@ -25,6 +25,16 @@
 #'   }
 #' }
 #'
+#' @examples
+#' color_scheme_set("brightblue")
+#'
+#' # Make an example dataset of binary observations
+#' ymin <- range(example_y_data(), example_yrep_draws())[1]
+#' ymax <- range(example_y_data(), example_yrep_draws())[2]
+#' y <- rbinom(length(example_y_data()), 1, (example_y_data() - ymin) / (ymax - ymin))
+#' prep <- (example_yrep_draws() - ymin) / (ymax - ymin)
+#'
+#' ppc_calibration_overlay(y, prep[1:50, ])
 NULL
 
 
@@ -73,7 +83,7 @@ ppc_calibration_overlay_grouped <- function(
 #' @rdname PPC-calibration
 #' @export
 ppc_calibration <- function(
-    y, prep, prob = .95, show_mean = TRUE, ..., linewidth = 0.25, alpha = 0.7) {
+    y, prep, prob = .95, show_mean = TRUE, ..., linewidth = 0.5, alpha = 0.7) {
   check_ignored_arguments(...)
   data <- .ppc_calibration_data(y, prep) %>%
     group_by(y_id) %>%
@@ -95,7 +105,7 @@ ppc_calibration <- function(
     scale_color_ppc() +
     scale_fill_ppc() +
     bayesplot_theme_get() +
-    # legend_none() +
+    legend_none() +
     coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
     xlab("Predicted probability") +
     ylab("Conditional event probability") +
@@ -105,9 +115,17 @@ ppc_calibration <- function(
 #' @rdname PPC-calibration
 #' @export
 ppc_calibration_grouped <- function(
-    y, prep, show_mean, ..., linewidth = 0.25, alpha = 0.7) {
+    y, prep, group, show_mean, ..., linewidth = 0.25, alpha = 0.7) {
   check_ignored_arguments(...)
-  data <- .ppc_calibration_data(y, prep, group)
+  data <- .ppc_calibration_data(y, prep) %>%
+    group_by(y_id) %>%
+    summarise(
+      value = median(value),
+      lb = quantile(cep, .5 - .5 * prob),
+      ub = quantile(cep, .5 + .5 * prob),
+      cep = median(cep)
+    )
+
   ggplot(data) +
     geom_abline(color = "black", linetype = 2) +
     geom_line(aes(value, cep, group = rep_id, color = "yrep"),
@@ -147,7 +165,7 @@ ppc_loo_calibration <- function(
 #' @rdname PPC-calibration
 #' @export
 ppc_loo_calibration_grouped <- function(
-    y, prep, lw, ..., linewidth = 0.25, alpha = 0.7) {
+    y, prep, group, lw, ..., linewidth = 0.25, alpha = 0.7) {
   check_ignored_arguments(...)
   data <- .ppc_calibration_data(y, prep, group)
   ggplot(data) +

From 01ac82635561c7dc721a4315db89f76c39a826b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= <teemu.sailynoja@gmail.com>
Date: Thu, 22 May 2025 15:07:14 +0300
Subject: [PATCH 4/7] Fix ppc_calibration_grouped()

---
 R/ppc-calibration.R | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R
index e706b2f9..a3d4af42 100644
--- a/R/ppc-calibration.R
+++ b/R/ppc-calibration.R
@@ -115,10 +115,10 @@ ppc_calibration <- function(
 #' @rdname PPC-calibration
 #' @export
 ppc_calibration_grouped <- function(
-    y, prep, group, show_mean, ..., linewidth = 0.25, alpha = 0.7) {
+    y, prep, group, prob = .95, show_mean = TRUE, ..., linewidth = 0.5, alpha = 0.7) {
   check_ignored_arguments(...)
-  data <- .ppc_calibration_data(y, prep) %>%
-    group_by(y_id) %>%
+  data <- .ppc_calibration_data(y, prep, group) %>%
+    group_by(group, y_id) %>%
     summarise(
       value = median(value),
       lb = quantile(cep, .5 - .5 * prob),
@@ -127,12 +127,16 @@ ppc_calibration_grouped <- function(
     )
 
   ggplot(data) +
+    aes(value, cep) +
     geom_abline(color = "black", linetype = 2) +
-    geom_line(aes(value, cep, group = rep_id, color = "yrep"),
-      linewidth = linewidth, alpha = alpha
+    geom_ribbon(aes(ymin = lb, ymax = ub, fill = "yrep"), alpha = alpha) +
+    geom_line(
+      aes(color = "y"),
+      linewidth = linewidth
     ) +
     facet_wrap(vars(group)) +
     scale_color_ppc() +
+    scale_fill_ppc() +
     bayesplot_theme_get() +
     legend_none() +
     coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +

From f9806eb8e2401dde2858ddec974374fb2684fd34 Mon Sep 17 00:00:00 2001
From: jgabry <jgabry@gmail.com>
Date: Thu, 22 May 2025 16:16:21 -0600
Subject: [PATCH 5/7] fix typo preventing building doc

---
 R/ppc-calibration.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R
index a3d4af42..c26d1202 100644
--- a/R/ppc-calibration.R
+++ b/R/ppc-calibration.R
@@ -7,7 +7,7 @@
 #' @name PPC-calibration
 #' @family PPCs
 #'
-#' @template args-y-rep
+#' @template args-y-yrep
 #' @template args-group
 #'
 #' @template return-ggplot-or-data

From 5efdf479aee2467636f7bfb753e282f35f0be4f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= <teemu.sailynoja@gmail.com>
Date: Thu, 3 Jul 2025 12:03:19 +0300
Subject: [PATCH 6/7] Add ppc_calibration plots to namespace and docs.

---
 NAMESPACE                  |   6 ++
 man/PPC-calibration.Rd     | 115 +++++++++++++++++++++++++++++++++++++
 man/PPC-censoring.Rd       |   1 +
 man/PPC-discrete.Rd        |   1 +
 man/PPC-distributions.Rd   |   1 +
 man/PPC-errors.Rd          |   1 +
 man/PPC-intervals.Rd       |   1 +
 man/PPC-loo.Rd             |   1 +
 man/PPC-overview.Rd        |   1 +
 man/PPC-scatterplots.Rd    |   1 +
 man/PPC-test-statistics.Rd |   1 +
 11 files changed, 130 insertions(+)
 create mode 100644 man/PPC-calibration.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 614fe4e7..1bb79ec5 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -119,6 +119,10 @@ export(ppc_bars)
 export(ppc_bars_data)
 export(ppc_bars_grouped)
 export(ppc_boxplot)
+export(ppc_calibration)
+export(ppc_calibration_grouped)
+export(ppc_calibration_overlay)
+export(ppc_calibration_overlay_grouped)
 export(ppc_data)
 export(ppc_dens)
 export(ppc_dens_overlay)
@@ -141,6 +145,8 @@ export(ppc_intervals_data)
 export(ppc_intervals_grouped)
 export(ppc_km_overlay)
 export(ppc_km_overlay_grouped)
+export(ppc_loo_calibration)
+export(ppc_loo_calibration_grouped)
 export(ppc_loo_intervals)
 export(ppc_loo_pit)
 export(ppc_loo_pit_data)
diff --git a/man/PPC-calibration.Rd b/man/PPC-calibration.Rd
new file mode 100644
index 00000000..fbd9a0c3
--- /dev/null
+++ b/man/PPC-calibration.Rd
@@ -0,0 +1,115 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ppc-calibration.R
+\name{PPC-calibration}
+\alias{PPC-calibration}
+\alias{ppc_calibration_overlay}
+\alias{ppc_calibration_overlay_grouped}
+\alias{ppc_calibration}
+\alias{ppc_calibration_grouped}
+\alias{ppc_loo_calibration}
+\alias{ppc_loo_calibration_grouped}
+\title{Assess the calibration of the predictive distributions \code{yrep} in relation to
+the data `y'.
+See the \strong{Plot Descriptions} section, below, for details.}
+\usage{
+ppc_calibration_overlay(y, prep, ..., linewidth = 0.25, alpha = 0.5)
+
+ppc_calibration_overlay_grouped(
+  y,
+  prep,
+  group,
+  ...,
+  linewidth = 0.25,
+  alpha = 0.7
+)
+
+ppc_calibration(y, prep, prob = 0.95, ..., linewidth = 0.5, alpha = 0.7)
+
+ppc_calibration_grouped(
+  y,
+  prep,
+  group,
+  prob = 0.95,
+  ...,
+  linewidth = 0.5,
+  alpha = 0.7
+)
+
+ppc_loo_calibration(y, yrep, lw, ..., linewidth = 0.25, alpha = 0.7)
+
+ppc_loo_calibration_grouped(
+  y,
+  yrep,
+  group,
+  lw,
+  ...,
+  linewidth = 0.25,
+  alpha = 0.7
+)
+}
+\arguments{
+\item{y}{A vector of observations. See \strong{Details}.}
+
+\item{group}{A grouping variable of the same length as \code{y}.
+Will be coerced to \link[base:factor]{factor} if not already a factor.
+Each value in \code{group} is interpreted as the group level pertaining
+to the corresponding observation.}
+
+\item{yrep}{An \code{S} by \code{N} matrix of draws from the posterior (or prior)
+predictive distribution. The number of rows, \code{S}, is the size of the
+posterior (or prior) sample used to generate \code{yrep}. The number of columns,
+\code{N} is the number of predicted observations (\code{length(y)}). The columns of
+\code{yrep} should be in the same order as the data points in \code{y} for the plots
+to make sense. See the \strong{Details} and \strong{Plot Descriptions} sections for
+additional advice specific to particular plots.}
+}
+\value{
+The plotting functions return a ggplot object that can be further
+customized using the \strong{ggplot2} package. The functions with suffix
+\verb{_data()} return the data that would have been drawn by the plotting
+function.
+}
+\description{
+Assess the calibration of the predictive distributions \code{yrep} in relation to
+the data `y'.
+See the \strong{Plot Descriptions} section, below, for details.
+}
+\section{Plot Descriptions}{
+
+\describe{
+\item{\code{ppc_calibration()},\code{ppc_calibration_grouped()}}{
+
+},
+\item{\code{ppc_calibration_overlay()},\code{ppc_calibration_overlay_grouped()}}{
+
+},
+\item{\code{ppc_loo_calibration()},\code{ppc_loo_calibration_grouped()}}{
+
+}
+}
+}
+
+\examples{
+color_scheme_set("brightblue")
+
+# Make an example dataset of binary observations
+ymin <- range(example_y_data(), example_yrep_draws())[1]
+ymax <- range(example_y_data(), example_yrep_draws())[2]
+y <- rbinom(length(example_y_data()), 1, (example_y_data() - ymin) / (ymax - ymin))
+prep <- (example_yrep_draws() - ymin) / (ymax - ymin)
+
+ppc_calibration_overlay(y, prep[1:50, ])
+}
+\seealso{
+Other PPCs: 
+\code{\link{PPC-censoring}},
+\code{\link{PPC-discrete}},
+\code{\link{PPC-distributions}},
+\code{\link{PPC-errors}},
+\code{\link{PPC-intervals}},
+\code{\link{PPC-loo}},
+\code{\link{PPC-overview}},
+\code{\link{PPC-scatterplots}},
+\code{\link{PPC-test-statistics}}
+}
+\concept{PPCs}
diff --git a/man/PPC-censoring.Rd b/man/PPC-censoring.Rd
index 736a15a9..39028ee3 100644
--- a/man/PPC-censoring.Rd
+++ b/man/PPC-censoring.Rd
@@ -132,6 +132,7 @@ doi:10.1080/01621459.1958.10501452.
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
 \code{\link{PPC-errors}},
diff --git a/man/PPC-discrete.Rd b/man/PPC-discrete.Rd
index 434ba7bd..4f2fcabd 100644
--- a/man/PPC-discrete.Rd
+++ b/man/PPC-discrete.Rd
@@ -216,6 +216,7 @@ Visualizing count data regressions using rootograms.
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-distributions}},
 \code{\link{PPC-errors}},
diff --git a/man/PPC-distributions.Rd b/man/PPC-distributions.Rd
index b178dc08..864d6afd 100644
--- a/man/PPC-distributions.Rd
+++ b/man/PPC-distributions.Rd
@@ -372,6 +372,7 @@ Press, London, third edition. (Ch. 6)
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-errors}},
diff --git a/man/PPC-errors.Rd b/man/PPC-errors.Rd
index 88610f8b..92585f56 100644
--- a/man/PPC-errors.Rd
+++ b/man/PPC-errors.Rd
@@ -213,6 +213,7 @@ Press, London, third edition. (Ch. 6)
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
diff --git a/man/PPC-intervals.Rd b/man/PPC-intervals.Rd
index ddac2ebc..369ab029 100644
--- a/man/PPC-intervals.Rd
+++ b/man/PPC-intervals.Rd
@@ -240,6 +240,7 @@ Press, London, third edition. (Ch. 6)
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
diff --git a/man/PPC-loo.Rd b/man/PPC-loo.Rd
index 10f29d8c..3113fab8 100644
--- a/man/PPC-loo.Rd
+++ b/man/PPC-loo.Rd
@@ -349,6 +349,7 @@ https://www.jstor.org/stable/2986005.
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
diff --git a/man/PPC-overview.Rd b/man/PPC-overview.Rd
index 16d3f0ff..f58465d2 100644
--- a/man/PPC-overview.Rd
+++ b/man/PPC-overview.Rd
@@ -124,6 +124,7 @@ Press, London, third edition. (Ch. 6)
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
diff --git a/man/PPC-scatterplots.Rd b/man/PPC-scatterplots.Rd
index 64963c40..9e714236 100644
--- a/man/PPC-scatterplots.Rd
+++ b/man/PPC-scatterplots.Rd
@@ -137,6 +137,7 @@ Press, London, third edition. (Ch. 6)
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
diff --git a/man/PPC-test-statistics.Rd b/man/PPC-test-statistics.Rd
index 83883979..c9368794 100644
--- a/man/PPC-test-statistics.Rd
+++ b/man/PPC-test-statistics.Rd
@@ -213,6 +213,7 @@ Press, London, third edition. (Ch. 6)
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},

From a8b4264bd35eba2404e13e3bcfdb5807fa5724a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= <teemu.sailynoja@gmail.com>
Date: Tue, 15 Jul 2025 09:45:35 +0300
Subject: [PATCH 7/7] Sync process. WIP. ISSUE: ppc_calibratrion loses the
 posterior mean.

---
 R/ppc-calibration.R                   | 307 ++++++++++++++++++++------
 tests/testthat/test-ppc-calibration.R | 257 +++++++++++++++++++++
 2 files changed, 494 insertions(+), 70 deletions(-)
 create mode 100644 tests/testthat/test-ppc-calibration.R

diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R
index c26d1202..0d42d15f 100644
--- a/R/ppc-calibration.R
+++ b/R/ppc-calibration.R
@@ -1,7 +1,7 @@
-# x' PPC calibration
+#' PPC calibration
 #'
-#' Assess the calibration of the predictive distributions `yrep` in relation to
-#' the data `y'.
+#' Assess the calibration of the predictions, or predictive probabilites in relation to
+#' binary observations.
 #' See the **Plot Descriptions** section, below, for details.
 #'
 #' @name PPC-calibration
@@ -9,19 +9,29 @@
 #'
 #' @template args-y-yrep
 #' @template args-group
+#' @param interval_type For `ppc_calibration()`, `ppc_calibration_grouped()`,
+#'   'ppc_loo_calibration()', and ´ppc_loo_calibration_grouped()´, the type of
+#'    interval to compute. Options are '"consistency"' (default) for credible
+#'    intervals for the PAV-adjusted calibration curve of posterior predictive
+#'    sample, or `"confidence"` for the credible intervals of the calibration
+#'    curve of the observed binary events.
 #'
 #' @template return-ggplot-or-data
 #'
 #' @section Plot Descriptions:
 #' \describe{
 #'   \item{`ppc_calibration()`,`ppc_calibration_grouped()`}{
-#'
-#'   },
+#'   PAV-adjusted calibration plots showing the relationship between the
+#'   predicted event probabilities and the conditional event probabilities.
+#'   The `interval_type` parameter controls whether confidence intervals, or
+#'   consistency intervals are computed.},
 #'   \item{`ppc_calibration_overlay()`,`ppc_calibration_overlay_grouped()`}{
-#'
+#'   Overlay plots showing posterior samples of PAV-adjusted calibration
+#'   curves.
 #'   },
 #'   \item{`ppc_loo_calibration()`,`ppc_loo_calibration_grouped()`}{
-#'
+#'   PAV-adjusted calibration plots to assess the calibration of the
+#'   leave-one-out (LOO) predictive probabilities.
 #'   }
 #' }
 #'
@@ -35,6 +45,10 @@
 #' prep <- (example_yrep_draws() - ymin) / (ymax - ymin)
 #'
 #' ppc_calibration_overlay(y, prep[1:50, ])
+#'
+#' # Compare confidence vs consistency intervals
+#' ppc_calibration(y, prep, interval_type = "confidence")
+#' ppc_calibration(y, prep, interval_type = "consistency")
 NULL
 
 
@@ -64,7 +78,7 @@ ppc_calibration_overlay <- function(
 ppc_calibration_overlay_grouped <- function(
     y, prep, group, ..., linewidth = 0.25, alpha = 0.7) {
   check_ignored_arguments(...)
-  data <- .ppc_calibration_data(y, prep, group)
+  data <- .ppc_calibration_data(y, prep = prep, group = group)
   ggplot(data) +
     geom_abline(color = "black", linetype = 2) +
     geom_line(aes(value, cep, group = rep_id, color = "yrep"),
@@ -83,15 +97,17 @@ ppc_calibration_overlay_grouped <- function(
 #' @rdname PPC-calibration
 #' @export
 ppc_calibration <- function(
-    y, prep, prob = .95, show_mean = TRUE, ..., linewidth = 0.5, alpha = 0.7) {
+    y, prep = NULL, yrep = NULL, prob = .95, interval_type = c("confidence", "consistency"), ...,
+    linewidth = 0.5, alpha = 0.7) {
   check_ignored_arguments(...)
-  data <- .ppc_calibration_data(y, prep) %>%
-    group_by(y_id) %>%
+  interval_type <- match.arg(interval_type)
+  data <- .ppc_calibration_data(y, prep, yrep, NULL, interval_type) %>%
+    group_by(idx) %>%
     summarise(
-      value = median(value),
-      lb = quantile(cep, .5 - .5 * prob),
-      ub = quantile(cep, .5 + .5 * prob),
-      cep = median(cep)
+      value = mean(value),
+      lb = quantile(cep_intervals, .5 - .5 * prob),
+      ub = quantile(cep_intervals, .5 + .5 * prob),
+      cep = mean(cep)
     )
 
   ggplot(data) +
@@ -115,16 +131,26 @@ ppc_calibration <- function(
 #' @rdname PPC-calibration
 #' @export
 ppc_calibration_grouped <- function(
-    y, prep, group, prob = .95, show_mean = TRUE, ..., linewidth = 0.5, alpha = 0.7) {
+    y,
+    prep = NULL,
+    yrep = NULL,
+    group,
+    prob = .95,
+    interval_type = c("confidence", "consistency"),
+    ...,
+    linewidth = 0.5,
+    alpha = 0.7) {
   check_ignored_arguments(...)
-  data <- .ppc_calibration_data(y, prep, group) %>%
-    group_by(group, y_id) %>%
+  interval_type <- match.arg(interval_type)
+  data <- .ppc_calibration_data(y, prep, yrep, group, interval_type) %>%
+    group_by(group, idx) %>%
     summarise(
-      value = median(value),
-      lb = quantile(cep, .5 - .5 * prob),
-      ub = quantile(cep, .5 + .5 * prob),
-      cep = median(cep)
-    )
+      value = mean(value),
+      lb = quantile(cep_intervals, .5 - .5 * prob),
+      ub = quantile(cep_intervals, .5 + .5 * prob),
+      cep = mean(cep),
+    ) %>%
+    ungroup()
 
   ggplot(data) +
     aes(value, cep) +
@@ -148,58 +174,178 @@ ppc_calibration_grouped <- function(
 #' @rdname PPC-calibration
 #' @export
 ppc_loo_calibration <- function(
-    y, prep, lw, ..., linewidth = 0.25, alpha = 0.7) {
+    y, yrep, lw, prob = .95, interval_type = c("confidence", "consistency"), ..., linewidth = 0.5, alpha = 0.7) {
   check_ignored_arguments(...)
-  data <- .ppc_calibration_data(y, prep)
-  ggplot(data) +
-    geom_abline(color = "black", linetype = 2) +
-    geom_line(
-      aes(value, cep, group = rep_id, color = "yrep"),
-      linewidth = linewidth, alpha = alpha
-    ) +
-    scale_color_ppc() +
-    bayesplot_theme_get() +
-    legend_none() +
-    coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
-    xlab("Predicted probability") +
-    ylab("Conditional event probability") +
-    NULL
+  # Create LOO-predictive samples using resampling
+  yrep_resampled <- .loo_resample_data(yrep, lw, psis_object = NULL)
+  ppc_calibration(y, yrep = yrep_resampled, prob = prob, interval_type = interval_type, ..., linewidth = linewidth, alpha = alpha)
 }
 
 #' @rdname PPC-calibration
 #' @export
 ppc_loo_calibration_grouped <- function(
-    y, prep, group, lw, ..., linewidth = 0.25, alpha = 0.7) {
+    y,
+    yrep,
+    group,
+    lw,
+    prob = .95,
+    interval_type = c("confidence", "consistency"),
+    ...,
+    linewidth = 0.5,
+    alpha = 0.7) {
   check_ignored_arguments(...)
-  data <- .ppc_calibration_data(y, prep, group)
-  ggplot(data) +
-    geom_abline(color = "black", linetype = 2) +
-    geom_line(aes(value, cep, group = rep_id, color = "yrep"),
-      linewidth = linewidth, alpha = alpha
-    ) +
-    facet_wrap(vars(group)) +
-    scale_color_ppc() +
-    bayesplot_theme_get() +
-    legend_none() +
-    coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
-    xlab("Predicted probability") +
-    ylab("Conditional event probability") +
-    NULL
+  # Create LOO-predictive samples using resampling
+  yrep_resampled <- .loo_resample_data(yrep, lw, psis_object = NULL)
+  ppc_calibration_grouped(
+    y,
+    yrep = yrep_resampled,
+    group = group,
+    prob = prob,
+    interval_type = interval_type,
+    ...,
+    linewidth = linewidth,
+    alpha = alpha
+  )
 }
 
-.ppc_calibration_data <- function(y, prep, group = NULL) {
+.ppc_calibration_data <- function(
+    y,
+    prep = NULL,
+    yrep = NULL,
+    group = NULL,
+    interval_type = c("confidence", "consistency")) {
   y <- validate_y(y)
   n_obs <- length(y)
-  prep <- validate_predictions(prep, n_obs)
-  if (any(prep > 1 | prep < 0)) {
-    stop("Values of ´prep´ should be predictive probabilities between 0 and 1.")
+  interval_type <- match.arg(interval_type)
+
+  # Determine if we're using prep (probabilities) or yrep (predictive samples)
+  if (!is.null(prep)) {
+    predictions <- validate_predictions(prep, n_obs)
+    if (any(prep > 1 | prep < 0)) {
+      stop(
+        "Values of ´prep´ should be predictive probabilities between 0 and 1."
+      )
+    }
+    is_probability <- TRUE
+  } else if (!is.null(yrep)) {
+    predictions <- validate_predictions(yrep, n_obs)
+    is_probability <- FALSE
+  } else {
+    stop("Either 'prep' or 'yrep' must be provided.")
   }
+
   if (!is.null(group)) {
     group <- validate_group(group, n_obs)
   } else {
-    group <- rep(1, n_obs * nrow(prep))
+    group <- rep(1, n_obs)
+  }
+
+  data <- .ppd_data(predictions, group = group)
+
+  if (interval_type == "confidence") {
+    if (is_probability) {
+      # confidence interval from predicted probabilities:
+      # cep = cep_intervals = monotone(y[order(ppred)])
+      # i.e. posterior of the calibration curve trajectory
+      # data %>%
+      #   group_by(group, rep_id) %>%
+      #   mutate(
+      #     ord = order(value),
+      #     value = value[ord],
+      #     cep_intervals = .monotone(y[ord]),
+      #     cep = cep_intervals,
+      #     idx = seq_len(n())
+      #   ) %>%
+       data %>%
+        group_by(group) %>%
+        group_modify(.f = pava_transform(.x, .y, y, NULL)) %>%
+        ungroup() %>%
+        select(value, cep_intervals, cep, idx, group)
+    } else {
+      ppred <- colMeans(predictions)
+      data %>%
+        group_by(group, rep_id) %>%
+        mutate(
+          idx_boot = sample(y_id, n(), replace = TRUE),
+          ord = order(ppred[idx_boot]),
+          value = ppred[idx_boot][ord],
+          cep_intervals = .monotone(y[idx_boot][ord]),
+          cep = cep_intervals,
+          idx = seq_len(n()),
+        ) %>%
+        ungroup() %>%
+        select(value, cep_intervals, cep, idx, group)
+    }
+  } else {
+    # Consistency intervals
+    if (is_probability) {
+      # For prep (probabilities), generate predictive samples from binomial
+      data %>%
+        group_by(group, rep_id) %>%
+        mutate(
+          # Generate predictive samples from binomial distribution
+          ord = order(value),
+          value = value[ord],
+          cep_intervals = .monotone(rbinom(n(), 1, value)),
+          cep = .monotone(y[ord]),
+          idx = seq_len(n())
+        ) %>%
+        ungroup() %>%
+        select(value, cep, cep_intervals, idx, group)
+    } else {
+      # For yrep (predictive samples), use column averages for ordering
+      ppred <- colMeans(predictions)
+      data %>%
+        group_by(group, rep_id) %>%
+        mutate(
+          # Use column averages for ordering when yrep is provided
+          ord = order(ppred[y_id]),
+          cep_intervals = .monotone(value[ord]),
+          value = ppred[y_id][ord],
+          cep = .monotone(y[y_id][ord]),
+          idx = seq_len(n()),
+        ) %>%
+        ungroup() %>%
+        select(value, cep_intervals, cep, idx, group)
+    }
+  }
+}
+
+.loo_resample_data <- function(yrep, lw, psis_object) {
+  lw <- .get_lw(lw, psis_object)
+  stopifnot(identical(dim(yrep), dim(lw)))
+
+  # Resample each column (observation) with its corresponding weights
+  n_obs <- ncol(yrep)
+  n_draws <- nrow(yrep)
+
+  # Initialize resampled matrix
+  yrep_resampled <- matrix(NA, nrow = n_draws, ncol = n_obs)
+
+  for (i in 1:n_obs) {
+    # Create draws object for this observation
+    draws_i <- posterior::as_draws_matrix(yrep[, i, drop = FALSE])
+
+    # Resample using the weights for this observation
+    weights_i <- lw[, i]
+    resampled_i <- posterior::resample_draws(
+      draws_i,
+      weights = weights_i, ndraws = n_draws
+    )
+
+    # Extract the resampled values
+    yrep_resampled[, i] <- as.numeric(resampled_i)
+  }
+
+  # Add observation names if available
+  if (!is.null(colnames(yrep))) {
+    colnames(yrep_resampled) <- colnames(yrep)
   }
 
+  yrep_resampled
+}
+
+.monotone <- function(y) {
   if (requireNamespace("monotone", quietly = TRUE)) {
     monotone <- monotone::monotone
   } else {
@@ -207,18 +353,39 @@ ppc_loo_calibration_grouped <- function(
       stats::isoreg(y)$yf
     }
   }
-  .ppd_data(prep, group = group) %>%
-    group_by(group, rep_id) %>%
-    mutate(
-      ord = order(value),
-      value = value[ord],
-      cep = monotone(y[ord])
-    ) |>
-    ungroup()
+  monotone(y)
 }
 
-.loo_resample_data <- function(prep, lw, psis_object) {
-  lw <- .get_lw(lw, psis_object)
-  stopifnot(identical(dim(prep), dim(lw)))
-  # Work in progress here...
+pava_transform <- function(.x, .y, y, yrep, interval_type) {
+  if (no_prob) {
+    probs <- .x %>%
+      group_by(y_id) %>%
+      summarise(p = mean(yrep)) %>%
+      arrange(y_id) %>%
+      pull(p)
+    ord <- order(probs)
+    data <- .x |>
+      group_by(rep_id) |>
+      mutate(ord = ord) |>
+      ungroup()
+  } else {
+    data <- data |>
+      group_by(rep_id) |>
+      mutate(ord = order(value)) |>
+      ungroup()
+  }
+  if (interval_type == "confidence") {
+    data %>%
+      group_by(rep_id) %>%
+      mutate(
+        cep_intervals = .monotone(y[y_id][ord_v]),
+        value = ifelse(is.null(yrep), value[ord_v], probs[ord],
+        cep = cep_interval,
+        idx = seq_len(n()),
+      ) %>%
+      ungroup() %>%
+      select(value, cep_intervals, cep, idx, rep_id, y_id)
+  } else {
+    data 
+  }
 }
diff --git a/tests/testthat/test-ppc-calibration.R b/tests/testthat/test-ppc-calibration.R
new file mode 100644
index 00000000..1858ace5
--- /dev/null
+++ b/tests/testthat/test-ppc-calibration.R
@@ -0,0 +1,257 @@
+library(bayesplot)
+context("PPC: calibration")
+
+if (!exists("expect_gg")) expect_gg <- bayesplot:::expect_gg
+
+# Create binary test data for calibration plots
+set.seed(1234)
+n_obs <- 100
+n_draws <- 50
+p_true <- runif(n_obs)
+calib_y <- rbinom(n_obs, 1, p_true)
+calib_prep <- matrix(pmin(1,pmax(0, p_true + rnorm(n_obs * n_draws, 0, .1))), nrow = n_draws, ncol = n_obs)
+calib_yrep <- t(apply(calib_prep, 1, rbinom, n = n_obs, size = 1))
+calib_group <- gl(2, n_obs / 2, labels = c("A", "B"))
+calib_lw <- matrix(runif(n_obs * n_draws), ncol = n_obs, nrow = n_draws)
+
+test_that("ppc_calibration_overlay returns a ggplot object", {
+  expect_gg(ppc_calibration_overlay(calib_y, calib_prep))
+  expect_gg(ppc_calibration_overlay(calib_y, calib_prep[1:5, ], 
+                                   linewidth = 0.5, alpha = 0.3))
+})
+
+test_that("ppc_calibration_overlay_grouped returns a ggplot object", {
+  expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep, calib_group))
+  expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep[1:3, ], calib_group,
+                                           linewidth = 0.5, alpha = 0.3))
+})
+
+test_that("ppc_calibration returns a ggplot object", {
+  expect_gg(ppc_calibration(calib_y, calib_prep))
+  expect_gg(ppc_calibration(calib_y, calib_yrep))
+  expect_gg(ppc_calibration(calib_y, calib_prep, prob = 0.9,
+                           linewidth = 0.8, alpha = 0.5))
+  # Test new interval_type parameter
+  expect_gg(ppc_calibration(calib_y, calib_prep, interval_type = "confidence"))
+  expect_gg(ppc_calibration(calib_y, calib_prep, interval_type = "consistency"))
+})
+
+test_that("ppc_calibration_grouped returns a ggplot object", {
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep, calib_group))
+  expect_gg(ppc_calibration_grouped(calib_y, calib_yrep, calib_group))
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep[1:5, ], calib_group,
+                                   prob = 0.9, linewidth = 0.8, alpha = 0.5))
+  # Test new interval_type parameter
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep, calib_group, interval_type = "confidence"))
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep, calib_group, interval_type = "consistency"))
+})
+
+test_that("ppc_loo_calibration returns a ggplot object", {
+  # Note: This function now returns interval plots instead of overlay plots
+  # Testing basic functionality for now
+  expect_gg(ppc_loo_calibration(calib_y, calib_yrep, calib_prep))
+  expect_gg(ppc_loo_calibration(calib_y, calib_yrep[1:3, ], calib_prep[1:3, ],
+                               prob = 0.9, linewidth = 0.8, alpha = 0.5))
+})
+
+test_that("ppc_loo_calibration_grouped returns a ggplot object", {
+  # Note: This function now returns interval plots instead of overlay plots
+  # Testing basic functionality for now
+  expect_gg(ppc_loo_calibration_grouped(calib_y, calib_prep, calib_group, calib_lw))
+  expect_gg(ppc_loo_calibration_grouped(calib_y, calib_prep[1:3, ], calib_group, calib_lw[1:3, ],
+                                       prob = 0.9, linewidth = 0.8, alpha = 0.5))
+})
+
+test_that("calibration functions handle edge cases", {
+  # Single observation
+  expect_gg(ppc_calibration_overlay(1, matrix(0.5, nrow = 1, ncol = 1)))
+  expect_gg(ppc_calibration(1, matrix(0.5, nrow = 1, ncol = 1)))
+  
+  # Single draw
+  expect_gg(ppc_calibration_overlay(calib_y, calib_prep[1, , drop = FALSE]))
+  expect_gg(ppc_calibration(calib_y, calib_prep[1, , drop = FALSE]))
+  
+  # All zeros or ones
+  expect_gg(ppc_calibration_overlay(rep(0, 10), matrix(0.1, nrow = 5, ncol = 10)))
+  expect_gg(ppc_calibration_overlay(rep(1, 10), matrix(0.9, nrow = 5, ncol = 10)))
+})
+
+test_that("calibration functions validate inputs correctly", {
+  # Invalid probabilities (outside [0,1])
+  expect_error(ppc_calibration_overlay(calib_y, matrix(1.5, nrow = 5, ncol = 50)),
+               "Values of ´prep´ should be predictive probabilities between 0 and 1")
+  expect_error(ppc_calibration_overlay(calib_y, matrix(-0.1, nrow = 5, ncol = 50)),
+               "Values of ´prep´ should be predictive probabilities between 0 and 1")
+  
+  # Mismatched dimensions
+  expect_error(ppc_calibration_overlay(calib_y, calib_prep[, 1:25]),
+               "ncol(yrep) must be equal to length(y).", fixed=TRUE)
+  
+  # Invalid group
+  expect_error(ppc_calibration_overlay_grouped(calib_y, calib_prep, calib_group[1:25]),
+               "length(group) must be equal to the number of observations.",
+              fixed=TRUE)
+  
+  # Invalid interval_type
+  expect_error(ppc_calibration(calib_y, calib_prep, interval_type = "invalid"),
+               "should be one of")
+})
+
+test_that("calibration functions work with different group types", {
+  # Numeric groups
+  expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep, as.numeric(calib_group)))
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep, as.numeric(calib_group)))
+  
+  # Integer groups
+  expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep, as.integer(calib_group)))
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep, as.integer(calib_group)))
+  
+  # Character groups
+  expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep, as.character(calib_group)))
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep, as.character(calib_group)))
+})
+
+# Visual tests -----------------------------------------------------------------
+
+test_that("ppc_calibration_overlay renders correctly", {
+  testthat::skip_on_cran()
+  testthat::skip_if_not_installed("vdiffr")
+  skip_on_r_oldrel()
+
+  p_base <- ppc_calibration_overlay(calib_y, calib_prep)
+  vdiffr::expect_doppelganger("ppc_calibration_overlay (default)", p_base)
+
+  p_custom <- ppc_calibration_overlay(
+    calib_y,
+    calib_prep,
+    prob = .99,
+    linewidth = 0.5,
+    alpha = 0.3
+  )
+  vdiffr::expect_doppelganger("ppc_calibration_overlay (custom)", p_custom)
+})
+
+test_that("ppc_calibration_overlay_grouped renders correctly", {
+  testthat::skip_on_cran()
+  testthat::skip_if_not_installed("vdiffr")
+  skip_on_r_oldrel()
+
+  p_base <- ppc_calibration_overlay_grouped(calib_y, calib_prep, calib_group)
+  vdiffr::expect_doppelganger("ppc_calibration_overlay_grouped (default)", p_base)
+
+  p_custom <- ppc_calibration_overlay_grouped(
+    calib_y,
+    calib_prep,
+    calib_group,
+    prob = .99,
+    linewidth = 0.5,
+    alpha = 0.3
+  )
+  vdiffr::expect_doppelganger("ppc_calibration_overlay_grouped (custom)", p_custom)
+})
+
+test_that("ppc_calibration renders correctly", {
+  testthat::skip_on_cran()
+  testthat::skip_if_not_installed("vdiffr")
+  skip_on_r_oldrel()
+
+  p_base <- ppc_calibration(calib_y, calib_prep)
+  vdiffr::expect_doppelganger("ppc_calibration (default)", p_base)
+
+  p_custom <- ppc_calibration(
+    calib_y,
+    calib_prep,
+    prob = 0.9,
+    linewidth = 0.8,
+    alpha = 0.5
+  )
+  vdiffr::expect_doppelganger("ppc_calibration (custom)", p_custom)
+  
+  # Test interval_type variants
+  p_confidence <- ppc_calibration(
+    calib_y,
+    calib_prep,
+    interval_type = "confidence"
+  )
+  vdiffr::expect_doppelganger("ppc_calibration (confidence)", p_confidence)
+  
+  p_consistency <- ppc_calibration(
+    calib_y,
+    calib_prep,
+    interval_type = "consistency"
+  )
+  vdiffr::expect_doppelganger("ppc_calibration (consistency)", p_consistency)
+})
+
+test_that("ppc_calibration_grouped renders correctly", {
+  testthat::skip_on_cran()
+  testthat::skip_if_not_installed("vdiffr")
+  skip_on_r_oldrel()
+
+  p_base <- ppc_calibration_grouped(calib_y, calib_prep, calib_group)
+  vdiffr::expect_doppelganger("ppc_calibration_grouped (default)", p_base)
+
+  p_custom <- ppc_calibration_grouped(
+    calib_y,
+    calib_prep,
+    calib_group,
+    prob = 0.9,
+    linewidth = 0.8,
+    alpha = 0.5
+  )
+  vdiffr::expect_doppelganger("ppc_calibration_grouped (custom)", p_custom)
+  
+  # Test interval_type variants
+  p_confidence <- ppc_calibration_grouped(
+    calib_y,
+    calib_prep,
+    calib_group,
+    interval_type = "confidence"
+  )
+  vdiffr::expect_doppelganger("ppc_calibration_grouped (confidence)", p_confidence)
+  
+  p_consistency <- ppc_calibration_grouped(
+    calib_y,
+    calib_prep,
+    calib_group,
+    interval_type = "consistency"
+  )
+  vdiffr::expect_doppelganger("ppc_calibration_grouped (consistency)", p_consistency)
+})
+
+test_that("ppc_loo_calibration renders correctly", {
+  testthat::skip_on_cran()
+  testthat::skip_if_not_installed("vdiffr")
+  skip_on_r_oldrel()
+
+  p_base <- ppc_loo_calibration(calib_y, calib_yrep, calib_lw)
+  vdiffr::expect_doppelganger("ppc_loo_calibration (default)", p_base)
+
+  p_custom <- ppc_loo_calibration(
+    calib_y,
+    calib_yrep,
+    calib_lw,
+    linewidth = 0.5,
+    alpha = 0.3
+  )
+  vdiffr::expect_doppelganger("ppc_loo_calibration (custom)", p_custom)
+})
+
+test_that("ppc_loo_calibration_grouped renders correctly", {
+  testthat::skip_on_cran()
+  testthat::skip_if_not_installed("vdiffr")
+  skip_on_r_oldrel()
+
+  p_base <- ppc_loo_calibration_grouped(calib_y, calib_yrep, calib_group, calib_lw)
+  vdiffr::expect_doppelganger("ppc_loo_calibration_grouped (default)", p_base)
+
+  p_custom <- ppc_loo_calibration_grouped(
+    calib_y,
+    calib_yrep,
+    calib_group,
+    calib_lw,
+    linewidth = 0.5,
+    alpha = 0.3
+  )
+  vdiffr::expect_doppelganger("ppc_loo_calibration_grouped (custom)", p_custom)
+})