diff --git a/NAMESPACE b/NAMESPACE
index 683b32d..48d6f9a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -11,6 +11,7 @@ S3method(print,mipdeval_results_stats_summ)
export(accuracy)
export(add_grouping_column)
export(calculate_bayesian_impact)
+export(calculate_fit_weights)
export(calculate_shrinkage)
export(calculate_stats)
export(compare_psn_execute_results)
diff --git a/R/calculate_fit_weights.R b/R/calculate_fit_weights.R
new file mode 100644
index 0000000..195491f
--- /dev/null
+++ b/R/calculate_fit_weights.R
@@ -0,0 +1,114 @@
+#' Calculate time-based sample weights for MAP Bayesian fitting
+#'
+#' Downweights older observations relative to more recent ones during the
+#' iterative MAP Bayesian fitting step. Can be passed as the `weights`
+#' argument to [run_eval()].
+#'
+#' Available schemes:
+#' - `"weight_all"`: all samples weighted equally (weight = 1).
+#' - `"weight_last_only"`: only the most recent sample is used (weight = 1),
+#' all others are excluded (weight = 0).
+#' - `"weight_last_two_only"`: only the two most recent samples are used.
+#' - `"weight_gradient_linear"`: weights increase linearly from a minimum
+#' (`w1`) for samples older than `t1` days to a maximum (`w2`) for samples
+#' more recent than `t2` days. Accepts optional list element `gradient` with
+#' named elements `t1`, `w1`, `t2`, `w2`. Default:
+#' `list(t1 = 7, w1 = 0, t2 = 2, w2 = 1)`.
+#' - `"weight_gradient_exponential"`: weights decay exponentially with the age
+#' of the sample. Accepts optional list elements `t12_decay` (half-life of
+#' decay in hours, default 48) and `t_start` (delay in hours before decay
+#' starts, default 0).
+#'
+#' For schemes with additional parameters, pass `weights` as a named list with
+#' a `scheme` element plus any scheme-specific elements, e.g.:
+#' ```r
+#' list(scheme = "weight_gradient_exponential", t12_decay = 72)
+#' list(scheme = "weight_gradient_linear", gradient = list(t1 = 5, w1 = 0.1, t2 = 1, w2 = 1))
+#' ```
+#'
+#' @param weights weighting scheme: a string with the scheme name, or a named
+#' list with a `scheme` element plus optional scheme-specific parameters.
+#' @param t numeric vector of observation times (in hours)
+#'
+#' @returns numeric vector of weights the same length as `t`, or `NULL` if
+#' `weights` is `NULL`.
+#' @export
+calculate_fit_weights <- function(weights = NULL, t = NULL) {
+  if (is.null(weights) || is.null(t)) return(NULL)  # nothing to weight
+
+  scheme <- if (is.list(weights)) weights$scheme else weights
+
+  valid_schemes <- c(
+    "weight_gradient_linear",
+    "weight_gradient_exponential",
+    "weight_last_only",
+    "weight_last_two_only",
+    "weight_all"
+  )
+
+  # Guard on length first: a list without a `scheme` element gives NULL here,
+  # and `!NULL %in% valid_schemes` is logical(0), which would crash if().
+  if (length(scheme) != 1 || !scheme %in% valid_schemes) {
+    warning("Weighting scheme not recognized, ignoring weights.")
+    return(NULL)
+  }
+
+  weight_vec <- NULL
+
+  if (scheme == "weight_gradient_linear") {
+    gradient <- list(t1 = 7, w1 = 0, t2 = 2, w2 = 1)  # thresholds in days
+    if (is.list(weights) && !is.null(weights$gradient)) {
+      gradient[names(weights$gradient)] <- weights$gradient
+    }
+    if (gradient$t2 > gradient$t1) {
+      warning(
+        "weight_gradient_linear: t2 (", gradient$t2, ") > t1 (", gradient$t1,
+        "). t1 should be the older threshold and t2 the more recent one."
+      )
+    }
+    t_start <- max(c(0, max(t) - gradient$t1 * 24))  # days -> hours, clamped at 0
+    t_end <- max(c(0, max(t) - gradient$t2 * 24))
+    if (t_end <= t_start) {
+      weight_vec <- ifelse(t >= t_end, gradient$w2, gradient$w1)  # degenerate: step function
+    } else {
+      weight_vec <- ifelse(
+        t <= t_start, gradient$w1,
+        ifelse(
+          t >= t_end, gradient$w2,
+          gradient$w1 + (gradient$w2 - gradient$w1) * (t - t_start) / (t_end - t_start)
+        )
+      )
+    }
+  }
+
+  if (scheme == "weight_gradient_exponential") {
+    t12_decay <- if (is.list(weights) && !is.null(weights$t12_decay)) weights$t12_decay else 48
+    k_decay <- log(2) / t12_decay
+    t_diff <- max(t) - t  # age of each sample relative to most recent, hours
+    if (is.list(weights) && !is.null(weights$t_start)) {
+      t_diff <- t_diff - weights$t_start
+      t_diff <- ifelse(t_diff < 0, 0, t_diff)  # no decay within the delay window
+    }
+    weight_vec <- exp(-k_decay * t_diff)
+  }
+
+  if (scheme == "weight_last_only") {
+    weight_vec <- rep(0, length(t))
+    weight_vec[which.max(t)] <- 1
+  }
+
+  if (scheme == "weight_last_two_only") {
+    weight_vec <- rep(0, length(t))
+    ranked <- order(t, decreasing = TRUE)
+    weight_vec[ranked[1]] <- 1
+    if (length(t) > 1) weight_vec[ranked[2]] <- 1
+  }
+
+  if (scheme == "weight_all") {
+    weight_vec <- rep(1, length(t))
+  }
+
+  if (!is.null(weight_vec)) {
+    weight_vec[t < 0] <- 0  # pre-dose / invalid times never contribute to the fit
+  }
+
+  weight_vec
+}
diff --git a/R/run_eval.R b/R/run_eval.R
index c04c3cf..0f779f4 100644
--- a/R/run_eval.R
+++ b/R/run_eval.R
@@ -14,6 +14,14 @@
#' this can be used to group peaks and troughs together, or to group
#' observations on the same day together. Grouping will be done prior to
#' running the analysis, so cannot be changed afterwards.
+#' @param weights optional sample downweighting scheme based on how long ago
+#' observations were taken. Either a string naming the scheme (e.g.
+#' `"weight_gradient_exponential"`), or a named list with a `scheme` element
+#' plus any scheme-specific parameters (e.g.
+#' `list(scheme = "weight_gradient_exponential", t12_decay = 72)`). See
+#' [calculate_fit_weights()] for all available schemes and their parameters.
+#' Default is `NULL` (no downweighting; all included samples are weighted
+#' equally).
#' @param censor_covariates with the `proseval` tool in PsN, there is “data
#' leakage” (of future covariates data): since the NONMEM dataset in each step
#' contains the covariates for the future, this is technically data leakage,
@@ -126,6 +134,7 @@ run_eval <- function(
.x = data_parsed,
.f = run_eval_core,
mod_obj = mod_obj,
+ weights = weights,
censor_covariates = censor_covariates,
weight_prior = weight_prior,
incremental = incremental,
diff --git a/R/run_eval_core.R b/R/run_eval_core.R
index 7f105a1..649d675 100644
--- a/R/run_eval_core.R
+++ b/R/run_eval_core.R
@@ -28,13 +28,14 @@ run_eval_core <- function(
for(i in seq_along(iterations)) {
## Select which samples should be used in fit, for regular iterative
- ## forecasting and incremental.
- ## TODO: handle weighting down of earlier samples
- weights <- handle_sample_weighting(
+ ## forecasting and incremental. Applies time-based downweighting if a
+ ## weighting scheme is provided via the `weights` argument.
+ sample_weights <- handle_sample_weighting(
obs_data,
iterations,
incremental,
- i
+ i,
+ weights = weights
)
## Should covariate data be leaked? PsN::proseval does this,
@@ -67,7 +68,7 @@ run_eval_core <- function(
covariates = cov_data,
regimen = data$regimen,
weight_prior = weight_prior,
- weights = weights,
+ weights = sample_weights,
iov_bins = mod_obj$bins,
verbose = FALSE
),
@@ -88,7 +89,7 @@ run_eval_core <- function(
wres = fit$wres,
cwres = fit$cwres,
ofv = fit$fit$value,
- ss_w = ss(fit$dv, fit$ipred, weights),
+ ss_w = ss(fit$dv, fit$ipred, sample_weights),
`_iteration` = iterations[i],
`_grouper` = obs_data$`_grouper`
)
@@ -122,7 +123,7 @@ run_eval_core <- function(
wres = fit$wres,
cwres = fit$cwres,
ofv = fit$fit$value,
- ss_w = ss(fit$dv, fit$ipred, weights),
+ ss_w = ss(fit$dv, fit$ipred, sample_weights),
`_iteration` = iterations[i],
`_grouper` = obs_data$`_grouper`
)
@@ -239,9 +240,9 @@ handle_covariate_censoring <- function(
#' Handle weighting of samples
#'
-#' This function is used to select the samples used in the fit (1 or 0),
-#' but also to select their weight, if a sample weighting strategy is
-#' selected.
+#' Binary selection of which samples are used in the fit (0 = excluded,
+#' 1 = included), combined with optional continuous downweighting of older
+#' samples via a time-based scheme (see [calculate_fit_weights()]).
#'
#' @inheritParams run_eval_core
#' @param obs_data tibble or data.frame with observed data for individual
@@ -254,13 +255,24 @@ handle_sample_weighting <- function(
obs_data,
iterations,
incremental,
- i
+ i,
+ weights = NULL
) {
- weights <- rep(0, nrow(obs_data))
+ binary_weights <- rep(0, nrow(obs_data))
if(incremental) { # just fit current sample or group
- weights[obs_data[["_grouper"]] %in% iterations[i]] <- 1
+ binary_weights[obs_data[["_grouper"]] %in% iterations[i]] <- 1
} else { # fit all samples up until current sample
- weights[obs_data[["_grouper"]] %in% iterations[1:i]] <- 1
+ binary_weights[obs_data[["_grouper"]] %in% iterations[1:i]] <- 1
}
- weights
+ if (!is.null(weights)) {
+ active_idx <- which(binary_weights == 1)
+ scheme_weights <- calculate_fit_weights(
+ weights = weights,
+ t = obs_data$t[active_idx]
+ )
+ if (!is.null(scheme_weights)) {
+ binary_weights[active_idx] <- scheme_weights
+ }
+ }
+ binary_weights
}
diff --git a/man/calculate_fit_weights.Rd b/man/calculate_fit_weights.Rd
new file mode 100644
index 0000000..0eb5d1e
--- /dev/null
+++ b/man/calculate_fit_weights.Rd
@@ -0,0 +1,48 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/calculate_fit_weights.R
+\name{calculate_fit_weights}
+\alias{calculate_fit_weights}
+\title{Calculate time-based sample weights for MAP Bayesian fitting}
+\usage{
+calculate_fit_weights(weights = NULL, t = NULL)
+}
+\arguments{
+\item{weights}{weighting scheme: a string with the scheme name, or a named
+list with a \code{scheme} element plus optional scheme-specific parameters.}
+
+\item{t}{numeric vector of observation times (in hours)}
+}
+\value{
+numeric vector of weights the same length as \code{t}, or \code{NULL} if
+\code{weights} is \code{NULL}.
+}
+\description{
+Downweights older observations relative to more recent ones during the
+iterative MAP Bayesian fitting step. Can be passed as the \code{weights}
+argument to \code{\link[=run_eval]{run_eval()}}.
+}
+\details{
+Available schemes:
+\itemize{
+\item \code{"weight_all"}: all samples weighted equally (weight = 1).
+\item \code{"weight_last_only"}: only the most recent sample is used (weight = 1),
+all others are excluded (weight = 0).
+\item \code{"weight_last_two_only"}: only the two most recent samples are used.
+\item \code{"weight_gradient_linear"}: weights increase linearly from a minimum
+(\code{w1}) for samples older than \code{t1} days to a maximum (\code{w2}) for samples
+more recent than \code{t2} days. Accepts optional list element \code{gradient} with
+named elements \code{t1}, \code{w1}, \code{t2}, \code{w2}. Default:
+\code{list(t1 = 7, w1 = 0, t2 = 2, w2 = 1)}.
+\item \code{"weight_gradient_exponential"}: weights decay exponentially with the age
+of the sample. Accepts optional list elements \code{t12_decay} (half-life of
+decay in hours, default 48) and \code{t_start} (delay in hours before decay
+starts, default 0).
+}
+
+For schemes with additional parameters, pass \code{weights} as a named list with
+a \code{scheme} element plus any scheme-specific elements, e.g.:
+
+\if{html}{\out{<div class="sourceCode r">}}\preformatted{list(scheme = "weight_gradient_exponential", t12_decay = 72)
+list(scheme = "weight_gradient_linear", gradient = list(t1 = 5, w1 = 0.1, t2 = 1, w2 = 1))}\if{html}{\out{</div>}}
+}
diff --git a/man/handle_sample_weighting.Rd b/man/handle_sample_weighting.Rd
index 40e009d..c73341b 100644
--- a/man/handle_sample_weighting.Rd
+++ b/man/handle_sample_weighting.Rd
@@ -4,7 +4,7 @@
\alias{handle_sample_weighting}
\title{Handle weighting of samples}
\usage{
-handle_sample_weighting(obs_data, iterations, incremental, i)
+handle_sample_weighting(obs_data, iterations, incremental, i, weights = NULL)
}
\arguments{
\item{obs_data}{tibble or data.frame with observed data for individual}
@@ -20,12 +20,21 @@ approach has been called "model predictive control (MPC)"
"regular" MAP in some scenarios. Default is \code{FALSE}.}
\item{i}{index}
+
+\item{weights}{optional sample downweighting scheme based on how long ago
+observations were taken. Either a string naming the scheme (e.g.
+\code{"weight_gradient_exponential"}), or a named list with a \code{scheme} element
+plus any scheme-specific parameters (e.g.
+\code{list(scheme = "weight_gradient_exponential", t12_decay = 72)}). See
+\code{\link[=calculate_fit_weights]{calculate_fit_weights()}} for all available schemes and their parameters.
+Default is \code{NULL} (no downweighting; all included samples are weighted
+equally).}
}
\value{
vector of weights (numeric)
}
\description{
-This function is used to select the samples used in the fit (1 or 0),
-but also to select their weight, if a sample weighting strategy is
-selected.
+Binary selection of which samples are used in the fit (0 = excluded,
+1 = included), combined with optional continuous downweighting of older
+samples via a time-based scheme (see \code{\link[=calculate_fit_weights]{calculate_fit_weights()}}).
}
diff --git a/man/run_eval.Rd b/man/run_eval.Rd
index 9c0c94c..6c46505 100644
--- a/man/run_eval.Rd
+++ b/man/run_eval.Rd
@@ -59,10 +59,14 @@ this can be used to group peaks and troughs together, or to group
observations on the same day together. Grouping will be done prior to
running the analysis, so cannot be changed afterwards.}
-\item{weights}{vector of weights for error. Length of vector should be same
-as length of observation vector. If NULL (default), all weights are equal.
-Used in both MAP and NP methods. Note that `weights` argument will also
-affect residuals (residuals will be scaled too).}
+\item{weights}{optional sample downweighting scheme based on how long ago
+observations were taken. Either a string naming the scheme (e.g.
+\code{"weight_gradient_exponential"}), or a named list with a \code{scheme} element
+plus any scheme-specific parameters (e.g.
+\code{list(scheme = "weight_gradient_exponential", t12_decay = 72)}). See
+\code{\link[=calculate_fit_weights]{calculate_fit_weights()}} for all available schemes and their parameters.
+Default is \code{NULL} (no downweighting; all included samples are weighted
+equally).}
\item{weight_prior}{weighting of priors in relationship to observed data,
default = 1}
diff --git a/man/run_eval_core.Rd b/man/run_eval_core.Rd
index a9c6020..4382724 100644
--- a/man/run_eval_core.Rd
+++ b/man/run_eval_core.Rd
@@ -22,10 +22,14 @@ run_eval_core(
\item{data}{NONMEM-style data.frame, or path to CSV file with NONMEM data}
-\item{weights}{vector of weights for error. Length of vector should be same
-as length of observation vector. If NULL (default), all weights are equal.
-Used in both MAP and NP methods. Note that `weights` argument will also
-affect residuals (residuals will be scaled too).}
+\item{weights}{optional sample downweighting scheme based on how long ago
+observations were taken. Either a string naming the scheme (e.g.
+\code{"weight_gradient_exponential"}), or a named list with a \code{scheme} element
+plus any scheme-specific parameters (e.g.
+\code{list(scheme = "weight_gradient_exponential", t12_decay = 72)}). See
+\code{\link[=calculate_fit_weights]{calculate_fit_weights()}} for all available schemes and their parameters.
+Default is \code{NULL} (no downweighting; all included samples are weighted
+equally).}
\item{weight_prior}{weighting of priors in relationship to observed data,
default = 1}
diff --git a/tests/testthat/_snaps/run_vpc/nm-busulfan-vpc.new.svg b/tests/testthat/_snaps/run_vpc/nm-busulfan-vpc.new.svg
new file mode 100644
index 0000000..a93e121
--- /dev/null
+++ b/tests/testthat/_snaps/run_vpc/nm-busulfan-vpc.new.svg
@@ -0,0 +1,306 @@
+
+
diff --git a/tests/testthat/_snaps/run_vpc/nm-vanco-vpc.new.svg b/tests/testthat/_snaps/run_vpc/nm-vanco-vpc.new.svg
new file mode 100644
index 0000000..b308e8d
--- /dev/null
+++ b/tests/testthat/_snaps/run_vpc/nm-vanco-vpc.new.svg
@@ -0,0 +1,134 @@
+
+
diff --git a/tests/testthat/test-calculate_fit_weights.R b/tests/testthat/test-calculate_fit_weights.R
new file mode 100644
index 0000000..d37a244
--- /dev/null
+++ b/tests/testthat/test-calculate_fit_weights.R
@@ -0,0 +1,119 @@
+test_that("weight_all returns all ones", {
+ result <- calculate_fit_weights("weight_all", t = c(0, 12, 24, 48))
+ expect_equal(result, c(1, 1, 1, 1))
+})
+
+test_that("weight_last_only returns 1 for most recent observation", {
+ result <- calculate_fit_weights("weight_last_only", t = c(0, 12, 24, 48))
+ expect_equal(result, c(0, 0, 0, 1))
+})
+
+test_that("weight_last_only handles unsorted times", {
+ result <- calculate_fit_weights("weight_last_only", t = c(24, 0, 48, 12))
+ expect_equal(result, c(0, 0, 1, 0))
+})
+
+test_that("weight_last_two_only returns 1 for two most recent observations", {
+ result <- calculate_fit_weights("weight_last_two_only", t = c(0, 12, 24, 48))
+ expect_equal(result, c(0, 0, 1, 1))
+})
+
+test_that("weight_last_two_only handles unsorted times", {
+ result <- calculate_fit_weights("weight_last_two_only", t = c(24, 0, 48, 12))
+ expect_equal(result, c(1, 0, 1, 0))
+})
+
+test_that("weight_last_two_only works with single observation", {
+ result <- calculate_fit_weights("weight_last_two_only", t = c(10))
+ expect_equal(result, c(1))
+})
+
+test_that("weight_gradient_exponential produces correct decay", {
+ t <- c(0, 24, 48)
+ result <- calculate_fit_weights("weight_gradient_exponential", t = t)
+ # default t12_decay = 48, most recent sample (t=48) gets weight 1
+ expect_equal(result[3], 1)
+ # t=24 is 24h before max, weight = exp(-log(2)/48 * 24) = exp(-log(2)/2)
+ expect_equal(result[2], exp(-log(2) / 2), tolerance = 1e-10)
+ # t=0 is 48h before max, weight = exp(-log(2)/48 * 48) = 0.5
+ expect_equal(result[1], 0.5, tolerance = 1e-10)
+})
+
+test_that("weight_gradient_exponential accepts custom t12_decay", {
+ t <- c(0, 24)
+ result <- calculate_fit_weights(
+ list(scheme = "weight_gradient_exponential", t12_decay = 24),
+ t = t
+ )
+ expect_equal(result[2], 1)
+ expect_equal(result[1], 0.5, tolerance = 1e-10)
+})
+
+test_that("weight_gradient_exponential respects t_start delay", {
+ t <- c(0, 20, 24)
+ result <- calculate_fit_weights(
+ list(scheme = "weight_gradient_exponential", t12_decay = 48, t_start = 4),
+ t = t
+ )
+ # t=24 is 0h ago, no decay -> weight 1
+ expect_equal(result[3], 1)
+ # t=20 is 4h ago, within t_start window, t_diff clamped to 0 -> weight 1
+ expect_equal(result[2], 1)
+ # t=0 is 24h ago, minus t_start=4 -> effective t_diff=20
+ expect_equal(result[1], exp(-log(2) / 48 * 20), tolerance = 1e-10)
+})
+
+test_that("weight_gradient_linear uses default gradient", {
+ t <- c(0, 100, 200)
+ result <- calculate_fit_weights("weight_gradient_linear", t = t)
+ expect_length(result, 3)
+ # most recent sample should get highest weight
+ expect_true(result[3] >= result[2])
+ expect_true(result[2] >= result[1])
+})
+
+test_that("weight_gradient_linear accepts custom gradient via list", {
+ result <- calculate_fit_weights(
+ list(scheme = "weight_gradient_linear", gradient = list(t1 = 3, w1 = 0.2, t2 = 1, w2 = 0.9)),
+ t = c(0, 24, 48, 72)
+ )
+ expect_length(result, 4)
+ expect_equal(result[4], 0.9)
+})
+
+test_that("weight_gradient_linear warns when t2 > t1", {
+ expect_warning(
+ calculate_fit_weights(
+ list(scheme = "weight_gradient_linear", gradient = list(t1 = 1, t2 = 5)),
+ t = c(0, 24, 48)
+ ),
+ "t2.*>.*t1"
+ )
+})
+
+test_that("invalid scheme returns NULL with warning", {
+ expect_warning(
+ result <- calculate_fit_weights("nonexistent_scheme", t = c(0, 12)),
+ "not recognized"
+ )
+ expect_null(result)
+})
+
+test_that("NULL weights returns NULL", {
+ expect_null(calculate_fit_weights(NULL, t = c(0, 12)))
+})
+
+test_that("NULL t returns NULL", {
+ expect_null(calculate_fit_weights("weight_all", t = NULL))
+})
+
+test_that("negative times get weight 0", {
+ result <- calculate_fit_weights("weight_all", t = c(-10, 0, 12, 24))
+ expect_equal(result[1], 0)
+ expect_equal(result[2:4], c(1, 1, 1))
+})
+
+test_that("scheme passed as list with scheme element works", {
+ result <- calculate_fit_weights(list(scheme = "weight_all"), t = c(0, 12))
+ expect_equal(result, c(1, 1))
+})
diff --git a/tests/testthat/test-handle_sample_weighting.R b/tests/testthat/test-handle_sample_weighting.R
index 27921f5..2d53531 100644
--- a/tests/testthat/test-handle_sample_weighting.R
+++ b/tests/testthat/test-handle_sample_weighting.R
@@ -95,6 +95,56 @@ test_that("handle_sample_weighting handles edge case with single observation", {
expect_equal(weights_inc, c(1))
})
+test_that("handle_sample_weighting applies weight scheme to active samples", {
+ obs_data <- data.frame(
+ t = c(0, 12, 24, 48),
+ `_grouper` = c(1, 2, 3, 4),
+ check.names = FALSE
+ )
+ iterations <- c(1, 2, 3, 4)
+
+  # At iteration 3, samples 1-3 are active (t = 0, 12, 24).
+  # The weight_last_only scheme should give weight 1 only to the
+  # most recent active sample (t = 24); sample 4 is not yet active.
+ result <- handle_sample_weighting(
+ obs_data, iterations, incremental = FALSE, i = 3,
+ weights = "weight_last_only"
+ )
+ expect_equal(result, c(0, 0, 1, 0))
+})
+
+test_that("handle_sample_weighting applies exponential weights to active samples", {
+ obs_data <- data.frame(
+ t = c(0, 24, 48),
+ `_grouper` = c(1, 2, 3),
+ check.names = FALSE
+ )
+ iterations <- c(1, 2, 3)
+
+ result <- handle_sample_weighting(
+ obs_data, iterations, incremental = FALSE, i = 3,
+ weights = list(scheme = "weight_gradient_exponential", t12_decay = 48)
+ )
+ expect_equal(result[3], 1)
+ expect_equal(result[1], 0.5, tolerance = 1e-10)
+ expect_true(result[2] > result[1] && result[2] < result[3])
+})
+
+test_that("handle_sample_weighting without weights gives binary result", {
+ obs_data <- data.frame(
+ t = c(0, 12, 24),
+ `_grouper` = c(1, 2, 3),
+ check.names = FALSE
+ )
+ iterations <- c(1, 2, 3)
+
+ result <- handle_sample_weighting(
+ obs_data, iterations, incremental = FALSE, i = 2,
+ weights = NULL
+ )
+ expect_equal(result, c(1, 1, 0))
+})
+
test_that("handle_sample_weighting maintains correct vector length", {
# Test with various data sizes
for(n_obs in c(1, 5, 10, 100)) {