diff --git a/SEQTaRget/DESCRIPTION b/SEQTaRget/DESCRIPTION index bc0f419..b0f8916 100644 --- a/SEQTaRget/DESCRIPTION +++ b/SEQTaRget/DESCRIPTION @@ -1,7 +1,7 @@ Package: SEQTaRget Type: Package Title: Sequential Trial Emulation -Version: 1.4.1 +Version: 1.4.1.9000 Authors@R: c(person(given = "Ryan", family = "O'Dea", role = c("aut", "cre"), diff --git a/SEQTaRget/NEWS.md b/SEQTaRget/NEWS.md index 10a33e8..3558695 100644 --- a/SEQTaRget/NEWS.md +++ b/SEQTaRget/NEWS.md @@ -1,7 +1,14 @@ +# SEQTaRget (development version) + +* Remove mention of units from time in docs. +* Improve memory usage in the bootstrapping. +* Fix off-by-one labeling in survival output so that `followup = k` correctly represents survival after `k` intervals, adding a row at `followup = survival.max + 1` for the final interval's estimate. +* Fix expansion bug where subjects experiencing the outcome early were incorrectly carried forward with `outcome=0` rows from subsequent periods by truncating each trial at the first event row + # SEQTaRget v1.4.1 - Strip row-level vectors from fastglm objects to reduce weight.statistics memory usage and use a new internal function to print the coefficient table. -- Strip row-level vectors from outcome models before storing in @outcome.model +- Strip row-level vectors from outcome models before storing in `@outcome.model` - Fix clean_fastglm to strip row-level vectors from nested multinomial weight models - No longer store survival.curve ggplot object; regenerate on demand via `km_curve()` - Removed several `local()` wrappers and made several code optimizations. diff --git a/SEQTaRget/R/SEQexpand.R b/SEQTaRget/R/SEQexpand.R index 80aa633..83ef86e 100644 --- a/SEQTaRget/R/SEQexpand.R +++ b/SEQTaRget/R/SEQexpand.R @@ -91,6 +91,12 @@ SEQexpand <- function(params) { out <- out[get(paste0(params@eligible, params@indicator.baseline)) == 1, ][, paste0(params@eligible, params@indicator.baseline) := NULL] + # Truncate each trial at (and including) the first outcome event row, so that + # subjects who experience the outcome early are not carried forward with outcome=0 + # from subsequent periods in the original data. + out <- out[out[, .I[seq_len(match(1L, get(params@outcome), nomatch = .N))], + by = c(params@id, "trial")]$V1] + if (params@method == "dose-response") { out <- out[, dose := cumsum(get(params@treatment)), by = c(eval(params@id), "trial")][, `:=`( dose_sq = dose^2, diff --git a/SEQTaRget/R/class_definitions.R b/SEQTaRget/R/class_definitions.R index 09693cb..48ceff2 100644 --- a/SEQTaRget/R/class_definitions.R +++ b/SEQTaRget/R/class_definitions.R @@ -205,7 +205,7 @@ setClass("SEQweights", #' @slot survival.data data.table of survival data #' @slot risk.difference risk difference calculated from survival data #' @slot risk.ratio risk ratio calculated from survival data -#' @slot time time in minutes used for the SEQuential process +#' @slot time time used for the SEQuential process #' @slot weight.statistics information from the weighting process, containing weight coefficients and weight statistics #' @slot info list of outcome and switch information (if applicable) #' @slot ce.model list of competing event models if \code{compevent} is specified, NA otherwise diff --git a/SEQTaRget/R/internal_analysis.R b/SEQTaRget/R/internal_analysis.R index 01564ad..2f2ffb1 100644 --- a/SEQTaRget/R/internal_analysis.R +++ b/SEQTaRget/R/internal_analysis.R @@ -208,6 +208,7 @@ internal.analysis <- function(params) { bs <- bootstrap_sample(params@DT, params@data, params, UIDs, lnID) out <- handler(bs$RMDT, bs$RMdata, params) out$WDT <- NULL + out$model <- lapply(out$model, function(sg) { sg$model <- clean_fastglm(sg$model); sg }) return(out) }, future.seed = if (length(params@seed) > 1) params@seed[1] else params@seed) } else { @@ -216,6 +217,7 @@ internal.analysis <- function(params) { bs <- bootstrap_sample(params@DT, params@data, params, UIDs, lnID) out <- handler(bs$RMDT, bs$RMdata, params) out$WDT <- NULL + out$model <- lapply(out$model, function(sg) { sg$model <- clean_fastglm(sg$model); sg }) return(out) }) } diff --git a/SEQTaRget/R/internal_survival.R b/SEQTaRget/R/internal_survival.R index 1897f1b..f17ba1e 100644 --- a/SEQTaRget/R/internal_survival.R +++ b/SEQTaRget/R/internal_survival.R @@ -79,8 +79,12 @@ internal.survival <- function(params, outcome) { keep <- list("followup", inc, surv) kept <- intersect(keep, names(result_dt)) - out_list[[i]] <- rbind(fup0, result_dt[followup > 0 - ][, c(unlist(kept)), with = FALSE] + # result_dt[k] = survival after completing interval k = S at time k+1. + # Shift labels by +1 so followup=k means "after k intervals elapsed", + # giving rows 0..survival.max+1 with the baseline row (followup=0, surv=1) + # correctly placed and the final interval's estimate at followup=survival.max+1. + result_dt[, followup := followup + 1L] + out_list[[i]] <- rbind(fup0, result_dt[, c(unlist(kept)), with = FALSE] )[, eval(risk) := 1 - get(surv)] rm(result_dt) } diff --git a/SEQTaRget/man/SEQoutput-class.Rd b/SEQTaRget/man/SEQoutput-class.Rd index 4a8c569..b678f22 100644 --- a/SEQTaRget/man/SEQoutput-class.Rd +++ b/SEQTaRget/man/SEQoutput-class.Rd @@ -30,7 +30,7 @@ An S4 class used to hold the outputs for the SEQuential process \item{\code{risk.ratio}}{risk ratio calculated from survival data} -\item{\code{time}}{time in minutes used for the SEQuential process} +\item{\code{time}}{time used for the SEQuential process} \item{\code{weight.statistics}}{information from the weighting process, containing weight coefficients and weight statistics} diff --git a/SEQTaRget/tests/testthat/test_misc.R b/SEQTaRget/tests/testthat/test_misc.R index ed68191..058b6d4 100644 --- a/SEQTaRget/tests/testthat/test_misc.R +++ b/SEQTaRget/tests/testthat/test_misc.R @@ -12,6 +12,56 @@ test_that("Expanded dataset contains no trials beyond the last eligible row per expect_equal(max(model@DT$trial), max(last_elig_idx$last_elig)) }) +test_that("Expansion truncates trials at first outcome event - subject with early outcome not carried forward", { + # Subject 1 has outcome=1 at time=0 (the only eligible period), then continues in the + # dataset at times 1-3 with outcome=0. Without truncation they would appear in the + # expanded data for all four periods with the later outcome=0 rows overwriting the event. + # Subject 2 has no outcome and serves as a control. + dt <- data.table::data.table( + ID = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), + time = c(0L, 1L, 2L, 3L, 0L, 1L, 2L, 3L), + eligible = c(1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), + treatment = c(1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L), + outcome = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), + N = c(1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L), + sex = c(0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L) + ) + model <- SEQuential(dt, "ID", "time", "eligible", "treatment", "outcome", + list("N"), list("sex"), + method = "ITT", + options = SEQopts(data.return = TRUE), + verbose = FALSE) + # Subject 1's only trial (trial=0) should contain exactly one row (followup=0, outcome=1) + s1_trial0 <- model@DT[ID == 1L & trial == 0L] + expect_equal(nrow(s1_trial0), 1L) + expect_equal(s1_trial0$outcome, 1L) +}) + +test_that("Expansion truncates trials at first outcome event - subject with early outcome not carried forward - test 2", { + # Subject 1 has outcome=1 at time=0 (the only eligible period), then continues in the + # dataset at times 1-3 with outcome=1. Without truncation they would appear in the + # expanded data for all four periods with the later outcome=1 rows overwriting the event. + # Subject 2 has no outcome and serves as a control. + dt <- data.table::data.table( + ID = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), + time = c(0L, 1L, 2L, 3L, 0L, 1L, 2L, 3L), + eligible = c(1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), + treatment = c(1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L), + outcome = c(1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L), + N = c(1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L), + sex = c(0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L) + ) + model <- SEQuential(dt, "ID", "time", "eligible", "treatment", "outcome", + list("N"), list("sex"), + method = "ITT", + options = SEQopts(data.return = TRUE), + verbose = FALSE) + # Subject 1's only trial (trial=0) should contain exactly one row (followup=0, outcome=1) + s1_trial0 <- model@DT[ID == 1L & trial == 0L] + expect_equal(nrow(s1_trial0), 1L) + expect_equal(s1_trial0$outcome, 1L) +}) + test_that("Pre-Expansion Excused Censoring - No excusedOne given", { data <- copy(SEQdata) model <- SEQuential(data, "ID", "time", "eligible", "tx_init", "outcome", diff --git a/SEQTaRget/tests/testthat/test_survival.R b/SEQTaRget/tests/testthat/test_survival.R index 5c8f157..bb2364c 100644 --- a/SEQTaRget/tests/testthat/test_survival.R +++ b/SEQTaRget/tests/testthat/test_survival.R @@ -22,6 +22,22 @@ test_that("Bootstrapped Survival - Percentile", { expect_s3_class(km_curve(model), "ggplot") }) +test_that("Survival output followup labeling - followup=k represents survival after k intervals", { + data <- data.table::copy(SEQdata) + model <- SEQuential(data, "ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), + method = "ITT", options = SEQopts(km.curves = TRUE)) + surv <- model@survival.data[[1]] + survival_max <- max(data[["time"]]) + # Output should run from followup=0 (baseline) to followup=survival.max+1 (end of final interval) + expect_equal(min(surv$followup), 0) + expect_equal(max(surv$followup), survival_max + 1) + expect_equal(length(unique(surv$followup)), survival_max + 2) + # Baseline row should have risk=0 and surv=1 + baseline <- surv[surv$followup == 0, ] + expect_true(all(baseline$value[grepl("^risk_", baseline$variable)] == 0)) + expect_true(all(baseline$value[grepl("^surv_", baseline$variable)] == 1)) +}) + test_that("Bootstrapped Survival - Competing Event CIs present", { data <- data.table::copy(SEQdata) set.seed(42)