Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: pharmr.extra
Title: Extension of pharmr (Pharmpy) functionality
Version: 0.0.0.9028
Version: 0.0.0.9029
Authors@R: c(
person("Ron", "Keizer", email = "ron@insight-rx.com", role = c("cre", "aut")),
person("Michael", "McCarthy", email = "michael.mccarthy@insight-rx.com", role = "ctb"),
Expand Down
51 changes: 46 additions & 5 deletions R/create_sim_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ create_sim_dataset <- function(
cli::cli_abort("Could not load model into Pharmpy. Please check the supplied model file.")
}
}
build_from_scratch <- FALSE
if (!is.null(data)) {
idx <- get_required_input_variables(model, data)
if (inherits(data, "character")) {
Expand All @@ -80,9 +81,29 @@ create_sim_dataset <- function(
cli::cli_abort("Number of columns for input dataset is lower than number of columns in $INPUT. Please check dataset and $INPUT. Cannot continue creating dataset.")
}
} else {
input_data <- as.data.frame(model$dataset)
raw_dataset <- model$dataset
if (!is.null(raw_dataset)) {
input_data <- as.data.frame(raw_dataset)
} else {
## model$dataset is NULL (the $DATA file could not be found) — attempt
## to build the simulation dataset from scratch using regimen/t_obs/covariates.
build_from_scratch <- TRUE
if (is.null(regimen)) {
cli::cli_abort(
c(
"No dataset is attached to this model (the {.field $DATA} file cannot be found) and no {.arg data} argument was supplied.",
i = "Provide {.arg regimen} (and optionally {.arg t_obs}, {.arg covariates}, {.arg n_subjects}) to build a simulation dataset from scratch."
)
)
}
if (is.null(n_subjects)) {
n_subjects <- if (!is.null(covariates)) nrow(covariates) else 1L
}
if (verbose) cli::cli_alert_info("No dataset attached to model \u2014 building simulation dataset from scratch")
input_data <- data.frame(ID = seq_len(n_subjects))
}
}

if (!"ID" %in% names(input_data)) {
cli::cli_abort(
c("Column `ID` not found in the dataset.",
Expand All @@ -92,7 +113,8 @@ create_sim_dataset <- function(

input_has_column <- list()
for (key in c("CMT", "EVID", "MDV", "RATE")) {
input_has_column[[key]] <- key %in% names(input_data)
## When building from scratch, include all standard NONMEM columns in the output.
input_has_column[[key]] <- build_from_scratch || key %in% names(input_data)
}

## make sure we have regimen as a data.frame
Expand Down Expand Up @@ -156,20 +178,33 @@ create_sim_dataset <- function(
}
new_covariates <- names(covariates)
new_covariates <- new_covariates[new_covariates != "ID" & new_covariates %in% names(sim_data)]
if (verbose) cli::cli_alert_info("Updating covariates: {new_covariates}")
all_cov_cols <- setdiff(names(covariates), "ID")
if (verbose) cli::cli_alert_info("Updating covariates: {all_cov_cols}")

sim_data_cols <- names(sim_data)
## When building from scratch sim_data only has ID; union ensures newly-joined
## covariate columns are retained by the following select().
sim_data_cols <- union(sim_data_cols, all_cov_cols)
sim_data <- sim_data |>
dplyr::select(-dplyr::all_of(new_covariates)) |>
dplyr::left_join(covariates, by = "ID") |>
dplyr::select(dplyr::all_of(sim_data_cols)) |>
tidyr::fill(dplyr::all_of(new_covariates), .direction = "downup")
tidyr::fill(dplyr::all_of(all_cov_cols), .direction = "downup")
}

if (!is.null(regimen_df)) {
if (verbose) cli::cli_alert_info("Creating new regimens for subjects in simulation")
advan <- get_advan(model)
## When building from scratch, sim_data contains only placeholder rows (one per
## subject, carrying covariate values but no NONMEM columns). Mark them so they
## can be removed after fill() propagates their covariate values into dose rows.
if (build_from_scratch) {
sim_data$.placeholder <- TRUE
}
doses <- create_dosing_records(regimen_df, sim_data, n_subjects, advan)
## Setting .placeholder = FALSE on dose rows prevents fill() from propagating
## TRUE upward from placeholder rows (which sort last due to NA TIME).
if (build_from_scratch) doses$.placeholder <- FALSE
doses <- match_type(doses, sim_data, c("AMT", "RATE", "DV"))
if ("EVID" %in% names(sim_data)) {
sim_data <- sim_data |>
Expand All @@ -181,6 +216,12 @@ create_sim_dataset <- function(
dplyr::group_by(.data$ID) |>
tidyr::fill(tidyselect::everything(), .direction = "downup") |>
dplyr::mutate(dplyr::across(dplyr::everything(), ~ fill_missing(.x)))
## Remove placeholder rows now that covariates have been propagated to dose rows.
if (build_from_scratch) {
sim_data <- sim_data |>
dplyr::filter(!.data$.placeholder) |>
dplyr::select(-".placeholder")
}
if (is.null(t_obs)) {
t_max <- max(sim_data$TIME) + round(diff(utils::tail(sim_data$TIME, 2)))
t_obs <- seq(0, t_max, 4)
Expand Down
23 changes: 23 additions & 0 deletions R/run_sim.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,13 @@ run_sim <- function(
}
input_data <- model$dataset

if (is.null(input_data) && is.null(data)) {
cli::cli_abort(
c("No dataset is attached to this model and no `data` argument was provided.",
i = "Attach a dataset to the model, or supply a simulation dataset via the `data` argument (see {.fn create_sim_dataset}).")
)
}

tool <- match.arg(tool)
if(tool == "auto") {
if(inherits(model, "pharmpy.model.external.nonmem.model.Model")) {
Expand Down Expand Up @@ -103,6 +110,22 @@ run_sim <- function(
}
}

## Validate that required columns are present in the simulation dataset
req_vars <- tryCatch(
get_required_input_variables(model),
error = function(e) NULL
)
if (!is.null(req_vars)) {
required_cols <- req_vars$data_col[req_vars$required & !is.na(req_vars$data_col)]
missing_cols <- setdiff(required_cols, names(sim_data))
if (length(missing_cols) > 0) {
cli::cli_abort(
c("The simulation dataset is missing required column(s): {missing_cols}.",
i = "Use {.fn create_sim_dataset} to prepare a valid simulation dataset, or add the missing columns manually.")
)
}
}

## get unique regimens / datasets to simulate
unique_regimens <- unique(sim_data[[".regimen"]])
comb <- list()
Expand Down
142 changes: 142 additions & 0 deletions tests/testthat/test-create_sim_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,145 @@ test_that("create_sim_dataset: error when required covariates are missing", {
"Not all required covariates"
)
})

# ===========================================================================
# Build from scratch (model$dataset is NULL — no $DATA file on disk)
# ===========================================================================

## Helper: builds a minimal NONMEM model (ADVAN1 / TRANS2, CL and V only)
## whose $DATA record points at an absolute path that does not exist. The
## tests below expect `model$dataset` to be NULL for such a model and skip
## themselves when it is not.
.make_no_data_model <- function() {
  control_stream <- c(
    "$PROBLEM no-data\n",
    "$INPUT ID TIME DV AMT EVID MDV\n",
    "$DATA /nonexistent/pharmr_extra_test_data.csv IGNORE=@\n",
    "$SUBROUTINES ADVAN1 TRANS2\n",
    "$PK\nCL=THETA(1)\nV=THETA(2)\nS1=V\n",
    "$ERROR\nY=F+EPS(1)\n",
    "$THETA (0,10)\n$THETA (0,50)\n",
    "$SIGMA 0.1\n",
    "$EST METHOD=1\n"
  )
  ## Every record already ends in its own newline, so join without a separator.
  pharmr::read_model_from_string(paste(control_stream, collapse = ""))
}

test_that("create_sim_dataset (no-data): error when model has no dataset and regimen is NULL", {
  local_pharmr.extra_options()
  skip_if_nonmem_not_available()

  no_data_model <- .make_no_data_model()
  ## The scenario only exists when the model really has no dataset attached.
  skip_if_not(
    is.null(no_data_model$dataset),
    "Pharmpy returned a non-NULL dataset for a missing $DATA file — from-scratch path not triggered"
  )

  ## Neither `data` nor `regimen` is supplied, so the call must abort.
  expect_error(
    create_sim_dataset(model = no_data_model, verbose = FALSE),
    "No dataset is attached"
  )
})

test_that("create_sim_dataset (no-data): regimen-only produces dose + obs rows", {
  local_pharmr.extra_options()
  skip_if_nonmem_not_available()

  no_data_model <- .make_no_data_model()
  skip_if_not(
    is.null(no_data_model$dataset),
    "Pharmpy returned a non-NULL dataset for a missing $DATA file — from-scratch path not triggered"
  )

  iv_regimen <- list(dose = 100, interval = 12, n = 3, route = "iv")
  obs_times <- seq(0, 36, 6)
  sim_dataset <- create_sim_dataset(
    model = no_data_model,
    regimen = iv_regimen,
    t_obs = obs_times,
    verbose = FALSE
  )

  expect_s3_class(sim_dataset, "data.frame")
  expect_gt(nrow(sim_dataset), 0)
  ## Both record types must show up: EVID 1 (dose) and EVID 0 (observation).
  expect_true(any(sim_dataset$EVID == 1))
  expect_true(any(sim_dataset$EVID == 0))
  expected_cols <- c("ID", "TIME", "AMT", "EVID", "MDV")
  expect_true(all(expected_cols %in% names(sim_dataset)))
})

test_that("create_sim_dataset (no-data): n_subjects defaults to 1 when no covariates", {
  local_pharmr.extra_options()
  skip_if_nonmem_not_available()

  no_data_model <- .make_no_data_model()
  skip_if_not(is.null(no_data_model$dataset), "Pharmpy returned a non-NULL dataset")

  ## No `n_subjects` and no `covariates` are passed in this call.
  iv_regimen <- list(dose = 100, interval = 12, n = 3, route = "iv")
  sim_dataset <- create_sim_dataset(
    model = no_data_model,
    regimen = iv_regimen,
    t_obs = seq(0, 36, 6),
    verbose = FALSE
  )

  ## Exactly one distinct subject ID is expected in the result.
  expect_length(unique(sim_dataset$ID), 1L)
})

test_that("create_sim_dataset (no-data): n_subjects controls number of subjects", {
  local_pharmr.extra_options()
  skip_if_nonmem_not_available()

  no_data_model <- .make_no_data_model()
  skip_if_not(is.null(no_data_model$dataset), "Pharmpy returned a non-NULL dataset")

  iv_regimen <- list(dose = 100, interval = 12, n = 3, route = "iv")
  sim_dataset <- create_sim_dataset(
    model = no_data_model,
    regimen = iv_regimen,
    t_obs = seq(0, 36, 6),
    n_subjects = 5,
    verbose = FALSE
  )

  ## The number of distinct IDs must equal the requested `n_subjects`.
  expect_length(unique(sim_dataset$ID), 5L)
})

test_that("create_sim_dataset (no-data): no .placeholder column in output", {
  local_pharmr.extra_options()
  skip_if_nonmem_not_available()

  no_data_model <- .make_no_data_model()
  skip_if_not(is.null(no_data_model$dataset), "Pharmpy returned a non-NULL dataset")

  iv_regimen <- list(dose = 100, interval = 12, n = 3, route = "iv")
  sim_dataset <- create_sim_dataset(
    model = no_data_model,
    regimen = iv_regimen,
    t_obs = seq(0, 36, 6),
    verbose = FALSE
  )

  ## The `.placeholder` helper column must not be among the returned columns.
  expect_false(any(names(sim_dataset) == ".placeholder"))
})

test_that("create_sim_dataset (no-data): covariates are applied and appear in output", {
  local_pharmr.extra_options()
  skip_if_nonmem_not_available()

  ## Take the pheno example's model code and point its $DATA record at an
  ## absolute path that does not exist, so that no dataset can be attached.
  pheno_model <- pharmr::load_example_model("pheno")
  original_code <- pharmr::get_model_code(pheno_model)
  code_without_data <- gsub(
    pattern = "(?i)\\$DATA[^\n]*",
    replacement = "$DATA /nonexistent/pharmr_extra_test_pheno.csv IGNORE=@",
    x = original_code,
    perl = TRUE
  )
  no_data_model <- pharmr::read_model_from_string(code_without_data)
  skip_if_not(is.null(no_data_model$dataset), "Pharmpy returned a non-NULL dataset")

  ## Two covariate rows and no explicit `n_subjects`.
  subject_covariates <- data.frame(WGT = c(50, 100), APGR = c(6, 8))
  sim_dataset <- create_sim_dataset(
    model = no_data_model,
    regimen = list(dose = 25, interval = 12, n = 3, route = "iv"),
    t_obs = seq(0, 36, 12),
    covariates = subject_covariates,
    verbose = FALSE
  )

  expect_s3_class(sim_dataset, "data.frame")
  expect_length(unique(sim_dataset$ID), 2L)
  expect_true("WGT" %in% names(sim_dataset))

  ## Each subject must carry a single WGT value, taken from its own covariate row.
  wgt_of_subject <- function(id) {
    unique(sim_dataset$WGT[sim_dataset$ID == id])
  }
  expect_equal(wgt_of_subject(1), 50)
  expect_equal(wgt_of_subject(2), 100)
})
79 changes: 79 additions & 0 deletions tests/testthat/test-run_sim.R
Original file line number Diff line number Diff line change
Expand Up @@ -650,3 +650,82 @@ test_that("run_sim (stub): covariates with ID column still works (regression)",
expect_equal(sort(unique(captured_sim_data$ID)), 1:2)
})

# ===========================================================================
# run_sim() with data=NULL — uses model's attached dataset
# ===========================================================================

test_that("run_sim (stub): data=NULL uses model's attached dataset", {
  local_pharmr.extra_options()
  skip_if_nonmem_not_available()
  withr::local_dir(tempdir())

  pheno_model <- pharmr::load_example_model("pheno")

  ## Stand-in for run_nlme(): reads back the dataset file it is handed,
  ## stores it for inspection, and returns `.mock_nlme_result()`.
  dataset_seen_by_nlme <- NULL
  fake_run_nlme <- function(data, ...) {
    dataset_seen_by_nlme <<- utils::read.csv(data)
    .mock_nlme_result()
  }
  local_mocked_bindings(run_nlme = fake_run_nlme, .package = "pharmr.extra")

  ## `data` is deliberately omitted, so run_sim() has to use model$dataset.
  sim_result <- run_sim(model = pheno_model, verbose = FALSE)

  expect_s3_class(sim_result, "data.frame")
  expect_gt(nrow(sim_result), 0)
  ## The dataset handed to run_nlme() must have been captured and be non-empty.
  captured_has_rows <- !is.null(dataset_seen_by_nlme) && nrow(dataset_seen_by_nlme) > 0
  expect_true(captured_has_rows)
})

test_that("run_sim: error when model has no dataset and data=NULL", {
  local_pharmr.extra_options()
  skip_if_nonmem_not_available()

  ## Control stream whose $DATA record names a file that does not exist;
  ## the test expects model$dataset to be NULL for it and skips otherwise.
  control_stream <- "$PROBLEM Test\n$INPUT ID TIME DV AMT EVID MDV\n$DATA /nonexistent/path/data.csv IGNORE=@\n$SUBROUTINES ADVAN1 TRANS2\n$PK\nCL=THETA(1)\nV=THETA(2)\nS1=V\n$ERROR\nY=F+EPS(1)\n$THETA (0,10)\n$THETA (0,50)\n$SIGMA 0.1\n$EST METHOD=1\n"
  no_data_model <- pharmr::read_model_from_string(control_stream)
  skip_if_not(
    is.null(no_data_model$dataset),
    "Pharmpy returned a non-NULL dataset for a missing $DATA file"
  )

  ## No dataset on the model and no `data` argument: run_sim() must abort.
  expect_error(
    run_sim(model = no_data_model, verbose = FALSE),
    "No dataset is attached"
  )
})

test_that("run_sim: error when data is missing a required column", {
  local_pharmr.extra_options()
  skip_if_nonmem_not_available()
  withr::local_dir(tempdir())

  model_no_cov <- make_model_without_cov() # $INPUT ID TIME DV AMT EVID MDV

  ## Drop AMT from the simulation dataset; it is listed in the model's $INPUT.
  data_without_amt <- dplyr::select(.sim_dat(), -"AMT")

  expect_error(
    run_sim(model = model_no_cov, data = data_without_amt, verbose = FALSE),
    "missing required column"
  )
})

test_that("run_sim: error mentions which column is missing", {
  local_pharmr.extra_options()
  skip_if_nonmem_not_available()
  withr::local_dir(tempdir())

  model_no_cov <- make_model_without_cov() # $INPUT ID TIME DV AMT EVID MDV

  data_without_amt <- dplyr::select(.sim_dat(), -"AMT")

  ## The error text has to name the dropped column.
  expect_error(
    run_sim(model = model_no_cov, data = data_without_amt, verbose = FALSE),
    "AMT"
  )
})

Loading