diff --git a/DESCRIPTION b/DESCRIPTION
index c52dab3..b507a00 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: pharmr.extra
 Title: Extension of pharmr (Pharmpy) functionality
-Version: 0.0.0.9028
+Version: 0.0.0.9029
 Authors@R: c(
     person("Ron", "Keizer", email = "ron@insight-rx.com", role = c("cre", "aut")),
     person("Michael", "McCarthy", email = "michael.mccarthy@insight-rx.com", role = "ctb"),
diff --git a/R/create_sim_dataset.R b/R/create_sim_dataset.R
index 39c08cf..6856808 100644
--- a/R/create_sim_dataset.R
+++ b/R/create_sim_dataset.R
@@ -62,6 +62,7 @@ create_sim_dataset <- function(
       cli::cli_abort("Could not load model into Pharmpy. Please check the supplied model file.")
     }
   }
+  build_from_scratch <- FALSE
   if (!is.null(data)) {
     idx <- get_required_input_variables(model, data)
     if (inherits(data, "character")) {
@@ -80,9 +81,29 @@ create_sim_dataset <- function(
       cli::cli_abort("Number of columns for input dataset is lower than number of columns in $INPUT. Please check dataset and $INPUT. Cannot continue creating dataset.")
     }
   } else {
-    input_data <- as.data.frame(model$dataset)
+    raw_dataset <- model$dataset
+    if (!is.null(raw_dataset)) {
+      input_data <- as.data.frame(raw_dataset)
+    } else {
+      ## model$dataset is NULL (presumably the $DATA file could not be found): fall back
+      ## to one placeholder row per subject; `regimen` is mandatory on this path.
+      build_from_scratch <- TRUE
+      if (is.null(regimen)) {
+        cli::cli_abort(
+          c(
+            "No dataset is attached to this model (the {.field $DATA} file cannot be found) and no {.arg data} argument was supplied.",
+            i = "Provide {.arg regimen} (and optionally {.arg t_obs}, {.arg covariates}, {.arg n_subjects}) to build a simulation dataset from scratch."
+          )
+        )
+      }
+      if (is.null(n_subjects)) {
+        n_subjects <- if (!is.null(covariates)) nrow(covariates) else 1L
+      }
+      if (verbose) cli::cli_alert_info("No dataset attached to model \u2014 building simulation dataset from scratch")
+      input_data <- data.frame(ID = seq_len(n_subjects))
+    }
   }
-
+  
   if (!"ID" %in% names(input_data)) {
     cli::cli_abort(
       c("Column `ID` not found in the dataset.",
@@ -92,7 +113,8 @@ create_sim_dataset <- function(
 
   input_has_column <- list()
   for (key in c("CMT", "EVID", "MDV", "RATE")) {
-    input_has_column[[key]] <- key %in% names(input_data)
+    ## When building from scratch, flag CMT/EVID/MDV/RATE as present although input_data only has ID.
+    input_has_column[[key]] <- build_from_scratch || key %in% names(input_data)
   }
 
   ## make sure we have regimen as a data.frame
@@ -156,20 +178,33 @@ create_sim_dataset <- function(
     }
     new_covariates <- names(covariates)
     new_covariates <- new_covariates[new_covariates != "ID" & new_covariates %in% names(sim_data)]
-    if (verbose) cli::cli_alert_info("Updating covariates: {new_covariates}")
+    all_cov_cols <- setdiff(names(covariates), "ID")
+    if (verbose) cli::cli_alert_info("Updating covariates: {all_cov_cols}")
     sim_data_cols <- names(sim_data)
+    ## Keep covariate columns that are not yet in sim_data (always the case when building from
+    ## scratch, where sim_data only has ID). Note: this union is applied on every path.
+    sim_data_cols <- union(sim_data_cols, all_cov_cols)
 
     sim_data <- sim_data |>
       dplyr::select(-dplyr::all_of(new_covariates)) |>
       dplyr::left_join(covariates, by = "ID") |>
       dplyr::select(dplyr::all_of(sim_data_cols)) |>
-      tidyr::fill(dplyr::all_of(new_covariates), .direction = "downup")
+      tidyr::fill(dplyr::all_of(all_cov_cols), .direction = "downup")
   }
 
   if (!is.null(regimen_df)) {
     if (verbose) cli::cli_alert_info("Creating new regimens for subjects in simulation")
     advan <- get_advan(model)
+    ## When building from scratch, sim_data contains only placeholder rows (one per
+    ## subject, carrying covariate values but no NONMEM columns). Mark them so they
+    ## can be removed after fill() propagates their covariate values into dose rows.
+    if (build_from_scratch) {
+      sim_data$.placeholder <- TRUE
+    }
     doses <- create_dosing_records(regimen_df, sim_data, n_subjects, advan)
+    ## Dose rows need an explicit FALSE: once combined with the placeholder rows, an NA would be
+    ## turned into TRUE by the grouped "downup" fill() below and the dose rows filtered out too.
+    if (build_from_scratch) doses$.placeholder <- FALSE
     doses <- match_type(doses, sim_data, c("AMT", "RATE", "DV"))
     if ("EVID" %in% names(sim_data)) {
       sim_data <- sim_data |>
@@ -181,6 +216,12 @@ create_sim_dataset <- function(
       dplyr::group_by(.data$ID) |>
       tidyr::fill(tidyselect::everything(), .direction = "downup") |>
       dplyr::mutate(dplyr::across(dplyr::everything(), ~ fill_missing(.x)))
+    ## Remove placeholder rows now that covariates have been propagated to dose rows.
+    if (build_from_scratch) {
+      sim_data <- sim_data |>
+        dplyr::filter(!.data$.placeholder) |>
+        dplyr::select(-".placeholder")
+    }
     if (is.null(t_obs)) {
       t_max <- max(sim_data$TIME) + round(diff(utils::tail(sim_data$TIME, 2)))
       t_obs <- seq(0, t_max, 4)
diff --git a/R/run_sim.R b/R/run_sim.R
index 8f0c12d..17e6a55 100644
--- a/R/run_sim.R
+++ b/R/run_sim.R
@@ -74,6 +74,13 @@ run_sim <- function(
   }
 
   input_data <- model$dataset
+  if (is.null(input_data) && is.null(data)) {
+    cli::cli_abort(
+      c("No dataset is attached to this model and no `data` argument was provided.",
+        i = "Attach a dataset to the model, or supply a simulation dataset via the `data` argument (see {.fn create_sim_dataset}).")
+    )
+  }
+
   tool <- match.arg(tool)
   if(tool == "auto") {
     if(inherits(model, "pharmpy.model.external.nonmem.model.Model")) {
@@ -103,6 +110,22 @@ run_sim <- function(
     }
   }
 
+  ## Best-effort check that required columns exist in sim_data; skipped silently if the lookup errors.
+  req_vars <- tryCatch(
+    get_required_input_variables(model),
+    error = function(e) NULL
+  )
+  if (!is.null(req_vars)) {
+    required_cols <- req_vars$data_col[req_vars$required & !is.na(req_vars$data_col)]
+    missing_cols <- setdiff(required_cols, names(sim_data))
+    if (length(missing_cols) > 0) {
+      cli::cli_abort(
+        c("The simulation dataset is missing required column(s): {missing_cols}.",
+          i = "Use {.fn create_sim_dataset} to prepare a valid simulation dataset, or add the missing columns manually.")
+      )
+    }
+  }
+
   ## get unique regimens / datasets to simulate
   unique_regimens <- unique(sim_data[[".regimen"]])
   comb <- list()
diff --git a/tests/testthat/test-create_sim_dataset.R b/tests/testthat/test-create_sim_dataset.R
index eb62292..fa42eb3 100644
--- a/tests/testthat/test-create_sim_dataset.R
+++ b/tests/testthat/test-create_sim_dataset.R
@@ -337,3 +337,145 @@ test_that("create_sim_dataset: error when required covariates are missing", {
     "Not all required covariates"
   )
 })
+
+# ===========================================================================
+# Build from scratch (model$dataset is NULL — no $DATA file on disk)
+# ===========================================================================
+
+## Helper: model with an absolute $DATA path that definitely does not exist,
+## so that model$dataset is expected to be NULL (tests skip otherwise). Uses ADVAN1 (1-cmt IV) with CL/V only.
+.make_no_data_model <- function() {
+  pharmr::read_model_from_string(paste0(
+    "$PROBLEM no-data\n",
+    "$INPUT ID TIME DV AMT EVID MDV\n",
+    "$DATA /nonexistent/pharmr_extra_test_data.csv IGNORE=@\n",
+    "$SUBROUTINES ADVAN1 TRANS2\n",
+    "$PK\nCL=THETA(1)\nV=THETA(2)\nS1=V\n",
+    "$ERROR\nY=F+EPS(1)\n",
+    "$THETA (0,10)\n$THETA (0,50)\n",
+    "$SIGMA 0.1\n",
+    "$EST METHOD=1\n"
+  ))
+}
+
+test_that("create_sim_dataset (no-data): error when model has no dataset and regimen is NULL", {
+  local_pharmr.extra_options()
+  skip_if_nonmem_not_available()
+
+  mod <- .make_no_data_model()
+  skip_if(
+    !is.null(mod$dataset),
+    "Pharmpy returned a non-NULL dataset for a missing $DATA file — from-scratch path not triggered"
+  )
+
+  expect_error(
+    create_sim_dataset(model = mod, verbose = FALSE),
+    "No dataset is attached"
+  )
+})
+
+test_that("create_sim_dataset (no-data): regimen-only produces dose + obs rows", {
+  local_pharmr.extra_options()
+  skip_if_nonmem_not_available()
+
+  mod <- .make_no_data_model()
+  skip_if(
+    !is.null(mod$dataset),
+    "Pharmpy returned a non-NULL dataset for a missing $DATA file — from-scratch path not triggered"
+  )
+
+  out <- create_sim_dataset(
+    model = mod,
+    regimen = list(dose = 100, interval = 12, n = 3, route = "iv"),
+    t_obs = seq(0, 36, 6),
+    verbose = FALSE
+  )
+
+  expect_s3_class(out, "data.frame")
+  expect_true(nrow(out) > 0)
+  expect_true(any(out$EVID == 1)) # at least one dosing record (EVID 1)
+  expect_true(any(out$EVID == 0)) # at least one observation record (EVID 0)
+  expect_true(all(c("ID", "TIME", "AMT", "EVID", "MDV") %in% names(out)))
+})
+
+test_that("create_sim_dataset (no-data): n_subjects defaults to 1 when no covariates", {
+  local_pharmr.extra_options()
+  skip_if_nonmem_not_available()
+
+  mod <- .make_no_data_model()
+  skip_if(!is.null(mod$dataset), "Pharmpy returned a non-NULL dataset")
+
+  out <- create_sim_dataset(
+    model = mod,
+    regimen = list(dose = 100, interval = 12, n = 3, route = "iv"),
+    t_obs = seq(0, 36, 6),
+    verbose = FALSE
+  )
+  expect_equal(length(unique(out$ID)), 1L)
+})
+
+test_that("create_sim_dataset (no-data): n_subjects controls number of subjects", {
+  local_pharmr.extra_options()
+  skip_if_nonmem_not_available()
+
+  mod <- .make_no_data_model()
+  skip_if(!is.null(mod$dataset), "Pharmpy returned a non-NULL dataset")
+
+  out <- create_sim_dataset(
+    model = mod,
+    regimen = list(dose = 100, interval = 12, n = 3, route = "iv"),
+    t_obs = seq(0, 36, 6),
+    n_subjects = 5,
+    verbose = FALSE
+  )
+  expect_equal(length(unique(out$ID)), 5L)
+})
+
+test_that("create_sim_dataset (no-data): no .placeholder column in output", {
+  local_pharmr.extra_options()
+  skip_if_nonmem_not_available()
+
+  mod <- .make_no_data_model()
+  skip_if(!is.null(mod$dataset), "Pharmpy returned a non-NULL dataset")
+
+  out <- create_sim_dataset(
+    model = mod,
+    regimen = list(dose = 100, interval = 12, n = 3, route = "iv"),
+    t_obs = seq(0, 36, 6),
+    verbose = FALSE
+  )
+  expect_false(".placeholder" %in% names(out))
+})
+
+test_that("create_sim_dataset (no-data): covariates are applied and appear in output", {
+  local_pharmr.extra_options()
+  skip_if_nonmem_not_available()
+
+  ## Use the pheno example model, re-pointed at a non-existent absolute $DATA path (dataset expected NULL)
+  pheno_code <- pharmr::get_model_code(pharmr::load_example_model("pheno"))
+  ## Replace every $DATA record (case-insensitive, up to end of line) with the nonexistent path
+  pheno_code_no_data <- gsub(
+    "(?i)\\$DATA[^\n]*",
+    "$DATA /nonexistent/pharmr_extra_test_pheno.csv IGNORE=@",
+    pheno_code,
+    perl = TRUE
+  )
+  mod <- pharmr::read_model_from_string(pheno_code_no_data)
+  skip_if(!is.null(mod$dataset), "Pharmpy returned a non-NULL dataset")
+
+  covs <- data.frame(WGT = c(50, 100), APGR = c(6, 8))
+  out <- create_sim_dataset(
+    model = mod,
+    regimen = list(dose = 25, interval = 12, n = 3, route = "iv"),
+    t_obs = seq(0, 36, 12),
+    covariates = covs,
+    verbose = FALSE
+  )
+
+  expect_s3_class(out, "data.frame")
+  expect_equal(length(unique(out$ID)), 2L)
+  expect_true("WGT" %in% names(out))
+  ## Covariate values match per subject (expects IDs 1..n to follow the row order of `covs`)
+  expect_equal(unique(out$WGT[out$ID == 1]), 50)
+  expect_equal(unique(out$WGT[out$ID == 2]), 100)
+})
diff --git a/tests/testthat/test-run_sim.R b/tests/testthat/test-run_sim.R
index 71fa578..5470b35 100644
--- a/tests/testthat/test-run_sim.R
+++ b/tests/testthat/test-run_sim.R
@@ -650,3 +650,82 @@ test_that("run_sim (stub): covariates with ID column still works (regression)",
   expect_equal(sort(unique(captured_sim_data$ID)), 1:2)
 })
 
+# ===========================================================================
+# run_sim() input validation: data=NULL fallback, missing dataset, missing required columns
+# ===========================================================================
+
+test_that("run_sim (stub): data=NULL uses model's attached dataset", {
+  local_pharmr.extra_options()
+  skip_if_nonmem_not_available()
+  withr::local_dir(tempdir())
+
+  mod <- pharmr::load_example_model("pheno")
+
+  captured_data <- NULL
+  local_mocked_bindings(
+    run_nlme = function(data, ...) {
+      captured_data <<- utils::read.csv(data)
+      .mock_nlme_result()
+    },
+    .package = "pharmr.extra"
+  )
+
+  ## No `data` argument — should fall back to model$dataset
+  out <- run_sim(model = mod, verbose = FALSE)
+
+  expect_s3_class(out, "data.frame")
+  expect_true(nrow(out) > 0)
+  ## Dataset handed to the mocked run_nlme() must have rows (the pheno example model has data attached)
+  expect_true(!is.null(captured_data) && nrow(captured_data) > 0)
+})
+
+test_that("run_sim: error when model has no dataset and data=NULL", {
+  local_pharmr.extra_options()
+  skip_if_nonmem_not_available()
+
+  ## read_model_from_string with a non-existent $DATA file — model$dataset is expected to be NULL (skipped otherwise)
+  mod <- pharmr::read_model_from_string(
+    "$PROBLEM Test\n$INPUT ID TIME DV AMT EVID MDV\n$DATA /nonexistent/path/data.csv IGNORE=@\n$SUBROUTINES ADVAN1 TRANS2\n$PK\nCL=THETA(1)\nV=THETA(2)\nS1=V\n$ERROR\nY=F+EPS(1)\n$THETA (0,10)\n$THETA (0,50)\n$SIGMA 0.1\n$EST METHOD=1\n"
+  )
+  skip_if(
+    !is.null(mod$dataset),
+    "Pharmpy returned a non-NULL dataset for a missing $DATA file"
+  )
+
+  expect_error(
+    run_sim(model = mod, verbose = FALSE),
+    "No dataset is attached"
+  )
+})
+
+test_that("run_sim: error when data is missing a required column", {
+  local_pharmr.extra_options()
+  skip_if_nonmem_not_available()
+  withr::local_dir(tempdir())
+
+  mod <- make_model_without_cov() # $INPUT ID TIME DV AMT EVID MDV
+
+  ## Remove AMT — a required reserved column
+  dat_missing_col <- .sim_dat() |> dplyr::select(-"AMT")
+
+  expect_error(
+    run_sim(model = mod, data = dat_missing_col, verbose = FALSE),
+    "missing required column"
+  )
+})
+
+test_that("run_sim: error mentions which column is missing", {
+  local_pharmr.extra_options()
+  skip_if_nonmem_not_available()
+  withr::local_dir(tempdir())
+
+  mod <- make_model_without_cov() # $INPUT ID TIME DV AMT EVID MDV
+
+  dat_missing_col <- .sim_dat() |> dplyr::select(-"AMT")
+
+  expect_error(
+    run_sim(model = mod, data = dat_missing_col, verbose = FALSE),
+    "AMT"
+  )
+})
+