diff --git a/.Rbuildignore b/.Rbuildignore index 96e9c86..ac3fa7d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,3 +11,5 @@ dev ^codecov\.yml$ ^CLAUDE\.md$ ^ROADMAP\.md$ +^doc$ +^Meta$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index f4b17a4..d042c21 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [main, master, develop] pull_request: - branches: [main, master] + branches: [main, master, develop] name: R-CMD-check diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index e050312..ad6779c 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -2,7 +2,7 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [main, master, develop] pull_request: name: test-coverage.yaml diff --git a/.gitignore b/.gitignore index 852b15f..14ebbf3 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ docs inst/doc *.DS_Store +/doc/ +/Meta/ +*.rds \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 68400ef..b9e59b2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,11 +12,15 @@ URL: https://randcorporation.github.io/R6Sim/ Imports: assertthat, data.table, + doFuture, dplyr, + foreach, + future, + future.apply, jsonlite, lhs, magrittr, - parallel, + progressr, purrr, R6, readxl, @@ -25,8 +29,6 @@ Imports: utils, yaml Suggests: - deSolve, - ggplot2, knitr, rmarkdown, testthat (>= 3.0.0), diff --git a/NAMESPACE b/NAMESPACE index bc4aca3..64d5122 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,13 +5,24 @@ export(R6Sim) export(is.R6Experiment) export(is.R6Sim) import(R6) +import(doFuture) import(dplyr) -import(parallel) +import(foreach) 
+import(future.apply) +import(progressr) import(tidyr) importFrom(data.table,as.data.table) importFrom(data.table,data.table) +importFrom(doFuture,registerDoFuture) +importFrom(dplyr,any_of) +importFrom(dplyr,bind_rows) +importFrom(dplyr,select) +importFrom(foreach,"%dopar%") +importFrom(foreach,foreach) importFrom(lhs,randomLHS) importFrom(magrittr,"%>%") +importFrom(progressr,progressor) +importFrom(progressr,with_progress) importFrom(readxl,excel_sheets) importFrom(readxl,read_xlsx) importFrom(stats,qunif) diff --git a/R/R6Experiment.R b/R/R6Experiment.R index f8f5f88..7c94e7c 100644 --- a/R/R6Experiment.R +++ b/R/R6Experiment.R @@ -30,6 +30,10 @@ #' @description #' Manages experimental designs and execution for R6Sim models. #' @import R6 +#' @import progressr +#' @import future.apply +#' @import foreach +#' @import doFuture #' @export R6Experiment <- R6::R6Class( classname = "R6Experiment", @@ -67,6 +71,9 @@ R6Experiment <- R6::R6Class( #' @field experimental_parameters is a list containing details about each experimental parameter. Experimental parameters can be either policy levers or uncertainties. Defining this distinction is up to the user. experimental_parameters = list(), + #' @field results is a data.frame containing the results of the experiment. + results = NULL, + #' @description #' This function is used to initialize a `R6Experiment` object. This object represents an experiment that will be run and can encompass multiple models. #' @param ... set of R6Sim to be included in the experiment. One `R6Experiment` can contain multiple models of the `c19model` class. @@ -114,24 +121,137 @@ R6Experiment <- R6::R6Class( #' @description #' Run Experiment #' - #' @param n_cores number of cores to use - #' @param parallel whether to evaluate run in parallel - #' @param cluster_eval_script Optional path to R script that is sourced once in each parallel process before running experiments. 
- #' Useful for model setup that should happen once per process, like: - #' - Loading required packages - #' - Compiling models (e.g. odin models) - #' - Setting up simulation parameters/data - #' - Creating model instances for use across runs - #' @param model_from_cluster_eval If TRUE, expects model instances to be created in cluster_eval_script. - #' Set TRUE when model compilation is needed (like with odin). - #' @param packages character vector of packages to be loaded before running the model in parallel. + #' @importFrom dplyr bind_rows + #' @importFrom dplyr select + #' @importFrom dplyr any_of + #' @importFrom foreach foreach + #' @importFrom doFuture registerDoFuture + #' @importFrom doFuture %dofuture% + #' @importFrom progressr with_progress + #' @importFrom progressr progressor + #' + #' @param checkpoint_frequency Frequency of checkpoints during the experiment. If NULL, defaults to 10 percent of the experimental design. + #' @param checkpoint_dir Directory to save checkpoints. If NULL (the default), checkpoints are saved in the "experiments" directory. + #' @param backend Backend to use for parallelization. Options are future.apply (default) or foreach. + #' @param ... Additional parameters passed to model simulation. + #' @return A data.frame containing the results of the experiment. + run = function(checkpoint_frequency = NULL, checkpoint_dir = NULL, backend = "future.apply", ...) 
{ + if (is.null(checkpoint_dir)) { + checkpoint_dir <- file.path("experiments") + } + + if (is.null(self$results)) { + self$results <- data.frame() + } + + total_steps <- length(unique(self$policy_design$policy.exp.id)) + completed_steps <- length(unique(self$results$policy.exp.id)) + remaining_steps <- total_steps - completed_steps + + if (remaining_steps <= 0) { + message("All experiments have already been completed.") + return(self$results) + } + + if (is.null(checkpoint_frequency)) { + checkpoint_frequency <- max(1, ceiling(remaining_steps * 0.1)) + } + + checkpoint_iterations <- ceiling(remaining_steps / checkpoint_frequency) + + # Define the checkpoint file name once + timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S") + checkpoint_file <- file.path(checkpoint_dir, paste0(timestamp, "_checkpoint.rds")) + + progressr::with_progress({ + overall_progress <- progressr::progressor(steps = total_steps) + + for (checkpoint_iteration in seq_len(checkpoint_iterations)) { + self$run_checkpoint_iteration( + checkpoint_iteration, checkpoint_frequency, remaining_steps, overall_progress, checkpoint_file, completed_steps, backend, ... + ) + } + }) + + return(self$results) + }, + + #' @description + #' Run a single experiment + #' + #' @param policy_design_id ID of the policy design to run #' @param ... additional parameters passed to model simulation - run = function(n_cores = 3, parallel = F, cluster_eval_script = NULL, - model_from_cluster_eval = F, packages = NULL, ...) { - R6Experiment_run(self = self, n_cores = n_cores, parallel = parallel, - cluster_eval_script = cluster_eval_script, - model_from_cluster_eval = model_from_cluster_eval, packages = packages, ...) - }), + run_single_experiment = function(policy_design_id, ...) 
{ + model <- self$models[[self$policy_design$model.id[policy_design_id]]] + + id_cols <- c("grid.id", "lhs.id", "params_design.id", "param.id", "model.id", "all.params.id", "policy.exp.id", "rep.id", "seed") + + scenario_inputs <- self$policy_design[policy_design_id, ] %>% + select(-any_of(id_cols)) %>% + as.data.frame() + + # Set each input + for (var in names(scenario_inputs)) { + model$set_input(var, scenario_inputs[, var]) + } + + # If setting seed, do it + if(self$set_seed) { + set.seed(self$policy_design[policy_design_id, ]$seed) + } + + res <- model$simulate(...) %>% as.data.frame() + + return(dplyr::bind_cols(self$policy_design[policy_design_id, ], res)) + }, + + #' @description + #' Save a checkpoint of the experiment. + #' @param checkpoint_file Path to the file where the checkpoint will be saved. + checkpoint = function(checkpoint_file) { + if (!dir.exists(dirname(checkpoint_file))) { + dir.create(dirname(checkpoint_file), recursive = TRUE) + } + suppressWarnings({ + saveRDS(self, checkpoint_file) + }) + }, + + #' @description + #' Run a single checkpoint iteration + #' + #' @param checkpoint_iteration Current checkpoint iteration. + #' @param checkpoint_frequency Frequency of checkpoints. + #' @param remaining_steps Number of steps remaining in the experiment. + #' @param overall_progress Overall progressor object for tracking progress. + #' @param checkpoint_file Path to the file where the checkpoint will be saved. + #' @param completed_steps Number of steps already completed in the experiment. + #' @param backend Backend to use for parallelization. Options are "future.apply" (default) or "foreach". + #' @param ... Additional parameters passed to model simulation. + run_checkpoint_iteration = function(checkpoint_iteration, checkpoint_frequency, remaining_steps, overall_progress, checkpoint_file, completed_steps, backend, ...) 
{ + checkpoint_start <- completed_steps + (checkpoint_iteration - 1) * checkpoint_frequency + 1 + checkpoint_end <- min(completed_steps + checkpoint_iteration * checkpoint_frequency, completed_steps + remaining_steps) + + if (backend == "future.apply") { + checkpoint_results <- future.apply::future_lapply(seq(checkpoint_start, checkpoint_end), function(policy_design_id) { + overall_progress(sprintf("Running policy design %d", policy_design_id)) + self$run_single_experiment(policy_design_id, ...) + }, future.seed=TRUE, future.packages = c("dplyr", "R6Sim", "progressr")) + checkpoint_results <- dplyr::bind_rows(checkpoint_results) + } else if (backend == "foreach") { + checkpoint_results <- foreach(policy_design_id = seq(checkpoint_start, checkpoint_end), .combine = dplyr::bind_rows, .options.future = list(seed = TRUE)) %dofuture% { + overall_progress(sprintf("Running policy design %d", policy_design_id)) + self$run_single_experiment(policy_design_id, ...) + } + } else { + stop("Unsupported backend. Please choose either 'future.apply' or 'foreach'.") + } + + self$results <- dplyr::bind_rows(self$results, checkpoint_results) + + self$checkpoint(checkpoint_file) + } + ), # Use private to hold data that will not be accessed by the user directly. private = list( diff --git a/R/R6Experiment_run.R b/R/R6Experiment_run.R deleted file mode 100644 index 3d138c0..0000000 --- a/R/R6Experiment_run.R +++ /dev/null @@ -1,119 +0,0 @@ -#------------------------------------------------------------------------------# -# R6Sim: R6-based Simulation Modeling Toolkit -# Copyright (C) 2024 by The RAND Corporation -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# -# See LICENSE.md and README.md for more information on usage and licensing -# -# Author: Pedro Nascimento de Lima. -#------------------------------------------------------------------------------# - -#------------------------------------------------------------------------------# -# Run an R6Experiment -# Purpose: Runs an R6Experiment in parallel, assuming each input assigned -# maps to one model input -#------------------------------------------------------------------------------# - -#' Runs R6Experiment in parallel -#' -#' -#' This function is most useful to simulate the posterior distribution for a single model in parallel in one machine. This function is not used when calibrating the model and not useful for parallelization across multiple nodes. -#' -#' @param self experiment object -#' @param n_cores number of cores to use -#' @param parallel whether to evaluate run in parallel -#' @param cluster_eval_script path to script that instantiates necessary functions. this will often mean sourcing functions external to the package and loading dependencies for the model. needed if parallel = T -#' @param model_from_cluster_eval T if model is instantiated in the cluter eval scripts, F otherwise. Use T if using models that need compilation (like odin) and F otherwise. -#' @param cluster_type either "FORK" or "PSOCK". -#' @param packages character vector of packages to be loaded before running the model in paralle. -#' @param ... additional parameters to be passed to the model simulation function. 
-#' @return results data.frame from all simulations in parallel -#' -#' @import parallel -R6Experiment_run <- function(self, n_cores, parallel, cluster_eval_script = NULL, - model_from_cluster_eval, cluster_type = "PSOCK", - packages = NULL, ...) { - - if (parallel) { - cl <- parallel::makeCluster(n_cores, type = cluster_type) - - # Load packages if specified - if (!is.null(packages)) { - parallel::clusterExport(cl, "packages", envir = environment()) - parallel::clusterEvalQ(cl, { - for (pkg in packages) { - library(pkg, character.only = TRUE) - } - }) - } - - if (!is.null(cluster_eval_script)) { - parallel::clusterExport(cl, "cluster_eval_script", envir = environment()) - parallel::clusterEvalQ(cl, source(cluster_eval_script)) - } - - results <- parLapply(cl = cl, X = 1:nrow(self$policy_design), - fun = run_single_experiment, - model_from_cluster_eval = model_from_cluster_eval, - self = self, parallel = parallel, ...) - - parallel::stopCluster(cl) - - } else { - results <- lapply(X = 1:nrow(self$policy_design), - FUN = run_single_experiment, - model_from_cluster_eval = model_from_cluster_eval, - self = self, parallel = parallel, ...) - } - - return(do.call(dplyr::bind_rows, results)) -} - -run_single_experiment <- function(policy_design_id, self, model_from_cluster_eval, parallel, ...) { - - if (!model_from_cluster_eval | !parallel) { - model <- self$models[[self$policy_design$model.id[policy_design_id]]] - } else { - stopifnot("cluster_experiment object not defined. Create an R6Experiment object called cluster_experiment in your cluster_eval_script file, containing the models used in this analysis." = exists("cluster_experiment")) - - stopifnot("cluster_experiment object is not an R6Experiment. Make sure to use R6Experiment to create the cluster_experiment object." 
= is.R6Experiment(cluster_experiment)) - - model <- cluster_experiment$models[[self$policy_design$model.id[policy_design_id]]] - } - - id_cols <- c("grid.id", "lhs.id", "params_design.id", "param.id", "model.id", "all.params.id", "policy.exp.id", "rep.id", "seed") - - - - scenario_inputs <- self$policy_design[policy_design_id, ] %>% - select(-any_of(id_cols)) %>% - as.data.frame() - - # Set each input - for (var in names(scenario_inputs)) { - model$set_input(var, scenario_inputs[, var]) - } - - # If setting seed, do it - if(self$set_seed) { - set.seed(self$policy_design[policy_design_id, ]$seed) - } - - res <- model$simulate(...) %>% as.data.frame() - - return(dplyr::bind_cols(self$policy_design[policy_design_id, ], res)) - -} - diff --git a/R/R6Sim.R b/R/R6Sim.R index 8e81a64..c8b6a13 100644 --- a/R/R6Sim.R +++ b/R/R6Sim.R @@ -42,29 +42,6 @@ #' * Parallel execution #' * Parameter sampling #' -#' @examples -#' # Create simulation model -#' MyModel <- R6::R6Class( -#' "MyModel", -#' inherit = R6Sim, -#' public = list( -#' initialize = function(name) { -#' super$initialize(name) -#' self$set_input("population", 1000) -#' self$set_input("growth_rate", 0.05) -#' }, -#' simulate = function(...) { -#' pop <- self$inputs$population -#' growth <- self$inputs$growth_rate -#' results <- pop * (1 + growth)^(1:10) -#' return(data.frame(year = 1:10, population = results)) -#' } -#' ) -#' ) -#' -#' model <- MyModel$new("pop_model") -#' results <- model$simulate() -#' #' @export R6Sim <- R6::R6Class( @@ -115,11 +92,6 @@ R6Sim <- R6::R6Class( #' Accepts numeric, character, logical, data.frame and list inputs. #' Type tags enable selective JSON export. 
#' - #' @examples - #' model$set_input("population", 1000, type = "parameter") - #' model$set_input("growth_rates", c(0.01, 0.02), type = "scenario") - #' model$set_input("settings", list(iterations = 100), type = "config") - #' #' @export set_input = function(name, value, type = NA_character_) { R6Sim_set_input(self = self, name = name, value = value, type = type) diff --git a/R/get_parallel_backend_fn.R b/R/get_parallel_backend_fn.R new file mode 100644 index 0000000..e6922c0 --- /dev/null +++ b/R/get_parallel_backend_fn.R @@ -0,0 +1,13 @@ +# Helper function to get the parallel backend function +get_parallel_backend_fn <- function() { + backend <- getOption("parallel_backend", "multicore") + if (backend == "multicore") { + future::multicore + } else if (backend == "multisession") { + future::multisession + } else if (backend == "sequential") { + future::sequential + } else { + stop("Unsupported parallel backend: ", backend) + } +} \ No newline at end of file diff --git a/dev/R6Sim_simulate_posterior.R b/dev/R6Sim_simulate_posterior.R index db9d478..757ac42 100644 --- a/dev/R6Sim_simulate_posterior.R +++ b/dev/R6Sim_simulate_posterior.R @@ -28,9 +28,6 @@ #' This function is most useful to simulate the posterior distribution for a single model in parallel in one machine. This function is not used when calibrating the model and not useful for parallelization across multiple nodes. #' #' @param self model object -#' @param n_cores number of cores to use -#' @param parallel whether to evaluate run in parallel -#' @param cluster_eval_script path to script that instantiates necessary functions. this will often mean sourcing functions external to the package and loading dependencies for the model. 
needed if parallel = T #' #' @return results data.frame from all simulations in parallel #' diff --git a/dev/checkpointing.md b/dev/checkpointing.md new file mode 100644 index 0000000..e69de29 diff --git a/dev/dev_tasks.R b/dev/dev_tasks.R index 9352e3f..bd6d741 100644 --- a/dev/dev_tasks.R +++ b/dev/dev_tasks.R @@ -5,5 +5,7 @@ attachment::att_amend_desc() devtools::document() devtools::build_manual() +devtools::build_vignettes() + # load package devtools::load_all() diff --git a/dev/prob_sample.R b/dev/prob_sample.R new file mode 100644 index 0000000..4fb3ee0 --- /dev/null +++ b/dev/prob_sample.R @@ -0,0 +1,90 @@ +library(dplyr) +library(tidyr) +library(purrr) +library(rlang) +library(truncnorm) + + +# ---- Function: Construct Prior Table ---- +construct_priors <- function(..., .list = NULL) { + exprs <- c(list(...), .list) + stopifnot(length(exprs) > 0) + + parse_prior <- function(expr) { + if (!inherits(expr, "formula")) stop("Each prior must be a formula like x ~ dist(...)") + + lhs <- as_string(expr[[2]]) + call <- expr[[3]] + dist <- as_string(call[[1]]) + args <- as.list(call[-1]) + + tibble( + name = lhs, + dist = dist, + params = list(args) + ) + } + + map_dfr(exprs, parse_prior) +} + +# ---- Function: Sample Priors with Constraints ---- +sample_priors <- function(param_specs, n, constraints = NULL, max_attempts = 10) { + stopifnot(is.data.frame(param_specs)) + required_cols <- c("name", "dist", "params") + stopifnot(all(required_cols %in% names(param_specs))) + + # Sampling dispatch function + draw_samples <- function(dist, args, n) { + switch(dist, + uniform = do.call(runif, c(list(n = n), args)), + normal = do.call(rnorm, c(list(n = n), args)), + beta = do.call(rbeta, c(list(n = n), args)), + truncnorm = do.call(truncnorm::rtruncnorm, c(list(n = n), args)), + stop(paste("Unsupported distribution:", dist)) + ) + } + + samples <- tibble() + attempts <- 0 + while (nrow(samples) < n && attempts < max_attempts) { + raw_samples <- param_specs %>% + 
mutate(sample = map2(dist, params, ~ draw_samples(.x, .y, n))) %>% + select(name, sample) %>% + unnest_wider(sample, names_sep = "_") %>% + pivot_longer(cols = starts_with("sample_"), names_to = "draw", values_to = "value") %>% + mutate(draw = as.integer(gsub("sample_", "", draw))) %>% + pivot_wider(names_from = name, values_from = value) + + if (!is.null(constraints)) { + constraint_exprs <- parse_exprs(constraints) + raw_samples <- raw_samples %>% filter(!!!constraint_exprs) + } + + samples <- bind_rows(samples, raw_samples) + attempts <- attempts + 1 + } + + if (nrow(samples) < n) { + warning(paste("Only", nrow(samples), "samples returned after", max_attempts, "attempts.")) + } + + samples %>% slice_head(n = n) +} + + +# Construct priors +priors <- construct_priors( + x ~ uniform(min = 2, max = 44), + y ~ normal(mean = 5, sd = 2), + z ~ truncnorm(a = 0, b = 10, mean = 5, sd = 1), + w ~ beta(shape1 = 1, shape2 = 2) +) + +# Sample with constraints +set.seed(42) +samples <- sample_priors(priors, n = 1000, constraints = c("x + y < 1000", "z + 1000 > x")) + +print(head(samples)) + + diff --git a/dev/running_experiments.Rmd b/dev/running_experiments.Rmd index 6c91429..31fda1d 100644 --- a/dev/running_experiments.Rmd +++ b/dev/running_experiments.Rmd @@ -165,8 +165,7 @@ experiment$policy_design %>% Now we'll run the experiment across all parameter combinations: ```{r} -# Run the experiment (in serial mode for simplicity) -results <- experiment$run(parallel = FALSE) +results <- experiment$run() # Examine the first few results head(results) @@ -265,7 +264,7 @@ Now we can run the stochastic experiment: ```{r} # Run the stochastic experiment -stochastic_results <- stochastic_experiment$run(parallel = FALSE) +stochastic_results <- stochastic_experiment$run() # Examine results with replications stochastic_results %>% @@ -305,7 +304,7 @@ comparison_experiment$set_parameter( comparison_experiment$set_design() # Run the experiment -comparison_results <- 
comparison_experiment$run(parallel = FALSE) +comparison_results <- comparison_experiment$run() # Analyze results by model and beta comparison_results %>% @@ -323,9 +322,7 @@ You can also run experiments with specific parameter sets or design points: ```{r} # Let's run the experiment again, but only for the first 2 parameter combinations -selected_results <- experiment$run( - parallel = FALSE -)[1:2, ] +selected_results <- experiment$run()[1:2, ] # View the results selected_results diff --git a/man/R6Experiment.Rd b/man/R6Experiment.Rd index b154ba4..1144600 100644 --- a/man/R6Experiment.Rd +++ b/man/R6Experiment.Rd @@ -28,6 +28,8 @@ Manages experimental designs and execution for R6Sim models. \item{\code{set_seed}}{is a T if the experiment will be controlling and setting seeds.} \item{\code{experimental_parameters}}{is a list containing details about each experimental parameter. Experimental parameters can be either policy levers or uncertainties. Defining this distinction is up to the user.} + +\item{\code{results}}{is a data.frame containing the results of the experiment.} } \if{html}{\out{}} } @@ -38,6 +40,9 @@ Manages experimental designs and execution for R6Sim models. \item \href{#method-R6Experiment-set_parameter}{\code{R6Experiment$set_parameter()}} \item \href{#method-R6Experiment-set_design}{\code{R6Experiment$set_design()}} \item \href{#method-R6Experiment-run}{\code{R6Experiment$run()}} +\item \href{#method-R6Experiment-run_single_experiment}{\code{R6Experiment$run_single_experiment()}} +\item \href{#method-R6Experiment-checkpoint}{\code{R6Experiment$checkpoint()}} +\item \href{#method-R6Experiment-run_checkpoint_iteration}{\code{R6Experiment$run_checkpoint_iteration()}} \item \href{#method-R6Experiment-clone}{\code{R6Experiment$clone()}} } } @@ -148,11 +153,9 @@ The policy experimental design will have `blocks` \* `n_lhs` \* `n_grid_points` Run Experiment \subsection{Usage}{ \if{html}{\out{
}}\preformatted{R6Experiment$run( - n_cores = 3, - parallel = F, - cluster_eval_script = NULL, - model_from_cluster_eval = F, - packages = NULL, + checkpoint_frequency = NULL, + checkpoint_dir = NULL, + backend = "future.apply", ... )}\if{html}{\out{
}} } @@ -160,21 +163,33 @@ Run Experiment \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{n_cores}}{number of cores to use} +\item{\code{checkpoint_frequency}}{Frequency of checkpoints during the experiment. If NULL, defaults to 10 percent of the experimental design.} -\item{\code{parallel}}{whether to evaluate run in parallel} +\item{\code{checkpoint_dir}}{Directory to save checkpoints. Default is NULL.} -\item{\code{cluster_eval_script}}{Optional path to R script that is sourced once in each parallel process before running experiments. -Useful for model setup that should happen once per process, like: -- Loading required packages -- Compiling models (e.g. odin models) -- Setting up simulation parameters/data -- Creating model instances for use across runs} +\item{\code{backend}}{Backend to use for parallelization. Options are future.apply (default) or foreach.} -\item{\code{model_from_cluster_eval}}{If TRUE, expects model instances to be created in cluster_eval_script. -Set TRUE when model compilation is needed (like with odin).} +\item{\code{...}}{Additional parameters passed to model simulation.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +A data.frame containing the results of the experiment. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-R6Experiment-run_single_experiment}{}}} +\subsection{Method \code{run_single_experiment()}}{ +Run a single experiment +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{R6Experiment$run_single_experiment(policy_design_id, ...)}\if{html}{\out{
}} +} -\item{\code{packages}}{character vector of packages to be loaded before running the model in parallel.} +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{policy_design_id}}{ID of the policy design to run} \item{\code{...}}{additional parameters passed to model simulation} } @@ -182,6 +197,63 @@ Set TRUE when model compilation is needed (like with odin).} } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-R6Experiment-checkpoint}{}}} +\subsection{Method \code{checkpoint()}}{ +Save a checkpoint of the experiment. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{R6Experiment$checkpoint(checkpoint_file)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{checkpoint_file}}{Directory to save the checkpoint.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-R6Experiment-run_checkpoint_iteration}{}}} +\subsection{Method \code{run_checkpoint_iteration()}}{ +Run a single checkpoint iteration +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{R6Experiment$run_checkpoint_iteration( + checkpoint_iteration, + checkpoint_frequency, + remaining_steps, + overall_progress, + checkpoint_file, + completed_steps, + backend, + ... +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{checkpoint_iteration}}{Current checkpoint iteration.} + +\item{\code{checkpoint_frequency}}{Frequency of checkpoints.} + +\item{\code{remaining_steps}}{Number of steps remaining in the experiment.} + +\item{\code{overall_progress}}{Overall progressor object for tracking progress.} + +\item{\code{checkpoint_file}}{Path to the file where the checkpoint will be saved.} + +\item{\code{completed_steps}}{Number of steps already completed in the experiment.} + +\item{\code{backend}}{Backend to use for parallelization. Options are "future.apply" (default) or "foreach".} + +\item{\code{...}}{Additional parameters passed to model simulation.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-R6Experiment-clone}{}}} \subsection{Method \code{clone()}}{ diff --git a/man/R6Experiment_run.Rd b/man/R6Experiment_run.Rd deleted file mode 100644 index 4e77ecf..0000000 --- a/man/R6Experiment_run.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/R6Experiment_run.R -\name{R6Experiment_run} -\alias{R6Experiment_run} -\title{Runs R6Experiment in parallel} -\usage{ -R6Experiment_run( - self, - n_cores, - parallel, - cluster_eval_script = NULL, - model_from_cluster_eval, - cluster_type = "PSOCK", - packages = NULL, - ... -) -} -\arguments{ -\item{self}{experiment object} - -\item{n_cores}{number of cores to use} - -\item{parallel}{whether to evaluate run in parallel} - -\item{cluster_eval_script}{path to script that instantiates necessary functions. this will often mean sourcing functions external to the package and loading dependencies for the model. needed if parallel = T} - -\item{model_from_cluster_eval}{T if model is instantiated in the cluter eval scripts, F otherwise. Use T if using models that need compilation (like odin) and F otherwise.} - -\item{cluster_type}{either "FORK" or "PSOCK".} - -\item{packages}{character vector of packages to be loaded before running the model in paralle.} - -\item{...}{additional parameters to be passed to the model simulation function.} -} -\value{ -results data.frame from all simulations in parallel -} -\description{ -This function is most useful to simulate the posterior distribution for a single model in parallel in one machine. This function is not used when calibrating the model and not useful for parallelization across multiple nodes. 
-} diff --git a/man/R6Sim.Rd b/man/R6Sim.Rd index 1169b65..986992e 100644 --- a/man/R6Sim.Rd +++ b/man/R6Sim.Rd @@ -13,39 +13,6 @@ The R6Sim class includes functionality for: * JSON serialization * Parallel execution * Parameter sampling -} -\examples{ -# Create simulation model -MyModel <- R6::R6Class( - "MyModel", - inherit = R6Sim, - public = list( - initialize = function(name) { - super$initialize(name) - self$set_input("population", 1000) - self$set_input("growth_rate", 0.05) - }, - simulate = function(...) { - pop <- self$inputs$population - growth <- self$inputs$growth_rate - results <- pop * (1 + growth)^(1:10) - return(data.frame(year = 1:10, population = results)) - } - ) -) - -model <- MyModel$new("pop_model") -results <- model$simulate() - - -## ------------------------------------------------ -## Method `R6Sim$set_input` -## ------------------------------------------------ - -model$set_input("population", 1000, type = "parameter") -model$set_input("growth_rates", c(0.01, 0.02), type = "scenario") -model$set_input("settings", list(iterations = 100), type = "config") - } \section{Public fields}{ \if{html}{\out{
}} @@ -124,17 +91,6 @@ Accepts numeric, character, logical, data.frame and list inputs. Type tags enable selective JSON export. } -\subsection{Examples}{ -\if{html}{\out{
}} -\preformatted{model$set_input("population", 1000, type = "parameter") -model$set_input("growth_rates", c(0.01, 0.02), type = "scenario") -model$set_input("settings", list(iterations = 100), type = "config") - -} -\if{html}{\out{
}} - -} - } \if{html}{\out{
}} \if{html}{\out{}} diff --git a/tests/testthat.R b/tests/testthat.R index 63c13f0..0182e92 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -9,5 +9,12 @@ library(testthat) library(R6Sim) library(dplyr) +library(future) + +# Set default parallel backend for tests: multicore, multisession or sequential +options(parallel_backend = "sequential") + +# Set up sequential plan before starting tests. +future::plan(sequential) test_check("R6Sim") diff --git a/tests/testthat/helper-R6Sim.R b/tests/testthat/helper-R6Sim.R new file mode 100644 index 0000000..b90edff --- /dev/null +++ b/tests/testthat/helper-R6Sim.R @@ -0,0 +1,44 @@ +# This helper file contains shared setup code for R6Sim tests. +library(dplyr) +library(tidyr) + +# Sample model: +Mymodel <- R6::R6Class( + classname = "Mymodel", + inherit = R6Sim, + public = list( + sim_res = NULL, + + # Custom Initialize function + initialize = function(name) { + super$initialize(name = name) + self$set_input("pop.size", 100)$ + set_input("risk.mean", 0.01)$ + set_input("risk.sd", 0.001)$ + set_input(name = "trials", value = 10) + }, + + # Sample Simulate function + simulate = function() { + + # Create a sample population with some health events: + sim_res <- data.frame( + p.id = 1:self$inputs$pop.size, + risk = self$inputs$risk.mean + #risk = rnorm(n = self$inputs$pop.size, mean = self$inputs$risk.mean, sd = self$inputs$risk.sd + ) %>% + mutate(probability.event = 1 - exp(-risk)) %>% + mutate(n.events = self$inputs$pop.size * self$inputs$trials * probability.event) %>% + #mutate(n.events = rbinom(n = 1:self$inputs$pop.size, size = self$inputs$trials, prob = probability.event)) %>% + group_by() %>% + summarise(n.events = sum(n.events)) + + invisible(sim_res) + } + ) +) + +# Creating a model object ------------------------------------------------- + +# Creates a model object and gives it a name. 
+model <- Mymodel$new(name = "test") diff --git a/tests/testthat/test-R6Experiment.R b/tests/testthat/test-R6Experiment.R new file mode 100644 index 0000000..9758b55 --- /dev/null +++ b/tests/testthat/test-R6Experiment.R @@ -0,0 +1,134 @@ +# This file contains tests specifically for the R6Experiment class. + +# Ensure all required libraries are loaded +library(dplyr) +library(tidyr) +library(future) + + +# Creating a model object ------------------------------------------------- + + +# Test R6Experiment with set_parameter + +test_that("R6Experiment works with set_parameter", { + experiment <- R6Experiment$new(model) + + experiment$ + set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c("Colonoscopy", "FIT"))$ + set_parameter(parameter_name = "abc", experimental_design = "lhs", min = 1, max = 10)$ + set_design(n_lhs = 2) + + expect_true(is.R6Experiment(experiment)) +}) + +# Test R6Experiment without set_parameter + +test_that("R6Experiment works without set_parameter", { + experiment <- R6Experiment$new(model) + experiment$set_design() + expect_true(is.R6Experiment(experiment)) +}) + +# Test R6Experiment with convert to grid = T + +experiment <- R6Experiment$new(model) + +experiment$ + set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c("Colonoscopy", "FIT"))$ + set_parameter(parameter_name = "abc", experimental_design = "lhs", min = 1, max = 10)$ + set_design(n_lhs = 2, convert_lhs_to_grid = T) + +test_that("R6Experiment works with convert to grid = T", { + expect_true(is.R6Experiment(experiment)) +}) + +# Test R6Experiment with pre-existing design + +test_that("R6Experiment works with pre-existing design", { + experiment <- R6Experiment$new(model) + # External grid: + grid_design <- expand.grid(c(1:10), c(10:13)) + # Create an experimental design: + experiment$set_design(grid_design_df = grid_design) + expect_true(is.R6Experiment(experiment)) +}) + +# Test R6Experiment runs in parallel using future + 
+test_that("R6Experiment runs in parallel using future", { + # Ensure all required libraries are loaded for this test + experiment <- R6Experiment$new(model) + + experiment$set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c(1, 2)) + experiment$set_design(n_reps = 3) + + # Assign the parallel backend function + backend_fn <- R6Sim:::get_parallel_backend_fn() + + # Use the backend function in the test + plan(backend_fn) + + # Run in parallel mode with checkpoint_dir + temp_checkpoint_dir <- tempfile() + results <- experiment$run(checkpoint_dir = temp_checkpoint_dir) + + expect_equal(length(unique(results$rep.id)), 3) + expect_equal(nrow(results), nrow(experiment$policy_design)) + expect_true(all(c("rep.id", "seed") %in% names(results))) + + # Reset future plan to sequential + plan(sequential) + + # Close/delete the temporary checkpoint file + unlink(temp_checkpoint_dir, recursive = TRUE) +}) + +# Test R6Experiment runs sequentially + +test_that("R6Experiment runs sequentially", { + experiment <- R6Experiment$new(model) + + experiment$set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c(1, 2)) + experiment$set_design(n_reps = 3) + + # Run in sequential mode with checkpoint_dir + temp_checkpoint_dir <- tempfile() + results <- experiment$run(checkpoint_dir = temp_checkpoint_dir) + + expect_equal(length(unique(results$rep.id)), 3) + expect_equal(nrow(results), nrow(experiment$policy_design)) + expect_true(all(c("rep.id", "seed") %in% names(results))) + + # Close/delete the temporary checkpoint file + unlink(temp_checkpoint_dir, recursive = TRUE) +}) + +# Test R6Experiment produces identical results with the same seed + +test_that("R6Experiment produces identical results with the same seed", { + # Set the same seed before creating experiments + seed_value <- 1234 + set.seed(seed_value) + experiment1 <- R6Experiment$new(model) + experiment1$set_parameter(parameter_name = "Test1", experimental_design = "grid", values 
= c(1, 2)) + experiment1$set_design(n_reps = 3, set_seed = TRUE) + + set.seed(seed_value) + experiment2 <- R6Experiment$new(model) + experiment2$set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c(1, 2)) + experiment2$set_design(n_reps = 3, set_seed = TRUE) + + # Run both experiments with checkpoint_dir + temp_checkpoint_dir1 <- tempfile() + temp_checkpoint_dir2 <- tempfile() + results1 <- experiment1$run(checkpoint_dir = temp_checkpoint_dir1) + results2 <- experiment2$run(checkpoint_dir = temp_checkpoint_dir2) + + # Verify that results are identical + expect_identical(results1, results2) + + # Close/delete the temporary checkpoint files + unlink(temp_checkpoint_dir1, recursive = TRUE) + unlink(temp_checkpoint_dir2, recursive = TRUE) +}) diff --git a/tests/testthat/test-R6Experiment_checkpointing.R b/tests/testthat/test-R6Experiment_checkpointing.R new file mode 100644 index 0000000..4641d75 --- /dev/null +++ b/tests/testthat/test-R6Experiment_checkpointing.R @@ -0,0 +1,56 @@ +# Test for checkpointing functionality in R6Experiment +library(testthat) +library(future) +library(doFuture) + +# Register multisession for parallel processing +registerDoFuture() + +# Assign the parallel backend function +backend_fn <- R6Sim:::get_parallel_backend_fn() + +# Helper function to create a mock R6Experiment object +create_mock_experiment <- function() { + R6Experiment$new(model) +} + +test_that("Checkpointing saves and resumes correctly", { + # Create a mock experiment + experiment <- create_mock_experiment() + + full_design <- data.frame(some_param = 1:100) + + # Mock policy design + experiment$set_design(grid_design_df = full_design) + + # Temporary directory for checkpoints + checkpoint_dir <- tempfile() + + length(unique(experiment$policy_design$policy.exp.id)) + + # Run experiment with checkpointing + results <- experiment$run(checkpoint_frequency = 10, checkpoint_dir = checkpoint_dir) + + # Check if checkpoint files are created + 
checkpoint_files <- list.files(checkpoint_dir, pattern = "*.rds", full.names = TRUE) + expect_true(length(checkpoint_files) > 0) + + # Resume from the last checkpoint + last_checkpoint <- checkpoint_files[length(checkpoint_files)] + + checkpoint_experiment <- readRDS(last_checkpoint) + + # Update the design to add new runs + + checkpoint_experiment$set_design(grid_design_df = full_design) + + # Here, it should start from where we left off + resumed_results <- checkpoint_experiment$run(checkpoint_frequency = 2, checkpoint_dir = checkpoint_dir) + + # Ensure results are consistent + expect_equal(nrow(resumed_results), nrow(experiment$policy_design)) + + # Clean up checkpoint directory + unlink(checkpoint_dir, recursive = TRUE) + expect_false(dir.exists(checkpoint_dir)) +}) diff --git a/tests/testthat/test-R6Sim_JSON.R b/tests/testthat/test-R6Sim_JSON.R new file mode 100644 index 0000000..3db1e9e --- /dev/null +++ b/tests/testthat/test-R6Sim_JSON.R @@ -0,0 +1,60 @@ +# This file contains tests for JSON-related functionalities of the R6Sim class. 
+ +# json tests -------------------------------------------------------------- + +json <- model$to_json() + +# Ensure `res` is defined by running the simulation on the original model +set.seed(1234) +res <- model$simulate() + +# Re-creating the model from json: +new_model <- Mymodel$new(name = "Mymodel") +new_model$set_inputs_from_json(json = json) + +test_that("to_json and set_input_from_json work", { + expect_equal(length(new_model$inputs), length(model$inputs)) +}) + +test_that("results from a json-converted model is identical to original model", { + set.seed(1234) + new_res <- new_model$simulate() + + expect_identical(res, new_res) +}) + +test_that("to_json preserves inputs and model functionality", { + # Create a duplicate model with different input values + test_model <- Mymodel$new(name = "test_json_model") + test_model$set_input("pop.size", 500) + test_model$set_input("risk.mean", 0.05) + + # Convert to JSON and back + json_model <- test_model$to_json() + + # Create new model from JSON + new_model <- Mymodel$new("new_model") + new_model$set_inputs_from_json(json_model) + + # Verify input values were preserved + expect_equal(new_model$inputs$pop.size, 500) + expect_equal(new_model$inputs$risk.mean, 0.05) + + # Verify simulation results match + set.seed(123) + result1 <- test_model$simulate() + + set.seed(123) + result2 <- new_model$simulate() + + expect_equal(result1, result2) +}) + +test_that("set_inputs_from_json handles errors", { + # Test error handling for parse errors + test_model <- Mymodel$new(name = "test") + expect_error(test_model$set_inputs_from_json("invalid json string")) + + # Test handling invalid JSON structure + expect_error(test_model$set_inputs_from_json('{"not_inputs": {}}')) +}) \ No newline at end of file diff --git a/tests/testthat/test-R6Sim_inputs.R b/tests/testthat/test-R6Sim_inputs.R new file mode 100644 index 0000000..a4e42d6 --- /dev/null +++ b/tests/testthat/test-R6Sim_inputs.R @@ -0,0 +1,78 @@ +# This file contains tests for 
input handling functionalities of the R6Sim class. + +# Test get_inputs from yaml and excel ------------------------------------- + +test_that("get_inputs works with YAML files", { + # Create a temporary YAML file + yaml_content <- " +pop.size: 1000 +risk.mean: 0.15 +risk.sd: 0.02 +trials: 10 +" + yaml_file <- tempfile(fileext = ".yaml") + writeLines(yaml_content, yaml_file) + + # Create model and test get_inputs + model <- Mymodel$new(name = "test") + model$get_inputs(yaml_file) + + # Test that inputs were set correctly + expect_equal(model$inputs$pop.size, 1000) + expect_equal(model$inputs$risk.mean, 0.15) + expect_equal(model$inputs$risk.sd, 0.02) + expect_equal(model$inputs$trials, 10) + + # Cleanup + unlink(yaml_file) +}) + +test_that("get_inputs works with Excel files", { + # Skip if readxl not available + skip_if_not_installed("readxl") + + # Create test workbook + wb_file <- tempfile(fileext = ".xlsx") + + # Create data for sheets + sheet1_data <- data.frame( + pop.size = 2000, + risk.mean = 0.25 + ) + + sheet2_data <- data.frame( + risk.sd = 0.03, + trials = 20 + ) + + # Write Excel file with multiple sheets + writexl::write_xlsx(list( + Sheet1 = sheet1_data, + Sheet2 = sheet2_data + ), wb_file) + + # Test get_inputs with Excel file + model <- Mymodel$new(name = "test") + model$get_inputs(wb_file) + + # Verify inputs were set correctly + expect_equal(model$inputs$Sheet1$pop.size, 2000) + expect_equal(model$inputs$Sheet1$risk.mean, 0.25) + expect_equal(model$inputs$Sheet2$risk.sd, 0.03) + expect_equal(model$inputs$Sheet2$trials, 20) + + # Cleanup + unlink(wb_file) +}) + +test_that("get_inputs handles errors appropriately", { + # Test non-existent file + model <- Mymodel$new(name = "test") + expect_error(model$get_inputs("nonexistent.yaml")) + + # Test invalid file type + invalid_file <- tempfile(fileext = ".txt") + writeLines("some text", invalid_file) + expect_error(model$get_inputs(invalid_file)) + unlink(invalid_file) +}) \ No newline at end of file diff 
--git a/tests/testthat/test-R6Sim_set_input.R b/tests/testthat/test-R6Sim_set_input.R new file mode 100644 index 0000000..0dd18c2 --- /dev/null +++ b/tests/testthat/test-R6Sim_set_input.R @@ -0,0 +1,18 @@ +test_that("R6Sim_set_input handles replacing inputs with different lengths", { + sim <- R6Sim$new() + sim$inputs <- list(existing_input = c(1, 2, 3)) + sim$inputs_table <- data.frame(name = "existing_input", type = "numeric") + + expect_warning( + sim$set_input("existing_input", c(1, 2), "numeric") + ) +}) + +test_that("R6Sim_set_input warns for unsupported input types", { + sim <- R6Sim$new() + unsupported_value <- structure(1, class = "unsupported_class") + + expect_warning( + sim$set_input("unsupported_input", unsupported_value, "numeric") + ) +}) diff --git a/tests/testthat/test-R6Sim_simulate.R b/tests/testthat/test-R6Sim_simulate.R new file mode 100644 index 0000000..ca734ae --- /dev/null +++ b/tests/testthat/test-R6Sim_simulate.R @@ -0,0 +1,32 @@ +# This file contains tests for the simulation functionalities of the R6Sim class. 
+ +# simulate model ---------------------------------------------------------- + +set.seed(1234) + +res <- model$simulate() + +test_that("simulate works", { + expect_equal(object = nrow(res), expected = 1) +}) + +test_that("both setup_run and simulate methods must be implemented", { + # Create a model inheriting from R6Sim without overriding methods + BasicModel <- R6::R6Class( + classname = "BasicModel", + inherit = R6Sim, + public = list( + initialize = function(name) { + super$initialize(name = name) + } + # No simulate or setup_run implementation + ) + ) + + # Create instance + basic_model <- BasicModel$new("basic") + + # Methods should throw errors because they must be implemented + expect_error(basic_model$setup_run(), "Setup_run method must be implemented by your class") + expect_error(basic_model$simulate(), "Simulate method must be implemented by your class") +}) \ No newline at end of file diff --git a/tests/testthat/test-test_R6Sim.R b/tests/testthat/test-test_R6Sim.R deleted file mode 100644 index 2e1fc21..0000000 --- a/tests/testthat/test-test_R6Sim.R +++ /dev/null @@ -1,587 +0,0 @@ - - -# Sample model: - -Mymodel <- R6::R6Class( - classname = "Mymodel", - inherit = R6Sim, - public = list( - sim_res = NULL, - - # Custom Initialize function - initialize = function(name) { - super$initialize(name = name) - self$set_input("pop.size", 100)$ - set_input("risk.mean", 0.01)$ - set_input("risk.sd", 0.001)$ - set_input(name = "trials", value = 10) - }, - - # Sample Simulate function - simulate = function() { - - # Create a sample population with some health events: - sim_res <- data.frame( - p.id = 1:self$inputs$pop.size, - risk = rnorm(n = self$inputs$pop.size, mean = self$inputs$risk.mean, sd = self$inputs$risk.sd) - ) %>% - mutate(probability.event = 1 - exp(-risk)) %>% - mutate(n.events = rbinom(n = 1:self$inputs$pop.size, size = self$inputs$trials, prob = probability.event)) %>% - group_by() %>% - summarise(n.events = sum(n.events)) - - invisible(sim_res) 
- } - ) -) - -# Creating a model object ------------------------------------------------- - -# Creates a model object and gives it a name. -model <- Mymodel$new(name = "test") - -test_that("R6Sim was created", { - expect_true(is.R6Sim(model)) -}) - - -# Setting model inputs: -model$ - set_input(name = "pop.size", value = 1000, type = "settings")$ - set_input(name = "risk.mean", value = 0.15, type = "natural_history")$ - set_input(name = "risk.sd", value = 0.02, type = "natural_history")$ - set_input(name = "trials", value = 10, type = "natural_history")$ - set_input(name = "strategy.id", value = 1, type = "policy")$ - set_input(name = "some_date", value = "2020-01-01", type = "policy")$ - set_input(name = "det.ratios", value = seq.default(from = 0, to = 1, length.out = 101), type = "policy") - -# Setting an input twice: -model$set_input(name = "risk.sd", value = 0.02, type = "natural_history") - -test_that("input was created", { - expect_true(model$inputs$pop.size == 1000) -}) - - -# Set posterior ----------------------------------------------------------- - -# Loading multiple posterior data.frames: -posterior.a <- data.frame( - risk.mean = rnorm(n = 1000, mean = 0, sd = 0.5), - risk.sd = rnorm(n = 1000, mean = 1, sd = 2), - weights = 1 / 1000 -) -# Let's suppose a different calibration resulted in a different posterior: -posterior.b <- data.frame( - risk.mean = rnorm(n = 1000, mean = 1, sd = 0.5), - risk.sd = rnorm(n = 1000, mean = 0.5, sd = 2), - weights = 1 / 1000 -) - -# Here we set the posterior of the model using three posterior files: -model$set_param_dist( - params_list = list(pa = posterior.a, pb = posterior.b, pc = posterior.b), - param_dist_weights = "weights", use_average = F, n_sample = 10 -) - -test_that("set_posterior works", { - expect_true(nrow(model$params_df) == 30) -}) - -# Set posterior works without sampling: - -model$set_param_dist( - params_list = list(pa = posterior.a, pb = posterior.b, pc = posterior.b), - param_dist_weights = "weights", 
use_average = F, resample = F -) - - -test_that("set_posterior works without resampling", { - expect_true(nrow(model$params_df) == 3000) -}) - -# Here we set the posterior of the model using three posterior files: -model$set_param_dist( - params_list = list(pa = posterior.a, pb = posterior.b, pc = posterior.b), - param_dist_weights = "weights", use_average = T -) - -test_that("set_posterior works with averages", { - expect_true(nrow(model$params_df) == 3) -}) - - - - -# simulate model ---------------------------------------------------------- - -set.seed(1234) - -res <- model$simulate() - -test_that("simulate works", { - expect_equal(object = nrow(res), expected = 1) -}) - -test_that("both setup_run and simulate methods must be implemented", { - # Create a model inheriting from R6Sim without overriding methods - BasicModel <- R6::R6Class( - classname = "BasicModel", - inherit = R6Sim, - public = list( - initialize = function(name) { - super$initialize(name = name) - } - # No simulate or setup_run implementation - ) - ) - - # Create instance - basic_model <- BasicModel$new("basic") - - # Methods should throw errors because they must be implemented - expect_error(basic_model$setup_run(), "Setup_run method must be implemented by your class") - expect_error(basic_model$simulate(), "Simulate method must be implemented by your class") -}) - - -# json tests -------------------------------------------------------------- - -json <- model$to_json() -# Re-creating the model from json: -new_model <- Mymodel$new(name = "Mymodel") -new_model$set_inputs_from_json(json = json) - -test_that("to_json and set_input_from_json work", { - expect_equal(length(new_model$inputs), length(model$inputs)) -}) - - -test_that("results from a json-converted model is identical to original model", { - set.seed(1234) - new_res <- new_model$simulate() - - expect_identical(res, new_res) -}) - -test_that("to_json preserves inputs and model functionality", { - # Create a duplicate model with different 
input values - test_model <- Mymodel$new(name = "test_json_model") - test_model$set_input("pop.size", 500) - test_model$set_input("risk.mean", 0.05) - - # Convert to JSON and back - json_model <- test_model$to_json() - - # Create new model from JSON - new_model <- Mymodel$new("new_model") - new_model$set_inputs_from_json(json_model) - - # Verify input values were preserved - expect_equal(new_model$inputs$pop.size, 500) - expect_equal(new_model$inputs$risk.mean, 0.05) - - # Verify simulation results match - set.seed(123) - result1 <- test_model$simulate() - - set.seed(123) - result2 <- new_model$simulate() - - expect_equal(result1, result2) -}) - -test_that("set_inputs_from_json handles errors", { - # Test error handling for parse errors - test_model <- Mymodel$new(name = "test") - expect_error(test_model$set_inputs_from_json("invalid json string")) - - # Test handling invalid JSON structure - expect_error(test_model$set_inputs_from_json('{"not_inputs": {}}')) -}) - - - -# Test get_inputs from yaml and excel ------------------------------------- - -test_that("get_inputs works with YAML files", { - # Create a temporary YAML file - yaml_content <- " -pop.size: 1000 -risk.mean: 0.15 -risk.sd: 0.02 -trials: 10 -" - yaml_file <- tempfile(fileext = ".yaml") - writeLines(yaml_content, yaml_file) - - # Create model and test get_inputs - model <- Mymodel$new(name = "test") - model$get_inputs(yaml_file) - - # Test that inputs were set correctly - expect_equal(model$inputs$pop.size, 1000) - expect_equal(model$inputs$risk.mean, 0.15) - expect_equal(model$inputs$risk.sd, 0.02) - expect_equal(model$inputs$trials, 10) - - # Cleanup - unlink(yaml_file) -}) - - -test_that("get_inputs works with Excel files", { - # Skip if readxl not available - skip_if_not_installed("readxl") - - # Create test workbook - wb_file <- tempfile(fileext = ".xlsx") - - # Create data for sheets - sheet1_data <- data.frame( - pop.size = 2000, - risk.mean = 0.25 - ) - - sheet2_data <- data.frame( - risk.sd = 
0.03, - trials = 20 - ) - - # Write Excel file with multiple sheets - writexl::write_xlsx(list( - Sheet1 = sheet1_data, - Sheet2 = sheet2_data - ), wb_file) - - # Test get_inputs with Excel file - model <- Mymodel$new(name = "test") - model$get_inputs(wb_file) - - # Verify inputs were set correctly - expect_equal(model$inputs$Sheet1$pop.size, 2000) - expect_equal(model$inputs$Sheet1$risk.mean, 0.25) - expect_equal(model$inputs$Sheet2$risk.sd, 0.03) - expect_equal(model$inputs$Sheet2$trials, 20) - - # Cleanup - unlink(wb_file) -}) - -test_that("get_inputs handles errors appropriately", { - # Test non-existent file - model <- Mymodel$new(name = "test") - expect_error(model$get_inputs("nonexistent.yaml")) - - # Test invalid file type - invalid_file <- tempfile(fileext = ".txt") - writeLines("some text", invalid_file) - expect_error(model$get_inputs(invalid_file)) - unlink(invalid_file) -}) - - - -# set_input tests --------------------------------------------------------- - -test_that("set_inputs handles unusual inputs", { - model <- Mymodel$new(name = "test") - - # Error on missing parameters - expect_error(model$set_input()) - - # Set an initial value to be replaced - model$set_input("test_length", 10) - - # Warning on length mismatch (input change) - expect_warning( - model$set_input(name = "test_length", value = c(1,2,3)), - "You are replacing the input test_length which had length 1 with an object of length 3" - ) - - # Warning on unsupported classes - custom_class <- structure(1, class = "custom_class") - expect_warning( - model$set_input(name = "custom_obj", value = custom_class), - "Input custom_obj includes values using classes that we do not recommend" - ) - - # The second warning output from weird classes in list is harder to test exactly - # So we'll just verify that a warning is thrown - weird_list <- list(a = structure(1, class = "weird")) - expect_warning(model$set_input(name = "weird_list", value = weird_list)) -}) - - - -# R6Experiment tests 
----------------------------------------------------- - -test_that("R6Experiment works with set_parameter", { - experiment <- R6Experiment$new(model) - - experiment$ - set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c("Colonoscopy", "FIT"))$ - set_parameter(parameter_name = "abc", experimental_design = "lhs", min = 1, max = 10)$ - set_design(n_lhs = 2) - - expect_true(is.R6Experiment(experiment)) -}) - -test_that("R6Experiment works without set_parameter", { - experiment <- R6Experiment$new(model) - experiment$set_design() - expect_true(is.R6Experiment(experiment)) -}) - -experiment <- R6Experiment$new(model) - -experiment$ - set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c("Colonoscopy", "FIT"))$ - set_parameter(parameter_name = "abc", experimental_design = "lhs", min = 1, max = 10)$ - set_design(n_lhs = 2, convert_lhs_to_grid = T) - -test_that("R6Experiment works with convert to grid = T", { - expect_true(is.R6Experiment(experiment)) -}) - -test_that("R6Experiment works with pre-existing design", { - experiment <- R6Experiment$new(model) - # External grid: - grid_design <- expand.grid(c(1:10), c(10:13)) - # Create an experimental design: - experiment$set_design(grid_design_df = grid_design) - expect_true(is.R6Experiment(experiment)) -}) - - -test_that("R6Experiment supports stochastic replications with seeds", { - experiment <- R6Experiment$new(model) - - # Test with default n_reps=1 and set_seed=T - experiment$set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c("A", "B")) - experiment$set_design(n_lhs = 2, ) - - # Check experiment has seed column - expect_true("seed" %in% names(experiment$policy_design)) - expect_equal(length(unique(experiment$policy_design$rep.id)), 1) - expect_false(any(is.na(experiment$policy_design$seed))) - - # Test with n_reps=3 and set_seed=T - experiment$set_design(n_lhs = 2, n_reps = 3) - 
expect_equal(length(unique(experiment$policy_design$rep.id)), 3) - expect_false(any(is.na(experiment$policy_design$seed))) - expect_equal(length(unique(experiment$policy_design$seed)), 3) - - # Test with set_seed=F - experiment$set_design(n_lhs = 2, n_reps = 2, set_seed = F) - expect_true(all(is.na(experiment$policy_design$seed))) - - # Test reproducibility with seeds - set.seed(123) - experiment$set_design(n_lhs = 2, n_reps = 2) - seeds1 <- experiment$policy_design$seed - - set.seed(123) - experiment$set_design(n_lhs = 2, n_reps = 2) - seeds2 <- experiment$policy_design$seed - - expect_equal(seeds1, seeds2) -}) - -test_that("run_single_experiment sets seeds correctly", { - - experiment <- R6Experiment$new(model) - - experiment$set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c(1, 2)) - - experiment$set_design(n_reps = 2, set_seed = T) - - # Get results with same policy_design_id but different seeds - res1 <- experiment$run() - res2 <- experiment$run() - - # Same policy_design_id should give different results due to different seeds - expect_true(identical(res1, res2)) - - # Same policy_design_id and seed should give identical results - experiment$set_design(n_reps = 2, set_seed = F) - res3 <- experiment$run() - - expect_false(identical(res1, res3)) -}) - -test_that("R6Experiment runs with replications", { - experiment <- R6Experiment$new(model) - - experiment$set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c(1, 2)) - experiment$set_design(n_reps = 3) - - # Run in sequential mode - results <- experiment$run(parallel = FALSE) - - expect_equal(length(unique(results$rep.id)), 3) - expect_equal(nrow(results), nrow(experiment$policy_design)) - expect_true(all(c("rep.id", "seed") %in% names(results))) -}) - -test_that("R6Experiment basic run works", { - experiment <- R6Experiment$new(model) - experiment$set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c(1, 2)) - 
experiment$set_design(n_reps = 2) - - # Test with default parameters - results <- experiment$run() - expect_equal(nrow(results), nrow(experiment$policy_design)) - expect_true("Test1" %in% names(results)) - - # Check correct number of replications - expect_equal(length(unique(results$rep.id)), 2) -}) - -# Skip parallel tests as they're harder to run consistently -test_that("R6Experiment parallel tests are skipped", { - skip("Skipping parallel execution tests") - - # Test with PSOCK cluster (default) - experiment <- R6Experiment$new(model) - experiment$set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c(1, 2)) - experiment$set_design(n_reps = 2) - - # These would be run if not skipped - results <- experiment$run(n_cores = 2, parallel = TRUE, packages = "dplyr") - - # Test model_from_cluster_eval = TRUE - expect_error( - experiment$run(n_cores = 2, parallel = TRUE, model_from_cluster_eval = TRUE), - regexp = "cluster_experiment" - ) -}) - - -test_that("R6Experiment handles models with no parameter distributions", { - # Create two models - model1 <- Mymodel$new(name = "test1") - model2 <- Mymodel$new(name = "test2") - - # Create experiment with models having no params_df - experiment <- R6Experiment$new(model1, model2) - experiment$set_design(n_lhs = 2) - - # Check that params_df was created for both models - expect_false(is.null(experiment$models[[1]]$params_df)) - expect_false(is.null(experiment$models[[2]]$params_df)) - - # Check params_df structure - expect_true(nrow(experiment$models[[1]]$params_df) == 1) - expect_true("param.id" %in% names(experiment$models[[1]]$params_df)) -}) - -test_that("R6Experiment errors on inconsistent parameter distributions", { - # Create two models - model1 <- Mymodel$new(name = "test1") - model2 <- Mymodel$new(name = "test2") - - # Set params_df for only one model - model1$set_param_dist( - params_list = list(default = data.frame(weights = 1)), - param_dist_weights = "weights", - use_average = TRUE - ) - - 
# Create experiment with inconsistent models - experiment <- R6Experiment$new(model1, model2) - - # Should error when setting design - expect_error( - experiment$set_design(n_lhs = 2), - regexp = "Inconsistent parameter distributions" - ) -}) - -test_that("R6Experiment works with all models having parameter distributions", { - # Create two models - model1 <- Mymodel$new(name = "test1") - model2 <- Mymodel$new(name = "test2") - - # Set params_df for both models - params <- list(default = data.frame(weights = 1)) - model1$set_param_dist(params_list = params, param_dist_weights = "weights", use_average = TRUE) - model2$set_param_dist(params_list = params, param_dist_weights = "weights", use_average = TRUE) - - # Create and set up experiment - experiment <- R6Experiment$new(model1, model2) - experiment$set_design(n_lhs = 2) - - # Check experiment setup worked - expect_false(is.null(experiment$params)) - expect_false(is.null(experiment$policy_design)) -}) - - - -# Test writing experiments to disk ---------------------------------------- - -# Implement tests when this is implemented in the package: - -# test_that("write_design works with CSV format", { -# # Setup test experiment -# model <- Mymodel$new(name = "test") -# experiment <- R6Experiment$new(model) -# experiment$set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c("A", "B")) -# experiment$set_design(n_lhs = 2) -# -# # Create temp directory -# temp_dir <- file.path(tempdir(), "R6Sim-test") -# dir.create(temp_dir, showWarnings = FALSE, recursive = TRUE) -# -# # Test CSV write -# experiment$write_design(path = temp_dir, format = "csv") -# expect_true(file.exists(file.path(temp_dir, "exp_design.txt"))) -# expect_true(file.exists(file.path(temp_dir, "exp_design_col_names.txt"))) -# -# # Cleanup -# if (dir.exists(temp_dir)) { -# unlink(temp_dir, recursive = TRUE) -# } -# }) - -# test_that("write_design works with temp directory", { -# # Setup test experiment -# model <- Mymodel$new(name = 
"test") -# experiment <- R6Experiment$new(model) -# experiment$set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c("A", "B")) -# experiment$set_design(n_lhs = 2) -# -# # Create temp directory that works cross-platform -# temp_dir <- file.path(tempdir(), "R6Sim-test") -# dir.create(temp_dir, showWarnings = FALSE, recursive = TRUE) -# -# # Test JSON write -# experiment$write_design(path = temp_dir, format = "json") -# expect_true(file.exists(file.path(temp_dir, "exp_design.txt"))) -# -# # Test CSV write -# experiment$write_design(path = temp_dir, format = "csv") -# expect_true(file.exists(file.path(temp_dir, "exp_design.txt"))) -# expect_true(file.exists(file.path(temp_dir, "exp_design_col_names.txt"))) -# -# # Cleanup -# if (dir.exists(temp_dir)) { -# unlink(temp_dir, recursive = TRUE) -# } -# }) -# -# test_that("write_design returns experimental design when no path provided", { -# # Setup -# model <- Mymodel$new(name = "test") -# experiment <- R6Experiment$new(model) -# experiment$set_parameter(parameter_name = "Test1", experimental_design = "grid", values = c("A", "B")) -# experiment$set_design(n_lhs = 2) -# -# # Test return value when no path provided -# result <- experiment$write_design(format = "json") -# expect_type(result, "character") -# expect_gt(length(result), 0) -# }) - - diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index d0ac8b1..f0ab4a3 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -102,7 +102,12 @@ experiment$ set_parameter(parameter_name = "risk.mean",experimental_design = "grid",values = seq.default(from = 0.01, to = 0.05, by = 0.01))$ set_design() -results <- experiment$run(parallel = F, model_from_cluster_eval = F) +# This package uses future.apply to run experiments, so you can run experments +# in parallel by doing +# library(future) +# future::plan(multisession, workers = 2) + +results <- experiment$run() # model.id =1 is the first model passed to experiment, and 
so on: results %>%