From b465637257fbb34757d30d607ebab797973c9cd0 Mon Sep 17 00:00:00 2001 From: Douglas Ezra Morrison Date: Thu, 30 Jan 2025 10:04:44 -0800 Subject: [PATCH 1/3] add sample size code --author "Kwan Ho Lee" --- data-raw/sample-size.R | 138 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 data-raw/sample-size.R diff --git a/data-raw/sample-size.R b/data-raw/sample-size.R new file mode 100644 index 000000000..a40e64e4b --- /dev/null +++ b/data-raw/sample-size.R @@ -0,0 +1,138 @@ +library(dplyr) +library(tibble) +library(serocalculator) + +############################################################################### +## Load longitudinal parameters + +test_sim<-"https://osf.io/download/rtw5k" %>% + load_curve_params() %>% + dplyr::filter(iter < 500) +############################################################################### + +# Define the simulation function +simulate_seroincidence <- function(nrep, n_sim) { + # Parameters + dmcmc <- test_sim # Curve parameters + antibodies <-c("HlyE_IgA", "HlyE_IgG") + lambda <- 0.2 # Simulated incidence rate per person-year + lifespan <- c(0, 10) # Age range + + # biologic noise distribution + dlims <- rbind( + "HlyE_IgA" = c(min = 0, max = 0.5), + "HlyE_IgG" = c(min = 0, max = 0.5) + ) + + # Noise parameters + cond <- tibble( + antigen_iso = c("HlyE_IgG", "HlyE_IgA"), + nu = c(0.5, 0.5), # Biologic noise (nu) + eps = c(0, 0), # Measurement noise (eps) + y.low = c(1, 1), # Low cutoff (llod) + y.high = c(5e6, 5e6) # High cutoff (y.high) + ) + + # Perform simulations in parallel + results <- future_map(1:n_sim, function(i) { + # Generate cross-sectional data + csdata <- sim.cs( + curve_params = dmcmc, + lambda = lambda, + n.smpl = nrep, + age.rng = lifespan, + antigen_isos = antibodies, + n.mc = 0, + renew.params = TRUE, # Use different parameters for each simulation + add.noise = TRUE, + noise_limits = dlims, + format = "long" + ) + + # Estimate seroincidence + est <- est.incidence( + pop_data = csdata, + curve_params = dmcmc, + noise_params = cond, + lambda_start = 0.1, + build_graph = TRUE, + verbose = FALSE, + print_graph = FALSE, + antigen_isos = antibodies + ) + + # Return results for this simulation + list( + csdata = csdata, + est1 = est + ) + }, .options = furrr_options(seed = TRUE)) + + return(results) +} + +############################################################################### +## Run simulation +plan(multisession) # Use multiple sessions for parallelism (local machine) + +# Run the simulations in parallel +set.seed(129251) +results_50 <- simulate_seroincidence(nrep = 50, n_sim = 300) + +set.seed(129252) +results_100 <- simulate_seroincidence(nrep = 100, n_sim = 300) + +# Stop parallel processing +plan(sequential) # Return to sequential processing + + +# Define a function to generate final tables +generate_final_table <- function(results_list, sample_size) { + # Initialize an empty list to store the results + summary_results <- list() + + # Loop through each of the 100 results and extract the required columns + for (i in 1:300) { + # Extract the summary for each result + result_summary <- summary(results_list[[i]]$est1) + + # Select the required columns + extracted_columns <- result_summary %>% + select(incidence.rate, SE, CI.lwr, CI.upr) + + # Add a column for the index (optional, for tracking) + extracted_columns <- extracted_columns %>% + mutate(index = i) + + # Append to the list + summary_results[[i]] <- extracted_columns + } + + # Combine all results into a single data frame + final_table <- bind_rows(summary_results) %>% + mutate(sample_size = sample_size) # Add sample size column for clarity + + return(final_table) +} + +# Store each sample size's summary as table +final_table_50 <- generate_final_table(results_50, 50) +final_table_100 <- generate_final_table(results_100, 100) + + +# Define the true lambda +true_lambda <- 0.2 + +# Check how many rows have CI covering the true lambda +coverage_count_50 <- final_table_50 %>% + filter(CI.lwr <= true_lambda & CI.upr >= true_lambda) %>% + nrow() + +coverage_count_100 <- final_table_100 %>% + filter(CI.lwr <= true_lambda & CI.upr >= true_lambda) %>% + nrow() + +# Print the result +print(paste("Number of rows where CI covers true lambda:", coverage_count_50)) +print(paste("Number of rows where CI covers true lambda:", coverage_count_100)) + From 0e2c4be434a4be2116598def467bd88e29470ef4 Mon Sep 17 00:00:00 2001 From: Douglas Ezra Morrison Date: Thu, 30 Jan 2025 10:16:33 -0800 Subject: [PATCH 2/3] clarify namespaces --- data-raw/sample-size.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/data-raw/sample-size.R b/data-raw/sample-size.R index a40e64e4b..a584bbf0a 100644 --- a/data-raw/sample-size.R +++ b/data-raw/sample-size.R @@ -34,7 +34,7 @@ simulate_seroincidence <- function(nrep, n_sim) { ) # Perform simulations in parallel - results <- future_map(1:n_sim, function(i) { + results <- furrr::future_map(1:n_sim, function(i) { # Generate cross-sectional data csdata <- sim.cs( curve_params = dmcmc, @@ -73,7 +73,8 @@ simulate_seroincidence <- function(nrep, n_sim) { ############################################################################### ## Run simulation -plan(multisession) # Use multiple sessions for parallelism (local machine) +library(future) +future::plan(multisession) # Use multiple sessions for parallelism (local machine) # Run the simulations in parallel set.seed(129251) From 0862d29c10e3024a66a3b6d684313ae4278e8738 Mon Sep 17 00:00:00 2001 From: Douglas Ezra Morrison Date: Thu, 30 Jan 2025 20:14:43 -0800 Subject: [PATCH 3/3] in progress --- DESCRIPTION | 3 +- R/sim_pop_data.R | 16 ++++++- data-raw/sample-size.R | 98 ++++++++++++++++++++++++++++++------------ 3 files changed, 87 insertions(+), 30 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9838a83b9..05e617f2d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -60,7 +60,8 @@ Suggests: tidyverse, qrcode, svglite, - vdiffr + vdiffr, + furrr LinkingTo: Rcpp Config/testthat/edition: 3 diff --git a/R/sim_pop_data.R b/R/sim_pop_data.R index 5770b3f02..5e07040e8 100644 --- a/R/sim_pop_data.R +++ b/R/sim_pop_data.R @@ -194,7 +194,19 @@ sim_pop_data <- function( #' consistent API. #' #' @keywords internal -sim.cs <- function(...) { # nolint: object_name_linter +sim.cs <- function( + n.smpl, + age.rng, + n.mc, + renew.params, + add.noise, + ...) { # nolint: object_name_linter lifecycle::deprecate_soft("1.3.1", "sim.cs()", "sim_pop_data()") - sim_pop_data(...) + sim_pop_data( + n_samples = n.smpl, + age_range = age.rng, + n_mcmc_samples = n.mc, + renew_params = renew.params, + add_noise = add.noise, + ...) } diff --git a/data-raw/sample-size.R b/data-raw/sample-size.R index a584bbf0a..d7b7257ff 100644 --- a/data-raw/sample-size.R +++ b/data-raw/sample-size.R @@ -1,22 +1,25 @@ library(dplyr) library(tibble) library(serocalculator) +library(furrr) ############################################################################### ## Load longitudinal parameters -test_sim<-"https://osf.io/download/rtw5k" %>% +test_sim <- "https://osf.io/download/rtw5k" %>% load_curve_params() %>% dplyr::filter(iter < 500) ############################################################################### # Define the simulation function -simulate_seroincidence <- function(nrep, n_sim) { - # Parameters - dmcmc <- test_sim # Curve parameters - antibodies <-c("HlyE_IgA", "HlyE_IgG") - lambda <- 0.2 # Simulated incidence rate per person-year - lifespan <- c(0, 10) # Age range +simulate_seroincidence <- function( + nrep, + n_sim, + dmcmc = test_sim, # Curve parameters + antibodies = c("HlyE_IgA", "HlyE_IgG"), + lambda = 0.2, # Simulated incidence rate per person-year + lifespan = c(0, 10) # Age range + ) { # biologic noise distribution dlims <- rbind( @@ -33,8 +36,7 @@ simulate_seroincidence <- function(nrep, n_sim) { y.high = c(5e6, 5e6) # High cutoff (y.high) ) - # Perform simulations in parallel - results <- furrr::future_map(1:n_sim, function(i) { + run_one_sim_iter <- function(i) { # Generate cross-sectional data csdata <- sim.cs( curve_params = dmcmc, @@ -55,7 +57,7 @@ simulate_seroincidence <- function(nrep, n_sim) { curve_params = dmcmc, noise_params = cond, lambda_start = 0.1, - build_graph = TRUE, + build_graph = FALSE, verbose = FALSE, print_graph = FALSE, antigen_isos = antibodies @@ -66,7 +68,21 @@ simulate_seroincidence <- function(nrep, n_sim) { csdata = csdata, est1 = est ) - }, .options = furrr_options(seed = TRUE)) + } + + + # Perform simulations in parallel + results <- + furrr::future_map( + .x = 1:n_sim, + .f = run_one_sim_iter, + .options = furrr_options(seed = TRUE)) |> + structure( + antibodies = antibodies, + lambda = lambda, # Simulated incidence rate per person-year + lifespan = lifespan, # Age range + sample_size = nrep + ) return(results) } @@ -78,22 +94,30 @@ future::plan(multisession) # Use multiple sessions for parallelism (local machi # Run the simulations in parallel set.seed(129251) -results_50 <- simulate_seroincidence(nrep = 50, n_sim = 300) +results_50 <- simulate_seroincidence(lambda = 0.2, nrep = 50, n_sim = 300) set.seed(129252) -results_100 <- simulate_seroincidence(nrep = 100, n_sim = 300) +results_100 <- simulate_seroincidence(lambda = 0.2, nrep = 100, n_sim = 300) + +results_100_0.1 <- simulate_seroincidence(lambda = 0.1, nrep = 100, n_sim = 300) + +results_100_0.01 <- simulate_seroincidence(lambda = 0.1, nrep = 100, n_sim = 300) # Stop parallel processing -plan(sequential) # Return to sequential processing +future::plan(sequential) # Return to sequential processing +n_obs <- function(results_list) { + results_list |> attr("sample_size") +} # Define a function to generate final tables -generate_final_table <- function(results_list, sample_size) { +generate_final_table <- function(results_list, + sample_size = n_obs(results_list)) { # Initialize an empty list to store the results summary_results <- list() # Loop through each of the 100 results and extract the required columns - for (i in 1:300) { + for (i in 1:length(results_list)) { # Extract the summary for each result result_summary <- summary(results_list[[i]]$est1) @@ -113,26 +137,46 @@ generate_final_table <- function(results_list, sample_size) { final_table <- bind_rows(summary_results) %>% mutate(sample_size = sample_size) # Add sample size column for clarity + attributes(final_table) <- + c( + attributes(final_table), + attributes(results_list) + ) + return(final_table) } # Store each sample size's summary as table -final_table_50 <- generate_final_table(results_50, 50) -final_table_100 <- generate_final_table(results_100, 100) - +final_table_50 <- results_50 |> generate_final_table(sample_size = 50) +final_table_100 <- results_100 |> generate_final_table(sample_size = 100) +final_table_100_0.1 <- results_100_0.1 |> generate_final_table(sample_size = 100) +ft_100_0.01 <- results_100_0.01 |> generate_final_table() +lambda <- function(results_table) +{ + results_table |> attr("lambda") +} -# Define the true lambda -true_lambda <- 0.2 +compute_coverage <- function( + results_table, + true_lambda = results_table |> lambda()) { + + results_table %>% + dplyr::mutate( + covered = CI.lwr <= true_lambda & CI.upr >= true_lambda, + no_error = !is.na(SE)) %>% + dplyr::summarize( + no_error_count = sum(no_error), + coverage_count = sum(covered, na.rm = TRUE), + coverage_rate = mean(covered, na.rm = TRUE)) +} # Check how many rows have CI covering the true lambda -coverage_count_50 <- final_table_50 %>% - filter(CI.lwr <= true_lambda & CI.upr >= true_lambda) %>% - nrow() +coverage_count_50 <- final_table_50 %>% compute_coverage() |> print() -coverage_count_100 <- final_table_100 %>% - filter(CI.lwr <= true_lambda & CI.upr >= true_lambda) %>% - nrow() +coverage_count_100 <- final_table_100 %>% compute_coverage() |> print() +final_table_100_0.1 |> compute_coverage() +ft_100_0.01 |> compute_coverage() # Print the result print(paste("Number of rows where CI covers true lambda:", coverage_count_50)) print(paste("Number of rows where CI covers true lambda:", coverage_count_100))