From b465637257fbb34757d30d607ebab797973c9cd0 Mon Sep 17 00:00:00 2001
From: Douglas Ezra Morrison <demorrison@ucdavis.edu>
Date: Thu, 30 Jan 2025 10:04:44 -0800
Subject: [PATCH 1/3] add sample size code

--author "Kwan Ho Lee" <ksjlee@ucdavis.edu>
---
 data-raw/sample-size.R | 138 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 138 insertions(+)
 create mode 100644 data-raw/sample-size.R

diff --git a/data-raw/sample-size.R b/data-raw/sample-size.R
new file mode 100644
index 000000000..a40e64e4b
--- /dev/null
+++ b/data-raw/sample-size.R
@@ -0,0 +1,138 @@
+library(dplyr)
+library(tibble)
+library(serocalculator)
+
+###############################################################################
+## Load longitudinal parameters
+
+test_sim<-"https://osf.io/download/rtw5k" %>%
+  load_curve_params() %>%
+  dplyr::filter(iter < 500)
+###############################################################################
+
+# Define the simulation function
+simulate_seroincidence <- function(nrep, n_sim) {
+  # Parameters
+  dmcmc <- test_sim  # Curve parameters
+  antibodies <-c("HlyE_IgA", "HlyE_IgG")
+  lambda <- 0.2  # Simulated incidence rate per person-year
+  lifespan <- c(0, 10)  # Age range
+
+  # biologic noise distribution
+  dlims <- rbind(
+    "HlyE_IgA" = c(min = 0, max = 0.5),
+    "HlyE_IgG" = c(min = 0, max = 0.5)
+  )
+
+  # Noise parameters
+  cond <- tibble(
+    antigen_iso = c("HlyE_IgG", "HlyE_IgA"),
+    nu = c(0.5, 0.5),  # Biologic noise (nu)
+    eps = c(0, 0),     # Measurement noise (eps)
+    y.low = c(1, 1),   # Low cutoff (llod)
+    y.high = c(5e6, 5e6)  # High cutoff (y.high)
+  )
+
+  # Perform simulations in parallel
+  results <- future_map(1:n_sim, function(i) {
+    # Generate cross-sectional data
+    csdata <- sim.cs(
+      curve_params = dmcmc,
+      lambda = lambda,
+      n.smpl = nrep,
+      age.rng = lifespan,
+      antigen_isos = antibodies,
+      n.mc = 0,
+      renew.params = TRUE,  # Use different parameters for each simulation
+      add.noise = TRUE,
+      noise_limits = dlims,
+      format = "long"
+    )
+
+    # Estimate seroincidence
+    est <- est.incidence(
+      pop_data = csdata,
+      curve_params = dmcmc,
+      noise_params = cond,
+      lambda_start = 0.1,
+      build_graph = TRUE,
+      verbose = FALSE,
+      print_graph = FALSE,
+      antigen_isos = antibodies
+    )
+
+    # Return results for this simulation
+    list(
+      csdata = csdata,
+      est1 = est
+    )
+  }, .options = furrr_options(seed = TRUE))
+
+  return(results)
+}
+
+###############################################################################
+## Run simulation
+plan(multisession)  # Use multiple sessions for parallelism (local machine)
+
+# Run the simulations in parallel
+set.seed(129251)
+results_50 <- simulate_seroincidence(nrep = 50, n_sim = 300)
+
+set.seed(129252)
+results_100 <- simulate_seroincidence(nrep = 100, n_sim = 300)
+
+# Stop parallel processing
+plan(sequential)  # Return to sequential processing
+
+
+# Define a function to generate final tables
+generate_final_table <- function(results_list, sample_size) {
+  # Initialize an empty list to store the results
+  summary_results <- list()
+
+  # Loop through each of the 100 results and extract the required columns
+  for (i in 1:300) {
+    # Extract the summary for each result
+    result_summary <- summary(results_list[[i]]$est1)
+
+    # Select the required columns
+    extracted_columns <- result_summary %>%
+      select(incidence.rate, SE, CI.lwr, CI.upr)
+
+    # Add a column for the index (optional, for tracking)
+    extracted_columns <- extracted_columns %>%
+      mutate(index = i)
+
+    # Append to the list
+    summary_results[[i]] <- extracted_columns
+  }
+
+  # Combine all results into a single data frame
+  final_table <- bind_rows(summary_results) %>%
+    mutate(sample_size = sample_size) # Add sample size column for clarity
+
+  return(final_table)
+}
+
+# Store each sample size's summary as table
+final_table_50 <- generate_final_table(results_50, 50)
+final_table_100 <- generate_final_table(results_100, 100)
+
+
+# Define the true lambda
+true_lambda <- 0.2
+
+# Check how many rows have CI covering the true lambda
+coverage_count_50 <- final_table_50 %>%
+  filter(CI.lwr <= true_lambda & CI.upr >= true_lambda) %>%
+  nrow()
+
+coverage_count_100 <- final_table_100 %>%
+  filter(CI.lwr <= true_lambda & CI.upr >= true_lambda) %>%
+  nrow()
+
+# Print the result
+print(paste("Number of rows where CI covers true lambda:", coverage_count_50))
+print(paste("Number of rows where CI covers true lambda:", coverage_count_100))
+

From 0e2c4be434a4be2116598def467bd88e29470ef4 Mon Sep 17 00:00:00 2001
From: Douglas Ezra Morrison <demorrison@ucdavis.edu>
Date: Thu, 30 Jan 2025 10:16:33 -0800
Subject: [PATCH 2/3] clarify namespaces

---
 data-raw/sample-size.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/data-raw/sample-size.R b/data-raw/sample-size.R
index a40e64e4b..a584bbf0a 100644
--- a/data-raw/sample-size.R
+++ b/data-raw/sample-size.R
@@ -34,7 +34,7 @@ simulate_seroincidence <- function(nrep, n_sim) {
   )
 
   # Perform simulations in parallel
-  results <- future_map(1:n_sim, function(i) {
+  results <- furrr::future_map(1:n_sim, function(i) {
     # Generate cross-sectional data
     csdata <- sim.cs(
       curve_params = dmcmc,
@@ -73,7 +73,8 @@ simulate_seroincidence <- function(nrep, n_sim) {
 
 ###############################################################################
 ## Run simulation
-plan(multisession)  # Use multiple sessions for parallelism (local machine)
+library(future)
+future::plan(multisession)  # Use multiple sessions for parallelism (local machine)
 
 # Run the simulations in parallel
 set.seed(129251)

From 0862d29c10e3024a66a3b6d684313ae4278e8738 Mon Sep 17 00:00:00 2001
From: Douglas Ezra Morrison <demorrison@ucdavis.edu>
Date: Thu, 30 Jan 2025 20:14:43 -0800
Subject: [PATCH 3/3] in progress

---
 DESCRIPTION            |  3 +-
 R/sim_pop_data.R       | 16 ++++++-
 data-raw/sample-size.R | 98 ++++++++++++++++++++++++++++++------------
 3 files changed, 87 insertions(+), 30 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9838a83b9..05e617f2d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -60,7 +60,8 @@ Suggests:
     tidyverse,
     qrcode,
     svglite,
-    vdiffr
+    vdiffr,
+    furrr
 LinkingTo: 
     Rcpp
 Config/testthat/edition: 3
diff --git a/R/sim_pop_data.R b/R/sim_pop_data.R
index 5770b3f02..5e07040e8 100644
--- a/R/sim_pop_data.R
+++ b/R/sim_pop_data.R
@@ -194,7 +194,19 @@ sim_pop_data <- function(
 #' consistent API.
 #'
 #' @keywords internal
-sim.cs <- function(...) { # nolint: object_name_linter
+sim.cs <- function(
+    n.smpl,
+    age.rng,
+    n.mc,
+    renew.params,
+    add.noise,
+    ...) { # nolint: object_name_linter
   lifecycle::deprecate_soft("1.3.1", "sim.cs()", "sim_pop_data()")
-  sim_pop_data(...)
+  sim_pop_data(
+    n_samples = n.smpl,
+    age_range = age.rng,
+    n_mcmc_samples = n.mc,
+    renew_params = renew.params,
+    add_noise = add.noise,
+    ...)
 }
diff --git a/data-raw/sample-size.R b/data-raw/sample-size.R
index a584bbf0a..d7b7257ff 100644
--- a/data-raw/sample-size.R
+++ b/data-raw/sample-size.R
@@ -1,22 +1,25 @@
 library(dplyr)
 library(tibble)
 library(serocalculator)
+library(furrr)
 
 ###############################################################################
 ## Load longitudinal parameters
 
-test_sim<-"https://osf.io/download/rtw5k" %>%
+test_sim <- "https://osf.io/download/rtw5k" %>%
   load_curve_params() %>%
   dplyr::filter(iter < 500)
 ###############################################################################
 
 # Define the simulation function
-simulate_seroincidence <- function(nrep, n_sim) {
-  # Parameters
-  dmcmc <- test_sim  # Curve parameters
-  antibodies <-c("HlyE_IgA", "HlyE_IgG")
-  lambda <- 0.2  # Simulated incidence rate per person-year
-  lifespan <- c(0, 10)  # Age range
+simulate_seroincidence <- function(
+    nrep,
+    n_sim,
+    dmcmc = test_sim,  # Curve parameters
+    antibodies = c("HlyE_IgA", "HlyE_IgG"),
+    lambda = 0.2,  # Simulated incidence rate per person-year
+    lifespan = c(0, 10)  # Age range
+    ) {
 
   # biologic noise distribution
   dlims <- rbind(
@@ -33,8 +36,7 @@ simulate_seroincidence <- function(nrep, n_sim) {
     y.high = c(5e6, 5e6)  # High cutoff (y.high)
   )
 
-  # Perform simulations in parallel
-  results <- furrr::future_map(1:n_sim, function(i) {
+  run_one_sim_iter <- function(i) {
     # Generate cross-sectional data
     csdata <- sim.cs(
       curve_params = dmcmc,
@@ -55,7 +57,7 @@ simulate_seroincidence <- function(nrep, n_sim) {
       curve_params = dmcmc,
       noise_params = cond,
       lambda_start = 0.1,
-      build_graph = TRUE,
+      build_graph = FALSE,
       verbose = FALSE,
       print_graph = FALSE,
       antigen_isos = antibodies
@@ -66,7 +68,21 @@ simulate_seroincidence <- function(nrep, n_sim) {
       csdata = csdata,
       est1 = est
     )
-  }, .options = furrr_options(seed = TRUE))
+  }
+
+
+  # Perform simulations in parallel
+  results <-
+    furrr::future_map(
+      .x = 1:n_sim,
+      .f = run_one_sim_iter,
+      .options = furrr_options(seed = TRUE)) |>
+    structure(
+      antibodies = antibodies,
+      lambda = lambda, # Simulated incidence rate per person-year
+      lifespan = lifespan, # Age range
+      sample_size = nrep
+    )
 
   return(results)
 }
@@ -78,22 +94,30 @@ future::plan(multisession)  # Use multiple sessions for parallelism (local machi
 
 # Run the simulations in parallel
 set.seed(129251)
-results_50 <- simulate_seroincidence(nrep = 50, n_sim = 300)
+results_50 <- simulate_seroincidence(lambda = 0.2, nrep = 50, n_sim = 300)
 
 set.seed(129252)
-results_100 <- simulate_seroincidence(nrep = 100, n_sim = 300)
+results_100 <- simulate_seroincidence(lambda = 0.2, nrep = 100, n_sim = 300)
+
+results_100_0.1 <- simulate_seroincidence(lambda = 0.1, nrep = 100, n_sim = 300)
+
+results_100_0.01 <- simulate_seroincidence(lambda = 0.1, nrep = 100, n_sim = 300)
 
 # Stop parallel processing
-plan(sequential)  # Return to sequential processing
+future::plan(sequential)  # Return to sequential processing
 
+n_obs <- function(results_list) {
+  results_list |> attr("sample_size")
+}
 
 # Define a function to generate final tables
-generate_final_table <- function(results_list, sample_size) {
+generate_final_table <- function(results_list,
+                                 sample_size = n_obs(results_list)) {
   # Initialize an empty list to store the results
   summary_results <- list()
 
   # Loop through each of the 100 results and extract the required columns
-  for (i in 1:300) {
+  for (i in 1:length(results_list)) {
     # Extract the summary for each result
     result_summary <- summary(results_list[[i]]$est1)
 
@@ -113,26 +137,46 @@ generate_final_table <- function(results_list, sample_size) {
   final_table <- bind_rows(summary_results) %>%
     mutate(sample_size = sample_size) # Add sample size column for clarity
 
+  attributes(final_table) <-
+    c(
+      attributes(final_table),
+      attributes(results_list)
+    )
+
   return(final_table)
 }
 
 # Store each sample size's summary as table
-final_table_50 <- generate_final_table(results_50, 50)
-final_table_100 <- generate_final_table(results_100, 100)
-
+final_table_50 <- results_50 |> generate_final_table(sample_size = 50)
+final_table_100 <- results_100 |> generate_final_table(sample_size = 100)
+final_table_100_0.1 <- results_100_0.1 |> generate_final_table(sample_size = 100)
+ft_100_0.01 <- results_100_0.01 |> generate_final_table()
+lambda <- function(results_table)
+{
+  results_table |> attr("lambda")
+}
 
-# Define the true lambda
-true_lambda <- 0.2
+compute_coverage <- function(
+    results_table,
+    true_lambda = results_table |> lambda()) {
+
+  results_table %>%
+    dplyr::mutate(
+      covered = CI.lwr <= true_lambda & CI.upr >= true_lambda,
+      no_error = !is.na(SE)) %>%
+    dplyr::summarize(
+      no_error_count = sum(no_error),
+      coverage_count = sum(covered, na.rm = TRUE),
+      coverage_rate = mean(covered, na.rm = TRUE))
+}
 
 # Check how many rows have CI covering the true lambda
-coverage_count_50 <- final_table_50 %>%
-  filter(CI.lwr <= true_lambda & CI.upr >= true_lambda) %>%
-  nrow()
+coverage_count_50 <- final_table_50 %>% compute_coverage() |> print()
 
-coverage_count_100 <- final_table_100 %>%
-  filter(CI.lwr <= true_lambda & CI.upr >= true_lambda) %>%
-  nrow()
+coverage_count_100 <- final_table_100 %>% compute_coverage() |> print()
 
+final_table_100_0.1 |> compute_coverage()
+ft_100_0.01 |> compute_coverage()
 # Print the result
 print(paste("Number of rows where CI covers true lambda:", coverage_count_50))
 print(paste("Number of rows where CI covers true lambda:", coverage_count_100))