UCD-SERG · Kwan-Jenny · Feb 25, 2025 · Feb 26, 2025 · Feb 26, 2025 · Mar 3, 2025
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -8,3 +8,6 @@
 ^vignettes/articles/*_files$
 ^vignettes/articles$
 ^codecov\.yml$
+^shigella\.Rcheck$
+^shigella.*\.tar\.gz$
+^shigella.*\.tgz$
diff --git a/.gitignore b/.gitignore
@@ -12,3 +12,6 @@ data/
 /.quarto/
 *.pdf
 README_files
+shigella.Rcheck/
+shigella*.tar.gz
+shigella*.tgz
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: shigella
 Title: What the Package Does (One Line, Title Case)
-Version: 0.0.0.9003
+Version: 0.0.0.9004
 Authors@R: c(
     person("Kwan Ho", "Lee", , "ksjlee@ucdavis.edu", role = c("aut", "cre")),
     person("Douglas Ezra", "Morrison", , "demorrison@ucdavis.edu", role = c("aut"),

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,9 @@
 # Generated by roxygen2: do not edit by hand
 
+export(calculate_metrics)
+export(generate_final_table)
 export(postprocess_jags_output)
+export(simulate_seroincidence2)
 importFrom(dplyr,any_of)
 importFrom(dplyr,filter)
 importFrom(dplyr,mutate)

diff --git a/NEWS.md b/NEWS.md
@@ -6,6 +6,8 @@
 
 ## Bug fixes
 
+None yet
+
 ## Internal changes
 
 * Moved helper functions into `R/` subdirectory (#1)

diff --git a/R/calculate_metrics.R b/R/calculate_metrics.R
@@ -0,0 +1,33 @@
+#' Calculate Empirical Standard Error for Incidence Rates
+#'
+#' Computes the empirical standard error (the sample standard deviation) of
+#' the \code{incidence.rate} column of \code{data} and returns it in a data
+#' frame together with the sample size and age group.
+#'
+#' @param data A data frame containing a column named \code{incidence.rate}.
+#'   Missing values in that column are ignored.
+#' @param sample_size Optional. Integer representing the sample size. If
+#'   \code{NULL}, retrieved from \code{attr(data, "sample_size")}.
+#' @param age_group Optional. Character string specifying the age group. If
+#'   \code{NULL}, retrieved from \code{attr(data, "age_group")}.
+#'
+#' @return A data frame with columns \code{sample_size}, \code{empirical_se},
+#'   and \code{Age_Group}. When a value is neither supplied as an argument
+#'   nor stored as an attribute of \code{data}, its column is \code{NA}.
+#' @examples
+#' \dontrun{
+#' data <- data.frame(incidence.rate = c(0.1, 0.2, 0.15, 0.18))
+#' attr(data, "sample_size") <- 100
+#' attr(data, "age_group") <- "Age 0-2"
+#' calculate_metrics(data)
+#' }
+#' @export
+calculate_metrics <- function(data, sample_size = NULL, age_group = NULL) {
+  # Explicit arguments win; otherwise fall back to attributes on `data`
+  if (is.null(sample_size)) {
+    sample_size <- attr(data, "sample_size")
+  }
+  if (is.null(age_group)) {
+    age_group <- attr(data, "age_group")
+  }
+
+  # data.frame() silently drops NULL arguments, which would remove a
+  # documented column; substitute NA so all three columns are always present
+  if (is.null(sample_size)) {
+    sample_size <- NA_integer_
+  }
+  if (is.null(age_group)) {
+    age_group <- NA_character_
+  }
+
+  data.frame(
+    sample_size = sample_size,
+    # `[[` requires an exact column name; `$` would partially match
+    empirical_se = sd(data[["incidence.rate"]], na.rm = TRUE),
+    Age_Group = age_group
+  )
+}
+
diff --git a/R/generate_final_table.R b/R/generate_final_table.R
@@ -1,28 +1,44 @@
-# Define a function to generate final tables
+#' Generate Final Table from Simulation Results
+#'
+#' Loops through a list of simulation results, summarizes the \code{est1}
+#' element of each, keeps the columns listed below, and combines them into a
+#' single data frame with the sample size added as a column.
+#'
+#' @param results_list A list of simulation results. Each element should
+#'   contain an element named \code{est1} which can be summarized. Elements
+#'   without \code{est1} (for example, runs that stored only an \code{error})
+#'   are skipped.
+#' @param sample_size An integer specifying the sample size used in the
+#'   simulation.
+#'
+#' @return A data frame combining the selected columns
+#'   (\code{incidence.rate}, \code{SE}, \code{CI.lwr}, \code{CI.upr}) from each
+#'   simulation result, an \code{index} column giving the position of the
+#'   result in \code{results_list}, and a \code{sample_size} column.
+#' @examples
+#' \dontrun{
+#' # Suppose you have a list of simulation results
+#' final_table <- generate_final_table(results_list, sample_size = 100)
+#' }
+#' @export
 generate_final_table <- function(results_list, sample_size) {
   # Initialize an empty list to store the results
   summary_results <- list()
 
-  # Loop through each of the 100 results and extract the required columns
-  for (i in 1:200) {
+  # Loop through each result and extract the required columns.
+  # seq_along() yields no iterations for an empty list, unlike 1:length().
+  for (i in seq_along(results_list)) {
+    # Skip results that have no `est1` element (e.g. runs that stored only
+    # an error message); their index is simply absent from the final table
+    if (is.null(results_list[[i]]$est1)) {
+      next
+    }
+
     # Extract the summary for each result
     result_summary <- summary(results_list[[i]]$est1)
 
-    # Select the required columns
+    # Select the required columns (ensure that result_summary is a data frame)
     extracted_columns <- result_summary %>%
       select(incidence.rate, SE, CI.lwr, CI.upr)
 
     # Add a column for the index (optional, for tracking)
     extracted_columns <- extracted_columns %>%
       mutate(index = i)
 
-    # Append to the list
+    # Append the extracted data to the list
     summary_results[[i]] <- extracted_columns
   }
 
-  # Combine all results into a single data frame
+  # Combine all results into a single data frame and add the sample size column for clarity
   final_table <- bind_rows(summary_results) %>%
-    mutate(sample_size = sample_size) # Add sample size column for clarity
+    mutate(sample_size = sample_size)
 
   return(final_table)
 }
diff --git a/R/postprocess_jags_output.R b/R/postprocess_jags_output.R
@@ -45,3 +45,7 @@ postprocess_jags_output <- function(jags_output) {
 
   return(to_return)
 }
+
+
+
+
diff --git a/R/simulate_seroincidence2.R b/R/simulate_seroincidence2.R
@@ -0,0 +1,75 @@
+#' Simulate Cross-Sectional Data and Estimate Seroincidence
+#'
+#' Runs \code{n_sim} simulations with \code{future_map()} under a multicore
+#' plan. Each simulation generates cross-sectional data with
+#' \code{sim_pop_data()} and then estimates incidence from it with
+#' \code{est.incidence()}, using the \code{curve_params_shigella_ipab} curve
+#' parameters and the IgG antigen-isotype. A simulation that raises an error
+#' is recorded as a list holding the error message instead of stopping the
+#' whole run.
+#'
+#' @param nrep Sample size of each simulated data set (passed to
+#'   \code{sim_pop_data()} as \code{n.smpl} and stored as the
+#'   \code{sample_size} attribute of the result).
+#' @param n_sim Number of simulations.
+#' @param observed Incidence rate used for the simulation (passed to
+#'   \code{sim_pop_data()} as \code{lambda}).
+#' @param range Range for simulation (passed to \code{sim_pop_data()} as
+#'   \code{age_range}).
+#' @return A list with one element per simulation. Successful elements are
+#'   lists with \code{csdata} and \code{est1}; failed elements are lists with
+#'   \code{error}. The list carries the attributes \code{sample_size}
+#'   (\code{nrep}) and \code{age_range} (\code{range} collapsed with
+#'   \code{" - "}).
+#' @export
+simulate_seroincidence2 <- function(nrep, n_sim, observed, range = NULL) {
+  # Set parallel plan inside function to avoid issues with distributed nodes.
+  # plan() returns the previously active plan; restore it on exit (also when
+  # the function fails) so the caller's plan is not permanently replaced.
+  old_plan <- plan(multicore)  # Use multiple cores for parallel processing (works best on HPC)
+  on.exit(plan(old_plan), add = TRUE)
+
+  # Parameters
+  dmcmc <- curve_params_shigella_ipab  # Curve parameters
+  antibodies <- c("IgG")  # Antigen-isotypes
+  lambda <- observed  # Simulated incidence rate per person-year
+
+  # Biologic noise distribution
+  dlims <- rbind("IgG" = c(min = 0, max = 0.5))
+
+  # Noise parameters
+  cond <- tibble(
+    antigen_iso = c("IgG"),
+    nu = c(0.5),  # Biologic noise (nu)
+    eps = c(0.25),  # Measurement noise (eps)
+    y.low = c(25),  # Low cutoff (llod)
+    y.high = c(200000)  # High cutoff (y.high)
+  )
+
+  # Perform simulations in parallel.
+  # seq_len() gives an empty sequence for n_sim = 0, unlike 1:n_sim.
+  results <- future_map(seq_len(n_sim), function(i) {
+    tryCatch({
+      # Generate cross-sectional data
+      csdata <- sim_pop_data(
+        curve_params = dmcmc,
+        lambda = lambda,
+        n.smpl = nrep,
+        age_range = range,
+        antigen_isos = antibodies,
+        n.mc = 0,
+        renew_params = TRUE,  # Use different parameters for each simulation
+        add.noise = TRUE,
+        noise_limits = dlims,
+        format = "long"
+      )
+
+      # Estimate seroincidence
+      est <- est.incidence(
+        pop_data = csdata,
+        curve_params = dmcmc,
+        noise_params = cond,
+        lambda_start = 0.1,
+        build_graph = TRUE,
+        verbose = FALSE,
+        print_graph = FALSE,
+        antigen_isos = antibodies
+      )
+
+      # Return results for this simulation
+      list(csdata = csdata, est1 = est)
+    }, error = function(e) {
+      # Capture and store errors instead of stopping execution
+      list(error = conditionMessage(e))
+    })
+  }, .options = furrr_options(seed = TRUE))
+
+  # Attach the simulation settings as attributes of the result list
+  structure(
+    results,
+    sample_size = nrep,
+    age_range = paste(range, collapse = " - ")
+  )
+}
diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -5,3 +5,10 @@ ORCID
 Postprocess
 Seroresponse
 repo
+HPC
+IgG
+IpaB
+isotypes
+qmd
+seroincidence
+ses
diff --git a/man/calculate_metrics.Rd b/man/calculate_metrics.Rd
diff --git a/man/generate_final_table.Rd b/man/generate_final_table.Rd
diff --git a/man/simulate_seroincidence2.Rd b/man/simulate_seroincidence2.Rd
diff --git a/project.Rproj b/project.Rproj
@@ -1,5 +1,4 @@
 Version: 1.0
-ProjectId: 3be2b60e-1279-4ad7-941b-69ed270815d6
 
 RestoreWorkspace: Default
 SaveWorkspace: Default
Original file line number	Diff line number	Diff line change
Expand Up		@@ -45,3 +45,7 @@ postprocess_jags_output <- function(jags_output) {

		return(to_return)
		}