diff --git a/.Rbuildignore b/.Rbuildignore index a0f6a52..0694f55 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,3 +8,6 @@ ^vignettes/articles/*_files$ ^vignettes/articles$ ^codecov\.yml$ +^shigella\.Rcheck$ +^shigella.*\.tar\.gz$ +^shigella.*\.tgz$ diff --git a/.gitignore b/.gitignore index cbe14e1..80788f5 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,6 @@ data/ /.quarto/ *.pdf README_files +shigella.Rcheck/ +shigella*.tar.gz +shigella*.tgz diff --git a/DESCRIPTION b/DESCRIPTION index 43d2893..6b43288 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: shigella Title: What the Package Does (One Line, Title Case) -Version: 0.0.0.9003 +Version: 0.0.0.9004 Authors@R: c( person("Kwan Ho", "Lee", , "ksjlee@ucdavis.edu", role = c("aut", "cre")), person("Douglas Ezra", "Morrison", , "demorrison@ucdavis.edu", role = c("aut"), diff --git a/NAMESPACE b/NAMESPACE index 56e7b54..155493c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,9 @@ # Generated by roxygen2: do not edit by hand +export(calculate_metrics) +export(generate_final_table) export(postprocess_jags_output) +export(simulate_seroincidence2) importFrom(dplyr,any_of) importFrom(dplyr,filter) importFrom(dplyr,mutate) diff --git a/NEWS.md b/NEWS.md index c3eade1..1fe3b1e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ ## Bug fixes +None yet + ## Internal changes * Moved helper functions into `R/` subdirectory (#1) diff --git a/R/calculate_metrics.R b/R/calculate_metrics.R new file mode 100644 index 0000000..2d9cf37 --- /dev/null +++ b/R/calculate_metrics.R @@ -0,0 +1,33 @@ +#' Calculate Empirical Standard Error for Incidence Rates +#' +#' This function computes the empirical standard error (SE) of incidence rates +#' from the provided dataset and adds sample size and age group information. +#' +#' @param data A data frame containing a column named \code{incidence.rate}. +#' @param sample_size Optional. Integer representing the sample size. 
If NULL, retrieved from \code{attr(data, "sample_size")}. +#' @param age_group Optional. Character string specifying the age group. If NULL, retrieved from \code{attr(data, "age_group")}. +#' +#' @return A data frame with \code{sample_size}, \code{empirical_se}, and \code{Age_Group}. +#' @examples +#' \dontrun{ +#' data <- data.frame(incidence.rate = c(0.1, 0.2, 0.15, 0.18)) +#' attr(data, "sample_size") <- 100 +#' attr(data, "age_group") <- "Age 0-2" +#' calculate_metrics(data) +#' } +#' @export +calculate_metrics <- function(data, sample_size = NULL, age_group = NULL) { + if (is.null(sample_size)) { + sample_size <- attr(data, "sample_size") + } + if (is.null(age_group)) { + age_group <- attr(data, "age_group") + } + + data.frame( + sample_size = sample_size, + empirical_se = sd(data$incidence.rate, na.rm = TRUE), + Age_Group = age_group + ) +} + diff --git a/R/generate_final_table.R b/R/generate_final_table.R index d84a3f0..6fab724 100644 --- a/R/generate_final_table.R +++ b/R/generate_final_table.R @@ -1,14 +1,30 @@ -# Define a function to generate final tables +#' Generate Final Table from Simulation Results +#' +#' This function loops through a list of simulation results, extracts the required columns, +#' and combines them into a single data frame. It also adds the sample size for clarity. +#' +#' @param results_list A list of simulation results. Each element should contain an element named \code{est1} +#' which can be summarized. +#' @param sample_size An integer specifying the sample size used in the simulation. +#' +#' @return A data frame combining the selected columns (\code{incidence.rate}, \code{SE}, \code{CI.lwr}, \code{CI.upr}) +#' from each simulation result along with the sample size and an index for tracking. 
+#' @examples +#' \dontrun{ +#' # Suppose you have a list of simulation results +#' final_table <- generate_final_table(results_list, sample_size = 100) +#' } +#' @export generate_final_table <- function(results_list, sample_size) { # Initialize an empty list to store the results summary_results <- list() - # Loop through each of the 100 results and extract the required columns - for (i in 1:200) { + # Loop through each result and extract the required columns + for (i in 1:length(results_list)) { # Extract the summary for each result result_summary <- summary(results_list[[i]]$est1) - # Select the required columns + # Select the required columns (ensure that result_summary is a data frame) extracted_columns <- result_summary %>% select(incidence.rate, SE, CI.lwr, CI.upr) @@ -16,13 +32,13 @@ generate_final_table <- function(results_list, sample_size) { extracted_columns <- extracted_columns %>% mutate(index = i) - # Append to the list + # Append the extracted data to the list summary_results[[i]] <- extracted_columns } - # Combine all results into a single data frame + # Combine all results into a single data frame and add the sample size column for clarity final_table <- bind_rows(summary_results) %>% - mutate(sample_size = sample_size) # Add sample size column for clarity + mutate(sample_size = sample_size) return(final_table) } diff --git a/R/postprocess_jags_output.R b/R/postprocess_jags_output.R index 32bfd22..660cc78 100644 --- a/R/postprocess_jags_output.R +++ b/R/postprocess_jags_output.R @@ -45,3 +45,7 @@ postprocess_jags_output <- function(jags_output) { return(to_return) } + + + + diff --git a/R/simulate_seroincidence2.R b/R/simulate_seroincidence2.R new file mode 100644 index 0000000..953fa4b --- /dev/null +++ b/R/simulate_seroincidence2.R @@ -0,0 +1,75 @@ +#' Description of the function here. +#' @param nrep Number of repetitions. +#' @param n_sim Number of simulations. +#' @param observed Observed incidence rate. +#' @param range Range for simulation. 
+#' @return A list of simulated seroincidence results. +#' @export +# Define the simulation function +# Define the simulation function +simulate_seroincidence2 <- function(nrep, n_sim, observed, range = NULL) { + # Set parallel plan inside function to avoid issues with distributed nodes + plan(multicore) # Use multiple cores for parallel processing (works best on HPC) + + # Parameters + dmcmc <- curve_params_shigella_ipab # Curve parameters + antibodies <- c("IgG") # Antigen-isotypes + lambda <- observed # Simulated incidence rate per person-year + + # Biologic noise distribution + dlims <- rbind("IgG" = c(min = 0, max = 0.5)) + + # Noise parameters + cond <- tibble( + antigen_iso = c("IgG"), + nu = c(0.5), # Biologic noise (nu) + eps = c(0.25), # Measurement noise (eps) + y.low = c(25), # Low cutoff (llod) + y.high = c(200000) # High cutoff (y.high) + ) + + # Perform simulations in parallel + results <- future_map(1:n_sim, function(i) { + tryCatch({ + # Generate cross-sectional data + csdata <- sim_pop_data( + curve_params = dmcmc, + lambda = lambda, + n.smpl = nrep, + age_range = range, + antigen_isos = antibodies, + n.mc = 0, + renew_params = TRUE, # Use different parameters for each simulation + add.noise = TRUE, + noise_limits = dlims, + format = "long" + ) + + # Estimate seroincidence + est <- est.incidence( + pop_data = csdata, + curve_params = dmcmc, + noise_params = cond, + lambda_start = 0.1, + build_graph = TRUE, + verbose = FALSE, + print_graph = FALSE, + antigen_isos = antibodies + ) + + # Return results for this simulation + list(csdata = csdata, est1 = est) + }, error = function(e) { + return(list(error = e$message)) # Capture and store errors instead of stopping execution + }) + }, .options = furrr_options(seed = TRUE)) + + # Ensure sequential processing after function execution + plan(sequential) + results <- results |> + structure( + sample_size = nrep, + age_range = paste(range, collapse = " - ") + ) + return(results) +} diff --git 
a/inst/WORDLIST b/inst/WORDLIST index d314679..4121428 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -5,3 +5,10 @@ ORCID Postprocess Seroresponse repo +HPC +IgG +IpaB +isotypes +qmd +seroincidence +ses \ No newline at end of file diff --git a/man/calculate_metrics.Rd b/man/calculate_metrics.Rd new file mode 100644 index 0000000..47c1a9e --- /dev/null +++ b/man/calculate_metrics.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calculate_metrics.R +\name{calculate_metrics} +\alias{calculate_metrics} +\title{Calculate Empirical Standard Error for Incidence Rates} +\usage{ +calculate_metrics(data, sample_size = NULL, age_group = NULL) +} +\arguments{ +\item{data}{A data frame containing a column named \code{incidence.rate}.} + +\item{sample_size}{Optional. Integer representing the sample size. If NULL, retrieved from \code{attr(data, "sample_size")}.} + +\item{age_group}{Optional. Character string specifying the age group. If NULL, retrieved from \code{attr(data, "age_group")}.} +} +\value{ +A data frame with \code{sample_size}, \code{empirical_se}, and \code{Age_Group}. +} +\description{ +This function computes the empirical standard error (SE) of incidence rates +from the provided dataset and adds sample size and age group information. 
+} +\examples{ +\dontrun{ +data <- data.frame(incidence.rate = c(0.1, 0.2, 0.15, 0.18)) +attr(data, "sample_size") <- 100 +attr(data, "age_group") <- "Age 0-2" +calculate_metrics(data) +} +} diff --git a/man/generate_final_table.Rd b/man/generate_final_table.Rd new file mode 100644 index 0000000..9f3d67b --- /dev/null +++ b/man/generate_final_table.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_final_table.R +\name{generate_final_table} +\alias{generate_final_table} +\title{Generate Final Table from Simulation Results} +\usage{ +generate_final_table(results_list, sample_size) +} +\arguments{ +\item{results_list}{A list of simulation results. Each element should contain an element named \code{est1} +which can be summarized.} + +\item{sample_size}{An integer specifying the sample size used in the simulation.} +} +\value{ +A data frame combining the selected columns (\code{incidence.rate}, \code{SE}, \code{CI.lwr}, \code{CI.upr}) +from each simulation result along with the sample size and an index for tracking. +} +\description{ +This function loops through a list of simulation results, extracts the required columns, +and combines them into a single data frame. It also adds the sample size for clarity. 
+} +\examples{ +\dontrun{ +# Suppose you have a list of simulation results +final_table <- generate_final_table(results_list, sample_size = 100) +} +} diff --git a/man/simulate_seroincidence2.Rd b/man/simulate_seroincidence2.Rd new file mode 100644 index 0000000..be1bf24 --- /dev/null +++ b/man/simulate_seroincidence2.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_seroincidence2.R +\name{simulate_seroincidence2} +\alias{simulate_seroincidence2} +\title{Description of the function here.} +\usage{ +simulate_seroincidence2(nrep, n_sim, observed, range = NULL) +} +\arguments{ +\item{nrep}{Number of repetitions.} + +\item{n_sim}{Number of simulations.} + +\item{observed}{Observed incidence rate.} + +\item{range}{Range for simulation.} +} +\value{ +A list of simulated seroincidence results. +} +\description{ +Description of the function here. +} diff --git a/project.Rproj b/project.Rproj index 21a5a96..008497b 100644 --- a/project.Rproj +++ b/project.Rproj @@ -1,5 +1,4 @@ Version: 1.0 -ProjectId: 3be2b60e-1279-4ad7-941b-69ed270815d6 RestoreWorkspace: Default SaveWorkspace: Default diff --git a/sim_hpc_ipab.qmd b/sim_hpc_ipab.qmd deleted file mode 100644 index 90a24aa..0000000 --- a/sim_hpc_ipab.qmd +++ /dev/null @@ -1,408 +0,0 @@ ---- -title: "Simulation with the Smallest Lambda for IpaB IgG" -format: - pdf: - number-sections: true - number-depth: 2 - number-offset: [0, 0] - pdf-engine: pdflatex -header-includes: - - \usepackage{graphicx} # Required for ggplot figures - - \usepackage{float} # Helps place figures properly - - \usepackage{wrapfig} # Now safely included since you installed it -editor: visual -editor_options: - chunk_output_type: console ---- - -```{r setup, include=FALSE,echo=FALSE} -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -``` - -```{r, echo=FALSE, message=FALSE} -library(tibble) -library(dplyr) -library(serocalculator) -library(haven) -library(knitr) -library(plotly) 
-library(kableExtra) -library(tidyr) -library(arsenal) -library(forcats) -library(huxtable) -library(magrittr) -library(parameters) -library(ggplot2) -library(ggeasy) -library(scales) -library(plotly) -library(patchwork) -library(tidyverse) -library(readxl) -library(purrr) -library(here) -library(table1) -library(furrr) -library(future) -``` - - -# Vary simulation lambdas across the range in the real cross-sectional data - -## 1. Estimate the shigella incidence rate using the real cross-sectional Shigella data, -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Load data -load("~/ipab.RData") -``` - -## Get parameters from longitudinal data - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Set noise -create_noise_df <- function(country) { - noise_df <- tibble( - antigen_iso = c("IgG"), - Country = factor(c(country)), # Use input argument for Country - y.low = c(25), - eps = c(0.25), - nu = c(0.5), - y.high = c(200000) - ) - - return(noise_df) -} - -noise_df_USA <- create_noise_df("MA USA") -noise_df_Niger <- create_noise_df("Niger") -noise_df_Sierra <- create_noise_df("Sierra Leone") -noise_df_Ghana <- create_noise_df("Ghana") -``` - -## 2. 
Run simulations using: -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# MA USA -est_USA <- est.incidence( - pop_data = df_xs_USA_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_USA, - antigen_isos = c("IgG"), -) - -# Ghana -est_Ghana <- est.incidence( - pop_data = df_xs_Ghana_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Ghana, - antigen_isos = c("IgG"), -) - -# Niger -est_Niger <- est.incidence( - pop_data = df_xs_Niger_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Niger, - antigen_isos = c("IgG"), -) - -# Sierra Leone -est_Sierra <- est.incidence( - pop_data = df_xs_Sierra_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Sierra, - antigen_isos = c("IgG"), -) - -# create table of incidence.rate of each region - -create_incidence_table <- function(...) { - # Capture input objects and their names - est_list <- list(...) - country_names <- names(est_list) - - # Extract incidence.rate from the summary() of each estimate object - incidence_rates <- sapply(est_list, function(x) summary(x)$incidence.rate) - - # Create a tidy tibble - incidence_table <- tibble( - Country = country_names, - Incidence_Rate = incidence_rates - ) - - return(incidence_table) -} -# Example usage with four different country estimates: -incidence_summary <- create_incidence_table( - USA = est_USA, - Ghana = est_Ghana, - Niger = est_Niger, - Sierra_Leone = est_Sierra -) - -# Display the summary table -print(incidence_summary) - -# 1) The median of the incidence rate estimates from step 1. 
-# Get incidence rate estimates from each region (4 regions) and do median -med_ipab_IgG<-median(incidence_summary$Incidence_Rate) - -#1.188 - -# 2) 2x the maximum incidence rate estimate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the maximum one and 2x -max2_ipab_IgG<-2*max(incidence_summary$Incidence_Rate) - -# 2.788 - -# 3) 1/2 the minimum incidence rate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the minimum one and 1/2 -min_half_ipab_IgG<-0.5*min(incidence_summary$Incidence_Rate) - -#0.185 -# Create a data frame for the incidence rate summary -incidence_rate_summary <- data.frame( - Metric = c("Median Incidence Rate", - "2x Maximum Incidence Rate", - "1/2 Minimum Incidence Rate"), - Value = c(median(incidence_summary$Incidence_Rate), # Median of incidence rates - 2 * max(incidence_summary$Incidence_Rate), # 2x Maximum incidence rate - 0.5 * min(incidence_summary$Incidence_Rate)) # 1/2 Minimum incidence rate -) - -# Print the table -print(incidence_rate_summary) - -``` -These three values are preliminary observations of lambda from ipab_IgG. - -We choose a lambda that is half of the minimum lambda from the four regions. 
- -\newpage - -# Simulation (300times) with the smallest lambda -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} - -# Define the simulation function -simulate_seroincidence <- function(nrep, n_sim, observed, range = NULL) { - # Set parallel plan inside function to avoid issues with distributed nodes - plan(multicore) # Use multiple cores for parallel processing (works best on HPC) - - # Parameters - dmcmc <- curve_params_shigella # Curve parameters - antibodies <- c("IgG") # Antigen-isotypes - lambda <- observed # Simulated incidence rate per person-year - - # Biologic noise distribution - dlims <- rbind("IgG" = c(min = 0, max = 0.5)) - - # Noise parameters - cond <- tibble( - antigen_iso = c("IgG"), - nu = c(0.5), # Biologic noise (nu) - eps = c(0.25), # Measurement noise (eps) - y.low = c(25), # Low cutoff (llod) - y.high = c(200000) # High cutoff (y.high) - ) - - # Perform simulations in parallel - results <- future_map(1:n_sim, function(i) { - tryCatch({ - # Generate cross-sectional data - csdata <- sim.cs( - curve_params = dmcmc, - lambda = lambda, - n.smpl = nrep, - age_range = range, - antigen_isos = antibodies, - n.mc = 0, - renew_params = TRUE, # Use different parameters for each simulation - add.noise = TRUE, - noise_limits = dlims, - format = "long" - ) - - # Estimate seroincidence - est <- est.incidence( - pop_data = csdata, - curve_params = dmcmc, - noise_params = cond, - lambda_start = 0.1, - build_graph = TRUE, - verbose = FALSE, - print_graph = FALSE, - antigen_isos = antibodies - ) - - # Return results for this simulation - list(csdata = csdata, est1 = est) - }, error = function(e) { - return(list(error = e$message)) # Capture and store errors instead of stopping execution - }) - }, .options = furrr_options(seed = TRUE)) - - # Ensure sequential processing after function execution - plan(sequential) - - return(results) -} - -# ------------------------ # -# 🔹 Run Simulations in Parallel -# ------------------------ # - -# Set up 
parallel processing (Ensure future::plan() is set before execution) -plan(multicore) # Works best on HPC - -# Define parameter sets -sim_params <- list( - list(nrep = 100, range = c(0, 2)), - list(nrep = 100, range = c(2, 5)), - list(nrep = 200, range = c(0, 2)), - list(nrep = 200, range = c(2, 5)), - list(nrep = 300, range = c(0, 2)), - list(nrep = 300, range = c(2, 5)), - list(nrep = 400, range = c(0, 2)), - list(nrep = 400, range = c(2, 5)) -) - -# Run simulations in a loop to reduce redundant code -results_list <- lapply(sim_params, function(params) { - cat("Running simulation for nrep =", params$nrep, "range =", params$range, "\n") - simulate_seroincidence(nrep = params$nrep, n_sim = 300, observed = min_half_ipab_IgG, range = params$range) -}) - -# Assign results to variables -results_100_1 <- results_list[[1]] -results_100_2 <- results_list[[2]] -results_200_1 <- results_list[[3]] -results_200_2 <- results_list[[4]] -results_300_1 <- results_list[[5]] -results_300_2 <- results_list[[6]] -results_400_1 <- results_list[[7]] -results_400_2 <- results_list[[8]] - -# Stop parallel processing -plan(sequential) # Return to sequential processing after execution - -``` - - -## Store each sample size in table -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Define a function to generate final tables -generate_final_table <- function(results_list, sample_size) { - # Initialize an empty list to store the results - summary_results <- list() - - # Loop through each of the 100 results and extract the required columns - for (i in 1:300) { - # Extract the summary for each result - result_summary <- summary(results_list[[i]]$est1) - - # Select the required columns - extracted_columns <- result_summary %>% - select(incidence.rate, SE, CI.lwr, CI.upr) - - # Add a column for the index (optional, for tracking) - extracted_columns <- extracted_columns %>% - mutate(index = i) - - # Append to the list - summary_results[[i]] <- extracted_columns - } - - # Combine all 
results into a single data frame - final_table <- bind_rows(summary_results) %>% - mutate(sample_size = sample_size) # Add sample size column for clarity - - return(final_table) -} - -# Example usage for different sample sizes -final_table_100_1 <- generate_final_table(results_100_1, 100) -final_table_100_2 <- generate_final_table(results_100_2, 100) - -final_table_200_1 <- generate_final_table(results_200_1, 200) -final_table_200_2 <- generate_final_table(results_200_2, 200) - -final_table_300_1 <- generate_final_table(results_300_1, 300) -final_table_300_2 <- generate_final_table(results_300_2, 300) - -final_table_400_1 <- generate_final_table(results_400_1, 400) -final_table_400_2 <- generate_final_table(results_400_2, 400) -``` - - - -## Graphs of where the x axis is the sample size and the y axis is the empirical standard error - -Set the preliminary observed lambda as half of the minimum incidence rate from the four regions. - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# Function to calculate metrics for each table -calculate_metrics <- function(data, sample_size) { - empirical_se <- sd(data$incidence.rate) - - - # Return a data frame with results - data.frame( - sample_size = sample_size, - empirical_se = empirical_se - - ) -} - -# Apply the function to each table -metrics_100_1 <- calculate_metrics(final_table_100_1, 100) -metrics_100_2 <- calculate_metrics(final_table_100_2, 100) - -metrics_200_1 <- calculate_metrics(final_table_200_1, 200) -metrics_200_2 <- calculate_metrics(final_table_200_2, 200) - -metrics_300_1 <- calculate_metrics(final_table_300_1, 300) -metrics_300_2 <- calculate_metrics(final_table_300_2, 300) - -metrics_400_1 <- calculate_metrics(final_table_400_1, 400) -metrics_400_2 <- calculate_metrics(final_table_400_2, 400) - - -# Combine the results into a single summary table -summary_metrics_1 <- bind_rows(metrics_100_1, metrics_200_1, metrics_300_1, - metrics_400_1) - - -summary_metrics_2 <- bind_rows(metrics_100_2, metrics_200_2, 
metrics_300_2, - metrics_400_2) - -# Add a column to distinguish the datasets -summary_metrics_1 <- summary_metrics_1 %>% - mutate(Age_Group = "Age 0-2") - -summary_metrics_2 <- summary_metrics_2 %>% - mutate(Age_Group = "Age 2-5") - -# Combine both datasets into one -summary_metrics_combined <- bind_rows(summary_metrics_1, summary_metrics_2) - -# Plot with color to differentiate age groups -ggplot(summary_metrics_combined, aes(x = sample_size, y = empirical_se, color = Age_Group)) + - geom_line() + - geom_point() + - labs( - title = "Empirical Standard Error vs. Sample Size", - x = "Sample Size", - y = "Empirical Standard Error", - color = "Age Group" - ) + - theme_minimal() -``` - -\newpage - -## Table - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -summary_metrics_combined %>% - kable() -``` \ No newline at end of file diff --git a/sim_hpc_ipab_med.qmd b/sim_hpc_ipab_med.qmd deleted file mode 100644 index d0bf79a..0000000 --- a/sim_hpc_ipab_med.qmd +++ /dev/null @@ -1,408 +0,0 @@ ---- -title: "Simulation with the Median Lambda for IpaB IgG" -format: - pdf: - number-sections: true - number-depth: 2 - number-offset: [0, 0] - pdf-engine: pdflatex -header-includes: - - \usepackage{graphicx} # Required for ggplot figures - - \usepackage{float} # Helps place figures properly - - \usepackage{wrapfig} # Now safely included since you installed it -editor: visual -editor_options: - chunk_output_type: console ---- - -```{r setup, include=FALSE,echo=FALSE} -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -``` - -```{r, echo=FALSE, message=FALSE} -library(tibble) -library(dplyr) -library(serocalculator) -library(haven) -library(knitr) -library(plotly) -library(kableExtra) -library(tidyr) -library(arsenal) -library(forcats) -library(huxtable) -library(magrittr) -library(parameters) -library(ggplot2) -library(ggeasy) -library(scales) -library(plotly) -library(patchwork) -library(tidyverse) -library(readxl) -library(purrr) -library(here) -library(table1) -library(furrr) 
-library(future) -``` - - -# Vary simulation lambdas across the range in the real cross-sectional data - -## 1. Estimate the shigella incidence rate using the real cross-sectional Shigella data, -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Load data -load("~/ipab.RData") -``` - -## Get parameters from longitudinal data - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Set noise -create_noise_df <- function(country) { - noise_df <- tibble( - antigen_iso = c("IgG"), - Country = factor(c(country)), # Use input argument for Country - y.low = c(25), - eps = c(0.25), - nu = c(0.5), - y.high = c(200000) - ) - - return(noise_df) -} - -noise_df_USA <- create_noise_df("MA USA") -noise_df_Niger <- create_noise_df("Niger") -noise_df_Sierra <- create_noise_df("Sierra Leone") -noise_df_Ghana <- create_noise_df("Ghana") -``` - -## 2. Run simulations using: -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# MA USA -est_USA <- est.incidence( - pop_data = df_xs_USA_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_USA, - antigen_isos = c("IgG"), -) - -# Ghana -est_Ghana <- est.incidence( - pop_data = df_xs_Ghana_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Ghana, - antigen_isos = c("IgG"), -) - -# Niger -est_Niger <- est.incidence( - pop_data = df_xs_Niger_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Niger, - antigen_isos = c("IgG"), -) - -# Sierra Leone -est_Sierra <- est.incidence( - pop_data = df_xs_Sierra_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Sierra, - antigen_isos = c("IgG"), -) - -# create table of incidence.rate of each region - -create_incidence_table <- function(...) { - # Capture input objects and their names - est_list <- list(...) 
- country_names <- names(est_list) - - # Extract incidence.rate from the summary() of each estimate object - incidence_rates <- sapply(est_list, function(x) summary(x)$incidence.rate) - - # Create a tidy tibble - incidence_table <- tibble( - Country = country_names, - Incidence_Rate = incidence_rates - ) - - return(incidence_table) -} -# Example usage with four different country estimates: -incidence_summary <- create_incidence_table( - USA = est_USA, - Ghana = est_Ghana, - Niger = est_Niger, - Sierra_Leone = est_Sierra -) - -# Display the summary table -print(incidence_summary) - -# 1) The median of the incidence rate estimates from step 1. -# Get incidence rate estimates from each region (4 regions) and do median -med_ipab_IgG<-median(incidence_summary$Incidence_Rate) - -#1.188 - -# 2) 2x the maximum incidence rate estimate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the maximum one and 2x -max2_ipab_IgG<-2*max(incidence_summary$Incidence_Rate) - -# 2.788 - -# 3) 1/2 the minimum incidence rate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the minimum one and 1/2 -min_half_ipab_IgG<-0.5*min(incidence_summary$Incidence_Rate) - -#0.185 -# Create a data frame for the incidence rate summary -incidence_rate_summary <- data.frame( - Metric = c("Median Incidence Rate", - "2x Maximum Incidence Rate", - "1/2 Minimum Incidence Rate"), - Value = c(median(incidence_summary$Incidence_Rate), # Median of incidence rates - 2 * max(incidence_summary$Incidence_Rate), # 2x Maximum incidence rate - 0.5 * min(incidence_summary$Incidence_Rate)) # 1/2 Minimum incidence rate -) - -# Print the table -print(incidence_rate_summary) - -``` -These three values are preliminary observations of lambda from ipab_IgG. - -We choose a lambda that is the median of the four regions. 
- -\newpage - -# Simulation (300times) with the smallest lambda -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} - -# Define the simulation function -simulate_seroincidence <- function(nrep, n_sim, observed, range = NULL) { - # Set parallel plan inside function to avoid issues with distributed nodes - plan(multicore) # Use multiple cores for parallel processing (works best on HPC) - - # Parameters - dmcmc <- curve_params_shigella # Curve parameters - antibodies <- c("IgG") # Antigen-isotypes - lambda <- observed # Simulated incidence rate per person-year - - # Biologic noise distribution - dlims <- rbind("IgG" = c(min = 0, max = 0.5)) - - # Noise parameters - cond <- tibble( - antigen_iso = c("IgG"), - nu = c(0.5), # Biologic noise (nu) - eps = c(0.25), # Measurement noise (eps) - y.low = c(25), # Low cutoff (llod) - y.high = c(200000) # High cutoff (y.high) - ) - - # Perform simulations in parallel - results <- future_map(1:n_sim, function(i) { - tryCatch({ - # Generate cross-sectional data - csdata <- sim.cs( - curve_params = dmcmc, - lambda = lambda, - n.smpl = nrep, - age_range = range, - antigen_isos = antibodies, - n.mc = 0, - renew_params = TRUE, # Use different parameters for each simulation - add.noise = TRUE, - noise_limits = dlims, - format = "long" - ) - - # Estimate seroincidence - est <- est.incidence( - pop_data = csdata, - curve_params = dmcmc, - noise_params = cond, - lambda_start = 0.1, - build_graph = TRUE, - verbose = FALSE, - print_graph = FALSE, - antigen_isos = antibodies - ) - - # Return results for this simulation - list(csdata = csdata, est1 = est) - }, error = function(e) { - return(list(error = e$message)) # Capture and store errors instead of stopping execution - }) - }, .options = furrr_options(seed = TRUE)) - - # Ensure sequential processing after function execution - plan(sequential) - - return(results) -} - -# ------------------------ # -# 🔹 Run Simulations in Parallel -# ------------------------ # - -# Set up 
parallel processing (Ensure future::plan() is set before execution) -plan(multicore) # Works best on HPC - -# Define parameter sets -sim_params <- list( - list(nrep = 100, range = c(0, 2)), - list(nrep = 100, range = c(2, 5)), - list(nrep = 200, range = c(0, 2)), - list(nrep = 200, range = c(2, 5)), - list(nrep = 300, range = c(0, 2)), - list(nrep = 300, range = c(2, 5)), - list(nrep = 400, range = c(0, 2)), - list(nrep = 400, range = c(2, 5)) -) - -# Run simulations in a loop to reduce redundant code -results_list <- lapply(sim_params, function(params) { - cat("Running simulation for nrep =", params$nrep, "range =", params$range, "\n") - simulate_seroincidence(nrep = params$nrep, n_sim = 300, observed = med_ipab_IgG, range = params$range) -}) - -# Assign results to variables -results_100_1 <- results_list[[1]] -results_100_2 <- results_list[[2]] -results_200_1 <- results_list[[3]] -results_200_2 <- results_list[[4]] -results_300_1 <- results_list[[5]] -results_300_2 <- results_list[[6]] -results_400_1 <- results_list[[7]] -results_400_2 <- results_list[[8]] - -# Stop parallel processing -plan(sequential) # Return to sequential processing after execution - -``` - - -## Store each sample size in table -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Define a function to generate final tables -generate_final_table <- function(results_list, sample_size) { - # Initialize an empty list to store the results - summary_results <- list() - - # Loop through each of the 100 results and extract the required columns - for (i in 1:300) { - # Extract the summary for each result - result_summary <- summary(results_list[[i]]$est1) - - # Select the required columns - extracted_columns <- result_summary %>% - select(incidence.rate, SE, CI.lwr, CI.upr) - - # Add a column for the index (optional, for tracking) - extracted_columns <- extracted_columns %>% - mutate(index = i) - - # Append to the list - summary_results[[i]] <- extracted_columns - } - - # Combine all 
results into a single data frame - final_table <- bind_rows(summary_results) %>% - mutate(sample_size = sample_size) # Add sample size column for clarity - - return(final_table) -} - -# Example usage for different sample sizes -final_table_100_1 <- generate_final_table(results_100_1, 100) -final_table_100_2 <- generate_final_table(results_100_2, 100) - -final_table_200_1 <- generate_final_table(results_200_1, 200) -final_table_200_2 <- generate_final_table(results_200_2, 200) - -final_table_300_1 <- generate_final_table(results_300_1, 300) -final_table_300_2 <- generate_final_table(results_300_2, 300) - -final_table_400_1 <- generate_final_table(results_400_1, 400) -final_table_400_2 <- generate_final_table(results_400_2, 400) -``` - - - -## Graphs of where the x axis is the sample size and the y axis is the empirical standard error - -Set the preliminary observed lambda as the median incidence rate across the four regions. - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# Function to calculate metrics for each table -calculate_metrics <- function(data, sample_size) { - empirical_se <- sd(data$incidence.rate) - - - # Return a data frame with results - data.frame( - sample_size = sample_size, - empirical_se = empirical_se - - ) -} - -# Apply the function to each table -metrics_100_1 <- calculate_metrics(final_table_100_1, 100) -metrics_100_2 <- calculate_metrics(final_table_100_2, 100) - -metrics_200_1 <- calculate_metrics(final_table_200_1, 200) -metrics_200_2 <- calculate_metrics(final_table_200_2, 200) - -metrics_300_1 <- calculate_metrics(final_table_300_1, 300) -metrics_300_2 <- calculate_metrics(final_table_300_2, 300) - -metrics_400_1 <- calculate_metrics(final_table_400_1, 400) -metrics_400_2 <- calculate_metrics(final_table_400_2, 400) - - -# Combine the results into a single summary table -summary_metrics_1 <- bind_rows(metrics_100_1, metrics_200_1, metrics_300_1, - metrics_400_1) - - -summary_metrics_2 <- bind_rows(metrics_100_2, metrics_200_2, 
metrics_300_2, - metrics_400_2) - -# Add a column to distinguish the datasets -summary_metrics_1 <- summary_metrics_1 %>% - mutate(Age_Group = "Age 0-2") - -summary_metrics_2 <- summary_metrics_2 %>% - mutate(Age_Group = "Age 2-5") - -# Combine both datasets into one -summary_metrics_combined <- bind_rows(summary_metrics_1, summary_metrics_2) - -# Plot with color to differentiate age groups -ggplot(summary_metrics_combined, aes(x = sample_size, y = empirical_se, color = Age_Group)) + - geom_line() + - geom_point() + - labs( - title = "Empirical Standard Error vs. Sample Size", - x = "Sample Size", - y = "Empirical Standard Error", - color = "Age Group" - ) + - theme_minimal() -``` - -\newpage - -## Table - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -summary_metrics_combined %>% - kable() -``` \ No newline at end of file diff --git a/sim_hpc_sf2a.qmd b/sim_hpc_sf2a.qmd deleted file mode 100644 index d586d9f..0000000 --- a/sim_hpc_sf2a.qmd +++ /dev/null @@ -1,401 +0,0 @@ ---- -title: "Simulation with the Smallest Lambda for Sf2a IgG" -format: - pdf: - number-sections: true - number-depth: 2 - number-offset: [0, 0] - pdf-engine: pdflatex # <-- Use pdflatex instead of xelatex or lualatex -editor: visual -editor_options: - chunk_output_type: console ---- - -```{r setup, include=FALSE,echo=FALSE} -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -``` - -```{r, echo=FALSE, message=FALSE} -library(tibble) -library(dplyr) -library(serocalculator) -library(haven) -library(knitr) -library(plotly) -library(kableExtra) -library(tidyr) -library(arsenal) -library(forcats) -library(huxtable) -library(magrittr) -library(parameters) -library(ggplot2) -library(ggeasy) -library(scales) -library(plotly) -library(patchwork) -library(tidyverse) -library(readxl) -library(purrr) -library(here) -library(table1) -library(furrr) -library(future) -``` - - -# Vary simulation lambdas across the range in the real cross-sectional data - -## 1. 
Estimate the shigella incidence rate using the real cross-sectional Shigella data, -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Load data -load("~/sf2a.RData") -``` - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Set noise -create_noise_df <- function(country) { - noise_df <- tibble( - antigen_iso = c("IgG"), - Country = factor(c(country)), # Use input argument for Country - y.low = c(25), - eps = c(0.25), - nu = c(0.5), - y.high = c(200000) - ) - - return(noise_df) -} - -noise_df_USA <- create_noise_df("MA USA") -noise_df_Niger <- create_noise_df("Niger") -noise_df_Sierra <- create_noise_df("Sierra Leone") -noise_df_Ghana <- create_noise_df("Ghana") -``` - -## 2. Run simulations using: -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# MA USA -est_USA <- est.incidence( - pop_data = df_xs_USA_sf2a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_USA, - antigen_isos = c("IgG"), -) - -# Ghana -est_Ghana <- est.incidence( - pop_data = df_xs_Ghana_sf2a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Ghana, - antigen_isos = c("IgG"), -) - -# Niger -est_Niger <- est.incidence( - pop_data = df_xs_Niger_sf2a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Niger, - antigen_isos = c("IgG"), -) - -# Sierra Leone -est_Sierra <- est.incidence( - pop_data = df_xs_Sierra_sf2a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Sierra, - antigen_isos = c("IgG"), -) - -# create table of incidence.rate of each region - -create_incidence_table <- function(...) { - # Capture input objects and their names - est_list <- list(...) 
- country_names <- names(est_list) - - # Extract incidence.rate from the summary() of each estimate object - incidence_rates <- sapply(est_list, function(x) summary(x)$incidence.rate) - - # Create a tidy tibble - incidence_table <- tibble( - Country = country_names, - Incidence_Rate = incidence_rates - ) - - return(incidence_table) -} -# Example usage with four different country estimates: -incidence_summary <- create_incidence_table( - USA = est_USA, - Ghana = est_Ghana, - Niger = est_Niger, - Sierra_Leone = est_Sierra -) - -# Display the summary table -print(incidence_summary) - -# 1) The median of the incidence rate estimates from step 1. -# Get incidence rate estimates from each region (4 regions) and do median -med_sf2a_IgG<-median(incidence_summary$Incidence_Rate) - -#0.756 - -# 2) 2x the maximum incidence rate estimate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the maximum one and 2x -max2_sf2a_IgG<-2*max(incidence_summary$Incidence_Rate) - -# 2.058 - -# 3) 1/2 the minimum incidence rate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the minimum one and 1/2 -min_half_sf2a_IgG<-0.5*min(incidence_summary$Incidence_Rate) - -#0.288 - -# Create a data frame for the incidence rate summary -incidence_rate_summary <- data.frame( - Metric = c("Median Incidence Rate", - "2x Maximum Incidence Rate", - "1/2 Minimum Incidence Rate"), - Value = c(median(incidence_summary$Incidence_Rate), # Median of incidence rates - 2 * max(incidence_summary$Incidence_Rate), # 2x Maximum incidence rate - 0.5 * min(incidence_summary$Incidence_Rate)) # 1/2 Minimum incidence rate -) - -# Print the table -print(incidence_rate_summary) -``` -These three values are preliminary observations of lambda from sf2a_IgG. - -We choose a lambda that is half of the minimum lambda from the four regions. 
- -\newpage - -# Simulation (300times) with the smallest lambda -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} - -# Define the simulation function -simulate_seroincidence <- function(nrep, n_sim, observed, range = NULL) { - # Set parallel plan inside function to avoid issues with distributed nodes - plan(multicore) # Use multiple cores for parallel processing (works best on HPC) - - # Parameters - dmcmc <- curve_params_shigella # Curve parameters - antibodies <- c("IgG") # Antigen-isotypes - lambda <- observed # Simulated incidence rate per person-year - - # Biologic noise distribution - dlims <- rbind("IgG" = c(min = 0, max = 0.5)) - - # Noise parameters - cond <- tibble( - antigen_iso = c("IgG"), - nu = c(0.5), # Biologic noise (nu) - eps = c(0.25), # Measurement noise (eps) - y.low = c(25), # Low cutoff (llod) - y.high = c(200000) # High cutoff (y.high) - ) - - # Perform simulations in parallel - results <- future_map(1:n_sim, function(i) { - tryCatch({ - # Generate cross-sectional data - csdata <- sim.cs( - curve_params = dmcmc, - lambda = lambda, - n.smpl = nrep, - age_range = range, - antigen_isos = antibodies, - n.mc = 0, - renew_params = TRUE, # Use different parameters for each simulation - add.noise = TRUE, - noise_limits = dlims, - format = "long" - ) - - # Estimate seroincidence - est <- est.incidence( - pop_data = csdata, - curve_params = dmcmc, - noise_params = cond, - lambda_start = 0.1, - build_graph = TRUE, - verbose = FALSE, - print_graph = FALSE, - antigen_isos = antibodies - ) - - # Return results for this simulation - list(csdata = csdata, est1 = est) - }, error = function(e) { - return(list(error = e$message)) # Capture and store errors instead of stopping execution - }) - }, .options = furrr_options(seed = TRUE)) - - # Ensure sequential processing after function execution - plan(sequential) - - return(results) -} - -# ------------------------ # -# 🔹 Run Simulations in Parallel -# ------------------------ # - -# Set up 
parallel processing (Ensure future::plan() is set before execution) -plan(multicore) # Works best on HPC - -# Define parameter sets -sim_params <- list( - list(nrep = 100, range = c(0, 2)), - list(nrep = 100, range = c(2, 5)), - list(nrep = 200, range = c(0, 2)), - list(nrep = 200, range = c(2, 5)), - list(nrep = 300, range = c(0, 2)), - list(nrep = 300, range = c(2, 5)), - list(nrep = 400, range = c(0, 2)), - list(nrep = 400, range = c(2, 5)) -) - -# Run simulations in a loop to reduce redundant code -results_list <- lapply(sim_params, function(params) { - cat("Running simulation for nrep =", params$nrep, "range =", params$range, "\n") - simulate_seroincidence(nrep = params$nrep, n_sim = 300, observed = min_half_sf2a_IgG, range = params$range) -}) - -# Assign results to variables -results_100_1 <- results_list[[1]] -results_100_2 <- results_list[[2]] -results_200_1 <- results_list[[3]] -results_200_2 <- results_list[[4]] -results_300_1 <- results_list[[5]] -results_300_2 <- results_list[[6]] -results_400_1 <- results_list[[7]] -results_400_2 <- results_list[[8]] - -# Stop parallel processing -plan(sequential) # Return to sequential processing after execution -``` - - -## Store each sample size in table -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Define a function to generate final tables -generate_final_table <- function(results_list, sample_size) { - # Initialize an empty list to store the results - summary_results <- list() - - # Loop through each of the 100 results and extract the required columns - for (i in 1:300) { - # Extract the summary for each result - result_summary <- summary(results_list[[i]]$est1) - - # Select the required columns - extracted_columns <- result_summary %>% - select(incidence.rate, SE, CI.lwr, CI.upr) - - # Add a column for the index (optional, for tracking) - extracted_columns <- extracted_columns %>% - mutate(index = i) - - # Append to the list - summary_results[[i]] <- extracted_columns - } - - # Combine all 
results into a single data frame - final_table <- bind_rows(summary_results) %>% - mutate(sample_size = sample_size) # Add sample size column for clarity - - return(final_table) -} - -# Example usage for different sample sizes -final_table_100_1 <- generate_final_table(results_100_1, 100) -final_table_100_2 <- generate_final_table(results_100_2, 100) - -final_table_200_1 <- generate_final_table(results_200_1, 200) -final_table_200_2 <- generate_final_table(results_200_2, 200) - -final_table_300_1 <- generate_final_table(results_300_1, 300) -final_table_300_2 <- generate_final_table(results_300_2, 300) - -final_table_400_1 <- generate_final_table(results_400_1, 400) -final_table_400_2 <- generate_final_table(results_400_2, 400) -``` - - - -## Graphs of where the x axis is the sample size and the y axis is the empirical standard error - -Set the preliminary observed lambda as half of the minimum incidence rate from the four regions. - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# Function to calculate metrics for each table -calculate_metrics <- function(data, sample_size) { - empirical_se <- sd(data$incidence.rate) - - - # Return a data frame with results - data.frame( - sample_size = sample_size, - empirical_se = empirical_se - - ) -} - -# Apply the function to each table -metrics_100_1 <- calculate_metrics(final_table_100_1, 100) -metrics_100_2 <- calculate_metrics(final_table_100_2, 100) - -metrics_200_1 <- calculate_metrics(final_table_200_1, 200) -metrics_200_2 <- calculate_metrics(final_table_200_2, 200) - -metrics_300_1 <- calculate_metrics(final_table_300_1, 300) -metrics_300_2 <- calculate_metrics(final_table_300_2, 300) - -metrics_400_1 <- calculate_metrics(final_table_400_1, 400) -metrics_400_2 <- calculate_metrics(final_table_400_2, 400) - - -# Combine the results into a single summary table -summary_metrics_1 <- bind_rows(metrics_100_1, metrics_200_1, metrics_300_1, - metrics_400_1) - - -summary_metrics_2 <- bind_rows(metrics_100_2, metrics_200_2, 
metrics_300_2, - metrics_400_2) - -# Add a column to distinguish the datasets -summary_metrics_1 <- summary_metrics_1 %>% - mutate(Age_Group = "Age 0-2") - -summary_metrics_2 <- summary_metrics_2 %>% - mutate(Age_Group = "Age 2-5") - -# Combine both datasets into one -summary_metrics_combined <- bind_rows(summary_metrics_1, summary_metrics_2) - -# Plot with color to differentiate age groups -ggplot(summary_metrics_combined, aes(x = sample_size, y = empirical_se, color = Age_Group)) + - geom_line() + - geom_point() + - labs( - title = "Empirical Standard Error vs. Sample Size", - x = "Sample Size", - y = "Empirical Standard Error", - color = "Age Group" - ) + - theme_minimal() -``` - -\newpage - -## Table - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -summary_metrics_combined %>% - kable() -``` \ No newline at end of file diff --git a/sim_hpc_sf2a_med.qmd b/sim_hpc_sf2a_med.qmd deleted file mode 100644 index f808dfb..0000000 --- a/sim_hpc_sf2a_med.qmd +++ /dev/null @@ -1,401 +0,0 @@ ---- -title: "Simulation with the Median Lambda for Sf2a IgG" -format: - pdf: - number-sections: true - number-depth: 2 - number-offset: [0, 0] - pdf-engine: pdflatex # <-- Use pdflatex instead of xelatex or lualatex -editor: visual -editor_options: - chunk_output_type: console ---- - -```{r setup, include=FALSE,echo=FALSE} -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -``` - -```{r, echo=FALSE, message=FALSE} -library(tibble) -library(dplyr) -library(serocalculator) -library(haven) -library(knitr) -library(plotly) -library(kableExtra) -library(tidyr) -library(arsenal) -library(forcats) -library(huxtable) -library(magrittr) -library(parameters) -library(ggplot2) -library(ggeasy) -library(scales) -library(plotly) -library(patchwork) -library(tidyverse) -library(readxl) -library(purrr) -library(here) -library(table1) -library(furrr) -library(future) -``` - - -# Vary simulation lambdas across the range in the real cross-sectional data - -## 1. 
Estimate the shigella incidence rate using the real cross-sectional Shigella data, -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Load data -load("~/sf2a.RData") -``` - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Set noise -create_noise_df <- function(country) { - noise_df <- tibble( - antigen_iso = c("IgG"), - Country = factor(c(country)), # Use input argument for Country - y.low = c(25), - eps = c(0.25), - nu = c(0.5), - y.high = c(200000) - ) - - return(noise_df) -} - -noise_df_USA <- create_noise_df("MA USA") -noise_df_Niger <- create_noise_df("Niger") -noise_df_Sierra <- create_noise_df("Sierra Leone") -noise_df_Ghana <- create_noise_df("Ghana") -``` - -## 2. Run simulations using: -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# MA USA -est_USA <- est.incidence( - pop_data = df_xs_USA_sf2a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_USA, - antigen_isos = c("IgG"), -) - -# Ghana -est_Ghana <- est.incidence( - pop_data = df_xs_Ghana_sf2a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Ghana, - antigen_isos = c("IgG"), -) - -# Niger -est_Niger <- est.incidence( - pop_data = df_xs_Niger_sf2a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Niger, - antigen_isos = c("IgG"), -) - -# Sierra Leone -est_Sierra <- est.incidence( - pop_data = df_xs_Sierra_sf2a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Sierra, - antigen_isos = c("IgG"), -) - -# create table of incidence.rate of each region - -create_incidence_table <- function(...) { - # Capture input objects and their names - est_list <- list(...) 
- country_names <- names(est_list) - - # Extract incidence.rate from the summary() of each estimate object - incidence_rates <- sapply(est_list, function(x) summary(x)$incidence.rate) - - # Create a tidy tibble - incidence_table <- tibble( - Country = country_names, - Incidence_Rate = incidence_rates - ) - - return(incidence_table) -} -# Example usage with four different country estimates: -incidence_summary <- create_incidence_table( - USA = est_USA, - Ghana = est_Ghana, - Niger = est_Niger, - Sierra_Leone = est_Sierra -) - -# Display the summary table -print(incidence_summary) - -# 1) The median of the incidence rate estimates from step 1. -# Get incidence rate estimates from each region (4 regions) and do median -med_sf2a_IgG<-median(incidence_summary$Incidence_Rate) - -#0.756 - -# 2) 2x the maximum incidence rate estimate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the maximum one and 2x -max2_sf2a_IgG<-2*max(incidence_summary$Incidence_Rate) - -# 2.058 - -# 3) 1/2 the minimum incidence rate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the minimum one and 1/2 -min_half_sf2a_IgG<-0.5*min(incidence_summary$Incidence_Rate) - -#0.288 - -# Create a data frame for the incidence rate summary -incidence_rate_summary <- data.frame( - Metric = c("Median Incidence Rate", - "2x Maximum Incidence Rate", - "1/2 Minimum Incidence Rate"), - Value = c(median(incidence_summary$Incidence_Rate), # Median of incidence rates - 2 * max(incidence_summary$Incidence_Rate), # 2x Maximum incidence rate - 0.5 * min(incidence_summary$Incidence_Rate)) # 1/2 Minimum incidence rate -) - -# Print the table -print(incidence_rate_summary) -``` -These three values are preliminary observations of lambda from sf2a_IgG. - -We choose a lambda that is the median of the four regions. 
- -\newpage - -# Simulation (300times) with the smallest lambda -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} - -# Define the simulation function -simulate_seroincidence <- function(nrep, n_sim, observed, range = NULL) { - # Set parallel plan inside function to avoid issues with distributed nodes - plan(multicore) # Use multiple cores for parallel processing (works best on HPC) - - # Parameters - dmcmc <- curve_params_shigella # Curve parameters - antibodies <- c("IgG") # Antigen-isotypes - lambda <- observed # Simulated incidence rate per person-year - - # Biologic noise distribution - dlims <- rbind("IgG" = c(min = 0, max = 0.5)) - - # Noise parameters - cond <- tibble( - antigen_iso = c("IgG"), - nu = c(0.5), # Biologic noise (nu) - eps = c(0.25), # Measurement noise (eps) - y.low = c(25), # Low cutoff (llod) - y.high = c(200000) # High cutoff (y.high) - ) - - # Perform simulations in parallel - results <- future_map(1:n_sim, function(i) { - tryCatch({ - # Generate cross-sectional data - csdata <- sim.cs( - curve_params = dmcmc, - lambda = lambda, - n.smpl = nrep, - age_range = range, - antigen_isos = antibodies, - n.mc = 0, - renew_params = TRUE, # Use different parameters for each simulation - add.noise = TRUE, - noise_limits = dlims, - format = "long" - ) - - # Estimate seroincidence - est <- est.incidence( - pop_data = csdata, - curve_params = dmcmc, - noise_params = cond, - lambda_start = 0.1, - build_graph = TRUE, - verbose = FALSE, - print_graph = FALSE, - antigen_isos = antibodies - ) - - # Return results for this simulation - list(csdata = csdata, est1 = est) - }, error = function(e) { - return(list(error = e$message)) # Capture and store errors instead of stopping execution - }) - }, .options = furrr_options(seed = TRUE)) - - # Ensure sequential processing after function execution - plan(sequential) - - return(results) -} - -# ------------------------ # -# 🔹 Run Simulations in Parallel -# ------------------------ # - -# Set up 
parallel processing (Ensure future::plan() is set before execution) -plan(multicore) # Works best on HPC - -# Define parameter sets -sim_params <- list( - list(nrep = 100, range = c(0, 2)), - list(nrep = 100, range = c(2, 5)), - list(nrep = 200, range = c(0, 2)), - list(nrep = 200, range = c(2, 5)), - list(nrep = 300, range = c(0, 2)), - list(nrep = 300, range = c(2, 5)), - list(nrep = 400, range = c(0, 2)), - list(nrep = 400, range = c(2, 5)) -) - -# Run simulations in a loop to reduce redundant code -results_list <- lapply(sim_params, function(params) { - cat("Running simulation for nrep =", params$nrep, "range =", params$range, "\n") - simulate_seroincidence(nrep = params$nrep, n_sim = 300, observed = med_sf2a_IgG, range = params$range) -}) - -# Assign results to variables -results_100_1 <- results_list[[1]] -results_100_2 <- results_list[[2]] -results_200_1 <- results_list[[3]] -results_200_2 <- results_list[[4]] -results_300_1 <- results_list[[5]] -results_300_2 <- results_list[[6]] -results_400_1 <- results_list[[7]] -results_400_2 <- results_list[[8]] - -# Stop parallel processing -plan(sequential) # Return to sequential processing after execution -``` - - -## Store each sample size in table -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Define a function to generate final tables -generate_final_table <- function(results_list, sample_size) { - # Initialize an empty list to store the results - summary_results <- list() - - # Loop through each of the 100 results and extract the required columns - for (i in 1:300) { - # Extract the summary for each result - result_summary <- summary(results_list[[i]]$est1) - - # Select the required columns - extracted_columns <- result_summary %>% - select(incidence.rate, SE, CI.lwr, CI.upr) - - # Add a column for the index (optional, for tracking) - extracted_columns <- extracted_columns %>% - mutate(index = i) - - # Append to the list - summary_results[[i]] <- extracted_columns - } - - # Combine all 
results into a single data frame - final_table <- bind_rows(summary_results) %>% - mutate(sample_size = sample_size) # Add sample size column for clarity - - return(final_table) -} - -# Example usage for different sample sizes -final_table_100_1 <- generate_final_table(results_100_1, 100) -final_table_100_2 <- generate_final_table(results_100_2, 100) - -final_table_200_1 <- generate_final_table(results_200_1, 200) -final_table_200_2 <- generate_final_table(results_200_2, 200) - -final_table_300_1 <- generate_final_table(results_300_1, 300) -final_table_300_2 <- generate_final_table(results_300_2, 300) - -final_table_400_1 <- generate_final_table(results_400_1, 400) -final_table_400_2 <- generate_final_table(results_400_2, 400) -``` - - - -## Graphs of where the x axis is the sample size and the y axis is the empirical standard error - -Set the preliminary observed lambda as the median incidence rate across the four regions. - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# Function to calculate metrics for each table -calculate_metrics <- function(data, sample_size) { - empirical_se <- sd(data$incidence.rate) - - - # Return a data frame with results - data.frame( - sample_size = sample_size, - empirical_se = empirical_se - - ) -} - -# Apply the function to each table -metrics_100_1 <- calculate_metrics(final_table_100_1, 100) -metrics_100_2 <- calculate_metrics(final_table_100_2, 100) - -metrics_200_1 <- calculate_metrics(final_table_200_1, 200) -metrics_200_2 <- calculate_metrics(final_table_200_2, 200) - -metrics_300_1 <- calculate_metrics(final_table_300_1, 300) -metrics_300_2 <- calculate_metrics(final_table_300_2, 300) - -metrics_400_1 <- calculate_metrics(final_table_400_1, 400) -metrics_400_2 <- calculate_metrics(final_table_400_2, 400) - - -# Combine the results into a single summary table -summary_metrics_1 <- bind_rows(metrics_100_1, metrics_200_1, metrics_300_1, - metrics_400_1) - - -summary_metrics_2 <- bind_rows(metrics_100_2, metrics_200_2, 
metrics_300_2, - metrics_400_2) - -# Add a column to distinguish the datasets -summary_metrics_1 <- summary_metrics_1 %>% - mutate(Age_Group = "Age 0-2") - -summary_metrics_2 <- summary_metrics_2 %>% - mutate(Age_Group = "Age 2-5") - -# Combine both datasets into one -summary_metrics_combined <- bind_rows(summary_metrics_1, summary_metrics_2) - -# Plot with color to differentiate age groups -ggplot(summary_metrics_combined, aes(x = sample_size, y = empirical_se, color = Age_Group)) + - geom_line() + - geom_point() + - labs( - title = "Empirical Standard Error vs. Sample Size", - x = "Sample Size", - y = "Empirical Standard Error", - color = "Age Group" - ) + - theme_minimal() -``` - -\newpage - -## Table - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -summary_metrics_combined %>% - kable() -``` \ No newline at end of file diff --git a/sim_hpc_sf3a.qmd b/sim_hpc_sf3a.qmd deleted file mode 100644 index cf6f3f3..0000000 --- a/sim_hpc_sf3a.qmd +++ /dev/null @@ -1,401 +0,0 @@ ---- -title: "Simulation with the Smallest Lambda for Sf3a IgG" -format: - pdf: - number-sections: true - number-depth: 2 - number-offset: [0, 0] - pdf-engine: pdflatex # <-- Use pdflatex instead of xelatex or lualatex -editor: visual -editor_options: - chunk_output_type: console ---- - -```{r setup, include=FALSE,echo=FALSE} -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -``` - -```{r, echo=FALSE, message=FALSE} -library(tibble) -library(dplyr) -library(serocalculator) -library(haven) -library(knitr) -library(plotly) -library(kableExtra) -library(tidyr) -library(arsenal) -library(forcats) -library(huxtable) -library(magrittr) -library(parameters) -library(ggplot2) -library(ggeasy) -library(scales) -library(plotly) -library(patchwork) -library(tidyverse) -library(readxl) -library(purrr) -library(here) -library(table1) -library(furrr) -library(future) -``` - - -# Vary simulation lambdas across the range in the real cross-sectional data - -## 1. 
Estimate the shigella incidence rate using the real cross-sectional Shigella data, -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Load data -load("~/sf3a.RData") -``` - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Set noise -create_noise_df <- function(country) { - noise_df <- tibble( - antigen_iso = c("IgG"), - Country = factor(c(country)), # Use input argument for Country - y.low = c(25), - eps = c(0.25), - nu = c(0.5), - y.high = c(200000) - ) - - return(noise_df) -} - -noise_df_USA <- create_noise_df("MA USA") -noise_df_Niger <- create_noise_df("Niger") -noise_df_Sierra <- create_noise_df("Sierra Leone") -noise_df_Ghana <- create_noise_df("Ghana") -``` - -## 2. Run simulations using: -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# MA USA -est_USA <- est.incidence( - pop_data = df_xs_USA_sf3a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_USA, - antigen_isos = c("IgG"), -) - -# Ghana -est_Ghana <- est.incidence( - pop_data = df_xs_Ghana_sf3a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Ghana, - antigen_isos = c("IgG"), -) - -# Niger -est_Niger <- est.incidence( - pop_data = df_xs_Niger_sf3a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Niger, - antigen_isos = c("IgG"), -) - -# Sierra Leone -est_Sierra <- est.incidence( - pop_data = df_xs_Sierra_sf3a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Sierra, - antigen_isos = c("IgG"), -) - -# create table of incidence.rate of each region - -create_incidence_table <- function(...) { - # Capture input objects and their names - est_list <- list(...) 
- country_names <- names(est_list) - - # Extract incidence.rate from the summary() of each estimate object - incidence_rates <- sapply(est_list, function(x) summary(x)$incidence.rate) - - # Create a tidy tibble - incidence_table <- tibble( - Country = country_names, - Incidence_Rate = incidence_rates - ) - - return(incidence_table) -} -# Example usage with four different country estimates: -incidence_summary <- create_incidence_table( - USA = est_USA, - Ghana = est_Ghana, - Niger = est_Niger, - Sierra_Leone = est_Sierra -) - -# Display the summary table -print(incidence_summary) - -# 1) The median of the incidence rate estimates from step 1. -# Get incidence rate estimates from each region (4 regions) and do median -med_sf3a_IgG<-median(incidence_summary$Incidence_Rate) - -#0.756 - -# 2) 2x the maximum incidence rate estimate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the maximum one and 2x -max2_sf3a_IgG<-2*max(incidence_summary$Incidence_Rate) - -# 2.058 - -# 3) 1/2 the minimum incidence rate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the minimum one and 1/2 -min_half_sf3a_IgG<-0.5*min(incidence_summary$Incidence_Rate) - -#0.288 - -# Create a data frame for the incidence rate summary -incidence_rate_summary <- data.frame( - Metric = c("Median Incidence Rate", - "2x Maximum Incidence Rate", - "1/2 Minimum Incidence Rate"), - Value = c(median(incidence_summary$Incidence_Rate), # Median of incidence rates - 2 * max(incidence_summary$Incidence_Rate), # 2x Maximum incidence rate - 0.5 * min(incidence_summary$Incidence_Rate)) # 1/2 Minimum incidence rate -) - -# Print the table -print(incidence_rate_summary) -``` -These three values are preliminary observations of lambda from sf3a_IgG. - -We choose a lambda that is half of the minimum lambda from the four regions. 
- -\newpage - -# Simulation (300times) with the smallest lambda -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} - -# Define the simulation function -simulate_seroincidence <- function(nrep, n_sim, observed, range = NULL) { - # Set parallel plan inside function to avoid issues with distributed nodes - plan(multicore) # Use multiple cores for parallel processing (works best on HPC) - - # Parameters - dmcmc <- curve_params_shigella # Curve parameters - antibodies <- c("IgG") # Antigen-isotypes - lambda <- observed # Simulated incidence rate per person-year - - # Biologic noise distribution - dlims <- rbind("IgG" = c(min = 0, max = 0.5)) - - # Noise parameters - cond <- tibble( - antigen_iso = c("IgG"), - nu = c(0.5), # Biologic noise (nu) - eps = c(0.25), # Measurement noise (eps) - y.low = c(25), # Low cutoff (llod) - y.high = c(200000) # High cutoff (y.high) - ) - - # Perform simulations in parallel - results <- future_map(1:n_sim, function(i) { - tryCatch({ - # Generate cross-sectional data - csdata <- sim.cs( - curve_params = dmcmc, - lambda = lambda, - n.smpl = nrep, - age_range = range, - antigen_isos = antibodies, - n.mc = 0, - renew_params = TRUE, # Use different parameters for each simulation - add.noise = TRUE, - noise_limits = dlims, - format = "long" - ) - - # Estimate seroincidence - est <- est.incidence( - pop_data = csdata, - curve_params = dmcmc, - noise_params = cond, - lambda_start = 0.1, - build_graph = TRUE, - verbose = FALSE, - print_graph = FALSE, - antigen_isos = antibodies - ) - - # Return results for this simulation - list(csdata = csdata, est1 = est) - }, error = function(e) { - return(list(error = e$message)) # Capture and store errors instead of stopping execution - }) - }, .options = furrr_options(seed = TRUE)) - - # Ensure sequential processing after function execution - plan(sequential) - - return(results) -} - -# ------------------------ # -# 🔹 Run Simulations in Parallel -# ------------------------ # - -# Set up 
parallel processing (Ensure future::plan() is set before execution) -plan(multicore) # Works best on HPC - -# Define parameter sets -sim_params <- list( - list(nrep = 100, range = c(0, 2)), - list(nrep = 100, range = c(2, 5)), - list(nrep = 200, range = c(0, 2)), - list(nrep = 200, range = c(2, 5)), - list(nrep = 300, range = c(0, 2)), - list(nrep = 300, range = c(2, 5)), - list(nrep = 400, range = c(0, 2)), - list(nrep = 400, range = c(2, 5)) -) - -# Run simulations in a loop to reduce redundant code -results_list <- lapply(sim_params, function(params) { - cat("Running simulation for nrep =", params$nrep, "range =", params$range, "\n") - simulate_seroincidence(nrep = params$nrep, n_sim = 300, observed = min_half_sf3a_IgG, range = params$range) -}) - -# Assign results to variables -results_100_1 <- results_list[[1]] -results_100_2 <- results_list[[2]] -results_200_1 <- results_list[[3]] -results_200_2 <- results_list[[4]] -results_300_1 <- results_list[[5]] -results_300_2 <- results_list[[6]] -results_400_1 <- results_list[[7]] -results_400_2 <- results_list[[8]] - -# Stop parallel processing -plan(sequential) # Return to sequential processing after execution -``` - - -## Store each sample size in table -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Define a function to generate final tables -generate_final_table <- function(results_list, sample_size) { - # Initialize an empty list to store the results - summary_results <- list() - - # Loop through each of the 100 results and extract the required columns - for (i in 1:300) { - # Extract the summary for each result - result_summary <- summary(results_list[[i]]$est1) - - # Select the required columns - extracted_columns <- result_summary %>% - select(incidence.rate, SE, CI.lwr, CI.upr) - - # Add a column for the index (optional, for tracking) - extracted_columns <- extracted_columns %>% - mutate(index = i) - - # Append to the list - summary_results[[i]] <- extracted_columns - } - - # Combine all 
results into a single data frame - final_table <- bind_rows(summary_results) %>% - mutate(sample_size = sample_size) # Add sample size column for clarity - - return(final_table) -} - -# Example usage for different sample sizes -final_table_100_1 <- generate_final_table(results_100_1, 100) -final_table_100_2 <- generate_final_table(results_100_2, 100) - -final_table_200_1 <- generate_final_table(results_200_1, 200) -final_table_200_2 <- generate_final_table(results_200_2, 200) - -final_table_300_1 <- generate_final_table(results_300_1, 300) -final_table_300_2 <- generate_final_table(results_300_2, 300) - -final_table_400_1 <- generate_final_table(results_400_1, 400) -final_table_400_2 <- generate_final_table(results_400_2, 400) -``` - - - -## Graphs of where the x axis is the sample size and the y axis is the empirical standard error - -Set the preliminary observed lambda as half of the minimum incidence rate from the four regions. - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# Function to calculate metrics for each table -calculate_metrics <- function(data, sample_size) { - empirical_se <- sd(data$incidence.rate) - - - # Return a data frame with results - data.frame( - sample_size = sample_size, - empirical_se = empirical_se - - ) -} - -# Apply the function to each table -metrics_100_1 <- calculate_metrics(final_table_100_1, 100) -metrics_100_2 <- calculate_metrics(final_table_100_2, 100) - -metrics_200_1 <- calculate_metrics(final_table_200_1, 200) -metrics_200_2 <- calculate_metrics(final_table_200_2, 200) - -metrics_300_1 <- calculate_metrics(final_table_300_1, 300) -metrics_300_2 <- calculate_metrics(final_table_300_2, 300) - -metrics_400_1 <- calculate_metrics(final_table_400_1, 400) -metrics_400_2 <- calculate_metrics(final_table_400_2, 400) - - -# Combine the results into a single summary table -summary_metrics_1 <- bind_rows(metrics_100_1, metrics_200_1, metrics_300_1, - metrics_400_1) - - -summary_metrics_2 <- bind_rows(metrics_100_2, metrics_200_2, 
metrics_300_2, - metrics_400_2) - -# Add a column to distinguish the datasets -summary_metrics_1 <- summary_metrics_1 %>% - mutate(Age_Group = "Age 0-2") - -summary_metrics_2 <- summary_metrics_2 %>% - mutate(Age_Group = "Age 2-5") - -# Combine both datasets into one -summary_metrics_combined <- bind_rows(summary_metrics_1, summary_metrics_2) - -# Plot with color to differentiate age groups -ggplot(summary_metrics_combined, aes(x = sample_size, y = empirical_se, color = Age_Group)) + - geom_line() + - geom_point() + - labs( - title = "Empirical Standard Error vs. Sample Size", - x = "Sample Size", - y = "Empirical Standard Error", - color = "Age Group" - ) + - theme_minimal() -``` - -\newpage - -## Table - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -summary_metrics_combined %>% - kable() -``` \ No newline at end of file diff --git a/sim_hpc_sf3a_med.qmd b/sim_hpc_sf3a_med.qmd deleted file mode 100644 index f7c0c71..0000000 --- a/sim_hpc_sf3a_med.qmd +++ /dev/null @@ -1,401 +0,0 @@ ---- -title: "Simulation with the Median Lambda for Sf3a IgG" -format: - pdf: - number-sections: true - number-depth: 2 - number-offset: [0, 0] - pdf-engine: pdflatex # <-- Use pdflatex instead of xelatex or lualatex -editor: visual -editor_options: - chunk_output_type: console ---- - -```{r setup, include=FALSE,echo=FALSE} -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -``` - -```{r, echo=FALSE, message=FALSE} -library(tibble) -library(dplyr) -library(serocalculator) -library(haven) -library(knitr) -library(plotly) -library(kableExtra) -library(tidyr) -library(arsenal) -library(forcats) -library(huxtable) -library(magrittr) -library(parameters) -library(ggplot2) -library(ggeasy) -library(scales) -library(plotly) -library(patchwork) -library(tidyverse) -library(readxl) -library(purrr) -library(here) -library(table1) -library(furrr) -library(future) -``` - - -# Vary simulation lambdas across the range in the real cross-sectional data - -## 1. 
Estimate the shigella incidence rate using the real cross-sectional Shigella data, -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Load data -load("~/sf3a.RData") -``` - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Set noise -create_noise_df <- function(country) { - noise_df <- tibble( - antigen_iso = c("IgG"), - Country = factor(c(country)), # Use input argument for Country - y.low = c(25), - eps = c(0.25), - nu = c(0.5), - y.high = c(200000) - ) - - return(noise_df) -} - -noise_df_USA <- create_noise_df("MA USA") -noise_df_Niger <- create_noise_df("Niger") -noise_df_Sierra <- create_noise_df("Sierra Leone") -noise_df_Ghana <- create_noise_df("Ghana") -``` - -## 2. Run simulations using: -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# MA USA -est_USA <- est.incidence( - pop_data = df_xs_USA_sf3a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_USA, - antigen_isos = c("IgG"), -) - -# Ghana -est_Ghana <- est.incidence( - pop_data = df_xs_Ghana_sf3a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Ghana, - antigen_isos = c("IgG"), -) - -# Niger -est_Niger <- est.incidence( - pop_data = df_xs_Niger_sf3a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Niger, - antigen_isos = c("IgG"), -) - -# Sierra Leone -est_Sierra <- est.incidence( - pop_data = df_xs_Sierra_sf3a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Sierra, - antigen_isos = c("IgG"), -) - -# create table of incidence.rate of each region - -create_incidence_table <- function(...) { - # Capture input objects and their names - est_list <- list(...) 
- country_names <- names(est_list) - - # Extract incidence.rate from the summary() of each estimate object - incidence_rates <- sapply(est_list, function(x) summary(x)$incidence.rate) - - # Create a tidy tibble - incidence_table <- tibble( - Country = country_names, - Incidence_Rate = incidence_rates - ) - - return(incidence_table) -} -# Example usage with four different country estimates: -incidence_summary <- create_incidence_table( - USA = est_USA, - Ghana = est_Ghana, - Niger = est_Niger, - Sierra_Leone = est_Sierra -) - -# Display the summary table -print(incidence_summary) - -# 1) The median of the incidence rate estimates from step 1. -# Get incidence rate estimates from each region (4 regions) and do median -med_sf3a_IgG<-median(incidence_summary$Incidence_Rate) - -#0.756 - -# 2) 2x the maximum incidence rate estimate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the maximum one and 2x -max2_sf3a_IgG<-2*max(incidence_summary$Incidence_Rate) - -# 2.058 - -# 3) 1/2 the minimum incidence rate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the minimum one and 1/2 -min_half_sf3a_IgG<-0.5*min(incidence_summary$Incidence_Rate) - -#0.288 - -# Create a data frame for the incidence rate summary -incidence_rate_summary <- data.frame( - Metric = c("Median Incidence Rate", - "2x Maximum Incidence Rate", - "1/2 Minimum Incidence Rate"), - Value = c(median(incidence_summary$Incidence_Rate), # Median of incidence rates - 2 * max(incidence_summary$Incidence_Rate), # 2x Maximum incidence rate - 0.5 * min(incidence_summary$Incidence_Rate)) # 1/2 Minimum incidence rate -) - -# Print the table -print(incidence_rate_summary) -``` -These three values are preliminary observations of lambda from sf3a_IgG. - -We choose a lambda that is the median of the four regions. 
- -\newpage - -# Simulation (300times) with the smallest lambda -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} - -# Define the simulation function -simulate_seroincidence <- function(nrep, n_sim, observed, range = NULL) { - # Set parallel plan inside function to avoid issues with distributed nodes - plan(multicore) # Use multiple cores for parallel processing (works best on HPC) - - # Parameters - dmcmc <- curve_params_shigella # Curve parameters - antibodies <- c("IgG") # Antigen-isotypes - lambda <- observed # Simulated incidence rate per person-year - - # Biologic noise distribution - dlims <- rbind("IgG" = c(min = 0, max = 0.5)) - - # Noise parameters - cond <- tibble( - antigen_iso = c("IgG"), - nu = c(0.5), # Biologic noise (nu) - eps = c(0.25), # Measurement noise (eps) - y.low = c(25), # Low cutoff (llod) - y.high = c(200000) # High cutoff (y.high) - ) - - # Perform simulations in parallel - results <- future_map(1:n_sim, function(i) { - tryCatch({ - # Generate cross-sectional data - csdata <- sim.cs( - curve_params = dmcmc, - lambda = lambda, - n.smpl = nrep, - age_range = range, - antigen_isos = antibodies, - n.mc = 0, - renew_params = TRUE, # Use different parameters for each simulation - add.noise = TRUE, - noise_limits = dlims, - format = "long" - ) - - # Estimate seroincidence - est <- est.incidence( - pop_data = csdata, - curve_params = dmcmc, - noise_params = cond, - lambda_start = 0.1, - build_graph = TRUE, - verbose = FALSE, - print_graph = FALSE, - antigen_isos = antibodies - ) - - # Return results for this simulation - list(csdata = csdata, est1 = est) - }, error = function(e) { - return(list(error = e$message)) # Capture and store errors instead of stopping execution - }) - }, .options = furrr_options(seed = TRUE)) - - # Ensure sequential processing after function execution - plan(sequential) - - return(results) -} - -# ------------------------ # -# 🔹 Run Simulations in Parallel -# ------------------------ # - -# Set up 
parallel processing (Ensure future::plan() is set before execution) -plan(multicore) # Works best on HPC - -# Define parameter sets -sim_params <- list( - list(nrep = 100, range = c(0, 2)), - list(nrep = 100, range = c(2, 5)), - list(nrep = 200, range = c(0, 2)), - list(nrep = 200, range = c(2, 5)), - list(nrep = 300, range = c(0, 2)), - list(nrep = 300, range = c(2, 5)), - list(nrep = 400, range = c(0, 2)), - list(nrep = 400, range = c(2, 5)) -) - -# Run simulations in a loop to reduce redundant code -results_list <- lapply(sim_params, function(params) { - cat("Running simulation for nrep =", params$nrep, "range =", params$range, "\n") - simulate_seroincidence(nrep = params$nrep, n_sim = 300, observed = med_sf3a_IgG, range = params$range) -}) - -# Assign results to variables -results_100_1 <- results_list[[1]] -results_100_2 <- results_list[[2]] -results_200_1 <- results_list[[3]] -results_200_2 <- results_list[[4]] -results_300_1 <- results_list[[5]] -results_300_2 <- results_list[[6]] -results_400_1 <- results_list[[7]] -results_400_2 <- results_list[[8]] - -# Stop parallel processing -plan(sequential) # Return to sequential processing after execution -``` - - -## Store each sample size in table -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Define a function to generate final tables -generate_final_table <- function(results_list, sample_size) { - # Initialize an empty list to store the results - summary_results <- list() - - # Loop through each of the 100 results and extract the required columns - for (i in 1:300) { - # Extract the summary for each result - result_summary <- summary(results_list[[i]]$est1) - - # Select the required columns - extracted_columns <- result_summary %>% - select(incidence.rate, SE, CI.lwr, CI.upr) - - # Add a column for the index (optional, for tracking) - extracted_columns <- extracted_columns %>% - mutate(index = i) - - # Append to the list - summary_results[[i]] <- extracted_columns - } - - # Combine all 
results into a single data frame - final_table <- bind_rows(summary_results) %>% - mutate(sample_size = sample_size) # Add sample size column for clarity - - return(final_table) -} - -# Example usage for different sample sizes -final_table_100_1 <- generate_final_table(results_100_1, 100) -final_table_100_2 <- generate_final_table(results_100_2, 100) - -final_table_200_1 <- generate_final_table(results_200_1, 200) -final_table_200_2 <- generate_final_table(results_200_2, 200) - -final_table_300_1 <- generate_final_table(results_300_1, 300) -final_table_300_2 <- generate_final_table(results_300_2, 300) - -final_table_400_1 <- generate_final_table(results_400_1, 400) -final_table_400_2 <- generate_final_table(results_400_2, 400) -``` - - - -## Graphs of where the x axis is the sample size and the y axis is the empirical standard error - -Set the preliminary observed lambda as the median incidence rate across the four regions. - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# Function to calculate metrics for each table -calculate_metrics <- function(data, sample_size) { - empirical_se <- sd(data$incidence.rate) - - - # Return a data frame with results - data.frame( - sample_size = sample_size, - empirical_se = empirical_se - - ) -} - -# Apply the function to each table -metrics_100_1 <- calculate_metrics(final_table_100_1, 100) -metrics_100_2 <- calculate_metrics(final_table_100_2, 100) - -metrics_200_1 <- calculate_metrics(final_table_200_1, 200) -metrics_200_2 <- calculate_metrics(final_table_200_2, 200) - -metrics_300_1 <- calculate_metrics(final_table_300_1, 300) -metrics_300_2 <- calculate_metrics(final_table_300_2, 300) - -metrics_400_1 <- calculate_metrics(final_table_400_1, 400) -metrics_400_2 <- calculate_metrics(final_table_400_2, 400) - - -# Combine the results into a single summary table -summary_metrics_1 <- bind_rows(metrics_100_1, metrics_200_1, metrics_300_1, - metrics_400_1) - - -summary_metrics_2 <- bind_rows(metrics_100_2, metrics_200_2, 
metrics_300_2, - metrics_400_2) - -# Add a column to distinguish the datasets -summary_metrics_1 <- summary_metrics_1 %>% - mutate(Age_Group = "Age 0-2") - -summary_metrics_2 <- summary_metrics_2 %>% - mutate(Age_Group = "Age 2-5") - -# Combine both datasets into one -summary_metrics_combined <- bind_rows(summary_metrics_1, summary_metrics_2) - -# Plot with color to differentiate age groups -ggplot(summary_metrics_combined, aes(x = sample_size, y = empirical_se, color = Age_Group)) + - geom_line() + - geom_point() + - labs( - title = "Empirical Standard Error vs. Sample Size", - x = "Sample Size", - y = "Empirical Standard Error", - color = "Age Group" - ) + - theme_minimal() -``` - -\newpage - -## Table - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -summary_metrics_combined %>% - kable() -``` \ No newline at end of file diff --git a/sim_lambdas_ipab.r b/sim_lambdas_ipab.r deleted file mode 100644 index 7969800..0000000 --- a/sim_lambdas_ipab.r +++ /dev/null @@ -1,335 +0,0 @@ -library(tibble) -library(dplyr) -library(serocalculator) -library(haven) -library(knitr) -library(plotly) -library(kableExtra) -library(tidyr) -library(arsenal) -library(forcats) -library(huxtable) -library(magrittr) -library(parameters) -library(ggplot2) -library(ggeasy) -library(scales) -library(plotly) -library(patchwork) -library(tidyverse) -library(readxl) -library(purrr) -library(here) -library(table1) -library(furrr) -library(future) - -### Vary simulation lambdas across the range in the real cross-sectional data - -## 1. Estimate the shigella incidence rate using the real cross-sectional Shigella data, -## separately for each geographic region in the data. 
- -load("~/ipab.RData") - -################################################################################ -## Set noise -create_noise_df <- function(country) { - noise_df <- tibble( - antigen_iso = c("IgG"), - Country = factor(c(country)), # Use input argument for Country - y.low = c(25), - eps = c(0.25), - nu = c(0.5), - y.high = c(200000) - ) - - return(noise_df) -} - -noise_df_USA <- create_noise_df("MA USA") -noise_df_Niger <- create_noise_df("Niger") -noise_df_Sierra <- create_noise_df("Sierra Leone") -noise_df_Ghana <- create_noise_df("Ghana") - - -################################################################################ -## 2. Run simulations using: -# MA USA -est_USA <- est.incidence( - pop_data = df_xs_USA_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_USA, - antigen_isos = c("IgG"), -) - -# Ghana -est_Ghana <- est.incidence( - pop_data = df_xs_Ghana_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Ghana, - antigen_isos = c("IgG"), -) - -# Niger -est_Niger <- est.incidence( - pop_data = df_xs_Niger_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Niger, - antigen_isos = c("IgG"), -) - -# Sierra Leone -est_Sierra <- est.incidence( - pop_data = df_xs_Sierra_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Sierra, - antigen_isos = c("IgG"), -) - -# create table of incidence.rate of each region - -create_incidence_table <- function(...) { - # Capture input objects and their names - est_list <- list(...) 
- country_names <- names(est_list) - - # Extract incidence.rate from the summary() of each estimate object - incidence_rates <- sapply(est_list, function(x) summary(x)$incidence.rate) - - # Create a tidy tibble - incidence_table <- tibble( - Country = country_names, - Incidence_Rate = incidence_rates - ) - - return(incidence_table) -} -# Example usage with four different country estimates: -incidence_summary <- create_incidence_table( - USA = est_USA, - Ghana = est_Ghana, - Niger = est_Niger, - Sierra_Leone = est_Sierra -) - -# Display the summary table -print(incidence_summary) - -# 1) The median of the incidence rate estimates from step 1. -# Get incidence rate estimates from each region (4 regions) and do median -med_ipab_IgG<-median(incidence_summary$Incidence_Rate) -#1.188 - -# 2) 2x the maximum incidence rate estimate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the maximum one and 2x -max2_ipab_IgG<-2*max(incidence_summary$Incidence_Rate) -# 2.788 - -# 3) 1/2 the minimum incidence rate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the minimum one and 1/2 -min_half_ipab_IgG<-0.5*min(incidence_summary$Incidence_Rate) -#0.185 - -## These three values are preliminary observations of lambda from ipab_IgG. 
- -################################################################################ -################################################################################ -################################################################################ - -## Simulation (200times) -# Define the simulation function - -simulate_seroincidence <- function(nrep, n_sim, observed, range = NULL) { - # Set parallel plan inside function to avoid issues with distributed nodes - plan(multicore) # Use multiple cores for parallel processing (works best on HPC) - - # Parameters - dmcmc <- curve_params_shigella # Curve parameters - antibodies <- c("IgG") # Antigen-isotypes - lambda <- observed # Simulated incidence rate per person-year - - # Biologic noise distribution - dlims <- rbind("IgG" = c(min = 0, max = 0.5)) - - # Noise parameters - cond <- tibble( - antigen_iso = c("IgG"), - nu = c(0.5), # Biologic noise (nu) - eps = c(0.25), # Measurement noise (eps) - y.low = c(25), # Low cutoff (llod) - y.high = c(200000) # High cutoff (y.high) - ) - - # Perform simulations in parallel - results <- future_map(1:n_sim, function(i) { - tryCatch({ - # Generate cross-sectional data - csdata <- sim.cs( - curve_params = dmcmc, - lambda = lambda, - n.smpl = nrep, - age_range = range, - antigen_isos = antibodies, - n.mc = 0, - renew_params = TRUE, # Use different parameters for each simulation - add.noise = TRUE, - noise_limits = dlims, - format = "long" - ) - - # Estimate seroincidence - est <- est.incidence( - pop_data = csdata, - curve_params = dmcmc, - noise_params = cond, - lambda_start = 0.1, - build_graph = TRUE, - verbose = FALSE, - print_graph = FALSE, - antigen_isos = antibodies - ) - - # Return results for this simulation - list(csdata = csdata, est1 = est) - }, error = function(e) { - return(list(error = e$message)) # Capture and store errors instead of stopping execution - }) - }, .options = furrr_options(seed = TRUE)) - - # Ensure sequential processing after function execution - 
plan(sequential) - - return(results) -} - -# ------------------------ # -# 🔹 Run Simulations in Parallel -# ------------------------ # - -# Set up parallel processing (Ensure future::plan() is set before execution) -plan(multicore) # Works best on HPC - -# Define parameter sets -sim_params <- list( - list(nrep = 100, range = c(0, 2)), - list(nrep = 100, range = c(2, 5)), - list(nrep = 200, range = c(0, 2)), - list(nrep = 200, range = c(2, 5)), - list(nrep = 300, range = c(0, 2)), - list(nrep = 300, range = c(2, 5)), - list(nrep = 400, range = c(0, 2)), - list(nrep = 400, range = c(2, 5)) -) - -# Run simulations in a loop to reduce redundant code -results_list <- lapply(sim_params, function(params) { - cat("Running simulation for nrep =", params$nrep, "range =", params$range, "\n") - simulate_seroincidence(nrep = params$nrep, n_sim = 200, observed = min_half_ipab_IgG, range = params$range) -}) - -# Assign results to variables -results_100_1 <- results_list[[1]] -results_100_2 <- results_list[[2]] -results_200_1 <- results_list[[3]] -results_200_2 <- results_list[[4]] -results_300_1 <- results_list[[5]] -results_300_2 <- results_list[[6]] -results_400_1 <- results_list[[7]] -results_400_2 <- results_list[[8]] - -# Stop parallel processing -plan(sequential) # Return to sequential processing after execution - - -## Store each sample size in table -# Define a function to generate final tables -generate_final_table <- function(results_list, sample_size) { - # Initialize an empty list to store the results - summary_results <- list() - - # Loop through each of the 100 results and extract the required columns - for (i in 1:200) { - # Extract the summary for each result - result_summary <- summary(results_list[[i]]$est1) - - # Select the required columns - extracted_columns <- result_summary %>% - select(incidence.rate, SE, CI.lwr, CI.upr) - - # Add a column for the index (optional, for tracking) - extracted_columns <- extracted_columns %>% - mutate(index = i) - - # Append 
to the list - summary_results[[i]] <- extracted_columns - } - - # Combine all results into a single data frame - final_table <- bind_rows(summary_results) %>% - mutate(sample_size = sample_size) # Add sample size column for clarity - - return(final_table) -} - -# Example usage for different sample sizes -final_table_100_1 <- generate_final_table(results_100_1, 100) -final_table_100_2 <- generate_final_table(results_100_2, 100) - -final_table_200_1 <- generate_final_table(results_200_1, 200) -final_table_200_2 <- generate_final_table(results_200_2, 200) - -final_table_300_1 <- generate_final_table(results_300_1, 300) -final_table_300_2 <- generate_final_table(results_300_2, 300) - -final_table_400_1 <- generate_final_table(results_400_1, 400) -final_table_400_2 <- generate_final_table(results_400_2, 400) - - -## Graphs of where the x axis is the sample size and the y axis is -## the empirical standard error - -# Function to calculate metrics for each table -calculate_metrics <- function(data, sample_size) { - empirical_se <- sd(data$incidence.rate) - - - # Return a data frame with results - data.frame( - sample_size = sample_size, - empirical_se = empirical_se - - ) -} - -# Apply the function to each table -metrics_100_1 <- calculate_metrics(final_table_100_1, 100) -metrics_100_2 <- calculate_metrics(final_table_100_2, 100) - -metrics_200_1 <- calculate_metrics(final_table_200_1, 100) -metrics_200_2 <- calculate_metrics(final_table_200_2, 100) - -metrics_300_1 <- calculate_metrics(final_table_300_1, 100) -metrics_300_2 <- calculate_metrics(final_table_300_2, 100) - -metrics_400_1 <- calculate_metrics(final_table_400_1, 100) -metrics_400_2 <- calculate_metrics(final_table_400_2, 100) - - -# Combine the results into a single summary table -summary_metrics_1 <- bind_rows(metrics_100_1, metrics_200_1, metrics_300_1, - metrics_400_1) - -summary_metrics_2 <- bind_rows(metrics_100_2, metrics_200_2, metrics_300_2, - metrics_400_2) - -# Add a column to distinguish the 
datasets -summary_metrics_1 <- summary_metrics_1 %>% - mutate(Age_Group = "Age 0-2") - -summary_metrics_2 <- summary_metrics_2 %>% - mutate(Age_Group = "Age 2-5") - -# Combine both datasets into one -summary_metrics_combined <- bind_rows(summary_metrics_1, summary_metrics_2) - -save.image("ipab_results.RData") \ No newline at end of file diff --git a/simulation1.qmd b/simulation1.qmd deleted file mode 100644 index 8ce424e..0000000 --- a/simulation1.qmd +++ /dev/null @@ -1,621 +0,0 @@ ---- -title: "Simulation with the Smallest Lambda for IpaB IgG" -format: - pdf: - number-sections: true - number-depth: 2 - number-offset: [0, 0] -editor: visual -output: - pdf_document: - orientation: landscape -editor_options: - chunk_output_type: console ---- - -```{r setup, include=FALSE,echo=FALSE} -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -``` - -```{r, echo=FALSE, message=FALSE} -library(gridExtra) -library(mgcv) # For advanced GAM smoothing -library(haven) -library(knitr) -library(plotly) -library(kableExtra) -library(tidyr) -library(arsenal) -library(dplyr) -library(forcats) -library(huxtable) -library(magrittr) -library(parameters) -library(kableExtra) -library(ggplot2) -library(ggeasy) -library(scales) -library(plotly) -library(patchwork) -library(tidyverse) -library(gtsummary) -library(readxl) -library(purrr) -library(serocalculator) -library(runjags) -library(coda) -library(ggmcmc) -library(here) -library(bayesplot) -library(table1) -library(tibble) -library(furrr) -library(dplyr) -``` - - -# Vary simulation lambdas across the range in the real cross-sectional data - -## 1. Estimate the shigella incidence rate using the real cross-sectional Shigella data, -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## separately for each geographic region in the data. 
- -# load shigella data -df <- read_excel("3.8.2024 Compiled Shigella datav2.xlsx", - sheet = "Compiled") - - -# create function for generating specific region data -create_xs_data <- function(df, filter_countries, filter_antigen_iso, value_col) { - df %>% - # First, create/rename columns - mutate( - id = sid, - Country = site_name, - study = study_name, - age = age, - antigen_iso = factor(isotype_name), - value = {{ value_col }} # value_col is specified by the user, e.g. n_ipab_MFI - ) %>% - # Then filter by the desired catchment(s) and antigen iso(s) - filter( - Country %in% filter_countries, - antigen_iso %in% filter_antigen_iso - ) %>% - # Create age categories - mutate( - ageCat = factor(case_when( - age < 5 ~ "<5", - age >= 5 & age <=15 ~ "5-15", - age > 15 ~ "16+" - )) - ) %>% - # Optionally select only the needed columns - select(id, Country, study, age, antigen_iso, value, ageCat) -} - -# Cross-sectional data: MA USA, ipab_IgG -df_xs_USA_ipab_IgG <- create_xs_data( - df, - filter_countries = c("MA USA"), - filter_antigen_iso = c("IgG"), - value_col = n_ipab_MFI -) - -# explicitly rename the age column -df_xs_USA_ipab_IgG <- df_xs_USA_ipab_IgG %>% - rename(age = age) # Ensure it is correctly named - -# If get_age_var() looks for an attribute, you can manually assign it: -attr(df_xs_USA_ipab_IgG , "age_var") <- "age" - -# Similarly, check if the antibody value column is recognized: -get_value_var <- serocalculator:::get_value_var -get_value_var(df_xs_USA_ipab_IgG ) - -# If it returns NULL, assign it: -attr(df_xs_USA_ipab_IgG , "value_var") <- "value" - -# Cross-sectional data: MA USA, ipab_IgA - -# Cross-sectional data: Ghana, ipab_IgG -df_xs_Ghana_ipab_IgG <- create_xs_data( - df, - filter_countries = c("Ghana"), - filter_antigen_iso = c("IgG"), - value_col = n_ipab_MFI -) - -# Cross-sectional data: Ghana, ipab_IgA - -# Cross-sectional data: Niger, ipab_IgG -df_xs_Niger_ipab_IgG <- create_xs_data( - df, - filter_countries = c("Niger"), - filter_antigen_iso 
= c("IgG"), - value_col = n_ipab_MFI -) - -# Cross-sectional data: Niger, ipab_IgA - -# Cross-sectional data: Sierra Leone, ipab_IgG -df_xs_Sierra_ipab_IgG <- create_xs_data( - df, - filter_countries = c("Sierra Leone"), - filter_antigen_iso = c("IgG"), - value_col = n_ipab_MFI -) - -# Cross-sectional data: Sierra Leone, ipab_IgA - -# Create function to clearly define cross-sectional data -prepare_df_for_serocalculator <- function(df, age_col = "age", value_col = "value") { - # Ensure correct column names - df <- df %>% - rename(age = all_of(age_col)) - - # Assign attributes for serocalculator - attr(df, "age_var") <- "age" - attr(df, "value_var") <- value_col - - # Check if serocalculator recognizes attributes - get_value_var <- serocalculator:::get_value_var - detected_value_var <- get_value_var(df) - - if (is.null(detected_value_var)) { - warning("serocalculator did not detect the 'value' column. Check column naming.") - } else { - message("serocalculator recognized 'value' column: ", detected_value_var) - } - - return(df) -} - -# Application -df_xs_USA_ipab_IgG <- prepare_df_for_serocalculator(df_xs_USA_ipab_IgG) -df_xs_Ghana_ipab_IgG<- prepare_df_for_serocalculator(df_xs_Ghana_ipab_IgG) -df_xs_Niger_ipab_IgG<- prepare_df_for_serocalculator(df_xs_Niger_ipab_IgG) -df_xs_Sierra_ipab_IgG<- prepare_df_for_serocalculator(df_xs_Sierra_ipab_IgG) -``` - - - -## Get parameters from longitudinal data -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Define a function to filter and manipulate Shigella data -process_shigella_data <- function(data, study_filter, antigen) { - # Filter the data for the specific study - filtered_data <- data %>% - filter(study_name == study_filter) - - # Capture the column name of the antigen - antigen_col <- ensym(antigen) - - # Manipulate and restructure the data - processed_data <- filtered_data %>% - select(isotype_name, sid, timepoint, `Actual day`, !!antigen_col) %>% - mutate( - index_id = sid, - antigen_iso = 
isotype_name, - visit = timepoint, - timeindays = `Actual day`, - result = !!antigen_col - ) %>% - group_by(index_id, antigen_iso) %>% - arrange(visit) %>% - mutate(visit_num = rank(visit, ties.method = "first")) %>% - ungroup() %>% - # Remove rows with NA in timeindays - filter(!is.na(timeindays)) - - return(processed_data) -} - - -dL_clean <- process_shigella_data(data = df, study_filter = "SOSAR", antigen = n_ipab_MFI) - - -# Construct the path to "prep_data.r" using here -prep_data_path <- here::here("R", "prep_data.r") -prep_priors_path <- here::here("R", "prep_priors.R") - -# Source the file to load the prep_data function -source(prep_data_path) -source(prep_priors_path) - -#prepare data for modeline -# Create 5 different longdata -longdata <- prep_data(dL_clean) -priors <- prep_priors(max_antigens = longdata$n_antigen_isos) - - -nchains <- 4; # nr of MC chains to run simultaneously -nadapt <- 1000; # nr of iterations for adaptation -nburnin <- 100; # nr of iterations to use for burn-in -nmc <- 100; # nr of samples in posterior chains -niter <- 200; # nr of iterations for posterior sample -nthin <- round(niter/nmc); # thinning needed to produce nmc from niter - -tomonitor <- c("y0", "y1", "t1", "alpha", "shape"); - -#This handles the seed to reproduce the results -initsfunction <- function(chain){ - stopifnot(chain %in% (1:4)); # max 4 chains allowed... 
- .RNG.seed <- (1:4)[chain]; - .RNG.name <- c("base::Wichmann-Hill","base::Marsaglia-Multicarry", - "base::Super-Duper","base::Mersenne-Twister")[chain]; - return(list(".RNG.seed"=.RNG.seed,".RNG.name"=.RNG.name)); -} - -file.mod <- here::here("inst", "extdata", "model.jags.r") - -set.seed(11325) -jags.post <- run.jags(model = file.mod, - data = c(longdata, priors), - inits = initsfunction, - method = "parallel", - adapt = nadapt, - burnin = nburnin, - thin = nthin, - sample = nmc, - n.chains = nchains, - monitor = tomonitor, - summarise = FALSE) - -mcmc_list <- as.mcmc.list(jags.post) - -mcmc_df <- ggs(mcmc_list) - -wide_predpar_df <- mcmc_df %>% - mutate( - parameter = sub("^(\\w+)\\[.*", "\\1", Parameter), - index_id = as.numeric(sub("^\\w+\\[(\\d+),.*", "\\1", Parameter)), - antigen_iso = as.numeric(sub("^\\w+\\[\\d+,(\\d+).*", "\\1", Parameter)) - ) %>% - mutate( - index_id = factor(index_id, labels = c(unique(dL_clean$index_id), "newperson")), - antigen_iso = factor(antigen_iso, labels = unique(dL_clean$antigen_iso))) %>% - filter(index_id == "newperson") %>% - select(-Parameter) %>% - pivot_wider(names_from = "parameter", values_from="value") %>% - rowwise() %>% - droplevels() %>% - ungroup() %>% - rename(r = shape) - -# Assuming wide_predpar_df is your data frame -curve_params <- wide_predpar_df - -# Set class and attributes for serocalculator -class(curve_params) <- c("curve_params", class(curve_params)) -antigen_isos <- unique(curve_params$antigen_iso) -attr(curve_params, "antigen_isos") <- antigen_isos - -curve_params<-curve_params%>% - mutate( - iter=Iteration)%>% - select(antigen_iso,iter,y0,y1,t1,alpha,r) - -curve_params_shigella<-curve_params -``` - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Set noise -create_noise_df <- function(country) { - noise_df <- tibble( - antigen_iso = c("IgG"), - Country = factor(c(country)), # Use input argument for Country - y.low = c(25), - eps = c(0.25), - nu = c(0.5), - y.high = c(200000) - ) - 
- return(noise_df) -} - -noise_df_USA <- create_noise_df("MA USA") -noise_df_Niger <- create_noise_df("Niger") -noise_df_Sierra <- create_noise_df("Sierra Leone") -noise_df_Ghana <- create_noise_df("Ghana") -``` - -## 2. Run simulations using: -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# MA USA -est_USA <- est.incidence( - pop_data = df_xs_USA_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_USA, - antigen_isos = c("IgG"), -) - -# Ghana -est_Ghana <- est.incidence( - pop_data = df_xs_Ghana_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Ghana, - antigen_isos = c("IgG"), -) - -# Niger -est_Niger <- est.incidence( - pop_data = df_xs_Niger_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Niger, - antigen_isos = c("IgG"), -) - -# Sierra Leone -est_Sierra <- est.incidence( - pop_data = df_xs_Sierra_ipab_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Sierra, - antigen_isos = c("IgG"), -) - -# create table of incidence.rate of each region - -create_incidence_table <- function(...) { - # Capture input objects and their names - est_list <- list(...) - country_names <- names(est_list) - - # Extract incidence.rate from the summary() of each estimate object - incidence_rates <- sapply(est_list, function(x) summary(x)$incidence.rate) - - # Create a tidy tibble - incidence_table <- tibble( - Country = country_names, - Incidence_Rate = incidence_rates - ) - - return(incidence_table) -} -# Example usage with four different country estimates: -incidence_summary <- create_incidence_table( - USA = est_USA, - Ghana = est_Ghana, - Niger = est_Niger, - Sierra_Leone = est_Sierra -) - -# Display the summary table -print(incidence_summary) - -# 1) The median of the incidence rate estimates from step 1. 
-# Get incidence rate estimates from each region (4 regions) and do median -med_ipab_IgG<-median(incidence_summary$Incidence_Rate) -#1.188 -med_ipab_IgG -# 2) 2x the maximum incidence rate estimate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the maximum one and 2x -max2_ipab_IgG<-2*max(incidence_summary$Incidence_Rate) -# 2.788 -max2_ipab_IgG -# 3) 1/2 the minimum incidence rate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the minimum one and 1/2 -min_half_ipab_IgG<-0.5*min(incidence_summary$Incidence_Rate) -#0.185 -min_half_ipab_IgG -``` -These three values are preliminary observations of lambda from ipab_IgG. - -We choose a lambda that is half of the minimum lambda from the four regions. - - -# Simulation (200times) with the smallest lambda -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} - -# Define the simulation function -simulate_seroincidence <- function(nrep, n_sim, observed,range=NULL) { - # Parameters - dmcmc <- curve_params_shigella # Curve parameters - antibodies <- c("IgG") # Antigen-isotypes - lambda <- observed # Simulated incidence rate per person-year - - # Biologic noise distribution - dlims <- rbind( - - "IgG" = c(min = 0, max = 0.5) - ) - - # Noise parameters - cond <- tibble( - antigen_iso = c("IgG"), - nu = c(0.5), # Biologic noise (nu) - eps = c(0.25), # Measurement noise (eps) - y.low = c(25), # Low cutoff (llod) - y.high = c(200000) # High cutoff (y.high) - ) - - # Perform simulations in parallel - results <- future_map(1:n_sim, function(i) { - # Generate cross-sectional data - csdata <- sim.cs( - curve_params = dmcmc, - lambda = lambda, - n.smpl = nrep, - age_range = range, - antigen_isos = antibodies, - n.mc = 0, - renew_params = TRUE, # Use different parameters for each simulation - add.noise = TRUE, - noise_limits = dlims, - format = "long" - ) - - # Estimate seroincidence - est <- est.incidence( - pop_data = csdata, - curve_params = dmcmc, - 
noise_params = cond, - lambda_start = 0.1, - build_graph = TRUE, - verbose = FALSE, - print_graph = FALSE, - antigen_isos = antibodies - ) - - # Return results for this simulation - list( - csdata = csdata, - est1 = est - ) - }, .options = furrr_options(seed = TRUE)) - - return(results) -} - -## Generate -# Set up parallel processing with `future` -plan(multisession) # Use multiple sessions for parallelism (local machine) - -# Run the simulations in parallel -set.seed(206251) -results_100_1 <- simulate_seroincidence(nrep = 100, n_sim = 200, - observed=min_half_ipab_IgG,range=c(0,2)) -results_100_2 <- simulate_seroincidence(nrep = 100, n_sim = 200, - observed=min_half_ipab_IgG,range=c(2,5)) -``` - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -set.seed(206252) -results_200_1 <- simulate_seroincidence(nrep = 200, n_sim = 200, - observed=min_half_ipab_IgG,range=c(0,2)) -results_200_2 <- simulate_seroincidence(nrep = 200, n_sim = 200, - observed=min_half_ipab_IgG,range=c(2,5)) - -``` - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -set.seed(206253) -results_300_1 <- simulate_seroincidence(nrep = 300, n_sim = 200, - observed=min_half_ipab_IgG,range=c(0,2)) -results_300_2 <- simulate_seroincidence(nrep = 300, n_sim = 200, - observed=min_half_ipab_IgG,range=c(2,5)) -``` - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -set.seed(206254) -results_400_1 <- simulate_seroincidence(nrep = 400, n_sim = 200, - observed=min_half_ipab_IgG,range=c(0,2)) -results_400_2 <- simulate_seroincidence(nrep = 400, n_sim = 200, - observed=min_half_ipab_IgG,range=c(2,5)) -# Stop parallel processing -plan(sequential) # Return to sequential processing -``` - - -## Store each sample size in table -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Define a function to generate final tables -generate_final_table <- function(results_list, sample_size) { - # Initialize an empty list to store the results - summary_results <- list() - 
- # Loop through each of the 100 results and extract the required columns - for (i in 1:200) { - # Extract the summary for each result - result_summary <- summary(results_list[[i]]$est1) - - # Select the required columns - extracted_columns <- result_summary %>% - select(incidence.rate, SE, CI.lwr, CI.upr) - - # Add a column for the index (optional, for tracking) - extracted_columns <- extracted_columns %>% - mutate(index = i) - - # Append to the list - summary_results[[i]] <- extracted_columns - } - - # Combine all results into a single data frame - final_table <- bind_rows(summary_results) %>% - mutate(sample_size = sample_size) # Add sample size column for clarity - - return(final_table) -} - -# Example usage for different sample sizes -final_table_100_1 <- generate_final_table(results_100_1, 100) -final_table_100_2 <- generate_final_table(results_100_2, 100) - -final_table_200_1 <- generate_final_table(results_200_1, 200) -final_table_200_2 <- generate_final_table(results_200_2, 200) - -final_table_300_1 <- generate_final_table(results_300_1, 300) -final_table_300_2 <- generate_final_table(results_300_2, 300) - -final_table_400_1 <- generate_final_table(results_400_1, 400) -final_table_400_2 <- generate_final_table(results_400_2, 400) -``` - - - -## Graphs of where the x axis is the sample size and the y axis is the empirical standard error - -Set the preliminary observed lambda as half of the minimum incidence rate from the four regions. 
- -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# Function to calculate metrics for each table -calculate_metrics <- function(data, sample_size) { - empirical_se <- sd(data$incidence.rate) - - - # Return a data frame with results - data.frame( - sample_size = sample_size, - empirical_se = empirical_se - - ) -} - -# Apply the function to each table -metrics_100_1 <- calculate_metrics(final_table_100_1, 100) -metrics_100_2 <- calculate_metrics(final_table_100_2, 100) - -metrics_200_1 <- calculate_metrics(final_table_200_1, 200) -metrics_200_2 <- calculate_metrics(final_table_200_2, 200) - -metrics_300_1 <- calculate_metrics(final_table_300_1, 300) -metrics_300_2 <- calculate_metrics(final_table_300_2, 300) - -metrics_400_1 <- calculate_metrics(final_table_400_1, 400) -metrics_400_2 <- calculate_metrics(final_table_400_2, 400) - - -# Combine the results into a single summary table -summary_metrics_1 <- bind_rows(metrics_100_1, metrics_200_1, metrics_300_1, - metrics_400_1) - - -summary_metrics_2 <- bind_rows(metrics_100_2, metrics_200_2, metrics_300_2, - metrics_400_2) - -# Add a column to distinguish the datasets -summary_metrics_1 <- summary_metrics_1 %>% - mutate(Age_Group = "Age 0-2") - -summary_metrics_2 <- summary_metrics_2 %>% - mutate(Age_Group = "Age 2-5") - -# Combine both datasets into one -summary_metrics_combined <- bind_rows(summary_metrics_1, summary_metrics_2) - -# Plot with color to differentiate age groups -ggplot(summary_metrics_combined, aes(x = sample_size, y = empirical_se, color = Age_Group)) + - geom_line() + - geom_point() + - labs( - title = "Empirical Standard Error vs. 
Sample Size", - x = "Sample Size", - y = "Empirical Standard Error", - color = "Age Group" - ) + - theme_minimal() -``` - - diff --git a/simulation2.qmd b/simulation2.qmd deleted file mode 100644 index 73c83c0..0000000 --- a/simulation2.qmd +++ /dev/null @@ -1,651 +0,0 @@ ---- -title: "Simulation with the Smallest Lambda for sf3a IgG" -format: - pdf: - number-sections: true - number-depth: 2 - number-offset: [0, 0] -editor: visual -output: - pdf_document: - orientation: landscape -editor_options: - chunk_output_type: console ---- - -```{r setup, include=FALSE,echo=FALSE} -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -``` - -```{r, echo=FALSE, message=FALSE} -library(future.apply) -library(future) -library(gridExtra) -library(mgcv) # For advanced GAM smoothing -library(haven) -library(knitr) -library(plotly) -library(kableExtra) -library(tidyr) -library(arsenal) -library(dplyr) -library(forcats) -library(huxtable) -library(magrittr) -library(parameters) -library(kableExtra) -library(ggplot2) -library(ggeasy) -library(scales) -library(plotly) -library(patchwork) -library(tidyverse) -library(gtsummary) -library(readxl) -library(purrr) -library(serocalculator) -library(runjags) -library(coda) -library(ggmcmc) -library(here) -library(bayesplot) -library(table1) -library(tibble) -library(furrr) -library(dplyr) -``` - - -# Vary simulation lambdas across the range in the real cross-sectional data - -## 1. Estimate the shigella incidence rate using the real cross-sectional Shigella data, -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## separately for each geographic region in the data. 
- -# load shigella data -df <- read_excel("3.8.2024 Compiled Shigella datav2.xlsx", - sheet = "Compiled") - - -# create function for generating specific region data -create_xs_data <- function(df, filter_countries, filter_antigen_iso, value_col) { - df %>% - # First, create/rename columns - mutate( - id = sid, - Country = site_name, - study = study_name, - age = age, - antigen_iso = factor(isotype_name), - value = {{ value_col }} # value_col is specified by the user, e.g. n_ipab_MFI - ) %>% - # Then filter by the desired catchment(s) and antigen iso(s) - filter( - Country %in% filter_countries, - antigen_iso %in% filter_antigen_iso - ) %>% - # Create age categories - mutate( - ageCat = factor(case_when( - age < 5 ~ "<5", - age >= 5 & age <=15 ~ "5-15", - age > 15 ~ "16+" - )) - ) %>% - # Optionally select only the needed columns - select(id, Country, study, age, antigen_iso, value, ageCat) -} - -# Cross-sectional data: MA USA, sf3a_IgG -df_xs_USA_sf3a_IgG <- create_xs_data( - df, - filter_countries = c("MA USA"), - filter_antigen_iso = c("IgG"), - value_col = n_sf3aospbsa_MFI -) - -# Cross-sectional data: Ghana, sf3a_IgG -df_xs_Ghana_sf3a_IgG <- create_xs_data( - df, - filter_countries = c("Ghana"), - filter_antigen_iso = c("IgG"), - value_col = n_sf3aospbsa_MFI -) - - -# Cross-sectional data: Niger, sf3a_IgG -df_xs_Niger_sf3a_IgG <- create_xs_data( - df, - filter_countries = c("Niger"), - filter_antigen_iso = c("IgG"), - value_col =n_sf3aospbsa_MFI -) - - -# Cross-sectional data: Sierra Leone, sf3a_IgG -df_xs_Sierra_sf3a_IgG <- create_xs_data( - df, - filter_countries = c("Sierra Leone"), - filter_antigen_iso = c("IgG"), - value_col = n_sf3aospbsa_MFI -) - -# Create function to clearly define cross-sectional data -prepare_df_for_serocalculator <- function(df, age_col = "age", value_col = "value") { - # Ensure correct column names - df <- df %>% - rename(age = all_of(age_col)) - - # Assign attributes for serocalculator - attr(df, "age_var") <- "age" - attr(df, 
"value_var") <- value_col - - # Check if serocalculator recognizes attributes - get_value_var <- serocalculator:::get_value_var - detected_value_var <- get_value_var(df) - - if (is.null(detected_value_var)) { - warning("serocalculator did not detect the 'value' column. Check column naming.") - } else { - message("serocalculator recognized 'value' column: ", detected_value_var) - } - - return(df) -} - -# Application -df_xs_USA_sf3a_IgG <- prepare_df_for_serocalculator(df_xs_USA_sf3a_IgG) -df_xs_Ghana_sf3a_IgG<- prepare_df_for_serocalculator(df_xs_Ghana_sf3a_IgG) -df_xs_Niger_sf3a_IgG<- prepare_df_for_serocalculator(df_xs_Niger_sf3a_IgG) -df_xs_Sierra_sf3a_IgG<- prepare_df_for_serocalculator(df_xs_Sierra_sf3a_IgG) -``` - - - -## Get parameters from longitudinal data -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Define a function to filter and manipulate Shigella data -process_shigella_data <- function(data, study_filter, antigen) { - # Filter the data for the specific study - filtered_data <- data %>% - filter(study_name == study_filter) - - # Capture the column name of the antigen - antigen_col <- ensym(antigen) - - # Manipulate and restructure the data - processed_data <- filtered_data %>% - select(isotype_name, sid, timepoint, `Actual day`, !!antigen_col) %>% - mutate( - index_id = sid, - antigen_iso = isotype_name, - visit = timepoint, - timeindays = `Actual day`, - result = !!antigen_col - ) %>% - group_by(index_id, antigen_iso) %>% - arrange(visit) %>% - mutate(visit_num = rank(visit, ties.method = "first")) %>% - ungroup() %>% - # Remove rows with NA in timeindays - filter(!is.na(timeindays)) - - return(processed_data) -} - - -dL_clean <- process_shigella_data(data = df, study_filter = "SOSAR", antigen = n_sf3aospbsa_MFI) - - -# Construct the path to "prep_data.r" using here -prep_data_path <- here::here("R", "prep_data.r") -prep_priors_path <- here::here("R", "prep_priors.R") - -# Source the file to load the prep_data function 
-source(prep_data_path) -source(prep_priors_path) - -#prepare data for modeline -# Create 5 different longdata -longdata <- prep_data(dL_clean) -priors <- prep_priors(max_antigens = longdata$n_antigen_isos) - - -nchains <- 4; # nr of MC chains to run simultaneously -nadapt <- 1000; # nr of iterations for adaptation -nburnin <- 100; # nr of iterations to use for burn-in -nmc <- 100; # nr of samples in posterior chains -niter <- 200; # nr of iterations for posterior sample -nthin <- round(niter/nmc); # thinning needed to produce nmc from niter - -tomonitor <- c("y0", "y1", "t1", "alpha", "shape"); - -#This handles the seed to reproduce the results -initsfunction <- function(chain){ - stopifnot(chain %in% (1:4)); # max 4 chains allowed... - .RNG.seed <- (1:4)[chain]; - .RNG.name <- c("base::Wichmann-Hill","base::Marsaglia-Multicarry", - "base::Super-Duper","base::Mersenne-Twister")[chain]; - return(list(".RNG.seed"=.RNG.seed,".RNG.name"=.RNG.name)); -} - -file.mod <- here::here("inst", "extdata", "model.jags.r") - -set.seed(11325) -jags.post <- run.jags(model = file.mod, - data = c(longdata, priors), - inits = initsfunction, - method = "parallel", - adapt = nadapt, - burnin = nburnin, - thin = nthin, - sample = nmc, - n.chains = nchains, - monitor = tomonitor, - summarise = FALSE) - -mcmc_list <- as.mcmc.list(jags.post) - -mcmc_df <- ggs(mcmc_list) - -wide_predpar_df <- mcmc_df %>% - mutate( - parameter = sub("^(\\w+)\\[.*", "\\1", Parameter), - index_id = as.numeric(sub("^\\w+\\[(\\d+),.*", "\\1", Parameter)), - antigen_iso = as.numeric(sub("^\\w+\\[\\d+,(\\d+).*", "\\1", Parameter)) - ) %>% - mutate( - index_id = factor(index_id, labels = c(unique(dL_clean$index_id), "newperson")), - antigen_iso = factor(antigen_iso, labels = unique(dL_clean$antigen_iso))) %>% - filter(index_id == "newperson") %>% - select(-Parameter) %>% - pivot_wider(names_from = "parameter", values_from="value") %>% - rowwise() %>% - droplevels() %>% - ungroup() %>% - rename(r = shape) - -# 
Assuming wide_predpar_df is your data frame -curve_params <- wide_predpar_df - -# Set class and attributes for serocalculator -class(curve_params) <- c("curve_params", class(curve_params)) -antigen_isos <- unique(curve_params$antigen_iso) -attr(curve_params, "antigen_isos") <- antigen_isos - -curve_params<-curve_params%>% - mutate( - iter=Iteration)%>% - select(antigen_iso,iter,y0,y1,t1,alpha,r) - -curve_params_shigella<-curve_params -``` - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Set noise -create_noise_df <- function(country) { - noise_df <- tibble( - antigen_iso = c("IgG"), - Country = factor(c(country)), # Use input argument for Country - y.low = c(25), - eps = c(0.25), - nu = c(0.5), - y.high = c(200000) - ) - - return(noise_df) -} - -noise_df_USA <- create_noise_df("MA USA") -noise_df_Niger <- create_noise_df("Niger") -noise_df_Sierra <- create_noise_df("Sierra Leone") -noise_df_Ghana <- create_noise_df("Ghana") -``` - -## 2. Run simulations using: -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# MA USA -est_USA <- est.incidence( - pop_data = df_xs_USA_sf3a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_USA, - antigen_isos = c("IgG"), -) - -# Ghana -est_Ghana <- est.incidence( - pop_data = df_xs_Ghana_sf3a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Ghana, - antigen_isos = c("IgG"), -) - -# Niger -est_Niger <- est.incidence( - pop_data = df_xs_Niger_sf3a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Niger, - antigen_isos = c("IgG"), -) - -# Sierra Leone -est_Sierra <- est.incidence( - pop_data = df_xs_Sierra_sf3a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Sierra, - antigen_isos = c("IgG"), -) - -# create table of incidence.rate of each region - -create_incidence_table <- function(...) { - # Capture input objects and their names - est_list <- list(...) 
- country_names <- names(est_list) - - # Extract incidence.rate from the summary() of each estimate object - incidence_rates <- sapply(est_list, function(x) summary(x)$incidence.rate) - - # Create a tidy tibble - incidence_table <- tibble( - Country = country_names, - Incidence_Rate = incidence_rates - ) - - return(incidence_table) -} -# Example usage with four different country estimates: -incidence_summary <- create_incidence_table( - USA = est_USA, - Ghana = est_Ghana, - Niger = est_Niger, - Sierra_Leone = est_Sierra -) - -# Display the summary table -print(incidence_summary) - -# 1) The median of the incidence rate estimates from step 1. -# Get incidence rate estimates from each region (4 regions) and do median -med_sf3a_IgG<-median(incidence_summary$Incidence_Rate) -med_sf3a_IgG -#0.756 - -# 2) 2x the maximum incidence rate estimate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the maximum one and 2x -max2_sf3a_IgG<-2*max(incidence_summary$Incidence_Rate) -max2_sf3a_IgG -# 2.058 - -# 3) 1/2 the minimum incidence rate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the minimum one and 1/2 -min_half_sf3a_IgG<-0.5*min(incidence_summary$Incidence_Rate) -min_half_sf3a_IgG -#0.288 -``` -These three values are preliminary observations of lambda from sf3a_IgG. - -We choose a lambda that is half of the minimum lambda from the four regions. 
- - -# Simulation (200times) with the smallest lambda -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} - -# Define the simulation function -simulate_seroincidence <- function(nrep, n_sim, observed, range = NULL, batch_size = 40, parallel = TRUE) { - # Parameters - dmcmc <- curve_params_shigella # Curve parameters - antibodies <- c("IgG") # Antigen-isotypes - lambda <- observed # Simulated incidence rate per person-year - - # Biologic noise distribution - dlims <- rbind( - "IgG" = c(min = 0, max = 0.5) - ) - - # Noise parameters - cond <- tibble( - antigen_iso = c("IgG"), - nu = c(0.5), - eps = c(0.25), - y.low = c(25), - y.high = c(200000) - ) - - # Calculate number of batches - n_batches <- ceiling(n_sim / batch_size) - - # Parallel processing: Use future_map for efficiency - if (parallel) { - plan(multisession, workers = max(1, future::availableCores() - 1)) - - results <- future_map(1:n_batches, function(batch) { - # Run simulations within each batch - replicate(batch_size, expr = { - csdata <- sim.cs( - curve_params = dmcmc, - lambda = lambda, - n.smpl = nrep, - age_range = range, - antigen_isos = antibodies, - n.mc = 0, - renew_params = FALSE, # Keep the same params for speed - add.noise = TRUE, - noise_limits = dlims, - format = "long" - ) - - # Estimate seroincidence - est <- est.incidence( - pop_data = csdata, - curve_params = dmcmc, - noise_params = cond, - lambda_start = 0.1, - build_graph = FALSE, # Disable visualization for speed - verbose = FALSE, - print_graph = FALSE, - antigen_isos = antibodies - ) - - list(csdata = csdata, est1 = est) - }, simplify = FALSE) # Keep list structure - }, .options = furrr_options(seed = TRUE)) # Set seed for reproducibility - - # Flatten nested list - results <- unlist(results, recursive = FALSE) - - } else { - # Sequential execution (for debugging or single-core use) - results <- lapply(1:n_sim, function(i) { - csdata <- sim.cs( - curve_params = dmcmc, - lambda = lambda, - n.smpl = nrep, - age_range = 
range, - antigen_isos = antibodies, - n.mc = 0, - renew_params = FALSE, - add.noise = TRUE, - noise_limits = dlims, - format = "long" - ) - - est <- est.incidence( - pop_data = csdata, - curve_params = dmcmc, - noise_params = cond, - lambda_start = 0.1, - build_graph = FALSE, - verbose = FALSE, - print_graph = FALSE, - antigen_isos = antibodies - ) - - list(csdata = csdata, est1 = est) - }) - } - - return(results) -} - -``` - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Run the optimized simulations with n_sim = 200 -set.seed(206251) - -results_100_1 <- simulate_seroincidence(nrep = 100, n_sim = 200, - observed = min_half_sf3a_IgG, range = c(0,2), - batch_size = 40) - -results_100_2 <- simulate_seroincidence(nrep = 100, n_sim = 200, - observed = min_half_sf3a_IgG, range = c(2,5), - batch_size = 40) - -results_200_1 <- simulate_seroincidence(nrep = 200, n_sim = 200, - observed = min_half_sf3a_IgG, range = c(0,2), - batch_size = 40) - -results_200_2 <- simulate_seroincidence(nrep = 200, n_sim = 200, - observed = min_half_sf3a_IgG, range = c(2,5), - batch_size = 40) - -results_300_1 <- simulate_seroincidence(nrep = 300, n_sim = 200, - observed = min_half_sf3a_IgG, range = c(0,2), - batch_size = 40) - -results_300_2 <- simulate_seroincidence(nrep = 300, n_sim = 200, - observed = min_half_sf3a_IgG, range = c(2,5), - batch_size = 40) - -results_400_1 <- simulate_seroincidence(nrep = 400, n_sim = 200, - observed = min_half_sf3a_IgG, range = c(0,2), - batch_size = 40) - -results_400_2 <- simulate_seroincidence(nrep = 400, n_sim = 200, - observed = min_half_sf3a_IgG, range = c(2,5), - batch_size = 40) - -# Stop parallel processing to free memory -plan(sequential) -``` - - -## Store each sample size in table -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Define a function to generate final tables -generate_final_table <- function(results_list, sample_size) { - # Initialize an empty list to store the results - summary_results <- 
list() - - # Loop through each of the 100 results and extract the required columns - for (i in 1:200) { - # Extract the summary for each result - result_summary <- summary(results_list[[i]]$est1) - - # Select the required columns - extracted_columns <- result_summary %>% - select(incidence.rate, SE, CI.lwr, CI.upr) - - # Add a column for the index (optional, for tracking) - extracted_columns <- extracted_columns %>% - mutate(index = i) - - # Append to the list - summary_results[[i]] <- extracted_columns - } - - # Combine all results into a single data frame - final_table <- bind_rows(summary_results) %>% - mutate(sample_size = sample_size) # Add sample size column for clarity - - return(final_table) -} - -# Example usage for different sample sizes -final_table_100_1 <- generate_final_table(results_100_1, 100) -final_table_100_2 <- generate_final_table(results_100_2, 100) - -final_table_200_1 <- generate_final_table(results_200_1, 200) -final_table_200_2 <- generate_final_table(results_200_2, 200) - -final_table_300_1 <- generate_final_table(results_300_1, 300) -final_table_300_2 <- generate_final_table(results_300_2, 300) - -final_table_400_1 <- generate_final_table(results_400_1, 400) -final_table_400_2 <- generate_final_table(results_400_2, 400) -``` - - - -## Graphs of where the x axis is the sample size and the y axis is the empirical standard error - -Set the preliminary observed lambda as half of the minimum incidence rate from the four regions. 
- -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# Function to calculate metrics for each table -calculate_metrics <- function(data, sample_size) { - empirical_se <- sd(data$incidence.rate) - - - # Return a data frame with results - data.frame( - sample_size = sample_size, - empirical_se = empirical_se - - ) -} - -# Apply the function to each table -metrics_100_1 <- calculate_metrics(final_table_100_1, 100) -metrics_100_2 <- calculate_metrics(final_table_100_2, 100) - -metrics_200_1 <- calculate_metrics(final_table_200_1, 200) -metrics_200_2 <- calculate_metrics(final_table_200_2, 200) - -metrics_300_1 <- calculate_metrics(final_table_300_1, 300) -metrics_300_2 <- calculate_metrics(final_table_300_2, 300) - -metrics_400_1 <- calculate_metrics(final_table_400_1, 400) -metrics_400_2 <- calculate_metrics(final_table_400_2, 400) - - -# Combine the results into a single summary table -summary_metrics_1 <- bind_rows(metrics_100_1, metrics_200_1, metrics_300_1, - metrics_400_1) - - -summary_metrics_2 <- bind_rows(metrics_100_2, metrics_200_2, metrics_300_2, - metrics_400_2) - -# Add a column to distinguish the datasets -summary_metrics_1 <- summary_metrics_1 %>% - mutate(Age_Group = "Age 0-2") - -summary_metrics_2 <- summary_metrics_2 %>% - mutate(Age_Group = "Age 2-5") - -# Combine both datasets into one -summary_metrics_combined <- bind_rows(summary_metrics_1, summary_metrics_2) - -# Plot with color to differentiate age groups -ggplot(summary_metrics_combined, aes(x = sample_size, y = empirical_se, color = Age_Group)) + - geom_line() + - geom_point() + - labs( - title = "Empirical Standard Error vs. 
Sample Size", - x = "Sample Size", - y = "Empirical Standard Error", - color = "Age Group" - ) + - theme_minimal() -``` - - -## Table - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -summary_metrics_combined %>% - kable() -``` \ No newline at end of file diff --git a/simulation3.qmd b/simulation3.qmd deleted file mode 100644 index da260b2..0000000 --- a/simulation3.qmd +++ /dev/null @@ -1,651 +0,0 @@ ---- -title: "Simulation with the Smallest Lambda for sf2a IgG" -format: - pdf: - number-sections: true - number-depth: 2 - number-offset: [0, 0] -editor: visual -output: - pdf_document: - orientation: landscape -editor_options: - chunk_output_type: console ---- - -```{r setup, include=FALSE,echo=FALSE} -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -``` - -```{r, echo=FALSE, message=FALSE} -library(future.apply) -library(future) -library(gridExtra) -library(mgcv) # For advanced GAM smoothing -library(haven) -library(knitr) -library(plotly) -library(kableExtra) -library(tidyr) -library(arsenal) -library(dplyr) -library(forcats) -library(huxtable) -library(magrittr) -library(parameters) -library(kableExtra) -library(ggplot2) -library(ggeasy) -library(scales) -library(plotly) -library(patchwork) -library(tidyverse) -library(gtsummary) -library(readxl) -library(purrr) -library(serocalculator) -library(runjags) -library(coda) -library(ggmcmc) -library(here) -library(bayesplot) -library(table1) -library(tibble) -library(furrr) -library(dplyr) -``` - - -# Vary simulation lambdas across the range in the real cross-sectional data - -## 1. Estimate the shigella incidence rate using the real cross-sectional Shigella data, -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## separately for each geographic region in the data. 
- -# load shigella data -df <- read_excel("3.8.2024 Compiled Shigella datav2.xlsx", - sheet = "Compiled") - - -# create function for generating specific region data -create_xs_data <- function(df, filter_countries, filter_antigen_iso, value_col) { - df %>% - # First, create/rename columns - mutate( - id = sid, - Country = site_name, - study = study_name, - age = age, - antigen_iso = factor(isotype_name), - value = {{ value_col }} # value_col is specified by the user, e.g. n_ipab_MFI - ) %>% - # Then filter by the desired catchment(s) and antigen iso(s) - filter( - Country %in% filter_countries, - antigen_iso %in% filter_antigen_iso - ) %>% - # Create age categories - mutate( - ageCat = factor(case_when( - age < 5 ~ "<5", - age >= 5 & age <=15 ~ "5-15", - age > 15 ~ "16+" - )) - ) %>% - # Optionally select only the needed columns - select(id, Country, study, age, antigen_iso, value, ageCat) -} - -# Cross-sectional data: MA USA, sf2a_IgG -df_xs_USA_sf2a_IgG <- create_xs_data( - df, - filter_countries = c("MA USA"), - filter_antigen_iso = c("IgG"), - value_col = n_sf2aospbsa_MFI -) - -# Cross-sectional data: Ghana, sf2a_IgG -df_xs_Ghana_sf2a_IgG <- create_xs_data( - df, - filter_countries = c("Ghana"), - filter_antigen_iso = c("IgG"), - value_col = n_sf2aospbsa_MFI -) - - -# Cross-sectional data: Niger, sf2a_IgG -df_xs_Niger_sf2a_IgG <- create_xs_data( - df, - filter_countries = c("Niger"), - filter_antigen_iso = c("IgG"), - value_col =n_sf2aospbsa_MFI -) - - -# Cross-sectional data: Sierra Leone, sf2a_IgG -df_xs_Sierra_sf2a_IgG <- create_xs_data( - df, - filter_countries = c("Sierra Leone"), - filter_antigen_iso = c("IgG"), - value_col = n_sf2aospbsa_MFI -) - -# Create function to clearly define cross-sectional data -prepare_df_for_serocalculator <- function(df, age_col = "age", value_col = "value") { - # Ensure correct column names - df <- df %>% - rename(age = all_of(age_col)) - - # Assign attributes for serocalculator - attr(df, "age_var") <- "age" - attr(df, 
"value_var") <- value_col - - # Check if serocalculator recognizes attributes - get_value_var <- serocalculator:::get_value_var - detected_value_var <- get_value_var(df) - - if (is.null(detected_value_var)) { - warning("serocalculator did not detect the 'value' column. Check column naming.") - } else { - message("serocalculator recognized 'value' column: ", detected_value_var) - } - - return(df) -} - -# Application -df_xs_USA_sf2a_IgG <- prepare_df_for_serocalculator(df_xs_USA_sf2a_IgG) -df_xs_Ghana_sf2a_IgG<- prepare_df_for_serocalculator(df_xs_Ghana_sf2a_IgG) -df_xs_Niger_sf2a_IgG<- prepare_df_for_serocalculator(df_xs_Niger_sf2a_IgG) -df_xs_Sierra_sf2a_IgG<- prepare_df_for_serocalculator(df_xs_Sierra_sf2a_IgG) -``` - - - -## Get parameters from longitudinal data -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Define a function to filter and manipulate Shigella data -process_shigella_data <- function(data, study_filter, antigen) { - # Filter the data for the specific study - filtered_data <- data %>% - filter(study_name == study_filter) - - # Capture the column name of the antigen - antigen_col <- ensym(antigen) - - # Manipulate and restructure the data - processed_data <- filtered_data %>% - select(isotype_name, sid, timepoint, `Actual day`, !!antigen_col) %>% - mutate( - index_id = sid, - antigen_iso = isotype_name, - visit = timepoint, - timeindays = `Actual day`, - result = !!antigen_col - ) %>% - group_by(index_id, antigen_iso) %>% - arrange(visit) %>% - mutate(visit_num = rank(visit, ties.method = "first")) %>% - ungroup() %>% - # Remove rows with NA in timeindays - filter(!is.na(timeindays)) - - return(processed_data) -} - - -dL_clean <- process_shigella_data(data = df, study_filter = "SOSAR", antigen = n_sf2aospbsa_MFI) - - -# Construct the path to "prep_data.r" using here -prep_data_path <- here::here("R", "prep_data.r") -prep_priors_path <- here::here("R", "prep_priors.R") - -# Source the file to load the prep_data function 
-source(prep_data_path) -source(prep_priors_path) - -#prepare data for modeline -# Create 5 different longdata -longdata <- prep_data(dL_clean) -priors <- prep_priors(max_antigens = longdata$n_antigen_isos) - - -nchains <- 4; # nr of MC chains to run simultaneously -nadapt <- 1000; # nr of iterations for adaptation -nburnin <- 100; # nr of iterations to use for burn-in -nmc <- 100; # nr of samples in posterior chains -niter <- 200; # nr of iterations for posterior sample -nthin <- round(niter/nmc); # thinning needed to produce nmc from niter - -tomonitor <- c("y0", "y1", "t1", "alpha", "shape"); - -#This handles the seed to reproduce the results -initsfunction <- function(chain){ - stopifnot(chain %in% (1:4)); # max 4 chains allowed... - .RNG.seed <- (1:4)[chain]; - .RNG.name <- c("base::Wichmann-Hill","base::Marsaglia-Multicarry", - "base::Super-Duper","base::Mersenne-Twister")[chain]; - return(list(".RNG.seed"=.RNG.seed,".RNG.name"=.RNG.name)); -} - -file.mod <- here::here("inst", "extdata", "model.jags.r") - -set.seed(11325) -jags.post <- run.jags(model = file.mod, - data = c(longdata, priors), - inits = initsfunction, - method = "parallel", - adapt = nadapt, - burnin = nburnin, - thin = nthin, - sample = nmc, - n.chains = nchains, - monitor = tomonitor, - summarise = FALSE) - -mcmc_list <- as.mcmc.list(jags.post) - -mcmc_df <- ggs(mcmc_list) - -wide_predpar_df <- mcmc_df %>% - mutate( - parameter = sub("^(\\w+)\\[.*", "\\1", Parameter), - index_id = as.numeric(sub("^\\w+\\[(\\d+),.*", "\\1", Parameter)), - antigen_iso = as.numeric(sub("^\\w+\\[\\d+,(\\d+).*", "\\1", Parameter)) - ) %>% - mutate( - index_id = factor(index_id, labels = c(unique(dL_clean$index_id), "newperson")), - antigen_iso = factor(antigen_iso, labels = unique(dL_clean$antigen_iso))) %>% - filter(index_id == "newperson") %>% - select(-Parameter) %>% - pivot_wider(names_from = "parameter", values_from="value") %>% - rowwise() %>% - droplevels() %>% - ungroup() %>% - rename(r = shape) - -# 
Assuming wide_predpar_df is your data frame -curve_params <- wide_predpar_df - -# Set class and attributes for serocalculator -class(curve_params) <- c("curve_params", class(curve_params)) -antigen_isos <- unique(curve_params$antigen_iso) -attr(curve_params, "antigen_isos") <- antigen_isos - -curve_params<-curve_params%>% - mutate( - iter=Iteration)%>% - select(antigen_iso,iter,y0,y1,t1,alpha,r) - -curve_params_shigella<-curve_params -``` - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -## Set noise -create_noise_df <- function(country) { - noise_df <- tibble( - antigen_iso = c("IgG"), - Country = factor(c(country)), # Use input argument for Country - y.low = c(25), - eps = c(0.25), - nu = c(0.5), - y.high = c(200000) - ) - - return(noise_df) -} - -noise_df_USA <- create_noise_df("MA USA") -noise_df_Niger <- create_noise_df("Niger") -noise_df_Sierra <- create_noise_df("Sierra Leone") -noise_df_Ghana <- create_noise_df("Ghana") -``` - -## 2. Run simulations using: -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# MA USA -est_USA <- est.incidence( - pop_data = df_xs_USA_sf2a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_USA, - antigen_isos = c("IgG"), -) - -# Ghana -est_Ghana <- est.incidence( - pop_data = df_xs_Ghana_sf2a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Ghana, - antigen_isos = c("IgG"), -) - -# Niger -est_Niger <- est.incidence( - pop_data = df_xs_Niger_sf2a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Niger, - antigen_isos = c("IgG"), -) - -# Sierra Leone -est_Sierra <- est.incidence( - pop_data = df_xs_Sierra_sf2a_IgG, - curve_params = curve_params_shigella, - noise_params = noise_df_Sierra, - antigen_isos = c("IgG"), -) - -# create table of incidence.rate of each region - -create_incidence_table <- function(...) { - # Capture input objects and their names - est_list <- list(...) 
- country_names <- names(est_list) - - # Extract incidence.rate from the summary() of each estimate object - incidence_rates <- sapply(est_list, function(x) summary(x)$incidence.rate) - - # Create a tidy tibble - incidence_table <- tibble( - Country = country_names, - Incidence_Rate = incidence_rates - ) - - return(incidence_table) -} -# Example usage with four different country estimates: -incidence_summary <- create_incidence_table( - USA = est_USA, - Ghana = est_Ghana, - Niger = est_Niger, - Sierra_Leone = est_Sierra -) - -# Display the summary table -print(incidence_summary) - -# 1) The median of the incidence rate estimates from step 1. -# Get incidence rate estimates from each region (4 regions) and do median -med_sf2a_IgG<-median(incidence_summary$Incidence_Rate) -med_sf2a_IgG -#0.710 - -# 2) 2x the maximum incidence rate estimate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the maximum one and 2x -max2_sf2a_IgG<-2*max(incidence_summary$Incidence_Rate) -max2_sf2a_IgG -# 1.878 - -# 3) 1/2 the minimum incidence rate from step 1 -# Get incidence rate estimates from each region (4 regions) and get the minimum one and 1/2 -min_half_sf2a_IgG<-0.5*min(incidence_summary$Incidence_Rate) -min_half_sf2a_IgG -#0.252 -``` -These three values are preliminary observations of lambda from sf2a_IgG. - -We choose a lambda that is half of the minimum lambda from the four regions. 
- - -# Simulation (200times) with the smallest lambda -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} - -# Define the simulation function -simulate_seroincidence <- function(nrep, n_sim, observed, range = NULL, batch_size = 40, parallel = TRUE) { - # Parameters - dmcmc <- curve_params_shigella # Curve parameters - antibodies <- c("IgG") # Antigen-isotypes - lambda <- observed # Simulated incidence rate per person-year - - # Biologic noise distribution - dlims <- rbind( - "IgG" = c(min = 0, max = 0.5) - ) - - # Noise parameters - cond <- tibble( - antigen_iso = c("IgG"), - nu = c(0.5), - eps = c(0.25), - y.low = c(25), - y.high = c(200000) - ) - - # Calculate number of batches - n_batches <- ceiling(n_sim / batch_size) - - # Parallel processing: Use future_map for efficiency - if (parallel) { - plan(multisession, workers = max(1, future::availableCores() - 1)) - - results <- future_map(1:n_batches, function(batch) { - # Run simulations within each batch - replicate(batch_size, expr = { - csdata <- sim.cs( - curve_params = dmcmc, - lambda = lambda, - n.smpl = nrep, - age_range = range, - antigen_isos = antibodies, - n.mc = 0, - renew_params = FALSE, # Keep the same params for speed - add.noise = TRUE, - noise_limits = dlims, - format = "long" - ) - - # Estimate seroincidence - est <- est.incidence( - pop_data = csdata, - curve_params = dmcmc, - noise_params = cond, - lambda_start = 0.1, - build_graph = FALSE, # Disable visualization for speed - verbose = FALSE, - print_graph = FALSE, - antigen_isos = antibodies - ) - - list(csdata = csdata, est1 = est) - }, simplify = FALSE) # Keep list structure - }, .options = furrr_options(seed = TRUE)) # Set seed for reproducibility - - # Flatten nested list - results <- unlist(results, recursive = FALSE) - - } else { - # Sequential execution (for debugging or single-core use) - results <- lapply(1:n_sim, function(i) { - csdata <- sim.cs( - curve_params = dmcmc, - lambda = lambda, - n.smpl = nrep, - age_range = 
range, - antigen_isos = antibodies, - n.mc = 0, - renew_params = FALSE, - add.noise = TRUE, - noise_limits = dlims, - format = "long" - ) - - est <- est.incidence( - pop_data = csdata, - curve_params = dmcmc, - noise_params = cond, - lambda_start = 0.1, - build_graph = FALSE, - verbose = FALSE, - print_graph = FALSE, - antigen_isos = antibodies - ) - - list(csdata = csdata, est1 = est) - }) - } - - return(results) -} - -``` - -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Run the optimized simulations with n_sim = 200 -set.seed(206251) - -results_100_1 <- simulate_seroincidence(nrep = 100, n_sim = 200, - observed = min_half_sf2a_IgG, range = c(0,2), - batch_size = 40) - -results_100_2 <- simulate_seroincidence(nrep = 100, n_sim = 200, - observed = min_half_sf2a_IgG, range = c(2,5), - batch_size = 40) - -results_200_1 <- simulate_seroincidence(nrep = 200, n_sim = 200, - observed = min_half_sf2a_IgG, range = c(0,2), - batch_size = 40) - -results_200_2 <- simulate_seroincidence(nrep = 200, n_sim = 200, - observed = min_half_sf2a_IgG, range = c(2,5), - batch_size = 40) - -results_300_1 <- simulate_seroincidence(nrep = 300, n_sim = 200, - observed = min_half_sf2a_IgG, range = c(0,2), - batch_size = 40) - -results_300_2 <- simulate_seroincidence(nrep = 300, n_sim = 200, - observed = min_half_sf2a_IgG, range = c(2,5), - batch_size = 40) - -results_400_1 <- simulate_seroincidence(nrep = 400, n_sim = 200, - observed = min_half_sf2a_IgG, range = c(0,2), - batch_size = 40) - -results_400_2 <- simulate_seroincidence(nrep = 400, n_sim = 200, - observed = min_half_sf2a_IgG, range = c(2,5), - batch_size = 40) - -# Stop parallel processing to free memory -plan(sequential) -``` - - -## Store each sample size in table -```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} -# Define a function to generate final tables -generate_final_table <- function(results_list, sample_size) { - # Initialize an empty list to store the results - summary_results <- 
list() - - # Loop through each of the 100 results and extract the required columns - for (i in 1:200) { - # Extract the summary for each result - result_summary <- summary(results_list[[i]]$est1) - - # Select the required columns - extracted_columns <- result_summary %>% - select(incidence.rate, SE, CI.lwr, CI.upr) - - # Add a column for the index (optional, for tracking) - extracted_columns <- extracted_columns %>% - mutate(index = i) - - # Append to the list - summary_results[[i]] <- extracted_columns - } - - # Combine all results into a single data frame - final_table <- bind_rows(summary_results) %>% - mutate(sample_size = sample_size) # Add sample size column for clarity - - return(final_table) -} - -# Example usage for different sample sizes -final_table_100_1 <- generate_final_table(results_100_1, 100) -final_table_100_2 <- generate_final_table(results_100_2, 100) - -final_table_200_1 <- generate_final_table(results_200_1, 200) -final_table_200_2 <- generate_final_table(results_200_2, 200) - -final_table_300_1 <- generate_final_table(results_300_1, 300) -final_table_300_2 <- generate_final_table(results_300_2, 300) - -final_table_400_1 <- generate_final_table(results_400_1, 400) -final_table_400_2 <- generate_final_table(results_400_2, 400) -``` - - - -## Graphs of where the x axis is the sample size and the y axis is the empirical standard error - -Set the preliminary observed lambda as half of the minimum incidence rate from the four regions. 
- -```{r,echo=FALSE, message=FALSE, warning=FALSE} -# Function to calculate metrics for each table -calculate_metrics <- function(data, sample_size) { - empirical_se <- sd(data$incidence.rate) - - - # Return a data frame with results - data.frame( - sample_size = sample_size, - empirical_se = empirical_se - - ) -} - -# Apply the function to each table -metrics_100_1 <- calculate_metrics(final_table_100_1, 100) -metrics_100_2 <- calculate_metrics(final_table_100_2, 100) - -metrics_200_1 <- calculate_metrics(final_table_200_1, 200) -metrics_200_2 <- calculate_metrics(final_table_200_2, 200) - -metrics_300_1 <- calculate_metrics(final_table_300_1, 300) -metrics_300_2 <- calculate_metrics(final_table_300_2, 300) - -metrics_400_1 <- calculate_metrics(final_table_400_1, 400) -metrics_400_2 <- calculate_metrics(final_table_400_2, 400) - - -# Combine the results into a single summary table -summary_metrics_1 <- bind_rows(metrics_100_1, metrics_200_1, metrics_300_1, - metrics_400_1) - - -summary_metrics_2 <- bind_rows(metrics_100_2, metrics_200_2, metrics_300_2, - metrics_400_2) - -# Add a column to distinguish the datasets -summary_metrics_1 <- summary_metrics_1 %>% - mutate(Age_Group = "Age 0-2") - -summary_metrics_2 <- summary_metrics_2 %>% - mutate(Age_Group = "Age 2-5") - -# Combine both datasets into one -summary_metrics_combined <- bind_rows(summary_metrics_1, summary_metrics_2) - -# Plot with color to differentiate age groups -ggplot(summary_metrics_combined, aes(x = sample_size, y = empirical_se, color = Age_Group)) + - geom_line() + - geom_point() + - labs( - title = "Empirical Standard Error vs. 
Sample Size", - x = "Sample Size", - y = "Empirical Standard Error", - color = "Age Group" - ) + - theme_minimal() -``` - - -## Table - -```{r,echo=FALSE, message=FALSE, warning=FALSE} -summary_metrics_combined %>% - kable() -``` \ No newline at end of file diff --git a/vignettes/articles/_sec_outtakes-data-load.qmd b/vignettes/articles/_sec_outtakes-data-load.qmd index 9e8e839..682df16 100644 --- a/vignettes/articles/_sec_outtakes-data-load.qmd +++ b/vignettes/articles/_sec_outtakes-data-load.qmd @@ -1,73 +1,153 @@ +--- +title: Summary of Incidence Rates Across Cross-Sectional Data Regions +--- ```{r} -# Cross-sectional data: MA USA, ipab_IgG -df_xs_USA_ipab_IgG <- create_xs_data( - df, - filter_countries = c("MA USA"), - filter_antigen_iso = c("IgG"), - value_col = n_ipab_MFI -) +#| label: setup +#| message: false +#| include: false +#| echo: false -# Cross-sectional data: MA USA, ipab_IgA +library(knitr) +library(future.apply) +library(future) +library(gridExtra) +library(mgcv) # For advanced GAM smoothing +library(haven) +library(knitr) +library(plotly) +library(kableExtra) +library(tidyr) +library(arsenal) +library(dplyr) +library(forcats) +library(huxtable) +library(magrittr) +library(parameters) +library(kableExtra) +library(ggplot2) +library(ggeasy) +library(scales) +library(patchwork) +library(tidyverse) +library(gtsummary) +library(readxl) +library(purrr) +library(serocalculator) +library(serodynamics) +library(runjags) +library(coda) +library(ggmcmc) +library(here) +library(bayesplot) +library(table1) +library(tibble) +library(furrr) +library(dplyr) +library(shigella) +devtools::load_all() +``` -# Cross-sectional data: Ghana, ipab_IgG -df_xs_Ghana_ipab_IgG <- create_xs_data( - df, - filter_countries = c("Ghana"), - filter_antigen_iso = c("IgG"), - value_col = n_ipab_MFI -) +# Load the dataset, extract cross-sectional data from a specific region -# Cross-sectional data: Ghana, ipab_IgA +## Load the dataset +```{r,echo=FALSE, message=FALSE, 
warning=FALSE,results='hide'} +#| label: "load shigella data" +df <- + fs::path_package( + package = "shigella", + "extdata/3.8.2024 Compiled Shigella datav2.xlsx" + ) |> + read_excel(sheet = "Compiled") +``` -# Cross-sectional data: Niger, ipab_IgG -df_xs_Niger_ipab_IgG <- create_xs_data( - df, - filter_countries = c("Niger"), - filter_antigen_iso = c("IgG"), - value_col = n_ipab_MFI -) +```{r} +df_xs_strat <- + df |> + rename(antigen_iso = isotype_name, + Country = site_name) |> + filter(Country != "Dhaka") |> + as_pop_data( + antigen_isos = "IgG", + value = "n_ipab_MFI", ## Choose an antigen + age = "age", + id = "sid" + ) +``` + +## Create table of incidence.rate of each region + +```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} +#| label: "Load the simulated longitudinal dataset" +data("curve_params_shigella_ipab", package = "shigella") +``` + + +```{r} +# Generate noise for each region +noise_df_USA <- create_noise_df("MA USA") +noise_df_Niger <- create_noise_df("Niger") +noise_df_Sierra <- create_noise_df("Sierra Leone") +noise_df_Ghana <- create_noise_df("Ghana") + +noise_df_strat <- + bind_rows(noise_df_USA, + noise_df_Niger, + noise_df_Sierra, + noise_df_Ghana) |> + serocalculator::as_noise_params() +``` -# Cross-sectional data: Niger, ipab_IgA -# Cross-sectional data: Sierra Leone, ipab_IgG -df_xs_Sierra_ipab_IgG <- create_xs_data( - df, - filter_countries = c("Sierra Leone"), - filter_antigen_iso = c("IgG"), - value_col = n_ipab_MFI +```{r,echo=FALSE, message=FALSE, warning=FALSE} + +#| label: tbl-ests-strat +#| tbl-cap: "Stratified estimates of shigella: IpaB" + +ests_strat <- est.incidence.by( + pop_data = df_xs_strat, + curve_param = curve_params_shigella_ipab, + noise_params = noise_df_strat, + strata = "Country", + noise_strata_varnames = "Country", + curve_strata_varnames = NULL, + antigen_isos = "IgG", + num_cores = 2, + verbose = FALSE ) -# Cross-sectional data: Sierra Leone, ipab_IgA +incidence_summary <- summary(ests_strat) -# 
Application -df_xs_USA_ipab_IgG <- - df_xs_USA_ipab_IgG |> - as_pop_data( - value = "value", - id = "id", - age = "age" - ) -df_xs_Ghana_ipab_IgG <- - df_xs_Ghana_ipab_IgG |> - as_pop_data( - value = "value", - id = "id", - age = "age" - ) -df_xs_Niger_ipab_IgG <- - df_xs_Niger_ipab_IgG |> - as_pop_data( - value = "value", - id = "id", - age = "age" - ) -df_xs_Sierra_ipab_IgG <- - df_xs_Sierra_ipab_IgG |> - as_pop_data( - value = "value", - id = "id", - age = "age" +# Select only the Country and incidence.rate columns to generate the table +incidence_summary %>% + select(Country, incidence.rate) %>% + kable(caption = "Seroincidence Rates by Country") +``` + +```{r} +#| label: tbl-ests-stat +#| tbl-cap: "Descriptive statistics of shigella: IpaB" + +# Calculate median, 2x max, and 1/2 min in a single step +incidence_stats <- incidence_summary %>% + summarise( + Median = median(incidence.rate), + Max2 = 2 * max(incidence.rate), + MinHalf = 0.5 * min(incidence.rate) ) +# Convert to long format for the summary table +incidence_rate_summary <- incidence_stats %>% + pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>% + mutate(Metric = recode(Metric, + "Median" = "Median Incidence Rate", + "Max2" = "2x Maximum Incidence Rate", + "MinHalf" = "1/2 Minimum Incidence Rate")) + +kable(incidence_rate_summary, caption = "Incidence Rate Summary") ``` + +```{r} +incidence_stats_ipab<-incidence_stats +usethis::use_data(incidence_stats_ipab, overwrite = TRUE) # Saving this .rda file for use in the last .qmd file +``` \ No newline at end of file diff --git a/vignettes/articles/shigella-kinetics.qmd b/vignettes/articles/shigella-kinetics.qmd index 0df4df6..0a700dc 100644 --- a/vignettes/articles/shigella-kinetics.qmd +++ b/vignettes/articles/shigella-kinetics.qmd @@ -44,12 +44,13 @@ library(table1) library(tibble) library(furrr) library(dplyr) -# library(shigella) +library(shigella) devtools::load_all() ``` +# Load the dataset, extract cross-sectional data 
from a specific region, and generate simulated longitudinal data for specific antigen-isotypes. - +## Load the dataset ```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} #| label: "load shigella data" df <- @@ -60,7 +61,11 @@ df <- read_excel(sheet = "Compiled") ``` + + +I think the commented-out section above is not necessary in this .qmd file. I would like to generate simulated data from the longitudinal data for this .qmd file only. The section above will be handled in the _sec_outtakes-data-load.qmd file. +## Generate simulated longitudinal data (choose your antigen) ```{r,echo=FALSE, message=FALSE, warning=FALSE,results='hide'} # Define a function to filter and manipulate Shigella data dL_clean <- process_shigella_data( data = df, study_filter = "SOSAR", - antigen = n_ipab_MFI + antigen = n_ipab_MFI ### Modify this input to use different antigens. ) |> as_case_data( id_var = "index_id", @@ -150,10 +198,19 @@ jags.post <- run.jags( summarise = FALSE ) -curve_params_shigella <- jags.post |> postprocess_jags_output() -usethis::use_data(curve_params_shigella, overwrite = TRUE) + +#curve_params_shigella <- jags.post |> postprocess_jags_output() #somehow it doesn't work + +# Use Sam's package rather than my current R function +curve_params_shigella_ipab <- jags.post |> serodynamics::postprocess_jags_output( + ids = attr(jags_prepped_data, "ids"), + antigen_isos = attr(jags_prepped_data, "antigens") +) + +usethis::use_data(curve_params_shigella_ipab, overwrite = TRUE) ``` +## Exploring the simulated longitudinal data ```{r} curve_params_shigella |> @@ -165,3 +222,4 @@ curve_params_shigella |> autoplot() ``` + diff --git a/vignettes/articles/simulation-shigella.qmd b/vignettes/articles/simulation-shigella.qmd new file mode 100644 index 0000000..6ac0f37 --- /dev/null +++ b/vignettes/articles/simulation-shigella.qmd @@ -0,0 +1,157 @@ +--- +title: "Estimating Optimal Sample Size Through 300 Simulations: IpaB" +--- + +```{r} +#| label:
setup +#| message: false +#| include: false +#| echo: false + +library(knitr) +library(future.apply) +library(future) +library(gridExtra) +library(mgcv) # For advanced GAM smoothing +library(haven) +library(knitr) +library(plotly) +library(kableExtra) +library(tidyr) +library(arsenal) +library(dplyr) +library(forcats) +library(huxtable) +library(magrittr) +library(parameters) +library(kableExtra) +library(ggplot2) +library(ggeasy) +library(scales) +library(patchwork) +library(tidyverse) +library(gtsummary) +library(readxl) +library(purrr) +library(serocalculator) +library(serodynamics) +library(runjags) +library(coda) +library(ggmcmc) +library(here) +library(bayesplot) +library(table1) +library(tibble) +library(furrr) +library(dplyr) +#library(shigella) +devtools::load_all() +``` + +```{r} +data("curve_params_shigella_ipab", package = "shigella") +data("incidence_stats_ipab", package = "shigella") +``` + + +# Simulation (300 iterations) with the smallest lambda (Requires HPC) +```{r} +plan(multicore) # Works best on HPC + +# Define parameter sets +sim_params <- list( + list(nrep = 100, range = c(0, 2)), + list(nrep = 100, range = c(2, 5)), + list(nrep = 200, range = c(0, 2)), + list(nrep = 200, range = c(2, 5)), + list(nrep = 300, range = c(0, 2)), + list(nrep = 300, range = c(2, 5)), + list(nrep = 400, range = c(0, 2)), + list(nrep = 400, range = c(2, 5)) +) + +# You probably need to use the HPC to run the code below to speed it up +results_list <- lapply(sim_params, function(params) { + cat("Running simulation for nrep =", params$nrep, "range =", params$range, "\n") + simulate_seroincidence2(nrep = params$nrep, n_sim = 300, observed = incidence_stats_ipab$MinHalf, range = params$range) ### Choose the observed value based on the lambda you want to simulate +}) + +# Stop parallel processing +plan(sequential) # Return to sequential processing after execution +``` +The simulation above should be run on the HPC rather than on a local computer. 
+ +## Generate the final dataset for IpaB with the smallest lambda +```{r} +# Assign results to variables +results_100_1 <- results_list[[1]] +results_100_2 <- results_list[[2]] +results_200_1 <- results_list[[3]] +results_200_2 <- results_list[[4]] +results_300_1 <- results_list[[5]] +results_300_2 <- results_list[[6]] +results_400_1 <- results_list[[7]] +results_400_2 <- results_list[[8]] + +# For different sample sizes +final_table_100_1 <- generate_final_table(results_100_1, 100) +final_table_100_2 <- generate_final_table(results_100_2, 100) +final_table_200_1 <- generate_final_table(results_200_1, 200) +final_table_200_2 <- generate_final_table(results_200_2, 200) +final_table_300_1 <- generate_final_table(results_300_1, 300) +final_table_300_2 <- generate_final_table(results_300_2, 300) +final_table_400_1 <- generate_final_table(results_400_1, 400) +final_table_400_2 <- generate_final_table(results_400_2, 400) +``` + +```{r} +# Prepare Data and Parameters +# List of all final tables +final_tables <- list(final_table_100_1, final_table_100_2, + final_table_200_1, final_table_200_2, + final_table_300_1, final_table_300_2, + final_table_400_1, final_table_400_2) + +# Corresponding sample sizes +sample_sizes <- rep(c(100, 200, 300, 400), each = 2) +# Corresponding age groups +age_groups <- rep(c("Age 0-2", "Age 2-5"), times = 4) + +# Calculate Metrics in a Single Step +summary_metrics_combined <- pmap_dfr( + list(final_tables, sample_sizes, age_groups), + calculate_metrics +) + +# Display the summary_metrics_combined as a table +kable(summary_metrics_combined, + caption = "Summary of Empirical Standard Errors by Sample Size and Age Group", + digits = 3, # Rounds numerical values to 3 decimal places + align = "c") # Center align all columns +``` + + +## Plot with color to differentiate age groups +```{r} +#| fig-cap: "IpaB-IgG with the Smallest Lambda" +#| label: fig-se-IpaB-IgG-min-lambda +ggplot(summary_metrics_combined, aes(x = sample_size, y = empirical_se, 
color = Age_Group)) + + geom_line() + + geom_point() + + labs( + x = "Sample Size", + y = "Empirical Standard Error", + color = "Age Group" + ) + + theme_minimal() +``` + +## Table: Smallest Lambda for IpaB-IgG +```{r} +summary_metrics_combined %>% + kable() +``` + +# Simulation (300 iterations) with the median lambda (Requires HPC) + +This part follows the exact same procedure as above, except for changing one input variable from the smallest lambda to the median lambda. \ No newline at end of file