From 06dc8bb470318e9e74613d64fbcb5fbfd428d22b Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Thu, 17 Apr 2025 16:37:34 +0000
Subject: [PATCH 01/26] add diagnostic module

---
 R/misc.R                   |  30 ++++
 R/univariate_diagnostics.R | 305 +++++++++++++++++++++++++++++++++++++
 2 files changed, 335 insertions(+)
 create mode 100644 R/univariate_diagnostics.R
diff --git a/R/misc.R b/R/misc.R
index 3e9ecdee..8f9e5a7f 100644
--- a/R/misc.R
+++ b/R/misc.R
@@ -602,6 +602,36 @@ z_to_beta_se <- function(z, maf, n) {
   return(data.frame(beta = beta, se = se, maf = p))
 }
 
+#' Convert Z-scores to P-values
+#'
+#' This function calculates p-values from given z-scores using a two-tailed normal distribution.
+#' It supports vector input to process multiple z-scores simultaneously.
+#'
+#' @param z Numeric vector. The z-scores to be converted to p-values.
+#'
+#' @return A numeric vector of p-values corresponding to the input z-scores.
+#'
+#' @details
+#' The function uses the following formula to calculate p-values:
+#' p-value = 2 * Φ(-|z|)
+#' Where Φ is the cumulative distribution function of the standard normal distribution.
+#'
+#' @examples
+#' z <- c(2.5, -1.8, 3.2, 0.7)
+#' pvalues <- z_to_pvalue(z)
+#' print(pvalues)
+#'
+#' @note
+#' This function assumes that the input z-scores are from a two-tailed test and
+#' are normally distributed. It calculates two-sided p-values.
+#' For extremely large absolute z-scores, the resulting p-values may be computed as zero
+#' due to floating-point limitations in R. This occurs when the absolute z-score > 37.
+#'
+#' @export
+z_to_pvalue <- function(z) {
+  2 * pnorm(-abs(z))
+}
+                                                                                 
 #' Filter events based on provided context name pattern
 #'       
 #' @param events A character vector of event names 
diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R
new file mode 100644
index 00000000..739f7717
--- /dev/null
+++ b/R/univariate_diagnostics.R
@@ -0,0 +1,305 @@
+#' Extract SuSiE Results from Finemapping Data
+#'
+#' This function extracts the trimmed SuSiE results from a finemapping data object,
+#' typically obtained from a finemapping RDS file. It's designed to work with
+#' the method layer of these files, often named as 'method_RAISS_imputed', 'method',
+#' or 'method_NO_QC'. This layer is right under the study layer.
+#'
+#' @param con_data List. The method layer data from a finemapping RDS file.
+#'
+#' @return The trimmed SuSiE results (`$susie_result_trimmed`) if available,
+#' otherwise NULL.
+#'
+#' @details
+#' The function checks if the input data is empty or if the `$susie_result_trimmed`
+#' element is missing. It returns NULL in these cases. If `$susie_result_trimmed`
+#' exists and is not empty, it returns this element.
+#'
+#' @note
+#' This function is particularly useful when working with large datasets
+#' where not all method layers may contain valid SuSiE results or method layer.
+#'
+#' @export
+get_res <- function(con_data) {
+    if (length(con_data) == 0) return(NULL)
+    if (length(con_data$susie_result_trimmed) == 0) {
+        return(NULL)
+        print(paste("$susie_result_trimmed is null for", con_data))
+    } else {
+        return(con_data$susie_result_trimmed)
+    }
+}
+
+#' Process Credible Sets (CS) from Finemapping Results
+#'
+#' This function extracts and processes information for each Credible Set (CS) 
+#' from finemapping results, typically obtained from a finemapping RDS file.
+#'
+#' @param con_data List. The method layer data from a finemapping RDS file that is not empty.
+#' @param cs_names Character vector. Names of the Credible Sets, usually in the format "L_<number>".
+#' @param top_loci_table Data frame. The $top_loci layer data from the finemapping results.
+#'
+#' @return A data frame with one row per CS, containing the following columns:
+#'   \item{cs_name}{Name of the Credible Set}
+#'   \item{variants_per_cs}{Number of variants in the CS}
+#'   \item{top_variant}{ID of the variant with the highest PIP in the CS}
+#'   \item{top_variant_index}{Global index of the top variant}
+#'   \item{top_pip}{Highest Posterior Inclusion Probability (PIP) in the CS}
+#'   \item{top_z}{Z-score of the top variant}
+#'   \item{p_value}{P-value calculated from the top Z-score}
+#'   \item{cs_corr}{pairwise correlations of other CSs in this RDS with the CS of 
+#`    the current row, delimited by `|`, if there are more than one CSs in this RDS file}
+#'
+#' @details
+#' This function is designed to be used only when there is at least one Credible Set 
+#' in the finemapping results usually for a given study and block. It processes each CS, 
+#' extracting key information such as the top variant, its statistics, and 
+#' correlation information between multiple CS if available.
+#'
+#' @importFrom purrr map
+#' @importFrom dplyr bind_rows
+#' @importFrom tibble z_to_pvalue
+#'
+#' @export
+process_cs <- function(con_data, cs_names, top_loci_table) {  
+  results <- map(seq_along(cs_names), function(i) {
+    cs_name <- cs_names[i]
+    indices <- con_data$susie_result_trimmed$sets$cs[[cs_name]]
+    
+    # Get variants for this CS using the full variant_names list
+    cs_variants <- con_data$variant_names[indices]
+    cs_data <- top_loci_table[top_loci_table$variant_id %in% cs_variants, ]
+    top_row <- which.max(cs_data$pip)
+    
+    top_variant <- cs_data$variant_id[top_row]
+    # Find the global index of the top variant
+    top_variant_global_index = which(con_data$variant_names == top_variant)
+    top_pip <- cs_data$pip[top_row]
+    top_z <- cs_data$z[top_row]
+    p_value <- z_to_pvalue(top_z)
+    
+    # Extract cs_corr
+    cs_corr <- if (length(cs_names) > 1) {
+      con_data$susie_result_trimmed$cs_corr[i,]
+    } else {
+      NA  # Use NA for the second CS or when there's only one CS
+    }
+    
+    # Return results for this CS as a one-row data.frame
+    result = tibble::tibble(
+      cs_name = cs_name,
+      variants_per_cs = length(cs_variants),
+      top_variant = top_variant,
+      top_variant_index = top_variant_global_index,
+      top_pip = top_pip,
+      top_z = top_z,
+      p_value = p_value,
+      cs_corr = list(cs_corr)  # list column if cs_corr is a vector
+    )
+    return(result)
+  })
+  # Combine all tibbles into one data frame
+  final_result <- dplyr::bind_rows(results)
+  return(final_result)
+}
+
+#' Extract Information for Top Variant from Finemapping Results
+#'
+#' This function extracts information about the variant with the highest Posterior 
+#' Inclusion Probability (PIP) from finemapping results, typically used when no 
+#' Credible Sets (CS) are identified in the analysis.
+#'
+#' @param con_data List. The method layer data from a finemapping RDS file.
+#'
+#' @return A data frame with one row containing the following columns:
+#'   \item{cs_name}{NA (as no CS is identified)}
+#'   \item{variants_per_cs}{NA (as no CS is identified)}
+#'   \item{top_variant}{ID of the variant with the highest PIP}
+#'   \item{top_variant_index}{Index of the top variant in the original data}
+#'   \item{top_pip}{Highest Posterior Inclusion Probability (PIP)}
+#'   \item{top_z}{Z-score of the top variant}
+#'   \item{p_value}{P-value calculated from the top Z-score}
+#'   \item{cs_corr}{NA (as no CS correlation is available)}
+#'
+#' @details
+#' This function is designed to be used when no Credible Sets are identified in 
+#' the finemapping results, but information about the most significant variant 
+#' is still desired. It identifies the variant with the highest PIP and extracts 
+#' relevant statistical information.
+#'
+#' @note
+#' This function is particularly useful for capturing information about potentially 
+#' important variants that might be included in Credible Sets under different 
+#' analysis parameters or lower coverage. It maintains a structure similar to 
+#' the output of `process_cs()` for consistency in downstream analyses.
+#'
+#' @seealso 
+#' \code{\link{process_cs}} for processing when Credible Sets are present.
+#'
+#' @export
+extract_top_pip <- function(con_data) {
+  # Find the variant with the highest PIP
+  top_pip_index <- which.max(con_data$susie_result_trimmed$pip)
+  top_pip <- con_data$susie_result_trimmed$pip[top_pip_index]
+  top_variant = con_data$variant_names[top_pip_index]
+  top_z = con_data$sumstats$z[top_pip_index]
+  p_value = z_to_pvalue(top_z)
+  # Create the dataframe row
+  data.frame(
+    cs_name = NA_character_,
+    variants_per_cs = NA_integer_,
+    top_variant = top_variant,
+    top_variant_index = top_pip_index,
+    top_pip = top_pip,
+    top_z = top_z,
+    p_value = p_value,
+    cs_corr = NA_character_
+  )
+}
+
+#' Parse Credible Set Correlations from process_cs() Output
+#'
+#' This function takes the output from `process_cs()` and expands the `cs_corr` column
+#' into multiple columns, preserving the original order of correlations. It also
+#' calculates maximum and minimum correlation values for each Credible Set.
+#'
+#' @param df Data frame or data.table. The output from `process_cs()` function,
+#'           containing a `cs_corr` column with correlation information.
+#'
+#' @return A data.table with the original columns from the input, plus:
+#'   \item{cs_corr_1, cs_corr_2, ...}{Individual correlation values, with column names
+#'         based on their position in the original string}
+#'   \item{cs_corr_max}{Maximum absolute correlation value (excluding 1)}
+#'   \item{cs_corr_min}{Minimum absolute correlation value}
+#'
+#' @details
+#' The function splits the `cs_corr` column, which typically contains correlation
+#' values separated by '|', into individual columns. It preserves the order of
+#' these correlations, allowing for easy interpretation in a matrix-like format.
+#'
+#' @note
+#' - This function converts the input to a data.table if it isn't already one.
+#' - It handles cases where correlation values might be missing or not in the expected format.
+#' - The function assumes that correlation values of 1 represent self-correlations and excludes
+#'   these when calculating max and min correlations.
+#'
+#' @importFrom data.table setDT
+#'
+#' @export
+parse_cs_corr <- function(df) {
+  # Convert to data.table if not already
+  if (!is.data.table(df)) {
+    setDT(df)
+  }
+  
+  # Function to extract correlations
+  extract_correlations <- function(x) {
+    if(is.na(x) || x == "" || !grepl("\\|", x)) {
+      return(list(values = numeric(0), max_corr = NA, min_corr = NA))
+    }
+    
+    values <- as.numeric(unlist(strsplit(as.character(x), "\\|")))
+    
+    
+    if(length(values) == 0) {
+      return(list(values = numeric(0), max_corr = NA, min_corr = NA))
+    }
+    values_filtered <- abs(values[values != 1])
+    return(list(
+      values = values,
+      max_corr = max(abs(values_filtered), na.rm = TRUE),
+      min_corr = min(abs(values_filtered), na.rm = TRUE)
+    ))
+  }
+  
+  # Process correlations
+  processed_results <- lapply(df$cs_corr, extract_correlations)
+  
+  # Determine max number of correlations
+  max_corr_count <- max(sapply(processed_results, function(x) length(x$values)))
+  
+  # Create column names
+  col_names <- paste0("cs_corr_", 1:max_corr_count)
+  
+  # Prepare a list of correlation values for each column
+  corr_list <- lapply(1:max_corr_count, function(j) {
+    sapply(processed_results, function(x) {
+      if(length(x$values) >= j) x$values[j] else NA_real_
+    })
+  })
+  
+  # Add columns to the data.table
+  for(i in seq_along(col_names)) {
+    df[, (col_names[i]) := corr_list[[i]]]
+  }
+  
+  # Add max and min columns
+  df[, cs_corr_max := sapply(processed_results, `[[`, "max_corr")]
+  df[, cs_corr_min := sapply(processed_results, `[[`, "min_corr")]
+  
+  return(df)
+}
+
+#' Process Credible Set Information and Determine Updating Strategy
+#'
+#' This function categorizes Credible Sets (CS) within a study block into different 
+#' updating strategies based on their statistical properties and correlations.
+#'
+#' @param df Data frame. Contains information about Credible Sets for a specific study and block.
+#' @param high_corr_cols Character vector. Names of columns in df that represent high correlations.
+#'
+#' @return A modified data frame with additional columns attached to the diagnostic table:
+#'   \item{top_cs}{Logical. TRUE for the CS with the highest absolute Z-score.}
+#'   \item{tagged_cs}{Logical. TRUE for CS that are considered "tagged" based on p-value and correlation criteria.}
+#'   \item{method}{Character. The determined updating strategy ("BVSR", "SER", or "BCR").}
+#'
+#' @details
+#' This function performs the following steps:
+#' 1. Identifies the top CS based on the highest absolute Z-score.
+#' 2. Identifies tagged CS based on high p-value and high correlations.
+#' 3. Counts total, tagged, and remaining CS.
+#' 4. Determines the appropriate updating method based on these counts.
+#'
+#' The updating methods are:
+#' - BVSR (Bayesian Variable Selection Regression): Used when there's only one CS or all CS are accounted for.
+#' - SER (Single Effect Regression): Used when there are tagged CS but no remaining untagged CS.
+#' - BCR (Bayesian Conditional Regression): Used when there are remaining untagged CS.
+#'
+#' @note
+#' This function is part of a developing methodology for automatically handling 
+#' finemapping results. The thresholds and criteria used (e.g., p-value > 1e-4 for tagging) 
+#' are subject to refinement and may change in future versions.
+#'
+#' @importFrom dplyr case_when
+#'
+#' @export
+# cs_exp = cs_exp |> filter(!is.na(p_value))
+auto_decision <- function(df, high_corr_cols) {
+  # Identify top_cs
+  top_cs_index <- which.max(abs(df$top_z))
+  df$top_cs <- FALSE
+  df$top_cs[top_cs_index] <- TRUE
+
+  # Identify tagged_cs
+  df$tagged_cs <- sapply(1:nrow(df), function(i) {
+    if (df$top_cs[i]) return(FALSE)
+    if (df$p_value[i] > 1e-4) return(TRUE)
+    if (length(high_corr_cols) == 0) return(FALSE)
+    any(sapply(high_corr_cols, function(col) df[i, ..col] == 1))
+  })
+
+  # Count total and remaining CS
+  total_cs <- nrow(df)
+  tagged_cs_count <- sum(df$tagged_cs)
+  remaining_cs <- total_cs - 1 - tagged_cs_count
+
+  # Determine method
+  df$method <- case_when(
+    total_cs == 1 | (tagged_cs_count == 0 & remaining_cs == 0) ~ "BVSR",
+    remaining_cs == 0 ~ "SER",
+    remaining_cs > 0 ~ "BCR",
+    TRUE ~ NA_character_
+  )
+
+  return(df)
+}
\ No newline at end of file

From fb8dd37db5ebd03327eb67ca26fcbb3121745c43 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Thu, 17 Apr 2025 18:25:22 +0000
Subject: [PATCH 02/26] add the draft for comprehensive rss_analysis_pipeline

---
 R/univariate_diagnostics.R |  16 +++---
 R/univariate_pipeline.R    | 111 +++++++++++++++++++++++++++++++++++--
 2 files changed, 115 insertions(+), 12 deletions(-)

diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R
index 739f7717..dab979fa 100644
--- a/R/univariate_diagnostics.R
+++ b/R/univariate_diagnostics.R
@@ -20,7 +20,7 @@
 #' where not all method layers may contain valid SuSiE results or method layer.
 #'
 #' @export
-get_res <- function(con_data) {
+get_susie_result <- function(con_data) {
     if (length(con_data) == 0) return(NULL)
     if (length(con_data$susie_result_trimmed) == 0) {
         return(NULL)
@@ -61,7 +61,7 @@ get_res <- function(con_data) {
 #' @importFrom tibble z_to_pvalue
 #'
 #' @export
-process_cs <- function(con_data, cs_names, top_loci_table) {  
+extract_cs_info <- function(con_data, cs_names, top_loci_table) {  
   results <- map(seq_along(cs_names), function(i) {
     cs_name <- cs_names[i]
     indices <- con_data$susie_result_trimmed$sets$cs[[cs_name]]
@@ -131,13 +131,13 @@ process_cs <- function(con_data, cs_names, top_loci_table) {
 #' This function is particularly useful for capturing information about potentially 
 #' important variants that might be included in Credible Sets under different 
 #' analysis parameters or lower coverage. It maintains a structure similar to 
-#' the output of `process_cs()` for consistency in downstream analyses.
+#' the output of `extract_cs_info()` for consistency in downstream analyses.
 #'
 #' @seealso 
-#' \code{\link{process_cs}} for processing when Credible Sets are present.
+#' \code{\link{extract_cs_info}} for processing when Credible Sets are present.
 #'
 #' @export
-extract_top_pip <- function(con_data) {
+extract_top_pip_info <- function(con_data) {
   # Find the variant with the highest PIP
   top_pip_index <- which.max(con_data$susie_result_trimmed$pip)
   top_pip <- con_data$susie_result_trimmed$pip[top_pip_index]
@@ -157,13 +157,13 @@ extract_top_pip <- function(con_data) {
   )
 }
 
-#' Parse Credible Set Correlations from process_cs() Output
+#' Parse Credible Set Correlations from extract_cs_info() Output
 #'
-#' This function takes the output from `process_cs()` and expands the `cs_corr` column
+#' This function takes the output from `extract_cs_info()` and expands the `cs_corr` column
 #' into multiple columns, preserving the original order of correlations. It also
 #' calculates maximum and minimum correlation values for each Credible Set.
 #'
-#' @param df Data frame or data.table. The output from `process_cs()` function,
+#' @param df Data frame or data.table. The output from `extract_cs_info()` function,
 #'           containing a `cs_corr` column with correlation information.
 #'
 #' @return A data.table with the original columns from the input, plus:
diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index a80ba03c..40e5fc55 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -183,7 +183,7 @@ rss_analysis_pipeline <- function(
       coverage = c(0.95, 0.7, 0.5), signal_cutoff = 0.025
     ),
     impute = TRUE, impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01),
-    pip_cutoff_to_skip = 0, remove_indels = FALSE, comment_string = "#") {
+    pip_cutoff_to_skip = 0, remove_indels = FALSE, comment_string = "#", diagnostics = FALSE) {
   res <- list()
   rss_input <- load_rss_data(
     sumstat_path = sumstat_path, column_file_path = column_file_path,
@@ -252,14 +252,117 @@ rss_analysis_pipeline <- function(
     }
   }
   if (impute & !is.null(qc_method)) {
-    method_name <- paste0(toupper(qc_method), "_RAISS_imputed")
+    method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed")
   } else if (!impute & !is.null(qc_method)) {
-    method_name <- toupper(qc_method)
+    method_name <- paste0(finemapping_method, "_", toupper(qc_method))
   } else {
-    method_name <- "NO_QC"
+    method_name <- paste0(finemapping_method, "_", "NO_QC")
   }
   result_list <- list()
   result_list[[method_name]] <- res
   result_list[["rss_data_analyzed"]] <- sumstats
+  
+
+  if (diagnostics) {
+    if (length(res) > 0) { 
+        bvsr_res = get_susie_result(res)
+        bvsr_cs_num = if(!is.null(bvsr_res)) length(bvsr_res$sets$cs) else NULL
+        if (isTRUE(bvsr_cs_num > 0)) { # have CS
+            # Get the names of the credible sets
+            cs_names_bvsr = names(bvsr_res$sets$cs)
+            block_cs_metrics = process_cs(data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci)
+        } else { # no CS
+            bvsr_res = get_susie_result(res)
+            if (sum(bvsr_res$pip > signal_cutoff) > 0) {
+                block_cs_metrics = extract_top_pip(res)
+            }
+        }
+    }
+    # sensitive check for additional analyses
+    if (block_cs_metrics$variants_per_cs > 1) {
+      block_cs_metrics <- block_cs_metrics %>%
+        group_by(study, block) %>%
+        mutate(max_cs_corr_study_block = if(all(is.na(cs_corr_max))) {
+        NA_real_  # Use NA instead of -Inf when all values are NA
+        } else {
+        max(cs_corr_max, na.rm = TRUE)
+        }) %>%
+        ungroup()
+      if (block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5) {
+        finemapping_method <- "bayesian_conditional_regression"
+        pri_coverage <- finemapping_opts$coverage[1]
+        sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL
+        bcr <- susie_rss_pipeline(sumstats, LD_mat,
+          n = n, var_y = var_y,
+          L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step,
+          analysis_method = finemapping_method,
+          coverage = pri_coverage,
+          secondary_coverage = sec_coverage,
+          signal_cutoff = finemapping_opts$signal_cutoff
+        )
+        if (!is.null(qc_method)) {
+          bcr$outlier_number <- qc_results$outlier_number
+        }
+        if (impute & !is.null(qc_method)) {
+            method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed")
+        } else if (!impute & !is.null(qc_method)) {
+            method_name <- paste0(finemapping_method, "_", toupper(qc_method))
+        } else {
+            method_name <- paste0(finemapping_method, "_", "NO_QC")
+        }
+        result_list[[method_name]] <- bcr
+        
+        finemapping_method <- "single_effect"
+        sumstats <- preprocess_results$sumstats
+        LD_mat <- preprocess_results$LD_mat
+        ser <- susie_rss_pipeline(sumstats, LD_mat,
+          n = n, var_y = var_y,
+          L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step,
+          analysis_method = finemapping_method,
+          coverage = pri_coverage,
+          secondary_coverage = sec_coverage,
+          signal_cutoff = finemapping_opts$signal_cutoff
+        )
+        qc_method <- NULL
+        impute <- FALSE
+        # if (!is.null(qc_method)) {
+        #   ser$outlier_number <- qc_results$outlier_number
+        # }
+        if (impute & !is.null(qc_method)) {
+            method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed")
+        } else if (!impute & !is.null(qc_method)) {
+            method_name <- paste0(finemapping_method, "_", toupper(qc_method))
+        } else {
+            method_name <- paste0(finemapping_method, "_", "NO_QC")
+        }
+        result_list[[method_name]] <- ser
+      }
+    } else { # variants_per_cs > 1 or NA
+      finemapping_method <- "single_effect"
+      sumstats <- preprocess_results$sumstats
+      LD_mat <- preprocess_results$LD_mat
+      ser <- susie_rss_pipeline(sumstats, LD_mat,
+        n = n, var_y = var_y,
+        L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step,
+        analysis_method = finemapping_method,
+        coverage = pri_coverage,
+        secondary_coverage = sec_coverage,
+        signal_cutoff = finemapping_opts$signal_cutoff
+      )
+      qc_method <- NULL
+      impute <- FALSE
+      # if (!is.null(qc_method)) {
+      #   ser$outlier_number <- qc_results$outlier_number
+      # }
+      if (impute & !is.null(qc_method)) {
+          method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed")
+      } else if (!impute & !is.null(qc_method)) {
+          method_name <- paste0(finemapping_method, "_", toupper(qc_method))
+      } else {
+          method_name <- paste0(finemapping_method, "_", "NO_QC")
+      }
+      result_list[[method_name]] <- ser
+    } 
   return(result_list)
+  }
 }

From e9b02b41650fc4f576095ce0df97d305dd3c254f Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Thu, 17 Apr 2025 18:33:09 +0000
Subject: [PATCH 03/26] output diagnostic table

---
 R/univariate_pipeline.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index 40e5fc55..876b4be2 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -262,7 +262,7 @@ rss_analysis_pipeline <- function(
   result_list[[method_name]] <- res
   result_list[["rss_data_analyzed"]] <- sumstats
   
-
+  block_cs_metrics <- list()
   if (diagnostics) {
     if (length(res) > 0) { 
         bvsr_res = get_susie_result(res)
@@ -363,6 +363,6 @@ rss_analysis_pipeline <- function(
       }
       result_list[[method_name]] <- ser
     } 
-  return(result_list)
+  return(result_list = result_list, block_cs_metrics = block_cs_metrics)
   }
 }

From b0b5e59c637a36fed07b1eff5b0be8b15a885c7c Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@users.noreply.github.com>
Date: Thu, 17 Apr 2025 20:59:40 +0000
Subject: [PATCH 04/26] Update documentation

---
 NAMESPACE                    |  8 +++++++
 man/auto_decision.Rd         | 40 ++++++++++++++++++++++++++++++++++
 man/extract_cs_info.Rd       | 28 ++++++++++++++++++++++++
 man/extract_top_pip_info.Rd  | 42 ++++++++++++++++++++++++++++++++++++
 man/get_susie_result.Rd      | 30 ++++++++++++++++++++++++++
 man/parse_cs_corr.Rd         | 35 ++++++++++++++++++++++++++++++
 man/rss_analysis_pipeline.Rd |  3 ++-
 man/z_to_pvalue.Rd           | 35 ++++++++++++++++++++++++++++++
 8 files changed, 220 insertions(+), 1 deletion(-)
 create mode 100644 man/auto_decision.Rd
 create mode 100644 man/extract_cs_info.Rd
 create mode 100644 man/extract_top_pip_info.Rd
 create mode 100644 man/get_susie_result.Rd
 create mode 100644 man/parse_cs_corr.Rd
 create mode 100644 man/z_to_pvalue.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 9f3d3773..7dc90a82 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,6 +5,7 @@ export(QUAIL_rank_score_pipeline)
 export(adjust_susie_weights)
 export(align_variant_names)
 export(allele_qc)
+export(auto_decision)
 export(batch_load_twas_weights)
 export(bayes_a_rss_weights)
 export(bayes_a_weights)
@@ -28,6 +29,8 @@ export(ctwas_ld_loader)
 export(dentist)
 export(dentist_single_window)
 export(enet_weights)
+export(extract_cs_info)
+export(extract_top_pip_info)
 export(filter_invalid_summary_stat)
 export(filter_mixture_components)
 export(filter_variants_by_ld_reference)
@@ -38,6 +41,7 @@ export(fsusie_wrapper)
 export(gbayes_rss)
 export(get_ctwas_meta_data)
 export(get_nested_element)
+export(get_susie_result)
 export(glmnet_weights)
 export(harmonize_twas)
 export(lasso_weights)
@@ -70,6 +74,7 @@ export(multicontext_ld_clumping)
 export(multigene_udr)
 export(multivariate_analysis_pipeline)
 export(mvsusie_weights)
+export(parse_cs_corr)
 export(parse_region)
 export(parse_snp_info)
 export(parse_variant_id)
@@ -107,6 +112,7 @@ export(twas_z)
 export(univariate_analysis_pipeline)
 export(wald_test_pval)
 export(xqtl_enrichment_wrapper)
+export(z_to_pvalue)
 import(vroom)
 importFrom(IRanges,IRanges)
 importFrom(IRanges,end)
@@ -120,6 +126,7 @@ importFrom(bigstatsr,FBM.code256)
 importFrom(coloc,coloc.bf_bf)
 importFrom(data.table,as.data.table)
 importFrom(data.table,fread)
+importFrom(data.table,setDT)
 importFrom(data.table,setnames)
 importFrom(doFuture,registerDoFuture)
 importFrom(dplyr,"%>%")
@@ -127,6 +134,7 @@ importFrom(dplyr,across)
 importFrom(dplyr,arrange)
 importFrom(dplyr,as_tibble)
 importFrom(dplyr,bind_rows)
+importFrom(dplyr,case_when)
 importFrom(dplyr,distinct)
 importFrom(dplyr,everything)
 importFrom(dplyr,filter)
diff --git a/man/auto_decision.Rd b/man/auto_decision.Rd
new file mode 100644
index 00000000..65cae364
--- /dev/null
+++ b/man/auto_decision.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/univariate_diagnostics.R
+\name{auto_decision}
+\alias{auto_decision}
+\title{Process Credible Set Information and Determine Updating Strategy}
+\usage{
+auto_decision(df, high_corr_cols)
+}
+\arguments{
+\item{df}{Data frame. Contains information about Credible Sets for a specific study and block.}
+
+\item{high_corr_cols}{Character vector. Names of columns in df that represent high correlations.}
+}
+\value{
+A modified data frame with additional columns attached to the diagnostic table:
+  \item{top_cs}{Logical. TRUE for the CS with the highest absolute Z-score.}
+  \item{tagged_cs}{Logical. TRUE for CS that are considered "tagged" based on p-value and correlation criteria.}
+  \item{method}{Character. The determined updating strategy ("BVSR", "SER", or "BCR").}
+}
+\description{
+This function categorizes Credible Sets (CS) within a study block into different 
+updating strategies based on their statistical properties and correlations.
+}
+\details{
+This function performs the following steps:
+1. Identifies the top CS based on the highest absolute Z-score.
+2. Identifies tagged CS based on high p-value and high correlations.
+3. Counts total, tagged, and remaining CS.
+4. Determines the appropriate updating method based on these counts.
+
+The updating methods are:
+- BVSR (Bayesian Variable Selection Regression): Used when there's only one CS or all CS are accounted for.
+- SER (Single Effect Regression): Used when there are tagged CS but no remaining untagged CS.
+- BCR (Bayesian Conditional Regression): Used when there are remaining untagged CS.
+}
+\note{
+This function is part of a developing methodology for automatically handling 
+finemapping results. The thresholds and criteria used (e.g., p-value > 1e-4 for tagging) 
+are subject to refinement and may change in future versions.
+}
diff --git a/man/extract_cs_info.Rd b/man/extract_cs_info.Rd
new file mode 100644
index 00000000..b91c8d71
--- /dev/null
+++ b/man/extract_cs_info.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/univariate_diagnostics.R
+\name{extract_cs_info}
+\alias{extract_cs_info}
+\title{Process Credible Sets (CS) from Finemapping Results}
+\usage{
+extract_cs_info(con_data, cs_names, top_loci_table)
+}
+\arguments{
+\item{con_data}{List. The method layer data from a finemapping RDS file that is not empty.}
+
+\item{cs_names}{Character vector. Names of the Credible Sets, usually in the format "L_<number>".}
+
+\item{top_loci_table}{Data frame. The $top_loci layer data from the finemapping results.}
+}
+\value{
+
+}
+\description{
+This function extracts and processes information for each Credible Set (CS) 
+from finemapping results, typically obtained from a finemapping RDS file.
+}
+\details{
+This function is designed to be used only when there is at least one Credible Set 
+in the finemapping results usually for a given study and block. It processes each CS, 
+extracting key information such as the top variant, its statistics, and 
+correlation information between multiple CS if available.
+}
diff --git a/man/extract_top_pip_info.Rd b/man/extract_top_pip_info.Rd
new file mode 100644
index 00000000..2e319b89
--- /dev/null
+++ b/man/extract_top_pip_info.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/univariate_diagnostics.R
+\name{extract_top_pip_info}
+\alias{extract_top_pip_info}
+\title{Extract Information for Top Variant from Finemapping Results}
+\usage{
+extract_top_pip_info(con_data)
+}
+\arguments{
+\item{con_data}{List. The method layer data from a finemapping RDS file.}
+}
+\value{
+A data frame with one row containing the following columns:
+  \item{cs_name}{NA (as no CS is identified)}
+  \item{variants_per_cs}{NA (as no CS is identified)}
+  \item{top_variant}{ID of the variant with the highest PIP}
+  \item{top_variant_index}{Index of the top variant in the original data}
+  \item{top_pip}{Highest Posterior Inclusion Probability (PIP)}
+  \item{top_z}{Z-score of the top variant}
+  \item{p_value}{P-value calculated from the top Z-score}
+  \item{cs_corr}{NA (as no CS correlation is available)}
+}
+\description{
+This function extracts information about the variant with the highest Posterior 
+Inclusion Probability (PIP) from finemapping results, typically used when no 
+Credible Sets (CS) are identified in the analysis.
+}
+\details{
+This function is designed to be used when no Credible Sets are identified in 
+the finemapping results, but information about the most significant variant 
+is still desired. It identifies the variant with the highest PIP and extracts 
+relevant statistical information.
+}
+\note{
+This function is particularly useful for capturing information about potentially 
+important variants that might be included in Credible Sets under different 
+analysis parameters or lower coverage. It maintains a structure similar to 
+the output of `extract_cs_info()` for consistency in downstream analyses.
+}
+\seealso{
+\code{\link{extract_cs_info}} for processing when Credible Sets are present.
+}
diff --git a/man/get_susie_result.Rd b/man/get_susie_result.Rd
new file mode 100644
index 00000000..25706df1
--- /dev/null
+++ b/man/get_susie_result.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/univariate_diagnostics.R
+\name{get_susie_result}
+\alias{get_susie_result}
+\title{Extract SuSiE Results from Finemapping Data}
+\usage{
+get_susie_result(con_data)
+}
+\arguments{
+\item{con_data}{List. The method layer data from a finemapping RDS file.}
+}
+\value{
+The trimmed SuSiE results (`$susie_result_trimmed`) if available,
+otherwise NULL.
+}
+\description{
+This function extracts the trimmed SuSiE results from a finemapping data object,
+typically obtained from a finemapping RDS file. It's designed to work with
+the method layer of these files, often named as 'method_RAISS_imputed', 'method',
+or 'method_NO_QC'. This layer is right under the study layer.
+}
+\details{
+The function checks if the input data is empty or if the `$susie_result_trimmed`
+element is missing. It returns NULL in these cases. If `$susie_result_trimmed`
+exists and is not empty, it returns this element.
+}
+\note{
+This function is particularly useful when working with large datasets
+where not all method layers may contain valid SuSiE results or method layer.
+}
diff --git a/man/parse_cs_corr.Rd b/man/parse_cs_corr.Rd
new file mode 100644
index 00000000..2d4d5568
--- /dev/null
+++ b/man/parse_cs_corr.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/univariate_diagnostics.R
+\name{parse_cs_corr}
+\alias{parse_cs_corr}
+\title{Parse Credible Set Correlations from extract_cs_info() Output}
+\usage{
+parse_cs_corr(df)
+}
+\arguments{
+\item{df}{Data frame or data.table. The output from `extract_cs_info()` function,
+containing a `cs_corr` column with correlation information.}
+}
+\value{
+A data.table with the original columns from the input, plus:
+  \item{cs_corr_1, cs_corr_2, ...}{Individual correlation values, with column names
+        based on their position in the original string}
+  \item{cs_corr_max}{Maximum absolute correlation value (excluding 1)}
+  \item{cs_corr_min}{Minimum absolute correlation value}
+}
+\description{
+This function takes the output from `extract_cs_info()` and expands the `cs_corr` column
+into multiple columns, preserving the original order of correlations. It also
+calculates maximum and minimum correlation values for each Credible Set.
+}
+\details{
+The function splits the `cs_corr` column, which typically contains correlation
+values separated by '|', into individual columns. It preserves the order of
+these correlations, allowing for easy interpretation in a matrix-like format.
+}
+\note{
+- This function converts the input to a data.table if it isn't already one.
+- It handles cases where correlation values might be missing or not in the expected format.
+- The function assumes that correlation values of 1 represent self-correlations and excludes
+  these when calculating max and min correlations.
+}
diff --git a/man/rss_analysis_pipeline.Rd b/man/rss_analysis_pipeline.Rd
index 2f3b8c4d..4883920a 100644
--- a/man/rss_analysis_pipeline.Rd
+++ b/man/rss_analysis_pipeline.Rd
@@ -23,7 +23,8 @@ rss_analysis_pipeline(
   impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01),
   pip_cutoff_to_skip = 0,
   remove_indels = FALSE,
-  comment_string = "#"
+  comment_string = "#",
+  diagnostics = FALSE
 )
 }
 \arguments{
diff --git a/man/z_to_pvalue.Rd b/man/z_to_pvalue.Rd
new file mode 100644
index 00000000..fd47ae03
--- /dev/null
+++ b/man/z_to_pvalue.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/misc.R
+\name{z_to_pvalue}
+\alias{z_to_pvalue}
+\title{Convert Z-scores to P-values}
+\usage{
+z_to_pvalue(z)
+}
+\arguments{
+\item{z}{Numeric vector. The z-scores to be converted to p-values.}
+}
+\value{
+A numeric vector of p-values corresponding to the input z-scores.
+}
+\description{
+This function calculates p-values from given z-scores using a two-tailed normal distribution.
+It supports vector input to process multiple z-scores simultaneously.
+}
+\details{
+The function uses the following formula to calculate p-values:
+p-value = 2 * Φ(-|z|)
+Where Φ is the cumulative distribution function of the standard normal distribution.
+}
+\note{
+This function assumes that the input z-scores are from a two-tailed test and
+are normally distributed. It calculates two-sided p-values.
+For extremely large absolute z-scores, the resulting p-values may be computed as zero
+due to floating-point limitations in R. This occurs when the absolute z-score > 37.
+}
+\examples{
+z <- c(2.5, -1.8, 3.2, 0.7)
+pvalues <- z_to_pvalue(z)
+print(pvalues)
+
+}

From 02839b3d131c9e9e575186b69942651e7ff715b2 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Fri, 18 Apr 2025 19:08:53 +0000
Subject: [PATCH 05/26] update the document and temporarily store this version

---
 NAMESPACE                    |   8 +++
 R/univariate_diagnostics.R   |   5 +-
 R/univariate_pipeline.R      | 126 ++++++++++++++++++-----------------
 Untitled.ipynb               |  64 ++++++++++++++++++
 man/auto_decision.Rd         |  40 +++++++++++
 man/extract_cs_info.Rd       |  37 ++++++++++
 man/extract_top_pip_info.Rd  |  42 ++++++++++++
 man/get_susie_result.Rd      |  30 +++++++++
 man/parse_cs_corr.Rd         |  35 ++++++++++
 man/rss_analysis_pipeline.Rd |   3 +-
 man/z_to_pvalue.Rd           |  35 ++++++++++
 11 files changed, 359 insertions(+), 66 deletions(-)
 create mode 100644 Untitled.ipynb
 create mode 100644 man/auto_decision.Rd
 create mode 100644 man/extract_cs_info.Rd
 create mode 100644 man/extract_top_pip_info.Rd
 create mode 100644 man/get_susie_result.Rd
 create mode 100644 man/parse_cs_corr.Rd
 create mode 100644 man/z_to_pvalue.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 9f3d3773..7dc90a82 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,6 +5,7 @@ export(QUAIL_rank_score_pipeline)
 export(adjust_susie_weights)
 export(align_variant_names)
 export(allele_qc)
+export(auto_decision)
 export(batch_load_twas_weights)
 export(bayes_a_rss_weights)
 export(bayes_a_weights)
@@ -28,6 +29,8 @@ export(ctwas_ld_loader)
 export(dentist)
 export(dentist_single_window)
 export(enet_weights)
+export(extract_cs_info)
+export(extract_top_pip_info)
 export(filter_invalid_summary_stat)
 export(filter_mixture_components)
 export(filter_variants_by_ld_reference)
@@ -38,6 +41,7 @@ export(fsusie_wrapper)
 export(gbayes_rss)
 export(get_ctwas_meta_data)
 export(get_nested_element)
+export(get_susie_result)
 export(glmnet_weights)
 export(harmonize_twas)
 export(lasso_weights)
@@ -70,6 +74,7 @@ export(multicontext_ld_clumping)
 export(multigene_udr)
 export(multivariate_analysis_pipeline)
 export(mvsusie_weights)
+export(parse_cs_corr)
 export(parse_region)
 export(parse_snp_info)
 export(parse_variant_id)
@@ -107,6 +112,7 @@ export(twas_z)
 export(univariate_analysis_pipeline)
 export(wald_test_pval)
 export(xqtl_enrichment_wrapper)
+export(z_to_pvalue)
 import(vroom)
 importFrom(IRanges,IRanges)
 importFrom(IRanges,end)
@@ -120,6 +126,7 @@ importFrom(bigstatsr,FBM.code256)
 importFrom(coloc,coloc.bf_bf)
 importFrom(data.table,as.data.table)
 importFrom(data.table,fread)
+importFrom(data.table,setDT)
 importFrom(data.table,setnames)
 importFrom(doFuture,registerDoFuture)
 importFrom(dplyr,"%>%")
@@ -127,6 +134,7 @@ importFrom(dplyr,across)
 importFrom(dplyr,arrange)
 importFrom(dplyr,as_tibble)
 importFrom(dplyr,bind_rows)
+importFrom(dplyr,case_when)
 importFrom(dplyr,distinct)
 importFrom(dplyr,everything)
 importFrom(dplyr,filter)
diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R
index dab979fa..cee9bc70 100644
--- a/R/univariate_diagnostics.R
+++ b/R/univariate_diagnostics.R
@@ -47,8 +47,8 @@ get_susie_result <- function(con_data) {
 #'   \item{top_pip}{Highest Posterior Inclusion Probability (PIP) in the CS}
 #'   \item{top_z}{Z-score of the top variant}
 #'   \item{p_value}{P-value calculated from the top Z-score}
-#'   \item{cs_corr}{pairwise correlations of other CSs in this RDS with the CS of 
-#`    the current row, delimited by `|`, if there are more than one CSs in this RDS file}
+#'   \item{cs_corr}{Pairwise correlations of other CSs in this RDS with the CS of 
+#'     the current row, delimited by '|', if there is more than one CS in this RDS file}
 #'
 #' @details
 #' This function is designed to be used only when there is at least one Credible Set 
@@ -58,7 +58,6 @@ get_susie_result <- function(con_data) {
 #'
 #' @importFrom purrr map
 #' @importFrom dplyr bind_rows
-#' @importFrom tibble z_to_pvalue
 #'
 #' @export
 extract_cs_info <- function(con_data, cs_names, top_loci_table) {  
diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index 876b4be2..6e7575d2 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -237,6 +237,8 @@ rss_analysis_pipeline <- function(
 
   # Perform fine-mapping
   if (!is.null(finemapping_method)) {
+    print("finemapping_method")
+    print(finemapping_method)
     pri_coverage <- finemapping_opts$coverage[1]
     sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL
     res <- susie_rss_pipeline(sumstats, LD_mat,
@@ -247,6 +249,8 @@ rss_analysis_pipeline <- function(
       secondary_coverage = sec_coverage,
       signal_cutoff = finemapping_opts$signal_cutoff
     )
+    print("str(res)")
+    print(str(res))
     if (!is.null(qc_method)) {
       res$outlier_number <- qc_results$outlier_number
     }
@@ -273,45 +277,71 @@ rss_analysis_pipeline <- function(
             block_cs_metrics = process_cs(data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci)
         } else { # no CS
             bvsr_res = get_susie_result(res)
-            if (sum(bvsr_res$pip > signal_cutoff) > 0) {
+            if (sum(bvsr_res$pip > finemapping_opts$signal_cutoff) > 0) {
                 block_cs_metrics = extract_top_pip(res)
             }
         }
     }
+    print("block_cs_metrics")
+    print(block_cs_metrics)
     # sensitive check for additional analyses
-    if (block_cs_metrics$variants_per_cs > 1) {
-      block_cs_metrics <- block_cs_metrics %>%
-        group_by(study, block) %>%
-        mutate(max_cs_corr_study_block = if(all(is.na(cs_corr_max))) {
-        NA_real_  # Use NA instead of -Inf when all values are NA
-        } else {
-        max(cs_corr_max, na.rm = TRUE)
-        }) %>%
-        ungroup()
-      if (block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5) {
-        finemapping_method <- "bayesian_conditional_regression"
-        pri_coverage <- finemapping_opts$coverage[1]
-        sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL
-        bcr <- susie_rss_pipeline(sumstats, LD_mat,
-          n = n, var_y = var_y,
-          L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step,
-          analysis_method = finemapping_method,
-          coverage = pri_coverage,
-          secondary_coverage = sec_coverage,
-          signal_cutoff = finemapping_opts$signal_cutoff
-        )
-        if (!is.null(qc_method)) {
-          bcr$outlier_number <- qc_results$outlier_number
+    if (!is.null(block_cs_metrics) && length(block_cs_metrics) > 0) {
+      if (block_cs_metrics$variants_per_cs > 1) {
+        block_cs_metrics <- block_cs_metrics %>%
+          group_by(study, block) %>%
+          mutate(max_cs_corr_study_block = if(all(is.na(cs_corr_max))) {
+            NA_real_  # Use NA instead of -Inf when all values are NA
+          } else {
+            max(cs_corr_max, na.rm = TRUE)
+          }) %>%
+          ungroup()
+        if (block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5) {
+          finemapping_method <- "bayesian_conditional_regression"
+          pri_coverage <- finemapping_opts$coverage[1]
+          sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL
+          bcr <- susie_rss_pipeline(sumstats, LD_mat,
+            n = n, var_y = var_y,
+            L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step,
+            analysis_method = finemapping_method,
+            coverage = pri_coverage,
+            secondary_coverage = sec_coverage,
+            signal_cutoff = finemapping_opts$signal_cutoff
+          )
+          if (!is.null(qc_method)) {
+            bcr$outlier_number <- qc_results$outlier_number
+          }
+          if (impute & !is.null(qc_method)) {
+              method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed")
+          } else if (!impute & !is.null(qc_method)) {
+              method_name <- paste0(finemapping_method, "_", toupper(qc_method))
+          } else {
+              method_name <- paste0(finemapping_method, "_", "NO_QC")
+          }
+          result_list[[method_name]] <- bcr
+          
+          finemapping_method <- "single_effect"
+          sumstats <- preprocess_results$sumstats
+          LD_mat <- preprocess_results$LD_mat
+          ser <- susie_rss_pipeline(sumstats, LD_mat,
+            n = n, var_y = var_y,
+            L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step,
+            analysis_method = finemapping_method,
+            coverage = pri_coverage,
+            secondary_coverage = sec_coverage,
+            signal_cutoff = finemapping_opts$signal_cutoff
+          )
+          qc_method <- NULL
+          impute <- FALSE
+          if (impute & !is.null(qc_method)) {
+              method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed")
+          } else if (!impute & !is.null(qc_method)) {
+              method_name <- paste0(finemapping_method, "_", toupper(qc_method))
+          } else {
+              method_name <- paste0(finemapping_method, "_", "NO_QC")
+          }
+          result_list[[method_name]] <- ser
         }
-        if (impute & !is.null(qc_method)) {
-            method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed")
-        } else if (!impute & !is.null(qc_method)) {
-            method_name <- paste0(finemapping_method, "_", toupper(qc_method))
-        } else {
-            method_name <- paste0(finemapping_method, "_", "NO_QC")
-        }
-        result_list[[method_name]] <- bcr
-        
+      } else { # variants_per_cs > 1 or NA
         finemapping_method <- "single_effect"
         sumstats <- preprocess_results$sumstats
         LD_mat <- preprocess_results$LD_mat
@@ -325,9 +355,6 @@ rss_analysis_pipeline <- function(
         )
         qc_method <- NULL
         impute <- FALSE
-        # if (!is.null(qc_method)) {
-        #   ser$outlier_number <- qc_results$outlier_number
-        # }
         if (impute & !is.null(qc_method)) {
             method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed")
         } else if (!impute & !is.null(qc_method)) {
@@ -337,32 +364,7 @@ rss_analysis_pipeline <- function(
         }
         result_list[[method_name]] <- ser
       }
-    } else { # variants_per_cs > 1 or NA
-      finemapping_method <- "single_effect"
-      sumstats <- preprocess_results$sumstats
-      LD_mat <- preprocess_results$LD_mat
-      ser <- susie_rss_pipeline(sumstats, LD_mat,
-        n = n, var_y = var_y,
-        L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step,
-        analysis_method = finemapping_method,
-        coverage = pri_coverage,
-        secondary_coverage = sec_coverage,
-        signal_cutoff = finemapping_opts$signal_cutoff
-      )
-      qc_method <- NULL
-      impute <- FALSE
-      # if (!is.null(qc_method)) {
-      #   ser$outlier_number <- qc_results$outlier_number
-      # }
-      if (impute & !is.null(qc_method)) {
-          method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed")
-      } else if (!impute & !is.null(qc_method)) {
-          method_name <- paste0(finemapping_method, "_", toupper(qc_method))
-      } else {
-          method_name <- paste0(finemapping_method, "_", "NO_QC")
-      }
-      result_list[[method_name]] <- ser
     } 
-  return(result_list = result_list, block_cs_metrics = block_cs_metrics)
   }
+  return(list(result_list = result_list, block_cs_metrics = block_cs_metrics))
 }
diff --git a/Untitled.ipynb b/Untitled.ipynb
new file mode 100644
index 00000000..c0b6edfb
--- /dev/null
+++ b/Untitled.ipynb
@@ -0,0 +1,64 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "d0dd1849-98c8-4445-902d-013417dbf52b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1m\u001b[22m\u001b[36mi\u001b[39m Updating \u001b[34mpecotmr\u001b[39m documentation\n",
+      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quantile_twas_weight.R:415: \u001b[1m@importFrom\u001b[22m Excluding unknown export from stats:\n",
+      "  `qr`.\n",
+      "\u001b[1m\u001b[22m\u001b[36mi\u001b[39m Loading \u001b[34mpecotmr\u001b[39m\n",
+      "Warning message:\n",
+      "\"replacing previous import 'vroom::cols' by 'readr::cols' when loading 'pecotmr'\"\n",
+      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:10: \u001b[1m@examples\u001b[22m requires a value.\n",
+      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:11: \u001b[1m@noRd\u001b[22m must not be followed by any text.\n",
+      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:98: \u001b[1m@examples\u001b[22m requires a value.\n",
+      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:99: \u001b[1m@noRd\u001b[22m must not be followed by any text.\n",
+      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quantile_twas_weight.R:415: \u001b[1m@importFrom\u001b[22m Excluding unknown export from stats:\n",
+      "  `qr`.\n",
+      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_univariate_data.Rd': Skipping; no name and/or title.\n",
+      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_regression_data.Rd': Skipping; no name and/or title.\n",
+      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_multivariate_data.Rd': Skipping; no name and/or\n",
+      "  title.\n",
+      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_functional_data.Rd': Skipping; no name and/or title.\n",
+      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'mrash_weights.Rd': Skipping; no name and/or title.\n"
+     ]
+    }
+   ],
+   "source": [
+    "devtools::document()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "28154cc4-e22b-4b7e-b257-2605b30e8485",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "R",
+   "language": "R",
+   "name": "ir"
+  },
+  "language_info": {
+   "codemirror_mode": "r",
+   "file_extension": ".r",
+   "mimetype": "text/x-r-source",
+   "name": "R",
+   "pygments_lexer": "r",
+   "version": "4.3.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/man/auto_decision.Rd b/man/auto_decision.Rd
new file mode 100644
index 00000000..65cae364
--- /dev/null
+++ b/man/auto_decision.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/univariate_diagnostics.R
+\name{auto_decision}
+\alias{auto_decision}
+\title{Process Credible Set Information and Determine Updating Strategy}
+\usage{
+auto_decision(df, high_corr_cols)
+}
+\arguments{
+\item{df}{Data frame. Contains information about Credible Sets for a specific study and block.}
+
+\item{high_corr_cols}{Character vector. Names of columns in df that represent high correlations.}
+}
+\value{
+A modified data frame with additional columns attached to the diagnostic table:
+  \item{top_cs}{Logical. TRUE for the CS with the highest absolute Z-score.}
+  \item{tagged_cs}{Logical. TRUE for CS that are considered "tagged" based on p-value and correlation criteria.}
+  \item{method}{Character. The determined updating strategy ("BVSR", "SER", or "BCR").}
+}
+\description{
+This function categorizes Credible Sets (CS) within a study block into different 
+updating strategies based on their statistical properties and correlations.
+}
+\details{
+This function performs the following steps:
+1. Identifies the top CS based on the highest absolute Z-score.
+2. Identifies tagged CS based on high p-value and high correlations.
+3. Counts total, tagged, and remaining CS.
+4. Determines the appropriate updating method based on these counts.
+
+The updating methods are:
+- BVSR (Bayesian Variable Selection Regression): Used when there's only one CS or all CS are accounted for.
+- SER (Single Effect Regression): Used when there are tagged CS but no remaining untagged CS.
+- BCR (Bayesian Conditional Regression): Used when there are remaining untagged CS.
+}
+\note{
+This function is part of a developing methodology for automatically handling 
+finemapping results. The thresholds and criteria used (e.g., p-value > 1e-4 for tagging) 
+are subject to refinement and may change in future versions.
+}
diff --git a/man/extract_cs_info.Rd b/man/extract_cs_info.Rd
new file mode 100644
index 00000000..b9ccba29
--- /dev/null
+++ b/man/extract_cs_info.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/univariate_diagnostics.R
+\name{extract_cs_info}
+\alias{extract_cs_info}
+\title{Process Credible Sets (CS) from Finemapping Results}
+\usage{
+extract_cs_info(con_data, cs_names, top_loci_table)
+}
+\arguments{
+\item{con_data}{List. The method layer data from a finemapping RDS file that is not empty.}
+
+\item{cs_names}{Character vector. Names of the Credible Sets, usually in the format "L_<number>".}
+
+\item{top_loci_table}{Data frame. The $top_loci layer data from the finemapping results.}
+}
+\value{
+A data frame with one row per CS, containing the following columns:
+  \item{cs_name}{Name of the Credible Set}
+  \item{variants_per_cs}{Number of variants in the CS}
+  \item{top_variant}{ID of the variant with the highest PIP in the CS}
+  \item{top_variant_index}{Global index of the top variant}
+  \item{top_pip}{Highest Posterior Inclusion Probability (PIP) in the CS}
+  \item{top_z}{Z-score of the top variant}
+  \item{p_value}{P-value calculated from the top Z-score}
+  \item{cs_corr}{Pairwise correlations of other CSs in this RDS with the CS of 
+    the current row, delimited by '|', if there is more than one CS in this RDS file}
+}
+\description{
+This function extracts and processes information for each Credible Set (CS) 
+from finemapping results, typically obtained from a finemapping RDS file.
+}
+\details{
+This function is designed to be used only when there is at least one Credible Set 
+in the finemapping results usually for a given study and block. It processes each CS, 
+extracting key information such as the top variant, its statistics, and 
+correlation information between multiple CS if available.
+}
diff --git a/man/extract_top_pip_info.Rd b/man/extract_top_pip_info.Rd
new file mode 100644
index 00000000..2e319b89
--- /dev/null
+++ b/man/extract_top_pip_info.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/univariate_diagnostics.R
+\name{extract_top_pip_info}
+\alias{extract_top_pip_info}
+\title{Extract Information for Top Variant from Finemapping Results}
+\usage{
+extract_top_pip_info(con_data)
+}
+\arguments{
+\item{con_data}{List. The method layer data from a finemapping RDS file.}
+}
+\value{
+A data frame with one row containing the following columns:
+  \item{cs_name}{NA (as no CS is identified)}
+  \item{variants_per_cs}{NA (as no CS is identified)}
+  \item{top_variant}{ID of the variant with the highest PIP}
+  \item{top_variant_index}{Index of the top variant in the original data}
+  \item{top_pip}{Highest Posterior Inclusion Probability (PIP)}
+  \item{top_z}{Z-score of the top variant}
+  \item{p_value}{P-value calculated from the top Z-score}
+  \item{cs_corr}{NA (as no CS correlation is available)}
+}
+\description{
+This function extracts information about the variant with the highest Posterior 
+Inclusion Probability (PIP) from finemapping results, typically used when no 
+Credible Sets (CS) are identified in the analysis.
+}
+\details{
+This function is designed to be used when no Credible Sets are identified in 
+the finemapping results, but information about the most significant variant 
+is still desired. It identifies the variant with the highest PIP and extracts 
+relevant statistical information.
+}
+\note{
+This function is particularly useful for capturing information about potentially 
+important variants that might be included in Credible Sets under different 
+analysis parameters or lower coverage. It maintains a structure similar to 
+the output of `extract_cs_info()` for consistency in downstream analyses.
+}
+\seealso{
+\code{\link{extract_cs_info}} for processing when Credible Sets are present.
+}
diff --git a/man/get_susie_result.Rd b/man/get_susie_result.Rd
new file mode 100644
index 00000000..25706df1
--- /dev/null
+++ b/man/get_susie_result.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/univariate_diagnostics.R
+\name{get_susie_result}
+\alias{get_susie_result}
+\title{Extract SuSiE Results from Finemapping Data}
+\usage{
+get_susie_result(con_data)
+}
+\arguments{
+\item{con_data}{List. The method layer data from a finemapping RDS file.}
+}
+\value{
+The trimmed SuSiE results (`$susie_result_trimmed`) if available,
+otherwise NULL.
+}
+\description{
+This function extracts the trimmed SuSiE results from a finemapping data object,
+typically obtained from a finemapping RDS file. It's designed to work with
+the method layer of these files, often named as 'method_RAISS_imputed', 'method',
+or 'method_NO_QC'. This layer is right under the study layer.
+}
+\details{
+The function checks if the input data is empty or if the `$susie_result_trimmed`
+element is missing. It returns NULL in these cases. If `$susie_result_trimmed`
+exists and is not empty, it returns this element.
+}
+\note{
+This function is particularly useful when working with large datasets
+where not all method layers may contain valid SuSiE results or method layer.
+}
diff --git a/man/parse_cs_corr.Rd b/man/parse_cs_corr.Rd
new file mode 100644
index 00000000..2d4d5568
--- /dev/null
+++ b/man/parse_cs_corr.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/univariate_diagnostics.R
+\name{parse_cs_corr}
+\alias{parse_cs_corr}
+\title{Parse Credible Set Correlations from extract_cs_info() Output}
+\usage{
+parse_cs_corr(df)
+}
+\arguments{
+\item{df}{Data frame or data.table. The output from `extract_cs_info()` function,
+containing a `cs_corr` column with correlation information.}
+}
+\value{
+A data.table with the original columns from the input, plus:
+  \item{cs_corr_1, cs_corr_2, ...}{Individual correlation values, with column names
+        based on their position in the original string}
+  \item{cs_corr_max}{Maximum absolute correlation value (excluding 1)}
+  \item{cs_corr_min}{Minimum absolute correlation value}
+}
+\description{
+This function takes the output from `extract_cs_info()` and expands the `cs_corr` column
+into multiple columns, preserving the original order of correlations. It also
+calculates maximum and minimum correlation values for each Credible Set.
+}
+\details{
+The function splits the `cs_corr` column, which typically contains correlation
+values separated by '|', into individual columns. It preserves the order of
+these correlations, allowing for easy interpretation in a matrix-like format.
+}
+\note{
+- This function converts the input to a data.table if it isn't already one.
+- It handles cases where correlation values might be missing or not in the expected format.
+- The function assumes that correlation values of 1 represent self-correlations and excludes
+  these when calculating max and min correlations.
+}
diff --git a/man/rss_analysis_pipeline.Rd b/man/rss_analysis_pipeline.Rd
index 2f3b8c4d..4883920a 100644
--- a/man/rss_analysis_pipeline.Rd
+++ b/man/rss_analysis_pipeline.Rd
@@ -23,7 +23,8 @@ rss_analysis_pipeline(
   impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01),
   pip_cutoff_to_skip = 0,
   remove_indels = FALSE,
-  comment_string = "#"
+  comment_string = "#",
+  diagnostics = FALSE
 )
 }
 \arguments{
diff --git a/man/z_to_pvalue.Rd b/man/z_to_pvalue.Rd
new file mode 100644
index 00000000..fd47ae03
--- /dev/null
+++ b/man/z_to_pvalue.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/misc.R
+\name{z_to_pvalue}
+\alias{z_to_pvalue}
+\title{Convert Z-scores to P-values}
+\usage{
+z_to_pvalue(z)
+}
+\arguments{
+\item{z}{Numeric vector. The z-scores to be converted to p-values.}
+}
+\value{
+A numeric vector of p-values corresponding to the input z-scores.
+}
+\description{
+This function calculates p-values from given z-scores using a two-tailed normal distribution.
+It supports vector input to process multiple z-scores simultaneously.
+}
+\details{
+The function uses the following formula to calculate p-values:
+p-value = 2 * Φ(-|z|)
+Where Φ is the cumulative distribution function of the standard normal distribution.
+}
+\note{
+This function assumes that the input z-scores are from a two-tailed test and
+are normally distributed. It calculates two-sided p-values.
+For extremely large absolute z-scores, the resulting p-values may be computed as zero
+due to floating-point limitations in R. This occurs when the absolute z-score > 37.
+}
+\examples{
+z <- c(2.5, -1.8, 3.2, 0.7)
+pvalues <- z_to_pvalue(z)
+print(pvalues)
+
+}

From eab9173c207168592f50af13c4501a04da3a2c6b Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Fri, 18 Apr 2025 19:09:54 +0000
Subject: [PATCH 06/26] rm unnecessary files

---
 Untitled.ipynb | 64 --------------------------------------------------
 1 file changed, 64 deletions(-)
 delete mode 100644 Untitled.ipynb

diff --git a/Untitled.ipynb b/Untitled.ipynb
deleted file mode 100644
index c0b6edfb..00000000
--- a/Untitled.ipynb
+++ /dev/null
@@ -1,64 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "d0dd1849-98c8-4445-902d-013417dbf52b",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[1m\u001b[22m\u001b[36mi\u001b[39m Updating \u001b[34mpecotmr\u001b[39m documentation\n",
-      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quantile_twas_weight.R:415: \u001b[1m@importFrom\u001b[22m Excluding unknown export from stats:\n",
-      "  `qr`.\n",
-      "\u001b[1m\u001b[22m\u001b[36mi\u001b[39m Loading \u001b[34mpecotmr\u001b[39m\n",
-      "Warning message:\n",
-      "\"replacing previous import 'vroom::cols' by 'readr::cols' when loading 'pecotmr'\"\n",
-      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:10: \u001b[1m@examples\u001b[22m requires a value.\n",
-      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:11: \u001b[1m@noRd\u001b[22m must not be followed by any text.\n",
-      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:98: \u001b[1m@examples\u001b[22m requires a value.\n",
-      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quail_vqtl.R:99: \u001b[1m@noRd\u001b[22m must not be followed by any text.\n",
-      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m quantile_twas_weight.R:415: \u001b[1m@importFrom\u001b[22m Excluding unknown export from stats:\n",
-      "  `qr`.\n",
-      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_univariate_data.Rd': Skipping; no name and/or title.\n",
-      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_regression_data.Rd': Skipping; no name and/or title.\n",
-      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_multivariate_data.Rd': Skipping; no name and/or\n",
-      "  title.\n",
-      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'load_regional_functional_data.Rd': Skipping; no name and/or title.\n",
-      "\u001b[1m\u001b[22m\u001b[31mx\u001b[39m In topic 'mrash_weights.Rd': Skipping; no name and/or title.\n"
-     ]
-    }
-   ],
-   "source": [
-    "devtools::document()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "28154cc4-e22b-4b7e-b257-2605b30e8485",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "R",
-   "language": "R",
-   "name": "ir"
-  },
-  "language_info": {
-   "codemirror_mode": "r",
-   "file_extension": ".r",
-   "mimetype": "text/x-r-source",
-   "name": "R",
-   "pygments_lexer": "r",
-   "version": "4.3.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

From 300dc078a889d38b378d702fd1c24bed13135cad Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@users.noreply.github.com>
Date: Fri, 18 Apr 2025 19:31:08 +0000
Subject: [PATCH 07/26] Update documentation

---
 man/extract_cs_info.Rd | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/man/extract_cs_info.Rd b/man/extract_cs_info.Rd
index 7796cdd8..b9ccba29 100644
--- a/man/extract_cs_info.Rd
+++ b/man/extract_cs_info.Rd
@@ -14,7 +14,6 @@ extract_cs_info(con_data, cs_names, top_loci_table)
 \item{top_loci_table}{Data frame. The $top_loci layer data from the finemapping results.}
 }
 \value{
-
 A data frame with one row per CS, containing the following columns:
   \item{cs_name}{Name of the Credible Set}
   \item{variants_per_cs}{Number of variants in the CS}
@@ -25,3 +24,14 @@ A data frame with one row per CS, containing the following columns:
   \item{p_value}{P-value calculated from the top Z-score}
   \item{cs_corr}{Pairwise correlations of other CSs in this RDS with the CS of 
     the current row, delimited by '|', if there is more than one CS in this RDS file}
+}
+\description{
+This function extracts and processes information for each Credible Set (CS) 
+from finemapping results, typically obtained from a finemapping RDS file.
+}
+\details{
+This function is designed to be used only when there is at least one Credible Set 
+in the finemapping results usually for a given study and block. It processes each CS, 
+extracting key information such as the top variant, its statistics, and 
+correlation information between multiple CS if available.
+}

From 49e47b02580afce34c9bf7e55e37c65bdd693444 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Fri, 18 Apr 2025 21:10:51 +0000
Subject: [PATCH 08/26] parse the cs_corr

---
 R/univariate_pipeline.R | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index 6e7575d2..63b08833 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -274,11 +274,11 @@ rss_analysis_pipeline <- function(
         if (isTRUE(bvsr_cs_num > 0)) { # have CS
             # Get the names of the credible sets
             cs_names_bvsr = names(bvsr_res$sets$cs)
-            block_cs_metrics = process_cs(data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci)
+            block_cs_metrics = extract_cs_info(data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci)
         } else { # no CS
             bvsr_res = get_susie_result(res)
             if (sum(bvsr_res$pip > finemapping_opts$signal_cutoff) > 0) {
-                block_cs_metrics = extract_top_pip(res)
+                block_cs_metrics = extract_top_pip_info(res)
             }
         }
     }
@@ -286,6 +286,9 @@ rss_analysis_pipeline <- function(
     print(block_cs_metrics)
     # sensitive check for additional analyses
     if (!is.null(block_cs_metrics) && length(block_cs_metrics) > 0) {
+      block_cs_metrics = parse_cs_corr(block_cs_metrics)
+      print("block_cs_metrics")
+      print(str(block_cs_metrics))
       if (block_cs_metrics$variants_per_cs > 1) {
         block_cs_metrics <- block_cs_metrics %>%
           group_by(study, block) %>%

From 6919f0a6c7c8b946aee78b1d2ee36fb8d3f11d6c Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Sat, 19 Apr 2025 03:16:27 +0000
Subject: [PATCH 09/26] archive

---
 R/univariate_diagnostics.R | 14 +++++++++-----
 R/univariate_pipeline.R    |  5 +++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R
index cee9bc70..29ee6ece 100644
--- a/R/univariate_diagnostics.R
+++ b/R/univariate_diagnostics.R
@@ -190,7 +190,6 @@ parse_cs_corr <- function(df) {
   if (!is.data.table(df)) {
     setDT(df)
   }
-  
   # Function to extract correlations
   extract_correlations <- function(x) {
     if(is.na(x) || x == "" || !grepl("\\|", x)) {
@@ -210,13 +209,17 @@ parse_cs_corr <- function(df) {
       min_corr = min(abs(values_filtered), na.rm = TRUE)
     ))
   }
-  
+  print("df$cs_corr")
+  print(df$cs_corr)
+  str(df$cs_corr)
   # Process correlations
   processed_results <- lapply(df$cs_corr, extract_correlations)
-  
+  print("processed_results")
+  print(processed_results)
   # Determine max number of correlations
   max_corr_count <- max(sapply(processed_results, function(x) length(x$values)))
-  
+  print("max_corr_count")
+  print(max_corr_count)
   # Create column names
   col_names <- paste0("cs_corr_", 1:max_corr_count)
   
@@ -226,7 +229,8 @@ parse_cs_corr <- function(df) {
       if(length(x$values) >= j) x$values[j] else NA_real_
     })
   })
-  
+  print("corr_list")
+  print(corr_list)
   # Add columns to the data.table
   for(i in seq_along(col_names)) {
     df[, (col_names[i]) := corr_list[[i]]]
diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index 63b08833..6dbd9e20 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -274,7 +274,7 @@ rss_analysis_pipeline <- function(
         if (isTRUE(bvsr_cs_num > 0)) { # have CS
             # Get the names of the credible sets
             cs_names_bvsr = names(bvsr_res$sets$cs)
-            block_cs_metrics = extract_cs_info(data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci)
+            block_cs_metrics = extract_cs_info(con_data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci)
         } else { # no CS
             bvsr_res = get_susie_result(res)
             if (sum(bvsr_res$pip > finemapping_opts$signal_cutoff) > 0) {
@@ -285,7 +285,8 @@ rss_analysis_pipeline <- function(
     print("block_cs_metrics")
     print(block_cs_metrics)
     # sensitive check for additional analyses
-    if (!is.null(block_cs_metrics) && length(block_cs_metrics) > 0) {
+    if (!is.null(block_cs_metrics) && nrow(block_cs_metrics) > 0) {
+      print("pass")
       block_cs_metrics = parse_cs_corr(block_cs_metrics)
       print("block_cs_metrics")
       print(str(block_cs_metrics))

From 2d46b19b28172abf2ff915714ac2c12c6876c042 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Sat, 19 Apr 2025 05:36:29 +0000
Subject: [PATCH 10/26] archive

---
 R/univariate_diagnostics.R | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R
index 29ee6ece..317e93ec 100644
--- a/R/univariate_diagnostics.R
+++ b/R/univariate_diagnostics.R
@@ -190,18 +190,26 @@ parse_cs_corr <- function(df) {
   if (!is.data.table(df)) {
     setDT(df)
   }
-  # Function to extract correlations
+  print("df")
+  print(df)
   extract_correlations <- function(x) {
+    # Explicit conversion to character (main change)
+    x <- as.character(x)
+    
+    # Same NA and empty string check
     if(is.na(x) || x == "" || !grepl("\\|", x)) {
       return(list(values = numeric(0), max_corr = NA, min_corr = NA))
     }
     
-    values <- as.numeric(unlist(strsplit(as.character(x), "\\|")))
-    
+    # Conversion to numeric, similar to original
+    values <- as.numeric(unlist(strsplit(x, "\\|")))
     
+    # Same length check
     if(length(values) == 0) {
       return(list(values = numeric(0), max_corr = NA, min_corr = NA))
     }
+    
+    # Same filtering and calculation
     values_filtered <- abs(values[values != 1])
     return(list(
       values = values,
@@ -211,7 +219,6 @@ parse_cs_corr <- function(df) {
   }
   print("df$cs_corr")
   print(df$cs_corr)
-  str(df$cs_corr)
   # Process correlations
   processed_results <- lapply(df$cs_corr, extract_correlations)
   print("processed_results")

From e6c7bf215b4b5f30541d6cbecfaab40e8cc1b796 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Sat, 19 Apr 2025 22:08:33 +0000
Subject: [PATCH 11/26] archive

---
 R/univariate_diagnostics.R | 14 +++++---------
 R/univariate_pipeline.R    | 12 +++++-------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R
index 317e93ec..d07a52d4 100644
--- a/R/univariate_diagnostics.R
+++ b/R/univariate_diagnostics.R
@@ -93,7 +93,7 @@ extract_cs_info <- function(con_data, cs_names, top_loci_table) {
       top_pip = top_pip,
       top_z = top_z,
       p_value = p_value,
-      cs_corr = list(cs_corr)  # list column if cs_corr is a vector
+      cs_corr = list(paste(cs_corr, collapse = ","))  # list column if cs_corr is a vector
     )
     return(result)
   })
@@ -190,19 +190,17 @@ parse_cs_corr <- function(df) {
   if (!is.data.table(df)) {
     setDT(df)
   }
-  print("df")
-  print(df)
   extract_correlations <- function(x) {
-    # Explicit conversion to character (main change)
-    x <- as.character(x)
+    # # Explicit conversion to character (main change)
+    # x <- as.character(x)
     
     # Same NA and empty string check
-    if(is.na(x) || x == "" || !grepl("\\|", x)) {
+    if(is.na(x) || x == "" || !grepl(",", x)) {
       return(list(values = numeric(0), max_corr = NA, min_corr = NA))
     }
     
     # Conversion to numeric, similar to original
-    values <- as.numeric(unlist(strsplit(x, "\\|")))
+    values <- as.numeric(unlist(strsplit(x, ",")))
     
     # Same length check
     if(length(values) == 0) {
@@ -225,8 +223,6 @@ parse_cs_corr <- function(df) {
   print(processed_results)
   # Determine max number of correlations
   max_corr_count <- max(sapply(processed_results, function(x) length(x$values)))
-  print("max_corr_count")
-  print(max_corr_count)
   # Create column names
   col_names <- paste0("cs_corr_", 1:max_corr_count)
   
diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index 6dbd9e20..7e1e6fd6 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -290,16 +290,14 @@ rss_analysis_pipeline <- function(
       block_cs_metrics = parse_cs_corr(block_cs_metrics)
       print("block_cs_metrics")
       print(str(block_cs_metrics))
-      if (block_cs_metrics$variants_per_cs > 1) {
+      if (nrow(block_cs_metrics)) {
         block_cs_metrics <- block_cs_metrics %>%
-          group_by(study, block) %>%
           mutate(max_cs_corr_study_block = if(all(is.na(cs_corr_max))) {
-            NA_real_  # Use NA instead of -Inf when all values are NA
+            NA_real_
           } else {
             max(cs_corr_max, na.rm = TRUE)
-          }) %>%
-          ungroup()
-        if (block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5) {
+          })
+        if (any(block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5)) {
           finemapping_method <- "bayesian_conditional_regression"
           pri_coverage <- finemapping_opts$coverage[1]
           sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL
@@ -345,7 +343,7 @@ rss_analysis_pipeline <- function(
           }
           result_list[[method_name]] <- ser
         }
-      } else { # variants_per_cs > 1 or NA
+      } else { # CS = 1 or NA
         finemapping_method <- "single_effect"
         sumstats <- preprocess_results$sumstats
         LD_mat <- preprocess_results$LD_mat

From 96b4d2423aa0649929e777bd5d8dc478c79b0b62 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Sun, 20 Apr 2025 03:18:23 +0000
Subject: [PATCH 12/26] archive

---
 R/univariate_diagnostics.R | 65 ++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 35 deletions(-)

diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R
index d07a52d4..1a818d0b 100644
--- a/R/univariate_diagnostics.R
+++ b/R/univariate_diagnostics.R
@@ -145,14 +145,14 @@ extract_top_pip_info <- function(con_data) {
   p_value = z_to_pvalue(top_z)
   # Create the dataframe row
   data.frame(
-    cs_name = NA_character_,
-    variants_per_cs = NA_integer_,
+    cs_name = NA,
+    variants_per_cs = NA,
     top_variant = top_variant,
     top_variant_index = top_pip_index,
     top_pip = top_pip,
     top_z = top_z,
     p_value = p_value,
-    cs_corr = NA_character_
+    cs_corr = list()
   )
 }
 
@@ -190,30 +190,23 @@ parse_cs_corr <- function(df) {
   if (!is.data.table(df)) {
     setDT(df)
   }
+  
   extract_correlations <- function(x) {
-    # # Explicit conversion to character (main change)
-    # x <- as.character(x)
-    
-    # Same NA and empty string check
-    if(is.na(x) || x == "" || !grepl(",", x)) {
-      return(list(values = numeric(0), max_corr = NA, min_corr = NA))
+    # Early return if x is invalid
+    if(is.na(x) || x == "" || is.null(x) || !grepl(",", as.character(x))) {
+      return(list(values = numeric(0), max_corr = NA_real_, min_corr = NA_real_))
     }
     
-    # Conversion to numeric, similar to original
+    # Convert and filter values
     values <- as.numeric(unlist(strsplit(x, ",")))
-    
-    # Same length check
-    if(length(values) == 0) {
-      return(list(values = numeric(0), max_corr = NA, min_corr = NA))
-    }
-    
-    # Same filtering and calculation
     values_filtered <- abs(values[values != 1])
-    return(list(
+    
+    # Return list with NA if no valid correlations
+    list(
       values = values,
-      max_corr = max(abs(values_filtered), na.rm = TRUE),
-      min_corr = min(abs(values_filtered), na.rm = TRUE)
-    ))
+      max_corr = if(length(values_filtered) > 0) max(abs(values_filtered), na.rm = TRUE) else NA_real_,
+      min_corr = if(length(values_filtered) > 0) min(abs(values_filtered), na.rm = TRUE) else NA_real_
+    )
   }
   print("df$cs_corr")
   print(df$cs_corr)
@@ -221,27 +214,29 @@ parse_cs_corr <- function(df) {
   processed_results <- lapply(df$cs_corr, extract_correlations)
   print("processed_results")
   print(processed_results)
+  # If no valid results, add NA columns and return
+  if(all(sapply(processed_results, function(x) length(x$values) == 0))) {
+    df[, c("cs_corr_max", "cs_corr_min") := list(NA_real_, NA_real_)]
+    return(df)
+  }
+  
   # Determine max number of correlations
   max_corr_count <- max(sapply(processed_results, function(x) length(x$values)))
-  # Create column names
+  
+  # Create and add correlation columns
   col_names <- paste0("cs_corr_", 1:max_corr_count)
   
-  # Prepare a list of correlation values for each column
-  corr_list <- lapply(1:max_corr_count, function(j) {
-    sapply(processed_results, function(x) {
-      if(length(x$values) >= j) x$values[j] else NA_real_
-    })
-  })
-  print("corr_list")
-  print(corr_list)
-  # Add columns to the data.table
   for(i in seq_along(col_names)) {
-    df[, (col_names[i]) := corr_list[[i]]]
+    df[, (col_names[i]) := sapply(processed_results, function(x) {
+      if(length(x$values) >= i) x$values[i] else NA_real_
+    })]
   }
   
-  # Add max and min columns
-  df[, cs_corr_max := sapply(processed_results, `[[`, "max_corr")]
-  df[, cs_corr_min := sapply(processed_results, `[[`, "min_corr")]
+  # Add max and min correlation columns
+  df[, `:=`(
+    cs_corr_max = sapply(processed_results, `[[`, "max_corr"),
+    cs_corr_min = sapply(processed_results, `[[`, "min_corr")
+  )]
   
   return(df)
 }

From 02afda6441892a1507e9bcaee404a7748dc72838 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Sun, 20 Apr 2025 04:09:52 +0000
Subject: [PATCH 13/26] archive

---
 R/univariate_pipeline.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index 7e1e6fd6..4a156109 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -290,13 +290,16 @@ rss_analysis_pipeline <- function(
       block_cs_metrics = parse_cs_corr(block_cs_metrics)
       print("block_cs_metrics")
       print(str(block_cs_metrics))
-      if (nrow(block_cs_metrics)) {
+      cs_row = block_cs_metrics %>% filter(!is.na(block_cs_metrics$variants_per_cs))
+      if (nrow(cs_row)>1) {# CS > 1
         block_cs_metrics <- block_cs_metrics %>%
           mutate(max_cs_corr_study_block = if(all(is.na(cs_corr_max))) {
             NA_real_
           } else {
             max(cs_corr_max, na.rm = TRUE)
           })
+        print("block_cs_metrics")
+        print(str(block_cs_metrics))
         if (any(block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5)) {
           finemapping_method <- "bayesian_conditional_regression"
           pri_coverage <- finemapping_opts$coverage[1]

From 9331532d78b259c4a0b47f80822225adfcac2561 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Sun, 20 Apr 2025 05:38:59 +0000
Subject: [PATCH 14/26] final version

---
 R/univariate_diagnostics.R |  4 ----
 R/univariate_pipeline.R    | 11 -----------
 2 files changed, 15 deletions(-)

diff --git a/R/univariate_diagnostics.R b/R/univariate_diagnostics.R
index 1a818d0b..b1e91bd2 100644
--- a/R/univariate_diagnostics.R
+++ b/R/univariate_diagnostics.R
@@ -208,12 +208,8 @@ parse_cs_corr <- function(df) {
       min_corr = if(length(values_filtered) > 0) min(abs(values_filtered), na.rm = TRUE) else NA_real_
     )
   }
-  print("df$cs_corr")
-  print(df$cs_corr)
   # Process correlations
   processed_results <- lapply(df$cs_corr, extract_correlations)
-  print("processed_results")
-  print(processed_results)
   # If no valid results, add NA columns and return
   if(all(sapply(processed_results, function(x) length(x$values) == 0))) {
     df[, c("cs_corr_max", "cs_corr_min") := list(NA_real_, NA_real_)]
diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index 4a156109..6e19c0aa 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -237,8 +237,6 @@ rss_analysis_pipeline <- function(
 
   # Perform fine-mapping
   if (!is.null(finemapping_method)) {
-    print("finemapping_method")
-    print(finemapping_method)
     pri_coverage <- finemapping_opts$coverage[1]
     sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL
     res <- susie_rss_pipeline(sumstats, LD_mat,
@@ -249,8 +247,6 @@ rss_analysis_pipeline <- function(
       secondary_coverage = sec_coverage,
       signal_cutoff = finemapping_opts$signal_cutoff
     )
-    print("str(res)")
-    print(str(res))
     if (!is.null(qc_method)) {
       res$outlier_number <- qc_results$outlier_number
     }
@@ -282,14 +278,9 @@ rss_analysis_pipeline <- function(
             }
         }
     }
-    print("block_cs_metrics")
-    print(block_cs_metrics)
     # sensitive check for additional analyses
     if (!is.null(block_cs_metrics) && nrow(block_cs_metrics) > 0) {
-      print("pass")
       block_cs_metrics = parse_cs_corr(block_cs_metrics)
-      print("block_cs_metrics")
-      print(str(block_cs_metrics))
       cs_row = block_cs_metrics %>% filter(!is.na(block_cs_metrics$variants_per_cs))
       if (nrow(cs_row)>1) {# CS > 1
         block_cs_metrics <- block_cs_metrics %>%
@@ -298,8 +289,6 @@ rss_analysis_pipeline <- function(
           } else {
             max(cs_corr_max, na.rm = TRUE)
           })
-        print("block_cs_metrics")
-        print(str(block_cs_metrics))
         if (any(block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5)) {
           finemapping_method <- "bayesian_conditional_regression"
           pri_coverage <- finemapping_opts$coverage[1]

From 582bf034a1cfa787c8949e88c58040d0f8a95e41 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Sun, 20 Apr 2025 21:37:14 +0000
Subject: [PATCH 15/26] standardize the function name

---
 R/{univariate_diagnostics.R => univariate_rss_diagnostics.R} | 1 -
 1 file changed, 1 deletion(-)
 rename R/{univariate_diagnostics.R => univariate_rss_diagnostics.R} (99%)

diff --git a/R/univariate_diagnostics.R b/R/univariate_rss_diagnostics.R
similarity index 99%
rename from R/univariate_diagnostics.R
rename to R/univariate_rss_diagnostics.R
index b1e91bd2..723d2fb7 100644
--- a/R/univariate_diagnostics.R
+++ b/R/univariate_rss_diagnostics.R
@@ -270,7 +270,6 @@ parse_cs_corr <- function(df) {
 #' @importFrom dplyr case_when
 #'
 #' @export
-# cs_exp = cs_exp |> filter(!is.na(p_value))
 auto_decision <- function(df, high_corr_cols) {
   # Identify top_cs
   top_cs_index <- which.max(abs(df$top_z))

From a2a63fcd0e2d47cbd064fc16e9e1349bc010fe6f Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@users.noreply.github.com>
Date: Sun, 20 Apr 2025 21:38:55 +0000
Subject: [PATCH 16/26] Update documentation

---
 man/auto_decision.Rd        | 2 +-
 man/extract_cs_info.Rd      | 2 +-
 man/extract_top_pip_info.Rd | 2 +-
 man/get_susie_result.Rd     | 2 +-
 man/parse_cs_corr.Rd        | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/man/auto_decision.Rd b/man/auto_decision.Rd
index 65cae364..d9dbc260 100644
--- a/man/auto_decision.Rd
+++ b/man/auto_decision.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/univariate_diagnostics.R
+% Please edit documentation in R/univariate_rss_diagnostics.R
 \name{auto_decision}
 \alias{auto_decision}
 \title{Process Credible Set Information and Determine Updating Strategy}
diff --git a/man/extract_cs_info.Rd b/man/extract_cs_info.Rd
index b9ccba29..fda919b6 100644
--- a/man/extract_cs_info.Rd
+++ b/man/extract_cs_info.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/univariate_diagnostics.R
+% Please edit documentation in R/univariate_rss_diagnostics.R
 \name{extract_cs_info}
 \alias{extract_cs_info}
 \title{Process Credible Sets (CS) from Finemapping Results}
diff --git a/man/extract_top_pip_info.Rd b/man/extract_top_pip_info.Rd
index 2e319b89..ce2ee41d 100644
--- a/man/extract_top_pip_info.Rd
+++ b/man/extract_top_pip_info.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/univariate_diagnostics.R
+% Please edit documentation in R/univariate_rss_diagnostics.R
 \name{extract_top_pip_info}
 \alias{extract_top_pip_info}
 \title{Extract Information for Top Variant from Finemapping Results}
diff --git a/man/get_susie_result.Rd b/man/get_susie_result.Rd
index 25706df1..e1895924 100644
--- a/man/get_susie_result.Rd
+++ b/man/get_susie_result.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/univariate_diagnostics.R
+% Please edit documentation in R/univariate_rss_diagnostics.R
 \name{get_susie_result}
 \alias{get_susie_result}
 \title{Extract SuSiE Results from Finemapping Data}
diff --git a/man/parse_cs_corr.Rd b/man/parse_cs_corr.Rd
index 2d4d5568..93fea320 100644
--- a/man/parse_cs_corr.Rd
+++ b/man/parse_cs_corr.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/univariate_diagnostics.R
+% Please edit documentation in R/univariate_rss_diagnostics.R
 \name{parse_cs_corr}
 \alias{parse_cs_corr}
 \title{Parse Credible Set Correlations from extract_cs_info() Output}

From b5882e311e764a087d24c9cbdeb1627e2fdb3f27 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Mon, 21 Apr 2025 11:48:39 -0400
Subject: [PATCH 17/26] fix some warning when updating package documentation

---
 R/Untitled Document.ipynb  |  1 +
 R/file_utils.R             | 19 +++++++++++++++++++
 R/quail_vqtl.R             |  5 ++---
 R/quantile_twas_weight.R   |  1 -
 R/regularized_regression.R |  6 ++++++
 5 files changed, 28 insertions(+), 4 deletions(-)
 create mode 100644 R/Untitled Document.ipynb

diff --git a/R/Untitled Document.ipynb b/R/Untitled Document.ipynb
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/R/Untitled Document.ipynb	
@@ -0,0 +1 @@
+
diff --git a/R/file_utils.R b/R/file_utils.R
index f0c8ce1e..94533314 100644
--- a/R/file_utils.R
+++ b/R/file_utils.R
@@ -489,6 +489,11 @@ load_regional_association_data <- function(genotype, # PLINK file
   ))
 }
 
+#' Load Regional Univariate Association Data
+#'
+#' This function loads regional association data for univariate analysis.
+#' It includes residual matrices, original genotype data, and additional metadata.
+#'
 #' @importFrom matrixStats colVars
 #' @return A list
 #' @export
@@ -508,6 +513,11 @@ load_regional_univariate_data <- function(...) {
   ))
 }
 
+#' Load Regional Data for Regression Modeling
+#'
+#' This function loads regional association data formatted for regression modeling.
+#' It includes phenotype, genotype, and covariate matrices along with metadata.
+#'
 #' @return A list
 #' @export
 load_regional_regression_data <- function(...) {
@@ -543,6 +553,11 @@ pheno_list_to_mat <- function(data_list) {
   return(data_list)
 }
 
+#' Load and Preprocess Regional Multivariate Data
+#'
+#' This function loads regional association data and processes it into a multivariate format.
+#' It optionally filters out samples based on missingness thresholds in the response matrix.
+#'
 #' @importFrom matrixStats colVars
 #' @return A list
 #' @export
@@ -578,6 +593,10 @@ load_regional_multivariate_data <- function(matrix_y_min_complete = NULL, # when
   ))
 }
 
+#' Load Regional Functional Association Data
+#'
+#' This function loads precomputed regional functional association data.
+#'
 #' @return A list
 #' @export
 load_regional_functional_data <- function(...) {
diff --git a/R/quail_vqtl.R b/R/quail_vqtl.R
index 9ba03d92..7649c624 100644
--- a/R/quail_vqtl.R
+++ b/R/quail_vqtl.R
@@ -8,11 +8,10 @@
 #' @param return_residuals Logical, whether to return residuals (default: FALSE).
 #' @return A list containing regression results: \code{betahat}, \code{sebetahat}, \code{z_scores}, \code{p_values}, and \code{q_values}.
 #' @examples
-#' @noRd
 #' X <- matrix(rnorm(1000), 100, 10)
 #' y <- rnorm(100)
 #' results <- univariate_regression(X, y)
-
+#' @noRd
 univariate_regression <- function(X, y, Z = NULL, center = TRUE,
                                   scale = FALSE, return_residuals = FALSE) {
   y_na <- which(is.na(y))
@@ -96,10 +95,10 @@ univariate_regression <- function(X, y, Z = NULL, center = TRUE,
 #' @param covariates Optional numeric matrix of covariates (n x k), where k is the number of covariates.
 #' @return A data frame containing regression results for each SNP, including \code{BETA}, \code{SE}, \code{Z}, \code{P}, and \code{Q}.
 #' @examples
-#' @noRd
 #' genotype <- matrix(rbinom(1000 * 10, 2, 0.3), 1000, 10)
 #' phenotype <- rnorm(1000)
 #' results <- run_linear_regression1(genotype, phenotype)
+#' @noRd
 run_linear_regression <- function(genotype, phenotype, covariates = NULL) {
   if (!is.null(covariates)) {
     covariates <- as.data.frame(lapply(covariates, as.numeric))
diff --git a/R/quantile_twas_weight.R b/R/quantile_twas_weight.R
index 8f722779..dbcd1b88 100644
--- a/R/quantile_twas_weight.R
+++ b/R/quantile_twas_weight.R
@@ -412,7 +412,6 @@ corr_filter <- function(X, cor_thres = 0.8) {
 #' @param response Optional response vector for "response_correlation" strategy
 #' @param max_iterations Maximum number of iterations to attempt removing problematic columns
 #' @return Cleaned matrix X with problematic columns removed
-#' @importFrom stats qr
 #' @noRd
 check_remove_highcorr_snp <- function(X = X, C = C, strategy = c("correlation", "variance", "response_correlation"), response = NULL, max_iterations = 300, corr_thresholds = seq(0.75, 0.5, by = -0.05)) {
   strategy <- match.arg(strategy)
diff --git a/R/regularized_regression.R b/R/regularized_regression.R
index b8a84e11..2d21335a 100644
--- a/R/regularized_regression.R
+++ b/R/regularized_regression.R
@@ -437,6 +437,12 @@ enet_weights <- function(X, y) glmnet_weights(X, y, 0.5)
 #' @export
 lasso_weights <- function(X, y) glmnet_weights(X, y, 1)
 
+#' Compute Weights Using mr.ash Shrinkage
+#'
+#' This function fits the `mr.ash` model (adaptive shrinkage regression) to estimate weights
+#' for a given set of predictors and response. It uses optional prior standard deviation initialization
+#' and can accept custom initial beta values.
+#'
 #' @examples
 #' wgt.mr.ash <- mrash_weights(eqtl$X, eqtl$y_res, beta.init = lasso_weights(X, y))
 #' @importFrom stats predict

From 16ee663f17021d2c2b74ab8762953759165228f8 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Mon, 21 Apr 2025 11:49:48 -0400
Subject: [PATCH 18/26] clean up

---
 R/Untitled Document.ipynb | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 R/Untitled Document.ipynb

diff --git a/R/Untitled Document.ipynb b/R/Untitled Document.ipynb
deleted file mode 100644
index 8b137891..00000000
--- a/R/Untitled Document.ipynb	
+++ /dev/null
@@ -1 +0,0 @@
-

From 50e8042bfa8994972f083b321f40aa18887a1e2b Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@users.noreply.github.com>
Date: Mon, 21 Apr 2025 15:51:16 +0000
Subject: [PATCH 19/26] Update documentation

---
 man/load_regional_functional_data.Rd   | 14 +++++++++++
 man/load_regional_multivariate_data.Rd | 15 ++++++++++++
 man/load_regional_regression_data.Rd   | 15 ++++++++++++
 man/load_regional_univariate_data.Rd   | 15 ++++++++++++
 man/mrash_weights.Rd                   | 16 ++++++++++++
 man/run_linear_regression.Rd           | 21 ----------------
 man/univariate_regression.Rd           | 34 --------------------------
 7 files changed, 75 insertions(+), 55 deletions(-)
 create mode 100644 man/load_regional_functional_data.Rd
 create mode 100644 man/load_regional_multivariate_data.Rd
 create mode 100644 man/load_regional_regression_data.Rd
 create mode 100644 man/load_regional_univariate_data.Rd
 create mode 100644 man/mrash_weights.Rd
 delete mode 100644 man/run_linear_regression.Rd
 delete mode 100644 man/univariate_regression.Rd

diff --git a/man/load_regional_functional_data.Rd b/man/load_regional_functional_data.Rd
new file mode 100644
index 00000000..8c66aa6f
--- /dev/null
+++ b/man/load_regional_functional_data.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/file_utils.R
+\name{load_regional_functional_data}
+\alias{load_regional_functional_data}
+\title{Load Regional Functional Association Data}
+\usage{
+load_regional_functional_data(...)
+}
+\value{
+A list
+}
+\description{
+This function loads precomputed regional functional association data.
+}
diff --git a/man/load_regional_multivariate_data.Rd b/man/load_regional_multivariate_data.Rd
new file mode 100644
index 00000000..00880981
--- /dev/null
+++ b/man/load_regional_multivariate_data.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/file_utils.R
+\name{load_regional_multivariate_data}
+\alias{load_regional_multivariate_data}
+\title{Load and Preprocess Regional Multivariate Data}
+\usage{
+load_regional_multivariate_data(matrix_y_min_complete = NULL, ...)
+}
+\value{
+A list
+}
+\description{
+This function loads regional association data and processes it into a multivariate format.
+It optionally filters out samples based on missingness thresholds in the response matrix.
+}
diff --git a/man/load_regional_regression_data.Rd b/man/load_regional_regression_data.Rd
new file mode 100644
index 00000000..f069f59f
--- /dev/null
+++ b/man/load_regional_regression_data.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/file_utils.R
+\name{load_regional_regression_data}
+\alias{load_regional_regression_data}
+\title{Load Regional Data for Regression Modeling}
+\usage{
+load_regional_regression_data(...)
+}
+\value{
+A list
+}
+\description{
+This function loads regional association data formatted for regression modeling.
+It includes phenotype, genotype, and covariate matrices along with metadata.
+}
diff --git a/man/load_regional_univariate_data.Rd b/man/load_regional_univariate_data.Rd
new file mode 100644
index 00000000..47651c05
--- /dev/null
+++ b/man/load_regional_univariate_data.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/file_utils.R
+\name{load_regional_univariate_data}
+\alias{load_regional_univariate_data}
+\title{Load Regional Univariate Association Data}
+\usage{
+load_regional_univariate_data(...)
+}
+\value{
+A list
+}
+\description{
+This function loads regional association data for univariate analysis.
+It includes residual matrices, original genotype data, and additional metadata.
+}
diff --git a/man/mrash_weights.Rd b/man/mrash_weights.Rd
new file mode 100644
index 00000000..9d567905
--- /dev/null
+++ b/man/mrash_weights.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/regularized_regression.R
+\name{mrash_weights}
+\alias{mrash_weights}
+\title{Compute Weights Using mr.ash Shrinkage}
+\usage{
+mrash_weights(X, y, init_prior_sd = TRUE, ...)
+}
+\description{
+This function fits the `mr.ash` model (adaptive shrinkage regression) to estimate weights
+for a given set of predictors and response. It uses optional prior standard deviation initialization
+and can accept custom initial beta values.
+}
+\examples{
+wgt.mr.ash <- mrash_weights(eqtl$X, eqtl$y_res, beta.init = lasso_weights(X, y))
+}
diff --git a/man/run_linear_regression.Rd b/man/run_linear_regression.Rd
deleted file mode 100644
index 6a37e133..00000000
--- a/man/run_linear_regression.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quail_vqtl.R
-\name{run_linear_regression}
-\alias{run_linear_regression}
-\title{Perform Linear Regression for GWAS}
-\usage{
-run_linear_regression(genotype, phenotype, covariates = NULL)
-}
-\arguments{
-\item{genotype}{Numeric matrix of genotypes (n x p), where n is the number of samples and p is the number of SNPs.}
-
-\item{phenotype}{Numeric vector of phenotypes (length n).}
-
-\item{covariates}{Optional numeric matrix of covariates (n x k), where k is the number of covariates.}
-}
-\value{
-A data frame containing regression results for each SNP, including \code{BETA}, \code{SE}, \code{Z}, \code{P}, and \code{Q}.
-}
-\description{
-Perform Linear Regression for GWAS
-}
diff --git a/man/univariate_regression.Rd b/man/univariate_regression.Rd
deleted file mode 100644
index bc2897be..00000000
--- a/man/univariate_regression.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quail_vqtl.R
-\name{univariate_regression}
-\alias{univariate_regression}
-\title{Perform Univariate Regression for Genotype Data}
-\usage{
-univariate_regression(
-  X,
-  y,
-  Z = NULL,
-  center = TRUE,
-  scale = FALSE,
-  return_residuals = FALSE
-)
-}
-\arguments{
-\item{X}{Numeric matrix of genotypes (n x p), where n is the number of samples and p is the number of SNPs.}
-
-\item{y}{Numeric vector of phenotypes (length n).}
-
-\item{Z}{Optional numeric matrix of covariates (n x k), where k is the number of covariates.}
-
-\item{center}{Logical, whether to center the data (default: TRUE).}
-
-\item{scale}{Logical, whether to scale the data (default: FALSE).}
-
-\item{return_residuals}{Logical, whether to return residuals (default: FALSE).}
-}
-\value{
-A list containing regression results: \code{betahat}, \code{sebetahat}, \code{z_scores}, \code{p_values}, and \code{q_values}.
-}
-\description{
-Perform Univariate Regression for Genotype Data
-}

From cf590306f69185799f713918969efa2a5b1b9e8e Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Mon, 21 Apr 2025 16:02:36 +0000
Subject: [PATCH 20/26] update the package documentation

---
 R/Untitled.ipynb | 233 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 233 insertions(+)
 create mode 100644 R/Untitled.ipynb

diff --git a/R/Untitled.ipynb b/R/Untitled.ipynb
new file mode 100644
index 00000000..9cb2a10b
--- /dev/null
+++ b/R/Untitled.ipynb
@@ -0,0 +1,233 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0f57e1d8-6a44-4c87-a967-7bce7f81a899",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1m\u001b[22m\u001b[36mℹ\u001b[39m Updating \u001b[34mpecotmr\u001b[39m documentation\n",
+      "Registered S3 methods overwritten by 'readr':\n",
+      "  method                    from \n",
+      "  as.data.frame.spec_tbl_df vroom\n",
+      "  as_tibble.spec_tbl_df     vroom\n",
+      "  format.col_spec           vroom\n",
+      "  print.col_spec            vroom\n",
+      "  print.collector           vroom\n",
+      "  print.date_names          vroom\n",
+      "  print.locale              vroom\n",
+      "  str.col_spec              vroom\n",
+      "\n",
+      "\u001b[1m\u001b[22m\u001b[36mℹ\u001b[39m Loading \u001b[34mpecotmr\u001b[39m\n",
+      "Registered S3 methods overwritten by 'RcppGSL':\n",
+      "  method               from         \n",
+      "  predict.fastLm       RcppArmadillo\n",
+      "  print.fastLm         RcppArmadillo\n",
+      "  summary.fastLm       RcppArmadillo\n",
+      "  print.summary.fastLm RcppArmadillo\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Exports from /home/ubuntu/pecotmr/src/dentist_iterative_impute.cpp:\n",
+      "   List dentist_iterative_impute(const arma::mat& LD_mat, size_t nSample, const arma::vec& zScore, double pValueThreshold, float propSVD, bool gcControl, int nIter, double gPvalueThreshold, int ncpus, int seed, bool correct_chen_et_al_bug, bool verbose = false)\n",
+      "\n",
+      "Exports from /home/ubuntu/pecotmr/src/mr_ash.cpp:\n",
+      "   List rcpp_mr_ash_rss(const NumericVector& bhat, const NumericVector& shat, const NumericVector& z, const NumericMatrix& R, double var_y, int n, double sigma2_e, const NumericVector& s0, const NumericVector& w0, const NumericVector& mu1_init, double tol = 1e-8, int max_iter = 1e5, bool update_w0 = true, bool update_sigma = true, bool compute_ELBO = true, bool standardize = false, int ncpus = 1)\n",
+      "\n",
+      "Exports from /home/ubuntu/pecotmr/src/prscs_mcmc.cpp:\n",
+      "   Rcpp::List prs_cs_rcpp(double a, double b, Rcpp::Nullable<double> phi, Rcpp::NumericVector bhat, Rcpp::Nullable<Rcpp::NumericVector> maf, int n, Rcpp::List ld_blk, int n_iter, int n_burnin, int thin, bool verbose, Rcpp::Nullable<unsigned int> seed)\n",
+      "\n",
+      "Exports from /home/ubuntu/pecotmr/src/qtl_enrichment.cpp:\n",
+      "   Rcpp::List qtl_enrichment_rcpp(SEXP r_gwas_pip, SEXP r_qtl_susie_fit, double pi_gwas = 0, double pi_qtl = 0, int ImpN = 25, double shrinkage_lambda = 1.0, int num_threads = 1)\n",
+      "\n",
+      "Exports from /home/ubuntu/pecotmr/src/qtl_enrichment.hpp:\n",
+      "\n",
+      "Exports from /home/ubuntu/pecotmr/src/sdpr.cpp:\n",
+      "   Rcpp::List sdpr_rcpp(const std::vector<double>& bhat, const Rcpp::List& LD, int n, Rcpp::Nullable<Rcpp::NumericVector> per_variant_sample_size = R_NilValue, Rcpp::Nullable<Rcpp::IntegerVector> array = R_NilValue, double a = 0.1, double c = 1.0, size_t M = 1000, double a0k = 0.5, double b0k = 0.5, int iter = 1000, int burn = 200, int thin = 5, unsigned n_threads = 1, int opt_llk = 1, bool verbose = true)\n",
+      "\n",
+      "/home/ubuntu/pecotmr/src/RcppExports.cpp updated.\n",
+      "/home/ubuntu/pecotmr/R/RcppExports.R updated.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[36mℹ\u001b[39m Re-compiling \u001b[34m\u001b[34mpecotmr\u001b[34m\u001b[39m (debug build)\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[36m──\u001b[39m \u001b[36mR CMD INSTALL\u001b[39m \u001b[36m───────────────────────────────────────────────────────────────\u001b[39m\n",
+      "* installing *source* package ‘pecotmr’ ...\n",
+      "** using staged installation\n",
+      "** libs\n",
+      "using C++ compiler: ‘x86_64-conda-linux-gnu-c++ (conda-forge gcc 14.2.0-1) 14.2.0’\n",
+      "using C++11\n",
+      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c RcppExports.cpp -o RcppExports.o\n",
+      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[KRcppExports.cpp:4\u001b[m\u001b[K:\n",
+      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
+      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
+      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
+      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c dentist_iterative_impute.cpp -o dentist_iterative_impute.o\n",
+      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[Kdentist_iterative_impute.cpp:8\u001b[m\u001b[K:\n",
+      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
+      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
+      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
+      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c function_pool.cpp -o function_pool.o\n",
+      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/queue:60\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[Kfunction_pool.h:1\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[Kfunction_pool.cpp:2\u001b[m\u001b[K:\n",
+      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
+      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
+      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
+      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c mr_ash.cpp -o mr_ash.o\n",
+      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[Kmr_ash.cpp:1\u001b[m\u001b[K:\n",
+      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
+      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
+      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
+      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c prscs_mcmc.cpp -o prscs_mcmc.o\n",
+      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[Kprscs_mcmc.cpp:6\u001b[m\u001b[K:\n",
+      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
+      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
+      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
+      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c qtl_enrichment.cpp -o qtl_enrichment.o\n",
+      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[Kqtl_enrichment.hpp:3\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[Kqtl_enrichment.cpp:1\u001b[m\u001b[K:\n",
+      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
+      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
+      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
+      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c sdpr.cpp -o sdpr.o\n",
+      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[Ksdpr.cpp:1\u001b[m\u001b[K:\n",
+      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
+      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
+      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
+      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c sdpr_mcmc.cpp -o sdpr_mcmc.o\n",
+      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/stl_algobase.h:59\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/algorithm:60\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[Ksdpr_mcmc.cpp:1\u001b[m\u001b[K:\n",
+      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
+      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
+      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
+      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/armadillo:27\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[Ksdpr_mcmc.h:3\u001b[m\u001b[K,\n",
+      "                 from \u001b[01m\u001b[Ksdpr_mcmc.cpp:15\u001b[m\u001b[K:\n",
+      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/armadillo_bits/compiler_check.hpp:87:98:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[K'\u001b[01m\u001b[K#pragma message: INFO: support for C++11 is deprecated; minimum recommended standard is C++14\u001b[m\u001b[K'\n",
+      "   87 |   #pragma message (\"INFO: support for C++11 is deprecated; minimum recommended standard is C++14\"\u001b[01;36m\u001b[K)\u001b[m\u001b[K\n",
+      "      |                                                                                                  \u001b[01;36m\u001b[K^\u001b[m\u001b[K\n",
+      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -shared -L/home/ubuntu/.pixi/envs/r-base/lib/R/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/home/ubuntu/.pixi/envs/r-base/lib -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -L/home/ubuntu/.pixi/envs/r-base/lib -o pecotmr.so RcppExports.o dentist_iterative_impute.o function_pool.o mr_ash.o prscs_mcmc.o qtl_enrichment.o sdpr.o sdpr_mcmc.o -fopenmp -llapack -lblas -lgfortran -lm -lquadmath -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/home/ubuntu/.pixi/envs/r-base/lib -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -L/home/ubuntu/.pixi/envs/r-base/lib -L/home/ubuntu/.pixi/envs/python/lib -lgsl -lgslcblas -lm -L/home/ubuntu/.pixi/envs/r-base/lib/R/lib -lR\n",
+      "installing to /tmp/RtmpDn7wph/devtools_install_e03a614934/00LOCK-pecotmr/00new/pecotmr/libs\n",
+      "** checking absolute paths in shared objects and dynamic libraries\n",
+      "* DONE (pecotmr)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Warning message:\n",
+      "“replacing previous import ‘vroom::cols’ by ‘readr::cols’ when loading ‘pecotmr’”\n"
+     ]
+    }
+   ],
+   "source": [
+    "devtools::document()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d9538a0e-86e3-4c15-9319-9ef790457442",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "R",
+   "language": "R",
+   "name": "ir"
+  },
+  "language_info": {
+   "codemirror_mode": "r",
+   "file_extension": ".r",
+   "mimetype": "text/x-r-source",
+   "name": "R",
+   "pygments_lexer": "r",
+   "version": "4.3.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 53f0c2b5bc67fbfcfae5ece39b7aca4c40a15afe Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Mon, 21 Apr 2025 16:02:58 +0000
Subject: [PATCH 21/26] clean up

---
 R/Untitled.ipynb | 233 -----------------------------------------------
 1 file changed, 233 deletions(-)
 delete mode 100644 R/Untitled.ipynb

diff --git a/R/Untitled.ipynb b/R/Untitled.ipynb
deleted file mode 100644
index 9cb2a10b..00000000
--- a/R/Untitled.ipynb
+++ /dev/null
@@ -1,233 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "0f57e1d8-6a44-4c87-a967-7bce7f81a899",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[1m\u001b[22m\u001b[36mℹ\u001b[39m Updating \u001b[34mpecotmr\u001b[39m documentation\n",
-      "Registered S3 methods overwritten by 'readr':\n",
-      "  method                    from \n",
-      "  as.data.frame.spec_tbl_df vroom\n",
-      "  as_tibble.spec_tbl_df     vroom\n",
-      "  format.col_spec           vroom\n",
-      "  print.col_spec            vroom\n",
-      "  print.collector           vroom\n",
-      "  print.date_names          vroom\n",
-      "  print.locale              vroom\n",
-      "  str.col_spec              vroom\n",
-      "\n",
-      "\u001b[1m\u001b[22m\u001b[36mℹ\u001b[39m Loading \u001b[34mpecotmr\u001b[39m\n",
-      "Registered S3 methods overwritten by 'RcppGSL':\n",
-      "  method               from         \n",
-      "  predict.fastLm       RcppArmadillo\n",
-      "  print.fastLm         RcppArmadillo\n",
-      "  summary.fastLm       RcppArmadillo\n",
-      "  print.summary.fastLm RcppArmadillo\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Exports from /home/ubuntu/pecotmr/src/dentist_iterative_impute.cpp:\n",
-      "   List dentist_iterative_impute(const arma::mat& LD_mat, size_t nSample, const arma::vec& zScore, double pValueThreshold, float propSVD, bool gcControl, int nIter, double gPvalueThreshold, int ncpus, int seed, bool correct_chen_et_al_bug, bool verbose = false)\n",
-      "\n",
-      "Exports from /home/ubuntu/pecotmr/src/mr_ash.cpp:\n",
-      "   List rcpp_mr_ash_rss(const NumericVector& bhat, const NumericVector& shat, const NumericVector& z, const NumericMatrix& R, double var_y, int n, double sigma2_e, const NumericVector& s0, const NumericVector& w0, const NumericVector& mu1_init, double tol = 1e-8, int max_iter = 1e5, bool update_w0 = true, bool update_sigma = true, bool compute_ELBO = true, bool standardize = false, int ncpus = 1)\n",
-      "\n",
-      "Exports from /home/ubuntu/pecotmr/src/prscs_mcmc.cpp:\n",
-      "   Rcpp::List prs_cs_rcpp(double a, double b, Rcpp::Nullable<double> phi, Rcpp::NumericVector bhat, Rcpp::Nullable<Rcpp::NumericVector> maf, int n, Rcpp::List ld_blk, int n_iter, int n_burnin, int thin, bool verbose, Rcpp::Nullable<unsigned int> seed)\n",
-      "\n",
-      "Exports from /home/ubuntu/pecotmr/src/qtl_enrichment.cpp:\n",
-      "   Rcpp::List qtl_enrichment_rcpp(SEXP r_gwas_pip, SEXP r_qtl_susie_fit, double pi_gwas = 0, double pi_qtl = 0, int ImpN = 25, double shrinkage_lambda = 1.0, int num_threads = 1)\n",
-      "\n",
-      "Exports from /home/ubuntu/pecotmr/src/qtl_enrichment.hpp:\n",
-      "\n",
-      "Exports from /home/ubuntu/pecotmr/src/sdpr.cpp:\n",
-      "   Rcpp::List sdpr_rcpp(const std::vector<double>& bhat, const Rcpp::List& LD, int n, Rcpp::Nullable<Rcpp::NumericVector> per_variant_sample_size = R_NilValue, Rcpp::Nullable<Rcpp::IntegerVector> array = R_NilValue, double a = 0.1, double c = 1.0, size_t M = 1000, double a0k = 0.5, double b0k = 0.5, int iter = 1000, int burn = 200, int thin = 5, unsigned n_threads = 1, int opt_llk = 1, bool verbose = true)\n",
-      "\n",
-      "/home/ubuntu/pecotmr/src/RcppExports.cpp updated.\n",
-      "/home/ubuntu/pecotmr/R/RcppExports.R updated.\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[36mℹ\u001b[39m Re-compiling \u001b[34m\u001b[34mpecotmr\u001b[34m\u001b[39m (debug build)\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[36m──\u001b[39m \u001b[36mR CMD INSTALL\u001b[39m \u001b[36m───────────────────────────────────────────────────────────────\u001b[39m\n",
-      "* installing *source* package ‘pecotmr’ ...\n",
-      "** using staged installation\n",
-      "** libs\n",
-      "using C++ compiler: ‘x86_64-conda-linux-gnu-c++ (conda-forge gcc 14.2.0-1) 14.2.0’\n",
-      "using C++11\n",
-      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c RcppExports.cpp -o RcppExports.o\n",
-      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[KRcppExports.cpp:4\u001b[m\u001b[K:\n",
-      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
-      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
-      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
-      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c dentist_iterative_impute.cpp -o dentist_iterative_impute.o\n",
-      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[Kdentist_iterative_impute.cpp:8\u001b[m\u001b[K:\n",
-      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
-      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
-      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
-      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c function_pool.cpp -o function_pool.o\n",
-      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/queue:60\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[Kfunction_pool.h:1\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[Kfunction_pool.cpp:2\u001b[m\u001b[K:\n",
-      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
-      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
-      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
-      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c mr_ash.cpp -o mr_ash.o\n",
-      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[Kmr_ash.cpp:1\u001b[m\u001b[K:\n",
-      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
-      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
-      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
-      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c prscs_mcmc.cpp -o prscs_mcmc.o\n",
-      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[Kprscs_mcmc.cpp:6\u001b[m\u001b[K:\n",
-      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
-      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
-      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
-      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c qtl_enrichment.cpp -o qtl_enrichment.o\n",
-      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[Kqtl_enrichment.hpp:3\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[Kqtl_enrichment.cpp:1\u001b[m\u001b[K:\n",
-      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
-      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
-      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
-      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c sdpr.cpp -o sdpr.o\n",
-      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/requires_hosted.h:31\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/cmath:41\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/platform/compiler.h:100\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/Rcpp/r/headers.h:66\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include/RcppCommon.h:30\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo/interface/RcppArmadilloForward.h:25\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/RcppArmadillo.h:29\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[Ksdpr.cpp:1\u001b[m\u001b[K:\n",
-      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
-      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
-      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
-      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -I\"/home/ubuntu/.pixi/envs/r-base/lib/R/include\" -DNDEBUG  -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/Rcpp/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include' -I'/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppGSL/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/ubuntu/.pixi/envs/r-base/include -I/home/ubuntu/.pixi/envs/r-base/include -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib   -fopenmp -I/home/ubuntu/.pixi/envs/python/include -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/ubuntu/.pixi/envs/r-base/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1734346517311/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/home/ubuntu/.pixi/envs/r-base=/usr/local/src/conda-prefix  -UNDEBUG -Wall -pedantic -g -O0 -fdiagnostics-color=always -c sdpr_mcmc.cpp -o sdpr_mcmc.o\n",
-      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/os_defines.h:39\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/x86_64-conda-linux-gnu/bits/c++config.h:680\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/bits/stl_algobase.h:59\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/lib/gcc/x86_64-conda-linux-gnu/14.2.0/include/c++/algorithm:60\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[Ksdpr_mcmc.cpp:1\u001b[m\u001b[K:\n",
-      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/python/x86_64-conda-linux-gnu/sysroot/usr/include/features.h:330:4:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K#warning _FORTIFY_SOURCE requires compiling with optimization (-O) [\u001b[01;35m\u001b[K-Wcpp\u001b[m\u001b[K]\n",
-      "  330 | #  \u001b[01;35m\u001b[Kwarning\u001b[m\u001b[K _FORTIFY_SOURCE requires compiling with optimization (-O)\n",
-      "      |    \u001b[01;35m\u001b[K^~~~~~~\u001b[m\u001b[K\n",
-      "In file included from \u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/armadillo:27\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[Ksdpr_mcmc.h:3\u001b[m\u001b[K,\n",
-      "                 from \u001b[01m\u001b[Ksdpr_mcmc.cpp:15\u001b[m\u001b[K:\n",
-      "\u001b[01m\u001b[K/mnt/efs/rl3328/.pixi/envs/r-base/lib/R/library/RcppArmadillo/include/armadillo_bits/compiler_check.hpp:87:98:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[K'\u001b[01m\u001b[K#pragma message: INFO: support for C++11 is deprecated; minimum recommended standard is C++14\u001b[m\u001b[K'\n",
-      "   87 |   #pragma message (\"INFO: support for C++11 is deprecated; minimum recommended standard is C++14\"\u001b[01;36m\u001b[K)\u001b[m\u001b[K\n",
-      "      |                                                                                                  \u001b[01;36m\u001b[K^\u001b[m\u001b[K\n",
-      "x86_64-conda-linux-gnu-c++ -std=gnu++11 -shared -L/home/ubuntu/.pixi/envs/r-base/lib/R/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/home/ubuntu/.pixi/envs/r-base/lib -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -L/home/ubuntu/.pixi/envs/r-base/lib -o pecotmr.so RcppExports.o dentist_iterative_impute.o function_pool.o mr_ash.o prscs_mcmc.o qtl_enrichment.o sdpr.o sdpr_mcmc.o -fopenmp -llapack -lblas -lgfortran -lm -lquadmath -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/home/ubuntu/.pixi/envs/r-base/lib -Wl,-rpath-link,/home/ubuntu/.pixi/envs/r-base/lib -L/home/ubuntu/.pixi/envs/r-base/lib -L/home/ubuntu/.pixi/envs/python/lib -lgsl -lgslcblas -lm -L/home/ubuntu/.pixi/envs/r-base/lib/R/lib -lR\n",
-      "installing to /tmp/RtmpDn7wph/devtools_install_e03a614934/00LOCK-pecotmr/00new/pecotmr/libs\n",
-      "** checking absolute paths in shared objects and dynamic libraries\n",
-      "* DONE (pecotmr)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Warning message:\n",
-      "“replacing previous import ‘vroom::cols’ by ‘readr::cols’ when loading ‘pecotmr’”\n"
-     ]
-    }
-   ],
-   "source": [
-    "devtools::document()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d9538a0e-86e3-4c15-9319-9ef790457442",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "R",
-   "language": "R",
-   "name": "ir"
-  },
-  "language_info": {
-   "codemirror_mode": "r",
-   "file_extension": ".r",
-   "mimetype": "text/x-r-source",
-   "name": "R",
-   "pygments_lexer": "r",
-   "version": "4.3.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

From 1e08aee2cb4aa18eb73963c99fba17d574ab9c9c Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Mon, 21 Apr 2025 19:11:18 +0000
Subject: [PATCH 22/26] fix a minor typo

---
 R/univariate_pipeline.R        |  4 +---
 R/univariate_rss_diagnostics.R | 15 +++++++--------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index 6e19c0aa..7cc68686 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -234,7 +234,6 @@ rss_analysis_pipeline <- function(
     sumstats <- impute_results$result_filter
     LD_mat <- impute_results$LD_mat
   }
-
   # Perform fine-mapping
   if (!is.null(finemapping_method)) {
     pri_coverage <- finemapping_opts$coverage[1]
@@ -272,14 +271,13 @@ rss_analysis_pipeline <- function(
             cs_names_bvsr = names(bvsr_res$sets$cs)
             block_cs_metrics = extract_cs_info(con_data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci)
         } else { # no CS
-            bvsr_res = get_susie_result(res)
             if (sum(bvsr_res$pip > finemapping_opts$signal_cutoff) > 0) {
                 block_cs_metrics = extract_top_pip_info(res)
             }
         }
     }
     # sensitive check for additional analyses
-    if (!is.null(block_cs_metrics) && nrow(block_cs_metrics) > 0) {
+    if (!is.null(block_cs_metrics) && length(block_cs_metrics) > 0) {
       block_cs_metrics = parse_cs_corr(block_cs_metrics)
       cs_row = block_cs_metrics %>% filter(!is.na(block_cs_metrics$variants_per_cs))
       if (nrow(cs_row)>1) {# CS > 1
diff --git a/R/univariate_rss_diagnostics.R b/R/univariate_rss_diagnostics.R
index 723d2fb7..4a075318 100644
--- a/R/univariate_rss_diagnostics.R
+++ b/R/univariate_rss_diagnostics.R
@@ -137,14 +137,13 @@ extract_cs_info <- function(con_data, cs_names, top_loci_table) {
 #'
 #' @export
 extract_top_pip_info <- function(con_data) {
-  # Find the variant with the highest PIP
   top_pip_index <- which.max(con_data$susie_result_trimmed$pip)
   top_pip <- con_data$susie_result_trimmed$pip[top_pip_index]
-  top_variant = con_data$variant_names[top_pip_index]
-  top_z = con_data$sumstats$z[top_pip_index]
-  p_value = z_to_pvalue(top_z)
-  # Create the dataframe row
-  data.frame(
+  top_variant <- con_data$variant_names[top_pip_index]
+  top_z <- con_data$sumstats$z[top_pip_index]
+  p_value <- z_to_pvalue(top_z)
+  
+  list(
     cs_name = NA,
     variants_per_cs = NA,
     top_variant = top_variant,
@@ -152,10 +151,11 @@ extract_top_pip_info <- function(con_data) {
     top_pip = top_pip,
     top_z = top_z,
     p_value = p_value,
-    cs_corr = list()
+    cs_corr = NA  # or NULL or an empty list if that makes more sense
   )
 }
 
+
 #' Parse Credible Set Correlations from extract_cs_info() Output
 #'
 #' This function takes the output from `extract_cs_info()` and expands the `cs_corr` column
@@ -215,7 +215,6 @@ parse_cs_corr <- function(df) {
     df[, c("cs_corr_max", "cs_corr_min") := list(NA_real_, NA_real_)]
     return(df)
   }
-  
   # Determine max number of correlations
   max_corr_count <- max(sapply(processed_results, function(x) length(x$values)))
   

From 83be4b8fe3fbff75268ff7f6745fb7287049aa03 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Mon, 21 Apr 2025 19:23:27 +0000
Subject: [PATCH 23/26] clean up

---
 R/univariate_pipeline.R        | 1 +
 R/univariate_rss_diagnostics.R | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index 7cc68686..fe3ab70c 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -234,6 +234,7 @@ rss_analysis_pipeline <- function(
     sumstats <- impute_results$result_filter
     LD_mat <- impute_results$LD_mat
   }
+
   # Perform fine-mapping
   if (!is.null(finemapping_method)) {
     pri_coverage <- finemapping_opts$coverage[1]
diff --git a/R/univariate_rss_diagnostics.R b/R/univariate_rss_diagnostics.R
index 4a075318..207dfbf9 100644
--- a/R/univariate_rss_diagnostics.R
+++ b/R/univariate_rss_diagnostics.R
@@ -137,6 +137,7 @@ extract_cs_info <- function(con_data, cs_names, top_loci_table) {
 #'
 #' @export
 extract_top_pip_info <- function(con_data) {
+  # Find the variant with the highest PIP
   top_pip_index <- which.max(con_data$susie_result_trimmed$pip)
   top_pip <- con_data$susie_result_trimmed$pip[top_pip_index]
   top_variant <- con_data$variant_names[top_pip_index]
@@ -151,11 +152,10 @@ extract_top_pip_info <- function(con_data) {
     top_pip = top_pip,
     top_z = top_z,
     p_value = p_value,
-    cs_corr = NA  # or NULL or an empty list if that makes more sense
+    cs_corr = NA  # or NULL
   )
 }
 
-
 #' Parse Credible Set Correlations from extract_cs_info() Output
 #'
 #' This function takes the output from `extract_cs_info()` and expands the `cs_corr` column
@@ -215,6 +215,7 @@ parse_cs_corr <- function(df) {
     df[, c("cs_corr_max", "cs_corr_min") := list(NA_real_, NA_real_)]
     return(df)
   }
+
   # Determine max number of correlations
   max_corr_count <- max(sapply(processed_results, function(x) length(x$values)))
   

From c565d38339a654adabe8431dfe7ff34393834dd3 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Tue, 22 Apr 2025 20:04:10 +0000
Subject: [PATCH 24/26] final modification

---
 R/univariate_pipeline.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index fe3ab70c..cd099aad 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -357,7 +357,8 @@ rss_analysis_pipeline <- function(
         }
         result_list[[method_name]] <- ser
       }
-    } 
+    result_list[["diagnostics"]] <- block_cs_metrics
+    }
   }
-  return(list(result_list = result_list, block_cs_metrics = block_cs_metrics))
+  return(result_list)
 }

From eb13f02554297e6592c33b69a8df32a02e5c03c5 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Thu, 24 Apr 2025 13:38:50 -0400
Subject: [PATCH 25/26] improve the documentation of imputation report

---
 R/raiss.R | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/R/raiss.R b/R/raiss.R
index 73fd06c3..4df7a671 100644
--- a/R/raiss.R
+++ b/R/raiss.R
@@ -346,14 +346,26 @@ filter_raiss_output <- function(zscores, R2_threshold = 0.6, minimum_ld = 5, ver
 
   # Print report
   if (verbose) {
+    max_label_length <- max(nchar(c(
+      "Variants before filter:", 
+      "Non-imputed variants:", 
+      "Imputed variants:", 
+      "Variants filtered because of low LD score:", 
+      "Variants filtered because of low R2:", 
+      "Remaining variants after filter:"
+    )))
+    
+    format_line <- function(label, value) {
+      sprintf("%-*s %d", max_label_length, paste0(label, ":"), value)
+    }
+
     message("IMPUTATION REPORT\n")
-    message("Number of SNPs:\n")
-    message("before filter:", NSNPs_bf_filt, "\n")
-    message("not imputed:", NSNPs_initial, "\n")
-    message("imputed:", NSNPs_imputed, "\n")
-    message("filtered because of LD:", NSNPs_ld_filt, "\n")
-    message("filtered because of R2:", NSNPs_R2_filt, "\n")
-    message("after filter:", NSNPs_af_filt, "\n")
+    message(format_line("Variants before filter", NSNPs_bf_filt))
+    message(format_line("Non-imputed variants", NSNPs_initial))
+    message(format_line("Imputed variants", NSNPs_imputed))
+    message(format_line("Variants filtered because of low LD score", NSNPs_ld_filt))
+    message(format_line("Variants filtered because of low R2", NSNPs_R2_filt))
+    message(format_line("Remaining variants after filter", NSNPs_af_filt))
   }
   return(zscore_list = list(zscores_nofilter = zscores_nofilter, zscores = zscores))
 }

From ff2b37208ac3acc820a03571354b6ac2975f0c13 Mon Sep 17 00:00:00 2001
From: rl3328 <rl3328@cumc.columbia.edu>
Date: Fri, 2 May 2025 11:35:46 -0400
Subject: [PATCH 26/26] change the auto_decision function

---
 R/LD.R                         |  5 +----
 R/raiss.R                      |  6 +-----
 R/univariate_pipeline.R        |  5 -----
 R/univariate_rss_diagnostics.R | 20 +++++++++++++-------
 4 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/R/LD.R b/R/LD.R
index 6350b13e..426790ff 100644
--- a/R/LD.R
+++ b/R/LD.R
@@ -348,9 +348,6 @@ load_LD_matrix <- function(LD_meta_file_path, region, extract_coordinates = NULL
       # Use region chromosome as fallback for empty blocks
       block_chroms[j] <- as.character(intersected_LD_files$region$chrom)
     }
-
-    # Remove large objects to free memory
-    rm(LD_matrix_processed, extracted_LD_list)
   }
 
   # Create combined LD matrix
@@ -494,7 +491,6 @@ partition_LD_matrix <- function(ld_data, merge_small_blocks = TRUE,
 
   # Partition the matrix based on block metadata
   result <- extract_block_matrices(combined_matrix, block_metadata, variant_ids)
-
   return(result)
 }
 
@@ -708,6 +704,7 @@ extract_block_matrices <- function(matrix, block_metadata, variant_ids) {
       stringsAsFactors = FALSE
     )
     variant_mapping <- rbind(variant_mapping, block_mapping)
+
   }
 
   return(list(
diff --git a/R/raiss.R b/R/raiss.R
index 4df7a671..278292fa 100644
--- a/R/raiss.R
+++ b/R/raiss.R
@@ -28,7 +28,7 @@ raiss_single_matrix <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01
   knowns_id <- intersect(known_zscores$variant_id, ref_panel$variant_id)
   knowns <- which(ref_panel$variant_id %in% knowns_id)
   unknowns <- which(!ref_panel$variant_id %in% knowns_id)
-
+  
   # Handle edge cases
   if (length(knowns) == 0) {
     if (verbose) message("No known variants found, cannot perform imputation.")
@@ -51,10 +51,8 @@ raiss_single_matrix <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01
 
   # Call raiss_model
   results <- raiss_model(zt, sig_t, sig_i_t, lamb, rcond)
-
   # Format the results
   results <- format_raiss_df(results, ref_panel, unknowns)
-
   # Filter output
   results <- filter_raiss_output(results, R2_threshold, minimum_ld, verbose)
 
@@ -72,7 +70,6 @@ raiss_single_matrix <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01
   } else {
     as.matrix(LD_matrix)
   }
-
   # Return results
   return(list(
     result_nofilter = result_nofilter,
@@ -193,7 +190,6 @@ raiss <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01, rcond = 0.01
       lamb, rcond, R2_threshold, minimum_ld,
       verbose = FALSE
     )
-
     # Skip if block returned NULL (no known variants)
     if (!is.null(block_result)) {
       results_list[[block_id]] <- block_result
diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index cd099aad..6826ad1a 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -200,7 +200,6 @@ rss_analysis_pipeline <- function(
   if (nrow(sumstats)==0){
       return(list(rss_data_analyzed = sumstats))
   }
-
   # Preprocess the input data
   preprocess_results <- rss_basic_qc(sumstats, LD_data, skip_region = skip_region, remove_indels = remove_indels)
   sumstats <- preprocess_results$sumstats
@@ -219,14 +218,12 @@ rss_analysis_pipeline <- function(
       message(paste("Follow-up on region because signals above PIP threshold", pip_cutoff_to_skip, "were detected in initial model screening."))
     }
   }
-
   # Perform quality control
   if (!is.null(qc_method)) {
     qc_results <- summary_stats_qc(sumstats, LD_data, n = n, var_y = var_y, method = qc_method)
     sumstats <- qc_results$sumstats
     LD_mat <- qc_results$LD_mat
   }
-
   # Perform imputation
   if (impute) {
     LD_matrix <- partition_LD_matrix(LD_data)
@@ -234,7 +231,6 @@ rss_analysis_pipeline <- function(
     sumstats <- impute_results$result_filter
     LD_mat <- impute_results$LD_mat
   }
-
   # Perform fine-mapping
   if (!is.null(finemapping_method)) {
     pri_coverage <- finemapping_opts$coverage[1]
@@ -311,7 +307,6 @@ rss_analysis_pipeline <- function(
               method_name <- paste0(finemapping_method, "_", "NO_QC")
           }
           result_list[[method_name]] <- bcr
-          
           finemapping_method <- "single_effect"
           sumstats <- preprocess_results$sumstats
           LD_mat <- preprocess_results$LD_mat
diff --git a/R/univariate_rss_diagnostics.R b/R/univariate_rss_diagnostics.R
index 207dfbf9..271b6256 100644
--- a/R/univariate_rss_diagnostics.R
+++ b/R/univariate_rss_diagnostics.R
@@ -286,16 +286,22 @@ auto_decision <- function(df, high_corr_cols) {
 
   # Count total and remaining CS
   total_cs <- nrow(df)
+  print("total_cs")
+  print(total_cs)
   tagged_cs_count <- sum(df$tagged_cs)
-  remaining_cs <- total_cs - 1 - tagged_cs_count
-
+  if (total_cs > 0) {
+    remaining_cs <- total_cs - 1 - tagged_cs_count
+  } else {
+    remaining_cs <- 0
+  }
   # Determine method
   df$method <- case_when(
-    total_cs == 1 | (tagged_cs_count == 0 & remaining_cs == 0) ~ "BVSR",
-    remaining_cs == 0 ~ "SER",
-    remaining_cs > 0 ~ "BCR",
-    TRUE ~ NA_character_
-  )
+  tagged_cs_count == 0 & total_cs > 1 ~ "BVSR",
+  (remaining_cs == 0 & total_cs > 1) | (total_cs == 1) ~ "SER",
+  remaining_cs > 0 ~ "BCR",
+  TRUE ~ NA_character_
+)
+
 
   return(df)
 }
\ No newline at end of file