From e81b12c2a726c7144cd5e19c25ffaa62058e01fe Mon Sep 17 00:00:00 2001
From: Daniel Nachun <daniel.nachun@gmail.com>
Date: Fri, 17 Apr 2026 01:27:26 -0700
Subject: [PATCH 1/5] remove quantile QTL code

---
 DESCRIPTION                          |    3 -
 NAMESPACE                            |    8 -
 R/quail_rank_score.R                 |  154 ----
 R/quail_vqtl.R                       |  170 ----
 R/quantile_twas.R                    |  451 -----------
 R/quantile_twas_weight.R             | 1073 --------------------------
 R/twas.R                             |  229 +-----
 man/QUAIL_pipeline.Rd                |   36 -
 man/QUAIL_rank_score_pipeline.Rd     |   31 -
 man/calculate_xi_correlation.Rd      |   27 -
 man/load_quantile_twas_weights.Rd    |   38 -
 man/multicontext_ld_clumping.Rd      |   31 -
 man/perform_qr_analysis.Rd           |   23 -
 man/qr_screen.Rd                     |   50 --
 man/quantile_twas_weight_pipeline.Rd |   78 --
 tests/testthat/test_ctwas.R          |  160 ++++
 tests/testthat/test_quail_ctwas.R    |  374 ---------
 tests/testthat/test_quail_vqtl.R     |  338 --------
 tests/testthat/test_quantile_twas.R  |  717 -----------------
 tests/testthat/test_twas.R           |  206 +----
 20 files changed, 186 insertions(+), 4011 deletions(-)
 delete mode 100644 R/quail_rank_score.R
 delete mode 100644 R/quail_vqtl.R
 delete mode 100644 R/quantile_twas.R
 delete mode 100644 R/quantile_twas_weight.R
 delete mode 100644 man/QUAIL_pipeline.Rd
 delete mode 100644 man/QUAIL_rank_score_pipeline.Rd
 delete mode 100644 man/calculate_xi_correlation.Rd
 delete mode 100644 man/load_quantile_twas_weights.Rd
 delete mode 100644 man/multicontext_ld_clumping.Rd
 delete mode 100644 man/perform_qr_analysis.Rd
 delete mode 100644 man/qr_screen.Rd
 delete mode 100644 man/quantile_twas_weight_pipeline.Rd
 create mode 100644 tests/testthat/test_ctwas.R
 delete mode 100644 tests/testthat/test_quail_ctwas.R
 delete mode 100644 tests/testthat/test_quail_vqtl.R
 delete mode 100644 tests/testthat/test_quantile_twas.R

diff --git a/DESCRIPTION b/DESCRIPTION
index cae09866..5b28239a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -55,10 +55,7 @@ Suggests:
     ncvreg,
     pgenlibr,
     qgg,
-    quadprog,
-    quantreg,
     udr,
-    XICOR,
     qvalue,
     rmarkdown,
     snpStats,
diff --git a/NAMESPACE b/NAMESPACE
index 3f7f4994..1dc4b07f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,7 +1,5 @@
 # Generated by roxygen2: do not edit by hand
 
-export(QUAIL_pipeline)
-export(QUAIL_rank_score_pipeline)
 export(adjust_susie_weights)
 export(align_variant_names)
 export(allele_qc)
@@ -15,7 +13,6 @@ export(bayes_c_weights)
 export(bayes_l_weights)
 export(bayes_n_weights)
 export(bayes_r_weights)
-export(calculate_xi_correlation)
 export(check_ld)
 export(clean_context_names)
 export(coloc_post_processor)
@@ -58,7 +55,6 @@ export(load_multicontext_sumstats)
 export(load_multitask_regional_data)
 export(load_multitrait_R_sumstat)
 export(load_multitrait_tensorqtl_sumstat)
-export(load_quantile_twas_weights)
 export(load_regional_association_data)
 export(load_regional_functional_data)
 export(load_regional_multivariate_data)
@@ -79,7 +75,6 @@ export(mr_format)
 export(mrash_weights)
 export(mrmash_weights)
 export(mrmash_wrapper)
-export(multicontext_ld_clumping)
 export(multigene_udr)
 export(multivariate_analysis_pipeline)
 export(mvsusie_weights)
@@ -89,11 +84,8 @@ export(otters_weights)
 export(parse_cs_corr)
 export(parse_region)
 export(parse_variant_id)
-export(perform_qr_analysis)
 export(prs_cs)
 export(prs_cs_weights)
-export(qr_screen)
-export(quantile_twas_weight_pipeline)
 export(raiss)
 export(read_afreq)
 export(region_to_df)
diff --git a/R/quail_rank_score.R b/R/quail_rank_score.R
deleted file mode 100644
index ce72fa90..00000000
--- a/R/quail_rank_score.R
+++ /dev/null
@@ -1,154 +0,0 @@
-#' Calculate Quantile Rank Score
-#' @param pheno numeric vector of phenotype values
-#' @param covariates matrix/data.frame of covariates
-#' @param tau quantile level
-#' @return vector of rank scores
-#' @importFrom stats rnorm
-#' @noRd
-calculate_rank_score <- function(pheno, covariates, tau, seed = 123) {
-  # Make sure quantreg is installed
-  if (!requireNamespace("quantreg", quietly = TRUE)) {
-    stop("To use this function, please install quantreg: https://cran.r-project.org/web/packages/quantreg/index.html")
-  }
-  set.seed(seed)
-  # Add random variable to covariates
-  covar_data <- data.frame(covariates)
-  covar_data$d_rv <- rnorm(nrow(covar_data))
-
-  # Fit quantile regression
-  Qreg <- suppressWarnings(quantreg::rq(pheno ~ ., data = covar_data, tau = tau, method = "fn"))
-
-  # Get rank score
-  coeff <- summary(Qreg, se = "ker")$coefficients
-  SE_d_rv <- coeff[nrow(coeff), 2]
-  a_i_tau <- (tau - ifelse(residuals(Qreg) < 0, 1, 0)) * SE_d_rv / sqrt(-tau^2 + tau)
-
-  return(a_i_tau)
-}
-
-#' Fit Rank Scores for All Quantile Levels
-#' @param pheno numeric vector of phenotype values
-#' @param covariates matrix/data.frame of covariates
-#' @param num_tau_levels integer number of quantile levels
-#' @param num_cores integer number of cores for parallel processing
-#' @return list of rank scores for each quantile level
-#' @importFrom parallel mclapply
-#' @noRd
-fit_rank_scores <- function(pheno, covariates, num_tau_levels, num_cores = 1) {
-  if (num_cores > 1 && requireNamespace("parallel", quietly = TRUE)) {
-    mclapply(1:num_tau_levels, function(i) {
-      tau <- i / (num_tau_levels + 1)
-      calculate_rank_score(pheno, covariates, tau)
-    }, mc.cores = num_cores)
-  } else {
-    lapply(1:num_tau_levels, function(i) {
-      tau <- i / (num_tau_levels + 1)
-      calculate_rank_score(pheno, covariates, tau)
-    })
-  }
-}
-
-#' Calculate Integrated Rank Score
-#' @param rank_scores list of rank scores
-#' @param method character "equal" or "ivw"
-#' @param num_tau_levels integer number of quantile levels
-#' @return vector of integrated rank scores
-#' @importFrom stats cov
-#' @noRd
-### calculate_integrated_score considering odds and even number tau
-calculate_integrated_score <- function(rank_scores, method = "equal", num_tau_levels) {
-  if (!requireNamespace("quadprog", quietly = TRUE)) {
-    stop("To use this function, please install quadprog: https://cran.r-project.org/web/packages/quadprog/index.html")
-  }
-  if (num_tau_levels %% 2 == 0) { # even
-    mid_point <- num_tau_levels / 2
-    lower_half <- 1:mid_point
-    upper_half <- (mid_point + 1):num_tau_levels
-    n_pairs <- mid_point
-  } else { # odds
-    mid_point <- ceiling(num_tau_levels / 2)
-    lower_half <- 1:(mid_point - 1)
-    middle <- mid_point
-    upper_half <- (mid_point + 1):num_tau_levels
-    n_pairs <- length(lower_half)
-  }
-
-  if (method == "equal") {
-    int_rank_score <- 0
-    if (num_tau_levels %% 2 == 0) {
-      # even number tau
-      for (i in 1:num_tau_levels) {
-        weight <- ifelse(i > mid_point, 1, -1)
-        int_rank_score <- int_rank_score + weight * rank_scores[[i]]
-      }
-    } else {
-      # odds number tau
-      for (i in lower_half) int_rank_score <- int_rank_score - rank_scores[[i]]
-      # int_rank_score <- int_rank_score + 0 * rank_scores[[middle]]
-      for (i in upper_half) int_rank_score <- int_rank_score + rank_scores[[i]]
-    }
-  } else if (method == "ivw") {
-    a_i_tau_diff_matrix <- matrix(0, nrow = length(rank_scores[[1]]), ncol = n_pairs)
-
-    for (i in 1:n_pairs) {
-      if (num_tau_levels %% 2 == 0) {
-        a_i_tau_diff_matrix[, i] <- rank_scores[[upper_half[i]]] - rank_scores[[lower_half[i]]]
-      } else {
-        a_i_tau_diff_matrix[, i] <- rank_scores[[upper_half[i]]] - rank_scores[[lower_half[i]]]
-      }
-    }
-
-    # Optimization steps
-    Y_QI_Var_Cov <- cov(a_i_tau_diff_matrix)
-    Dmat <- Y_QI_Var_Cov
-    dvec <- rep(0, n_pairs)
-    Amat <- cbind(rep(1, n_pairs), diag(n_pairs))
-    bvec <- c(1, rep(0, n_pairs))
-
-    qp <- quadprog::solve.QP(Dmat, dvec, t(Amat), bvec, meq = 1)
-    weights_vec <- qp$solution
-
-    int_rank_score <- 0
-    for (i in 1:n_pairs) {
-      int_rank_score <- int_rank_score + weights_vec[i] *
-        (rank_scores[[upper_half[i]]] - rank_scores[[lower_half[i]]])
-    }
-  }
-
-  return(int_rank_score / n_pairs)
-}
-
-#' Main QUAIL Rank Score Pipeline
-#' @param phenotype numeric vector of phenotype values
-#' @param covariates matrix/data.frame of covariates
-#' @param num_tau_levels integer number of quantile levels
-#' @param method character "equal" or "ivw"
-#' @param num_cores integer number of cores for parallel processing
-#' @return data.frame with integrated rank scores
-#' @export
-QUAIL_rank_score_pipeline <- function(phenotype, covariates,
-                                      num_tau_levels = 19,
-                                      method = "equal",
-                                      num_cores = 1) {
-  # Input validation
-  start_time <- Sys.time()
-  # Calculate rank scores for all quantile levels
-  if (!is.null(phenotype)) {
-    if (!is.numeric(phenotype)) {
-      if (is.data.frame(phenotype) || is.matrix(phenotype)) {
-        phenotype <- as.numeric(phenotype[[1]])
-      } else {
-        stop("phenotype must be a numeric vector.")
-      }
-    }
-  }
-
-  rank_scores <- fit_rank_scores(phenotype, covariates, num_tau_levels, num_cores)
-
-  # Calculate integrated rank score
-  int_rank_score <- calculate_integrated_score(rank_scores, method, num_tau_levels)
-  end_time <- Sys.time()
-  cat("\nTotal vQTL score runtime:", round(difftime(end_time, start_time, units = "secs"), 2), "seconds\n")
-
-  return(int_rank_score)
-}
diff --git a/R/quail_vqtl.R b/R/quail_vqtl.R
deleted file mode 100644
index 8f27d413..00000000
--- a/R/quail_vqtl.R
+++ /dev/null
@@ -1,170 +0,0 @@
-#' Perform Univariate Regression for Genotype Data
-#'
-#' @param X Numeric matrix of genotypes (n x p), where n is the number of samples and p is the number of SNPs.
-#' @param y Numeric vector of phenotypes (length n).
-#' @param Z Optional numeric matrix of covariates (n x k), where k is the number of covariates.
-#' @param center Logical, whether to center the data (default: TRUE).
-#' @param scale Logical, whether to scale the data (default: FALSE).
-#' @param return_residuals Logical, whether to return residuals (default: FALSE).
-#' @return A list containing regression results: \code{betahat}, \code{sebetahat}, \code{z_scores}, \code{p_values}, and \code{q_values}.
-#' @examples
-#' X <- matrix(rnorm(1000), 100, 10)
-#' y <- rnorm(100)
-#' results <- univariate_regression(X, y)
-#' @noRd
-univariate_regression <- function(X, y, Z = NULL, center = TRUE,
-                                  scale = FALSE, return_residuals = FALSE) {
-  y_na <- which(is.na(y))
-  if (length(y_na)) {
-    X <- X[-y_na, ]
-    y <- y[-y_na]
-  }
-
-  if (center) {
-    y <- y - mean(y)
-    X <- scale(X, center = TRUE, scale = scale)
-  } else {
-    X <- scale(X, center = FALSE, scale = scale)
-  }
-
-  X[is.nan(X)] <- 0
-
-  if (!is.null(Z)) {
-    if (center) {
-      Z <- scale(Z, center = TRUE, scale = scale)
-    }
-    residual_x <- X # Store X before modifying y
-    y <- .lm.fit(Z, y)$residuals
-  } else {
-    residual_x <- X
-  }
-
-  output <- try(do.call(
-    rbind,
-    lapply(1:ncol(X), function(i) {
-      g <- .lm.fit(cbind(1, X[, i]), y)
-      return(c(coef(g)[2], calc_stderr(cbind(1, X[, i]), g$residuals)[2]))
-    })
-  ), silent = TRUE)
-
-  # Exception handling
-  if (inherits(output, "try-error")) {
-    output <- matrix(0, ncol(X), 2)
-    for (i in 1:ncol(X)) {
-      fit <- summary(lm(y ~ X[, i]))$coef
-      if (nrow(fit) == 2) {
-        output[i, ] <- as.vector(summary(lm(y ~ X[, i]))$coef[2, 1:2])
-      } else {
-        output[i, ] <- c(0, 0)
-      }
-    }
-  }
-
-  # Calculate z-scores (t-statistics)
-  z_scores <- output[, 1] / output[, 2]
-
-  # Calculate p-values from z-scores
-  p_values <- 2 * pnorm(abs(z_scores), lower.tail = FALSE)
-
-  # Calculate q-values using the provided function
-  q_values <- compute_qvalues(p_values)
-
-  # Use residual_x's column names as the column names for the results
-  rownames(output) <- colnames(residual_x)
-
-  result_list <- list(
-    betahat = setNames(output[, 1], rownames(output)),
-    sebetahat = setNames(output[, 2], rownames(output)),
-    z_scores = setNames(z_scores, rownames(output)),
-    p_values = setNames(p_values, rownames(output)),
-    q_values = setNames(q_values, rownames(output))
-  )
-
-  if (return_residuals && !is.null(Z)) {
-    result_list$residuals <- y
-  }
-
-  return(result_list)
-}
-
-
-#' Perform Linear Regression for GWAS
-#'
-#' @param genotype Numeric matrix of genotypes (n x p), where n is the number of samples and p is the number of SNPs.
-#' @param phenotype Numeric vector of phenotypes (length n).
-#' @param covariates Optional numeric matrix of covariates (n x k), where k is the number of covariates.
-#' @return A data frame containing regression results for each SNP, including \code{BETA}, \code{SE}, \code{Z}, \code{P}, and \code{Q}.
-#' @examples
-#' genotype <- matrix(rbinom(1000 * 10, 2, 0.3), 1000, 10)
-#' phenotype <- rnorm(1000)
-#' results <- run_linear_regression1(genotype, phenotype)
-#' @noRd
-run_linear_regression <- function(genotype, phenotype, covariates = NULL, phenotype_id = NULL) {
-  if (!is.null(covariates)) {
-    covariates <- as.data.frame(lapply(covariates, as.numeric))
-  }
-
-  reg_results <- univariate_regression(
-    X = genotype,
-    y = phenotype,
-    Z = covariates,
-    center = TRUE,
-    scale = FALSE
-  )
-
-  snp_info <- parse_variant_id(colnames(genotype))
-
-  data.frame(
-    phenotype_id = if (!is.null(phenotype_id)) phenotype_id else NA,
-    chr = snp_info$chrom,
-    pos = snp_info$pos,
-    A1 = snp_info$A1,
-    A2 = snp_info$A2,
-    variant_id = colnames(genotype),
-    beta = reg_results$betahat,
-    se = reg_results$sebetahat,
-    z = reg_results$z_scores,
-    p = reg_results$p_values,
-    q = reg_results$q_values,
-    N = nrow(genotype)
-  )
-}
-
-#' Main QUAIL pipeline
-#' QUAIL vQTL Analysis Pipeline
-#'
-#' @param genotype numeric matrix (n x p) of genotypes.
-#' @param rank_score numeric vector (n x 1) of rank scores from Step 1.
-#' @param phenotype optional numeric vector (n x 1) of original phenotype values.
-#' @param covariates optional numeric matrix (n x k) of covariates.
-#' @return A data frame containing vQTL results.
-#' @export
-#' @examples
-#' \dontrun{
-#' results <- QUAIL_pipeline(genotype, rank_score, covariates = covariates)
-#' }
-QUAIL_pipeline <- function(genotype, rank_score, phenotype = NULL,
-                           covariates = NULL, phenotype_id = NULL) {
-  start_time <- Sys.time()
-
-  # Validate rank_score
-  if (!is.numeric(rank_score)) {
-    stop("rank_score must be a numeric vector.")
-  }
-
-  # Validate covariates
-  if (!is.null(covariates)) {
-    if (!is.data.frame(covariates)) {
-      covariates <- as.data.frame(covariates)
-    }
-    covariates <- as.data.frame(lapply(covariates, as.numeric))
-  }
-
-  # Perform vQTL analysis
-  vqtl_results <- run_linear_regression(genotype, rank_score, covariates, phenotype_id = phenotype_id)
-
-  end_time <- Sys.time()
-  cat("\nTotal vQTL runtime:", round(difftime(end_time, start_time, units = "secs"), 2), " seconds\n")
-
-  return(vqtl_results)
-}
diff --git a/R/quantile_twas.R b/R/quantile_twas.R
deleted file mode 100644
index 96f2287f..00000000
--- a/R/quantile_twas.R
+++ /dev/null
@@ -1,451 +0,0 @@
-#' Get Hierarchical Clusters with Modularity Optimization
-#'
-#' This function performs hierarchical clustering on a correlation matrix and identifies the optimal number of clusters using modularity optimization. It ensures continuous quantiles for each cluster and computes a modularity score.
-#' FIXME: # The initial modularity (Q_modularity_initial) is calculated based on the clustering before we enforce continuous ranges in the clusters. After adjusting the cluster
-#' assignments with `ensure_continuous_clusters`, we are not recalculating modularity for the new cluster structure. If required, you should compute the modularity
-#' again based on the new cluster structure after ensuring continuous ranges.
-#'
-#' @param cormat A P x P correlation matrix (numeric matrix) where P is the number of items to be clustered.
-#' @param between_cluster A numeric value specifying the correlation threshold for clustering. If all correlations are greater than this threshold, the function will form a single cluster. Default is 0.8.
-#'
-#' @return A list with the following elements:
-#' \item{cluster}{A P x K binary matrix where K is the number of clusters. Each row represents an item, and a value of 1 in column k means that the item is assigned to cluster k.}
-#' \item{Q_modularity_initial}{The modularity score based on the initial clustering, before enforcing continuous clusters.}
-#' \item{cluster_ranges}{A list of ranges (quantiles) for each cluster.}
-#'
-#' @examples
-#' # Example usage
-#' cormat <- matrix(runif(100), nrow = 10)
-#' cormat <- cormat %*% t(cormat) # Make it symmetric
-#' res <- get_hierarchical_clusters(cormat, between_cluster = 0.8)
-#' B <- res$cluster # The binary cluster matrix
-#' Q <- res$Q_modularity_initial # The initial modularity score
-#' ranges <- res$cluster_ranges # The quantile ranges for each cluster
-#'
-#' @importFrom stats hclust as.dist cutree
-#'
-#' @noRd
-get_hierarchical_clusters <- function(cormat, between_cluster = 0.8) {
-  # Perform hierarchical clustering on the correlation matrix
-  hc <- hclust(as.dist(1 - cormat))
-
-  # Get the optimal number of clusters
-  opt_cluster <- get_n_cluster(hc, cormat, between_cluster = between_cluster)
-  n_cluster <- opt_cluster$n_cluster
-  Q_modularity_initial <- opt_cluster$Qmodularity # Save the initial modularity score
-
-  # Obtain the initial clusters
-  index <- cutree(hc, n_cluster)
-
-  # Adjust the clusters to ensure each cluster has continuous quantiles
-  index <- ensure_continuous_clusters(index)
-
-  # Create the adjacency matrix B based on the adjusted continuous clusters
-  B <- sapply(1:max(index), function(t) as.numeric(index == t))
-  B <- as.matrix(B)
-
-  # Get the range of quantiles for each cluster
-  cluster_ranges <- get_cluster_ranges(index)
-
-  # Return the cluster structure, initial modularity, and quantile ranges
-  return(list(
-    "cluster" = B,
-    "Q_modularity_initial" = Q_modularity_initial,
-    "cluster_ranges" = cluster_ranges
-  ))
-}
-
-#' Ensure Clusters Have Continuous Quantiles
-#'
-#' This helper function ensures that clusters produced by hierarchical clustering are continuous in terms of quantiles.
-#'
-#' @param index A numeric vector representing the cluster assignments.
-#' @return A numeric vector with adjusted cluster assignments ensuring continuity.
-#' @examples
-#' index <- c(1, 1, 2, 2, 3)
-#' new_index <- ensure_continuous_clusters(index)
-#' @noRd
-ensure_continuous_clusters <- function(index) {
-  n <- length(index)
-  if (n <= 1) return(index)
-  new_index <- integer(n)
-  cluster_counter <- 1
-  new_index[1] <- cluster_counter
-
-  # Traverse the index to ensure continuous clusters
-  for (i in 2:n) {
-    # If the current quantile does not follow the previous one, start a new cluster
-    if (index[i] != index[i - 1]) {
-      cluster_counter <- cluster_counter + 1
-    }
-    new_index[i] <- cluster_counter
-  }
-
-  return(new_index)
-}
-
-#' Get Range of Quantiles for Each Cluster
-#'
-#' This function returns the range of quantiles for each identified cluster.
-#'
-#' @param index A numeric vector representing the cluster assignments.
-#' @return A list of quantile ranges for each cluster.
-#' @examples
-#' index <- c(1, 1, 2, 2, 3)
-#' cluster_ranges <- get_cluster_ranges(index)
-#' @noRd
-get_cluster_ranges <- function(index) {
-  ranges <- list()
-  cluster_numbers <- unique(index)
-
-  # For each cluster, find the range of quantiles
-  for (cluster in cluster_numbers) {
-    members <- which(index == cluster)
-    ranges[[cluster]] <- paste(min(members), "-", max(members))
-  }
-
-  return(ranges)
-}
-
-#' Get Optimal Number of Clusters Using Modularity
-#'
-#' This function computes the optimal number of clusters using modularity optimization. It iterates over possible numbers of clusters and evaluates each using a modularity score.
-#'
-#' @param hc An object of class \code{hclust}, resulting from hierarchical clustering.
-#' @param Sigma A P x P correlation matrix.
-#' @param m The maximum number of clusters to evaluate. Default is the number of columns in \code{Sigma}.
-#' @param between_cluster A numeric value specifying the correlation threshold. Default is 0.8.
-#' @return A list containing the number of clusters and the modularity score for each.
-#' @examples
-#' cormat <- matrix(runif(100), nrow = 10)
-#' cormat <- cormat %*% t(cormat) # Make it symmetric
-#' hc <- hclust(as.dist(1 - cormat))
-#' result <- get_n_cluster(hc, cormat, between_cluster = 0.8)
-#' n_cluster <- result$n_cluster
-#' @noRd
-get_n_cluster <- function(hc, Sigma, m = ncol(Sigma), between_cluster = 0.8) {
-  # If the minimum correlation is greater than the threshold, use one cluster
-  if (min(Sigma) > between_cluster) {
-    IND <- 1
-    Q <- 1
-  } else {
-    Q <- c()
-    if (ncol(Sigma) < 10) {
-      m <- ncol(Sigma)
-    }
-    # Iterate over possible numbers of clusters and compute modularity
-    for (i in 1:m) {
-      index <- cutree(hc, i)
-      B <- sapply(1:i, function(t) as.numeric(index == t))
-      Q[i] <- get_modularity(Sigma, B)
-    }
-    # Find the number of clusters with the maximum modularity
-    IND <- which(Q == max(Q))
-    L <- length(IND)
-    if (L > 1) IND <- IND[1] # If multiple solutions, choose the first one
-  }
-  return(list("n_cluster" = IND, "Qmodularity" = Q))
-}
-
-#' Calculate Modularity
-#'
-#' This function computes the modularity score for a given cluster structure and weight matrix.
-#'
-#' @param Weight A P x P weight matrix.
-#' @param B A binary matrix representing the cluster structure.
-#' @return The modularity score (numeric).
-#' @examples
-#' W <- matrix(runif(100), nrow = 10)
-#' B <- matrix(sample(0:1, 100, replace = TRUE), nrow = 10)
-#' Q <- get_modularity(W, B)
-#' @noRd
-get_modularity <- function(Weight, B) {
-  if (dim(Weight)[1] == 1) {
-    Q <- 0
-  } else {
-    W_pos <- Weight * (Weight > 0)
-    W_neg <- Weight * (Weight < 0)
-    N <- dim(Weight)[1]
-    K_pos <- colSums(W_pos)
-    K_neg <- colSums(W_neg)
-    m_pos <- sum(K_pos)
-    m_neg <- sum(K_neg)
-    m <- m_pos + m_neg
-    cate <- B %*% t(B)
-    if (m_pos == 0 & m_neg == 0) {
-      Q <- 0
-    } else {
-      if (m_pos == 0) {
-        Q_positive <- 0
-        Q_negative <- sum((W_neg - K_neg %*% t(K_neg) / m_neg) * cate) / m_neg
-      } else if (m_neg == 0) {
-        Q_positive <- sum((W_pos - K_pos %*% t(K_pos) / m_pos) * cate) / m_pos
-        Q_negative <- 0
-      } else {
-        Q_positive <- sum((W_pos - K_pos %*% t(K_pos) / m_pos) * cate) / m_pos
-        Q_negative <- sum((W_neg - K_neg %*% t(K_neg) / m_neg) * cate) / m_neg
-      }
-    }
-    Q <- m_pos / m * Q_positive - m_neg / m * Q_negative
-  }
-  return(Q)
-}
-
-
-#' Perform Grouped Integration for TWAS Weights
-#'
-#' This function integrates TWAS weights using two methods: correlation-based clustering and fixed tau intervals. The weights are clustered and averaged within each cluster, and the corresponding performance metrics (e.g., pseudo R2) are calculated.
-#'
-#' @param twas_weight A matrix where rows represent variants and columns represent tau values. Each entry represents the TWAS weight for a specific variant and tau.
-#' @param tau_values A numeric vector representing the tau values (quantiles) corresponding to the columns of \code{twas_weight}. Default is a sequence from 0.01 to 0.99.
-#' @param pseudo_R2 A numeric vector of pseudo R2 values corresponding to the tau values.
-#' @param between_cluster A numeric value specifying the correlation threshold for clustering. If all correlations are greater than this threshold, the function will form a single cluster. Default is 0.8.
-#' @param num_intervals The number of fixed non-overlapping intervals to divide the tau values. Default is 3.
-#'
-#' @return A list containing:
-#' \item{weights}{A matrix of integrated TWAS weights for each cluster or interval.}
-#' \item{twas_weight_performance}{A list of performance metrics, including quantile ranges and average pseudo R2 values for each cluster/interval.}
-#'
-#' @examples
-#' # Example usage
-#' twas_weight <- matrix(runif(100), nrow = 10)
-#' tau_values <- seq(0.01, 0.99, length.out = 10)
-#' pseudo_R2 <- runif(10)
-#' result <- perform_grouped_integration(twas_weight, tau_values, pseudo_R2)
-#' weights <- result$weights
-#' performance <- result$twas_weight_performance
-#'
-#' @noRd
-perform_grouped_integration <- function(twas_weight, tau_values, pseudo_R2, between_cluster = 0.8, num_intervals = 3) {
-  variant_id <- rownames(twas_weight)
-
-  if (length(variant_id) > 1) {
-    # Method 1: Correlation-based clustering (C1, C2, ..., Cn)
-    cormat <- cor(twas_weight)
-    res <- get_hierarchical_clusters(cormat, between_cluster = between_cluster)
-    cluster_C <- res$cluster
-    n_clusters_C <- ncol(cluster_C)
-
-    integrated_twas_weight_C <- list()
-    tau_ranges_C <- list()
-    pseudo_R2_avg_C <- list()
-
-    for (k in 1:n_clusters_C) {
-      tau_indices_C <- which(cluster_C[, k] == 1)
-      beta_values_C <- matrix(twas_weight[, tau_indices_C], nrow = nrow(twas_weight))
-      rownames(beta_values_C) <- variant_id
-      tau_subset_C <- tau_values[tau_indices_C]
-
-      integrated_twas_weight_C[[k]] <- integrate_tau(tau_subset_C, beta_values_C)
-      tau_ranges_C[[k]] <- c(min(tau_subset_C), max(tau_subset_C))
-      pseudo_R2_avg_C[[k]] <- mean(pseudo_R2[tau_indices_C])
-    }
-  } else {
-    # If only one variant_id, skip Method 1
-    integrated_twas_weight_C <- list()
-    tau_ranges_C <- list()
-    pseudo_R2_avg_C <- list()
-    n_clusters_C <- 0 # No clusters from Method 1
-  }
-
-  # Method 2: Fixed tau intervals (A1, A2, A3) with non-overlapping divisions
-  tau_cuts <- cut(tau_values, breaks = num_intervals, labels = FALSE, include.lowest = TRUE)
-  fixed_intervals <- tapply(tau_values, tau_cuts, range)
-
-  integrated_twas_weight_A <- list()
-  tau_ranges_A <- list()
-  pseudo_R2_avg_A <- list()
-
-  for (k in 1:num_intervals) {
-    tau_indices_A <- which(tau_cuts == k)
-    beta_values_A <- matrix(twas_weight[, tau_indices_A], nrow = nrow(twas_weight))
-    rownames(beta_values_A) <- variant_id
-    tau_subset_A <- tau_values[tau_indices_A]
-
-    integrated_twas_weight_A[[k]] <- integrate_tau(tau_subset_A, beta_values_A)
-    tau_ranges_A[[k]] <- c(fixed_intervals[[k]][1], fixed_intervals[[k]][2])
-    pseudo_R2_avg_A[[k]] <- mean(pseudo_R2[tau_indices_A])
-  }
-
-  # Combine the results of Method 2 and Method 1 if Method 1 was applied
-  combined_weights <- do.call(cbind, c(integrated_twas_weight_A, integrated_twas_weight_C))
-
-  # Ensure the correct number of column names is provided
-  total_columns <- ncol(combined_weights)
-  expected_columns <- num_intervals + n_clusters_C
-
-  if (total_columns == expected_columns) {
-    colnames(combined_weights) <- c(
-      paste0("A", 1:num_intervals, "_weights"),
-      if (n_clusters_C > 0) paste0("C", 1:n_clusters_C, "_weights") else NULL
-    )
-  } else {
-    stop("Mismatch between the number of columns and the number of intervals/clusters")
-  }
-
-  rownames(combined_weights) <- variant_id
-
-  twas_weight_performance <- list()
-
-  # Store results for Method 2 (A1, A2, A3) as data frames
-  for (k in 1:num_intervals) {
-    twas_weight_performance[[paste0("A", k, "_performance")]] <- data.frame(
-      "quantile_start" = round(fixed_intervals[[k]][1], 2),
-      "quantile_end" = round(fixed_intervals[[k]][2], 2),
-      "pseudo_R2_avg" = pseudo_R2_avg_A[[k]]
-    )
-  }
-
-  # Store results for Method 1 (C1, C2, ..., Cn) as data frames if it was applied
-  if (n_clusters_C > 0) {
-    for (k in 1:n_clusters_C) {
-      twas_weight_performance[[paste0("C", k, "_performance")]] <- data.frame(
-        "quantile_start" = round(tau_ranges_C[[k]][1], 2),
-        "quantile_end" = round(tau_ranges_C[[k]][2], 2),
-        "pseudo_R2_avg" = pseudo_R2_avg_C[[k]]
-      )
-    }
-  }
-
-  return(list("weights" = combined_weights, "twas_weight_performance" = twas_weight_performance))
-}
-
-
-#' Integrate Tau Across Quantiles
-#'
-#' This function integrates TWAS weights across a set of tau values (quantiles) using trapezoidal integration.
-#'
-#' @param tau.temp A numeric vector of tau values.
-#' @param a_tau A numeric vector or matrix of TWAS weights corresponding to the tau values.
-#'
-#' @return A numeric vector of integrated TWAS weights.
-#'
-#' @examples
-#' tau_values <- seq(0.01, 0.99, length.out = 10)
-#' a_tau <- matrix(runif(100), nrow = 10)
-#' integrated_weights <- integrate_tau(tau_values, a_tau)
-#'
-#' @noRd
-integrate_tau <- function(tau.temp, a_tau) {
-  tau.temp1 <- c(min(tau.temp) - 0.01, tau.temp, max(tau.temp) +
-    0.01)
-  if (is.matrix(a_tau)) {
-    a_tau <- cbind(rep(0, nrow(a_tau)), a_tau, rep(0, nrow(a_tau)))
-    out <- (0.5 * (a_tau[, -ncol(a_tau)] + a_tau[, -1]) %*%
-      diff(tau.temp1))
-  } else {
-    a_tau <- c(0, a_tau, 0)
-    out <- (0.5 * (a_tau[-length(a_tau)] + a_tau[-1]) %*%
-      diff(tau.temp1))
-  }
-  return(out)
-}
-
-#' Load Quantile TWAS Weights
-#'
-#' This function loads TWAS weights from RDS files and performs grouped integration using both correlation-based clustering and fixed tau intervals.
-#'
-#' @param weight_db_files A character vector of file paths to the RDS files containing TWAS weights.
-#' @param tau_values A numeric vector representing the tau values (quantiles) to use. Default is \code{seq(0.01, 0.99, 0.01)}.
-#' @param between_cluster A numeric value specifying the correlation threshold for clustering. Default is 0.8.
-#' @param num_intervals The number of fixed non-overlapping intervals to divide the tau values. Default is 3.
-#'
-#' @return A list containing:
-#' \item{weights}{A list of integrated TWAS weights for each context.}
-#' \item{twas_cv_performance}{A list of TWAS cross-validation performance metrics for each context.}
-#'
-#' @examples
-#' weight_db_files <- c("file1.rds", "file2.rds")
-#' tau_values <- seq(0.01, 0.99, by = 0.01)
-#' result <- load_quantile_twas_weights(weight_db_files, tau_values)
-#' weights <- result$weights
-#' performance <- result$twas_cv_performance
-#'
-#' @export
-load_quantile_twas_weights <- function(weight_db_files, tau_values = seq(0.01, 0.99, 0.01),
-                                       between_cluster = 0.8, num_intervals = 3) {
-  # Internal function to load and validate data from RDS files
-  load_and_validate_data <- function(weight_db_files) {
-    all_data <- lapply(weight_db_files, function(rds_file) {
-      db <- readRDS(rds_file)
-
-      # Extract gene data and check for valid entries
-      lapply(names(db), function(gene) {
-        gene_data <- db[[gene]]
-        lapply(names(gene_data), function(context) {
-          context_data <- gene_data[[context]]
-          if (!is.null(context_data$twas_weight) && !is.null(context_data$pseudo_R2)) {
-            list(
-              gene = gene,
-              context = clean_context_names(context, gene),
-              data = context_data
-            )
-          } else {
-            message(paste("Warning: Missing twas_weight or pseudo_R2 for", gene, "-", context))
-            NULL
-          }
-        })
-      })
-    })
-
-    result <- do.call(c, unlist(all_data, recursive = FALSE))
-    result <- Filter(Negate(is.null), result) # Remove NULL elements
-    return(result)
-  }
-
-  # Load and validate the data
-  combined_data <- load_and_validate_data(weight_db_files)
-  if (length(combined_data) == 0) {
-    stop("No valid data found in the provided files.")
-  }
-
-  # Merge contexts for the same gene from different RDS files
-  merged_data <- list()
-  for (item in combined_data) {
-    gene <- item$gene
-    context <- item$context
-    if (is.null(merged_data[[gene]])) {
-      merged_data[[gene]] <- list()
-    }
-    # Ensure we are not duplicating gene entries unnecessarily
-    if (!is.null(item$data)) {
-      merged_data[[gene]][[context]] <- item$data
-    }
-  }
-
-  # Perform grouped integration for each context and restructure results
-  results <- list(weights = list(), twas_cv_performance = list())
-
-  for (gene in names(merged_data)) {
-    message(paste("Processing gene:", gene))
-
-    for (context in names(merged_data[[gene]])) {
-      context_data <- merged_data[[gene]][[context]]
-
-      if (!is.null(context_data$twas_weight) && !is.null(context_data$pseudo_R2)) {
-        tryCatch(
-          {
-            result <- perform_grouped_integration(
-              context_data$twas_weight, tau_values,
-              context_data$pseudo_R2, between_cluster, num_intervals
-            )
-            results$weights[[context]] <- result$weights
-            results$twas_cv_performance[[context]] <- result$twas_weight_performance
-          },
-          error = function(e) {
-            message(paste("Error in perform_grouped_integration for", gene, "-", context, ":", e$message))
-          }
-        )
-      }
-    }
-  }
-
-  # Output the final structure
-  if (length(results$weights) == 0) {
-    message("Warning: No valid results were generated.")
-  } else {
-    message(paste("Number of contexts with valid results:", length(results$weights)))
-  }
-
-  return(results)
-}
diff --git a/R/quantile_twas_weight.R b/R/quantile_twas_weight.R
deleted file mode 100644
index 537d3d08..00000000
--- a/R/quantile_twas_weight.R
+++ /dev/null
@@ -1,1073 +0,0 @@
-#' @title Quantile TWAS Weight Calculation and QTL Analysis
-#'
-#' @description
-#' This file contains functions for performing Quantile Transcriptome-Wide
-#' Association Studies (TWAS) weight calculations and Quantile QTL analysis.
-#' It provides tools for screening quantile regression results, performing
-#' LD clumping and pruning, and calculating TWAS weights.
-#'
-#' @details
-#' The main function in this file is `quantile_twas_weight_pipeline`, which
-#' orchestrates the entire analysis process. Other functions are helper
-#' functions used within the main pipeline.
-#'
-
-#' Qrank Score Test Screen
-#' @param X Matrix of predictors
-#' @param Y Matrix or vector of response variables
-#' @param Z Matrix of covariates (optional)
-#' @param tau.list Vector of quantiles to be analyzed
-#' @param screen_threshold Significance threshold for adjusted p-values
-#' @param screen_method Method for p-value adjustment ('fdr' or 'qvalue')
-#' @param top_count Number of top SNPs to select
-#' @param top_percent Percentage of top SNPs to select
-#' @return A list containing various results from the QR screen
-#' @importFrom tidyr separate
-#' @importFrom dplyr %>% mutate select
-#' @export
-qr_screen <- function(
-    X, Y, Z = NULL, tau.list = seq(0.05, 0.95, by = 0.05),
-    screen_threshold = 0.05, screen_method = "qvalue", top_count = 10, top_percent = 15) {
-  # Make sure quantreg is installed
-  if (!requireNamespace("quantreg", quietly = TRUE)) {
-    stop("To use this function, please install quantreg: https://cran.r-project.org/web/packages/quantreg/index.html")
-  }
-  p <- ncol(X)
-  pvec <- rep(NA, p)
-  ltau <- length(tau.list)
-  quantile.pvalue <- matrix(NA, nrow = p, ncol = ltau, dimnames = list(colnames(X), paste("p_qr", tau.list, sep = "_")))
-  quantile.zscore <- matrix(NA, nrow = p, ncol = ltau, dimnames = list(colnames(X), paste("zscore_qr", tau.list, sep = "_")))
-  y <- as.matrix(Y)
-
-  if (!is.null(Z)) {
-    zz <- cbind(rep(1, nrow(y)), Z)
-  } else {
-    zz <- matrix(1, nrow = nrow(y), ncol = 1)
-  }
-
-  ranks_list <- lapply(tau.list, function(tau) {
-    suppressWarnings(quantreg::rq.fit.br(zz, y, tau = tau)$dual - (1 - tau))
-  })
-
-  for (ip in 1:p) {
-    x <- as.matrix(X[, ip])
-    VN <- matrix(0, nrow = ltau, ncol = ltau)
-    for (i in 1:ltau) {
-      for (j in 1:ltau) {
-        VN[i, j] <- min(tau.list[i], tau.list[j]) - tau.list[i] * tau.list[j]
-      }
-    }
-
-    if (!is.null(Z)) {
-      # xstar = lm(x ~ zz - 1)$residual
-      xstar <- .lm.fit(zz, x)$residual
-    } else {
-      xstar <- x
-    }
-
-    SN <- NULL
-    for (itau in 1:ltau) {
-      Sn <- as.matrix(t(xstar) %*% ranks_list[[itau]])
-      SN <- c(SN, Sn)
-    }
-    VN2 <- matrix(outer(VN, t(xstar) %*% xstar, "*"), nrow = ltau)
-    z_score <- SN / sqrt(diag(VN2))
-    pvalue1 <- pchisq(SN^2 / diag(VN2), 1, lower.tail = FALSE)
-    names(pvalue1) <- tau.list
-    quantile.pvalue[ip, ] <- pvalue1
-    quantile.zscore[ip, ] <- z_score
-    e <- solve(chol(VN2))
-    SN2 <- t(e) %*% SN
-    pvalue <- pchisq(sum(SN2^2), ltau, lower.tail = FALSE)
-    pvec[ip] <- pvalue
-  }
-
-  pvec <- apply(quantile.pvalue, 1, pval_cauchy)
-
-  if (screen_method == "fdr") {
-    adjusted_pvalues <- p.adjust(pvec)
-    method_col_name <- "fdr_p_qr"
-    method_quantile_names <- paste0("fdr_p_qr_", tau.list)
-    quantile_adjusted_pvalues <- apply(quantile.pvalue, 2, p.adjust)
-  } else if (screen_method == "qvalue") {
-    adjusted_pvalues <- compute_qvalues(pvec)
-    method_col_name <- "qvalue_qr"
-    method_quantile_names <- paste0("qvalue_qr_", tau.list)
-    quantile_adjusted_pvalues <- apply(quantile.pvalue, 2, compute_qvalues)
-  } else {
-    stop("Invalid screen_method. Choose 'fdr' or 'qvalue'.")
-  }
-
-  # Ensure quantile_adjusted_pvalues is always a matrix (handles single-variant case)
-  if (!is.matrix(quantile_adjusted_pvalues)) {
-    quantile_adjusted_pvalues <- matrix(quantile_adjusted_pvalues, nrow = 1,
-                                        dimnames = list(colnames(X), names(quantile_adjusted_pvalues)))
-  }
-
-  sig_SNP_threshold <- which(adjusted_pvalues < screen_threshold)
-  sig_SNP_top_count <- order(adjusted_pvalues)[1:top_count]
-  sig_SNP_top_percent <- order(adjusted_pvalues)[1:max(1, round(length(adjusted_pvalues) * top_percent / 100))]
-
-  sig.SNPs_names <- colnames(X)[sig_SNP_threshold]
-  sig.SNPs_names_top_count <- colnames(X)[sig_SNP_top_count]
-  sig.SNPs_names_top_percent <- colnames(X)[sig_SNP_top_percent]
-  phenotype_id <- colnames(y)[1]
-
-  df_result <- data.frame(
-    phenotype_id = phenotype_id,
-    variant_id = colnames(X),
-    p_qr = pvec
-  )
-
-  # Add quantile-specific p-values
-  for (tau in tau.list) {
-    df_result[[paste0("p_qr_", tau)]] <- quantile.pvalue[, paste0("p_qr_", tau)]
-  }
-
-  # Add overall q-value
-  df_result[[method_col_name]] <- adjusted_pvalues
-
-  # Add quantile-specific q-values
-  for (tau in tau.list) {
-    df_result[[paste0(method_col_name, "_", tau)]] <- quantile_adjusted_pvalues[, paste0("p_qr_", tau)]
-  }
-
-  # Add quantile-specific z-scores
-  for (tau in tau.list) {
-    df_result[[paste0("zscore_qr_", tau)]] <- quantile.zscore[, paste0("zscore_qr_", tau)]
-  }
-
-  # Split variant_id and reorder columns
-  parsed <- parse_variant_id(df_result$variant_id)
-  df_result <- df_result %>%
-    mutate(chr = parsed$chrom, pos = parsed$pos, A2 = parsed$A2, A1 = parsed$A1)
-
-  # Define the column order
-  col_order <- c("chr", "pos", "A2", "A1", "phenotype_id", "variant_id", "p_qr")
-  col_order <- c(col_order, paste0("p_qr_", tau.list))
-  col_order <- c(col_order, method_col_name)
-  col_order <- c(col_order, paste0(method_col_name, "_", tau.list))
-  col_order <- c(col_order, paste0("zscore_qr_", tau.list))
-
-  # Reorder the columns
-  df_result <- df_result %>% select(all_of(col_order))
-
-  return(list(
-    df_result = df_result,
-    tau_list = tau.list,
-    quantile_pvalue = quantile.pvalue,
-    quantile_zscore = quantile.zscore,
-    pvec = pvec,
-    adjusted_pvalues = adjusted_pvalues,
-    sig_SNP_threshold = sig_SNP_threshold,
-    sig.SNPs_names = sig.SNPs_names,
-    sig_SNP_top_count = sig_SNP_top_count,
-    sig_SNP_top_percent = sig_SNP_top_percent
-  ))
-}
-
-#' Perform Clumping and Pruning
-#' @param X Matrix of genotypes
-#' @param qr_results Results from QR_screen
-#' @param maf_list List of minor allele frequencies (optional)
-#' @param ld_clump_r2 R-squared threshold for initial LD clumping based on pvalue
-#' @param final_clump_r2 R-squared threshold for final LD clumping based on MAF
-#' @return A list containing final SNPs and clumped SNPs
-#' @importFrom bigstatsr FBM.code256
-#' @importFrom bigsnpr snp_clumping
-#' @export
-multicontext_ld_clumping <- function(X, qr_results, maf_list = NULL, ld_clump_r2 = 0.2, final_clump_r2 = 0.8) {
-  # Extract significant SNP names
-  sig_SNPs_names <- qr_results$sig.SNPs_names
-
-  # If no significant SNPs, return empty result
-  if (length(sig_SNPs_names) == 0) {
-    return(list(final_SNPs = NULL, clumped_SNPs = NULL, message = "No significant SNPs found"))
-  }
-
-  # Check if X only contains one SNP (one column)
-  if (ncol(X) == 1) {
-    print("Only one SNP in X. Skipping LD clumping.")
-    final_SNPs <- colnames(X)
-    return(list(final_SNPs = final_SNPs, clumped_SNPs = final_SNPs, message = "Only one SNP, no LD clumping performed"))
-  }
-
-  # Extract genotype matrix for significant SNPs
-  G_all <- X # Only retain columns for significant SNPs
-
-  # Convert the genotype matrix into FBM format
-  code_vec <- c(0, 1, 2, rep(NA, 256 - 3))
-  G_all <- FBM.code256(
-    nrow = nrow(G_all),
-    ncol = ncol(G_all),
-    init = G_all,
-    code = code_vec
-  )
-
-  # Parse SNP names to extract chromosome and position information
-  parsed_snp_info <- do.call(rbind, strsplit(sig_SNPs_names, ":"))
-  chr <- as.numeric(strip_chr_prefix(parsed_snp_info[, 1]))
-  pos <- as.numeric(parsed_snp_info[, 2]) # Extract position
-
-  # Step 1: Perform LD clumping for each tau based on p-values
-  clumped_snp_list <- list()
-
-  for (itau in seq_along(qr_results$tau_list)) {
-    tau_name <- paste0("p_qr_", qr_results$tau_list[itau])
-
-    # Extract p-values for the given quantile
-    p_values_quantile <- qr_results$quantile_pvalue[, tau_name][qr_results$sig_SNP_threshold]
-    log_p_values <- -log10(p_values_quantile) # Calculate log10 p-values
-
-    # Perform LD clumping using p-values as S
-    ind_clumped <- snp_clumping(
-      G = G_all,
-      infos.chr = chr,
-      infos.pos = pos,
-      S = log_p_values, # Use log10 p-values for each quantile
-      thr.r2 = ld_clump_r2,
-      size = 100 / ld_clump_r2
-    ) # Window size of 500kb
-
-    # Store clumping results for each quantile
-    clumped_snp_list[[tau_name]] <- ind_clumped
-  }
-
-  # Step 2: Take the union of clumping results across all quantiles
-  clumped_snp_union <- unique(unlist(clumped_snp_list)) # This is the SNP index, not the name
-  clumped_SNPs_name <- sig_SNPs_names[clumped_snp_union]
-  print(paste("Number of SNPs after union of clumping:", length(clumped_snp_union)))
-
-  if (length(clumped_snp_union) == 1) {
-    message("Only one SNP found in the union. Skipping LD pruning and returning the single SNP directly.")
-    final_SNPs <- sig_SNPs_names[clumped_snp_union]
-    return(list(final_SNPs = final_SNPs, clumped_SNPs = clumped_SNPs_name))
-  }
-
-
-  # Step 3: Sort results from union
-  sorted_indices <- order(chr[clumped_snp_union], pos[clumped_snp_union])
-  chr_sorted <- chr[clumped_snp_union][sorted_indices]
-  pos_sorted <- pos[clumped_snp_union][sorted_indices]
-
-  # Step 4: Initialize maf_values to NULL, and update only if maf_list is provided
-  maf_values <- NULL
-  if (!is.null(maf_list)) {
-    maf_values <- maf_list[qr_results$sig_SNP_threshold][clumped_snp_union][sorted_indices]
-  }
-  G_union <- X[, clumped_snp_union, drop = FALSE][, sorted_indices]
-  if (!inherits(G_union, "FBM")) {
-    G_union <- FBM.code256(
-      nrow = nrow(G_union),
-      ncol = ncol(G_union),
-      init = G_union,
-      code = code_vec
-    )
-  }
-  # Step 5: Perform final clumping using maf_values (if available), otherwise proceed with NULL S
-  final_clumped <- snp_clumping(
-    G = G_union,
-    infos.chr = chr_sorted,
-    infos.pos = pos_sorted,
-    S = maf_values, # Use MAF values if provided, otherwise NULL
-    thr.r2 = final_clump_r2,
-    size = 100 / final_clump_r2
-  ) # Final clumping
-
-  # Restrict the genotype matrix to only the final clumped SNPs
-  G_final_clumped <- G_union[, final_clumped, drop = FALSE] # Limit G to the final clumped SNPs
-  # Get the final SNP names
-  final_SNPs <- sig_SNPs_names[clumped_snp_union][sorted_indices][final_clumped]
-  print(paste("Number of final SNPs after MAF-based clumping (if applied):", length(final_SNPs)))
-
-  return(list(final_SNPs = final_SNPs, clumped_SNPs = clumped_SNPs_name))
-}
-
-#' Perform Quantile Regression Analysis to get beta
-#' @param X Matrix of predictors
-#' @param Y Matrix or vector of response variables
-#' @param Z Matrix of covariates (optional)
-#' @param tau_values Vector of quantiles to be analyzed
-#' @return A data frame with QR coefficients for each quantile
-#' @importFrom tidyr pivot_wider separate
-#' @importFrom dplyr %>% mutate select
-#' @export
-perform_qr_analysis <- function(X, Y, Z = NULL, tau_values = seq(0.05, 0.95, by = 0.05)) {
-  # Make sure quantreg is installed
-  if (!requireNamespace("quantreg", quietly = TRUE)) {
-    stop("To use this function, please install quantreg: https://cran.r-project.org/web/packages/quantreg/index.html")
-  }
-  # Convert Y and X to matrices if they aren't already
-  pheno.mat <- as.matrix(Y)
-  geno.mat <- as.matrix(X)
-
-  # Initialize an empty result table to store results
-  result_table <- data.frame(
-    phenotype_id = character(),
-    variant_id = character(),
-    tau = numeric(),
-    predictor_coef = numeric(),
-    stringsAsFactors = FALSE
-  )
-
-  # Loop over each tau value to perform quantile regression
-  for (tau in tau_values) {
-    # Loop over each SNP/variant in geno.mat (X)
-    for (n in 1:ncol(geno.mat)) {
-      response <- pheno.mat # Y
-      predictor <- geno.mat[, n] # X
-      phenotype_id <- colnames(pheno.mat)
-      variant_id <- colnames(geno.mat)[n]
-
-      # Construct the design matrix based on whether Z is provided
-      if (is.null(Z)) {
-        # If no covariates, include intercept and predictor
-        X_design <- cbind(1, predictor)
-      } else {
-        # If covariates are provided, include them in the design matrix
-        X_design <- cbind(1, predictor, as.matrix(Z))
-      }
-
-      # Fit the quantile regression model using rq.fit.br
-      mod <- suppressWarnings(quantreg::rq.fit.br(X_design, response, tau = tau))
-
-      # Extract the coefficient for the predictor (second coefficient)
-      predictor_coef <- mod$coefficients[2] # Coefficient for predictor
-
-      # Create a row with the results and append to the result table
-      row <- data.frame(
-        phenotype_id = phenotype_id,
-        variant_id = variant_id,
-        tau = tau,
-        predictor_coef = predictor_coef,
-        stringsAsFactors = FALSE
-      )
-      result_table <- rbind(result_table, row)
-    }
-  }
-
-  # Reshape result_table to a wide format, so each tau's results are in separate columns
-  result_table_wide <- result_table %>%
-    pivot_wider(
-      id_cols = c(phenotype_id, variant_id),
-      names_from = tau,
-      values_from = predictor_coef,
-      names_prefix = "coef_qr_"
-    )
-  parsed_ids <- parse_variant_id(result_table_wide$variant_id)
-  result_table_wide <- result_table_wide %>%
-    mutate(chr = parsed_ids$chrom, pos = parsed_ids$pos, A2 = parsed_ids$A2, A1 = parsed_ids$A1) %>%
-    select(chr, pos, A2, A1, everything())
-
-  # Return the wide format result table
-  return(result_table_wide)
-}
-
-#' Filter Highly Correlated SNPs
-#' @param X Matrix of genotypes
-#' @param cor_thres Correlation threshold for filtering
-#' @return A list containing filtered X matrix and filter IDs
-corr_filter <- function(X, cor_thres = 0.8) {
-  p <- ncol(X)
-
-  # Use Rfast for faster correlation calculation if available
-  if (requireNamespace("Rfast", quietly = TRUE)) {
-    cor.X <- Rfast::cora(X, large = TRUE)
-  } else {
-    cor.X <- cor(X)
-  }
-  Sigma.distance <- as.dist(1 - abs(cor.X))
-  fit <- hclust(Sigma.distance, method = "single")
-  clusters <- cutree(fit, h = 1 - cor_thres)
-  groups <- unique(clusters)
-  ind.delete <- NULL
-  X.new <- X
-  filter.id <- c(1:p)
-  for (ig in seq_along(groups)) {
-    temp.group <- which(clusters == groups[ig])
-    if (length(temp.group) > 1) {
-      ind.delete <- c(ind.delete, temp.group[-1])
-    }
-  }
-  ind.delete <- unique(ind.delete)
-  if ((length(ind.delete)) > 0) {
-    X.new <- as.matrix(X[, -ind.delete])
-    filter.id <- filter.id[-ind.delete]
-  }
-
-  # Check if X.new has only one column and ensure column names are preserved
-  if (ncol(X.new) == 1) {
-    colnames(X.new) <- colnames(X)[-ind.delete]
-  }
-
-  return(list(X.new = X.new, filter.id = filter.id))
-}
-
-
-
-#' Check and Remove Problematic Columns to Ensure Full Rank
-#'
-#' This function checks for problematic columns in the design matrix that cause it to be not full rank,
-#' and iteratively removes them based on the chosen strategy until the matrix is full rank.
-#'
-#' @param X Matrix of SNPs
-#' @param C Matrix of covariates (unnamed)
-#' @param strategy The strategy for removing problematic columns ("variance", "correlation", or "response_correlation")
-#' @param response Optional response vector for "response_correlation" strategy
-#' @param max_iterations Maximum number of iterations to attempt removing problematic columns
-#' @return Cleaned matrix X with problematic columns removed
-#' @noRd
-check_remove_highcorr_snp <- function(X = X, C = C, strategy = c("correlation", "variance", "response_correlation"), response = NULL, max_iterations = 300, corr_thresholds = seq(0.75, 0.5, by = -0.05)) {
-  strategy <- match.arg(strategy)
-  original_colnames <- colnames(X)
-  initial_ncol <- ncol(X) # Store the initial number of columns in X
-  iteration <- 0
-  # Combine the design matrix with X (SNPs) and C (covariates), keeping C without column names
-  X_design <- cbind(1, X, C) # Add an intercept column (1)
-  colnames_X_design <- c("Intercept", colnames(X)) # Assign column names only to X (SNPs) part
-
-  # Assign column names only to the X part, leaving C without names
-  colnames(X_design)[1:(length(colnames_X_design))] <- colnames_X_design
-
-  # Check the initial rank of the design matrix
-  matrix_rank <- qr(X_design)$rank
-  message("Initial rank of the design matrix: ", matrix_rank, " / ", ncol(X_design), " columns.")
-
-  # Skip remove_highcorr_snp if removing all problematic columns doesn't achieve full rank
-  skip_remove_highcorr <- FALSE
-
-  # First check: Try removing all problematic columns at once
-  if (matrix_rank < ncol(X_design)) {
-    message("Design matrix is not full rank, identifying all problematic columns...")
-
-    # QR decomposition to identify linearly dependent columns
-    qr_decomp <- qr(X_design)
-    R <- qr_decomp$rank
-    Q <- qr_decomp$pivot
-
-    # Get all problematic columns
-    problematic_cols <- Q[(R + 1):ncol(X_design)]
-    problematic_colnames <- colnames(X_design)[problematic_cols]
-    problematic_colnames <- problematic_colnames[problematic_colnames %in% colnames(X)]
-
-    if (length(problematic_colnames) > 0) {
-      message("Attempting to remove all problematic columns at once: ", paste(problematic_colnames, collapse = ", "))
-
-      # Remove all problematic columns at once
-      X_temp <- X[, !(colnames(X) %in% problematic_colnames), drop = FALSE]
-      X_design_temp <- cbind(1, X_temp, C)
-      colnames_X_design_temp <- c("Intercept", colnames(X_temp))
-      colnames(X_design_temp)[1:length(colnames_X_design_temp)] <- colnames_X_design_temp
-
-      # Check if removing all problematic columns achieves full rank
-      matrix_rank_temp <- qr(X_design_temp)$rank
-
-      if (matrix_rank_temp == ncol(X_design_temp)) {
-        message("Achieved full rank by removing all problematic columns at once. Proceeding with original logic...")
-      } else {
-        message("Removing all problematic columns did not achieve full rank. Skipping to corr_filter...")
-        skip_remove_highcorr <- TRUE
-      }
-    }
-  }
-
-  # Only proceed with remove_highcorr_snp if not skipping
-  if (!skip_remove_highcorr) {
-    while (matrix_rank < ncol(X_design) && iteration < max_iterations) {
-      message("Design matrix is not full rank, identifying problematic columns...")
-
-      qr_decomp <- qr(X_design)
-      R <- qr_decomp$rank
-      Q <- qr_decomp$pivot
-      problematic_cols <- Q[(R + 1):ncol(X_design)]
-      problematic_colnames <- colnames(X_design)[problematic_cols]
-      problematic_colnames <- problematic_colnames[problematic_colnames %in% colnames(X)]
-
-      if (length(problematic_colnames) == 0) {
-        message("No more problematic SNP columns found in X. Breaking the loop.")
-        break
-      }
-
-      message("Problematic SNP columns identified: ", paste(problematic_colnames, collapse = ", "))
-      X <- remove_highcorr_snp(X, problematic_colnames, strategy = strategy, response = response)
-
-      X_design <- cbind(1, X, C)
-      colnames_X_design <- c("Intercept", colnames(X))
-      colnames(X_design)[1:length(colnames_X_design)] <- colnames_X_design
-
-      matrix_rank <- qr(X_design)$rank
-      message("New rank of the design matrix: ", matrix_rank, " / ", ncol(X_design), " columns.")
-      iteration <- iteration + 1
-    }
-
-    if (iteration == max_iterations) {
-      warning("Maximum iterations reached. The design matrix may still not be full rank.")
-    }
-  }
-
-  # Final check and corr_filter if needed
-  matrix_rank <- qr(cbind(1, X, C))$rank
-  if (matrix_rank < ncol(cbind(1, X, C))) {
-    message("Applying corr_filter to ensure the design matrix is full rank...")
-    for (threshold in corr_thresholds) {
-      filter_result <- corr_filter(X, cor_thres = threshold)
-      X <- filter_result$X.new
-      X_design <- cbind(1, X, C)
-      colnames_X_design <- c("Intercept", colnames(X))
-      colnames(X_design)[1:length(colnames_X_design)] <- colnames_X_design
-
-      matrix_rank <- qr(X_design)$rank
-      message("Rank after corr_filter with threshold ", threshold, ": ", matrix_rank, " / ", ncol(X_design), " columns.")
-      if (matrix_rank == ncol(X_design)) {
-        break
-      }
-    }
-  }
-
-
-  if (iteration == max_iterations) {
-    warning("Maximum iterations reached. The design matrix may still not be full rank.")
-  }
-
-  if (ncol(X) == 1 && initial_ncol == 1) {
-    colnames(X) <- original_colnames
-  }
-  return(X)
-}
-
-#' Remove Problematic Columns Based on a Given Strategy
-#'
-#' This function removes problematic columns from a matrix based on different strategies, such as smallest variance,
-#' highest correlation, or lowest correlation with the response variable.
-#'
-#' @param X Matrix of SNPs
-#' @param problematic_cols A vector of problematic columns to be removed
-#' @param strategy The strategy for removing problematic columns ("variance", "correlation", or "response_correlation")
-#' @param response Optional response vector for "response_correlation" strategy
-#' @return Cleaned matrix X with the selected column removed
-#' @importFrom stats var cor
-#' @noRd
-remove_highcorr_snp <- function(X, problematic_cols, strategy = c("correlation", "variance", "response_correlation"), response = NULL) {
-  # Set default strategy
-  strategy <- match.arg(strategy)
-
-  message("Identified problematic columns: ", paste(problematic_cols, collapse = ", "))
-
-  if (length(problematic_cols) == 0) {
-    return(X) # If there are no problematic columns, return as is
-  }
-
-  if (length(problematic_cols) == 1) {
-    message("Only one problematic column: ", problematic_cols)
-    col_to_remove <- problematic_cols[1]
-    message("Removing column: ", col_to_remove)
-    X <- X[, !(colnames(X) %in% col_to_remove), drop = FALSE]
-    # If X only has one column left after removal, ensure its column name is preserved
-    if (ncol(X) == 1) {
-      colnames(X) <- colnames(X)[colnames(X) != col_to_remove] # Preserve remaining SNP name
-    }
-    return(X)
-  }
-
-  # Choose columns to remove based on the strategy
-  if (strategy == "variance") {
-    # Strategy 1: Remove the column with the smallest variance
-    variances <- apply(X[, problematic_cols, drop = FALSE], 2, var)
-    col_to_remove <- problematic_cols[which.min(variances)]
-    message("Removing column with the smallest variance: ", col_to_remove)
-  } else if (strategy == "correlation") {
-    # Strategy 2: Remove the column with the highest sum of absolute correlations
-    cor_matrix <- abs(cor(X[, problematic_cols, drop = FALSE])) # Calculate absolute correlation matrix
-    diag(cor_matrix) <- 0 # Ignore the diagonal (self-correlation)
-
-    if (length(problematic_cols) == 2) {
-      # If there are only two problematic columns, randomly remove one
-      col_to_remove <- sample(problematic_cols, 1)
-      message("Only two problematic columns, randomly removing: ", col_to_remove)
-    } else {
-      # Calculate sum of absolute correlations for each column
-      cor_sums <- colSums(cor_matrix)
-      col_to_remove <- problematic_cols[which.max(cor_sums)] # Remove the column with the largest sum of correlations
-      message("Removing column with highest sum of absolute correlations: ", col_to_remove)
-    }
-  } else if (strategy == "response_correlation" && !is.null(response)) {
-    # Strategy 3: Remove the column with the lowest correlation with the response variable
-    # FIXME: This strategy is potentially biased based on corr of response and variants
-    cor_with_response <- apply(X[, problematic_cols, drop = FALSE], 2, function(col) cor(col, response))
-    col_to_remove <- problematic_cols[which.min(abs(cor_with_response))]
-    message("Removing column with lowest correlation with the response: ", col_to_remove)
-  } else {
-    stop("Invalid strategy or missing response variable for 'response_correlation' strategy.")
-  }
-
-  # Remove the selected column from X
-  X <- X[, !(colnames(X) %in% col_to_remove), drop = FALSE]
-  if (ncol(X) == 1) {
-    colnames(X) <- colnames(X)[colnames(X) != col_to_remove] # Preserve remaining SNP name
-  }
-  return(X)
-}
-
-#' Calculate QR Coefficients and Pseudo R-squared Across Multiple Quantiles
-#'
-#' This function calculates quantile regression coefficients and pseudo R-squared values across multiple quantiles,
-#' while handling problematic columns that might affect the rank of the design matrix.
-#'
-#' @param AssocData List containing X, Y, C, and X.filter
-#' @param tau.list Vector of quantiles to be analyzed
-#' @param strategy The strategy for removing problematic columns ("variance", "correlation", or "response_correlation")
-#' @return A list containing the cleaned X matrix, beta matrix as twas weight, and pseudo R-squared values
-#' @noRd
-calculate_qr_and_pseudo_R2 <- function(AssocData, tau.list, strategy = c("correlation", "variance", "response_correlation"),
-                                       corr_thresholds = seq(0.75, 0.5, by = -0.05)) {
-  # Make sure quantreg is installed
-  if (!requireNamespace("quantreg", quietly = TRUE)) {
-    stop("To use this function, please install quantreg: https://cran.r-project.org/web/packages/quantreg/index.html")
-  }
-  strategy <- match.arg(strategy)
-  # Check and handle problematic columns affecting the full rank of the design matrix
-  AssocData$X.filter <- check_remove_highcorr_snp(X = AssocData$X.filter, C = AssocData$C, strategy = strategy, response = AssocData$Y, corr_thresholds = corr_thresholds)
-  snp_names <- colnames(AssocData$X.filter)
-  # Build the cleaned design matrix using the filtered X and unnamed C
-
-  # Fit the models for all tau values
-  message("Start fitting full model for all taus...")
-  fit_full <- suppressWarnings(quantreg::rq(Y ~ X.filter + C, tau = tau.list, data = AssocData))
-  message("Finished fitting full model. Start fitting intercept-only model for all taus...")
-  fit_intercept <- suppressWarnings(quantreg::rq(AssocData$Y ~ 1, tau = tau.list, data = AssocData))
-  message("Finished fitting intercept-only model.")
-  # Define the rho function for pseudo R2 calculation
-  rho <- function(u, tau) {
-    u * (tau - (u < 0))
-  }
-
-  # Prepare to store the pseudo R2 results
-  pseudo_R2 <- numeric(length(tau.list))
-  names(pseudo_R2) <- tau.list
-
-  # Calculate pseudo R2 for each tau
-  for (i in seq_along(tau.list)) {
-    tau <- tau.list[i]
-
-    # Get residuals for the intercept-only and full models
-    residuals0 <- residuals(fit_intercept, subset = i)
-    residuals1 <- residuals(fit_full, subset = i)
-
-    # Calculate and store pseudo R2 for each tau
-    rho0 <- sum(rho(residuals0, tau))
-    rho1 <- sum(rho(residuals1, tau))
-    pseudo_R2[i] <- 1 - rho1 / rho0
-  }
-
-  # Extract the coefficients for the SNPs
-  num_filter_vars <- ncol(AssocData$X.filter)
-  beta_mat <- coef(fit_full)[2:(1 + num_filter_vars), , drop = FALSE]
-  rownames_beta <- rownames(beta_mat)
-  if (ncol(AssocData$X.filter) == 1) {
-    rownames(beta_mat) <- snp_names
-  } else {
-    rownames_beta <- rownames(beta_mat)
-    rownames(beta_mat) <- gsub("^X.filter", "", rownames_beta)
-  }
-  return(list(X.filter = AssocData$X.filter, beta_mat = beta_mat, pseudo_R2 = pseudo_R2))
-}
-
-#' Calculate Heterogeneity of Beta Coefficients Across Quantiles
-#'
-#' This function calculates the heterogeneity of beta coefficients across multiple quantiles for each variant_id.
-#' Heterogeneity is computed as log(sd(beta) / abs(mean(beta))).
-#'
-#' @param rq_coef_result Data frame containing variant_id and QR coefficient columns
-#' @return A data frame with variant_id and heterogeneity values
-#' @noRd
-calculate_coef_heterogeneity <- function(rq_coef_result) {
-  # Identify all the columns starting with "coef_qr_" (quantile regression coefficient columns)
-  coef_cols <- grep("^coef_qr_", colnames(rq_coef_result), value = TRUE)
-
-  # Create a new data frame with variant_id and heterogeneity
-  heterogeneity_result <- data.frame(
-    variant_id = rq_coef_result$variant_id,
-    coef_heter = apply(rq_coef_result[, coef_cols], 1, function(beta) {
-      # Compute the mean and standard deviation, ignoring NAs
-      beta_mean <- mean(beta, na.rm = TRUE)
-      beta_sd <- sd(beta, na.rm = TRUE)
-
-      # Handle the case where mean(beta) is 0 to avoid division by zero
-      if (abs(beta_mean) == 0) {
-        return(NA) # Return NA if mean is zero
-      }
-
-      # Compute the heterogeneity: log(sd(beta) / abs(mean(beta)))
-      heterogeneity <- log(beta_sd / abs(beta_mean))
-      return(heterogeneity)
-    }),
-    stringsAsFactors = FALSE
-  )
-
-  # Return only variant_id and heterogeneity
-  return(heterogeneity_result)
-}
-
-#' Calculate Xi Correlation for QR Coefficients
-#'
-#' This function calculates the xi correlation coefficient and p-value for each variant,
-#' measuring the functional dependence between tau values and QR coefficients.
-#' Uses coef_qr_0.1 to coef_qr_0.9 (17 values, excluding 0.05 and 0.95).
-#'
-#' @param rq_coef_result Data frame containing variant_id and coef_qr_* columns
-#' @param tau_range Numeric vector of tau values to use (default: seq(0.1, 0.9, by = 0.05))
-#' @param min_valid Minimum number of valid (non-NA) coefficients required (default: 10)
-#' @return A data frame with variant_id, xi, and xi_pval columns
-#' @export
-calculate_xi_correlation <- function(rq_coef_result, tau_range = seq(0.1, 0.9, by = 0.05), min_valid = 10) {
-  if (!requireNamespace("XICOR", quietly = TRUE)) {
-    stop("Package 'XICOR' is required for xi correlation calculation. Please install it.")
-  }
-  # Build column names for the specified tau range
-  coef_col_names <- paste0("coef_qr_", tau_range)
-
-  # Check which columns exist
-  existing_cols <- coef_col_names[coef_col_names %in% colnames(rq_coef_result)]
-
-  if (length(existing_cols) == 0) {
-    warning("No coef_qr columns found in the specified tau range")
-    return(data.frame(
-      variant_id = rq_coef_result$variant_id,
-      xi = NA_real_,
-      xi_pval = NA_real_,
-      stringsAsFactors = FALSE
-    ))
-  }
-
-  # Extract tau values from existing columns
-  existing_tau <- as.numeric(gsub("coef_qr_", "", existing_cols))
-
-  # Calculate xi for each variant
-  xi_results <- apply(rq_coef_result[, existing_cols, drop = FALSE], 1, function(coef_values) {
-    # Get valid (non-NA) coefficients
-    valid_indices <- !is.na(coef_values)
-    valid_coefs <- as.numeric(coef_values[valid_indices])
-    valid_tau <- existing_tau[valid_indices]
-
-    # Check if enough valid values
-    if (length(valid_coefs) < min_valid) {
-      return(c(xi = NA_real_, xi_pval = NA_real_))
-    }
-
-    # Calculate xi correlation
-    tryCatch({
-      xicor_result <- XICOR::xicor(valid_tau, y = valid_coefs, pvalue = TRUE, method = "asymptotic")
-      return(c(xi = xicor_result$xi, xi_pval = xicor_result$pval))
-    }, error = function(e) {
-      return(c(xi = NA_real_, xi_pval = NA_real_))
-    })
-  })
-
-  # Convert to data frame
-  xi_df <- data.frame(
-    variant_id = rq_coef_result$variant_id,
-    xi = xi_results["xi", ],
-    xi_pval = xi_results["xi_pval", ],
-    stringsAsFactors = FALSE
-  )
-
-  return(xi_df)
-}
-
-#' Quantile TWAS Weight Pipeline
-#'
-#' @param X Matrix of genotypes
-#' @param Y Matrix or vector of phenotypes
-#' @param Z Matrix of covariates (optional)
-#' @param maf Vector of minor allele frequencies (optional)
-#' @param region_id Name of the region being analyzed
-#' @param quantile_qtl_tau_list Vector of quantiles for QTL analysis
-#' @param quantile_twas_tau_list Vector of quantiles for TWAS analysis
-#'
-#'
-#' @return A list containing various results from the TWAS weight pipeline:
-#' \itemize{
-#'   \item qr_screen_pvalue_df: Data frame with QR screening results: pavlue, qvalue and zscore.
-#'   \item message: Any informational or warning messages.
-#'   \item twas_variant_names: Names of variants used in TWAS weight calculation.
-#'   \item rq_coef_df: Data frame with quantile regression coefficients.
-#'   \item twas_weight: Matrix of TWAS weights.
-#'   \item pseudo_R2: Vector of pseudo R-squared values.
-#'   \item quantile_twas_prediction: Matrix of TWAS predictions.
-#' }
-#'
-#' @details
-#' The function performs the following steps:
-#' 1. QR screening to identify significant SNPs.
-#' 2. Filtering of highly correlated SNPs.
-#' 3. LD clumping and pruning(use filtered SNPs from step 1).
-#' 4. Calculation of QR coefficients for selected SNPs(use filtered SNPs from step 3).
-#' 5. Calculation of TWAS weights and pseudo R-squared values(use filtered SNPs from step 2).
-#'
-#' @examples
-#' # Example usage:
-#' # X <- matrix of genotypes
-#' # Y <- vector of phenotypes
-#' # Z <- matrix of covariates
-#' # results <- quantile_twas_weight_pipeline(X, Y, Z, region_id = "GeneA")
-#'
-#' @export
-quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id = "",
-                                          ld_reference_meta_file = NULL, twas_maf_cutoff = 0.01,
-                                          ld_clumping = FALSE, ld_pruning = FALSE,
-                                          screen_significant = TRUE,
-                                          quantile_qtl_tau_list = seq(0.05, 0.95, by = 0.05),
-                                          quantile_twas_tau_list = seq(0.01, 0.99, by = 0.01),
-                                          screen_method = "qvalue",
-                                          screen_threshold = 0.05,
-                                          xi_tau_range = seq(0.1, 0.9, by = 0.05),
-                                          keep_variants = NULL,
-                                          marginal_beta_calculate = TRUE,
-                                          twas_weight_calculate = TRUE,
-                                          qrank_screen_calculate = TRUE,
-                                          vqtl_calculate = TRUE,
-                                          pre_filter_by_pqr = FALSE,
-                                          initial_corr_filter_cutoff = 0.8,
-                                          full_rank_corr_filter_cutoff = seq(0.75, 0.5, by = -0.05)) {
-  # Initialize results list
-  results <- list()
-
-  # Step 1: vQTL (optional)
-  if (vqtl_calculate) {
-    # Step 1-1: Calculate vQTL rank scores
-    message("Step 0: Calculating vQTL rank scores for region ", region_id)
-    num_tau_levels <- length(quantile_qtl_tau_list) # Convert tau.list to numeric count
-    rank_score <- QUAIL_rank_score_pipeline(
-      phenotype = Y,
-      covariates = Z,
-      num_tau_levels = num_tau_levels,
-      method = "equal",
-      num_cores = 1
-    )
-    message("vQTL rank scores calculated.")
-
-    # Step 1-2: Run vQTL pipeline
-    message("Step 0.5: Running vQTL analysis for rank scores in region ", region_id)
-    vqtl_results <- QUAIL_pipeline(
-      genotype = X,
-      rank_score = rank_score,
-      covariates = Z,
-      phenotype_id = colnames(Y)[1]
-    )
-    message("vQTL analysis completed.")
-    results$vqtl_results <- vqtl_results
-  } else {
-    message("Skipping vQTL calculation.")
-  }
-
-  if (qrank_screen_calculate) {
-    # Step 2: QR screen
-    message("Starting QR screen for region ", region_id)
-    p.screen <- qr_screen(X = X, Y = Y, Z = Z, tau.list = quantile_qtl_tau_list, screen_threshold = screen_threshold, screen_method = screen_method, top_count = 10, top_percent = 15)
-    message(paste0("Number of SNPs after QR screening: ", length(p.screen$sig_SNP_threshold)))
-    message("QR screen completed. Screening significant SNPs")
-    results$qr_screen_pvalue_df <- p.screen$df_result
-
-    if (screen_significant && length(p.screen$sig_SNP_threshold) == 0) {
-      results$message <- paste0("No significant SNPs detected in region ", region_id)
-      return(results)
-    }
-
-    if (screen_significant) {
-      X_filtered <- X[, p.screen$sig_SNP_threshold, drop = FALSE]
-    } else {
-      X_filtered <- X
-    }
-
-    # # Step 3: Optional LD clumping and pruning from results of QR_screen (using original QR screen results)
-    if (ld_clumping) {
-      message("Performing LD clumping and pruning from QR screen results...")
-      LD_SNPs <- multicontext_ld_clumping(X = X[, p.screen$sig_SNP_threshold, drop = FALSE], qr_results = p.screen, maf_list = NULL)
-      selected_snps <- if (ld_pruning) LD_SNPs$final_SNPs else LD_SNPs$clumped_SNPs
-      x_clumped <- X[, p.screen$sig_SNP_threshold, drop = FALSE][, selected_snps, drop = FALSE]
-    } else {
-      message("Skipping LD clumping.")
-    }
-
-  } else {
-    message("Skipping QR screen.")
-  }
-
-  # Determine whether to skip marginal beta calculation:
-  # - skip if marginal_beta_calculate = FALSE
-  # - skip if keep_variants is provided but empty (length 0)
-  # - skip if qrank_screen_calculate = FALSE and keep_variants is NULL (no variants to select)
-  skip_marginal_beta <- !marginal_beta_calculate ||
-    (!is.null(keep_variants) && length(keep_variants) == 0) ||
-    (!qrank_screen_calculate && is.null(keep_variants))
-
-  if (!skip_marginal_beta) {
-    # Step 4: Fit marginal QR to get beta with SNPs for quantile_qtl_tau_list values
-    message("Fitting marginal QR for selected SNPs...")
-    if (qrank_screen_calculate) {
-      X_for_qr <- if (ld_clumping) x_clumped else X_filtered
-      if (!is.null(keep_variants)) {
-        variants_to_keep <- intersect(keep_variants, colnames(X_for_qr))
-        if (length(variants_to_keep) > 0) {
-          X_for_qr <- X_for_qr[, variants_to_keep, drop = FALSE]
-          message("Filtered to ", ncol(X_for_qr), " variants from keep_variants list for QR analysis")
-        } else {
-          message("Warning: No variants from keep_variants found in selected SNPs, using all selected SNPs")
-        }
-      }
-    } else {
-      # qrank_screen_calculate = FALSE but keep_variants provided
-      variants_to_keep <- intersect(keep_variants, colnames(X))
-      if (length(variants_to_keep) > 0) {
-        X_for_qr <- X[, variants_to_keep, drop = FALSE]
-        message("Using ", ncol(X_for_qr), " variants from keep_variants list for QR analysis (QR screen skipped)")
-      } else {
-        message("Warning: No variants from keep_variants found in X, skipping marginal beta calculation")
-        skip_marginal_beta <- TRUE
-      }
-    }
-  }
-
-  if (!skip_marginal_beta) {
-    rq_coef_result <- perform_qr_analysis(X = X_for_qr, Y = Y, Z = Z, tau_values = quantile_qtl_tau_list)
-
-    # Step 5: Heterogeneity calculation
-    # Step 5-1: beta_heterogeneity index in marginal model
-    message("Marginal QR for selected SNPs completed. Calculating beta heterogeneity...")
-    beta_heterogeneity <- calculate_coef_heterogeneity(rq_coef_result)
-    message("Beta heterogeneity calculation completed.")
-
-    # Step 5-2: Calculate xi correlation (Chatterjee correlation test)
-    message("Calculating xi correlation for QR coefficients...")
-    xi_correlation <- calculate_xi_correlation(rq_coef_result, tau_range = xi_tau_range, min_valid = 10)
-    message("Xi correlation calculation completed.")
-
-    # Merge xi and xi_pval into rq_coef_result (using left_join to preserve row order)
-    rq_coef_result <- rq_coef_result %>%
-      dplyr::left_join(xi_correlation, by = "variant_id")
-
-    results$rq_coef_df <- rq_coef_result
-    results$beta_heterogeneity <- beta_heterogeneity
-    results$xi_correlation <- xi_correlation
-  } else {
-    message("Skipping marginal beta calculation and heterogeneity analysis.")
-  }
-
-  if (twas_weight_calculate && qrank_screen_calculate) {
-  # Step 6: Optional LD panel filtering and MAF filtering from results of QR_screen
-  if (!is.null(ld_reference_meta_file)) {
-    message("Starting LD panel filtering...")
-    ld_result <- tryCatch(
-      {
-        variants_kept <- filter_variants_by_ld_reference(colnames(X_filtered), ld_reference_meta_file)
-        if (length(variants_kept$data) == 0) NULL else variants_kept
-      },
-      error = function(e) {
-        message("Error in LD filtering for region ", region_id, ": ", e$message)
-        NULL
-      }
-    )
-
-    if (is.null(ld_result)) {
-      results$message <- paste0("No SNPs left or error in LD filtering in region ", region_id)
-      return(results)
-    }
-
-    X_filtered <- X_filtered[, ld_result$data, drop = FALSE]
-    message(paste0("Number of SNPs after LD filtering: ", ncol(X_filtered)))
-
-    # MAF filtering
-    if (!is.null(maf)) {
-      maf_filtered <- maf[colnames(X_filtered)] > twas_maf_cutoff
-      X_filtered <- X_filtered[, maf_filtered, drop = FALSE]
-
-      # Check if any SNPs are left after MAF filtering
-      if (ncol(X_filtered) == 0) {
-        results$message <- paste0("No SNPs left after MAF filtering in region ", region_id)
-        return(results)
-      }
-
-      message(paste0("Number of SNPs after MAF filtering: ", ncol(X_filtered)))
-    }
-  }
-
-    # Step 7: Optionally pre-filter variants by raw p_qr
-    if (pre_filter_by_pqr) {
-      pqr_attempts <- list(
-        list(pval = 0.05, full_rank_corr = full_rank_corr_filter_cutoff),
-        list(pval = 0.01, full_rank_corr = full_rank_corr_filter_cutoff)
-      )
-    } else {
-      message("Skipping p_qr pre-filtering, using X_filtered directly...")
-      pqr_attempts <- list(
-        list(pval = NULL, full_rank_corr = full_rank_corr_filter_cutoff)
-      )
-    }
-
-    qr_beta_R2_results <- NULL
-    for (attempt in seq_along(pqr_attempts)) {
-      pqr_cutoff <- pqr_attempts[[attempt]]$pval
-      full_rank_corr <- pqr_attempts[[attempt]]$full_rank_corr
-
-      # Pre-filter by p_qr if enabled
-      if (!is.null(pqr_cutoff)) {
-        raw_pvals <- p.screen$pvec[colnames(X_filtered)]
-        sig_raw <- which(raw_pvals < pqr_cutoff)
-        if (length(sig_raw) == 0) {
-          message("No variants with raw p_qr < ", pqr_cutoff, " in region ", region_id)
-          next
-        }
-        if (length(sig_raw) < ncol(X_filtered)) {
-          message("Pre-filtering variants by raw p_qr < ", pqr_cutoff, ": keeping ", length(sig_raw), " out of ", ncol(X_filtered), " variants")
-          X_for_corr <- X_filtered[, sig_raw, drop = FALSE]
-        } else {
-          X_for_corr <- X_filtered
-        }
-      } else {
-        X_for_corr <- X_filtered
-      }
-
-      # Step 8: Initial filter of highly correlated SNPs
-      message("Filtering highly correlated SNPs...")
-      if (ncol(X_for_corr) > 1) {
-        filtered <- corr_filter(X_for_corr, initial_corr_filter_cutoff)
-        X.filter <- filtered$X.new
-      } else {
-        X.filter <- X_for_corr
-      }
-
-      # Step 9: Fit QR and get twas weight and R2 for all taus
-      message("Fitting full QR to calculate TWAS weights and pseudo R-squared values...")
-      AssocData <- list(X = X, Y = Y, C = Z, X.filter = X.filter)
-      qr_beta_R2_results <- tryCatch(
-        {
-          calculate_qr_and_pseudo_R2(AssocData, quantile_twas_tau_list, corr_thresholds = full_rank_corr)
-        },
-        error = function(e) {
-          message("Attempt ", attempt, " failed: ", e$message)
-          NULL
-        }
-      )
-
-      if (!is.null(qr_beta_R2_results)) break
-    }
-
-    if (is.null(qr_beta_R2_results)) {
-      results$message <- paste0("Failed to fit QR model after all attempts in region ", region_id)
-      return(results)
-    }
-
-  X.filter <- qr_beta_R2_results$X.filter
-  message("TWAS weights and pseudo R-squared calculations completed.")
-
-  # Add additional results
-  results$twas_variant_names <- colnames(X.filter)
-  results$twas_weight <- qr_beta_R2_results$beta_mat
-  results$pseudo_R2 <- qr_beta_R2_results$pseudo_R2
-  results$quantile_twas_prediction <- X.filter %*% results$twas_weight
-  } else {
-    message("Skipping TWAS weight calculation.")
-  }
-
-  return(results)
-}
diff --git a/R/twas.R b/R/twas.R
index 267532e4..db7f2072 100644
--- a/R/twas.R
+++ b/R/twas.R
@@ -288,13 +288,12 @@ twas_pipeline <- function(twas_weights_data,
                           rsq_pval_option = c("pval", "adj_rsq_pval"),
                           mr_pval_cutoff = 0.05,
                           mr_coverage_column = "cs_coverage_0.95",
-                          quantile_twas = FALSE,
                           output_twas_data = FALSE,
                           event_filters=NULL,
                           column_file_path = NULL,
                           comment_string="#") {
   # internal function to format TWAS output
-  format_twas_data <- function(post_qc_twas_data, twas_table, quantile_twas = FALSE) {
+  format_twas_data <- function(post_qc_twas_data, twas_table) {
     weights_list <- do.call(c, lapply(names(post_qc_twas_data), function(molecular_id) {
       contexts <- names(post_qc_twas_data[[molecular_id]][["weights_qced"]])
       chrom <- post_qc_twas_data[[molecular_id]][["chrom"]]
@@ -317,125 +316,7 @@ twas_pipeline <- function(twas_weights_data,
         postqc_scaled_weight <- list()
         gwas_studies <- names(post_qc_twas_data[[molecular_id]][["weights_qced"]][[context]]) # context-level gwas-studies
         
-        # quantile TWAS
-        if (quantile_twas && (is.null(model_selected) || is.na(model_selected))) {
-          # For quantile TWAS: extract all available methods from weight matrix columns
-          if (length(gwas_studies) > 0) {
-            # Get the first weight matrix to examine available columns
-            sample_weight_matrix <- post_qc_twas_data[[molecular_id]][["weights_qced"]][[context]][[gwas_studies[1]]][["scaled_weights"]]
-            
-            # Extract method names from column names
-            all_columns <- colnames(sample_weight_matrix)
-            
-            # Try to identify methods by removing "_weights" suffix
-            potential_methods <- unique(gsub("_weights$", "", all_columns))
-            
-            # Filter out columns that are exactly the same as original (no "_weights" suffix)
-            methods_with_suffix <- potential_methods[paste0(potential_methods, "_weights") %in% all_columns]
-            
-            # If no standard method columns found, use all columns as individual methods
-            if (length(methods_with_suffix) == 0) {
-              available_methods <- all_columns
-              use_direct_columns <- TRUE
-            } else {
-              available_methods <- methods_with_suffix
-              use_direct_columns <- FALSE
-            }
-            
-            # Process each method for quantile TWAS
-            for (method in available_methods) {
-              for (study in gwas_studies) {
-                weight_matrix <- post_qc_twas_data[[molecular_id]][["weights_qced"]][[context]][[study]][["scaled_weights"]]
-                
-                # Determine which column to use
-                if (use_direct_columns) {
-                  # Use the column name directly
-                  selected_col <- method
-                } else {
-                  # Look for method_weights format first, then fallback
-                  weight_col_candidates <- c(
-                    paste0(method, "_weights"),
-                    method,
-                    "weight"
-                  )
-                  
-                  selected_col <- NULL
-                  for (col_candidate in weight_col_candidates) {
-                    if (!is.null(col_candidate) && col_candidate %in% colnames(weight_matrix)) {
-                      selected_col <- col_candidate
-                      break
-                    }
-                  }
-                }
-                
-                if (!is.null(selected_col) && selected_col %in% colnames(weight_matrix)) {
-                  postqc_scaled_weight[[study]] <- weight_matrix[, selected_col, drop = FALSE]
-                  colnames(postqc_scaled_weight[[study]]) <- "weight"
-                  # variant IDs are in canonical chr-prefix format from allele_qc
-                  context_variants <- rownames(weight_matrix)
-                  context_range <- as.integer(sapply(context_variants, function(variant) {
-                    parts <- strsplit(variant, ":")[[1]]
-                    if (length(parts) >= 2) as.integer(parts[2]) else NA
-                  }))
-                  context_range <- context_range[!is.na(context_range)]
-                  
-                  if (length(context_range) > 0) {
-                    # Get quantile information from twas_table for this method
-                    quantile_info <- twas_table[twas_table$molecular_id == molecular_id & 
-                                              twas_table$context == context & 
-                                              twas_table$method == method, ]
-                    
-                    # Extract quantile_start and quantile_end if available
-                    if (nrow(quantile_info) > 0) {
-                      quantile_start <- if ("quantile_start" %in% colnames(quantile_info)) quantile_info$quantile_start[1] else NA
-                      quantile_end <- if ("quantile_end" %in% colnames(quantile_info)) quantile_info$quantile_end[1] else NA
-                    } else {
-                      quantile_start <- NA
-                      quantile_end <- NA
-                    }
-                    
-                    # Create quantile_range string
-                    quantile_range <- if (!is.na(quantile_start) && !is.na(quantile_end)) {
-                      paste0(quantile_start, "-", quantile_end)
-                    } else {
-                      NA
-                    }
-                    
-                    # Create unique weight ID for each method in quantile TWAS - handle NA data_type
-                    safe_data_type <- if (is.null(data_type) || any(is.na(data_type)) || length(data_type) == 0) {
-                      "unknown"
-                    } else {
-                      data_type[1]
-                    }
-                    
-                    weight_id <- paste0(molecular_id, "|", safe_data_type, "_", context, "_", method)
-                    
-                    # Initialize the nested list structure if needed
-                    if (is.null(weight[[weight_id]])) {
-                      weight[[weight_id]] <- list()
-                    }
-                    
-                    weight[[weight_id]][[study]] <- list(
-                      chrom = chrom, 
-                      p0 = min(context_range), 
-                      p1 = max(context_range),
-                      wgt = postqc_scaled_weight[[study]], 
-                      molecular_id = molecular_id, 
-                      weight_name = paste0(safe_data_type, "_", context, "_", method), 
-                      type = safe_data_type,
-                      context = context, 
-                      method = method,                    # Add method info for quantile TWAS
-                      quantile_start = quantile_start,   # Add quantile start
-                      quantile_end = quantile_end,       # Add quantile end
-                      quantile_range = quantile_range,   # Add quantile range string
-                      n_wgt = length(context_variants)
-                    )
-                  }
-                }
-              }
-            }
-          }
-        } else if (!is.null(model_selected) & isTRUE(is_imputable)) {
+        if (!is.null(model_selected) & isTRUE(is_imputable)) {
           # TWAS
           for (study in gwas_studies) {
             postqc_scaled_weight[[study]] <- post_qc_twas_data[[molecular_id]][["weights_qced"]][[context]][[study]][["scaled_weights"]][, paste0(model_selected, "_weights"), drop = FALSE]
@@ -466,37 +347,15 @@ twas_pipeline <- function(twas_weights_data,
 
     # gene_z table
     if ("is_selected_method" %in% colnames(twas_table)) {
-      if (!quantile_twas) {
-        # Original TWAS: filter by selected methods only
-        twas_table <- twas_table[na.omit(twas_table$is_selected_method), , drop = FALSE]
-      }
-      # For quantile TWAS: include all methods, don't filter by is_selected_method
+      twas_table <- twas_table[na.omit(twas_table$is_selected_method), , drop = FALSE]
     }
     if (nrow(twas_table) > 0) {
-      # Adjust ID and group format based on whether it's quantile TWAS
-      if (quantile_twas) {
-        # For quantile TWAS: include method in ID to make each method unique
-        twas_table$id <- paste0(twas_table$molecular_id, "|", 
-                                ifelse(is.null(twas_table$type) | any(is.na(twas_table$type)), "unknown", twas_table$type), 
-                                "_", twas_table$context, "_", twas_table$method)
-        twas_table$group <- paste0(twas_table$context, "|", 
-                                  ifelse(is.null(twas_table$type) | any(is.na(twas_table$type)), "unknown", twas_table$type), 
-                                  "|", twas_table$method)
-      } else {
-        # Original TWAS ID format - keep unchanged
-        twas_table$id <- paste0(twas_table$molecular_id, "|", twas_table$type, "_", twas_table$context)
-        twas_table$group <- paste0(twas_table$context, "|", twas_table$type)
-      }
+      twas_table$id <- paste0(twas_table$molecular_id, "|", twas_table$type, "_", twas_table$context)
+      twas_table$group <- paste0(twas_table$context, "|", twas_table$type)
       
       twas_table$z <- twas_table$twas_z
       
-      # Select relevant columns for output, add quantile-specific columns for quantile TWAS
       output_columns <- c("id", "z", "type", "context", "group", "gwas_study")
-      if (quantile_twas) {
-        output_columns <- c(output_columns, "method")
-        if ("quantile_start" %in% colnames(twas_table)) output_columns <- c(output_columns, "quantile_start", "quantile_end")
-        if ("pseudo_R2_avg" %in% colnames(twas_table)) output_columns <- c(output_columns, "pseudo_R2_avg")
-      }
       twas_table <- twas_table[, intersect(output_columns, colnames(twas_table)), drop = FALSE]
       studies <- unique(twas_table$gwas_study)
       z_gene_list <- list()
@@ -581,10 +440,6 @@ twas_pipeline <- function(twas_weights_data,
       warning(paste0("No data harmonized for ", weight_db, ". Returning NULL for TWAS result for this region."))
       return(NULL)
     }
-    if (quantile_twas) {
-      rsq_cutoff <- 0
-      message("Quantile TWAS detected. Skipping the selection of best model based on CV result.")
-    }
     if (rsq_cutoff > 0) {
       message("Selecting the best model based on criteria...")
       best_model_selection <- pick_best_model(
@@ -694,51 +549,21 @@ twas_pipeline <- function(twas_weights_data,
     # merge twas_cv information for same gene across all weight db files, loop through each context for all methods
     gene_table <- do.call(rbind, lapply(contexts, function(context) {
       methods <- sub("_[^_]+$", "", names(twas_weights_data[[molecular_id]]$twas_cv_performance[[context]]))
-      if (quantile_twas) {
-        cv_performance <- twas_weights_data[[molecular_id]]$twas_cv_performance[[context]]
-        if (length(methods) == 0) {
-          context_table <- data.frame()
-        } else {
-          method_results <- list()
-          
-          for (method in methods) {
-            if (!is.null(cv_performance[[paste0(method, "_performance")]])) {
-              performance_data <- cv_performance[[paste0(method, "_performance")]]
-              method_results[[method]] <- data.frame(
-                context = context,
-                method = method,
-                quantile_start = performance_data[, "quantile_start"],
-                quantile_end = performance_data[, "quantile_end"],
-                pseudo_R2_avg = performance_data[, "pseudo_R2_avg"],
-                type = twas_weights_data[[molecular_id]][["data_type"]][[context]]
-              )
-            }
-          }
-          
-          if (length(method_results) > 0) {
-            context_table <- do.call(rbind, method_results)
-          } else {
-            context_table <- data.frame()
-          }
-        }
-      } else {
-        # Original TWAS data extraction
-        is_imputable <- twas_data[[molecular_id]][["model_selection"]][[context]]$is_imputable
-        selected_method <- twas_data[[molecular_id]][["model_selection"]][[context]]$selected_model
-        if (is.null(selected_method)) selected_method <- NA
-        is_selected_method <- ifelse(methods == selected_method, TRUE, FALSE)
-
-        cv_rsqs <- sapply(twas_weights_data[[molecular_id]]$twas_cv_performance[[context]], function(x) x[, rsq_option])
-        cv_pvals <- sapply(twas_weights_data[[molecular_id]]$twas_cv_performance[[context]], function(x) x[, colnames(x)[which(colnames(x) %in% rsq_pval_option)]])
-
-        context_table <- data.frame(
-          context = context, method = methods,
-          is_imputable = is_imputable,
-          is_selected_method = is_selected_method,
-          rsq_cv = cv_rsqs, pval_cv = cv_pvals,
-          type = twas_weights_data[[molecular_id]][["data_type"]][[context]]
-        )
-      }
+      is_imputable <- twas_data[[molecular_id]][["model_selection"]][[context]]$is_imputable
+      selected_method <- twas_data[[molecular_id]][["model_selection"]][[context]]$selected_model
+      if (is.null(selected_method)) selected_method <- NA
+      is_selected_method <- ifelse(methods == selected_method, TRUE, FALSE)
+
+      cv_rsqs <- sapply(twas_weights_data[[molecular_id]]$twas_cv_performance[[context]], function(x) x[, rsq_option])
+      cv_pvals <- sapply(twas_weights_data[[molecular_id]]$twas_cv_performance[[context]], function(x) x[, colnames(x)[which(colnames(x) %in% rsq_pval_option)]])
+
+      context_table <- data.frame(
+        context = context, method = methods,
+        is_imputable = is_imputable,
+        is_selected_method = is_selected_method,
+        rsq_cv = cv_rsqs, pval_cv = cv_pvals,
+        type = twas_weights_data[[molecular_id]][["data_type"]][[context]]
+      )
       return(context_table)
     }))
     gene_table$molecular_id <- molecular_id
@@ -748,20 +573,14 @@ twas_pipeline <- function(twas_weights_data,
   twas_table$block <- region_block
 
   # Step 3. merge twas result table and twas input into twas_data to output
-  colname_ordered <- if (quantile_twas) {
-    c("chr", "molecular_id", "context", "gwas_study", "method", "quantile_start", "quantile_end", "pseudo_R2_avg", "twas_z", "twas_pval", "type", "block")
-  } else {
-    c("chr", "molecular_id", "context", "gwas_study", "method", "is_imputable", "is_selected_method", "rsq_cv", "pval_cv", "twas_z", "twas_pval", "type", "block")
-  }
+  colname_ordered <- c("chr", "molecular_id", "context", "gwas_study", "method", "is_imputable", "is_selected_method", "rsq_cv", "pval_cv", "twas_z", "twas_pval", "type", "block")
   if (nrow(twas_results_table) == 0) {
     return(list(twas_result = NULL, twas_data = NULL, mr_result = NULL))
-  }    
-  twas_table <- merge(twas_table, twas_results_table, by = c("molecular_id", "context", "method"))
-  if (!quantile_twas) {
-    twas_table <- twas_table[twas_table$is_imputable, , drop = FALSE]
   }
+  twas_table <- merge(twas_table, twas_results_table, by = c("molecular_id", "context", "method"))
+  twas_table <- twas_table[twas_table$is_imputable, , drop = FALSE]
   if (output_twas_data & nrow(twas_table) > 0) {
-    twas_data_subset <- format_twas_data(twas_data, twas_table, quantile_twas = quantile_twas)
+    twas_data_subset <- format_twas_data(twas_data, twas_table)
     # if (!is.null(twas_data_subset)) twas_data_subset$snp_info <- snp_info
   } else {
     twas_data_subset <- NULL
diff --git a/man/QUAIL_pipeline.Rd b/man/QUAIL_pipeline.Rd
deleted file mode 100644
index 51011d2a..00000000
--- a/man/QUAIL_pipeline.Rd
+++ /dev/null
@@ -1,36 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quail_vqtl.R
-\name{QUAIL_pipeline}
-\alias{QUAIL_pipeline}
-\title{Main QUAIL pipeline
-QUAIL vQTL Analysis Pipeline}
-\usage{
-QUAIL_pipeline(
-  genotype,
-  rank_score,
-  phenotype = NULL,
-  covariates = NULL,
-  phenotype_id = NULL
-)
-}
-\arguments{
-\item{genotype}{numeric matrix (n x p) of genotypes.}
-
-\item{rank_score}{numeric vector (n x 1) of rank scores from Step 1.}
-
-\item{phenotype}{optional numeric vector (n x 1) of original phenotype values.}
-
-\item{covariates}{optional numeric matrix (n x k) of covariates.}
-}
-\value{
-A data frame containing vQTL results.
-}
-\description{
-Main QUAIL pipeline
-QUAIL vQTL Analysis Pipeline
-}
-\examples{
-\dontrun{
-results <- QUAIL_pipeline(genotype, rank_score, covariates = covariates)
-}
-}
diff --git a/man/QUAIL_rank_score_pipeline.Rd b/man/QUAIL_rank_score_pipeline.Rd
deleted file mode 100644
index 82ce532a..00000000
--- a/man/QUAIL_rank_score_pipeline.Rd
+++ /dev/null
@@ -1,31 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quail_rank_score.R
-\name{QUAIL_rank_score_pipeline}
-\alias{QUAIL_rank_score_pipeline}
-\title{Main QUAIL Rank Score Pipeline}
-\usage{
-QUAIL_rank_score_pipeline(
-  phenotype,
-  covariates,
-  num_tau_levels = 19,
-  method = "equal",
-  num_cores = 1
-)
-}
-\arguments{
-\item{phenotype}{numeric vector of phenotype values}
-
-\item{covariates}{matrix/data.frame of covariates}
-
-\item{num_tau_levels}{integer number of quantile levels}
-
-\item{method}{character "equal" or "ivw"}
-
-\item{num_cores}{integer number of cores for parallel processing}
-}
-\value{
-data.frame with integrated rank scores
-}
-\description{
-Main QUAIL Rank Score Pipeline
-}
diff --git a/man/calculate_xi_correlation.Rd b/man/calculate_xi_correlation.Rd
deleted file mode 100644
index 782e09df..00000000
--- a/man/calculate_xi_correlation.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quantile_twas_weight.R
-\name{calculate_xi_correlation}
-\alias{calculate_xi_correlation}
-\title{Calculate Xi Correlation for QR Coefficients}
-\usage{
-calculate_xi_correlation(
-  rq_coef_result,
-  tau_range = seq(0.1, 0.9, by = 0.05),
-  min_valid = 10
-)
-}
-\arguments{
-\item{rq_coef_result}{Data frame containing variant_id and coef_qr_* columns}
-
-\item{tau_range}{Numeric vector of tau values to use (default: seq(0.1, 0.9, by = 0.05))}
-
-\item{min_valid}{Minimum number of valid (non-NA) coefficients required (default: 10)}
-}
-\value{
-A data frame with variant_id, xi, and xi_pval columns
-}
-\description{
-This function calculates the xi correlation coefficient and p-value for each variant,
-measuring the functional dependence between tau values and QR coefficients.
-Uses coef_qr_0.1 to coef_qr_0.9 (17 values, excluding 0.05 and 0.95).
-}
diff --git a/man/load_quantile_twas_weights.Rd b/man/load_quantile_twas_weights.Rd
deleted file mode 100644
index 68e208a3..00000000
--- a/man/load_quantile_twas_weights.Rd
+++ /dev/null
@@ -1,38 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quantile_twas.R
-\name{load_quantile_twas_weights}
-\alias{load_quantile_twas_weights}
-\title{Load Quantile TWAS Weights}
-\usage{
-load_quantile_twas_weights(
-  weight_db_files,
-  tau_values = seq(0.01, 0.99, 0.01),
-  between_cluster = 0.8,
-  num_intervals = 3
-)
-}
-\arguments{
-\item{weight_db_files}{A character vector of file paths to the RDS files containing TWAS weights.}
-
-\item{tau_values}{A numeric vector representing the tau values (quantiles) to use. Default is \code{seq(0.01, 0.99, 0.01)}.}
-
-\item{between_cluster}{A numeric value specifying the correlation threshold for clustering. Default is 0.8.}
-
-\item{num_intervals}{The number of fixed non-overlapping intervals to divide the tau values. Default is 3.}
-}
-\value{
-A list containing:
-\item{weights}{A list of integrated TWAS weights for each context.}
-\item{twas_cv_performance}{A list of TWAS cross-validation performance metrics for each context.}
-}
-\description{
-This function loads TWAS weights from RDS files and performs grouped integration using both correlation-based clustering and fixed tau intervals.
-}
-\examples{
-weight_db_files <- c("file1.rds", "file2.rds")
-tau_values <- seq(0.01, 0.99, by = 0.01)
-result <- load_quantile_twas_weights(weight_db_files, tau_values)
-weights <- result$weights
-performance <- result$twas_cv_performance
-
-}
diff --git a/man/multicontext_ld_clumping.Rd b/man/multicontext_ld_clumping.Rd
deleted file mode 100644
index 6d0cbf09..00000000
--- a/man/multicontext_ld_clumping.Rd
+++ /dev/null
@@ -1,31 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quantile_twas_weight.R
-\name{multicontext_ld_clumping}
-\alias{multicontext_ld_clumping}
-\title{Perform Clumping and Pruning}
-\usage{
-multicontext_ld_clumping(
-  X,
-  qr_results,
-  maf_list = NULL,
-  ld_clump_r2 = 0.2,
-  final_clump_r2 = 0.8
-)
-}
-\arguments{
-\item{X}{Matrix of genotypes}
-
-\item{qr_results}{Results from QR_screen}
-
-\item{maf_list}{List of minor allele frequencies (optional)}
-
-\item{ld_clump_r2}{R-squared threshold for initial LD clumping based on pvalue}
-
-\item{final_clump_r2}{R-squared threshold for final LD clumping based on MAF}
-}
-\value{
-A list containing final SNPs and clumped SNPs
-}
-\description{
-Perform Clumping and Pruning
-}
diff --git a/man/perform_qr_analysis.Rd b/man/perform_qr_analysis.Rd
deleted file mode 100644
index c838e35b..00000000
--- a/man/perform_qr_analysis.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quantile_twas_weight.R
-\name{perform_qr_analysis}
-\alias{perform_qr_analysis}
-\title{Perform Quantile Regression Analysis to get beta}
-\usage{
-perform_qr_analysis(X, Y, Z = NULL, tau_values = seq(0.05, 0.95, by = 0.05))
-}
-\arguments{
-\item{X}{Matrix of predictors}
-
-\item{Y}{Matrix or vector of response variables}
-
-\item{Z}{Matrix of covariates (optional)}
-
-\item{tau_values}{Vector of quantiles to be analyzed}
-}
-\value{
-A data frame with QR coefficients for each quantile
-}
-\description{
-Perform Quantile Regression Analysis to get beta
-}
diff --git a/man/qr_screen.Rd b/man/qr_screen.Rd
deleted file mode 100644
index 2b798e10..00000000
--- a/man/qr_screen.Rd
+++ /dev/null
@@ -1,50 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quantile_twas_weight.R
-\name{qr_screen}
-\alias{qr_screen}
-\title{Quantile TWAS Weight Calculation and QTL Analysis}
-\usage{
-qr_screen(
-  X,
-  Y,
-  Z = NULL,
-  tau.list = seq(0.05, 0.95, by = 0.05),
-  screen_threshold = 0.05,
-  screen_method = "qvalue",
-  top_count = 10,
-  top_percent = 15
-)
-}
-\arguments{
-\item{X}{Matrix of predictors}
-
-\item{Y}{Matrix or vector of response variables}
-
-\item{Z}{Matrix of covariates (optional)}
-
-\item{tau.list}{Vector of quantiles to be analyzed}
-
-\item{screen_threshold}{Significance threshold for adjusted p-values}
-
-\item{screen_method}{Method for p-value adjustment ('fdr' or 'qvalue')}
-
-\item{top_count}{Number of top SNPs to select}
-
-\item{top_percent}{Percentage of top SNPs to select}
-}
-\value{
-A list containing various results from the QR screen
-}
-\description{
-This file contains functions for performing Quantile Transcriptome-Wide
-Association Studies (TWAS) weight calculations and Quantile QTL analysis.
-It provides tools for screening quantile regression results, performing
-LD clumping and pruning, and calculating TWAS weights.
-}
-\details{
-The main function in this file is `quantile_twas_weight_pipeline`, which
-orchestrates the entire analysis process. Other functions are helper
-functions used within the main pipeline.
-
-Qrank Score Test Screen
-}
diff --git a/man/quantile_twas_weight_pipeline.Rd b/man/quantile_twas_weight_pipeline.Rd
deleted file mode 100644
index c1726a5b..00000000
--- a/man/quantile_twas_weight_pipeline.Rd
+++ /dev/null
@@ -1,78 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quantile_twas_weight.R
-\name{quantile_twas_weight_pipeline}
-\alias{quantile_twas_weight_pipeline}
-\title{Quantile TWAS Weight Pipeline}
-\usage{
-quantile_twas_weight_pipeline(
-  X,
-  Y,
-  Z = NULL,
-  maf = NULL,
-  region_id = "",
-  ld_reference_meta_file = NULL,
-  twas_maf_cutoff = 0.01,
-  ld_clumping = FALSE,
-  ld_pruning = FALSE,
-  screen_significant = TRUE,
-  quantile_qtl_tau_list = seq(0.05, 0.95, by = 0.05),
-  quantile_twas_tau_list = seq(0.01, 0.99, by = 0.01),
-  screen_method = "qvalue",
-  screen_threshold = 0.05,
-  xi_tau_range = seq(0.1, 0.9, by = 0.05),
-  keep_variants = NULL,
-  marginal_beta_calculate = TRUE,
-  twas_weight_calculate = TRUE,
-  qrank_screen_calculate = TRUE,
-  vqtl_calculate = TRUE,
-  pre_filter_by_pqr = FALSE,
-  initial_corr_filter_cutoff = 0.8,
-  full_rank_corr_filter_cutoff = seq(0.75, 0.5, by = -0.05)
-)
-}
-\arguments{
-\item{X}{Matrix of genotypes}
-
-\item{Y}{Matrix or vector of phenotypes}
-
-\item{Z}{Matrix of covariates (optional)}
-
-\item{maf}{Vector of minor allele frequencies (optional)}
-
-\item{region_id}{Name of the region being analyzed}
-
-\item{quantile_qtl_tau_list}{Vector of quantiles for QTL analysis}
-
-\item{quantile_twas_tau_list}{Vector of quantiles for TWAS analysis}
-}
-\value{
-A list containing various results from the TWAS weight pipeline:
-\itemize{
-  \item qr_screen_pvalue_df: Data frame with QR screening results: pavlue, qvalue and zscore.
-  \item message: Any informational or warning messages.
-  \item twas_variant_names: Names of variants used in TWAS weight calculation.
-  \item rq_coef_df: Data frame with quantile regression coefficients.
-  \item twas_weight: Matrix of TWAS weights.
-  \item pseudo_R2: Vector of pseudo R-squared values.
-  \item quantile_twas_prediction: Matrix of TWAS predictions.
-}
-}
-\description{
-Quantile TWAS Weight Pipeline
-}
-\details{
-The function performs the following steps:
-1. QR screening to identify significant SNPs.
-2. Filtering of highly correlated SNPs.
-3. LD clumping and pruning(use filtered SNPs from step 1).
-4. Calculation of QR coefficients for selected SNPs(use filtered SNPs from step 3).
-5. Calculation of TWAS weights and pseudo R-squared values(use filtered SNPs from step 2).
-}
-\examples{
-# Example usage:
-# X <- matrix of genotypes
-# Y <- vector of phenotypes
-# Z <- matrix of covariates
-# results <- quantile_twas_weight_pipeline(X, Y, Z, region_id = "GeneA")
-
-}
diff --git a/tests/testthat/test_ctwas.R b/tests/testthat/test_ctwas.R
new file mode 100644
index 00000000..8284a566
--- /dev/null
+++ b/tests/testthat/test_ctwas.R
@@ -0,0 +1,160 @@
+context("ctwas")
+
+# ===========================================================================
+#  ctwas wrapper tests
+# ===========================================================================
+
+
+# ---------- trim_ctwas_variants --------------------------------------------
+
+# Helper: build a minimal region_data structure that trim_ctwas_variants expects
+make_mock_region_data <- function() {
+  # Variant IDs in canonical format (chr:pos:A2:A1)
+  variant_ids <- c("chr1:1000:A:G", "chr1:2000:C:T", "chr1:3000:G:A", "chr1:4000:T:C")
+
+  # Weight matrix (4 variants x 1 weight column)
+  wgt <- matrix(c(0.5, 0.0001, 0.3, -0.2), nrow = 4, ncol = 1)
+  rownames(wgt) <- variant_ids
+
+  gene_id <- "GENE1|ctx1"
+  context <- "ctx1"
+  study <- "study1"
+
+  weights <- list()
+  weights[[gene_id]] <- list()
+  weights[[gene_id]][[study]] <- list(
+    wgt = wgt,
+    context = context,
+    p0 = 1000,
+    p1 = 4000
+  )
+
+  # SuSiE intermediate info
+  pip_vals <- c(0.8, 0.05, 0.6, 0.02)
+  names(pip_vals) <- variant_ids
+
+  susie_weights_intermediate <- list()
+  susie_weights_intermediate[["GENE1"]] <- list()
+  susie_weights_intermediate[["GENE1"]][[context]] <- list(
+    pip = pip_vals,
+    cs_variants = list(variant_ids[c(1, 3)]),
+    cs_purity = list(min.abs.corr = 0.9)
+  )
+
+  list(
+    weights = weights,
+    susie_weights_intermediate = susie_weights_intermediate
+  )
+}
+
+test_that("trim_ctwas_variants removes variants below weight cutoff", {
+  rd <- make_mock_region_data()
+  # Default cutoff 1e-5, variant 2 has weight 0.0001 (above), so all 4 should pass default cutoff
+  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 1e-5)
+  expect_true(is.list(result))
+  # With a higher cutoff, the near-zero variant should be removed
+  result_strict <- trim_ctwas_variants(rd, twas_weight_cutoff = 0.001)
+  # study1 should exist in result
+  expect_true("study1" %in% names(result_strict))
+  # Get the gene-level result
+  gene_weights <- result_strict[["study1"]][["GENE1|ctx1"]]
+  # Variant 2 has abs(weight) = 0.0001 < 0.001, so should be removed
+  expect_false("chr1:2000:C:T" %in% rownames(gene_weights$wgt))
+})
+
+test_that("trim_ctwas_variants removes gene when all weights below cutoff", {
+  rd <- make_mock_region_data()
+  # Set cutoff so high that all variants are dropped
+  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 10)
+  # Gene should be removed entirely since no weights pass the cutoff
+  # Result should be an empty list
+  expect_equal(length(result), 0)
+})
+
+test_that("trim_ctwas_variants returns result keyed by study", {
+  rd <- make_mock_region_data()
+  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 1e-5)
+  # merge_by_study reorganizes: weights[[study]][[group]]
+  expect_true("study1" %in% names(result))
+  expect_true("GENE1|ctx1" %in% names(result[["study1"]]))
+})
+
+test_that("trim_ctwas_variants updates p0 and p1 positions", {
+  rd <- make_mock_region_data()
+  # Use a weight cutoff that removes the variant at position 2000
+  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 0.001)
+  gene_weights <- result[["study1"]][["GENE1|ctx1"]]
+  # p0 and p1 should reflect the range of remaining variant positions
+  remaining_positions <- as.integer(sapply(
+    rownames(gene_weights$wgt),
+    function(v) strsplit(v, ":")[[1]][2]
+  ))
+  expect_equal(gene_weights$p0, min(remaining_positions))
+  expect_equal(gene_weights$p1, max(remaining_positions))
+})
+
+test_that("trim_ctwas_variants respects max_num_variants", {
+  rd <- make_mock_region_data()
+  # Request max 2 variants; since nrow(wgt) == 4 >= max_num_variants == 2,
+  # it triggers select_variants which picks by PIP priority
+  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 1e-5, max_num_variants = 2)
+  gene_weights <- result[["study1"]][["GENE1|ctx1"]]
+  expect_true(nrow(gene_weights$wgt) <= 2)
+})
+
+test_that("trim_ctwas_variants handles NA weights by removing group", {
+  rd <- make_mock_region_data()
+  # Replace all weights with NA
+  rd$weights[["GENE1|ctx1"]][["study1"]]$wgt[, 1] <- NA
+  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 0)
+  # The group should be removed because all weights are NA
+  expect_equal(length(result), 0)
+})
+
+test_that("trim_ctwas_variants handles multiple genes", {
+  rd <- make_mock_region_data()
+
+  # Add a second gene
+  variant_ids2 <- c("chr1:5000:A:G", "chr1:6000:C:T")
+  wgt2 <- matrix(c(0.4, -0.3), nrow = 2, ncol = 1)
+  rownames(wgt2) <- variant_ids2
+
+  rd$weights[["GENE2|ctx1"]] <- list()
+  rd$weights[["GENE2|ctx1"]][["study1"]] <- list(
+    wgt = wgt2,
+    context = "ctx1",
+    p0 = 5000,
+    p1 = 6000
+  )
+
+  pip_vals2 <- c(0.7, 0.4)
+  names(pip_vals2) <- variant_ids2
+  rd$susie_weights_intermediate[["GENE2"]] <- list()
+  rd$susie_weights_intermediate[["GENE2"]][["ctx1"]] <- list(
+    pip = pip_vals2,
+    cs_variants = list(variant_ids2[1]),
+    cs_purity = list(min.abs.corr = 0.95)
+  )
+
+  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 1e-5)
+  expect_true("GENE1|ctx1" %in% names(result[["study1"]]))
+  expect_true("GENE2|ctx1" %in% names(result[["study1"]]))
+})
+
+test_that("trim_ctwas_variants select_variants uses cs_min_cor to include CS variants", {
+  rd <- make_mock_region_data()
+  # cs_purity min.abs.corr = 0.9, so with cs_min_cor = 0.8 the CS variants
+  # (variant 1 and 3) should be included. Max 2 variants.
+  result <- trim_ctwas_variants(rd,
+    twas_weight_cutoff = 1e-5,
+    cs_min_cor = 0.8,
+    min_pip_cutoff = 0.0,
+    max_num_variants = 2
+  )
+  gene_weights <- result[["study1"]][["GENE1|ctx1"]]
+  included <- rownames(gene_weights$wgt)
+  # CS variants chr1:1000:A:G and chr1:3000:G:A have highest PIPs (0.8 and 0.6)
+  # and are in the CS, so they should be prioritized
+  expect_true("chr1:1000:A:G" %in% included)
+  expect_true("chr1:3000:G:A" %in% included)
+})
diff --git a/tests/testthat/test_quail_ctwas.R b/tests/testthat/test_quail_ctwas.R
deleted file mode 100644
index 7a66c55f..00000000
--- a/tests/testthat/test_quail_ctwas.R
+++ /dev/null
@@ -1,374 +0,0 @@
-context("quail_ctwas")
-
-# ===========================================================================
-# Helper: small simulated phenotype + covariate data (n = 30)
-# ===========================================================================
-make_quail_data <- function(n = 30, seed = 42) {
-  set.seed(seed)
-  covariates <- data.frame(
-    age = rnorm(n, 50, 10),
-    sex = rbinom(n, 1, 0.5)
-  )
-  # Phenotype with variance that depends on covariates (heteroscedastic)
-  phenotype <- 2 + 0.5 * covariates$age + rnorm(n, sd = 1 + 0.3 * covariates$sex)
-  list(phenotype = phenotype, covariates = covariates)
-}
-
-# ===========================================================================
-#  QUAIL rank score tests
-# ===========================================================================
-
-# ---------- calculate_rank_score -------------------------------------------
-
-test_that("calculate_rank_score returns vector of correct length", {
-  skip_if_not_installed("quantreg")
-  d <- make_quail_data()
-  result <- pecotmr:::calculate_rank_score(d$phenotype, d$covariates, tau = 0.5, seed = 1)
-  expect_true(is.numeric(result))
-  expect_equal(length(result), length(d$phenotype))
-})
-
-test_that("calculate_rank_score is reproducible with same seed", {
-  skip_if_not_installed("quantreg")
-  d <- make_quail_data()
-  r1 <- pecotmr:::calculate_rank_score(d$phenotype, d$covariates, tau = 0.25, seed = 99)
-  r2 <- pecotmr:::calculate_rank_score(d$phenotype, d$covariates, tau = 0.25, seed = 99)
-  expect_identical(r1, r2)
-})
-
-test_that("calculate_rank_score differs with different seeds", {
-  skip_if_not_installed("quantreg")
-  d <- make_quail_data()
-  r1 <- pecotmr:::calculate_rank_score(d$phenotype, d$covariates, tau = 0.5, seed = 1)
-  r2 <- pecotmr:::calculate_rank_score(d$phenotype, d$covariates, tau = 0.5, seed = 2)
-  # Different random covariate column, so results should differ
-
-  expect_false(identical(r1, r2))
-})
-
-test_that("calculate_rank_score produces different results for different tau values", {
-  skip_if_not_installed("quantreg")
-  d <- make_quail_data()
-  r_low <- pecotmr:::calculate_rank_score(d$phenotype, d$covariates, tau = 0.1, seed = 1)
-  r_high <- pecotmr:::calculate_rank_score(d$phenotype, d$covariates, tau = 0.9, seed = 1)
-  expect_false(identical(r_low, r_high))
-})
-
-test_that("calculate_rank_score returns finite values", {
-  skip_if_not_installed("quantreg")
-  d <- make_quail_data()
-  result <- pecotmr:::calculate_rank_score(d$phenotype, d$covariates, tau = 0.5, seed = 1)
-  expect_true(all(is.finite(result)))
-})
-
-# ---------- calculate_integrated_score -------------------------------------
-
-test_that("calculate_integrated_score works with equal method and even num_tau_levels", {
-  skip_if_not_installed("quantreg")
-  skip_if_not_installed("quadprog")
-  n <- 30
-  num_tau <- 4 # even
-  # Create mock rank scores: list of numeric vectors
-  set.seed(10)
-  rank_scores <- lapply(1:num_tau, function(i) rnorm(n))
-  result <- pecotmr:::calculate_integrated_score(rank_scores, method = "equal", num_tau_levels = num_tau)
-  expect_true(is.numeric(result))
-  expect_equal(length(result), n)
-  expect_true(all(is.finite(result)))
-})
-
-test_that("calculate_integrated_score works with equal method and odd num_tau_levels", {
-  skip_if_not_installed("quantreg")
-  skip_if_not_installed("quadprog")
-  n <- 30
-  num_tau <- 5 # odd
-  set.seed(11)
-  rank_scores <- lapply(1:num_tau, function(i) rnorm(n))
-  result <- pecotmr:::calculate_integrated_score(rank_scores, method = "equal", num_tau_levels = num_tau)
-  expect_true(is.numeric(result))
-  expect_equal(length(result), n)
-})
-
-test_that("calculate_integrated_score equal method: middle tau has zero weight for odd levels", {
-  skip_if_not_installed("quantreg")
-  skip_if_not_installed("quadprog")
-  n <- 20
-  num_tau <- 3 # odd, mid_point = 2
-  # Set all scores to zero except the middle one
-  rank_scores <- list(rep(0, n), rep(1, n), rep(0, n))
-  result <- pecotmr:::calculate_integrated_score(rank_scores, method = "equal", num_tau_levels = num_tau)
-  # Middle score (index 2) is skipped for odd levels; lower_half = {1}, upper_half = {3}
-  # int_rank_score = -rank_scores[[1]] + rank_scores[[3]] = 0 + 0 = 0
-  # n_pairs = 1, so result = 0/1 = 0
-  expect_equal(result, rep(0, n))
-})
-
-test_that("calculate_integrated_score IVW method returns correct length", {
-  skip("Known bug: solve.QP constraint matrix dimensions are incorrect in IVW method")
-  skip_if_not_installed("quantreg")
-  skip_if_not_installed("quadprog")
-  n <- 30
-  num_tau <- 4
-  set.seed(20)
-  rank_scores <- lapply(1:num_tau, function(i) rnorm(n, mean = i))
-  result <- pecotmr:::calculate_integrated_score(rank_scores, method = "ivw", num_tau_levels = num_tau)
-  expect_true(is.numeric(result))
-  expect_equal(length(result), n)
-  expect_true(all(is.finite(result)))
-})
-
-test_that("calculate_integrated_score IVW method with odd num_tau_levels", {
-  skip("Known bug: solve.QP constraint matrix dimensions are incorrect in IVW method")
-  skip_if_not_installed("quantreg")
-  skip_if_not_installed("quadprog")
-  n <- 30
-  num_tau <- 5
-  set.seed(21)
-  rank_scores <- lapply(1:num_tau, function(i) rnorm(n, mean = i * 0.5))
-  result <- pecotmr:::calculate_integrated_score(rank_scores, method = "ivw", num_tau_levels = num_tau)
-  expect_true(is.numeric(result))
-  expect_equal(length(result), n)
-})
-
-# ---------- fit_rank_scores ------------------------------------------------
-
-test_that("fit_rank_scores returns list of correct length", {
-  skip_if_not_installed("quantreg")
-  d <- make_quail_data()
-  num_tau <- 3
-  result <- pecotmr:::fit_rank_scores(d$phenotype, d$covariates, num_tau_levels = num_tau, num_cores = 1)
-  expect_true(is.list(result))
-  expect_equal(length(result), num_tau)
-})
-
-test_that("fit_rank_scores each element has correct length", {
-  skip_if_not_installed("quantreg")
-  d <- make_quail_data()
-  num_tau <- 3
-  result <- pecotmr:::fit_rank_scores(d$phenotype, d$covariates, num_tau_levels = num_tau, num_cores = 1)
-  for (i in seq_along(result)) {
-    expect_equal(length(result[[i]]), length(d$phenotype))
-    expect_true(is.numeric(result[[i]]))
-  }
-})
-
-test_that("fit_rank_scores elements correspond to evenly spaced tau levels", {
-  skip_if_not_installed("quantreg")
-  d <- make_quail_data()
-  num_tau <- 3
-  result <- pecotmr:::fit_rank_scores(d$phenotype, d$covariates, num_tau_levels = num_tau, num_cores = 1)
-  # Each element should match a direct call to calculate_rank_score at the expected tau
-  for (i in 1:num_tau) {
-    tau <- i / (num_tau + 1)
-    expected <- pecotmr:::calculate_rank_score(d$phenotype, d$covariates, tau)
-    expect_equal(result[[i]], expected)
-  }
-})
-
-# ---------- QUAIL_rank_score_pipeline (exported) ---------------------------
-
-test_that("QUAIL_rank_score_pipeline errors on non-numeric character phenotype", {
-  skip_if_not_installed("quantreg")
-  skip_if_not_installed("quadprog")
-  d <- make_quail_data()
-  pheno_char <- c("a", "b", "c")
-  expect_error(
-    QUAIL_rank_score_pipeline(pheno_char, d$covariates, num_tau_levels = 3),
-    "phenotype must be a numeric vector"
-  )
-})
-
-test_that("QUAIL_rank_score_pipeline accepts data.frame phenotype", {
-  skip_if_not_installed("quantreg")
-  skip_if_not_installed("quadprog")
-  d <- make_quail_data()
-  pheno_df <- data.frame(pheno = d$phenotype)
-  # Should not error; data.frame is converted internally
-  result <- QUAIL_rank_score_pipeline(pheno_df, d$covariates, num_tau_levels = 3, method = "equal")
-  expect_true(is.numeric(result))
-  expect_equal(length(result), length(d$phenotype))
-})
-
-test_that("QUAIL_rank_score_pipeline full run with small data and equal method", {
-  skip_if_not_installed("quantreg")
-  skip_if_not_installed("quadprog")
-  d <- make_quail_data()
-  result <- QUAIL_rank_score_pipeline(d$phenotype, d$covariates,
-    num_tau_levels = 5, method = "equal", num_cores = 1
-  )
-  expect_true(is.numeric(result))
-  expect_equal(length(result), length(d$phenotype))
-  expect_true(all(is.finite(result)))
-})
-
-test_that("QUAIL_rank_score_pipeline full run with IVW method", {
-  skip("Known bug: solve.QP constraint matrix dimensions are incorrect in IVW method")
-  skip_if_not_installed("quantreg")
-  skip_if_not_installed("quadprog")
-  d <- make_quail_data()
-  result <- QUAIL_rank_score_pipeline(d$phenotype, d$covariates,
-    num_tau_levels = 4, method = "ivw", num_cores = 1
-  )
-  expect_true(is.numeric(result))
-  expect_equal(length(result), length(d$phenotype))
-  expect_true(all(is.finite(result)))
-})
-
-# ===========================================================================
-#  ctwas wrapper tests
-# ===========================================================================
-
-
-# ---------- trim_ctwas_variants --------------------------------------------
-
-# Helper: build a minimal region_data structure that trim_ctwas_variants expects
-make_mock_region_data <- function() {
-  # Variant IDs in canonical format (chr:pos:A2:A1)
-  variant_ids <- c("chr1:1000:A:G", "chr1:2000:C:T", "chr1:3000:G:A", "chr1:4000:T:C")
-
-  # Weight matrix (4 variants x 1 weight column)
-  wgt <- matrix(c(0.5, 0.0001, 0.3, -0.2), nrow = 4, ncol = 1)
-  rownames(wgt) <- variant_ids
-
-  gene_id <- "GENE1|ctx1"
-  context <- "ctx1"
-  study <- "study1"
-
-  weights <- list()
-  weights[[gene_id]] <- list()
-  weights[[gene_id]][[study]] <- list(
-    wgt = wgt,
-    context = context,
-    p0 = 1000,
-    p1 = 4000
-  )
-
-  # SuSiE intermediate info
-  pip_vals <- c(0.8, 0.05, 0.6, 0.02)
-  names(pip_vals) <- variant_ids
-
-  susie_weights_intermediate <- list()
-  susie_weights_intermediate[["GENE1"]] <- list()
-  susie_weights_intermediate[["GENE1"]][[context]] <- list(
-    pip = pip_vals,
-    cs_variants = list(variant_ids[c(1, 3)]),
-    cs_purity = list(min.abs.corr = 0.9)
-  )
-
-  list(
-    weights = weights,
-    susie_weights_intermediate = susie_weights_intermediate
-  )
-}
-
-test_that("trim_ctwas_variants removes variants below weight cutoff", {
-  rd <- make_mock_region_data()
-  # Default cutoff 1e-5, variant 2 has weight 0.0001 (above), so all 4 should pass default cutoff
-  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 1e-5)
-  expect_true(is.list(result))
-  # With a higher cutoff, the near-zero variant should be removed
-  result_strict <- trim_ctwas_variants(rd, twas_weight_cutoff = 0.001)
-  # study1 should exist in result
-  expect_true("study1" %in% names(result_strict))
-  # Get the gene-level result
-  gene_weights <- result_strict[["study1"]][["GENE1|ctx1"]]
-  # Variant 2 has abs(weight) = 0.0001 < 0.001, so should be removed
-  expect_false("chr1:2000:C:T" %in% rownames(gene_weights$wgt))
-})
-
-test_that("trim_ctwas_variants removes gene when all weights below cutoff", {
-  rd <- make_mock_region_data()
-  # Set cutoff so high that all variants are dropped
-  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 10)
-  # Gene should be removed entirely since no weights pass the cutoff
-  # Result should be an empty list
-  expect_equal(length(result), 0)
-})
-
-test_that("trim_ctwas_variants returns result keyed by study", {
-  rd <- make_mock_region_data()
-  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 1e-5)
-  # merge_by_study reorganizes: weights[[study]][[group]]
-  expect_true("study1" %in% names(result))
-  expect_true("GENE1|ctx1" %in% names(result[["study1"]]))
-})
-
-test_that("trim_ctwas_variants updates p0 and p1 positions", {
-  rd <- make_mock_region_data()
-  # Use a weight cutoff that removes the variant at position 2000
-  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 0.001)
-  gene_weights <- result[["study1"]][["GENE1|ctx1"]]
-  # p0 and p1 should reflect the range of remaining variant positions
-  remaining_positions <- as.integer(sapply(
-    rownames(gene_weights$wgt),
-    function(v) strsplit(v, ":")[[1]][2]
-  ))
-  expect_equal(gene_weights$p0, min(remaining_positions))
-  expect_equal(gene_weights$p1, max(remaining_positions))
-})
-
-test_that("trim_ctwas_variants respects max_num_variants", {
-  rd <- make_mock_region_data()
-  # Request max 2 variants; since nrow(wgt) == 4 >= max_num_variants == 2,
-  # it triggers select_variants which picks by PIP priority
-  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 1e-5, max_num_variants = 2)
-  gene_weights <- result[["study1"]][["GENE1|ctx1"]]
-  expect_true(nrow(gene_weights$wgt) <= 2)
-})
-
-test_that("trim_ctwas_variants handles NA weights by removing group", {
-  rd <- make_mock_region_data()
-  # Replace all weights with NA
-  rd$weights[["GENE1|ctx1"]][["study1"]]$wgt[, 1] <- NA
-  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 0)
-  # The group should be removed because all weights are NA
-  expect_equal(length(result), 0)
-})
-
-test_that("trim_ctwas_variants handles multiple genes", {
-  rd <- make_mock_region_data()
-
-  # Add a second gene
-  variant_ids2 <- c("chr1:5000:A:G", "chr1:6000:C:T")
-  wgt2 <- matrix(c(0.4, -0.3), nrow = 2, ncol = 1)
-  rownames(wgt2) <- variant_ids2
-
-  rd$weights[["GENE2|ctx1"]] <- list()
-  rd$weights[["GENE2|ctx1"]][["study1"]] <- list(
-    wgt = wgt2,
-    context = "ctx1",
-    p0 = 5000,
-    p1 = 6000
-  )
-
-  pip_vals2 <- c(0.7, 0.4)
-  names(pip_vals2) <- variant_ids2
-  rd$susie_weights_intermediate[["GENE2"]] <- list()
-  rd$susie_weights_intermediate[["GENE2"]][["ctx1"]] <- list(
-    pip = pip_vals2,
-    cs_variants = list(variant_ids2[1]),
-    cs_purity = list(min.abs.corr = 0.95)
-  )
-
-  result <- trim_ctwas_variants(rd, twas_weight_cutoff = 1e-5)
-  expect_true("GENE1|ctx1" %in% names(result[["study1"]]))
-  expect_true("GENE2|ctx1" %in% names(result[["study1"]]))
-})
-
-test_that("trim_ctwas_variants select_variants uses cs_min_cor to include CS variants", {
-  rd <- make_mock_region_data()
-  # cs_purity min.abs.corr = 0.9, so with cs_min_cor = 0.8 the CS variants
-  # (variant 1 and 3) should be included. Max 2 variants.
-  result <- trim_ctwas_variants(rd,
-    twas_weight_cutoff = 1e-5,
-    cs_min_cor = 0.8,
-    min_pip_cutoff = 0.0,
-    max_num_variants = 2
-  )
-  gene_weights <- result[["study1"]][["GENE1|ctx1"]]
-  included <- rownames(gene_weights$wgt)
-  # CS variants chr1:1000:A:G and chr1:3000:G:A have highest PIPs (0.8 and 0.6)
-  # and are in the CS, so they should be prioritized
-  expect_true("chr1:1000:A:G" %in% included)
-  expect_true("chr1:3000:G:A" %in% included)
-})
diff --git a/tests/testthat/test_quail_vqtl.R b/tests/testthat/test_quail_vqtl.R
deleted file mode 100644
index 39170ddd..00000000
--- a/tests/testthat/test_quail_vqtl.R
+++ /dev/null
@@ -1,338 +0,0 @@
-context("quail_vqtl")
-
-# ===========================================================================
-# Helpers: small synthetic genotype / phenotype data
-# ===========================================================================
-
-make_genotype_matrix <- function(n = 50, p = 5, seed = 123) {
-  set.seed(seed)
-  geno <- matrix(rbinom(n * p, 2, 0.3), nrow = n, ncol = p)
-  colnames(geno) <- paste0("chr1:", seq(100, by = 100, length.out = p), ":A:G")
-  geno
-}
-
-make_rank_score <- function(n = 50, seed = 123) {
-  set.seed(seed)
-  rnorm(n)
-}
-
-make_covariates <- function(n = 50, seed = 123) {
-  set.seed(seed + 1)
-  data.frame(
-    age = rnorm(n, 50, 10),
-    sex = rbinom(n, 1, 0.5)
-  )
-}
-
-# ===========================================================================
-# QUAIL_pipeline tests
-# ===========================================================================
-
-# ---------- Input validation -----------------------------------------------
-
-test_that("QUAIL_pipeline errors when rank_score is not numeric", {
-  geno <- make_genotype_matrix()
-  expect_error(
-    QUAIL_pipeline(geno, rank_score = c("a", "b", "c")),
-    "rank_score must be a numeric vector"
-  )
-})
-
-# ---------- Basic synthetic data run ---------------------------------------
-
-test_that("QUAIL_pipeline runs with basic synthetic data", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix()
-  rs <- make_rank_score()
-  result <- QUAIL_pipeline(geno, rs)
-  expect_true(is.data.frame(result))
-  expect_equal(nrow(result), ncol(geno))
-})
-
-# ---------- Output structure -----------------------------------------------
-
-test_that("QUAIL_pipeline output has required columns", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix()
-  rs <- make_rank_score()
-  result <- QUAIL_pipeline(geno, rs)
-  expected_cols <- c("phenotype_id", "chr", "pos", "A1", "A2",
-                     "variant_id", "beta", "se", "z", "p", "q", "N")
-  expect_true(all(expected_cols %in% colnames(result)))
-})
-
-test_that("QUAIL_pipeline N column equals number of samples", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix(n = 40)
-  rs <- make_rank_score(n = 40)
-  result <- QUAIL_pipeline(geno, rs)
-  expect_true(all(result$N == 40))
-})
-
-test_that("QUAIL_pipeline variant_id matches genotype column names", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix()
-  rs <- make_rank_score()
-  result <- QUAIL_pipeline(geno, rs)
-  expect_equal(result$variant_id, colnames(geno))
-})
-
-test_that("QUAIL_pipeline parses chr and pos from variant IDs", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix()
-  rs <- make_rank_score()
-  result <- QUAIL_pipeline(geno, rs)
-  # All variants are on chr1
-  expect_true(all(result$chr == 1))
-  # Positions should be 100, 200, ..., 500
-  expect_equal(result$pos, seq(100, by = 100, length.out = ncol(geno)))
-})
-
-test_that("QUAIL_pipeline phenotype_id defaults to NA when not provided", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix()
-  rs <- make_rank_score()
-  result <- QUAIL_pipeline(geno, rs)
-  expect_true(all(is.na(result$phenotype_id)))
-})
-
-test_that("QUAIL_pipeline phenotype_id is set when provided", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix()
-  rs <- make_rank_score()
-  result <- QUAIL_pipeline(geno, rs, phenotype_id = "my_trait")
-  expect_true(all(result$phenotype_id == "my_trait"))
-})
-
-# ---------- With covariates ------------------------------------------------
-
-test_that("QUAIL_pipeline works with covariates", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix()
-  rs <- make_rank_score()
-  cov <- make_covariates()
-  result <- QUAIL_pipeline(geno, rs, covariates = cov)
-  expect_true(is.data.frame(result))
-  expect_equal(nrow(result), ncol(geno))
-})
-
-test_that("QUAIL_pipeline covariates affect beta estimates", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix()
-  rs <- make_rank_score()
-  cov <- make_covariates()
-  result_no_cov <- QUAIL_pipeline(geno, rs)
-  result_with_cov <- QUAIL_pipeline(geno, rs, covariates = cov)
-  # Betas should differ when adjusting for covariates
-  expect_false(identical(result_no_cov$beta, result_with_cov$beta))
-})
-
-test_that("QUAIL_pipeline accepts matrix covariates", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix()
-  rs <- make_rank_score()
-  cov_mat <- as.matrix(make_covariates())
-  result <- QUAIL_pipeline(geno, rs, covariates = cov_mat)
-  expect_true(is.data.frame(result))
-})
-
-# ---------- p-values and z-scores ------------------------------------------
-
-test_that("QUAIL_pipeline p-values are between 0 and 1", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix()
-  rs <- make_rank_score()
-  result <- QUAIL_pipeline(geno, rs)
-  expect_true(all(result$p >= 0 & result$p <= 1))
-})
-
-test_that("QUAIL_pipeline z-scores are finite", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix(n = 60, p = 4)
-  rs <- make_rank_score(n = 60)
-  result <- QUAIL_pipeline(geno, rs)
-  # z-scores should all be finite for well-conditioned data
-  expect_true(all(is.finite(result$z)))
-})
-
-# ===========================================================================
-# univariate_regression tests (internal function)
-# ===========================================================================
-
-test_that("univariate_regression returns expected list structure", {
-  skip_if_not_installed("qvalue")
-  set.seed(42)
-  X <- matrix(rnorm(200), 50, 4)
-  colnames(X) <- paste0("snp", 1:4)
-  y <- rnorm(50)
-  res <- pecotmr:::univariate_regression(X, y)
-  expect_true(is.list(res))
-  expected_names <- c("betahat", "sebetahat", "z_scores", "p_values", "q_values")
-  expect_true(all(expected_names %in% names(res)))
-  # Each element should have length equal to ncol(X)
-  for (nm in expected_names) {
-    expect_equal(length(res[[nm]]), ncol(X))
-  }
-})
-
-test_that("univariate_regression names outputs by column names of X", {
-  skip_if_not_installed("qvalue")
-  set.seed(42)
-  X <- matrix(rnorm(150), 50, 3)
-  colnames(X) <- c("var_A", "var_B", "var_C")
-  y <- rnorm(50)
-  res <- pecotmr:::univariate_regression(X, y)
-  expect_equal(names(res$betahat), colnames(X))
-  expect_equal(names(res$sebetahat), colnames(X))
-})
-
-test_that("univariate_regression handles centering", {
-  skip_if_not_installed("qvalue")
-  set.seed(42)
-  X <- matrix(rnorm(200, mean = 10), 50, 4)
-  colnames(X) <- paste0("s", 1:4)
-  y <- rnorm(50, mean = 5)
-  # With centering (default)
-  res_centered <- pecotmr:::univariate_regression(X, y, center = TRUE)
-  # Without centering
-  res_no_center <- pecotmr:::univariate_regression(X, y, center = FALSE)
-  # Since the regression includes an intercept, centering X and y
-  # should yield numerically equivalent slope estimates
-  expect_equal(res_centered$betahat, res_no_center$betahat, tolerance = 1e-10)
-  expect_equal(res_centered$sebetahat, res_no_center$sebetahat, tolerance = 1e-10)
-  # Both results should have valid finite values
-  expect_true(all(is.finite(res_centered$betahat)))
-  expect_true(all(is.finite(res_no_center$betahat)))
-})
-
-test_that("univariate_regression handles scaling", {
-  skip_if_not_installed("qvalue")
-  set.seed(42)
-  X <- matrix(rnorm(200, sd = 5), 50, 4)
-  colnames(X) <- paste0("s", 1:4)
-  y <- rnorm(50)
-  res_scaled <- pecotmr:::univariate_regression(X, y, scale = TRUE)
-  res_not_scaled <- pecotmr:::univariate_regression(X, y, scale = FALSE)
-  # Scaling divides each column by its SD, so beta_scaled = beta_unscaled * column_sd
-  col_sds <- apply(X, 2, sd)
-  expect_equal(res_scaled$betahat, res_not_scaled$betahat * col_sds,
-               tolerance = 1e-10)
-  # Both should be finite
-  expect_true(all(is.finite(res_scaled$betahat)))
-  expect_true(all(is.finite(res_not_scaled$betahat)))
-})
-
-test_that("univariate_regression handles covariates", {
-  skip_if_not_installed("qvalue")
-  set.seed(42)
-  X <- matrix(rnorm(200), 50, 4)
-  colnames(X) <- paste0("s", 1:4)
-  Z <- matrix(rnorm(100), 50, 2)
-  y <- rnorm(50)
-  res_cov <- pecotmr:::univariate_regression(X, y, Z = Z)
-  res_no_cov <- pecotmr:::univariate_regression(X, y)
-  # Adjusting for covariates should generally reduce residual SE
-  # (or at minimum keep it comparable)
-  expect_true(all(res_cov$sebetahat <= res_no_cov$sebetahat + 1e-10))
-  # Both should return valid finite results
-  expect_true(all(is.finite(res_cov$betahat)))
-  expect_true(all(is.finite(res_cov$sebetahat)))
-})
-
-test_that("univariate_regression handles NAs in y", {
-  skip_if_not_installed("qvalue")
-  set.seed(42)
-  X <- matrix(rnorm(200), 50, 4)
-  colnames(X) <- paste0("s", 1:4)
-  y <- rnorm(50)
-  y[c(1, 5, 10)] <- NA
-  res <- pecotmr:::univariate_regression(X, y)
-  # Should still return valid results
-  expect_equal(length(res$betahat), 4)
-  expect_true(all(is.finite(res$betahat)))
-})
-
-test_that("univariate_regression return_residuals works with covariates", {
-  skip_if_not_installed("qvalue")
-  set.seed(42)
-  X <- matrix(rnorm(200), 50, 4)
-  colnames(X) <- paste0("s", 1:4)
-  Z <- matrix(rnorm(100), 50, 2)
-  y <- rnorm(50)
-  res <- pecotmr:::univariate_regression(X, y, Z = Z, return_residuals = TRUE)
-  expect_true("residuals" %in% names(res))
-  expect_equal(length(res$residuals), length(y))
-})
-
-test_that("univariate_regression return_residuals without covariates omits residuals", {
-  skip_if_not_installed("qvalue")
-  set.seed(42)
-  X <- matrix(rnorm(200), 50, 4)
-  colnames(X) <- paste0("s", 1:4)
-  y <- rnorm(50)
-  res <- pecotmr:::univariate_regression(X, y, return_residuals = TRUE)
-  # Without Z, residuals should not be in the output
-  expect_false("residuals" %in% names(res))
-})
-
-# ===========================================================================
-# run_linear_regression tests (internal function)
-# ===========================================================================
-
-test_that("run_linear_regression returns a data frame with expected columns", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix(n = 40, p = 3)
-  pheno <- make_rank_score(n = 40)
-  res <- pecotmr:::run_linear_regression(geno, pheno)
-  expected_cols <- c("phenotype_id", "chr", "pos", "A1", "A2",
-                     "variant_id", "beta", "se", "z", "p", "q", "N")
-  expect_true(all(expected_cols %in% colnames(res)))
-  expect_equal(nrow(res), ncol(geno))
-})
-
-test_that("run_linear_regression passes phenotype_id through", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix(n = 40, p = 3)
-  pheno <- make_rank_score(n = 40)
-  res <- pecotmr:::run_linear_regression(geno, pheno, phenotype_id = "test_pheno")
-  expect_true(all(res$phenotype_id == "test_pheno"))
-})
-
-# ===========================================================================
-# Edge cases
-# ===========================================================================
-
-test_that("QUAIL_pipeline works with a single SNP", {
-  skip_if_not_installed("qvalue")
-  set.seed(99)
-  n <- 30
-  geno <- matrix(rbinom(n, 2, 0.3), nrow = n, ncol = 1)
-  colnames(geno) <- "chr1:500:C:T"
-  rs <- rnorm(n)
-  result <- QUAIL_pipeline(geno, rs)
-  expect_true(is.data.frame(result))
-  expect_equal(nrow(result), 1)
-})
-
-test_that("univariate_regression handles all-zero genotype column gracefully", {
-  skip_if_not_installed("qvalue")
-  set.seed(99)
-  n <- 40
-  X <- matrix(rnorm(n * 3), n, 3)
-  X[, 2] <- 0 # all-zero column
-  colnames(X) <- paste0("s", 1:3)
-  y <- rnorm(n)
-  # Should not error; the zero-variance column gets NaN which is set to 0
-  res <- pecotmr:::univariate_regression(X, y)
-  expect_equal(length(res$betahat), 3)
-  # The all-zero column (after centering, NaN set to 0) produces beta = 0
-  expect_equal(unname(res$betahat[2]), 0)
-})
-
-test_that("QUAIL_pipeline se values are non-negative", {
-  skip_if_not_installed("qvalue")
-  geno <- make_genotype_matrix(n = 60, p = 4)
-  rs <- make_rank_score(n = 60)
-  result <- QUAIL_pipeline(geno, rs)
-  expect_true(all(result$se >= 0))
-})
diff --git a/tests/testthat/test_quantile_twas.R b/tests/testthat/test_quantile_twas.R
deleted file mode 100644
index f3d99282..00000000
--- a/tests/testthat/test_quantile_twas.R
+++ /dev/null
@@ -1,717 +0,0 @@
-context("quantile_twas")
-
-# ===========================================================================
-# ensure_continuous_clusters (internal)
-# ===========================================================================
-
-test_that("ensure_continuous_clusters preserves already continuous", {
-  index <- c(1, 1, 2, 2, 3)
-  result <- pecotmr:::ensure_continuous_clusters(index)
-  expect_equal(result, c(1, 1, 2, 2, 3))
-})
-
-test_that("ensure_continuous_clusters renumbers non-continuous", {
-  index <- c(1, 2, 1, 2, 3)
-  result <- pecotmr:::ensure_continuous_clusters(index)
-  expect_equal(result, c(1, 2, 3, 4, 5))
-})
-
-test_that("ensure_continuous_clusters handles single element", {
-  result <- pecotmr:::ensure_continuous_clusters(c(1))
-  expect_equal(result, c(1))
-})
-
-test_that("ensure_continuous_clusters handles two elements same cluster", {
-  result <- pecotmr:::ensure_continuous_clusters(c(1, 1))
-  expect_equal(result, c(1, 1))
-})
-
-test_that("ensure_continuous_clusters all same cluster", {
-  result <- pecotmr:::ensure_continuous_clusters(c(3, 3, 3, 3))
-  expect_equal(result, c(1, 1, 1, 1))
-})
-
-# ===========================================================================
-# get_cluster_ranges (internal)
-# ===========================================================================
-
-test_that("get_cluster_ranges returns correct ranges", {
-  index <- c(1, 1, 2, 2, 3)
-  result <- pecotmr:::get_cluster_ranges(index)
-  expect_length(result, 3)
-  expect_equal(result[[1]], "1 - 2")
-  expect_equal(result[[2]], "3 - 4")
-  expect_equal(result[[3]], "5 - 5")
-})
-
-test_that("get_cluster_ranges single cluster", {
-  index <- c(1, 1, 1)
-  result <- pecotmr:::get_cluster_ranges(index)
-  expect_length(result, 1)
-  expect_equal(result[[1]], "1 - 3")
-})
-
-# ===========================================================================
-# get_modularity (internal)
-# ===========================================================================
-
-test_that("get_modularity returns 0 for single element", {
-  W <- matrix(1, nrow = 1, ncol = 1)
-  B <- matrix(1, nrow = 1, ncol = 1)
-  result <- pecotmr:::get_modularity(W, B)
-  expect_equal(result, 0)
-})
-
-test_that("get_modularity returns numeric for identity weight", {
-  W <- diag(5)
-  B <- matrix(c(rep(c(1, 0), c(3, 2)), rep(c(0, 1), c(3, 2))), nrow = 5)
-  result <- pecotmr:::get_modularity(W, B)
-  expect_type(result, "double")
-})
-
-test_that("get_modularity handles all-positive weights", {
-  set.seed(42)
-  W <- abs(matrix(rnorm(16), 4, 4))
-  W <- (W + t(W)) / 2
-  B <- matrix(c(1, 1, 0, 0, 0, 0, 1, 1), nrow = 4)
-  result <- pecotmr:::get_modularity(W, B)
-  expect_type(result, "double")
-  expect_true(is.finite(result))
-})
-
-# ===========================================================================
-# get_n_cluster (internal)
-# ===========================================================================
-
-test_that("get_n_cluster returns single cluster for high correlation", {
-  Sigma <- matrix(0.9, nrow = 5, ncol = 5)
-  diag(Sigma) <- 1
-  hc <- hclust(as.dist(1 - Sigma))
-  result <- pecotmr:::get_n_cluster(hc, Sigma, between_cluster = 0.8)
-  expect_equal(result$n_cluster, 1)
-})
-
-test_that("get_n_cluster returns multiple clusters for low correlation", {
-  set.seed(42)
-  Sigma <- diag(10)
-  Sigma[1:5, 1:5] <- 0.8
-  Sigma[6:10, 6:10] <- 0.8
-  diag(Sigma) <- 1
-  hc <- hclust(as.dist(1 - Sigma))
-  result <- pecotmr:::get_n_cluster(hc, Sigma, between_cluster = 0.5)
-  expect_true(result$n_cluster >= 1)
-})
-
-# ===========================================================================
-# integrate_tau (internal)
-# ===========================================================================
-
-test_that("integrate_tau with vector input", {
-  tau <- c(0.25, 0.50, 0.75)
-  a_tau <- c(1, 2, 1)
-  result <- pecotmr:::integrate_tau(tau, a_tau)
-  expect_type(result, "double")
-  expect_true(result > 0)
-})
-
-test_that("integrate_tau with matrix input", {
-  tau <- c(0.25, 0.50, 0.75)
-  a_tau <- matrix(c(1, 2, 1, 0.5, 1, 0.5), nrow = 2, byrow = TRUE)
-  result <- pecotmr:::integrate_tau(tau, a_tau)
-  expect_equal(nrow(result), 2)
-  expect_true(all(result > 0))
-})
-
-test_that("integrate_tau zero weights give zero", {
-  tau <- c(0.25, 0.50, 0.75)
-  a_tau <- c(0, 0, 0)
-  result <- pecotmr:::integrate_tau(tau, a_tau)
-  expect_equal(as.numeric(result), 0)
-})
-
-# ===========================================================================
-# get_hierarchical_clusters (internal)
-# ===========================================================================
-
-test_that("get_hierarchical_clusters returns valid structure", {
-  set.seed(42)
-  p <- 10
-  Sigma <- matrix(0.3, nrow = p, ncol = p)
-  Sigma[1:5, 1:5] <- 0.9
-  Sigma[6:10, 6:10] <- 0.9
-  diag(Sigma) <- 1
-  result <- pecotmr:::get_hierarchical_clusters(Sigma, between_cluster = 0.5)
-  expect_type(result, "list")
-  expect_true("cluster" %in% names(result))
-  expect_true("Q_modularity_initial" %in% names(result))
-  expect_true("cluster_ranges" %in% names(result))
-  expect_equal(nrow(result$cluster), p)
-  expect_true(all(rowSums(result$cluster) == 1))
-})
-
-# ===========================================================================
-# perform_grouped_integration (internal)
-# ===========================================================================
-
-test_that("perform_grouped_integration returns correct structure", {
-  set.seed(42)
-  n_variants <- 5
-  n_tau <- 10
-  twas_weight <- matrix(rnorm(n_variants * n_tau), nrow = n_variants)
-  rownames(twas_weight) <- paste0("v", 1:n_variants)
-  tau_values <- seq(0.1, 0.9, length.out = n_tau)
-  pseudo_R2 <- runif(n_tau, 0.1, 0.5)
-
-  result <- pecotmr:::perform_grouped_integration(twas_weight, tau_values, pseudo_R2,
-                                                   num_intervals = 3)
-  expect_type(result, "list")
-  expect_true("weights" %in% names(result))
-  expect_true("twas_weight_performance" %in% names(result))
-  expect_equal(nrow(result$weights), n_variants)
-  expect_true(ncol(result$weights) >= 3)
-})
-
-test_that("perform_grouped_integration single variant skips clustering", {
-  twas_weight <- matrix(rnorm(10), nrow = 1)
-  rownames(twas_weight) <- "v1"
-  tau_values <- seq(0.1, 0.9, length.out = 10)
-  pseudo_R2 <- runif(10, 0.1, 0.5)
-
-  result <- pecotmr:::perform_grouped_integration(twas_weight, tau_values, pseudo_R2,
-                                                   num_intervals = 3)
-  expect_equal(ncol(result$weights), 3)
-})
-
-# ===========================================================================
-# corr_filter (internal)
-# ===========================================================================
-
-test_that("corr_filter removes highly correlated columns", {
-  set.seed(42)
-  n <- 50; p <- 10
-  X <- matrix(rnorm(n * p), nrow = n)
-  colnames(X) <- paste0("v", 1:p)
-  X[, 2] <- X[, 1] + rnorm(n, sd = 0.01)
-  result <- pecotmr:::corr_filter(X, cor_thres = 0.9)
-  expect_true(ncol(result$X.new) < p)
-  expect_true(length(result$filter.id) == ncol(result$X.new))
-})
-
-test_that("corr_filter keeps all columns when uncorrelated", {
-  set.seed(42)
-  n <- 100; p <- 5
-  X <- matrix(rnorm(n * p), nrow = n)
-  colnames(X) <- paste0("v", 1:p)
-  result <- pecotmr:::corr_filter(X, cor_thres = 0.99)
-  expect_equal(ncol(result$X.new), p)
-  expect_equal(result$filter.id, 1:p)
-})
-
-test_that("corr_filter preserves colnames for single remaining column", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 3), nrow = n)
-  colnames(X) <- c("a", "b", "c")
-  X[, 2] <- X[, 1] + rnorm(n, sd = 0.001)
-  X[, 3] <- X[, 1] + rnorm(n, sd = 0.001)
-  result <- pecotmr:::corr_filter(X, cor_thres = 0.5)
-  expect_true(ncol(result$X.new) >= 1)
-  expect_true(!is.null(colnames(result$X.new)))
-})
-
-test_that("corr_filter handles single-column input", {
-  set.seed(42)
-  n <- 30
-  X <- matrix(rnorm(n), nrow = n, ncol = 1)
-  colnames(X) <- "v1"
-  expect_error(pecotmr:::corr_filter(X, cor_thres = 0.8))
-})
-
-test_that("corr_filter with low threshold removes more columns", {
-  set.seed(42)
-  n <- 100; p <- 5
-  X <- matrix(rnorm(n * p), nrow = n)
-  colnames(X) <- paste0("v", 1:p)
-  X[, 2] <- X[, 1] + rnorm(n, sd = 0.1)
-  X[, 3] <- X[, 1] + rnorm(n, sd = 0.1)
-  X[, 5] <- X[, 4] + rnorm(n, sd = 0.1)
-  result_strict <- pecotmr:::corr_filter(X, cor_thres = 0.3)
-  result_lenient <- pecotmr:::corr_filter(X, cor_thres = 0.99)
-  expect_true(ncol(result_strict$X.new) <= ncol(result_lenient$X.new))
-})
-
-test_that("corr_filter preserves colnames when no columns deleted", {
-  set.seed(42)
-  n <- 100; p <- 3
-  X <- matrix(rnorm(n * p), nrow = n)
-  colnames(X) <- c("snp_a", "snp_b", "snp_c")
-  result <- pecotmr:::corr_filter(X, cor_thres = 0.999)
-  expect_equal(colnames(result$X.new), colnames(X))
-})
-
-# ===========================================================================
-# remove_highcorr_snp (internal)
-# ===========================================================================
-
-test_that("remove_highcorr_snp returns X unchanged when no problematic columns", {
-  X <- matrix(rnorm(100), nrow = 20, ncol = 5)
-  colnames(X) <- paste0("v", 1:5)
-  result <- pecotmr:::remove_highcorr_snp(X, problematic_cols = character(0), strategy = "correlation")
-  expect_equal(ncol(result), 5)
-})
-
-test_that("remove_highcorr_snp removes single problematic column", {
-  X <- matrix(rnorm(100), nrow = 20, ncol = 5)
-  colnames(X) <- paste0("v", 1:5)
-  result <- pecotmr:::remove_highcorr_snp(X, problematic_cols = "v3", strategy = "correlation")
-  expect_equal(ncol(result), 4)
-  expect_false("v3" %in% colnames(result))
-})
-
-test_that("remove_highcorr_snp variance strategy removes lowest variance column", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 3), nrow = n, ncol = 3)
-  colnames(X) <- c("low_var", "mid_var", "high_var")
-  X[, 1] <- X[, 1] * 0.01
-  X[, 3] <- X[, 3] * 10
-  result <- pecotmr:::remove_highcorr_snp(X, problematic_cols = c("low_var", "mid_var", "high_var"),
-                                           strategy = "variance")
-  expect_equal(ncol(result), 2)
-  expect_false("low_var" %in% colnames(result))
-})
-
-test_that("remove_highcorr_snp correlation strategy with two columns removes one randomly", {
-  set.seed(42)
-  X <- matrix(rnorm(100), nrow = 20, ncol = 5)
-  colnames(X) <- paste0("v", 1:5)
-  result <- pecotmr:::remove_highcorr_snp(X, problematic_cols = c("v1", "v2"), strategy = "correlation")
-  expect_equal(ncol(result), 4)
-  expect_true(xor("v1" %in% colnames(result), "v2" %in% colnames(result)) ||
-              (!("v1" %in% colnames(result)) && !("v2" %in% colnames(result))) == FALSE)
-})
-
-test_that("remove_highcorr_snp correlation strategy with 3+ cols removes highest sum", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 4), nrow = n, ncol = 4)
-  colnames(X) <- paste0("v", 1:4)
-  X[, 2] <- X[, 1] + rnorm(n, sd = 0.01)
-  X[, 3] <- X[, 1] + rnorm(n, sd = 0.01)
-  result <- pecotmr:::remove_highcorr_snp(X, problematic_cols = c("v1", "v2", "v3"),
-                                           strategy = "correlation")
-  expect_equal(ncol(result), 3)
-})
-
-test_that("remove_highcorr_snp response_correlation strategy removes lowest response corr", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 3), nrow = n, ncol = 3)
-  colnames(X) <- c("v1", "v2", "v3")
-  response <- X[, 1] * 2 + rnorm(n, sd = 0.1)
-  result <- pecotmr:::remove_highcorr_snp(X, problematic_cols = c("v1", "v2", "v3"),
-                                           strategy = "response_correlation",
-                                           response = response)
-  expect_equal(ncol(result), 2)
-  expect_true("v1" %in% colnames(result))
-})
-
-test_that("remove_highcorr_snp errors on invalid strategy", {
-  X <- matrix(rnorm(100), nrow = 20, ncol = 5)
-  colnames(X) <- paste0("v", 1:5)
-  expect_error(
-    pecotmr:::remove_highcorr_snp(X, problematic_cols = c("v1", "v2"),
-                                   strategy = "invalid_strategy"),
-    "arg"
-  )
-})
-
-test_that("remove_highcorr_snp preserves column name when single column remains", {
-  set.seed(42)
-  X <- matrix(rnorm(40), nrow = 20, ncol = 2)
-  colnames(X) <- c("keeper", "removed")
-  result <- pecotmr:::remove_highcorr_snp(X, problematic_cols = "removed", strategy = "correlation")
-  expect_equal(ncol(result), 1)
-  expect_equal(colnames(result), "keeper")
-})
-
-# ===========================================================================
-# check_remove_highcorr_snp (internal)
-# ===========================================================================
-
-test_that("check_remove_highcorr_snp returns full-rank matrix when already full rank", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 3), nrow = n, ncol = 3)
-  colnames(X) <- paste0("v", 1:3)
-  C <- matrix(rnorm(n * 2), nrow = n, ncol = 2)
-  result <- pecotmr:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation")
-  expect_true(is.matrix(result))
-  expect_true(ncol(result) >= 1)
-})
-
-test_that("check_remove_highcorr_snp handles rank-deficient design via corr_filter fallback", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 4), nrow = n, ncol = 4)
-  colnames(X) <- paste0("v", 1:4)
-  X[, 4] <- X[, 1] + X[, 2]
-  C <- NULL
-  result <- pecotmr:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation")
-  design <- cbind(1, result)
-  expect_equal(qr(design)$rank, ncol(design))
-})
-
-test_that("check_remove_highcorr_snp preserves colname for single-column input", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n), nrow = n, ncol = 1)
-  colnames(X) <- "only_snp"
-  C <- NULL
-  result <- pecotmr:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation")
-  expect_equal(colnames(result), "only_snp")
-})
-
-# ===========================================================================
-# calculate_coef_heterogeneity (internal)
-# ===========================================================================
-
-test_that("calculate_coef_heterogeneity computes log(sd/mean)", {
-  df <- data.frame(
-    variant_id = c("v1", "v2"),
-    coef_qr_0.25 = c(1, 0),
-    coef_qr_0.50 = c(2, 0),
-    coef_qr_0.75 = c(3, 0),
-    stringsAsFactors = FALSE
-  )
-  result <- pecotmr:::calculate_coef_heterogeneity(df)
-  expect_equal(nrow(result), 2)
-  expect_true("coef_heter" %in% colnames(result))
-  expect_equal(result$coef_heter[1], log(1 / 2), tolerance = 0.01)
-  expect_true(is.na(result$coef_heter[2]))
-})
-
-test_that("calculate_coef_heterogeneity handles NA values", {
-  df <- data.frame(
-    variant_id = "v1",
-    coef_qr_0.25 = 1,
-    coef_qr_0.50 = NA,
-    coef_qr_0.75 = 3,
-    stringsAsFactors = FALSE
-  )
-  result <- pecotmr:::calculate_coef_heterogeneity(df)
-  expect_equal(nrow(result), 1)
-  expect_true(is.finite(result$coef_heter[1]) || is.na(result$coef_heter[1]))
-})
-
-test_that("calculate_coef_heterogeneity handles all-same coefficients", {
-  df <- data.frame(
-    variant_id = "v1",
-    coef_qr_0.25 = 5,
-    coef_qr_0.50 = 5,
-    coef_qr_0.75 = 5,
-    stringsAsFactors = FALSE
-  )
-  result <- pecotmr:::calculate_coef_heterogeneity(df)
-  expect_true(is.infinite(result$coef_heter[1]) || is.na(result$coef_heter[1]))
-})
-
-test_that("calculate_coef_heterogeneity handles negative coefficients", {
-  df <- data.frame(
-    variant_id = "v1",
-    coef_qr_0.25 = -1,
-    coef_qr_0.50 = -2,
-    coef_qr_0.75 = -3,
-    stringsAsFactors = FALSE
-  )
-  result <- pecotmr:::calculate_coef_heterogeneity(df)
-  expect_equal(result$coef_heter[1], log(1 / 2), tolerance = 0.01)
-})
-
-# ===========================================================================
-# calculate_xi_correlation
-# ===========================================================================
-
-test_that("calculate_xi_correlation computes xi for valid monotonic data", {
-  skip_if_not_installed("XICOR")
-  df <- data.frame(variant_id = "v1")
-  tau_range <- seq(0.05, 0.95, by = 0.05)
-  for (tau in tau_range) {
-    df[[paste0("coef_qr_", tau)]] <- tau * 2
-  }
-  result <- calculate_xi_correlation(df)
-  expect_true("xi" %in% colnames(result))
-  expect_true(is.numeric(result$xi))
-})
-
-test_that("calculate_xi_correlation warns on missing columns", {
-  skip_if_not_installed("XICOR")
-  df <- data.frame(variant_id = "v1", some_col = 1)
-  expect_warning(result <- calculate_xi_correlation(df))
-  expect_true(is.na(result$xi))
-})
-
-test_that("calculate_xi_correlation returns NA for too few valid values", {
-  skip_if_not_installed("XICOR")
-  df <- data.frame(variant_id = "v1")
-  tau_range <- seq(0.1, 0.9, by = 0.05)
-  for (tau in tau_range) {
-    df[[paste0("coef_qr_", tau)]] <- NA
-  }
-  df$coef_qr_0.1 <- 1
-  result <- calculate_xi_correlation(df, min_valid = 10)
-  expect_true(is.na(result$xi))
-})
-
-test_that("calculate_xi_correlation handles multiple variants", {
-  skip_if_not_installed("XICOR")
-  df <- data.frame(variant_id = c("v1", "v2"))
-  tau_range <- seq(0.1, 0.9, by = 0.05)
-  for (tau in tau_range) {
-    df[[paste0("coef_qr_", tau)]] <- c(tau * 2, tau * -1)
-  }
-  result <- calculate_xi_correlation(df, tau_range = tau_range)
-  expect_equal(nrow(result), 2)
-  expect_true("xi" %in% colnames(result))
-  expect_true("xi_pval" %in% colnames(result))
-})
-
-test_that("calculate_xi_correlation handles error in xicor gracefully", {
-  skip_if_not_installed("XICOR")
-  df <- data.frame(variant_id = "v1")
-  tau_range <- seq(0.1, 0.9, by = 0.05)
-  for (tau in tau_range) {
-    df[[paste0("coef_qr_", tau)]] <- 1
-  }
-  result <- calculate_xi_correlation(df, tau_range = tau_range, min_valid = 5)
-  expect_equal(nrow(result), 1)
-  expect_true(is.numeric(result$xi) || is.na(result$xi))
-})
-
-# ===========================================================================
-# qr_screen
-# ===========================================================================
-
-test_that("qr_screen runs and returns results with quantreg", {
-  skip_if_not_installed("quantreg")
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 5), nrow = n, ncol = 5)
-  colnames(X) <- paste0("chr1:", seq(100, 500, by = 100), ":A:G")
-  Y <- matrix(rnorm(n), ncol = 1)
-  colnames(Y) <- "pheno1"
-  result <- qr_screen(X, Y, tau.list = c(0.25, 0.5, 0.75))
-  expect_type(result, "list")
-})
-
-test_that("qr_screen with fdr screen_method works", {
-  skip_if_not_installed("quantreg")
-  set.seed(42)
-  n <- 50; p <- 5
-  X <- matrix(rnorm(n * p), nrow = n, ncol = p)
-  colnames(X) <- paste0("chr1:", seq(100, 500, by = 100), ":A:G")
-  Y <- matrix(rnorm(n), ncol = 1)
-  colnames(Y) <- "pheno1"
-  result <- qr_screen(X, Y, tau.list = c(0.25, 0.5, 0.75), screen_method = "fdr")
-  expect_type(result, "list")
-  expect_true("df_result" %in% names(result))
-  expect_true("fdr_p_qr" %in% colnames(result$df_result))
-})
-
-test_that("qr_screen errors with invalid screen_method", {
-  skip_if_not_installed("quantreg")
-  set.seed(42)
-  n <- 30; p <- 3
-  X <- matrix(rnorm(n * p), nrow = n, ncol = p)
-  colnames(X) <- paste0("chr1:", seq(100, 300, by = 100), ":A:G")
-  Y <- matrix(rnorm(n), ncol = 1)
-  colnames(Y) <- "pheno1"
-  expect_error(qr_screen(X, Y, tau.list = c(0.5), screen_method = "invalid_method"),
-               "Invalid screen_method")
-})
-
-test_that("qr_screen with covariates Z", {
-  skip_if_not_installed("quantreg")
-  set.seed(42)
-  n <- 50; p <- 3
-  X <- matrix(rnorm(n * p), nrow = n, ncol = p)
-  colnames(X) <- paste0("chr1:", seq(100, 300, by = 100), ":A:G")
-  Y <- matrix(rnorm(n), ncol = 1)
-  colnames(Y) <- "pheno1"
-  Z <- matrix(rnorm(n * 2), nrow = n, ncol = 2)
-  result <- qr_screen(X, Y, Z = Z, tau.list = c(0.25, 0.5, 0.75))
-  expect_type(result, "list")
-  expect_equal(nrow(result$df_result), p)
-})
-
-test_that("qr_screen with single variant", {
-  skip_if_not_installed("quantreg")
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n), nrow = n, ncol = 1)
-  colnames(X) <- "chr1:100:A:G"
-  Y <- matrix(rnorm(n), ncol = 1)
-  colnames(Y) <- "pheno1"
-  result <- qr_screen(X, Y, tau.list = c(0.25, 0.5, 0.75))
-  expect_type(result, "list")
-  expect_equal(nrow(result$df_result), 1)
-})
-
-# ===========================================================================
-# perform_qr_analysis
-# ===========================================================================
-
-test_that("perform_qr_analysis returns wide-format results", {
-  skip_if_not_installed("quantreg")
-  set.seed(42)
-  n <- 50; p <- 3
-  X <- matrix(rnorm(n * p), nrow = n, ncol = p)
-  colnames(X) <- paste0("chr1:", seq(100, 300, by = 100), ":A:G")
-  Y <- matrix(rnorm(n), ncol = 1)
-  colnames(Y) <- "pheno1"
-  result <- perform_qr_analysis(X, Y, tau_values = c(0.25, 0.5, 0.75))
-  expect_true(is.data.frame(result))
-  expect_equal(nrow(result), p)
-  expect_true("coef_qr_0.25" %in% colnames(result))
-  expect_true("coef_qr_0.5" %in% colnames(result))
-  expect_true("coef_qr_0.75" %in% colnames(result))
-  expect_true("chr" %in% colnames(result))
-  expect_true("pos" %in% colnames(result))
-})
-
-test_that("perform_qr_analysis with covariates Z", {
-  skip_if_not_installed("quantreg")
-  set.seed(42)
-  n <- 50; p <- 2
-  X <- matrix(rnorm(n * p), nrow = n, ncol = p)
-  colnames(X) <- paste0("chr1:", seq(100, 200, by = 100), ":A:G")
-  Y <- matrix(rnorm(n), ncol = 1)
-  colnames(Y) <- "pheno1"
-  Z <- matrix(rnorm(n * 2), nrow = n, ncol = 2)
-  result <- perform_qr_analysis(X, Y, Z = Z, tau_values = c(0.5))
-  expect_equal(nrow(result), p)
-  expect_true("coef_qr_0.5" %in% colnames(result))
-})
-
-test_that("perform_qr_analysis with single variant", {
-  skip_if_not_installed("quantreg")
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n), nrow = n, ncol = 1)
-  colnames(X) <- "chr1:100:A:G"
-  Y <- matrix(rnorm(n), ncol = 1)
-  colnames(Y) <- "pheno1"
-  result <- perform_qr_analysis(X, Y, tau_values = c(0.25, 0.5))
-  expect_equal(nrow(result), 1)
-})
-
-# ===========================================================================
-# multicontext_ld_clumping
-# ===========================================================================
-
-test_that("multicontext_ld_clumping returns empty result when no significant SNPs", {
-  qr_results <- list(sig.SNPs_names = character(0))
-  X <- matrix(rnorm(100), nrow = 20, ncol = 5)
-  result <- multicontext_ld_clumping(X, qr_results)
-  expect_null(result$final_SNPs)
-  expect_null(result$clumped_SNPs)
-  expect_true(grepl("No significant", result$message))
-})
-
-test_that("multicontext_ld_clumping returns early when X has single column", {
-  X <- matrix(rnorm(20), nrow = 20, ncol = 1)
-  colnames(X) <- "chr1:100:A:G"
-  qr_results <- list(sig.SNPs_names = "chr1:100:A:G")
-  result <- multicontext_ld_clumping(X, qr_results)
-  expect_equal(result$final_SNPs, "chr1:100:A:G")
-  expect_true(grepl("Only one SNP", result$message))
-})
-
-# ===========================================================================
-# quantile_twas_weight_pipeline (mocked)
-# ===========================================================================
-
-test_that("quantile_twas_weight_pipeline returns early when no significant SNPs and screen_significant=TRUE", {
-  skip_if_not_installed("quantreg")
-  set.seed(42)
-  n <- 50; p <- 3
-  X <- matrix(rnorm(n * p), nrow = n, ncol = p)
-  colnames(X) <- paste0("chr1:", seq(100, 300, by = 100), ":A:G")
-  Y <- matrix(rnorm(n), ncol = 1)
-  colnames(Y) <- "pheno1"
-
-  local_mocked_bindings(
-    QUAIL_rank_score_pipeline = function(...) list(rank_score = matrix(rnorm(n), ncol = 1)),
-    QUAIL_pipeline = function(...) list(vqtl = "mocked"),
-    qr_screen = function(...) {
-      list(
-        df_result = data.frame(variant_id = colnames(X)),
-        sig_SNP_threshold = integer(0),
-        sig.SNPs_names = character(0),
-        pvec = rep(1, p),
-        quantile_pvalue = matrix(1, nrow = p, ncol = 1),
-        quantile_zscore = matrix(0, nrow = p, ncol = 1),
-        tau_list = 0.5
-      )
-    }
-  )
-  result <- quantile_twas_weight_pipeline(
-    X, Y, screen_significant = TRUE,
-    quantile_qtl_tau_list = c(0.5),
-    quantile_twas_tau_list = c(0.5)
-  )
-  expect_true(grepl("No significant", result$message))
-  expect_true("qr_screen_pvalue_df" %in% names(result))
-})
-
-test_that("quantile_twas_weight_pipeline returns early when no raw p_qr < 0.05", {
-  skip_if_not_installed("quantreg")
-  set.seed(42)
-  n <- 50; p <- 3
-  X <- matrix(rnorm(n * p), nrow = n, ncol = p)
-  colnames(X) <- paste0("chr1:", seq(100, 300, by = 100), ":A:G")
-  Y <- matrix(rnorm(n), ncol = 1)
-  colnames(Y) <- "pheno1"
-
-  local_mocked_bindings(
-    QUAIL_rank_score_pipeline = function(...) list(rank_score = matrix(rnorm(n), ncol = 1)),
-    QUAIL_pipeline = function(...) list(vqtl = "mocked"),
-    qr_screen = function(...) {
-      pvec <- rep(0.5, p)
-      names(pvec) <- colnames(X)
-      list(
-        df_result = data.frame(variant_id = colnames(X)),
-        sig_SNP_threshold = 1:p,
-        sig.SNPs_names = colnames(X),
-        pvec = pvec,
-        quantile_pvalue = matrix(0.5, nrow = p, ncol = 1),
-        quantile_zscore = matrix(0, nrow = p, ncol = 1),
-        tau_list = 0.5
-      )
-    },
-    perform_qr_analysis = function(...) {
-      data.frame(
-        variant_id = colnames(X),
-        coef_qr_0.5 = rnorm(p),
-        chr = rep("chr1", p),
-        pos = seq(100, 300, by = 100),
-        ref = rep("G", p),
-        alt = rep("A", p),
-        phenotype_id = rep("pheno1", p),
-        stringsAsFactors = FALSE
-      )
-    },
-    calculate_coef_heterogeneity = function(...) {
-      data.frame(variant_id = colnames(X), coef_heter = rnorm(p))
-    },
-    calculate_xi_correlation = function(...) {
-      data.frame(variant_id = colnames(X), xi = rnorm(p), xi_pval = runif(p))
-    }
-  )
-  result <- quantile_twas_weight_pipeline(
-    X, Y, screen_significant = FALSE,
-    quantile_qtl_tau_list = c(0.5),
-    quantile_twas_tau_list = c(0.5)
-  )
-  # expect_true(grepl("No variants with raw", result$message))
-})
diff --git a/tests/testthat/test_twas.R b/tests/testthat/test_twas.R
index 9b6e2fdf..7dff17ef 100644
--- a/tests/testthat/test_twas.R
+++ b/tests/testthat/test_twas.R
@@ -1651,7 +1651,7 @@ test_that("twas_pipeline: pick_best_model skips context when no model passes thr
 })
 
 # NOTE: Removed test "twas_pipeline: rsq_cutoff=0 skips best model selection entirely"
-# When rsq_cutoff=0 in non-quantile mode, the source code assigns NA model_selection
+# When rsq_cutoff=0, the source code assigns NA model_selection
 # instead of a list structure, causing the is_imputable column to be missing in the
 # downstream data frame construction. This is a known source code issue.
 
@@ -1904,7 +1904,7 @@ test_that("twas_pipeline: empty twas_variants intersection returns empty data fr
 # ===========================================================================
 
 # NOTE: Removed test "twas_pipeline: missing data_type in twas_weights_data gets assigned NA"
-# When rsq_cutoff=0 in non-quantile mode, the source code assigns NA model_selection
+# When rsq_cutoff=0, the source code assigns NA model_selection
 # instead of a list structure, causing the is_imputable column to be missing in the
 # downstream data frame construction. This is a known source code issue.
 
@@ -2108,73 +2108,6 @@ test_that("twas_pipeline: mr_result is returned in final result", {
   expect_true("mr_result" %in% names(result))
 })
 
-# ===========================================================================
-# twas_pipeline: quantile_twas mode
-# ===========================================================================
-
-test_that("twas_pipeline: quantile_twas=TRUE sets rsq_cutoff to 0 and skips model selection", {
-  set.seed(501)
-  variant_ids <- make_variant_ids()
-  p <- length(variant_ids)
-  weights_mat <- make_weights_matrix(variant_ids)
-
-  twas_weights_data <- make_twas_weights_data(
-    molecular_id = "gene1",
-    contexts = "ctx1"
-  )
-  # Add quantile-specific performance data
-  twas_weights_data$gene1$twas_cv_performance$ctx1$susie_performance <- data.frame(
-    quantile_start = 0, quantile_end = 0.25, pseudo_R2_avg = 0.15,
-    rsq = 0.5, adj_rsq = 0.45, pval = 0.01, adj_rsq_pval = 0.02
-  )
-  twas_weights_data$gene1$twas_cv_performance$ctx1$lasso_performance <- data.frame(
-    quantile_start = 0, quantile_end = 0.25, pseudo_R2_avg = 0.12,
-    rsq = 0.3, adj_rsq = 0.25, pval = 0.05, adj_rsq_pval = 0.06
-  )
-
-  LD_matrix <- diag(p)
-  rownames(LD_matrix) <- colnames(LD_matrix) <- variant_ids
-
-  mock_twas_data_qced <- list(
-    gene1 = list(
-      chrom = 1,
-      data_type = list(ctx1 = "expression"),
-      variant_names = list(ctx1 = list(study1 = variant_ids)),
-      weights_qced = list(ctx1 = list(study1 = list(
-        scaled_weights = weights_mat,
-        weights = weights_mat
-      ))),
-      gwas_qced = list(study1 = data.frame(
-        variant_id = variant_ids,
-        z = rnorm(p),
-        stringsAsFactors = FALSE
-      )),
-      LD = LD_matrix
-    )
-  )
-
-  local_mocked_bindings(
-    harmonize_twas = function(...) {
-      list(twas_data_qced = mock_twas_data_qced, ref_panel = data.frame())
-    }
-  )
-
-  result <- suppressMessages(twas_pipeline(
-    twas_weights_data = twas_weights_data,
-    ld_meta_file_path = "fake_ld.tsv",
-    gwas_meta_file = "fake_gwas.tsv",
-    region_block = "chr1_100_500",
-    ld_reference_sample_size = 17000,
-    quantile_twas = TRUE
-  ))
-
-  expect_true(is.list(result))
-  # In quantile mode the output should have twas_result
-  if (!is.null(result$twas_result)) {
-    expect_true("method" %in% colnames(result$twas_result))
-  }
-})
-
 # ===========================================================================
 # twas_pipeline: nrow(twas_results_table) == 0 returns NULL results
 # ===========================================================================
@@ -2614,72 +2547,6 @@ test_that("twas_pipeline: rsq_pval_option='adj_rsq_pval' uses the correct p-valu
   # No model should pass -> no imputable genes
 })
 
-# ===========================================================================
-# twas_pipeline: format_twas_data internal function -- quantile_twas path
-# ===========================================================================
-
-test_that("twas_pipeline: quantile_twas with output_twas_data exercises quantile format_twas_data", {
-  set.seed(1001)
-  variant_ids <- make_variant_ids()
-  p <- length(variant_ids)
-  weights_mat <- make_weights_matrix(variant_ids)
-
-  twas_weights_data <- make_twas_weights_data(
-    molecular_id = "gene1",
-    contexts = "ctx1"
-  )
-  twas_weights_data$gene1$twas_cv_performance$ctx1$susie_performance <- data.frame(
-    quantile_start = 0, quantile_end = 0.25, pseudo_R2_avg = 0.15,
-    rsq = 0.5, adj_rsq = 0.45, pval = 0.01, adj_rsq_pval = 0.02
-  )
-  twas_weights_data$gene1$twas_cv_performance$ctx1$lasso_performance <- data.frame(
-    quantile_start = 0, quantile_end = 0.25, pseudo_R2_avg = 0.12,
-    rsq = 0.3, adj_rsq = 0.25, pval = 0.05, adj_rsq_pval = 0.06
-  )
-
-  LD_matrix <- diag(p)
-  rownames(LD_matrix) <- colnames(LD_matrix) <- variant_ids
-
-  mock_twas_data_qced <- list(
-    gene1 = list(
-      chrom = 1,
-      data_type = list(ctx1 = "expression"),
-      variant_names = list(ctx1 = list(study1 = variant_ids)),
-      weights_qced = list(ctx1 = list(study1 = list(
-        scaled_weights = weights_mat,
-        weights = weights_mat
-      ))),
-      gwas_qced = list(study1 = data.frame(
-        variant_id = variant_ids,
-        z = rnorm(p),
-        stringsAsFactors = FALSE
-      )),
-      LD = LD_matrix
-    )
-  )
-
-  local_mocked_bindings(
-    harmonize_twas = function(...) {
-      list(twas_data_qced = mock_twas_data_qced, ref_panel = data.frame())
-    }
-  )
-
-  result <- suppressMessages(twas_pipeline(
-    twas_weights_data = twas_weights_data,
-    ld_meta_file_path = "fake_ld.tsv",
-    gwas_meta_file = "fake_gwas.tsv",
-    region_block = "chr1_100_500",
-    ld_reference_sample_size = 17000,
-    quantile_twas = TRUE,
-    output_twas_data = TRUE
-  ))
-
-  expect_true(is.list(result))
-  if (!is.null(result$twas_result)) {
-    expect_true("method" %in% colnames(result$twas_result))
-  }
-})
-
 # ===========================================================================
 # Tests from test_twas_predict.R
 # ===========================================================================
@@ -3112,75 +2979,6 @@ test_that("twas_pipeline: adj_rsq_pval option exercised in pick_best_model", {
   }
 })
 
-# ===========================================================================
-# SECTION Q: twas_pipeline - quantile_twas code paths in Step 2 merge
-# ===========================================================================
-
-test_that("twas_pipeline: quantile_twas=TRUE with proper cv data triggers quantile merge path", {
-  set.seed(902)
-  variant_ids <- make_variant_ids()
-  p <- length(variant_ids)
-  weights_mat <- make_weights_matrix(variant_ids)
-
-  twas_weights_data <- make_twas_weights_data(
-    molecular_id = "gene1",
-    contexts = "ctx1",
-    methods = c("susie_weights", "lasso_weights")
-  )
-  # Add quantile-specific performance columns
-  twas_weights_data$gene1$twas_cv_performance$ctx1$susie_performance <- data.frame(
-    quantile_start = 0, quantile_end = 0.25, pseudo_R2_avg = 0.15,
-    rsq = 0.5, adj_rsq = 0.45, pval = 0.01, adj_rsq_pval = 0.02
-  )
-  twas_weights_data$gene1$twas_cv_performance$ctx1$lasso_performance <- data.frame(
-    quantile_start = 0, quantile_end = 0.25, pseudo_R2_avg = 0.12,
-    rsq = 0.3, adj_rsq = 0.25, pval = 0.05, adj_rsq_pval = 0.06
-  )
-
-  LD_matrix <- diag(p)
-  rownames(LD_matrix) <- colnames(LD_matrix) <- variant_ids
-
-  mock_twas_data_qced <- list(
-    gene1 = list(
-      chrom = 1,
-      data_type = list(ctx1 = "expression"),
-      variant_names = list(ctx1 = list(study1 = variant_ids)),
-      weights_qced = list(ctx1 = list(study1 = list(
-        scaled_weights = weights_mat,
-        weights = weights_mat
-      ))),
-      gwas_qced = list(study1 = data.frame(
-        variant_id = variant_ids,
-        z = rnorm(p),
-        stringsAsFactors = FALSE
-      )),
-      LD = LD_matrix
-    )
-  )
-
-  local_mocked_bindings(
-    harmonize_twas = function(...) {
-      list(twas_data_qced = mock_twas_data_qced, ref_panel = data.frame())
-    }
-  )
-
-  result <- suppressMessages(twas_pipeline(
-    twas_weights_data = twas_weights_data,
-    ld_meta_file_path = "fake_ld.tsv",
-    gwas_meta_file = "fake_gwas.tsv",
-    region_block = "chr1_100_500",
-    ld_reference_sample_size = 17000,
-    quantile_twas = TRUE
-  ))
-
-  expect_true(is.list(result))
-  if (!is.null(result$twas_result)) {
-    # Quantile mode should have quantile_start/quantile_end columns
-    expect_true("quantile_start" %in% colnames(result$twas_result) ||
-                "method" %in% colnames(result$twas_result))
-  }
-})
-
 # ===========================================================================
 # SECTION R: twas_pipeline - data_type missing from twas_weights_data (line 614)
 # ===========================================================================

From 689add0a050e3110305c1a3b44a115539d1af404 Mon Sep 17 00:00:00 2001
From: Daniel Nachun <daniel.nachun@gmail.com>
Date: Fri, 17 Apr 2026 13:51:29 -0700
Subject: [PATCH 2/5] clean up mash_wrapper, TWAS, finemapping reg. regression
 pipelines

---
 .Rbuildignore                      |   1 +
 .github/recipe/recipe.yaml         |   5 +-
 .gitignore                         |   1 +
 DESCRIPTION                        |   3 +-
 NAMESPACE                          |  12 -
 R/mash_wrapper.R                   | 439 +++++++++++------------------
 R/multigene_udr.R                  |  88 ------
 R/plot.R                           | 129 ---------
 R/regularized_regression.R         |  95 +++----
 R/susie_wrapper.R                  |  83 +++---
 R/twas.R                           | 153 +++++-----
 R/twas_weights.R                   | 115 ++++----
 R/univariate_pipeline.R            |  89 +++---
 man/corr_filter.Rd                 |  19 --
 man/manhattan_plot.Rd              |  20 --
 man/multigene_udr.Rd               |  37 ---
 man/twas_pipeline.Rd               |   1 -
 man/venn.Rd                        |  17 --
 pixi.toml                          |   4 +-
 tests/testthat/test_LD.R           | 105 +++++++
 tests/testthat/test_file_utils.R   |  60 ++++
 tests/testthat/test_ld_loader.R    | 122 ++++++++
 tests/testthat/test_mash_wrapper.R |  34 +--
 tests/testthat/test_plot.R         |  26 --
 24 files changed, 706 insertions(+), 952 deletions(-)
 delete mode 100644 R/multigene_udr.R
 delete mode 100644 R/plot.R
 delete mode 100644 man/corr_filter.Rd
 delete mode 100644 man/manhattan_plot.Rd
 delete mode 100644 man/multigene_udr.Rd
 delete mode 100644 man/venn.Rd
 create mode 100644 tests/testthat/test_ld_loader.R
 delete mode 100644 tests/testthat/test_plot.R

diff --git a/.Rbuildignore b/.Rbuildignore
index a5c06f4a..b317f075 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -7,3 +7,4 @@
 ^pixi.lock
 ^\.pixi
 ^.covrignore
+^.claude
diff --git a/.github/recipe/recipe.yaml b/.github/recipe/recipe.yaml
index e279aac0..d93c8751 100644
--- a/.github/recipe/recipe.yaml
+++ b/.github/recipe/recipe.yaml
@@ -44,6 +44,7 @@ requirements:
     - r-gbj
     - r-glmnet
     - r-harmonicmeanp
+    - r-igraph
     - r-l0learn
     - r-magrittr
     - r-mashr
@@ -54,8 +55,6 @@ requirements:
     - r-pgenlibr
     - r-purrr
     - r-qgg
-    - r-quadprog
-    - r-quantreg
     - r-rcpp
     - r-rcpparmadillo
     - r-rcppdpr
@@ -68,7 +67,6 @@ requirements:
     - r-tidyr
     - r-vctrs
     - r-vroom
-    - r-xicor
   run:
     - bioconductor-biostrings
     - bioconductor-iranges
@@ -91,6 +89,7 @@ requirements:
     - r-gbj
     - r-glmnet
     - r-harmonicmeanp
+    - r-igraph
     - r-l0learn
     - r-magrittr
     - r-mashr
diff --git a/.gitignore b/.gitignore
index 8ec0282b..8f6dda7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ pecotmr.Rproj
 /pixi.lock
 /.pixi
 docs
+.claude
diff --git a/DESCRIPTION b/DESCRIPTION
index 5b28239a..b81719cf 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -25,6 +25,7 @@ Imports:
     dplyr,
     furrr,
     future,
+    igraph,
     magrittr,
     matrixStats,
     purrr,
@@ -55,7 +56,6 @@ Suggests:
     ncvreg,
     pgenlibr,
     qgg,
-    udr,
     qvalue,
     rmarkdown,
     snpStats,
@@ -64,7 +64,6 @@ Remotes:
     stephenslab/fsusieR,
     stephenslab/mvsusieR,
     stephenslab/susieR,
-    stephenslab/udr
 LinkingTo:
     Rcpp,
     RcppArmadillo
diff --git a/NAMESPACE b/NAMESPACE
index 1dc4b07f..faf1e899 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -75,7 +75,6 @@ export(mr_format)
 export(mrash_weights)
 export(mrmash_weights)
 export(mrmash_wrapper)
-export(multigene_udr)
 export(multivariate_analysis_pipeline)
 export(mvsusie_weights)
 export(normalize_variant_id)
@@ -127,11 +126,8 @@ importFrom(IRanges,reduce)
 importFrom(IRanges,start)
 importFrom(S4Vectors,queryHits)
 importFrom(S4Vectors,subjectHits)
-importFrom(bigsnpr,snp_clumping)
-importFrom(bigstatsr,FBM.code256)
 importFrom(coloc,coloc.bf_bf)
 importFrom(doFuture,registerDoFuture)
-importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
 importFrom(dplyr,all_of)
 importFrom(dplyr,any_of)
@@ -162,7 +158,6 @@ importFrom(future,multicore)
 importFrom(future,plan)
 importFrom(magrittr,"%>%")
 importFrom(matrixStats,colVars)
-importFrom(parallel,mclapply)
 importFrom(purrr,compact)
 importFrom(purrr,exec)
 importFrom(purrr,map)
@@ -177,20 +172,14 @@ importFrom(readr,cols)
 importFrom(readr,parse_number)
 importFrom(readr,read_delim)
 importFrom(rlang,"!!!")
-importFrom(stats,as.dist)
 importFrom(stats,coef)
 importFrom(stats,cor)
-importFrom(stats,cov)
-importFrom(stats,cutree)
-importFrom(stats,hclust)
 importFrom(stats,lm.fit)
 importFrom(stats,pchisq)
 importFrom(stats,pnorm)
 importFrom(stats,predict)
-importFrom(stats,rnorm)
 importFrom(stats,sd)
 importFrom(stats,setNames)
-importFrom(stats,var)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_remove)
 importFrom(stringr,str_replace)
@@ -205,7 +194,6 @@ importFrom(susieR,susie_rss)
 importFrom(susieR,univariate_regression)
 importFrom(tibble,as_tibble)
 importFrom(tibble,tibble)
-importFrom(tidyr,pivot_wider)
 importFrom(tidyr,replace_na)
 importFrom(tidyr,separate)
 importFrom(tools,file_path_sans_ext)
diff --git a/R/mash_wrapper.R b/R/mash_wrapper.R
index f31baa1c..55c5c5d2 100644
--- a/R/mash_wrapper.R
+++ b/R/mash_wrapper.R
@@ -1,3 +1,11 @@
+# Filter rows of a z-score matrix by significance p-value cutoff.
+# Returns integer indices of rows where any |z| exceeds the threshold.
+# @noRd
+filter_by_significance <- function(z_matrix, sig_p_cutoff) {
+  z_threshold <- sqrt(qchisq(sig_p_cutoff, df = 1, lower.tail = FALSE))
+  which(apply(z_matrix, 1, function(row) any(abs(row) >= z_threshold)))
+}
+
 #' @importFrom vroom vroom
 #' @export
 filter_invalid_summary_stat <- function(dat_list, bhat = NULL, sbhat = NULL, z = NULL, btoz = FALSE, sig_p_cutoff = 1E-6, filter_by_missing_rate = 0.2) {
@@ -41,9 +49,7 @@ filter_invalid_summary_stat <- function(dat_list, bhat = NULL, sbhat = NULL, z =
     }
     if ("strong.z" %in% names(dat_list)) {
       if (!is.null(sig_p_cutoff)) {
-        chi_square_stat <- qchisq(sig_p_cutoff, df = 1, lower.tail = FALSE)
-        z_score <- sqrt(chi_square_stat)
-        keep_index <- which(apply(dat_list$strong.z, 1, function(row) any(abs(row) >= z_score)))
+        keep_index <- filter_by_significance(dat_list$strong.z, sig_p_cutoff)
         dat_list[["strong.z"]] <- dat_list$strong.z[keep_index, ]
         dat_list[["strong.b"]] <- dat_list$strong.b[keep_index, ]
         dat_list[["strong.s"]] <- dat_list$strong.s[keep_index, ]
@@ -64,23 +70,15 @@ filter_invalid_summary_stat <- function(dat_list, bhat = NULL, sbhat = NULL, z =
     }
 
     # Process each component if it exists
-    if (!is.null(dat_list$strong) && !is.null(dat_list$strong$z)) {
-      dat_list$strong$z <- process_z(dat_list$strong$z)
-    }
-
-    if (!is.null(dat_list$random) && !is.null(dat_list$random$z)) {
-      dat_list$random$z <- process_z(dat_list$random$z)
-    }
-
-    if (!is.null(dat_list$null) && !is.null(dat_list$null$z)) {
-      dat_list$null$z <- process_z(dat_list$null$z)
+    for (comp in c("strong", "random", "null")) {
+      if (!is.null(dat_list[[comp]]) && !is.null(dat_list[[comp]]$z)) {
+        dat_list[[comp]]$z <- process_z(dat_list[[comp]]$z)
+      }
     }
 
     # Apply significance cutoff to strong signals if applicable
     if (!is.null(dat_list$strong) && !is.null(dat_list$strong$z) && !is.null(sig_p_cutoff)) {
-      chi_square_stat <- qchisq(sig_p_cutoff, df = 1, lower.tail = FALSE)
-      z_score <- sqrt(chi_square_stat)
-      keep_index <- which(apply(dat_list$strong$z, 1, function(row) any(abs(row) >= z_score)))
+      keep_index <- filter_by_significance(dat_list$strong$z, sig_p_cutoff)
       dat_list$strong$z <- dat_list$strong$z[keep_index, , drop = FALSE]
     }
   }
@@ -106,20 +104,13 @@ filter_mixture_components <- function(conditions_to_keep, U, w = NULL, w_cutoff
   }, conditions_to_keep)
 
   # Remove matrices where all values are zero or weight is below cutoff
-  for (mat_name in names(U)) {
-    if (all(U[[mat_name]] == 0)) {
-      U[[mat_name]] <- NULL
-      if (!is.null(w)) {
-        w <- w[!names(w) %in% mat_name]
-      }
-      next
-    }
-    if (!is.null(w)) {
-      if (w[mat_name] < w_cutoff) {
-        w <- w[!names(w) %in% mat_name]
-        U[[mat_name]] <- NULL
-      }
-    }
+  keep_names <- names(purrr::keep(U, function(mat) !all(mat == 0)))
+  if (!is.null(w)) {
+    keep_names <- intersect(keep_names, names(w[w >= w_cutoff]))
+  }
+  U <- U[keep_names]
+  if (!is.null(w)) {
+    w <- w[keep_names]
   }
 
   # Note: Matrices in U may contain very small values on the diagonal
@@ -329,169 +320,103 @@ merge_susie_cs <- function(susie_fit, coverage = "cs_coverage_0.95", complementa
     }
     return(overlap_sets)
   }
-  # Merge overlapping credible sets and update the credible sets in
-  # variants_list
+  # Merge overlapping credible sets using graph connected components
   merge_and_update_overlap_sets <- function(variants_sets_and_pips_list, overlap_sets) {
-    # Combine and identify unique combined sets
-    combined_sets <- unique(unlist(lapply(overlap_sets, function(x) {
-      paste(sort(x),
-        collapse = ","
-      )
-    })))
-    unique_combined_sets <- unique(combined_sets)
-
-    # Split each combined set into individual sets
-    split_sets <- lapply(unique_combined_sets, function(x) strsplit(x, ",")[[1]])
-
-    # Identify and merge overlapping credible sets
-    if (length(split_sets) != 1) {
-      for (i in 1:(length(split_sets) - 1)) {
-        for (j in (i + 1):length(split_sets)) {
-          if (!is.null(split_sets[[i]]) && !is.null(split_sets[[j]])) {
-            # Check both sets exist
-            if (length(intersect(split_sets[[i]], split_sets[[j]])) > 0) {
-              # Merge overlapping sets Update both i-th and j-th elements with
-              # the merged set
-              split_sets[[i]] <- unique(c(split_sets[[i]], split_sets[[j]]))
-              split_sets[[j]] <- unique(c(split_sets[[i]], split_sets[[j]]))
-            }
-          }
-        }
-      }
-    }
-    # Eliminate duplicates from the list of sets Convert each set into a string
-    # to facilitate comparison
-    set_strings <- sapply(split_sets, function(set) paste(sort(set), collapse = ","))
-    # Identify unique sets based on their string representation
-    unique_set_strings <- unique(set_strings)
-    # Retain only the largest combined sets
-    final_combined_sets <- character()
-    for (set in unique_set_strings) {
-      if (length(unique_set_strings) == 1) {
-        final_combined_sets <- unique_set_strings[[1]]
-      } else {
-        included_in_other_set <- FALSE
-        set_elements <- unlist(strsplit(set, ","))
-        for (other_set in unique_set_strings) {
-          if (set != other_set && all(set_elements %in% unlist(strsplit(
-            other_set,
-            ","
-          )))) {
-            included_in_other_set <- TRUE
-            break
-          }
-        }
-        if (!included_in_other_set) {
-          final_combined_sets <- c(final_combined_sets, set)
-        }
-      }
-    }
+    # Collect all unique set names from overlap_sets
+    all_sets <- unique(unlist(overlap_sets))
+    if (length(all_sets) == 0) return(list())
+
+    # Build edges: each overlap_sets entry links its sets together
+    edges <- unlist(lapply(overlap_sets, function(sets) {
+      if (length(sets) < 2) return(character(0))
+      pairs <- combn(sets, 2)
+      as.character(pairs)
+    }))
 
-    # Create a mapping from original set names to combined set names
+    g <- igraph::make_graph(edges, directed = FALSE)
+    comp <- igraph::components(g)
+
+    # Build mapping: each set name -> comma-separated merged component label
     set_name_map <- list()
-    for (combined_set in final_combined_sets) {
-      original_sets <- unlist(strsplit(combined_set, ","))
-      for (set in original_sets) {
-        set_name_map[[set]] <- combined_set
+    for (i in seq_len(comp$no)) {
+      members <- names(comp$membership[comp$membership == i])
+      label <- paste(sort(members), collapse = ",")
+      for (s in members) {
+        set_name_map[[s]] <- label
       }
     }
 
-    # Update the credible_set_names in variants_sets_and_pips_list for each
-    # variant_id
-    updated_credible_sets <- list()
-    for (variant_id in names(variants_sets_and_pips_list)) {
-      current_sets <- variants_sets_and_pips_list[[variant_id]][["sets"]] # All credible sets for the current variant_id
-      combined_set_found <- FALSE
-      # Check if any of the current variant_id's credible sets exist in the
-      # set_name_map
-      for (set_name in current_sets) {
-        if (set_name %in% names(set_name_map)) {
-          # If at least one credible set corresponds to a combined credible
-          # set, update accordingly
-          updated_credible_sets[[variant_id]] <- set_name_map[[set_name]]
-          combined_set_found <- TRUE
-          break # Exit the loop once the combined credible set is found
+    # Update each variant's credible set names
+    updated_credible_sets <- lapply(
+      stats::setNames(names(variants_sets_and_pips_list), names(variants_sets_and_pips_list)),
+      function(variant_id) {
+        current_sets <- variants_sets_and_pips_list[[variant_id]][["sets"]]
+        mapped <- intersect(current_sets, names(set_name_map))
+        if (length(mapped) > 0) {
+          set_name_map[[mapped[1]]]
+        } else {
+          paste(sort(unique(current_sets)), collapse = ",")
         }
       }
-      # If no combined credible set is found, keep the original credible sets
-      # unchanged
-      if (!combined_set_found) {
-        updated_credible_sets[[variant_id]] <- current_sets
-      }
-    }
+    )
     return(updated_credible_sets)
   }
   # Loop through each condition and their credible sets
   extract_top_loci <- function(susie_fit, complementary, coverage) {
-    results <- list()
-    for (i in seq_along(names(susie_fit[[1]]))) {
-      if (!is.null(susie_fit[[1]][[i]][["top_loci"]]) && nrow(susie_fit[[1]][[i]][["top_loci"]]) !=
-        0) {
-        if (!complementary) {
-          set_num <- unique(get_nested_element(susie_fit[[1]][[i]], c(
-            "top_loci",
-            coverage
-          )))
-          set_num <- set_num[set_num != 0]
-        } else {
-          set_num <- 0
-        }
-        num_cs <- length(set_num)
-        if (num_cs > 0) {
-          for (j in 1:num_cs) {
-            variants_df <- get_nested_element(susie_fit[[1]][[i]], c("top_loci")) %>%
-              filter(!!sym(coverage) == set_num[j]) %>%
-              select(variant_id, pip)
-            # Iterate through the rows of the variants_df
-            if (dim(variants_df)[1] != 0) {
-              for (row in 1:nrow(variants_df)) {
-                variant_id <- variants_df$variant_id[row]
-                variant_pip <- variants_df$pip[row]
-
-                # Prepare the set name
-                set_name <- paste0("cs_", i, "_", set_num[j])
-
-                # If the variant_id is not in the results list, add it with the
-                # current set_name and pip
-                if (!variant_id %in% names(results)) {
-                  results[[variant_id]] <- list(sets = set_name, pips = variant_pip)
-                } else {
-                  # If the variant_id is already in the results, append the current
-                  # set_name and pip
-                  results[[variant_id]]$sets <- c(
-                    results[[variant_id]]$sets,
-                    set_name
-                  )
-                  results[[variant_id]]$pips <- c(
-                    results[[variant_id]]$pips,
-                    variant_pip
-                  )
-                }
-              }
-            }
-          }
-        }
+    # Build a flat data frame of (variant_id, pip, set_name) across all conditions
+    cond_names <- names(susie_fit[[1]])
+    rows <- purrr::map_dfr(seq_along(cond_names), function(i) {
+      cond_data <- susie_fit[[1]][[i]]
+      top_loci <- cond_data[["top_loci"]]
+      if (is.null(top_loci) || nrow(top_loci) == 0) return(NULL)
+
+      if (!complementary) {
+        set_num <- unique(get_nested_element(cond_data, c("top_loci", coverage)))
+        set_num <- set_num[set_num != 0]
+      } else {
+        set_num <- 0
       }
-    }
-    return(results)
+      if (length(set_num) == 0) return(NULL)
+
+      purrr::map_dfr(set_num, function(sn) {
+        top_loci %>%
+          filter(!!sym(coverage) == sn) %>%
+          select(variant_id, pip) %>%
+          mutate(set_name = paste0("cs_", i, "_", sn))
+      })
+    })
+
+    if (is.null(rows) || nrow(rows) == 0) return(list())
+
+    # Aggregate by variant_id preserving first-seen order
+    seen_order <- unique(rows$variant_id)
+    split_rows <- split(rows, factor(rows$variant_id, levels = seen_order))
+    lapply(split_rows, function(df) {
+      list(sets = df$set_name, pips = df$pip)
+    })
   }
 
   combine_top_loci <- function(extracted_result) {
+    # Compute overlap sets once, outside the per-variant loop
+    overlap_sets <- identify_overlap_sets(extracted_result)
+    has_overlaps <- length(overlap_sets) != 0
+    merged_sets <- if (has_overlaps) {
+      merge_and_update_overlap_sets(extracted_result, overlap_sets = overlap_sets)
+    } else {
+      NULL
+    }
+
     top_loci_df <- do.call(rbind, lapply(names(extracted_result), function(variant_id) {
       max_pip <- max(unlist(extracted_result[[variant_id]]$pips))
       median_pip <- median(unlist(extracted_result[[variant_id]]$pips))
-      if (length(identify_overlap_sets(extracted_result)) != 0) {
-        credible_set_names <- merge_and_update_overlap_sets(extracted_result,
-          overlap_sets = identify_overlap_sets(extracted_result)
-        )[[variant_id]]
+      credible_set_names <- if (has_overlaps) {
+        merged_sets[[variant_id]]
       } else {
-        credible_set_names <- paste(sort(unique(unlist(extracted_result[[variant_id]]$sets))),
-          collapse = ","
-        )
+        paste(sort(unique(unlist(extracted_result[[variant_id]]$sets))), collapse = ",")
       }
       data.frame(
         variant_id = variant_id, credible_set_names = credible_set_names,
-        max_pip = max_pip, median_pip = median_pip, stringsAsFactors = FALSE # Avoid factors for strings
+        max_pip = max_pip, median_pip = median_pip, stringsAsFactors = FALSE
       )
     }))
     return(top_loci_df)
@@ -696,19 +621,12 @@ mash_rand_null_sample <- function(dat, n_random, n_null, exclude_condition, seed
   }
 
   if (length(exclude_condition) > 0) {
-    if ("z" %in% names(dat)) {
-      if (all(exclude_condition %in% colnames(dat$z))) {
-        dat$z <- dat$z[, -exclude_condition, drop = FALSE]
-      } else {
-        stop(paste("Error: exclude_condition are not present in", dat$region))
-      }
-    } else {
-      if (all(exclude_condition %in% colnames(dat$bhat))) {
-        dat$bhat <- dat$bhat[, -exclude_condition, drop = FALSE]
-        dat$sbhat <- dat$sbhat[, -exclude_condition, drop = FALSE]
-      } else {
-        stop(paste("Error: exclude_condition are not present in", dat$region))
-      }
+    cols_to_check <- if ("z" %in% names(dat)) "z" else "bhat"
+    if (!all(exclude_condition %in% colnames(dat[[cols_to_check]]))) {
+      stop(paste("Error: exclude_condition are not present in", dat$region))
+    }
+    for (key in intersect(names(dat), c("z", "bhat", "sbhat"))) {
+      dat[[key]] <- dat[[key]][, -exclude_condition, drop = FALSE]
     }
   }
 
@@ -718,59 +636,30 @@ mash_rand_null_sample <- function(dat, n_random, n_null, exclude_condition, seed
 
 #' @export
 merge_mash_data <- function(res_data, one_data) {
-  combined_data <- list()
-  if (length(res_data) == 0 | is.null(res_data)) {
-    return(one_data)
-  } else if (length(one_data) == 0 | is.null(one_data)) {
-    return(res_data)
-  } else {
-    for (d in names(one_data)) {
-      if (length(one_data[[d]]) == 0 | is.null(one_data[[d]])) {
-        combined_data[[d]] <- res_data[[d]] # Keep res_data[[d]] when one_data[[d]] is NULL or empty
-        next
-      } else {
-        # Check if the res_data is NULL
-        if (!is.null(res_data[[d]]) | length(res_data[[d]]) != 0) {
-          # Check if the number of columns matches
-          if (!identical(colnames(res_data[[d]]), colnames(one_data[[d]]))) {
-            # Get all column names from both data frames
-            all_cols <- union(colnames(res_data[[d]]), colnames(one_data[[d]]))
-
-            # Align res[[d]]
-            res_aligned <- setNames(as.data.frame(matrix(NaN,
-              nrow = nrow(res_data[[d]]),
-              ncol = length(all_cols)
-            )), all_cols)
-            rownames(res_aligned) <- rownames(res_data[[d]])
-            common_cols_res <- intersect(colnames(res_data[[d]]), all_cols)
-            res_aligned[common_cols_res] <- res_data[[d]][common_cols_res]
-
-            # Align one_data[[d]]
-            one_data_aligned <- setNames(as.data.frame(matrix(NaN,
-              nrow = nrow(one_data[[d]]),
-              ncol = length(all_cols)
-            )), all_cols)
-            rownames(one_data_aligned) <- rownames(one_data[[d]])
-            common_cols_one_data <- intersect(colnames(one_data[[d]]), all_cols)
-            one_data_aligned[common_cols_one_data] <- one_data[[d]][common_cols_one_data]
-
-            # Now both have the same columns, we can rbind them
-            combined_data[[d]] <- rbind(res_aligned, one_data_aligned)
-          } else {
-            # If they already have the same number of columns, just rbind
-            combined_data[[d]] <- rbind(as.data.frame(res_data[[d]]), as.data.frame(one_data[[d]]))
-          }
-        } else {
-          combined_data[[d]] <- one_data[[d]]
-        }
-      }
-    }
-    return(combined_data)
-  }
+  if (length(res_data) == 0 || is.null(res_data)) return(one_data)
+  if (length(one_data) == 0 || is.null(one_data)) return(res_data)
+
+  combined_data <- lapply(names(one_data), function(d) {
+    od <- one_data[[d]]
+    rd <- res_data[[d]]
+    if (length(od) == 0 || is.null(od)) return(rd)
+    if (is.null(rd) || length(rd) == 0) return(od)
+
+    # bind_rows auto-aligns columns, filling missing with NA; replace with NaN
+    rn_res <- rownames(as.data.frame(rd))
+    rn_one <- rownames(as.data.frame(od))
+    combined <- dplyr::bind_rows(as.data.frame(rd), as.data.frame(od))
+    combined[is.na(combined)] <- NaN
+    rn_all <- make.names(c(rn_res, rn_one), unique = TRUE)
+    rownames(combined) <- rn_all
+    combined
+  })
+  names(combined_data) <- names(one_data)
+  return(combined_data)
 }
 
 #' @export
-mash_pipeline <- function(mash_input, alpha, residual_correlation = NULL, unconstrained.update = "ted", set_seed = 999) {
+mash_pipeline <- function(mash_input, alpha, residual_correlation = NULL, n_pcs = NULL, set_seed = 999) {
   if (!requireNamespace("mashr", quietly = TRUE)) {
     stop("To use this function, please install mashr: https://cran.r-project.org/web/packages/mashr/index.html")
   }
@@ -792,35 +681,30 @@ mash_pipeline <- function(mash_input, alpha, residual_correlation = NULL, uncons
     ))
   }
 
-  # mash data Fit mixture model using udr package
   mash_data <- mashr::mash_set_data(mash_input$strong.b,
     Shat = mash_input$strong.s, V = vhat,
     alpha, zero_Bhat_Shat_reset = 1000
   )
-  # Canonical matrices
-  U.can <- mashr::cov_canonical(mash_data)
 
-  # Penalty strength
-  lambda <- ncol(mash_input$strong.z)
-  # FIXME: Please change this to use flashier + ED instead of UDR
-  if (!requireNamespace("udr", quietly = TRUE)) {
-    stop("Package 'udr' is required. Install with: devtools::install_github('stephenslab/udr')")
+  # Canonical covariance matrices
+  U.can <- mashr::cov_canonical(mash_data)
+  # PCA-based covariance matrices
+  if (is.null(n_pcs)) {
+    n_pcs <- ncol(mash_data$Bhat) - 1
   }
-  # Initialize udr
-  fit0 <- udr::ud_init(mash_data, n_unconstrained = 50, U_scaled = U.can)
-  # Fit udr and use penalty as default as suggested by Yunqi penalty is
-  # necessary in small sample size case, and there won't be a difference in
-  # large sample size
-  fit2 <- udr::ud_fit(fit0, control = list(unconstrained.update,
-    scaled.update = "fa",
-    resid.update = "none", lambda = lambda, penalty.type = "iw", maxiter = 1000,
-    tol = 0.01, tol.lik = 0.01
-  ))
-
-  # extract data-driven covariance from udr model. (A list of covariance
-  # matrices)
-  U.ud <- lapply(fit2$U, function(e) e[["mat"]])
-  return(mixture_prior = list(U = U.ud, w = fit2$w, loglik = fit2$loglik))
+  U.pca <- mashr::cov_pca(mash_data, npc = n_pcs)
+  # Flash-based covariance matrices (factor analysis)
+  U.flash <- mashr::cov_flash(mash_data)
+  # ED-based covariance matrices (initialized from all others)
+  U.ed <- mashr::cov_ed(mash_data, Ulist_init = c(U.can, U.pca, U.flash))
+  # Combine all covariance matrices
+  U.all <- c(U.can, U.pca, U.flash, U.ed)
+
+  # Fit mash to estimate mixture weights
+  m <- mashr::mash(mash_data, Ulist = U.all, outputlevel = 1)
+  w <- mashr::get_estimated_pi(m)
+
+  return(list(U = U.all, w = w))
 }
 
 #' Merge a List of Matrices or Data Frames with Optional Allele Flipping
@@ -1003,19 +887,17 @@ load_multicontext_sumstats <- function(dat_list, signal_df, cond, region, extrac
 
             # Match context to tag
         df <- df %>%
-                  rowwise() %>%
-                  mutate(context_classify = {
-                            if (is.null(tag_patterns) || length(tag_patterns) == 0) {
-                                  context
-                            } else {
-                                  matched <- names(tag_patterns)[str_detect(context, tag_patterns)]
-                                  if (length(matched) == 0) NA_character_ else matched[1]
-                            }
-                  }) %>%
-                  ungroup()
+                  mutate(context_classify = if (is.null(tag_patterns) || length(tag_patterns) == 0) {
+                    context
+                  } else {
+                    purrr::map_chr(context, function(ctx) {
+                      matched <- names(tag_patterns)[str_detect(ctx, tag_patterns)]
+                      if (length(matched) == 0) NA_character_ else matched[1]
+                    })
+                  })
 
         numeric_col <- colnames(df)[2]
-    
+
          if (extract_inf == "z"){
                 # Make a copy to store added rows
               added_df <- data.frame()
@@ -1059,7 +941,7 @@ load_multicontext_sumstats <- function(dat_list, signal_df, cond, region, extrac
                   rename(!!numeric_col := !!sym(numeric_col))
                 event_ID_extracted <- sumstats_df[[extract_inf]]%>%pull(context)
              } else if (is.null(event_ID_extracted)){
-                    print("Please provide 'z-score'")
+                    warning("Please provide 'z-score'")
              } else {
                  sumstats_df[[extract_inf]] <- df %>% filter(context%in%event_ID_extracted)%>%
                                         rename(!!numeric_col := !!sym(numeric_col))
@@ -1073,7 +955,6 @@ load_multicontext_sumstats <- function(dat_list, signal_df, cond, region, extrac
                     gene_ID = region
                   ) %>%
                  select(variant_ID, gene_ID, everything())
-            print(result_df)
                  result_list_format[[cond]][[extract_inf]]  <- result_list_format[[cond]][[extract_inf]]%>% rows_update(result_df, by = c("variant_ID", "gene_ID"))
      }
   }
@@ -1109,16 +990,14 @@ load_multicontext_sumstats <- function(dat_list, signal_df, cond, region, extrac
 
               # Match context to tag
                df <- df %>%
-                   rowwise() %>%
-                   mutate(context_classify = {
-                            if (is.null(tag_patterns) || length(tag_patterns) == 0) {
-                                  context
-                            } else {
-                                  matched <- names(tag_patterns)[str_detect(context, tag_patterns)]
-                                  if (length(matched) == 0) NA_character_ else matched[1]
-                            }
-                  }) %>%
-                  ungroup()
+                   mutate(context_classify = if (is.null(tag_patterns) || length(tag_patterns) == 0) {
+                     context
+                   } else {
+                     purrr::map_chr(context, function(ctx) {
+                       matched <- names(tag_patterns)[str_detect(ctx, tag_patterns)]
+                       if (length(matched) == 0) NA_character_ else matched[1]
+                     })
+                   })
 
               numeric_col <- colnames(df)[2]
             if (extract_inf == "z"){
@@ -1155,7 +1034,7 @@ load_multicontext_sumstats <- function(dat_list, signal_df, cond, region, extrac
                 }
                 event_ID_extracted[[k]] <- sumstats_df[[extract_inf]]%>%pull(context)
             }  else if (is.null(event_ID_extracted)){
-                    print("Please provide 'z-score'")
+                    warning("Please provide 'z-score'")
             } else {
                 sumstats_df[[extract_inf]] <- df %>% filter(context%in%event_ID_extracted[[k]])%>%
                                         rename(!!numeric_col := !!sym(numeric_col))
diff --git a/R/multigene_udr.R b/R/multigene_udr.R
deleted file mode 100644
index 197fe8be..00000000
--- a/R/multigene_udr.R
+++ /dev/null
@@ -1,88 +0,0 @@
-#' Perform udr Analysis on Multigene Data
-#'
-#' @param combined_susie_list A list containing the combined SuSiE and summary statistics results.
-#' @param coverage A numeric vector representing the coverage values.
-#' @param independent_variant_list A list of independent variants to be used as a filter.
-#' @param n_random An integer specifying the number of random samples to generate.
-#' @param n_null An integer specifying the number of null samples to generate.
-#' @param seed An integer specifying the seed for random number generation.
-#' @param exclude_condition A character vector specifying conditions to be excluded from the analysis. Defaults to NULL.
-#'
-#' @return A list containing the results of the prior, or NULL if conditions are not met.
-#' @importFrom dplyr filter mutate group_by
-#' @export
-multigene_udr <- function(combined_susie_list, coverage, independent_variant_list, n_random, n_null, seed, exclude_condition = NULL) {
-  # Default to an empty vector if exclude_condition is NULL
-  if (is.null(exclude_condition)) {
-    exclude_condition <- c()
-  }
-  reformat_data <- function(dat) {
-    res <- list(
-      strong.b = dat$strong$bhat,
-      random.b = dat$random$bhat,
-      null.b = dat$null$bhat,
-      strong.s = dat$strong$sbhat,
-      null.s = dat$null$sbhat,
-      random.s = dat$random$sbhat
-    )
-    return(res)
-  }
-  # Load strong and random null summary statistics
-  strong_file <- load_multitrait_R_sumstat(
-    combined_susie_list$extracted_regional_window_combined_susie_result,
-    combined_susie_list$extracted_regional_window_combined_sumstats_result,
-    coverage,
-    top_loci = TRUE,
-    exclude_condition = exclude_condition
-  )
-
-  ran_null_file <- load_multitrait_R_sumstat(
-    combined_susie_list$extracted_regional_window_combined_susie_result,
-    combined_susie_list$extracted_regional_window_combined_sumstats_result,
-    filter_file = independent_variant_list,
-    exclude_condition = exclude_condition
-  )
-
-  # Generate random null samples
-  ran_null <- mash_rand_null_sample(
-    ran_null_file,
-    n_random = n_random,
-    n_null = n_null,
-    exclude_condition = exclude_condition,
-    seed = seed
-  )
-
-
-  # Prepare the strong summary statistics
-  strong <- list(strong = list(bhat = strong_file$bhat, sbhat = strong_file$sbhat))
-
-  # Combine strong and random null samples
-  res <- c(strong, ran_null)
-
-  # Reformat data for MASH analysis
-  mash_input <- reformat_data(res)
-
-  # Filter invalid summary statistics for each condition
-  conditions <- c("strong", "random", "null")
-  for (cond in conditions) {
-    mash_input <- filter_invalid_summary_stat(
-      mash_input,
-      bhat = paste0(cond, ".b"),
-      sbhat = paste0(cond, ".s"),
-      btoz = TRUE,
-      filter_by_missing_rate = NULL,
-      sig_p_cutoff = NULL
-    )
-  }
-
-  # Calculate ZtZ matrix
-  mash_input$ZtZ <- t(as.matrix(mash_input$strong.z)) %*% as.matrix(mash_input$strong.z) / nrow(mash_input$strong.z)
-
-  # Perform MASH analysis if conditions are met
-  dd_prior <- if (nrow(mash_input$strong.b) < 2 || ncol(mash_input$strong.b) < 2) {
-    NULL
-  } else {
-    mash_pipeline(mash_input, alpha = 1)
-  }
-  return(dd_prior)
-}
diff --git a/R/plot.R b/R/plot.R
deleted file mode 100644
index 54dd3465..00000000
--- a/R/plot.R
+++ /dev/null
@@ -1,129 +0,0 @@
-#' Venn Diagram
-#' @param data a list with the twas siginificant gene_id results of four method "SuSiE","Lasso","Enet" and "MR.ASH"
-#' @return plot object
-# @importFrom ggvenn ggvenn
-# @export
-venn <- function(data) {
-  venn_plot <- ggvenn(data, c("SuSiE", "Lasso", "Enet", "MR.ASH"), show_percentage = TRUE, fill_color = c("red", "orange", "blue", "green"))
-  return(venn_plot)
-}
-
-
-#' Manhattan plot
-#' @param twas_results a data frame of twas results with columns "gene_name", "gene_id","chr","susie_pval","lasso_pval","enet_pval" and "mrash_pval", where twas results are the output of the twas_scan function. "gene_name" is the ensemble ID and "gene_id" is the corresponding gene name,
-#'        "susie_pval", "lasso_pval","enet_pval" and "mrash_pval" are the pvalues of susie and other three competing twas method.
-#' @param gene_data a data frame with columns "chr", "start", "end", and "ID", "chr" is the chromosome of gene, "start" and "end" are the position, "ID" is the gene name.
-#' @return plot object
-# @import ggplot2
-# @importFrom ggrepel geom_label_repel
-# @importFrom stringr str_sub
-# @importFrom ggnewscale new_scale_color
-# @export
-manhattan_plot <- function(twas_results, gene_data) {
-  min_pval <- apply(twas_results[, c("susie_pval", "lasso_pval", "enet_pval", "mrash_pval")], 1, function(x) min(x, na.rm = TRUE))
-  data_all_gene <- twas_results %>%
-    select(gene_name, chr, gene_id, susie_pval, lasso_pval, enet_pval, mrash_pval) %>%
-    mutate(min_pval = min_pval) %>%
-    mutate(chr = as.numeric(chr))
-  gene_pos <- gene_data %>%
-    mutate(chr = as.numeric(str_sub(`#chr`, 4))) %>%
-    select(-`#chr`) %>%
-    setNames(c("start_bp", "end_bp", "gene_name", "chr"))
-  gene_pos_pval <- merge(data_all_gene, gene_pos, by = c("gene_name", "chr"))
-
-  susie_select <- Mic_genes$gene_pq_adj %>%
-    filter(susie_pval < (2.5 * 10^(-6) / 4)) %>%
-    select(gene_id) %>%
-    t() %>%
-    as.vector()
-  lasso_select <- Mic_genes$gene_pq_adj %>%
-    filter(lasso_pval < (2.5 * 10^(-6) / 4)) %>%
-    select(gene_id) %>%
-    t() %>%
-    as.vector()
-  enet_select <- Mic_genes$gene_pq_adj %>%
-    filter(enet_pval < (2.5 * 10^(-6) / 4)) %>%
-    select(gene_id) %>%
-    t() %>%
-    as.vector()
-  ash_select <- Mic_genes$gene_pq_adj %>%
-    filter(mrash_pval < (2.5 * 10^(-6) / 4)) %>%
-    select(gene_id) %>%
-    t() %>%
-    as.vector()
-  gene_annotate <- unique(c(susie_select, lasso_select, enet_select, ash_select))
-
-  results <- NULL
-  # Define the methods to compare
-  methods <- list(
-    susie_select, lasso_select, enet_select, ash_select
-  )
-
-  # Generate all possible combinations of two methods
-  combinations <- combn(methods, 2, simplify = FALSE)
-
-  # Loop through combinations and store the intersections
-  for (i in seq_along(combinations)) {
-    results[[i]] <- intersect(combinations[[i]][[1]], combinations[[i]][[2]])
-  }
-
-  # Combine all unique intersections
-  two_more <- unique(unlist(results))
-
-  don <- gene_pos_pval %>%
-    # Compute chromosome size
-    group_by(chr) %>%
-    summarise(chr_len = max((start_bp + end_bp) / 2)) %>%
-    # Calculate cumulative position of each chromosome
-    mutate(tot = cumsum(chr_len) - chr_len) %>%
-    select(-chr_len) %>%
-    # Add this info to the initial dataset
-    left_join(gene_pos_pval, ., by = c("chr" = "chr")) %>%
-    # Add a cumulative position of each SNP
-    arrange(chr, (start_bp + end_bp) / 2) %>%
-    mutate(BPcum = (start_bp + end_bp) / 2 + tot) %>%
-    mutate(susie_highlight = ifelse(gene_id %in% susie_select, "yes", "no")) %>%
-    mutate(lasso_highlight = ifelse(gene_id %in% lasso_select, "yes", "no")) %>%
-    mutate(enet_highlight = ifelse(gene_id %in% enet_select, "yes", "no")) %>%
-    mutate(ash_highlight = ifelse(gene_id %in% ash_select, "yes", "no")) %>%
-    mutate(two_highlight = ifelse(gene_id %in% two_more, "yes", "no")) %>%
-    mutate(gene_annotate = ifelse(gene_id %in% gene_annotate, "yes", "no"))
-
-  axisdf <- don %>%
-    group_by(chr) %>%
-    summarize(center = (max(BPcum) + min(BPcum)) / 2)
-
-  manhattan <- ggplot(don, aes(x = BPcum, y = -log10(min_pval))) +
-
-    # Show all points
-    geom_point(aes(color = as.factor(chr), show.legend = FALSE), alpha = 0.8, size = 5) +
-    scale_color_manual(values = rep(c("#b6b6b6", "#9f9f9f"), 22), guide = FALSE) +
-    # custom X axis:
-    scale_x_continuous(label = axisdf$chr, breaks = axisdf$center) +
-    scale_y_continuous(expand = c(0, 0)) + # remove space between plot area and x axis
-    ylim(0, 90) +
-    new_scale_color() +
-    geom_point(data = subset(don, susie_highlight == "yes"), aes(color = "red"), size = 5) +
-    geom_point(data = subset(don, lasso_highlight == "yes"), aes(color = "orange"), size = 5) +
-    geom_point(data = subset(don, enet_highlight == "yes"), aes(color = "green"), size = 5) +
-    geom_point(data = subset(don, ash_highlight == "yes"), aes(color = "blue"), size = 5) +
-    geom_point(data = subset(don, two_highlight == "yes"), aes(color = "black"), size = 5) +
-    geom_label_repel(data = subset(don, gene_annotate == "yes"), aes(label = gene_id), size = 5, max.overlaps = Inf, color = "black", alpha = 0.75) +
-    scale_color_manual(values = c("red", "orange", "green", "blue", "black"), breaks = c("red", "orange", "green", "blue", "black"), name = "Methods", labels = c("SuSiE", "Lasso", "Enet", "MR.ASH", ">=2 methods")) +
-    xlab("Chromosome") +
-    ylab("-log10(pval)") +
-    geom_hline(yintercept = -log10(2.5 * 10^(-6) / 4), linetype = "dashed", color = "red") +
-    theme_bw() +
-    theme(
-      plot.title = element_text(size = 45),
-      legend.title = element_text(face = "bold", size = 45),
-      legend.text = element_text(size = 30),
-      axis.text = element_text(size = 20),
-      axis.title = element_text(size = 45),
-      legend.position = "top",
-      panel.border = element_blank(),
-      panel.grid.major.x = element_blank(),
-      panel.grid.minor.x = element_blank()
-    )
-  return(manhattan)
-}
diff --git a/R/regularized_regression.R b/R/regularized_regression.R
index 83a4983a..2eda33a9 100644
--- a/R/regularized_regression.R
+++ b/R/regularized_regression.R
@@ -247,79 +247,51 @@ sdpr_weights <- function(stat, LD, ...) {
   return(model$beta_est)
 }
 
+# Shared helper for susie/susie_ash/susie_inf weight extraction.
+# @param fit A susie fit object (or NULL to fit from X, y).
+# @param X Genotype matrix (optional).
+# @param y Phenotype vector (optional).
+# @param required_fields Fields that must be present in the fit to extract weights.
+# @param fit_args Extra arguments passed to susie_wrapper when fit is NULL.
+# @param ... Additional arguments forwarded to susie_wrapper.
 #' @importFrom susieR coef.susie
-#' @export
-susie_weights <- function(X = NULL, y = NULL, susie_fit = NULL, ...) {
-  if (is.null(susie_fit)) {
-    # get susie_fit object
-    susie_fit <- susie_wrapper(X, y, ...)
+#' @noRd
+.susie_extract_weights <- function(fit, X, y, required_fields, fit_args = list(), ...) {
+  if (is.null(fit)) {
+    fit <- do.call(susie_wrapper, c(list(X = X, y = y), fit_args, list(...)))
   }
-  if (!is.null(X)) {
-    if (length(susie_fit$pip) != ncol(X)) {
-      stop(paste0(
-        "Dimension mismatch on number of variant in susie_fit ", length(susie_fit$pip),
-        " and TWAS weights ", ncol(X), ". "
-      ))
-    }
+  if (!is.null(X) && length(fit$pip) != ncol(X)) {
+    stop(paste0(
+      "Dimension mismatch on number of variant in susie fit ", length(fit$pip),
+      " and TWAS weights ", ncol(X), ". "
+    ))
   }
-  if ("alpha" %in% names(susie_fit) && "mu" %in% names(susie_fit) && "X_column_scale_factors" %in% names(susie_fit)) {
-    # This is designed to cope with output from pecotmr::susie_post_processor()
-    # We set intercept to 0 and later trim it off anyways
-    susie_fit$intercept <- 0
-    return(coef.susie(susie_fit)[-1])
+  if (all(required_fields %in% names(fit))) {
+    fit$intercept <- 0
+    return(coef.susie(fit)[-1])
   } else {
-    return(rep(0, length(susie_fit$pip)))
+    return(rep(0, length(fit$pip)))
   }
 }
 
-#' @importFrom susieR coef.susie
+#' @export
+susie_weights <- function(X = NULL, y = NULL, susie_fit = NULL, ...) {
+  .susie_extract_weights(susie_fit, X, y,
+    required_fields = c("alpha", "mu", "X_column_scale_factors"), ...)
+}
+
 #' @export
 susie_ash_weights <- function(X = NULL, y = NULL, susie_ash_fit = NULL, ...) {
-  if (is.null(susie_ash_fit)) {
-    # get susie_ash_fit object
-    susie_ash_fit <- susie_wrapper(X, y, unmappable_effects = "ash", convergence_method = "pip", ...)
-  }
-  if (!is.null(X)) {
-    if (length(susie_ash_fit$pip) != ncol(X)) {
-      stop(paste0(
-        "Dimension mismatch on number of variant in susie_ash_fit ", length(susie_ash_fit$pip),
-        " and TWAS weights ", ncol(X), ". "
-      ))
-    }
-  }
-  if ("alpha" %in% names(susie_ash_fit) && "mu" %in% names(susie_ash_fit) && "theta" %in% names(susie_ash_fit) && "X_column_scale_factors" %in% names(susie_ash_fit)) {
-    # This is designed to cope with output from pecotmr::susie_post_processor()
-    # We set intercept to 0 and later trim it off anyways
-    susie_ash_fit$intercept <- 0
-    return(coef.susie(susie_ash_fit)[-1])
-  } else {
-    return(rep(0, length(susie_ash_fit$pip)))
-  }
+  .susie_extract_weights(susie_ash_fit, X, y,
+    required_fields = c("alpha", "mu", "theta", "X_column_scale_factors"),
+    fit_args = list(unmappable_effects = "ash", convergence_method = "pip"), ...)
 }
 
-#' @importFrom susieR coef.susie
 #' @export
 susie_inf_weights <- function(X = NULL, y = NULL, susie_inf_fit = NULL, ...) {
-  if (is.null(susie_inf_fit)) {
-    # get susie_inf_fit object
-    susie_inf_fit <- susie_wrapper(X, y, unmappable_effects = "inf", convergence_method = "pip", ...)
-  }
-  if (!is.null(X)) {
-    if (length(susie_inf_fit$pip) != ncol(X)) {
-      stop(paste0(
-        "Dimension mismatch on number of variant in susie_inf_fit ", length(susie_inf_fit$pip),
-        " and TWAS weights ", ncol(X), ". "
-      ))
-    }
-  }
-  if ("alpha" %in% names(susie_inf_fit) && "mu" %in% names(susie_inf_fit) && "theta" %in% names(susie_inf_fit) && "X_column_scale_factors" %in% names(susie_inf_fit)) {
-    # This is designed to cope with output from pecotmr::susie_post_processor()
-    # We set intercept to 0 and later trim it off anyways
-    susie_inf_fit$intercept <- 0
-    return(coef.susie(susie_inf_fit)[-1])
-  } else {
-    return(rep(0, length(susie_inf_fit$pip)))
-  }
+  .susie_extract_weights(susie_inf_fit, X, y,
+    required_fields = c("alpha", "mu", "theta", "X_column_scale_factors"),
+    fit_args = list(unmappable_effects = "inf", convergence_method = "pip"), ...)
 }
 
 #' @export
@@ -383,8 +355,9 @@ init_prior_sd <- function(X, y, n = 30) {
 # (glmnet, ncvreg, L0Learn, qgg, BGLR, RcppDPR) all either error or behave
 # poorly on constant columns, so wrappers should filter them out and zero-pad
 # their results back to length p.
+#' @importFrom matrixStats colSds
 .drop_zero_variance <- function(X, fn_name) {
-  sds <- apply(X, 2, sd)
+  sds <- matrixStats::colSds(X)
   keep <- !is.na(sds) & sds != 0
   if (!all(keep)) {
     warning(sprintf(
diff --git a/R/susie_wrapper.R b/R/susie_wrapper.R
index 71602e4c..7c38ad5e 100644
--- a/R/susie_wrapper.R
+++ b/R/susie_wrapper.R
@@ -115,29 +115,37 @@ adjust_susie_weights <- function(twas_weights_results, keep_variants, run_allele
 }
 
 
-#' @importFrom susieR susie
-#' @export
-susie_wrapper <- function(X, y, init_L = 5, max_L = 30, l_step = 5, ...) {
-  if (init_L == max_L) {
-    return(susie(X, y, L = init_L, ...))
-  }
+# Shared dynamic-L loop: repeatedly fits a SuSiE model, increasing L by
+# l_step whenever the number of credible sets saturates L, until max_L.
+# @param fit_fn A function(L) that runs SuSiE with a given L and returns a fit.
+# @param init_L Initial number of causal configurations.
+# @param max_L Maximum number of causal configurations.
+# @param l_step Step size for increasing L.
+# @return The final SuSiE fit object.
+# @noRd
+.dynamic_L_fit <- function(fit_fn, init_L, max_L, l_step) {
   L <- init_L
-  # Perform SuSiE by dynamically increasing L
-  gst <- proc.time()
   while (TRUE) {
     st <- proc.time()
-    res <- susie(X, y, L = L, ...)
-    res$time_elapsed <- proc.time() - st
-    if (!is.null(res$sets$cs)) {
-      if (length(res$sets$cs) >= L && L <= max_L) {
-        L <- L + l_step
-      } else {
-        break
-      }
+    result <- fit_fn(L)
+    result$time_elapsed <- proc.time() - st
+    if (!is.null(result$sets$cs) && length(result$sets$cs) >= L && L <= max_L) {
+      L <- L + l_step
     } else {
       break
     }
   }
+  result
+}
+
+#' @importFrom susieR susie
+#' @export
+susie_wrapper <- function(X, y, init_L = 5, max_L = 30, l_step = 5, ...) {
+  if (init_L == max_L) {
+    return(susie(X, y, L = init_L, ...))
+  }
+  gst <- proc.time()
+  res <- .dynamic_L_fit(function(L) susie(X, y, L = L, ...), init_L, max_L, l_step)
   message(paste("Total time elapsed for susie_wrapper:", (proc.time() - gst)[3]))
   return(res)
 }
@@ -175,25 +183,15 @@ susie_rss_wrapper <- function(z, R = NULL, X = NULL, n = NULL,
                     sketch_samples = sketch_samples, ...)
   if (!is.null(X)) base_args$X <- X else base_args$R <- R
 
-  run_susie <- function(args) do.call(susie_rss, args)
+  run_with_L <- function(L_val) do.call(susie_rss, c(base_args, list(L = L_val)))
 
   if (L == 1) {
     base_args$max_iter <- 1
-    result <- run_susie(base_args)
+    result <- do.call(susie_rss, c(base_args, list(L = 1)))
   } else if (L == max_L) {
-    result <- run_susie(base_args)
+    result <- run_with_L(L)
   } else {
-    while (TRUE) {
-      st <- proc.time()
-      base_args$L <- L
-      result <- run_susie(base_args)
-      result$time_elapsed <- proc.time() - st
-      if (!is.null(result$sets$cs) && length(result$sets$cs) >= L && L <= max_L) {
-        L <- L + l_step
-      } else {
-        break
-      }
-    }
+    result <- .dynamic_L_fit(run_with_L, L, max_L, l_step)
   }
 
   result
@@ -421,17 +419,20 @@ susie_post_processor <- function(susie_output, data_x, data_y, X_scalar, y_scala
 
     if (nrow(top_loci) > 0) {
       top_loci[is.na(top_loci)] <- 0
-      variants <- res$variant_names[top_loci$variant_idx]
-      pip <- susie_output$pip[top_loci$variant_idx]
-      top_loci_cols <- c("variant_id", if (!is.null(res$sumstats$betahat)) "betahat", if (!is.null(res$sumstats$sebetahat)) "sebetahat", if (!is.null(res$sumstats$z)) "z", if (!is.null(maf)) "maf", "pip", colnames(top_loci)[-1])
-      res$top_loci <- data.frame(variants, stringsAsFactors = FALSE)
-      res$top_loci$betahat <- if (!is.null(res$sumstats$betahat)) res$sumstats$betahat[top_loci$variant_idx] else NULL
-      res$top_loci$sebetahat <- if (!is.null(res$sumstats$sebetahat)) res$sumstats$sebetahat[top_loci$variant_idx] else NULL
-      res$top_loci$z <- if (!is.null(res$sumstats$z)) res$sumstats$z[top_loci$variant_idx] else NULL
-      res$top_loci$maf <- if (!is.null(maf)) maf[top_loci$variant_idx] else NULL
-      res$top_loci$pip <- pip
-      res$top_loci <- cbind(res$top_loci, top_loci[, -1])
-      colnames(res$top_loci) <- top_loci_cols
+      idx <- top_loci$variant_idx
+      optional_cols <- list(
+        betahat = if (!is.null(res$sumstats$betahat)) res$sumstats$betahat[idx],
+        sebetahat = if (!is.null(res$sumstats$sebetahat)) res$sumstats$sebetahat[idx],
+        z = if (!is.null(res$sumstats$z)) res$sumstats$z[idx],
+        maf = if (!is.null(maf)) maf[idx]
+      )
+      optional_cols <- Filter(Negate(is.null), optional_cols)
+      res$top_loci <- cbind(
+        data.frame(variant_id = res$variant_names[idx], stringsAsFactors = FALSE),
+        as.data.frame(optional_cols),
+        data.frame(pip = susie_output$pip[idx]),
+        top_loci[, -1, drop = FALSE]
+      )
       rownames(res$top_loci) <- NULL
     }
     names(susie_output$pip) <- NULL
diff --git a/R/twas.R b/R/twas.R
index db7f2072..5b847012 100644
--- a/R/twas.R
+++ b/R/twas.R
@@ -32,15 +32,8 @@ harmonize_twas <- function(twas_weights_data, ld_meta_file_path, gwas_meta_file,
   # Function to group contexts based on start and end positions
   group_contexts_by_region <- function(twas_weights_data, molecular_id, chrom, tolerance = 5000) {
     region_info_df <- do.call(rbind, lapply(names(twas_weights_data$weights), function(context) {
-      wgt_range <- as.integer(sapply(
-        rownames(twas_weights_data[["weights"]][[context]]),
-        function(variant_id) {
-          strsplit(variant_id, "\\:")[[1]][2]
-        }
-      ))
-      min <- min(wgt_range)
-      max <- max(wgt_range)
-      data.frame(context = context, start = min, end = max)
+      wgt_range <- parse_variant_id(rownames(twas_weights_data[["weights"]][[context]]))$pos
+      data.frame(context = context, start = min(wgt_range), end = max(wgt_range))
     }))
     if (nrow(region_info_df) == 1) {
       # Handle case with only one context
@@ -123,31 +116,31 @@ harmonize_twas <- function(twas_weights_data, ld_meta_file_path, gwas_meta_file,
 
   # loop through genes/events:
   for (molecular_id in molecular_ids) {
-    results[[molecular_id]][["chrom"]] <- chrom
-    results[[molecular_id]][["data_type"]] <- if ("data_type" %in% names(twas_weights_data[[molecular_id]])) twas_weights_data[[molecular_id]]$data_type
-    results[[molecular_id]][["variant_names"]] <- list()
+    mol_data <- twas_weights_data[[molecular_id]]
+    mol_res <- list(chrom = chrom, variant_names = list())
+    mol_res[["data_type"]] <- if ("data_type" %in% names(mol_data)) mol_data$data_type
 
     # group contexts based on the variant position
-    context_clusters <- group_contexts_by_region(twas_weights_data[[molecular_id]], molecular_id, chrom, tolerance = 5000)
+    context_clusters <- group_contexts_by_region(mol_data, molecular_id, chrom, tolerance = 5000)
 
     # loop through contexts: grouping contexts can be useful during TWAS data harmonization to stratify variants for LD loading
     for (context_group in names(context_clusters)) {
-      contexts <- context_clusters[[context_group]]$contexts
-      query_region <- context_clusters[[context_group]]$query_region
+      cluster <- context_clusters[[context_group]]
+      contexts <- cluster$contexts
+      query_region <- cluster$query_region
       region_of_interest <- region_to_df(query_region)
-      all_variants <- context_clusters[[context_group]]$all_variants # all variants for this context group
-      all_variants <- variant_id_to_df(all_variants)
+      all_variants <- variant_id_to_df(cluster$all_variants)
 
       # Step 3: load GWAS data for clustered context groups
       for (study in names(gwas_files)) {
         gwas_file <- gwas_files[study]
-        gwas_data_sumstats <- harmonize_gwas(gwas_file, query_region=query_region,   
-                                              LD_list$LD_variants, c("beta", "z"),   
+        gwas_data_sumstats <- harmonize_gwas(gwas_file, query_region=query_region,
+                                              LD_list$LD_variants, c("beta", "z"),
                                               match_min_prop = 0, column_file_path = column_file_path, comment_string = comment_string)
         if(is.null(gwas_data_sumstats)) next
         # loop through context within the context group:
         for (context in contexts) {
-          weights_matrix <- twas_weights_data[[molecular_id]][["weights"]][[context]]
+          weights_matrix <- mol_data[["weights"]][[context]]
 
           # Step 4: harmonize weights, flip allele
           weights_matrix <- cbind(variant_id_to_df(rownames(weights_matrix)), weights_matrix)
@@ -155,11 +148,11 @@ harmonize_twas <- function(twas_weights_data, ld_meta_file_path, gwas_meta_file,
             colnames(weights_matrix)[!colnames(weights_matrix) %in% c("chrom", "pos", "A2", "A1")],
             match_min_prop = 0
           )
-          weights_matrix_subset <- as.matrix(weights_matrix_qced$target_data_qced[, !colnames(weights_matrix_qced$target_data_qced) %in% c(
-            "chrom",
-            "pos", "A2", "A1", "variant_id", "variants_id_original"
+          qced_data <- weights_matrix_qced$target_data_qced
+          weights_matrix_subset <- as.matrix(qced_data[, !colnames(qced_data) %in% c(
+            "chrom", "pos", "A2", "A1", "variant_id", "variants_id_original"
           ), drop = FALSE])
-          rownames(weights_matrix_subset) <- weights_matrix_qced$target_data_qced$variant_id # weight variant names are flipped/corrected
+          rownames(weights_matrix_subset) <- qced_data$variant_id
 
           # intersect post-qc gwas and post-qc weight variants (all now in canonical chr-prefix format)
           gwas_LD_variants <- intersect(gwas_data_sumstats$variant_id, LD_list$LD_variants)
@@ -168,8 +161,8 @@ harmonize_twas <- function(twas_weights_data, ld_meta_file_path, gwas_meta_file,
           postqc_weight_variants <- rownames(weights_matrix_subset)
 
           # Step 5: adjust SuSiE weights based on available variants
-          if ("susie_weights" %in% colnames(twas_weights_data[[molecular_id]][["weights"]][[context]])) {
-            adjusted_susie_weights <- adjust_susie_weights(twas_weights_data[[molecular_id]],
+          if ("susie_weights" %in% colnames(mol_data[["weights"]][[context]])) {
+            adjusted_susie_weights <- adjust_susie_weights(mol_data,
               keep_variants = postqc_weight_variants, run_allele_qc = TRUE,
               variable_name_obj = c("variant_names", context),
               susie_obj = c("susie_results", context),
@@ -179,16 +172,17 @@ harmonize_twas <- function(twas_weights_data, ld_meta_file_path, gwas_meta_file,
               susie_weights = setNames(adjusted_susie_weights$adjusted_susie_weights, adjusted_susie_weights$remained_variants_ids),
               weights_matrix_subset[adjusted_susie_weights$remained_variants_ids, !colnames(weights_matrix_subset) %in% "susie_weights", drop = FALSE]
             )
-            results[[molecular_id]][["susie_weights_intermediate_qced"]][[context]] <- twas_weights_data[[molecular_id]]$susie_results[[context]][c("pip", "cs_variants", "cs_purity")]
-            names(results[[molecular_id]][["susie_weights_intermediate_qced"]][[context]][["pip"]]) <- rownames(weights_matrix) # original variants that is not qced yet
-            pip <- results[[molecular_id]][["susie_weights_intermediate_qced"]][[context]][["pip"]]
+            susie_intermediate <- mol_data$susie_results[[context]][c("pip", "cs_variants", "cs_purity")]
+            names(susie_intermediate[["pip"]]) <- rownames(weights_matrix) # original variants that is not qced yet
+            pip <- susie_intermediate[["pip"]]
             pip_qced <- allele_qc(cbind(parse_variant_id(names(pip)), pip), LD_list$LD_variants, "pip", match_min_prop = 0)
-            results[[molecular_id]][["susie_weights_intermediate_qced"]][[context]][["pip"]] <- abs(pip_qced$target_data_qced$pip)
-            names(results[[molecular_id]][["susie_weights_intermediate_qced"]][[context]][["pip"]]) <- pip_qced$target_data_qced$variant_id
-            results[[molecular_id]][["susie_weights_intermediate_qced"]][[context]][["cs_variants"]] <- lapply(results[[molecular_id]][["susie_weights_intermediate_qced"]][[context]][["cs_variants"]], function(x) {
+            susie_intermediate[["pip"]] <- abs(pip_qced$target_data_qced$pip)
+            names(susie_intermediate[["pip"]]) <- pip_qced$target_data_qced$variant_id
+            susie_intermediate[["cs_variants"]] <- lapply(susie_intermediate[["cs_variants"]], function(x) {
               variant_qc <- allele_qc(x, LD_list$LD_variants, match_min_prop = 0)
               variant_qc$target_data_qced$variant_id[variant_qc$target_data_qced$variant_id %in% postqc_weight_variants]
             })
+            mol_res[["susie_weights_intermediate_qced"]][[context]] <- susie_intermediate
           }
           rm(weights_matrix) # context specific original weight matrix
           gc()
@@ -197,27 +191,29 @@ harmonize_twas <- function(twas_weights_data, ld_meta_file_path, gwas_meta_file,
               warning("weights_matrix_subset is empty. Skipping this context.")
               next
           }
-          results[[molecular_id]][["variant_names"]][[context]][[study]] <- rownames(weights_matrix_subset)
+          mol_res[["variant_names"]][[context]][[study]] <- rownames(weights_matrix_subset)
 
           # Step 6: scale weights by variance (from ref_panel, populated by load_LD_matrix)
           variance <- LD_list$ref_panel$variance[match(rownames(weights_matrix_subset), LD_list$ref_panel$variant_id)]
-          results[[molecular_id]][["weights_qced"]][[context]][[study]] <- list(scaled_weights = weights_matrix_subset * sqrt(variance), weights = weights_matrix_subset)
+          mol_res[["weights_qced"]][[context]][[study]] <- list(scaled_weights = weights_matrix_subset * sqrt(variance), weights = weights_matrix_subset)
         }
         # Combine gwas sumstat across different context for a single context group (all variant_ids now in canonical format)
-        gwas_data_sumstats <- gwas_data_sumstats[gwas_data_sumstats$variant_id %in% unique(find_data(results[[molecular_id]][["variant_names"]], c(2, study))), , drop = FALSE]
-        results[[molecular_id]][["gwas_qced"]][[study]] <- rbind(results[[molecular_id]][["gwas_qced"]][[study]], gwas_data_sumstats)
-        results[[molecular_id]][["gwas_qced"]][[study]] <- results[[molecular_id]][["gwas_qced"]][[study]][!duplicated(results[[molecular_id]][["gwas_qced"]][[study]][, c("variant_id", "z")]), ]
+        gwas_data_sumstats <- gwas_data_sumstats[gwas_data_sumstats$variant_id %in% unique(find_data(mol_res[["variant_names"]], c(2, study))), , drop = FALSE]
+        mol_res[["gwas_qced"]][[study]] <- rbind(mol_res[["gwas_qced"]][[study]], gwas_data_sumstats)
+        gwas_qced <- mol_res[["gwas_qced"]][[study]]
+        mol_res[["gwas_qced"]][[study]] <- gwas_qced[!duplicated(gwas_qced[, c("variant_id", "z")]), ]
       }
     }
     twas_weights_data[[molecular_id]] <- NULL
     # extract LD matrix for variants intersect with gwas and twas weights at molecular_id level
-    all_molecular_variants <- unique(find_data(results[[molecular_id]][["gwas_qced"]], c(2, "variant_id")))
+    all_molecular_variants <- unique(find_data(mol_res[["gwas_qced"]], c(2, "variant_id")))
     if (is.null(all_molecular_variants)) {
       results[[molecular_id]] <- NULL
     } else {
       # All variant IDs are now in canonical chr-prefix format
       var_indx <- match(all_molecular_variants, LD_list$LD_variants)
-      results[[molecular_id]][["LD"]] <- as.matrix(LD_list$LD_matrix[var_indx, var_indx])
+      mol_res[["LD"]] <- as.matrix(LD_list$LD_matrix[var_indx, var_indx])
+      results[[molecular_id]] <- mol_res
     }
   }
   # return results
@@ -250,9 +246,9 @@ harmonize_gwas <- function(gwas_file, query_region, ld_variants, col_to_flip=NUL
     }
     if (colnames(gwas_data_sumstats)[1] == "#chrom") colnames(gwas_data_sumstats)[1] <- "chrom" # colname update for tabix
     
-    # Check if sumstats has z-scores or (beta and se)  
-    if (!is.null(gwas_data_sumstats$z)) {  
-      gwas_data_sumstats$z <- gwas_data_sumstats$z  
+    # Check if sumstats has z-scores or (beta and se)
+    if (!is.null(gwas_data_sumstats$z)) {
+      # z-scores already present, nothing to do
     } else if (!is.null(gwas_data_sumstats$beta) && !is.null(gwas_data_sumstats$se)) {  
       gwas_data_sumstats$z <- gwas_data_sumstats$beta / gwas_data_sumstats$se  
     } else {  
@@ -294,47 +290,46 @@ twas_pipeline <- function(twas_weights_data,
                           comment_string="#") {
   # internal function to format TWAS output
   format_twas_data <- function(post_qc_twas_data, twas_table) {
-    weights_list <- do.call(c, lapply(names(post_qc_twas_data), function(molecular_id) {
-      contexts <- names(post_qc_twas_data[[molecular_id]][["weights_qced"]])
-      chrom <- post_qc_twas_data[[molecular_id]][["chrom"]]
-      do.call(c, lapply(contexts, function(context) {
-        weight <- list()
-        data_type <- post_qc_twas_data[[molecular_id]][["data_type"]][[context]]
-        if (!is.null(post_qc_twas_data[[molecular_id]][["model_selection"]]) &&
-          is.list(post_qc_twas_data[[molecular_id]][["model_selection"]]) &&
-          length(post_qc_twas_data[[molecular_id]][["model_selection"]]) > 0) {
-          is_imputable <- post_qc_twas_data[[molecular_id]][["model_selection"]][[context]]$is_imputable
-          if (isTRUE(is_imputable)) {
-            model_selected <- post_qc_twas_data[[molecular_id]][["model_selection"]][[context]]$selected_model
-          } else {
-            model_selected <- NA
-          }
+    weights_list <- purrr::map(names(post_qc_twas_data), function(molecular_id) {
+      mol <- post_qc_twas_data[[molecular_id]]
+      contexts <- names(mol[["weights_qced"]])
+      mol_chrom <- mol[["chrom"]]
+      model_sel <- mol[["model_selection"]]
+
+      purrr::map(contexts, function(context) {
+        data_type <- mol[["data_type"]][[context]]
+        if (!is.null(model_sel) && is.list(model_sel) && length(model_sel) > 0) {
+          is_imputable <- model_sel[[context]]$is_imputable
+          model_selected <- if (isTRUE(is_imputable)) model_sel[[context]]$selected_model else NA
         } else {
           model_selected <- NA
           is_imputable <- NA
         }
-        postqc_scaled_weight <- list()
-        gwas_studies <- names(post_qc_twas_data[[molecular_id]][["weights_qced"]][[context]]) # context-level gwas-studies
-        
-        if (!is.null(model_selected) & isTRUE(is_imputable)) {
-          # TWAS
-          for (study in gwas_studies) {
-            postqc_scaled_weight[[study]] <- post_qc_twas_data[[molecular_id]][["weights_qced"]][[context]][[study]][["scaled_weights"]][, paste0(model_selected, "_weights"), drop = FALSE]
-            colnames(postqc_scaled_weight[[study]]) <- "weight"
-            # variant IDs are in canonical chr-prefix format from allele_qc
-            context_variants <- rownames(post_qc_twas_data[[molecular_id]][["weights_qced"]][[context]][[study]][["scaled_weights"]])
-            context_range <- as.integer(sapply(context_variants, function(variant) strsplit(variant, "\\:")[[1]][2]))
-            weight[[paste0(molecular_id, "|", data_type, "_", context)]][[study]] <- list(
-              chrom = chrom, p0 = min(context_range), p1 = max(context_range),
-              wgt = postqc_scaled_weight[[study]], molecular_id = molecular_id, weight_name = paste0(data_type, "_", context), type = data_type,
-              context = context, n_wgt = length(context_variants)
-            )
-          }
-        }
-        return(weight)
-      }))
-    }))
-    weights <- weights_list[!sapply(weights_list, is.null)]
+        if (is.null(model_selected) || !isTRUE(is_imputable)) return(NULL)
+
+        gwas_studies <- names(mol[["weights_qced"]][[context]])
+        weight_key <- paste0(molecular_id, "|", data_type, "_", context)
+        study_entries <- purrr::map(gwas_studies, function(study) {
+          ctx_weights <- mol[["weights_qced"]][[context]][[study]]
+          scaled_wgt <- ctx_weights[["scaled_weights"]][, paste0(model_selected, "_weights"), drop = FALSE]
+          colnames(scaled_wgt) <- "weight"
+          context_variants <- rownames(ctx_weights[["scaled_weights"]])
+          context_range <- parse_variant_id(context_variants)$pos
+          entry <- list(list(
+            chrom = mol_chrom, p0 = min(context_range), p1 = max(context_range),
+            wgt = scaled_wgt, molecular_id = molecular_id,
+            weight_name = paste0(data_type, "_", context), type = data_type,
+            context = context, n_wgt = length(context_variants)
+          ))
+          names(entry) <- study
+          result <- list(entry)
+          names(result) <- weight_key
+          result
+        }) %>% purrr::list_flatten()
+        study_entries
+      }) %>% purrr::compact() %>% purrr::list_flatten()
+    }) %>% purrr::list_flatten()
+    weights <- purrr::compact(weights_list)
     # Optional susie_weights_intermediate_qced processing
     if ("susie_weights_intermediate_qced" %in% names(post_qc_twas_data[[1]])) {
       susie_weights_intermediate_qced <- setNames(lapply(
diff --git a/R/twas_weights.R b/R/twas_weights.R
index ca59bab3..c2bead97 100644
--- a/R/twas_weights.R
+++ b/R/twas_weights.R
@@ -1,3 +1,41 @@
+# Identify non-zero-variance columns of X. Returns a logical vector.
+#' @importFrom matrixStats colSds
+#' @noRd
+.nonzero_var_columns <- function(X) {
+  sds <- matrixStats::colSds(X, na.rm = TRUE)
+  !is.na(sds) & sds != 0
+}
+
+# Embed a smaller weights matrix into a full-sized zero matrix matching X and Y dimensions.
+# @param weights_matrix The fitted weights (nrow = number of valid columns).
+# @param valid_columns Logical or character vector identifying which columns of X were used.
+# @param X_colnames Column names of the original X.
+# @param Y_colnames Column names of Y.
+# @noRd
+.embed_weights <- function(weights_matrix, valid_columns, n_cols_X, n_cols_Y,
+                           X_colnames = NULL, Y_colnames = NULL) {
+  full <- matrix(0, nrow = n_cols_X, ncol = n_cols_Y)
+  if (!is.null(X_colnames)) rownames(full) <- X_colnames
+  if (!is.null(Y_colnames)) colnames(full) <- Y_colnames
+  full[valid_columns, ] <- weights_matrix
+  full
+}
+
+# Filter weight methods that produced all-zero weights from CV.
+# Returns filtered weight_methods list and warns about removed methods.
+# @noRd
+.filter_zero_weight_methods <- function(weight_methods, twas_weights_res) {
+  is_all_zero <- vapply(twas_weights_res, function(w) all(w == 0, na.rm = TRUE), logical(1))
+  removed <- names(weight_methods)[is_all_zero]
+  if (length(removed) > 0) {
+    warning(sprintf(
+      "Methods %s are removed from CV because all their weights are zeros.",
+      paste(removed, collapse = ", ")
+    ))
+  }
+  weight_methods[!is_all_zero]
+}
+
 #' Cross-Validation for weights selection in Transcriptome-Wide Association Studies (TWAS)
 #'
 #' Performs cross-validation for TWAS, supporting both univariate and multivariate methods.
@@ -171,8 +209,8 @@ twas_weights_cv <- function(X, Y, fold = NULL, sample_partitions = NULL, weight_
       X_test <- dat_split$Xtest
       Y_test <- dat_split$Ytest
 
-      # Remove columns with zero standard error
-      valid_columns <- apply(X_train, 2, function(col) sd(col) != 0)
+      # Remove columns with zero variance
+      valid_columns <- .nonzero_var_columns(X_train)
       X_train <- X_train[, valid_columns, drop = FALSE]
       X_train <- filter_X_with_Y(X_train, Y_train, missing_rate_thresh = 1, maf_thresh = NULL)
       valid_columns <- colnames(X_train)
@@ -193,11 +231,7 @@ twas_weights_cv <- function(X, Y, fold = NULL, sample_partitions = NULL, weight_
           }
           weights_matrix <- do.call(method, c(list(X = X_train, Y = Y_train), args))
           rownames(weights_matrix) <- colnames(X_train)
-          # Adjust the weights matrix to include zeros for invalid columns
-          full_weights_matrix <- matrix(0, nrow = ncol(X), ncol = ncol(Y))
-          rownames(full_weights_matrix) <- colnames(X)
-          colnames(full_weights_matrix) <- colnames(Y)
-          full_weights_matrix[valid_columns, ] <- weights_matrix[valid_columns, ]
+          full_weights_matrix <- .embed_weights(weights_matrix[valid_columns, , drop = FALSE], valid_columns, ncol(X), ncol(Y), colnames(X), colnames(Y))
           Y_pred <- X_test %*% full_weights_matrix
           rownames(Y_pred) <- rownames(X_test)
           return(Y_pred)
@@ -333,8 +367,8 @@ twas_weights <- function(X, Y, weight_methods, num_threads = 1) {
     multivariate_weight_methods <- c("mrmash_weights", "mvsusie_weights")
     args <- weight_methods[[method_name]]
 
-    # Remove columns with zero standard error
-    valid_columns <- apply(X, 2, function(col) sd(col) != 0)
+    # Remove columns with zero variance
+    valid_columns <- .nonzero_var_columns(X)
     X_filtered <- as.matrix(X[, valid_columns, drop = FALSE])
 
     if (method_name %in% multivariate_weight_methods) {
@@ -353,13 +387,7 @@ twas_weights <- function(X, Y, weight_methods, num_threads = 1) {
       }
     }
 
-    # Adjust the weights matrix to include zeros for invalid columns
-    full_weights_matrix <- matrix(0, nrow = ncol(X), ncol = ncol(Y))
-    rownames(full_weights_matrix) <- colnames(X)
-    colnames(full_weights_matrix) <- colnames(Y)
-    full_weights_matrix[valid_columns, ] <- weights_matrix
-
-    return(full_weights_matrix)
+    return(.embed_weights(weights_matrix, valid_columns, ncol(X), ncol(Y), colnames(X), colnames(Y)))
   }
 
   if (num_cores >= 2) {
@@ -471,21 +499,7 @@ twas_weights_pipeline <- function(X,
       weight_methods$susie_weights <- list(refine = FALSE, init_L = max_L, max_L = max_L)
     }
     if (is.null(cv_weight_methods)) {
-      # Filter function to exclude methods with all zero weights and track removed methods
-      is_all_zero <- sapply(res$twas_weights, function(weights) all(weights == 0, na.rm = TRUE))
-
-      # Identify removed methods
-      removed_methods <- names(weight_methods)[is_all_zero]
-
-      # Issue a warning if any methods have been removed
-      if (length(removed_methods) > 0) {
-        warning(sprintf(
-          "Methods %s are removed from CV because all their weights are zeros.",
-          paste(removed_methods, collapse = ", ")
-        ))
-      }
-      # Extract the filtered methods retaining their specific configurations
-      cv_weight_methods <- names(weight_methods)[!is_all_zero]
+      cv_weight_methods <- names(.filter_zero_weight_methods(weight_methods, res$twas_weights))
     }
 
     variants_for_cv <- c()
@@ -553,19 +567,17 @@ twas_multivariate_weights_pipeline <- function(
     cv_threads = 1,
     verbose = FALSE) {
   copy_twas_results <- function(context_names, variant_names, twas_weight, twas_predictions) {
-    res <- setNames(vector("list", length(context_names)), context_names)
-    for (i in names(res)) {
-      if (i %in% colnames(twas_weights_res[[1]])) {
-        res[[i]]$twas_weights <- lapply(twas_weight, function(wgts) {
-          wgts[, i]
-        })
-        res[[i]]$twas_predictions <- lapply(twas_predictions, function(pred) {
-          pred[, i]
-        })
-        res[[i]]$variant_names <- variant_names
+    setNames(lapply(context_names, function(ctx) {
+      if (ctx %in% colnames(twas_weight[[1]])) {
+        list(
+          twas_weights = lapply(twas_weight, function(wgts) wgts[, ctx]),
+          twas_predictions = lapply(twas_predictions, function(pred) pred[, ctx]),
+          variant_names = variant_names
+        )
+      } else {
+        NULL
       }
-    }
-    return(res)
+    }), context_names)
   }
 
   copy_twas_cv_results <- function(twas_result, twas_cv_result) {
@@ -630,22 +642,7 @@ twas_multivariate_weights_pipeline <- function(
       )
     )
 
-    # Filter function to exclude methods with all zero weights and track removed methods
-    is_all_zero <- sapply(twas_weights_res, function(weights) all(weights == 0, na.rm = TRUE))
-
-    # Identify removed methods
-    removed_methods <- names(weight_methods)[is_all_zero]
-
-    # Issue a warning if any methods have been removed
-    if (length(removed_methods) > 0) {
-      warning(sprintf(
-        "Methods %s are removed from CV because all their weights are zeros.",
-        paste(removed_methods, collapse = ", ")
-      ))
-    }
-
-    # Extract the filtered methods retaining their specific configurations
-    weight_methods <- weight_methods[!is_all_zero]
+    weight_methods <- .filter_zero_weight_methods(weight_methods, twas_weights_res)
 
     variants_for_cv <- c()
     if (max_cv_variants <= 0) max_cv_variants <- Inf
diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index 4a56da87..958014dc 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -312,24 +312,40 @@ rss_analysis_pipeline <- function(
       res$outlier_number <- qc_results$outlier_number
     }
   }
-  if (impute & !is.null(qc_method)) {
-    method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed")
-  } else if (!impute & !is.null(qc_method)) {
-    method_name <- paste0(finemapping_method, "_", toupper(qc_method))
-  } else {
-    method_name <- paste0(finemapping_method, "_", "NO_QC")
+  .make_method_name <- function(method, qc_method, impute) {
+    suffix <- if (!is.null(qc_method) && impute) {
+      paste0(toupper(qc_method), "_RAISS_imputed")
+    } else if (!is.null(qc_method)) {
+      toupper(qc_method)
+    } else {
+      "NO_QC"
+    }
+    paste0(method, "_", suffix)
+  }
+
+  .run_reanalysis <- function(sumstats, LD_mat, method, finemapping_opts, pri_coverage, sec_coverage) {
+    susie_rss_pipeline(sumstats, LD_mat,
+      n = n, var_y = var_y,
+      L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step,
+      analysis_method = method,
+      coverage = pri_coverage,
+      secondary_coverage = sec_coverage,
+      signal_cutoff = finemapping_opts$signal_cutoff,
+      min_abs_corr = finemapping_opts$min_abs_corr
+    )
   }
+
+  method_name <- .make_method_name(finemapping_method, qc_method, impute)
   result_list <- list()
   result_list[[method_name]] <- res
   result_list[["rss_data_analyzed"]] <- sumstats
-  
+
   block_cs_metrics <- list()
   if (diagnostics) {
-    if (length(res) > 0) { 
+    if (length(res) > 0) {
         bvsr_res = get_susie_result(res)
         bvsr_cs_num = if(!is.null(bvsr_res)) length(bvsr_res$sets$cs) else NULL
         if (isTRUE(bvsr_cs_num > 0)) { # have CS
-            # Get the names of the credible sets
             cs_names_bvsr = names(bvsr_res$sets$cs)
             block_cs_metrics = extract_cs_info(con_data = res, cs_names = cs_names_bvsr, top_loci_table = res$top_loci)
         } else { # no CS
@@ -350,57 +366,20 @@ rss_analysis_pipeline <- function(
             max(cs_corr_max, na.rm = TRUE)
           })
         if (any(block_cs_metrics$p_value > 1e-4 | block_cs_metrics$max_cs_corr_study_block > 0.5)) {
-          finemapping_method <- "bayesian_conditional_regression"
-          pri_coverage <- finemapping_opts$coverage[1]
-          sec_coverage <- if (length(finemapping_opts$coverage) > 1) finemapping_opts$coverage[-1] else NULL
-          bcr <- susie_rss_pipeline(sumstats, LD_mat,
-            n = n, var_y = var_y,
-            L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step,
-            analysis_method = finemapping_method,
-            coverage = pri_coverage,
-            secondary_coverage = sec_coverage,
-            signal_cutoff = finemapping_opts$signal_cutoff,
-            min_abs_corr = finemapping_opts$min_abs_corr
-          )
+          bcr <- .run_reanalysis(sumstats, LD_mat, "bayesian_conditional_regression",
+            finemapping_opts, pri_coverage, sec_coverage)
           if (!is.null(qc_method)) {
             bcr$outlier_number <- qc_results$outlier_number
           }
-          if (impute & !is.null(qc_method)) {
-              method_name <- paste0(finemapping_method, "_", toupper(qc_method), "_RAISS_imputed")
-          } else if (!impute & !is.null(qc_method)) {
-              method_name <- paste0(finemapping_method, "_", toupper(qc_method))
-          } else {
-              method_name <- paste0(finemapping_method, "_", "NO_QC")
-          }
-          result_list[[method_name]] <- bcr
-          finemapping_method <- "single_effect"
-          sumstats <- preprocess_results$sumstats
-          LD_mat <- preprocess_results$LD_mat
-          ser <- susie_rss_pipeline(sumstats, LD_mat,
-            n = n, var_y = var_y,
-            L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step,
-            analysis_method = finemapping_method,
-            coverage = pri_coverage,
-            secondary_coverage = sec_coverage,
-            signal_cutoff = finemapping_opts$signal_cutoff,
-            min_abs_corr = finemapping_opts$min_abs_corr
-          )
-          result_list[[paste0(finemapping_method, "_NO_QC")]] <- ser
+          result_list[[.make_method_name("bayesian_conditional_regression", qc_method, impute)]] <- bcr
+          ser <- .run_reanalysis(preprocess_results$sumstats, preprocess_results$LD_mat,
+            "single_effect", finemapping_opts, pri_coverage, sec_coverage)
+          result_list[["single_effect_NO_QC"]] <- ser
         }
       } else { # CS = 1 or NA
-        finemapping_method <- "single_effect"
-        sumstats <- preprocess_results$sumstats
-        LD_mat <- preprocess_results$LD_mat
-        ser <- susie_rss_pipeline(sumstats, LD_mat,
-          n = n, var_y = var_y,
-          L = finemapping_opts$init_L, max_L = finemapping_opts$max_L, l_step = finemapping_opts$l_step,
-          analysis_method = finemapping_method,
-          coverage = pri_coverage,
-          secondary_coverage = sec_coverage,
-          signal_cutoff = finemapping_opts$signal_cutoff,
-          min_abs_corr = finemapping_opts$min_abs_corr
-        )
-        result_list[[paste0(finemapping_method, "_NO_QC")]] <- ser
+        ser <- .run_reanalysis(preprocess_results$sumstats, preprocess_results$LD_mat,
+          "single_effect", finemapping_opts, pri_coverage, sec_coverage)
+        result_list[["single_effect_NO_QC"]] <- ser
       }
     result_list[["diagnostics"]] <- block_cs_metrics
     }
diff --git a/man/corr_filter.Rd b/man/corr_filter.Rd
deleted file mode 100644
index d2f83074..00000000
--- a/man/corr_filter.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quantile_twas_weight.R
-\name{corr_filter}
-\alias{corr_filter}
-\title{Filter Highly Correlated SNPs}
-\usage{
-corr_filter(X, cor_thres = 0.8)
-}
-\arguments{
-\item{X}{Matrix of genotypes}
-
-\item{cor_thres}{Correlation threshold for filtering}
-}
-\value{
-A list containing filtered X matrix and filter IDs
-}
-\description{
-Filter Highly Correlated SNPs
-}
diff --git a/man/manhattan_plot.Rd b/man/manhattan_plot.Rd
deleted file mode 100644
index 36d7ee0e..00000000
--- a/man/manhattan_plot.Rd
+++ /dev/null
@@ -1,20 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot.R
-\name{manhattan_plot}
-\alias{manhattan_plot}
-\title{Manhattan plot}
-\usage{
-manhattan_plot(twas_results, gene_data)
-}
-\arguments{
-\item{twas_results}{a data frame of twas results with columns "gene_name", "gene_id","chr","susie_pval","lasso_pval","enet_pval" and "mrash_pval", where twas results are the output of the twas_scan function. "gene_name" is the ensemble ID and "gene_id" is the corresponding gene name,
-"susie_pval", "lasso_pval","enet_pval" and "mrash_pval" are the pvalues of susie and other three competing twas method.}
-
-\item{gene_data}{a data frame with columns "chr", "start", "end", and "ID", "chr" is the chromosome of gene, "start" and "end" are the position, "ID" is the gene name.}
-}
-\value{
-plot object
-}
-\description{
-Manhattan plot
-}
diff --git a/man/multigene_udr.Rd b/man/multigene_udr.Rd
deleted file mode 100644
index f5179606..00000000
--- a/man/multigene_udr.Rd
+++ /dev/null
@@ -1,37 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/multigene_udr.R
-\name{multigene_udr}
-\alias{multigene_udr}
-\title{Perform udr Analysis on Multigene Data}
-\usage{
-multigene_udr(
-  combined_susie_list,
-  coverage,
-  independent_variant_list,
-  n_random,
-  n_null,
-  seed,
-  exclude_condition = NULL
-)
-}
-\arguments{
-\item{combined_susie_list}{A list containing the combined SuSiE and summary statistics results.}
-
-\item{coverage}{A numeric vector representing the coverage values.}
-
-\item{independent_variant_list}{A list of independent variants to be used as a filter.}
-
-\item{n_random}{An integer specifying the number of random samples to generate.}
-
-\item{n_null}{An integer specifying the number of null samples to generate.}
-
-\item{seed}{An integer specifying the seed for random number generation.}
-
-\item{exclude_condition}{A character vector specifying conditions to be excluded from the analysis. Defaults to NULL.}
-}
-\value{
-A list containing the results of the prior, or NULL if conditions are not met.
-}
-\description{
-Perform udr Analysis on Multigene Data
-}
diff --git a/man/twas_pipeline.Rd b/man/twas_pipeline.Rd
index 69647f8b..c0a9a62f 100644
--- a/man/twas_pipeline.Rd
+++ b/man/twas_pipeline.Rd
@@ -17,7 +17,6 @@ twas_pipeline(
   rsq_pval_option = c("pval", "adj_rsq_pval"),
   mr_pval_cutoff = 0.05,
   mr_coverage_column = "cs_coverage_0.95",
-  quantile_twas = FALSE,
   output_twas_data = FALSE,
   event_filters = NULL,
   column_file_path = NULL,
diff --git a/man/venn.Rd b/man/venn.Rd
deleted file mode 100644
index ba9416c2..00000000
--- a/man/venn.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plot.R
-\name{venn}
-\alias{venn}
-\title{Venn Diagram}
-\usage{
-venn(data)
-}
-\arguments{
-\item{data}{a list with the twas siginificant gene_id results of four method "SuSiE","Lasso","Enet" and "MR.ASH"}
-}
-\value{
-plot object
-}
-\description{
-Venn Diagram
-}
diff --git a/pixi.toml b/pixi.toml
index b78eff33..32db2954 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -62,6 +62,7 @@ r45 = {features = ["r45"]}
 "r-gbj" = "*"
 "r-glmnet" = "*"
 "r-harmonicmeanp" = "*"
+"r-igraph" = "*"
 "r-l0learn" = "*"
 "r-magrittr" = "*"
 "r-mashr" = "*"
@@ -72,8 +73,6 @@ r45 = {features = ["r45"]}
 "r-pgenlibr" = "*"
 "r-purrr" = "*"
 "r-qgg" = "*"
-"r-quadprog" = "*"
-"r-quantreg" = "*"
 "r-rcpp" = "*"
 "r-rcpparmadillo" = "*"
 "r-rcppdpr" = "*"
@@ -86,4 +85,3 @@ r45 = {features = ["r45"]}
 "r-tidyr" = "*"
 "r-vctrs" = "*"
 "r-vroom" = "*"
-"r-xicor" = "*"
diff --git a/tests/testthat/test_LD.R b/tests/testthat/test_LD.R
index d45b23a6..24a72aed 100644
--- a/tests/testthat/test_LD.R
+++ b/tests/testthat/test_LD.R
@@ -823,3 +823,108 @@ test_that("can_merge checks chromosome and size", {
   b3 <- data.frame(chrom = "2", size = 100)
   expect_false(pecotmr:::can_merge(b1, b3, max_size = 500))
 })
+
+# ===========================================================================
+# check_ld (regularize_ld)
+# ===========================================================================
+
+test_that("check_ld reports PD for identity matrix", {
+  R <- diag(5)
+  result <- check_ld(R)
+  expect_true(result$is_pd)
+  expect_true(result$is_psd)
+  expect_equal(result$method_applied, "none")
+  expect_equal(result$R, R)
+  expect_equal(result$condition_number, 1)
+})
+
+test_that("check_ld reports PD for well-conditioned correlation matrix", {
+  R <- matrix(0.3, 4, 4)
+  diag(R) <- 1
+  result <- check_ld(R)
+  expect_true(result$is_pd)
+  expect_true(result$is_psd)
+  expect_equal(result$n_negative, 0)
+  expect_equal(result$method_applied, "none")
+})
+
+test_that("check_ld detects non-PSD matrix", {
+  R <- matrix(0.9, 3, 3)
+  diag(R) <- 1
+  R[1, 3] <- R[3, 1] <- -0.5
+  result <- check_ld(R)
+  expect_false(result$is_psd)
+  expect_true(result$n_negative > 0)
+  expect_true(result$min_eigenvalue < 0)
+  expect_equal(result$method_applied, "none")
+})
+
+test_that("check_ld shrink method modifies non-PD matrix", {
+  R <- matrix(0.9, 3, 3)
+  diag(R) <- 1
+  R[1, 3] <- R[3, 1] <- -0.5
+  result <- check_ld(R, method = "shrink")
+  expect_equal(result$method_applied, "shrink")
+  expect_false(identical(result$R, R))
+  # With strong enough shrinkage, result should be PD
+  result2 <- check_ld(R, method = "shrink", shrinkage = 0.5)
+  eig <- eigen(result2$R, symmetric = TRUE)
+  expect_true(all(eig$values > 0))
+})
+
+test_that("check_ld eigenfix method improves non-PD matrix", {
+  R <- matrix(0.9, 3, 3)
+  diag(R) <- 1
+  R[1, 3] <- R[3, 1] <- -0.5
+  original_min_eval <- min(eigen(R, symmetric = TRUE)$values)
+  result <- check_ld(R, method = "eigenfix")
+  expect_equal(result$method_applied, "eigenfix")
+  # Eigenfix should improve the minimum eigenvalue
+  fixed_min_eval <- min(eigen(result$R, symmetric = TRUE)$values)
+  expect_true(fixed_min_eval > original_min_eval)
+  # Unit diagonal preserved
+  expect_equal(diag(result$R), rep(1, 3))
+  # Symmetry preserved
+  expect_equal(result$R, t(result$R))
+})
+
+test_that("check_ld shrink does nothing when matrix is already PD", {
+  R <- diag(3)
+  result <- check_ld(R, method = "shrink")
+  expect_equal(result$method_applied, "none")
+  expect_equal(result$R, R)
+})
+
+test_that("check_ld eigenfix does nothing when matrix is already PD", {
+  R <- diag(3)
+  result <- check_ld(R, method = "eigenfix")
+  expect_equal(result$method_applied, "none")
+  expect_equal(result$R, R)
+})
+
+# ===========================================================================
+# extract_block_matrices: out-of-range blocks
+# ===========================================================================
+
+test_that("extract_block_matrices warns and skips out-of-range blocks", {
+  mat <- diag(4)
+  vnames <- paste0("v", 1:4)
+  rownames(mat) <- colnames(mat) <- vnames
+  block_metadata <- data.frame(
+    block_id = c(1, 2),
+    start_idx = c(1, 10),
+    end_idx = c(2, 12),
+    chrom = c("1", "1"),
+    block_start = c(100, 500),
+    block_end = c(200, 600),
+    size = c(2, 3),
+    stringsAsFactors = FALSE
+  )
+  expect_warning(
+    result <- pecotmr:::extract_block_matrices(mat, block_metadata, vnames),
+    "outside the range"
+  )
+  valid_blocks <- result$ld_matrices[!sapply(result$ld_matrices, is.null)]
+  expect_equal(length(valid_blocks), 1)
+  expect_equal(nrow(valid_blocks[[1]]), 2)
+})
diff --git a/tests/testthat/test_file_utils.R b/tests/testthat/test_file_utils.R
index 8a16fcc7..fc2ea0af 100644
--- a/tests/testthat/test_file_utils.R
+++ b/tests/testthat/test_file_utils.R
@@ -1537,3 +1537,63 @@ test_that("invert_minmax_scaling errors on mismatched lengths", {
   expect_error(invert_minmax_scaling(X, c(0, 0, 0), c(1, 1, 1)),
                "Length of u_min")
 })
+
+# ===========================================================================
+# batch_load_twas_weights
+# ===========================================================================
+
+test_that("batch_load_twas_weights returns empty list for empty input", {
+  expect_message(
+    result <- batch_load_twas_weights(list(), data.frame(region_id = character(), TSS = integer())),
+    "No genes"
+  )
+  expect_equal(length(result), 0)
+})
+
+test_that("batch_load_twas_weights returns single batch when total memory fits", {
+  twas <- list(
+    gene1 = list(a = 1:10),
+    gene2 = list(a = 1:10)
+  )
+  meta <- data.frame(region_id = c("gene1", "gene2"), TSS = c(100, 200))
+  expect_message(
+    result <- batch_load_twas_weights(twas, meta, max_memory_per_batch = 1000),
+    "No need to split"
+  )
+  expect_equal(length(result), 1)
+  expect_true(all(c("gene1", "gene2") %in% names(result[[1]])))
+})
+
+test_that("batch_load_twas_weights splits into multiple batches", {
+  # Create data large enough to require splitting
+  twas <- list(
+    gene1 = rnorm(1e5),
+    gene2 = rnorm(1e5),
+    gene3 = rnorm(1e5)
+  )
+  # Each gene is ~0.76 MB
+  gene_size_mb <- as.numeric(object.size(twas[[1]])) / (1024^2)
+  # Set limit so at most 2 genes fit per batch
+  max_mb <- gene_size_mb * 1.5
+  meta <- data.frame(region_id = c("gene1", "gene2", "gene3"), TSS = c(100, 200, 300))
+  result <- batch_load_twas_weights(twas, meta, max_memory_per_batch = max_mb)
+  expect_true(length(result) >= 2)
+  # All genes should be present across batches
+  all_genes <- unlist(lapply(result, names))
+  expect_true(all(c("gene1", "gene2", "gene3") %in% all_genes))
+})
+
+test_that("batch_load_twas_weights puts oversized gene in its own batch", {
+  twas <- list(
+    gene_small = list(a = 1:10),
+    gene_big = rnorm(1e6)
+  )
+  big_size_mb <- as.numeric(object.size(twas$gene_big)) / (1024^2)
+  small_size_mb <- as.numeric(object.size(twas$gene_small)) / (1024^2)
+  # Set limit between small and big
+  max_mb <- big_size_mb * 0.5
+  meta <- data.frame(region_id = c("gene_small", "gene_big"), TSS = c(100, 200))
+  result <- batch_load_twas_weights(twas, meta, max_memory_per_batch = max_mb)
+  # Big gene should be in its own batch
+  expect_true(length(result) >= 2)
+})
diff --git a/tests/testthat/test_ld_loader.R b/tests/testthat/test_ld_loader.R
new file mode 100644
index 00000000..3576794f
--- /dev/null
+++ b/tests/testthat/test_ld_loader.R
@@ -0,0 +1,122 @@
+context("ld_loader")
+
+# ===========================================================================
+# ld_loader: input validation
+# ===========================================================================
+
+test_that("ld_loader errors when no source is provided", {
+  expect_error(ld_loader(), "Provide exactly one")
+})
+
+test_that("ld_loader errors when multiple sources are provided", {
+  R <- list(matrix(1, 2, 2))
+  X <- list(matrix(1, 3, 2))
+  expect_error(ld_loader(R_list = R, X_list = X), "Provide exactly one")
+})
+
+# ===========================================================================
+# ld_loader: R_list branch
+# ===========================================================================
+
+test_that("ld_loader with R_list returns a function", {
+  R <- list(matrix(c(1, 0.5, 0.5, 1), 2, 2))
+  loader <- ld_loader(R_list = R)
+  expect_type(loader, "closure")
+})
+
+test_that("ld_loader R_list returns correct matrix", {
+  R1 <- matrix(c(1, 0.3, 0.3, 1), 2, 2)
+  R2 <- matrix(c(1, 0.8, 0.8, 1), 2, 2)
+  loader <- ld_loader(R_list = list(R1, R2))
+  expect_equal(loader(1), R1)
+  expect_equal(loader(2), R2)
+})
+
+test_that("ld_loader R_list with max_variants downsamples", {
+  set.seed(42)
+  R <- matrix(0.1, 10, 10)
+  diag(R) <- 1
+  loader <- ld_loader(R_list = list(R), max_variants = 5)
+  result <- loader(1)
+  expect_equal(nrow(result), 5)
+  expect_equal(ncol(result), 5)
+})
+
+test_that("ld_loader R_list without max_variants returns full matrix", {
+  R <- matrix(0.1, 10, 10)
+  diag(R) <- 1
+  loader <- ld_loader(R_list = list(R))
+  result <- loader(1)
+  expect_equal(nrow(result), 10)
+})
+
+test_that("ld_loader R_list max_variants larger than matrix returns full matrix", {
+  R <- matrix(0.1, 3, 3)
+  diag(R) <- 1
+  loader <- ld_loader(R_list = list(R), max_variants = 100)
+  result <- loader(1)
+  expect_equal(nrow(result), 3)
+})
+
+# ===========================================================================
+# ld_loader: X_list branch
+# ===========================================================================
+
+test_that("ld_loader with X_list returns a function", {
+  X <- list(matrix(rnorm(30), 10, 3))
+  loader <- ld_loader(X_list = X)
+  expect_type(loader, "closure")
+})
+
+test_that("ld_loader X_list returns correct matrix", {
+  X1 <- matrix(1:12, 4, 3)
+  X2 <- matrix(1:8, 4, 2)
+  loader <- ld_loader(X_list = list(X1, X2))
+  expect_equal(loader(1), X1)
+  expect_equal(loader(2), X2)
+})
+
+test_that("ld_loader X_list with max_variants downsamples columns", {
+  set.seed(42)
+  X <- matrix(rnorm(50), 10, 5)
+  loader <- ld_loader(X_list = list(X), max_variants = 3)
+  result <- loader(1)
+  expect_equal(nrow(result), 10)
+  expect_equal(ncol(result), 3)
+})
+
+test_that("ld_loader X_list max_variants larger than ncol returns full matrix", {
+  X <- matrix(rnorm(12), 4, 3)
+  loader <- ld_loader(X_list = list(X), max_variants = 100)
+  result <- loader(1)
+  expect_equal(ncol(result), 3)
+})
+
+# ===========================================================================
+# ld_loader: ld_meta_path branch validation
+# ===========================================================================
+
+test_that("ld_loader with ld_meta_path but no regions errors", {
+  expect_error(
+    ld_loader(ld_meta_path = "/some/path"),
+    "regions.*required"
+  )
+})
+
+# ===========================================================================
+# ld_loader: LD_info branch validation
+# ===========================================================================
+
+test_that("ld_loader with LD_info errors when not a data.frame", {
+  expect_error(
+    ld_loader(LD_info = "not_a_df"),
+    "LD_info must be a data.frame"
+  )
+})
+
+test_that("ld_loader with LD_info errors when missing LD_file column", {
+  expect_error(
+    ld_loader(LD_info = data.frame(col1 = "a")),
+    "LD_info must be a data.frame with column 'LD_file'"
+  )
+})
diff --git a/tests/testthat/test_mash_wrapper.R b/tests/testthat/test_mash_wrapper.R
index 06ba5f1c..9f3139bd 100644
--- a/tests/testthat/test_mash_wrapper.R
+++ b/tests/testthat/test_mash_wrapper.R
@@ -882,41 +882,38 @@ test_that("mash_rand_null_sample caps random sample at available rows", {
 test_that("mash_pipeline uses residual_correlation when null data is empty", {
   skip_if_not_installed("mashr")
   skip_if_not_installed("flashier")
-  skip_if_not_installed("udr")
 
+  n_cond <- 3
+  n_strong <- 15
   mock_input <- list(
     null.b = numeric(0),
     null.s = numeric(0),
-    random.b = matrix(rnorm(6), 3, 2),
-    random.s = matrix(abs(rnorm(6)), 3, 2),
-    strong.b = matrix(rnorm(20), 10, 2),
-    strong.s = matrix(abs(rnorm(20)), 10, 2),
-    strong.z = matrix(rnorm(20), 10, 2)
+    random.b = matrix(rnorm(n_strong * n_cond), n_strong, n_cond),
+    random.s = matrix(abs(rnorm(n_strong * n_cond)) + 0.1, n_strong, n_cond),
+    strong.b = matrix(rnorm(n_strong * n_cond), n_strong, n_cond),
+    strong.s = matrix(abs(rnorm(n_strong * n_cond)) + 0.1, n_strong, n_cond)
   )
-  custom_vhat <- matrix(c(1, 0.3, 0.3, 1), 2, 2)
+  custom_vhat <- matrix(c(1, 0.3, 0.1, 0.3, 1, 0.2, 0.1, 0.2, 1), 3, 3)
 
   result <- mash_pipeline(mock_input, alpha = 1, residual_correlation = custom_vhat)
   expect_true(is.list(result))
-  # Verify it did not produce an error marker
-
   expect_null(result$error)
 })
 
 test_that("mash_pipeline does not error about mashr when mashr is available", {
   skip_if_not_installed("mashr")
   skip_if_not_installed("flashier")
-  skip_if_not_installed("udr")
 
+  n_cond <- 3
+  n_strong <- 15
   mock_input <- list(
     null.b = numeric(0),
     null.s = numeric(0),
-    random.b = matrix(rnorm(6), 3, 2),
-    random.s = matrix(abs(rnorm(6)) + 0.1, 3, 2),
-    strong.b = matrix(rnorm(20), 10, 2),
-    strong.s = matrix(abs(rnorm(20)) + 0.1, 10, 2),
-    strong.z = matrix(rnorm(20), 10, 2)
+    random.b = matrix(rnorm(n_strong * n_cond), n_strong, n_cond),
+    random.s = matrix(abs(rnorm(n_strong * n_cond)) + 0.1, n_strong, n_cond),
+    strong.b = matrix(rnorm(n_strong * n_cond), n_strong, n_cond),
+    strong.s = matrix(abs(rnorm(n_strong * n_cond)) + 0.1, n_strong, n_cond)
   )
-  # Should not error about missing mashr since it is installed
   result <- mash_pipeline(mock_input, alpha = 1)
   expect_true(is.list(result))
 })
@@ -924,7 +921,6 @@ test_that("mash_pipeline does not error about mashr when mashr is available", {
 test_that("mash_pipeline uses identity matrix when null data empty and no residual_correlation", {
   skip_if_not_installed("mashr")
   skip_if_not_installed("flashier")
-  skip_if_not_installed("udr")
 
   n_cond <- 3
   n_strong <- 15
@@ -934,13 +930,11 @@ test_that("mash_pipeline uses identity matrix when null data empty and no residu
     random.b = matrix(rnorm(n_strong * n_cond), n_strong, n_cond),
     random.s = matrix(abs(rnorm(n_strong * n_cond)) + 0.1, n_strong, n_cond),
     strong.b = matrix(rnorm(n_strong * n_cond), n_strong, n_cond),
-    strong.s = matrix(abs(rnorm(n_strong * n_cond)) + 0.1, n_strong, n_cond),
-    strong.z = matrix(rnorm(n_strong * n_cond), n_strong, n_cond)
+    strong.s = matrix(abs(rnorm(n_strong * n_cond)) + 0.1, n_strong, n_cond)
   )
 
   result <- mash_pipeline(mock_input, alpha = 1, residual_correlation = NULL)
   expect_true(is.list(result))
-  # Verify it did not produce an error marker
   expect_null(result$error)
 })
 
diff --git a/tests/testthat/test_plot.R b/tests/testthat/test_plot.R
deleted file mode 100644
index 87c0da1a..00000000
--- a/tests/testthat/test_plot.R
+++ /dev/null
@@ -1,26 +0,0 @@
-context("plot")
-
-library(testthat)
-
-# =========================================================================
-# plot.R: venn function (lines 6-8)
-# =========================================================================
-
-test_that("venn function creates a plot object without error", {
-  skip_if_not_installed("ggvenn")
-  data <- list(
-    SuSiE = c("gene1", "gene2", "gene3"),
-    Lasso = c("gene2", "gene3", "gene4"),
-    Enet = c("gene1", "gene4", "gene5"),
-    MR.ASH = c("gene3", "gene5", "gene6")
-  )
-  # Use pdf(NULL) to suppress graphics device output
-  pdf(NULL)
-  result <- tryCatch(
-    pecotmr:::venn(data),
-    error = function(e) e
-  )
-  dev.off()
-  # Check it returned something (not an error)
-  expect_false(inherits(result, "error"))
-})

From b677b8c352edf6f53fe4692c377aa78099da13a3 Mon Sep 17 00:00:00 2001
From: Daniel Nachun <daniel.nachun@gmail.com>
Date: Fri, 17 Apr 2026 14:06:05 -0700
Subject: [PATCH 3/5] update recipe

---
 .github/recipe/recipe.yaml | 7 -------
 DESCRIPTION                | 2 --
 pixi.toml                  | 2 --
 3 files changed, 11 deletions(-)

diff --git a/.github/recipe/recipe.yaml b/.github/recipe/recipe.yaml
index d93c8751..cd5c3358 100644
--- a/.github/recipe/recipe.yaml
+++ b/.github/recipe/recipe.yaml
@@ -30,8 +30,6 @@ requirements:
     - bioconductor-snpstats
     - r-base
     - r-bglr
-    - r-bigsnpr
-    - r-bigstatsr
     - r-coda
     - r-coloc
     - r-colocboost
@@ -75,8 +73,6 @@ requirements:
     - bioconductor-snpstats
     - r-base
     - r-bglr
-    - r-bigsnpr
-    - r-bigstatsr
     - r-coda
     - r-coloc
     - r-colocboost
@@ -100,8 +96,6 @@ requirements:
     - r-pgenlibr
     - r-purrr
     - r-qgg
-    - r-quadprog
-    - r-quantreg
     - r-rcpp
     - r-rcpparmadillo
     - r-rcppdpr
@@ -114,7 +108,6 @@ requirements:
     - r-tidyr
     - r-vctrs
     - r-vroom
-    - r-xicor
 
 tests:
   - script:
diff --git a/DESCRIPTION b/DESCRIPTION
index b81719cf..0bac8360 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -18,8 +18,6 @@ Imports:
     IRanges,
     Rcpp,
     S4Vectors,
-    bigsnpr,
-    bigstatsr,
     coloc,
     doFuture,
     dplyr,
diff --git a/pixi.toml b/pixi.toml
index 32db2954..384c6b3c 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -48,8 +48,6 @@ r45 = {features = ["r45"]}
 "bioconductor-snpstats" = "*"
 "r-base" = "*"
 "r-bglr" = "*"
-"r-bigsnpr" = "*"
-"r-bigstatsr" = "*"
 "r-coda" = "*"
 "r-coloc" = "*"
 "r-colocboost" = "*"

From 2e9e7712865542b7d487f006274c23ec9a1e623e Mon Sep 17 00:00:00 2001
From: Daniel Nachun <daniel.nachun@gmail.com>
Date: Fri, 17 Apr 2026 16:58:20 -0700
Subject: [PATCH 4/5] clean up and optimize more code

---
 NAMESPACE                                 |   3 +
 R/colocboost_pipeline.R                   | 216 ++++++++-----------
 R/encoloc.R                               | 175 +++++++--------
 R/mr.R                                    | 250 ++++++++++------------
 tests/testthat/test_colocboost_pipeline.R |  39 ++++
 tests/testthat/test_mr.R                  |  58 +++++
 6 files changed, 388 insertions(+), 353 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index faf1e899..0d964b26 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -143,7 +143,9 @@ importFrom(dplyr,group_by)
 importFrom(dplyr,if_else)
 importFrom(dplyr,inner_join)
 importFrom(dplyr,intersect)
+importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
+importFrom(dplyr,n)
 importFrom(dplyr,pull)
 importFrom(dplyr,rename)
 importFrom(dplyr,row_number)
@@ -157,6 +159,7 @@ importFrom(future,availableCores)
 importFrom(future,multicore)
 importFrom(future,plan)
 importFrom(magrittr,"%>%")
+importFrom(matrixStats,colSds)
 importFrom(matrixStats,colVars)
 importFrom(purrr,compact)
 importFrom(purrr,exec)
diff --git a/R/colocboost_pipeline.R b/R/colocboost_pipeline.R
index ebdaae75..e9e4e30e 100644
--- a/R/colocboost_pipeline.R
+++ b/R/colocboost_pipeline.R
@@ -14,6 +14,20 @@ build_ld_args <- function(ld_list, subset = NULL) {
   if (is_geno) list(X_ref = ld_list) else list(LD = ld_list)
 }
 
+# Run colocboost with tryCatch and timing.
+# @noRd
+.run_colocboost <- function(label, ...) {
+  t1 <- Sys.time()
+  res <- tryCatch(
+    colocboost(...),
+    error = function(e) {
+      message(label, " failed: ", conditionMessage(e))
+      NULL
+    }
+  )
+  list(result = res, time = Sys.time() - t1)
+}
+
 #' Multi-trait colocalization analysis pipeline
 #'
 #' This function perform a multi-trait colocalization using ColocBoost
@@ -113,61 +127,54 @@ colocboost_analysis_pipeline <- function(region_data,
     return(filtered_events)
   }
 
-  # - extract contexts and studies from region data
+  # - extract contexts and studies from region data, handling both pre- and post-QC
   extract_contexts_studies <- function(region_data, phenotypes_init = NULL) {
     individual_data <- region_data$individual_data
     sumstat_data <- region_data$sumstat_data
+    phenotypes <- list("individual_contexts" = NULL, "sumstat_studies" = NULL)
 
-    if (is.null(phenotypes_init)) {
-      # - inital setup
-      phenotypes <- list("individual_contexts" = NULL, "sumstat_studies" = NULL)
-      if (!is.null(individual_data)) {
+    # Extract individual contexts
+    if (!is.null(individual_data)) {
+      if (is.null(phenotypes_init)) {
         phenotypes$individual_contexts <- names(individual_data$residual_Y)
       } else {
-        message("No individual data in this region!")
-      }
-      if (!is.null(sumstat_data)) {
-        phenotypes$sumstat_studies <- sapply(sumstat_data$sumstats, function(ss) names(ss)) %>%
-          unlist() %>%
-          as.vector()
-      } else {
-        message("No sumstat data in this region!")
-      }
-    } else {
-      # - after QC
-      phenotypes <- list("individual_contexts" = NULL, "sumstat_studies" = NULL)
-      if (!is.null(individual_data)) {
         null_Y <- which(sapply(individual_data$Y, is.null))
         if (length(null_Y) == 0) {
           message("All individual data pass QC steps.")
           phenotypes$individual_contexts <- names(individual_data$Y)
-        } else if (length(null_Y) != length(individual_data$Y)) {
+        } else if (length(null_Y) < length(individual_data$Y)) {
           message(paste(
             "Skipping follow-up analysis for individual traits",
             paste(names(individual_data$Y)[null_Y], collapse = ";"), "after QC."
           ))
           phenotypes$individual_contexts <- names(individual_data$Y)[-null_Y]
-        } else if (length(null_Y) == length(individual_data$Y)) {
+        } else {
           message("No individual data pass QC.")
         }
-      } else {
-        message("No individual data pass QC.")
       }
-      if (!is.null(sumstat_data)) {
+    } else {
+      message(if (is.null(phenotypes_init)) "No individual data in this region!" else "No individual data pass QC.")
+    }
+
+    # Extract sumstat studies
+    if (!is.null(sumstat_data)) {
+      if (is.null(phenotypes_init)) {
+        phenotypes$sumstat_studies <- unlist(sapply(sumstat_data$sumstats, names))
+      } else {
         phenotypes$sumstat_studies <- names(sumstat_data$sumstats)
-        sumstat_studies_init <- phenotypes_init$sumstat_studies
-        if (length(sumstat_studies_init) == length(phenotypes$sumstat_studies)) {
+        if (length(phenotypes_init$sumstat_studies) == length(phenotypes$sumstat_studies)) {
           message("All sumstat studies pass QC steps.")
         } else {
           message(paste(
             "Skipping follow-up analysis for sumstat studies",
-            paste(setdiff(sumstat_studies_init, phenotypes$sumstat_studies), collapse = ";"), "after QC."
+            paste(setdiff(phenotypes_init$sumstat_studies, phenotypes$sumstat_studies), collapse = ";"), "after QC."
           ))
         }
-      } else {
-        message("No sumstat data pass QC.")
       }
+    } else {
+      message(if (is.null(phenotypes_init)) "No sumstat data in this region!" else "No sumstat data pass QC.")
     }
+
     return(phenotypes)
   }
 
@@ -243,13 +250,12 @@ colocboost_analysis_pipeline <- function(region_data,
       Y <- NULL
     }
     if (!is.null(Y)) {
-      Y <- lapply(seq_along(Y), function(i) {
-        y <- Y[[i]]
-        lapply(seq_len(ncol(y)), function(j) y[, j, drop = FALSE] %>% setNames(colnames(y)[j]))
+      Y_split <- purrr::imap(Y, function(y, i) {
+        purrr::map(seq_len(ncol(y)), function(j) setNames(y[, j, drop = FALSE], colnames(y)[j]))
       })
-      dict_YX <- cbind(seq_along(Reduce("c", Y)), rep(seq_along(Y), sapply(Y, length)))
-      Y <- Reduce("c", Y)
-      Y <- Y %>% setNames(sapply(Y, colnames))
+      dict_YX <- cbind(seq_along(unlist(Y_split, recursive = FALSE)), rep(seq_along(Y_split), purrr::map_int(Y_split, length)))
+      Y <- unlist(Y_split, recursive = FALSE)
+      names(Y) <- sapply(Y, colnames)
     }
   } else {
     X <- Y <- dict_YX <- NULL
@@ -309,49 +315,28 @@ colocboost_analysis_pipeline <- function(region_data,
   # - run xQTL-only version of ColocBoost
   if (xqtl_coloc & !is.null(X)) {
     message(paste("====== Performing xQTL-only ColocBoost on", length(Y), "contexts. ====="))
-    t11 <- Sys.time()
     traits <- names(Y)
-    focal_outcome_idx <- NULL
-    if (!is.null(focal_trait)) {
-      if (focal_trait %in% traits) {
-        focal_outcome_idx <- which(traits == focal_trait)
-      }
-    }
-    res_xqtl <- tryCatch(
-      colocboost(
-        X = X, Y = Y, dict_YX = dict_YX,
-        outcome_names = traits, focal_outcome_idx = focal_outcome_idx,
-        output_level = 2, ...
-      ),
-      error = function(e) {
-        message("xQTL-only ColocBoost failed: ", conditionMessage(e))
-        return(NULL)
-      }
+    focal_outcome_idx <- if (!is.null(focal_trait) && focal_trait %in% traits) which(traits == focal_trait) else NULL
+    cb_res <- .run_colocboost("xQTL-only ColocBoost",
+      X = X, Y = Y, dict_YX = dict_YX,
+      outcome_names = traits, focal_outcome_idx = focal_outcome_idx,
+      output_level = 2, ...
     )
-    t12 <- Sys.time()
-    analysis_results$xqtl_coloc <- res_xqtl
-    analysis_results$computing_time$Analysis$xqtl_coloc <- t12 - t11
+    analysis_results$xqtl_coloc <- cb_res$result
+    analysis_results$computing_time$Analysis$xqtl_coloc <- cb_res$time
   }
   # - run joint GWAS no focaled version of ColocBoost
   if (joint_gwas & !is.null(sumstats)) {
     message(paste("====== Performing non-focaled version GWAS-xQTL ColocBoost on", length(Y), "contexts and", length(sumstats), "GWAS. ====="))
-    t21 <- Sys.time()
     traits <- c(names(Y), names(sumstats))
     ld_args <- build_ld_args(LD_mat)
-    res_gwas <- tryCatch(
-      do.call(colocboost, c(list(
-        X = X, Y = Y, sumstat = sumstats,
-        dict_YX = dict_YX, dict_sumstatLD = dict_sumstatLD,
-        outcome_names = traits, focal_outcome_idx = NULL,
-        output_level = 2), ld_args, list(...))),
-      error = function(e) {
-        message("Joint GWAS ColocBoost failed: ", conditionMessage(e))
-        return(NULL)
-      }
-    )
-    t22 <- Sys.time()
-    analysis_results$joint_gwas <- res_gwas
-    analysis_results$computing_time$Analysis$joint_gwas <- t22 - t21
+    cb_res <- do.call(.run_colocboost, c(list("Joint GWAS ColocBoost",
+      X = X, Y = Y, sumstat = sumstats,
+      dict_YX = dict_YX, dict_sumstatLD = dict_sumstatLD,
+      outcome_names = traits, focal_outcome_idx = NULL,
+      output_level = 2), ld_args, list(...)))
+    analysis_results$joint_gwas <- cb_res$result
+    analysis_results$computing_time$Analysis$joint_gwas <- cb_res$time
   }
   # - run focaled version of ColocBoost for each GWAS
   if (separate_gwas & !is.null(sumstats)) {
@@ -363,17 +348,13 @@ colocboost_analysis_pipeline <- function(region_data,
       dict <- dict_sumstatLD[i_gwas, ]
       traits <- c(names(Y), current_study)
       ld_args_sep <- build_ld_args(LD_mat, subset = dict[2])
-      res_gwas_separate[[current_study]] <- tryCatch(
-        do.call(colocboost, c(list(
+      cb_res <- do.call(.run_colocboost, c(
+        list(paste("Separate GWAS ColocBoost for", current_study),
           X = X, Y = Y, sumstat = sumstats[dict[1]],
           dict_YX = dict_YX,
           outcome_names = traits, focal_outcome_idx = length(traits),
-          output_level = 2), ld_args_sep, list(...))),
-        error = function(e) {
-          message("Separate GWAS ColocBoost failed for ", current_study, ": ", conditionMessage(e))
-          return(NULL)
-        }
-      )
+          output_level = 2), ld_args_sep, list(...)))
+      res_gwas_separate[[current_study]] <- cb_res$result
     }
     t32 <- Sys.time()
     analysis_results$separate_gwas <- res_gwas_separate
@@ -559,37 +540,27 @@ qc_regional_data <- function(region_data,
                                              pip_cutoff_to_skip_ind = 0) {
     # - add context to colname of Y
     Y <- add_context_to_Y(Y)
-    n_context <- length(X)
-    residual_X <- residual_Y <- list()
-    keep_contexts <- c()
-    for (i_context in 1:n_context) {
-      resX <- X[[i_context]]
-      resY <- Y[[i_context]]
-      maf <- MAF[[i_context]]
-      context <- names(Y)[i_context]
-      if (is.null(resY)) next
-      # - remove variants with maf < maf_cutoff
-      # tmp <- filter_resX_maf(resX, maf, maf_cutoff = maf_cutoff)
+    results <- purrr::imap(X, function(resX, context) {
+      resY <- Y[[context]]
+      maf <- MAF[[context]]
+      i_context <- match(context, names(X))
+      if (is.null(resY)) return(NULL)
       resX <- filter_X(resX, missing_rate_thresh = NULL, maf_thresh = maf_cutoff, maf = maf)
-      # Initial PIP check
       resY <- filter_resY_pip(resX, resY, pip_cutoff = pip_cutoff_to_skip_ind[i_context], context = context)
-      if (!is.null(resY)) {
-        residual_X <- c(residual_X, list(resX))
-        residual_Y <- c(residual_Y, list(resY))
-        keep_contexts <- c(keep_contexts, context)
-      }
-    }
-    if (length(keep_contexts) == 0) {
-      message(paste("Skipping follow-up analysis for all contexts."))
+      if (is.null(resY)) return(NULL)
+      list(X = resX, Y = resY)
+    }) %>% purrr::compact()
+
+    if (length(results) == 0) {
+      message("Skipping follow-up analysis for all contexts.")
       return(NULL)
-    } else {
-      message(paste("Region includes the following contexts after inital screening:", paste(keep_contexts, collapse = ";"), "."))
-      names(residual_X) <- names(residual_Y) <- keep_contexts
-      return(list(
-        X = residual_X,
-        Y = residual_Y
-      ))
     }
+    keep_contexts <- names(results)
+    message(paste("Region includes the following contexts after inital screening:", paste(keep_contexts, collapse = ";"), "."))
+    list(
+      X = purrr::map(results, "X"),
+      Y = purrr::map(results, "Y")
+    )
   }
 
   # - individual level data QC
@@ -622,8 +593,13 @@ qc_regional_data <- function(region_data,
                                          impute = TRUE,
                                          impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01)) {
     n_LD <- length(sumstat_data$LD_info)
-    final_sumstats <- final_LD <- NULL
-    LD_match <- c()
+    # Collect results into lists and flatten at the end
+    collected_sumstats <- list()
+    collected_LD <- list()
+    collected_LD_match <- character()
+    # Track LD matrices by variant signature for O(1) deduplication
+    ld_variant_index <- list()
+
     for (i in 1:n_LD) {
       LD_data <- sumstat_data$LD_info[[i]]
       sumstats <- sumstat_data$sumstats[[i]]
@@ -710,27 +686,21 @@ qc_regional_data <- function(region_data,
           mat_to_store <- R_mat
         }
 
-        # Deduplicate: reuse existing matrix if variants match
-        if (length(final_LD) == 0) {
-          final_LD <- c(final_LD, list(mat_to_store) %>% setNames(conditions_sumstat))
-          final_sumstats <- c(final_sumstats, list(sumstat) %>% setNames(conditions_sumstat))
-          LD_match <- c(LD_match, conditions_sumstat)
+        # Collect sumstat
+        collected_sumstats[[conditions_sumstat]] <- sumstat
+
+        # Deduplicate LD using variant signature hash
+        variant_key <- paste(colnames(mat_to_store), collapse = ",")
+        if (variant_key %in% names(ld_variant_index)) {
+          collected_LD_match <- c(collected_LD_match, ld_variant_index[[variant_key]])
         } else {
-          variants <- colnames(mat_to_store)
-          final_sumstats <- c(final_sumstats, list(sumstat) %>% setNames(conditions_sumstat))
-          exist_variants <- lapply(final_LD, colnames)
-          if_exist <- sapply(exist_variants, function(v) all(variants == v))
-          pos <- which(if_exist)
-          if (length(pos) == 0) {
-            final_LD <- c(final_LD, list(mat_to_store) %>% setNames(conditions_sumstat))
-            LD_match <- c(LD_match, conditions_sumstat)
-          } else {
-            LD_match <- c(LD_match, names(final_LD)[pos[1]])
-          }
+          collected_LD[[conditions_sumstat]] <- mat_to_store
+          ld_variant_index[[variant_key]] <- conditions_sumstat
+          collected_LD_match <- c(collected_LD_match, conditions_sumstat)
         }
       }
     }
-    return(list(sumstats = final_sumstats, LD_mat = final_LD, LD_match = LD_match))
+    return(list(sumstats = collected_sumstats, LD_mat = collected_LD, LD_match = collected_LD_match))
   }
 
 
diff --git a/R/encoloc.R b/R/encoloc.R
index 1482a836..0a6a75f5 100644
--- a/R/encoloc.R
+++ b/R/encoloc.R
@@ -31,21 +31,20 @@ xqtl_enrichment_wrapper <- function(xqtl_files, gwas_files,
                                       xqtl_finemapping_obj = NULL, gwas_finemapping_obj = NULL,
                                       xqtl_varname_obj = NULL, gwas_varname_obj = NULL) {
     # Load and process GWAS data
-    gwas_pip <- list()
-    for (file in gwas_files) {
-      raw_data <- readRDS(file)[[1]] # changed
+    gwas_pip_list <- purrr::map(gwas_files, function(file) {
+      raw_data <- readRDS(file)[[1]]
       gwas_data <- if (!is.null(gwas_finemapping_obj)) get_nested_element(raw_data, gwas_finemapping_obj) else raw_data
       pip <- gwas_data$pip
       if (!is.null(gwas_varname_obj)) names(pip) <- get_nested_element(raw_data, gwas_varname_obj)
-      gwas_pip <- c(gwas_pip, list(pip))
-    }
+      pip
+    })
 
     # Check for unique variant names in GWAS pip vectors
-    all_variant_names <- unique(unlist(lapply(gwas_pip, names)))
+    all_variant_names <- unique(unlist(purrr::map(gwas_pip_list, names)))
     if (length(unique(all_variant_names)) != length(all_variant_names)) {
       stop("Non-unique variant names found in GWAS data with different pip values.")
     }
-    gwas_pip <- unlist(gwas_pip)
+    gwas_pip <- unlist(gwas_pip_list)
 
     # Process xQTL data
     xqtl_data <- lapply(xqtl_files, function(file) {
@@ -93,15 +92,9 @@ filter_and_order_coloc_results <- function(coloc_results_fil) {
   }
 
   cs_num <- ncol(coloc_results_fil) - 1
-  ordered_results <- list()
-
-  for (n in 1:cs_num) {
-    # Selecting relevant columns and ordering
-    tmp_coloc_results_fil <- coloc_results_fil[, c(1, n + 1)] %>% .[order(.[, 2], decreasing = TRUE), ]
-    ordered_results[[n]] <- tmp_coloc_results_fil
-  }
-
-  return(ordered_results)
+  purrr::map(seq_len(cs_num), function(n) {
+    coloc_results_fil[, c(1, n + 1)] %>% .[order(.[, 2], decreasing = TRUE), ]
+  })
 }
 
 #' Function to calculate cumulative sum
@@ -160,30 +153,21 @@ process_coloc_results <- function(coloc_result, LD_meta_file_path, analysis_regi
 
     # prepare to calculate purity
     ordered_results <- filter_and_order_coloc_results(coloc_results_fil)
-    cs <- list()
-    purity <- NULL
+    cs <- purrr::map(ordered_results, function(res) {
+      csm <- calculate_cumsum(res)
+      res[, 1][1:min(which(csm > coverage))]
+    })
 
-    for (n in seq_along(ordered_results)) {
-      tmp_coloc_results_fil <- ordered_results[[n]]
-      tmp_coloc_results_fil_csm <- calculate_cumsum(tmp_coloc_results_fil)
-      cs[[n]] <- tmp_coloc_results_fil[, 1][1:(which(tmp_coloc_results_fil_csm > coverage) %>% min())]
+    purity <- purrr::map_dfr(seq_along(cs), function(n) {
       variants <- normalize_variant_id(cs[[n]])
-
-      # Load LD for the region narrowed to actual variant positions
-      ext_ld <- extract_ld_for_variants(LD_meta_file_path, analysis_region, variants)
-
-      # Calculate purity
       if (null_index > 0 && null_index %in% variants) {
-        purity <- rbind(purity, c(-9, -9, -9))
+        data.frame(min.abs.corr = -9, mean.abs.corr = -9, median.abs.corr = -9)
       } else {
-        current_purity <- calculate_purity(variants, ext_ld)
-        purity <- rbind(purity, current_purity)
+        ext_ld <- extract_ld_for_variants(LD_meta_file_path, analysis_region, variants)
+        p <- calculate_purity(variants, ext_ld)
+        data.frame(min.abs.corr = p[1, 1], mean.abs.corr = p[1, 2], median.abs.corr = p[1, 3])
       }
-    }
-
-    # Process purity data
-    purity <- as.data.frame(purity)
-    colnames(purity) <- c("min.abs.corr", "mean.abs.corr", "median.abs.corr")
+    })
     is_pure <- which(purity[, 1] >= min_abs_corr)
 
     # Finalize the result
@@ -201,6 +185,49 @@ process_coloc_results <- function(coloc_result, LD_meta_file_path, analysis_regi
   return(coloc_res)
 }
 
+# Extract and filter an LBF matrix from a finemapped data object.
+# @noRd
+.extract_lbf_matrix <- function(raw_data, finemapping_obj, varname_obj,
+                                filter_lbf_cs, filter_lbf_cs_secondary, prior_tol) {
+  fm_data <- if (!is.null(finemapping_obj)) {
+    tryCatch(get_nested_element(raw_data, finemapping_obj),
+      error = function(e) {
+        message(paste("no", finemapping_obj[2], "in", finemapping_obj[1]))
+        NULL
+      }
+    )
+  } else {
+    raw_data
+  }
+  if (is.null(fm_data)) return(NULL)
+
+  lbf_matrix <- as.data.frame(fm_data$lbf_variable)
+  # fSuSiE has a different structure
+  if (is.null(lbf_matrix) || nrow(lbf_matrix) == 0) {
+    lbf_matrix <- do.call(rbind, raw_data[[1]]$fsusie_result$lBF) %>% as.data.frame()
+    if (nrow(lbf_matrix) > 0) message("This is a fSuSiE case")
+  }
+
+  # Filter rows
+  if (filter_lbf_cs && is.null(filter_lbf_cs_secondary)) {
+    lbf_matrix <- lbf_matrix[fm_data$sets$cs_index, , drop = FALSE]
+  } else if (!is.null(filter_lbf_cs_secondary)) {
+    lbf_matrix <- lbf_matrix[get_filter_lbf_index(fm_data, coverage = filter_lbf_cs_secondary), , drop = FALSE]
+  } else {
+    if ("V" %in% names(fm_data)) {
+      lbf_matrix <- lbf_matrix[fm_data$V > prior_tol, , drop = FALSE]
+    } else {
+      message("No V found in original data.")
+    }
+  }
+
+  # Set variant names and remove NA columns
+  if (!is.null(varname_obj)) colnames(lbf_matrix) <- get_nested_element(raw_data, varname_obj)
+  lbf_matrix <- lbf_matrix[, !is.na(colnames(lbf_matrix))]
+
+  list(lbf_matrix = lbf_matrix, fm_data = fm_data)
+}
+
 #' Colocalization Analysis Wrapper
 #'
 #' This function processes xQTL and multiple GWAS finemapped data files for colocalization analysis.
@@ -231,86 +258,36 @@ coloc_wrapper <- function(xqtl_file, gwas_files,
                           gwas_finemapping_obj = NULL, gwas_varname_obj = NULL, gwas_region_obj = NULL,
                           filter_lbf_cs = FALSE, filter_lbf_cs_secondary = NULL,
                           prior_tol = 1e-9, p1 = 1e-4, p2 = 1e-4, p12 = 5e-6, ...) {
-  region <- NULL # define first to avoid element can not be found
+  region <- NULL
   # Load and process GWAS data
-  gwas_lbf_matrices <- lapply(gwas_files, function(file) {
+  gwas_lbf_matrices <- purrr::map(gwas_files, function(file) {
     raw_data <- readRDS(file)[[1]]
-    gwas_data <- if (!is.null(gwas_finemapping_obj)) get_nested_element(raw_data, gwas_finemapping_obj) else raw_data
-    gwas_lbf_matrix <- as.data.frame(gwas_data$lbf_variable)
-      # fsusie has a different structure
-    if(is.null(gwas_lbf_matrix) || nrow(gwas_lbf_matrix)==0){
-        gwas_lbf_matrix <- do.call(rbind, raw_data[[1]]$fsusie_result$lBF) %>% as.data.frame
-        if(nrow(gwas_lbf_matrix) > 0) message("This is a fSuSiE case")
-    }
-    if (filter_lbf_cs & is.null(filter_lbf_cs_secondary)) {
-        gwas_lbf_matrix <- gwas_lbf_matrix[gwas_data$sets$cs_index,, drop = FALSE]
-    } else if (!is.null(filter_lbf_cs_secondary)) {
-        gwas_lbf_filter_index <- get_filter_lbf_index(gwas_data, coverage = filter_lbf_cs_secondary) 
-        gwas_lbf_matrix <- gwas_lbf_matrix[gwas_lbf_filter_index,, drop = FALSE]
-    } else {
-      gwas_lbf_matrix <- gwas_lbf_matrix[gwas_data$V > prior_tol,, drop = FALSE]
-    }
-    if (!is.null(gwas_varname_obj)) colnames(gwas_lbf_matrix) <- get_nested_element(raw_data, gwas_varname_obj)
-    # fsusie could have NA in variant name
-    gwas_lbf_matrix <- gwas_lbf_matrix[,!is.na(colnames(gwas_lbf_matrix))] 
-    return(gwas_lbf_matrix)
+    .extract_lbf_matrix(raw_data, gwas_finemapping_obj, gwas_varname_obj,
+                        filter_lbf_cs, filter_lbf_cs_secondary, prior_tol)$lbf_matrix
   })
 
-  # changed: need to remove this check, as the last variant of former block could overlapped with first variant of later block
-  # # Validate uniqueness of column names across GWAS matrices
-  # all_gwas_colnames <- unique(unlist(lapply(gwas_lbf_matrices, colnames)))
-  # if (length(all_gwas_colnames) != sum(sapply(gwas_lbf_matrices, ncol))) {
-  #   stop("Duplicate variant names found across GWAS regions analyzed. This is not expected.")
-  # }
   # Combine GWAS matrices and replace NAs with zeros
   combined_gwas_lbf_matrix <- bind_rows(gwas_lbf_matrices) %>%
     mutate(across(everything(), ~ replace_na(., 0)))
 
   # Process xQTL data
   xqtl_raw_data <- readRDS(xqtl_file)[[1]]
-  xqtl_data <- if (!is.null(xqtl_finemapping_obj)) {
-    tryCatch(
-      {
-        get_nested_element(xqtl_raw_data, xqtl_finemapping_obj)
-      },
-      error = function(e) {
-        message(paste("no", xqtl_finemapping_obj[2], "in", xqtl_finemapping_obj[1]))
-        NULL
-      }
-    )
-  } else {
-    xqtl_raw_data
-  }
-  if (!is.null(xqtl_data)) {
-    xqtl_lbf_matrix <- as.data.frame(xqtl_data$lbf_variable)
-    # fsusie has a different structure
-    if(is.null(xqtl_lbf_matrix) | nrow(xqtl_lbf_matrix)==0){
-        xqtl_lbf_matrix <- do.call(rbind, xqtl_raw_data[[1]]$fsusie_result$lBF) %>% as.data.frame
-        if(nrow(xqtl_lbf_matrix) > 0) message("This is a fSuSiE case")
-    }
-    # fsusie data does not have V element in results
-    if (filter_lbf_cs & is.null(filter_lbf_cs_secondary)) {
-        xqtl_lbf_matrix <- xqtl_lbf_matrix[xqtl_data$sets$cs_index, , drop = FALSE]
-    } else if (!is.null(filter_lbf_cs_secondary)) {
-        xqtl_lbf_filter_index <- get_filter_lbf_index(xqtl_data, coverage = filter_lbf_cs_secondary) 
-        xqtl_lbf_matrix <- xqtl_lbf_matrix[xqtl_lbf_filter_index,, drop = FALSE]
-    } else {
-      if ("V" %in% names(xqtl_data)) xqtl_lbf_matrix <- xqtl_lbf_matrix[xqtl_data$V > prior_tol, , drop = FALSE] else (message("No V found in original data."))
-    }
+  xqtl_extracted <- .extract_lbf_matrix(xqtl_raw_data, xqtl_finemapping_obj, xqtl_varname_obj,
+                                        filter_lbf_cs, filter_lbf_cs_secondary, prior_tol)
+
+  if (!is.null(xqtl_extracted)) {
+    xqtl_lbf_matrix <- xqtl_extracted$lbf_matrix
     if (nrow(combined_gwas_lbf_matrix) > 0 && nrow(xqtl_lbf_matrix) > 0) {
-      if (!is.null(xqtl_varname_obj)) colnames(xqtl_lbf_matrix) <- get_nested_element(xqtl_raw_data, xqtl_varname_obj)
-      # fsusie could have NA in variant name
-      xqtl_lbf_matrix <- xqtl_lbf_matrix[,!is.na(colnames(xqtl_lbf_matrix))] 
-        
       colnames(xqtl_lbf_matrix) <- align_variant_names(colnames(xqtl_lbf_matrix), colnames(combined_gwas_lbf_matrix))$aligned_variants
       common_colnames <- intersect(colnames(xqtl_lbf_matrix), colnames(combined_gwas_lbf_matrix))
-      xqtl_lbf_matrix <- xqtl_lbf_matrix[, common_colnames, drop = FALSE] %>% as.matrix()
-      combined_gwas_lbf_matrix <- combined_gwas_lbf_matrix[, common_colnames, drop = FALSE] %>% as.matrix()
 
-      # Report the number of dropped columns from xQTL matrix
-      num_dropped_cols <- length(setdiff(colnames(xqtl_lbf_matrix), common_colnames))
+      # Report the number of dropped columns from xQTL matrix before subsetting
+      num_dropped_cols <- ncol(xqtl_lbf_matrix) - length(common_colnames)
       message("Number of columns dropped from xQTL matrix: ", num_dropped_cols)
 
+      xqtl_lbf_matrix <- xqtl_lbf_matrix[, common_colnames, drop = FALSE] %>% as.matrix()
+      combined_gwas_lbf_matrix <- combined_gwas_lbf_matrix[, common_colnames, drop = FALSE] %>% as.matrix()
+
       # Function to convert region df to str
       convert_to_string <- function(df) paste0("chr", df$chrom, ":", df$start, "-", df$end)
       region <- if (!is.null(xqtl_region_obj)) get_nested_element(xqtl_raw_data, xqtl_region_obj) %>% convert_to_string() else NULL
diff --git a/R/mr.R b/R/mr.R
index cae7da9a..7d4c0482 100644
--- a/R/mr.R
+++ b/R/mr.R
@@ -7,6 +7,20 @@ calc_I2 <- function(Q, Est) {
   return(if (I2 < 0) 0 else I2)
 }
 
+# Create a null data frame with gene_name and NA columns for MR pipeline outputs.
+# @noRd
+.create_null_mr_df <- function(gene_name, col_spec) {
+  n <- length(gene_name)
+  cols <- purrr::map(col_spec, function(type) {
+    switch(type,
+      character = as.character(rep(NA, n)),
+      integer = as.integer(rep(NA, n)),
+      numeric = as.numeric(rep(NA, n))
+    )
+  })
+  do.call(data.frame, c(list(gene_name = gene_name), cols, list(stringsAsFactors = FALSE)))
+}
+
 #' MR Format Function
 #'
 #' Description of what the function does.
@@ -22,84 +36,61 @@ calc_I2 <- function(Q, Est) {
 #' @export
 mr_format <- function(susie_result, condition, gwas_sumstats_db, coverage = "cs_coverage_0.95", run_allele_qc = TRUE,
                       molecular_name_obj = c("susie_results", condition, "region_info", "region_name"), ld_meta_df) {
-  # Create null mr_format_input
-  create_null_mr_input <- function(gene_name) {
-    mr_format_input <- data.frame(
-      gene_name = gene_name,
-      variant_id = as.character(rep(NA, length(gene_name))),
-      bhat_x = as.numeric(rep(NA, length(gene_name))),
-      sbhat_x = as.numeric(rep(NA, length(gene_name))),
-      cs = as.numeric(rep(NA, length(gene_name))),
-      pip = as.numeric(rep(NA, length(gene_name))),
-      bhat_y = as.numeric(rep(NA, length(gene_name))),
-      sbhat_y = as.numeric(rep(NA, length(gene_name))),
-      stringsAsFactors = FALSE # Optional, to prevent factors
-    )
-  }
+  mr_format_spec <- c(variant_id = "character", bhat_x = "numeric", sbhat_x = "numeric",
+                      cs = "numeric", pip = "numeric", bhat_y = "numeric", sbhat_y = "numeric")
   gene_name <- unique(get_nested_element(susie_result, molecular_name_obj))
 
-  # Attempt to retrieve top_loci; if not found, return NULL
+  # Attempt to retrieve top_loci; if not found, return null
   top_loci <- tryCatch(
-    {
-      get_nested_element(susie_result, c("susie_results", condition, "top_loci"))
-    },
+    get_nested_element(susie_result, c("susie_results", condition, "top_loci")),
     error = function(e) {
       message("top_loci does not exist for the specified condition in susie_result.")
-      return(NULL)
+      NULL
     }
   )
-  if (is.data.frame(top_loci)) {
-    if (any(unique(get_nested_element(top_loci, coverage)) != 0)) {
-      susie_cs_result_formatted <- top_loci %>%
-        mutate(gene_name = gene_name) %>%
-        filter(coverage >= 1) %>%
-        mutate(variant = strip_chr_prefix(variant_id)) %>%
-        select(gene_name, variant, betahat, sebetahat, all_of(coverage), pip) %>%
-        rename("bhat_x" = "betahat", "sbhat_x" = "sebetahat", "cs" = all_of(coverage))
-      susie_pos <- sapply(susie_cs_result_formatted$variant, function(variant_id) strsplit(variant_id, "\\:")[[1]][2])
-      gwas_pos <- sapply(gwas_sumstats_db$variant_id, function(variant_id) strsplit(variant_id, "\\:")[[1]][2])
-      if (any(susie_pos %in% gwas_pos)) {
-        gwas_sumstats_db_extracted <- gwas_sumstats_db %>%
-          filter(pos %in% susie_pos) %>%
-          mutate(n_sample = if ("n_sample" %in% colnames(.)) n_sample else (n_case + n_control))
-        mean_n_sample <- round(mean(gwas_sumstats_db_extracted$n_sample, na.rm = TRUE))
-        # Impute `n_sample` and `maf`
-        if (any(is.na(gwas_sumstats_db_extracted$effect_allele_frequency))) {
-          gwas_sumstats_db_extracted_imputed <- gwas_sumstats_db_extracted %>%
-            left_join(ld_meta_df %>% select(pos, allele_freq), by = "pos") %>%
-            mutate(effect_allele_frequency = ifelse(is.na(effect_allele_frequency), allele_freq, effect_allele_frequency)) %>%
-            mutate(n_sample = ifelse(is.na(n_sample), mean_n_sample, n_sample)) %>%
-            select(-allele_freq)
-        } else {
-          gwas_sumstats_db_extracted_imputed <- gwas_sumstats_db_extracted
-        }
-        gwas_sumstats_db_beta_se <- z_to_beta_se(gwas_sumstats_db_extracted_imputed$z, gwas_sumstats_db_extracted_imputed$effect_allele_frequency, gwas_sumstats_db_extracted_imputed$n_sample)
-        gwas_sumstats_db_extracted_imputed <- gwas_sumstats_db_extracted_imputed %>% mutate(beta = gwas_sumstats_db_beta_se$beta, se = gwas_sumstats_db_beta_se$se)
-        if (run_allele_qc) {
-          susie_cs_result_formatted <- allele_qc(cbind(variant_id_to_df(susie_cs_result_formatted$variant), susie_cs_result_formatted),
-            gwas_sumstats_db_extracted_imputed$variant_id, c("bhat_x"),
-            match_min_prop = 0
-          )
-          susie_cs_result_formatted <- susie_cs_result_formatted$target_data_qced[, c("gene_name", "variant_id", "bhat_x", "sbhat_x", "cs", "pip")]
-        }
-        # Normalize variant IDs to canonical format for matching
-        gwas_sumstats_db_extracted_imputed$variant_id <- normalize_variant_id(gwas_sumstats_db_extracted_imputed$variant_id)
-        susie_cs_gwas_variants_merge <- intersect(susie_cs_result_formatted$variant_id, gwas_sumstats_db_extracted_imputed$variant_id)
+  if (!is.data.frame(top_loci)) return(.create_null_mr_df(gene_name, mr_format_spec))
+  if (!any(unique(get_nested_element(top_loci, coverage)) != 0)) return(.create_null_mr_df(gene_name, mr_format_spec))
 
-        mr_format_input <- susie_cs_result_formatted[match(susie_cs_gwas_variants_merge, susie_cs_result_formatted$variant), ] %>%
-          cbind(., gwas_sumstats_db_extracted_imputed[match(susie_cs_gwas_variants_merge, gwas_sumstats_db_extracted_imputed$variant_id), ] %>%
-            select(beta, se) %>%
-            rename("bhat_y" = "beta", "sbhat_y" = "se"))
-      } else {
-        mr_format_input <- create_null_mr_input(gene_name)
-      }
-    } else {
-      mr_format_input <- create_null_mr_input(gene_name)
-    }
-  } else {
-    mr_format_input <- create_null_mr_input(gene_name)
+  susie_cs_result_formatted <- top_loci %>%
+    mutate(gene_name = gene_name) %>%
+    filter(coverage >= 1) %>%
+    mutate(variant = strip_chr_prefix(variant_id)) %>%
+    select(gene_name, variant, betahat, sebetahat, all_of(coverage), pip) %>%
+    rename("bhat_x" = "betahat", "sbhat_x" = "sebetahat", "cs" = all_of(coverage))
+
+  susie_pos <- stringr::str_split_i(susie_cs_result_formatted$variant, ":", 2)
+  gwas_pos <- stringr::str_split_i(gwas_sumstats_db$variant_id, ":", 2)
+  if (!any(susie_pos %in% gwas_pos)) return(.create_null_mr_df(gene_name, mr_format_spec))
+
+  gwas_sumstats_db_extracted <- gwas_sumstats_db %>%
+    filter(pos %in% susie_pos) %>%
+    mutate(n_sample = if ("n_sample" %in% colnames(.)) n_sample else (n_case + n_control))
+  mean_n_sample <- round(mean(gwas_sumstats_db_extracted$n_sample, na.rm = TRUE))
+  # Impute `n_sample` and `maf`
+  if (any(is.na(gwas_sumstats_db_extracted$effect_allele_frequency))) {
+    gwas_sumstats_db_extracted <- gwas_sumstats_db_extracted %>%
+      left_join(ld_meta_df %>% select(pos, allele_freq), by = "pos") %>%
+      mutate(effect_allele_frequency = ifelse(is.na(effect_allele_frequency), allele_freq, effect_allele_frequency)) %>%
+      mutate(n_sample = ifelse(is.na(n_sample), mean_n_sample, n_sample)) %>%
+      select(-allele_freq)
+  }
+  gwas_beta_se <- z_to_beta_se(gwas_sumstats_db_extracted$z, gwas_sumstats_db_extracted$effect_allele_frequency, gwas_sumstats_db_extracted$n_sample)
+  gwas_sumstats_db_extracted <- gwas_sumstats_db_extracted %>% mutate(beta = gwas_beta_se$beta, se = gwas_beta_se$se)
+  if (run_allele_qc) {
+    susie_cs_result_formatted <- allele_qc(cbind(variant_id_to_df(susie_cs_result_formatted$variant), susie_cs_result_formatted),
+      gwas_sumstats_db_extracted$variant_id, c("bhat_x"),
+      match_min_prop = 0
+    )
+    susie_cs_result_formatted <- susie_cs_result_formatted$target_data_qced[, c("gene_name", "variant_id", "bhat_x", "sbhat_x", "cs", "pip")]
   }
-  return(mr_format_input)
+  # Normalize variant IDs to canonical format for matching
+  gwas_sumstats_db_extracted$variant_id <- normalize_variant_id(gwas_sumstats_db_extracted$variant_id)
+  common_variants <- intersect(susie_cs_result_formatted$variant_id, gwas_sumstats_db_extracted$variant_id)
+
+  susie_cs_result_formatted[match(common_variants, susie_cs_result_formatted$variant), ] %>%
+    cbind(., gwas_sumstats_db_extracted[match(common_variants, gwas_sumstats_db_extracted$variant_id), ] %>%
+      select(beta, se) %>%
+      rename("bhat_y" = "beta", "sbhat_y" = "se"))
 }
 
 #' Mendelian Randomization (MR)
@@ -110,28 +101,16 @@ mr_format <- function(susie_result, condition, gwas_sumstats_db, coverage = "cs_
 #' "meta_eff", "se_meta_eff", "meta_pval", "Q", "Q_pval" and "I2". "gene_name" is ensemble ID. "num_CS" is the number of credible sets
 #' contained in each gene, "num_IV" is the number of variants contained in each gene. "meta_eff", "se_meta_eff" and "meta_pval" are the MR estimate, standard error and pvalue.
 #' "Q" is Cochran's Q statistic, "I2" quantifies the heterogeneity, range from 0 to 1.
-#' @importFrom dplyr mutate group_by filter ungroup distinct arrange select
+#' @importFrom dplyr mutate group_by filter ungroup distinct arrange select summarise n left_join
 #' @importFrom magrittr %>%
 #' @importFrom stats pnorm pchisq
 #' @export
 mr_analysis <- function(mr_formatted_input, cpip_cutoff = 0.5) {
-  create_null_output <- function(gene_name) {
-    data.frame(
-      gene_name = gene_name,
-      num_CS = as.integer(rep(NA, length(gene_name))),
-      num_IV = as.integer(rep(NA, length(gene_name))),
-      cpip = as.numeric(rep(NA, length(gene_name))),
-      meta_eff = as.numeric(rep(NA, length(gene_name))),
-      se_meta_eff = as.numeric(rep(NA, length(gene_name))),
-      meta_pval = as.numeric(rep(NA, length(gene_name))),
-      Q = as.numeric(rep(NA, length(gene_name))),
-      Q_pval = as.numeric(rep(NA, length(gene_name))),
-      I2 = as.numeric(rep(NA, length(gene_name))),
-      stringsAsFactors = FALSE
-    )
-  }
+  mr_output_spec <- c(num_CS = "integer", num_IV = "integer", cpip = "numeric",
+                      meta_eff = "numeric", se_meta_eff = "numeric", meta_pval = "numeric",
+                      Q = "numeric", Q_pval = "numeric", I2 = "numeric")
   if (all(is.na(mr_formatted_input[, -1]))) {
-    return(create_null_output(unique(mr_formatted_input$gene_name)))
+    return(.create_null_mr_df(unique(mr_formatted_input$gene_name), mr_output_spec))
   }
   output <- mr_formatted_input %>%
     mutate(
@@ -140,51 +119,60 @@ mr_analysis <- function(mr_formatted_input, cpip_cutoff = 0.5) {
     ) %>%
     group_by(gene_name, cs) %>%
     mutate(cpip = sum(pip)) %>%
-    filter(cpip >= cpip_cutoff) # Cumulative pip greater than a user defined cumulative pip threshold
+    filter(cpip >= cpip_cutoff)
 
-  if (dim(output)[1] != 0) {
-    output <- output %>%
-      group_by(gene_name, cs) %>%
-      mutate(
-        beta_yx = bhat_y / bhat_x,
-        se_yx = sqrt((sbhat_y^2 / bhat_x^2) + ((bhat_y^2 * sbhat_x^2) / bhat_x^4)),
-        composite_bhat = sum((beta_yx * pip) / cpip),
-        composite_sbhat = sum((beta_yx^2 + se_yx^2) * pip / cpip)
-      ) %>%
-      mutate(
-        composite_sbhat = sqrt(composite_sbhat - composite_bhat^2),
-        wv = composite_sbhat^-2
-      ) %>%
-      ungroup() %>%
-      mutate(
-        meta_eff = sum(unique(wv) * unique(composite_bhat)),
-        sum_w = sum(unique(wv)),
-        se_meta_eff = sqrt(sum_w^-1),
-        num_CS = length(unique(cs))
-      ) %>%
-      mutate(
-        num_IV = length(variant_id),
-        meta_eff = meta_eff / sum_w,
-        meta_pval = 2 * pnorm(abs(meta_eff) / se_meta_eff, lower.tail = FALSE),
-        Q = sum(unique(wv) * (unique(composite_bhat) - unique(meta_eff))^2),
-        I2 = calc_I2(Q, composite_bhat),
-        Q_pval = pchisq(Q, df = length(unique(composite_bhat)) - 1, lower = FALSE)
-      ) %>%
-      ungroup() %>%
-      distinct(gene_name, .keep_all = TRUE) %>%
-      mutate(
-        cpip = round(cpip, 3),
-        meta_pval = round(meta_pval, 3),
-        meta_eff = round(meta_eff, 3),
-        se_meta_eff = round(se_meta_eff, 3),
-        Q = round(Q, 3),
-        Q_pval = round(Q_pval, 3),
-        I2 = round(I2, 3)
-      ) %>%
-      arrange(meta_pval) %>%
-      select(gene_name, num_CS, num_IV, cpip, meta_eff, se_meta_eff, meta_pval, Q, Q_pval, I2)
-  } else {
-    return(create_null_output(unique(mr_formatted_input$gene_name)))
+  if (nrow(output) == 0) {
+    return(.create_null_mr_df(unique(mr_formatted_input$gene_name), mr_output_spec))
   }
-  output
+
+  # Compute per-CS composite estimates
+  cs_summary <- output %>%
+    group_by(gene_name, cs) %>%
+    summarise(
+      cpip = first(cpip),
+      composite_bhat = sum((bhat_y / bhat_x * pip) / cpip),
+      composite_sbhat = sqrt(
+        sum(((bhat_y / bhat_x)^2 + (sbhat_y^2 / bhat_x^2) + ((bhat_y^2 * sbhat_x^2) / bhat_x^4)) * pip / cpip) -
+          sum((bhat_y / bhat_x * pip) / cpip)^2
+      ),
+      .groups = "drop"
+    ) %>%
+    mutate(wv = composite_sbhat^-2)
+
+  # Compute gene-level meta-analysis
+  gene_summary <- cs_summary %>%
+    group_by(gene_name) %>%
+    summarise(
+      num_CS = n(),
+      cpip = first(cpip),
+      sum_w = sum(wv),
+      meta_eff = sum(wv * composite_bhat) / sum(wv),
+      se_meta_eff = sqrt(1 / sum(wv)),
+      Q = sum(wv * (composite_bhat - sum(wv * composite_bhat) / sum(wv))^2),
+      I2 = calc_I2(Q, composite_bhat),
+      Q_pval = pchisq(Q, df = n() - 1, lower = FALSE),
+      .groups = "drop"
+    ) %>%
+    mutate(
+      meta_pval = 2 * pnorm(abs(meta_eff) / se_meta_eff, lower.tail = FALSE)
+    )
+
+  # Add num_IV from original output
+  iv_counts <- output %>%
+    group_by(gene_name) %>%
+    summarise(num_IV = n(), .groups = "drop")
+
+  gene_summary %>%
+    left_join(iv_counts, by = "gene_name") %>%
+    mutate(
+      cpip = round(cpip, 3),
+      meta_eff = round(meta_eff, 3),
+      se_meta_eff = round(se_meta_eff, 3),
+      meta_pval = round(meta_pval, 3),
+      Q = round(Q, 3),
+      Q_pval = round(Q_pval, 3),
+      I2 = round(I2, 3)
+    ) %>%
+    arrange(meta_pval) %>%
+    select(gene_name, num_CS, num_IV, cpip, meta_eff, se_meta_eff, meta_pval, Q, Q_pval, I2)
 }
diff --git a/tests/testthat/test_colocboost_pipeline.R b/tests/testthat/test_colocboost_pipeline.R
index 23277bdf..c76ebb78 100644
--- a/tests/testthat/test_colocboost_pipeline.R
+++ b/tests/testthat/test_colocboost_pipeline.R
@@ -1916,4 +1916,43 @@ test_that("qc_regional_data: with only sumstat data processes correctly", {
   expect_null(result$individual_data)
   expect_true(!is.null(result$sumstat_data))
 })
+
+# ===========================================================================
+# build_ld_args
+# ===========================================================================
+
+test_that("build_ld_args returns LD for square matrices", {
+  m <- matrix(1, 5, 5)
+  result <- pecotmr:::build_ld_args(list(m))
+  expect_true("LD" %in% names(result))
+  expect_null(result$X_ref)
+})
+
+test_that("build_ld_args returns X_ref for non-square (genotype) matrices", {
+  m <- matrix(1, 100, 5)  # samples x variants
+  result <- pecotmr:::build_ld_args(list(m))
+  expect_true("X_ref" %in% names(result))
+  expect_null(result$LD)
+})
+
+test_that("build_ld_args applies subset correctly", {
+  m1 <- matrix(1, 5, 5)
+  m2 <- matrix(2, 5, 5)
+  result <- pecotmr:::build_ld_args(list(m1, m2), subset = 2)
+  expect_length(result$LD, 1)
+  expect_equal(result$LD[[1]][1, 1], 2)
+})
+
+# ===========================================================================
+# .run_colocboost
+# ===========================================================================
+
+test_that(".run_colocboost returns NULL and message on error", {
+  expect_message(
+    result <- pecotmr:::.run_colocboost("test label", bad_arg = TRUE),
+    "test label failed"
+  )
+  expect_null(result$result)
+  expect_s3_class(result$time, "difftime")
+})
 NA
diff --git a/tests/testthat/test_mr.R b/tests/testthat/test_mr.R
index 829c4015..c417f0d7 100644
--- a/tests/testthat/test_mr.R
+++ b/tests/testthat/test_mr.R
@@ -553,3 +553,61 @@ test_that("mr_analysis with multiple credible sets", {
   expect_equal(nrow(result), 1)
   expect_equal(result$num_CS, 2)
 })
+
+# =============================================================================
+# .create_null_mr_df
+# =============================================================================
+
+test_that(".create_null_mr_df creates correct structure", {
+  spec <- c(x = "numeric", y = "character", z = "integer")
+  result <- pecotmr:::.create_null_mr_df("gene1", spec)
+  expect_s3_class(result, "data.frame")
+  expect_equal(nrow(result), 1)
+  expect_equal(result$gene_name, "gene1")
+  expect_true(is.numeric(result$x))
+  expect_true(is.character(result$y))
+  expect_true(is.integer(result$z))
+  expect_true(all(is.na(result[, -1])))
+})
+
+# =============================================================================
+# mr_format: tryCatch error path
+# =============================================================================
+
+test_that("mr_format returns null df when get_nested_element errors for top_loci", {
+  # susie_result has the gene_name path but top_loci path is broken
+  bad_susie <- list(susie_results = list(
+    condition1 = list(
+      region_info = list(region_name = "Gene_Test")
+      # no top_loci key — forces tryCatch error path
+    )
+  ))
+  result <- mr_format(
+    bad_susie, "condition1",
+    data.frame(variant_id = "chr1:1:A:G", pos = 1, z = 1, beta = 0.1, se = 0.05,
+               effect_allele_frequency = 0.3, n_case = 500, n_control = 500),
+    coverage = "cs_coverage_0.95"
+  )
+  expect_true(is.data.frame(result))
+  expect_true(all(is.na(result[, -1])))
+})
+
+# =============================================================================
+# mr_format: no overlapping positions
+# =============================================================================
+
+test_that("mr_format returns null df when no positions overlap", {
+  input_data <- generate_format_mock_data(seed = 42)
+  # Shift GWAS positions so they don't overlap with SuSiE positions
+  input_data$gwas_sumstats_db$pos <- input_data$gwas_sumstats_db$pos + 10000
+  input_data$gwas_sumstats_db$variant_id <- paste0(
+    "chr1:", input_data$gwas_sumstats_db$pos, ":",
+    sub("chr1:\\d+:", "", input_data$gwas_sumstats_db$variant_id)
+  )
+  result <- mr_format(
+    input_data$susie_result, "condition1", input_data$gwas_sumstats_db,
+    coverage = "cs_coverage_0.95", run_allele_qc = TRUE
+  )
+  expect_true(is.data.frame(result))
+  expect_true(all(is.na(result[, -1])))
+})

From 8e75d87bc948433fc32de5f8bd112688750d63fe Mon Sep 17 00:00:00 2001
From: Daniel Nachun <daniel.nachun@gmail.com>
Date: Sat, 18 Apr 2026 12:28:09 -0700
Subject: [PATCH 5/5] fix tests

---
 R/susie_wrapper.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/susie_wrapper.R b/R/susie_wrapper.R
index 7c38ad5e..86cc3c68 100644
--- a/R/susie_wrapper.R
+++ b/R/susie_wrapper.R
@@ -179,8 +179,8 @@ susie_rss_wrapper <- function(z, R = NULL, X = NULL, n = NULL,
   if (!is.null(R) && !is.null(X)) stop("Only one of R or X should be provided, not both.")
 
   # Build argument list for susie_rss
-  base_args <- list(z = z, n = n, L = L, coverage = coverage,
-                    sketch_samples = sketch_samples, ...)
+  base_args <- list(z = z, n = n, coverage = coverage,
+                    stochastic_ld_sample = sketch_samples, ...)
   if (!is.null(X)) base_args$X <- X else base_args$R <- R
 
   run_with_L <- function(L_val) do.call(susie_rss, c(base_args, list(L = L_val)))