From 358a3bdc9b004b4f3d7509dbde96a2683cccf897 Mon Sep 17 00:00:00 2001
From: Chunmingl <Chunming.liu324@gmail.com>
Date: Tue, 27 May 2025 20:46:50 -0400
Subject: [PATCH 1/2] minor fix

---
 R/file_utils.R | 1 +
 R/twas.R       | 9 +++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/R/file_utils.R b/R/file_utils.R
index 23662fe1..ce3253ff 100644
--- a/R/file_utils.R
+++ b/R/file_utils.R
@@ -1049,6 +1049,7 @@ get_filter_lbf_index <- function(susie_obj, coverage = 0.5, size_factor = 0.5) {
 
   # Return filtered lbf_variable rows (one per CS)
   return(cs_index)
+}
 
 #' Function to load LD reference data variants
 #' @export
diff --git a/R/twas.R b/R/twas.R
index e732423f..352d55d9 100644
--- a/R/twas.R
+++ b/R/twas.R
@@ -165,7 +165,7 @@ harmonize_twas <- function(twas_weights_data, ld_meta_file_path, gwas_meta_file)
           )
           weights_matrix_subset <- as.matrix(weights_matrix_qced$target_data_qced[, !colnames(weights_matrix_qced$target_data_qced) %in% c(
             "chrom",
-            "pos", "A2", "A1", "variant_id"
+            "pos", "A2", "A1", "variant_id", "variants_id_original"
           ), drop = FALSE])
           rownames(weights_matrix_subset) <- weights_matrix_qced$target_data_qced$variant_id # weight variant names are flipped/corrected
 
@@ -259,6 +259,7 @@ harmonize_gwas <- function(gwas_file, query_region, ld_variants, col_to_flip=NUL
     if (!any(gwas_data_sumstats$pos %in% gsub("\\:.*$", "", sub("^.*?\\:", "", ld_variants)))) return(NULL)
     gwas_allele_flip <- allele_qc(gwas_data_sumstats, ld_variants, col_to_flip=col_to_flip, match_min_prop = match_min_prop)
     gwas_data_sumstats <- gwas_allele_flip$target_data_qced # post-qc gwas data that is flipped and corrected - gwas study level
+    gwas_data_sumstats <- gwas_data_sumstats[!is.na(gwas_data_sumstats$z) & !is.infinite(gwas_data_sumstats$z), ]
     return(gwas_data_sumstats)
 }
 
@@ -464,9 +465,9 @@ twas_pipeline <- function(twas_weights_data,
     # Nested lapply for contexts and gwas studies
     twas_gene_results <- lapply(contexts, function(context) {
       study_results <- lapply(gwas_studies, function(study) {
-        twas_variants <- intersect(
-          rownames(twas_data_qced[[weight_db]][["weights_qced"]][[context]][[study]][["weights"]]),
-          twas_data_qced[[weight_db]][["variant_names"]][[context]][[study]]
+        twas_variants <- Reduce(intersect, list(rownames(twas_data_qced[[weight_db]][["weights_qced"]][[context]][[study]][["weights"]]), 
+          twas_data_qced[[weight_db]][["variant_names"]][[context]][[study]],
+          twas_data_qced[[weight_db]][["gwas_qced"]][[study]]$variant_id)
         )
         if (length(twas_variants) == 0) {
           return(list(twas_rs_df = data.frame(), mr_rs_df = data.frame()))

From afad8bc187beb3b95ebb3a917a409aefaa0f96ff Mon Sep 17 00:00:00 2001
From: Chunmingl <Chunmingl@users.noreply.github.com>
Date: Wed, 28 May 2025 00:51:38 +0000
Subject: [PATCH 2/2] Update documentation

---
 NAMESPACE                                   |  6 ++++
 man/extract_flatten_sumstats_from_nested.Rd | 27 +++++++++++++++
 man/load_multicontext_sumstats.Rd           | 38 +++++++++++++++++++++
 man/merge_sumstats_matrices.Rd              | 38 +++++++++++++++++++++
 4 files changed, 109 insertions(+)
 create mode 100644 man/extract_flatten_sumstats_from_nested.Rd
 create mode 100644 man/load_multicontext_sumstats.Rd
 create mode 100644 man/merge_sumstats_matrices.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 0dc45d68..ea3fe90e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -30,6 +30,7 @@ export(dentist)
 export(dentist_single_window)
 export(enet_weights)
 export(extract_cs_info)
+export(extract_flatten_sumstats_from_nested)
 export(extract_top_pip_info)
 export(filter_invalid_summary_stat)
 export(filter_mixture_components)
@@ -52,6 +53,7 @@ export(lbf_to_alpha)
 export(load_LD_matrix)
 export(load_genotype_region)
 export(load_ld_snp_info)
+export(load_multicontext_sumstats)
 export(load_multitask_regional_data)
 export(load_multitrait_R_sumstat)
 export(load_multitrait_tensorqtl_sumstat)
@@ -67,6 +69,7 @@ export(load_twas_weights)
 export(mash_pipeline)
 export(mash_rand_null_sample)
 export(merge_mash_data)
+export(merge_sumstats_matrices)
 export(mr_analysis)
 export(mr_ash_rss)
 export(mr_ash_rss_weights)
@@ -117,6 +120,9 @@ export(univariate_analysis_pipeline)
 export(wald_test_pval)
 export(xqtl_enrichment_wrapper)
 export(z_to_pvalue)
+import(dplyr)
+import(tibble)
+import(tidyr)
 import(vroom)
 importFrom(IRanges,IRanges)
 importFrom(IRanges,end)
diff --git a/man/extract_flatten_sumstats_from_nested.Rd b/man/extract_flatten_sumstats_from_nested.Rd
new file mode 100644
index 00000000..b89b6d84
--- /dev/null
+++ b/man/extract_flatten_sumstats_from_nested.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash_wrapper.R
+\name{extract_flatten_sumstats_from_nested}
+\alias{extract_flatten_sumstats_from_nested}
+\title{Extract Summary Statistics from Nested Data Structure}
+\usage{
+extract_flatten_sumstats_from_nested(data, extract_inf = "z", max_depth = 3)
+}
+\arguments{
+\item{data}{A nested list structure potentially containing `variant_names` and `sumstats`.}
+
+\item{extract_inf}{Character. One of `"z"`, `"beta"`, or `"se"`.}
+
+\item{max_depth}{Integer. Maximum depth to search within the list. Default is 3.}
+}
+\value{
+A data.frame with columns `variants` and the requested summary statistic.
+}
+\description{
+Recursively searches a nested list to extract summary statistics (z, beta, or se) 
+using `variant_names` and `sumstats`. Computes `z` if needed from `betahat` and `sebetahat`.
+}
+\examples{
+\dontrun{
+result <- extract_flatten_sumstats_from_nested(nested_list_object, extract_inf = "z")
+}
+}
diff --git a/man/load_multicontext_sumstats.Rd b/man/load_multicontext_sumstats.Rd
new file mode 100644
index 00000000..1b6ac2cf
--- /dev/null
+++ b/man/load_multicontext_sumstats.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash_wrapper.R
+\name{load_multicontext_sumstats}
+\alias{load_multicontext_sumstats}
+\title{Load and Align Summary Statistics for a Given Gene and Condition}
+\usage{
+load_multicontext_sumstats(
+  dat_list,
+  signal_df,
+  cond,
+  region,
+  extract_infs = "z",
+  tag_patterns = NULL,
+  result_list_format
+)
+}
+\arguments{
+\item{dat_list}{A named list of matrices or data.frames, each element corresponding to a summary statistics type (e.g., z, beta).}
+
+\item{signal_df}{A data.frame containing signal information including `variant_ID`, `gene_ID`, and `event_ID`.}
+
+\item{cond}{Character. Condition type: "strong", "null", or "random".}
+
+\item{region}{Character. Target gene ID.}
+
+\item{extract_infs}{Character vector. Names of summary statistics to extract (e.g., `"z"`, `"beta"`).}
+
+\item{tag_patterns}{Optional named pattern list used to classify context.}
+
+\item{result_list_format}{A nested list used as a running result container.}
+}
+\value{
+The updated `result_list_format` with processed results for the specified gene and condition.
+}
+\description{
+This function processes summary statistics matrices for a target gene across contexts, 
+optionally aligning with a reference panel and updating an existing result list.
+}
diff --git a/man/merge_sumstats_matrices.Rd b/man/merge_sumstats_matrices.Rd
new file mode 100644
index 00000000..5f47351d
--- /dev/null
+++ b/man/merge_sumstats_matrices.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash_wrapper.R
+\name{merge_sumstats_matrices}
+\alias{merge_sumstats_matrices}
+\title{Merge a List of Matrices or Data Frames with Optional Allele Flipping}
+\usage{
+merge_sumstats_matrices(
+  matrix_list,
+  value_column,
+  ref_panel = NULL,
+  ld_meta_file = NULL,
+  id_column = "variants",
+  remove_any_missing = FALSE
+)
+}
+\arguments{
+\item{matrix_list}{A named or unnamed list of data frames or matrices.}
+
+\item{value_column}{Character string. The name of the column containing values to extract (e.g., z-scores or betas).}
+
+\item{ref_panel}{Optional data frame. A reference panel for allele QC (must be compatible with `allele_qc`).}
+
+\item{id_column}{Character string. The name of the column identifying variant IDs. Default is `"variants"`.}
+
+\item{remove_any_missing}{Logical. If `TRUE`, rows with any missing values will be removed after merging.}
+}
+\value{
+A data frame containing merged values, one column per dataset with suffix `_i`.
+}
+\description{
+This function merges a list of matrices or data frames by a shared identifier column,
+optionally aligning to a reference panel using allele QC procedures.
+}
+\examples{
+\dontrun{
+merged <- merge_sumstats_matrices(list(df1, df2), value_column = "z", ref_panel = ref_df)
+}
+}