From 358a3bdc9b004b4f3d7509dbde96a2683cccf897 Mon Sep 17 00:00:00 2001 From: Chunmingl Date: Tue, 27 May 2025 20:46:50 -0400 Subject: [PATCH 1/2] minor fix --- R/file_utils.R | 1 + R/twas.R | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/file_utils.R b/R/file_utils.R index 23662fe1..ce3253ff 100644 --- a/R/file_utils.R +++ b/R/file_utils.R @@ -1049,6 +1049,7 @@ get_filter_lbf_index <- function(susie_obj, coverage = 0.5, size_factor = 0.5) { # Return filtered lbf_variable rows (one per CS) return(cs_index) +} #' Function to load LD reference data variants #' @export diff --git a/R/twas.R b/R/twas.R index e732423f..352d55d9 100644 --- a/R/twas.R +++ b/R/twas.R @@ -165,7 +165,7 @@ harmonize_twas <- function(twas_weights_data, ld_meta_file_path, gwas_meta_file) ) weights_matrix_subset <- as.matrix(weights_matrix_qced$target_data_qced[, !colnames(weights_matrix_qced$target_data_qced) %in% c( "chrom", - "pos", "A2", "A1", "variant_id" + "pos", "A2", "A1", "variant_id", "variants_id_original" ), drop = FALSE]) rownames(weights_matrix_subset) <- weights_matrix_qced$target_data_qced$variant_id # weight variant names are flipped/corrected @@ -259,6 +259,7 @@ harmonize_gwas <- function(gwas_file, query_region, ld_variants, col_to_flip=NUL if (!any(gwas_data_sumstats$pos %in% gsub("\\:.*$", "", sub("^.*?\\:", "", ld_variants)))) return(NULL) gwas_allele_flip <- allele_qc(gwas_data_sumstats, ld_variants, col_to_flip=col_to_flip, match_min_prop = match_min_prop) gwas_data_sumstats <- gwas_allele_flip$target_data_qced # post-qc gwas data that is flipped and corrected - gwas study level + gwas_data_sumstats <- gwas_data_sumstats[!is.na(gwas_data_sumstats$z) & !is.infinite(gwas_data_sumstats$z), ] return(gwas_data_sumstats) } @@ -464,9 +465,9 @@ twas_pipeline <- function(twas_weights_data, # Nested lapply for contexts and gwas studies twas_gene_results <- lapply(contexts, function(context) { study_results <- lapply(gwas_studies, function(study) { - 
twas_variants <- intersect( - rownames(twas_data_qced[[weight_db]][["weights_qced"]][[context]][[study]][["weights"]]), - twas_data_qced[[weight_db]][["variant_names"]][[context]][[study]] + twas_variants <- Reduce(intersect, list(rownames(twas_data_qced[[weight_db]][["weights_qced"]][[context]][[study]][["weights"]]), + twas_data_qced[[weight_db]][["variant_names"]][[context]][[study]], + twas_data_qced[[weight_db]][["gwas_qced"]][[study]]$variant_id) ) if (length(twas_variants) == 0) { return(list(twas_rs_df = data.frame(), mr_rs_df = data.frame())) From afad8bc187beb3b95ebb3a917a409aefaa0f96ff Mon Sep 17 00:00:00 2001 From: Chunmingl Date: Wed, 28 May 2025 00:51:38 +0000 Subject: [PATCH 2/2] Update documentation --- NAMESPACE | 6 ++++ man/extract_flatten_sumstats_from_nested.Rd | 27 +++++++++++++++ man/load_multicontext_sumstats.Rd | 38 +++++++++++++++++++++ man/merge_sumstats_matrices.Rd | 38 +++++++++++++++++++++ 4 files changed, 109 insertions(+) create mode 100644 man/extract_flatten_sumstats_from_nested.Rd create mode 100644 man/load_multicontext_sumstats.Rd create mode 100644 man/merge_sumstats_matrices.Rd diff --git a/NAMESPACE b/NAMESPACE index 0dc45d68..ea3fe90e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ export(dentist) export(dentist_single_window) export(enet_weights) export(extract_cs_info) +export(extract_flatten_sumstats_from_nested) export(extract_top_pip_info) export(filter_invalid_summary_stat) export(filter_mixture_components) @@ -52,6 +53,7 @@ export(lbf_to_alpha) export(load_LD_matrix) export(load_genotype_region) export(load_ld_snp_info) +export(load_multicontext_sumstats) export(load_multitask_regional_data) export(load_multitrait_R_sumstat) export(load_multitrait_tensorqtl_sumstat) @@ -67,6 +69,7 @@ export(load_twas_weights) export(mash_pipeline) export(mash_rand_null_sample) export(merge_mash_data) +export(merge_sumstats_matrices) export(mr_analysis) export(mr_ash_rss) export(mr_ash_rss_weights) @@ -117,6 +120,9 @@ 
export(univariate_analysis_pipeline) export(wald_test_pval) export(xqtl_enrichment_wrapper) export(z_to_pvalue) +import(dplyr) +import(tibble) +import(tidyr) import(vroom) importFrom(IRanges,IRanges) importFrom(IRanges,end) diff --git a/man/extract_flatten_sumstats_from_nested.Rd b/man/extract_flatten_sumstats_from_nested.Rd new file mode 100644 index 00000000..b89b6d84 --- /dev/null +++ b/man/extract_flatten_sumstats_from_nested.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash_wrapper.R +\name{extract_flatten_sumstats_from_nested} +\alias{extract_flatten_sumstats_from_nested} +\title{Extract Summary Statistics from Nested Data Structure} +\usage{ +extract_flatten_sumstats_from_nested(data, extract_inf = "z", max_depth = 3) +} +\arguments{ +\item{data}{A nested list structure potentially containing `variant_names` and `sumstats`.} + +\item{extract_inf}{Character. One of `"z"`, `"beta"`, or `"se"`.} + +\item{max_depth}{Integer. Maximum depth to search within the list. Default is 3.} +} +\value{ +A data.frame with columns `variants` and the requested summary statistic. +} +\description{ +Recursively searches a nested list to extract summary statistics (z, beta, or se) +using `variant_names` and `sumstats`. Computes `z` if needed from `betahat` and `sebetahat`. 
+} +\examples{ +\dontrun{ +result <- extract_flatten_sumstats_from_nested(nested_list_object, extract_inf = "z") +} +} diff --git a/man/load_multicontext_sumstats.Rd b/man/load_multicontext_sumstats.Rd new file mode 100644 index 00000000..1b6ac2cf --- /dev/null +++ b/man/load_multicontext_sumstats.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash_wrapper.R +\name{load_multicontext_sumstats} +\alias{load_multicontext_sumstats} +\title{Load and Align Summary Statistics for a Given Gene and Condition} +\usage{ +load_multicontext_sumstats( + dat_list, + signal_df, + cond, + region, + extract_infs = "z", + tag_patterns = NULL, + result_list_format +) +} +\arguments{ +\item{dat_list}{A named list of matrices or data.frames, each element corresponding to a summary statistics type (e.g., z, beta).} + +\item{signal_df}{A data.frame containing signal information including `variant_ID`, `gene_ID`, and `event_ID`.} + +\item{cond}{Character. Condition type: "strong", "null", or "random".} + +\item{region}{Character. Target gene ID.} + +\item{extract_infs}{Character vector. Names of summary statistics to extract (e.g., `"z"`, `"beta"`).} + +\item{tag_patterns}{Optional named pattern list used to classify context.} + +\item{result_list_format}{A nested list used as a running result container.} +} +\value{ +The updated `result_list_format` with processed results for the specified gene and condition. +} +\description{ +This function processes summary statistics matrices for a target gene across contexts, +optionally aligning with a reference panel and updating an existing result list. 
+} diff --git a/man/merge_sumstats_matrices.Rd b/man/merge_sumstats_matrices.Rd new file mode 100644 index 00000000..5f47351d --- /dev/null +++ b/man/merge_sumstats_matrices.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash_wrapper.R +\name{merge_sumstats_matrices} +\alias{merge_sumstats_matrices} +\title{Merge a List of Matrices or Data Frames with Optional Allele Flipping} +\usage{ +merge_sumstats_matrices( + matrix_list, + value_column, + ref_panel = NULL, + ld_meta_file = NULL, + id_column = "variants", + remove_any_missing = FALSE +) +} +\arguments{ +\item{matrix_list}{A named or unnamed list of data frames or matrices.} + +\item{value_column}{Character string. The name of the column containing values to extract (e.g., z-scores or betas).} + +\item{ref_panel}{Optional data frame. A reference panel for allele QC (must be compatible with `allele_qc`).} + +\item{id_column}{Character string. The name of the column identifying variant IDs. Default is `"variants"`.} + +\item{remove_any_missing}{Logical. If `TRUE`, rows with any missing values will be removed after merging.} +} +\value{ +A data frame containing merged values, one column per dataset with suffix `_i`. +} +\description{ +This function merges a list of matrices or data frames by a shared identifier column, +optionally aligning to a reference panel using allele QC procedures. +} +\examples{ +\dontrun{ +merged <- merge_sumstats_matrices(list(df1, df2), value_column = "z", ref_panel = ref_df) +} +}