From 748d432c144777783bdd0bd5c0dd2a913b00bd91 Mon Sep 17 00:00:00 2001
From: alexmccreight <57416850+alexmccreight@users.noreply.github.com>
Date: Tue, 25 Mar 2025 17:24:20 -0400
Subject: [PATCH 1/2] added susie.ash and susie-inf weight functions

---
 R/regularized_regression.R | 40 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/R/regularized_regression.R b/R/regularized_regression.R
index d076231d..b8a84e11 100644
--- a/R/regularized_regression.R
+++ b/R/regularized_regression.R
@@ -823,3 +823,43 @@ bayes_c_rss_weights <- function(sumstats, LD, ...) {
 bayes_r_rss_weights <- function(sumstats, LD, ...) {
   return(bayes_alphabet_rss_weights(sumstats, LD, method = "bayesR", ...))
 }
+
+#' @export
+susie_ash_weights <- function(susie_ash_fit, X = NULL, y = NULL, ...) {
+  # If the fit object is missing or NULL, try to recover it from the parent frame.
+  if (missing(susie_ash_fit) || is.null(susie_ash_fit)) {
+    susie_ash_fit <- get0("susie_ash_fit", envir = parent.frame())
+    if (is.null(susie_ash_fit)) {
+      stop("A susie_ash_fit object is required.")
+    }
+  }
+  if (!is.null(X)) {
+    if (length(susie_ash_fit$marginal_PIP) != ncol(X)) {
+      stop(paste0("Dimension mismatch on number of variants in susie_ash_fit ",
+                  length(susie_ash_fit$marginal_PIP), " and TWAS weights ", ncol(X), ". "))
+    }
+  }
+  # Calculate coefficients as per the provided formula.
+  weights <- rowSums(susie_ash_fit$mu * susie_ash_fit$PIP) + susie_ash_fit$theta
+  return(weights)
+}
+
+#' @export
+susie_inf_weights <- function(susie_inf_fit, X = NULL, y = NULL, ...) {
+  # If the fit object is missing or NULL, try to recover it from the parent frame.
+  if (missing(susie_inf_fit) || is.null(susie_inf_fit)) {
+    susie_inf_fit <- get0("susie_inf_fit", envir = parent.frame())
+    if (is.null(susie_inf_fit)) {
+      stop("A susie_inf_fit object is required.")
+    }
+  }
+  if (!is.null(X)) {
+    if (length(susie_inf_fit$marginal_PIP) != ncol(X)) {
+      stop(paste0("Dimension mismatch on number of variants in susie_inf_fit ",
+                  length(susie_inf_fit$marginal_PIP), " and TWAS weights ", ncol(X), ". "))
+    }
+  }
+  # Calculate coefficients as per the provided formula.
+  weights <- rowSums(susie_inf_fit$mu * susie_inf_fit$PIP) + susie_inf_fit$alpha
+  return(weights)
+}

From 3fab18338241081075d71b3db71a8d62f409565d Mon Sep 17 00:00:00 2001
From: alexmccreight <alexmccreight@users.noreply.github.com>
Date: Tue, 25 Mar 2025 21:27:22 +0000
Subject: [PATCH 2/2] Update documentation

---
 NAMESPACE                           |  2 ++
 R/RcppExports.R                     | 11 ++++++-----
 man/allele_qc.Rd                    | 15 ++++++---------
 man/check_consecutive_regions.Rd    | 11 -----------
 man/load_multitask_regional_data.Rd | 19 +++++++++----------
 man/load_rss_data.Rd                | 15 +++++++--------
 man/load_tsv_region.Rd              | 26 +++++++++++++-------------
 man/rss_analysis_pipeline.Rd        | 12 +++++++++---
 8 files changed, 52 insertions(+), 59 deletions(-)
 delete mode 100644 man/check_consecutive_regions.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 8e0a54d5..9f3d3773 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -86,6 +86,8 @@ export(sdpr)
 export(sdpr_weights)
 export(slalom)
 export(summary_stats_qc)
+export(susie_ash_weights)
+export(susie_inf_weights)
 export(susie_post_processor)
 export(susie_rss_pipeline)
 export(susie_rss_qc)
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 814b38fd..efa6874d 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -2,21 +2,22 @@
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
 dentist_iterative_impute <- function(LD_mat, nSample, zScore, pValueThreshold, propSVD, gcControl, nIter, gPvalueThreshold, ncpus, seed, correct_chen_et_al_bug, verbose = FALSE) {
-  .Call("_pecotmr_dentist_iterative_impute", PACKAGE = "pecotmr", LD_mat, nSample, zScore, pValueThreshold, propSVD, gcControl, nIter, gPvalueThreshold, ncpus, seed, correct_chen_et_al_bug, verbose)
+    .Call('_pecotmr_dentist_iterative_impute', PACKAGE = 'pecotmr', LD_mat, nSample, zScore, pValueThreshold, propSVD, gcControl, nIter, gPvalueThreshold, ncpus, seed, correct_chen_et_al_bug, verbose)
 }
 
 rcpp_mr_ash_rss <- function(bhat, shat, z, R, var_y, n, sigma2_e, s0, w0, mu1_init, tol = 1e-8, max_iter = 1e5L, update_w0 = TRUE, update_sigma = TRUE, compute_ELBO = TRUE, standardize = FALSE, ncpus = 1L) {
-  .Call("_pecotmr_rcpp_mr_ash_rss", PACKAGE = "pecotmr", bhat, shat, z, R, var_y, n, sigma2_e, s0, w0, mu1_init, tol, max_iter, update_w0, update_sigma, compute_ELBO, standardize, ncpus)
+    .Call('_pecotmr_rcpp_mr_ash_rss', PACKAGE = 'pecotmr', bhat, shat, z, R, var_y, n, sigma2_e, s0, w0, mu1_init, tol, max_iter, update_w0, update_sigma, compute_ELBO, standardize, ncpus)
 }
 
 prs_cs_rcpp <- function(a, b, phi, bhat, maf, n, ld_blk, n_iter, n_burnin, thin, verbose, seed) {
-  .Call("_pecotmr_prs_cs_rcpp", PACKAGE = "pecotmr", a, b, phi, bhat, maf, n, ld_blk, n_iter, n_burnin, thin, verbose, seed)
+    .Call('_pecotmr_prs_cs_rcpp', PACKAGE = 'pecotmr', a, b, phi, bhat, maf, n, ld_blk, n_iter, n_burnin, thin, verbose, seed)
 }
 
 qtl_enrichment_rcpp <- function(r_gwas_pip, r_qtl_susie_fit, pi_gwas = 0, pi_qtl = 0, ImpN = 25L, shrinkage_lambda = 1.0, num_threads = 1L) {
-  .Call("_pecotmr_qtl_enrichment_rcpp", PACKAGE = "pecotmr", r_gwas_pip, r_qtl_susie_fit, pi_gwas, pi_qtl, ImpN, shrinkage_lambda, num_threads)
+    .Call('_pecotmr_qtl_enrichment_rcpp', PACKAGE = 'pecotmr', r_gwas_pip, r_qtl_susie_fit, pi_gwas, pi_qtl, ImpN, shrinkage_lambda, num_threads)
 }
 
 sdpr_rcpp <- function(bhat, LD, n, per_variant_sample_size = NULL, array = NULL, a = 0.1, c = 1.0, M = 1000L, a0k = 0.5, b0k = 0.5, iter = 1000L, burn = 200L, thin = 5L, n_threads = 1L, opt_llk = 1L, verbose = TRUE) {
-  .Call("_pecotmr_sdpr_rcpp", PACKAGE = "pecotmr", bhat, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose)
+    .Call('_pecotmr_sdpr_rcpp', PACKAGE = 'pecotmr', bhat, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose)
 }
+
diff --git a/man/allele_qc.Rd b/man/allele_qc.Rd
index 28c8f73e..6993271a 100644
--- a/man/allele_qc.Rd
+++ b/man/allele_qc.Rd
@@ -2,12 +2,11 @@
 % Please edit documentation in R/allele_qc.R
 \name{allele_qc}
 \alias{allele_qc}
-\title{Match alleles between target_variants and ref_variants}
+\title{Match alleles between target data and reference variants}
 \usage{
 allele_qc(
-  target_variants,
-  ref_variants,
   target_data,
+  ref_variants,
   col_to_flip = NULL,
   match_min_prop = 0.2,
   remove_dups = TRUE,
@@ -15,16 +14,14 @@ allele_qc(
   remove_strand_ambiguous = TRUE,
   flip_strand = FALSE,
   remove_unmatched = TRUE,
-  remove_same_vars = FALSE,
-  target_gwas = TRUE
+  remove_same_vars = FALSE
 )
 }
 \arguments{
-\item{target_variants}{A data frame with columns "chrom", "pos", "A1", "A2" or strings in the format of "chr:pos:A2:A1"/"chr:pos_A2_A1".}
-
-\item{ref_variants}{A data frame with columns "chrom", "pos", "A1", "A2" or strings in the format of "chr:pos:A2:A1"/"chr:pos_A2_A1".}
+\item{target_data}{A data frame with columns "chrom", "pos", "A2", "A1" (and optionally other columns like "beta" or "z"),
+or a vector of strings in the format of "chr:pos:A2:A1"/"chr:pos_A2_A1". Can be automatically converted to a data frame if a vector.}
 
-\item{target_data}{A data frame on which QC procedures will be applied.}
+\item{ref_variants}{A data frame with columns "chrom", "pos", "A2", "A1" or strings in the format of "chr:pos:A2:A1"/"chr:pos_A2_A1".}
 
 \item{col_to_flip}{The name of the column in target_data where flips are to be applied.}
 
diff --git a/man/check_consecutive_regions.Rd b/man/check_consecutive_regions.Rd
deleted file mode 100644
index b4566ec1..00000000
--- a/man/check_consecutive_regions.Rd
+++ /dev/null
@@ -1,11 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/LD.R
-\name{check_consecutive_regions}
-\alias{check_consecutive_regions}
-\title{Function to Check if Regions are in increasing order and remove duplicated rows}
-\usage{
-check_consecutive_regions(df)
-}
-\description{
-Function to Check if Regions are in increasing order and remove duplicated rows
-}
diff --git a/man/load_multitask_regional_data.Rd b/man/load_multitask_regional_data.Rd
index 3593c9cf..1e3d1457 100644
--- a/man/load_multitask_regional_data.Rd
+++ b/man/load_multitask_regional_data.Rd
@@ -30,12 +30,11 @@ load_multitask_regional_data(
   LD_meta_file_path_list = NULL,
   match_LD_sumstat = NULL,
   conditions_list_sumstat = NULL,
-  subset = TRUE,
   n_samples = 0,
   n_cases = 0,
   n_controls = 0,
-  target = "",
-  target_column_index = "",
+  extract_sumstats_region_name = NULL,
+  sumstats_region_name_col = NULL,
   comment_string = "#",
   extract_coordinates = NULL
 )
@@ -87,19 +86,19 @@ load_multitask_regional_data(
 
 \item{conditions_list_sumstat}{A vector of strings representing different sumstats.}
 
-\item{target}{User-specified gene/phenotype name used to further subset the phenotype data.}
+\item{n_samples}{User-specified sample size. If unknown, set as 0 to retrieve from the sumstat file.}
 
-\item{target_column_index}{Filter this specific column for the target.}
+\item{n_cases}{User-specified number of cases.}
 
-\item{comment_string}{comment sign in the column_mapping file, default is #}
+\item{n_controls}{User-specified number of controls.}
 
-\item{extract_coordinates}{Optional data frame with columns "chrom" and "pos" for specific coordinates extraction.}
+\item{extract_sumstats_region_name}{User-specified gene/phenotype name used to further subset the phenotype data.}
 
-\item{n_sample}{User-specified sample size. If unknown, set as 0 to retrieve from the sumstat file.}
+\item{sumstats_region_name_col}{Filter this specific column for the extract_sumstats_region_name.}
 
-\item{n_case}{User-specified number of cases.}
+\item{comment_string}{comment sign in the column_mapping file, default is #}
 
-\item{n_control}{User-specified number of controls.}
+\item{extract_coordinates}{Optional data frame with columns "chrom" and "pos" for specific coordinates extraction.}
 }
 \value{
 A list containing the individual_data and sumstat_data:
diff --git a/man/load_rss_data.Rd b/man/load_rss_data.Rd
index 1230d18f..2dd95a5f 100644
--- a/man/load_rss_data.Rd
+++ b/man/load_rss_data.Rd
@@ -7,13 +7,12 @@
 load_rss_data(
   sumstat_path,
   column_file_path,
-  subset = TRUE,
   n_sample = 0,
   n_case = 0,
   n_control = 0,
-  target = "",
-  region = "",
-  target_column_index = "",
+  region = NULL,
+  extract_region_name = NULL,
+  region_name_col = NULL,
   comment_string = "#"
 )
 }
@@ -28,13 +27,13 @@ load_rss_data(
 
 \item{n_control}{User-specified number of controls.}
 
-\item{target}{User-specified gene/phenotype name used to further subset the phenotype data.}
-
 \item{region}{The region where tabix use to subset the input dataset.}
 
-\item{target_column_index}{Filter this specific column for the target.}
+\item{extract_region_name}{User-specified gene/phenotype name used to further subset the phenotype data.}
+
+\item{region_name_col}{Filter this specific column for the extract_region_name.}
 
-\item{comment_string}{comment sign in the column_mapping file, default is #}
+\item{comment_string}{Comment sign in the column_mapping file, default is #}
 }
 \value{
 A list of rss_input, including the column-name-formatted summary statistics,
diff --git a/man/load_tsv_region.Rd b/man/load_tsv_region.Rd
index 0fabb7e1..88409c9e 100644
--- a/man/load_tsv_region.Rd
+++ b/man/load_tsv_region.Rd
@@ -2,29 +2,29 @@
 % Please edit documentation in R/file_utils.R
 \name{load_tsv_region}
 \alias{load_tsv_region}
-\title{Load customized tsv data}
+\title{Load and filter tabular data with optional region subsetting}
 \usage{
 load_tsv_region(
-  sumstat_path,
-  region = "",
-  target = "",
-  target_column_index = ""
+  file_path,
+  region = NULL,
+  extract_region_name = NULL,
+  region_name_col = NULL
 )
 }
 \arguments{
-\item{sumstat_path}{File path to the summary statistics.}
+\item{file_path}{Path to the summary statistics file.}
 
-\item{region}{The region where tabix use to subset the input dataset. Format: chr:start-end (eg: 9:10000-50000)}
+\item{region}{Genomic region for subsetting tabix-indexed files. Format: chr:start-end (e.g., "9:10000-50000").}
 
-\item{target}{User-specified gene/phenotype name used to further subset the phenotype data.}
+\item{extract_region_name}{Value to filter for in the specified filter column.}
 
-\item{target_column_index}{Filter this specific column for the target.}
+\item{region_name_col}{Index of the column to apply the extract_region_name against.}
 }
 \value{
-A dataframe of the subsetted summary statistics,
+A dataframe containing the filtered summary statistics.
 }
 \description{
-This function load the input data. If the input sumstat data is .gz and tabixed, then can use the region parameter to subset the data
-and filter by target column
-Otherwise, it will only filter by target column since tabix command won't function (this apply to .tsv, .txt files)
+This function loads summary statistics data from tabular files (TSV, TXT).
+For compressed (.gz) and tabix-indexed files, it can subset data by genomic region.
+Additionally, it can filter results by a specified target value in a designated column.
 }
diff --git a/man/rss_analysis_pipeline.Rd b/man/rss_analysis_pipeline.Rd
index a4aa0fb2..2f3b8c4d 100644
--- a/man/rss_analysis_pipeline.Rd
+++ b/man/rss_analysis_pipeline.Rd
@@ -11,10 +11,10 @@ rss_analysis_pipeline(
   n_sample = 0,
   n_case = 0,
   n_control = 0,
-  target = "",
-  region = "",
-  target_column_index = "",
+  region = NULL,
   skip_region = NULL,
+  extract_region_name = NULL,
+  region_name_col = NULL,
   qc_method = c("rss_qc", "dentist", "slalom"),
   finemapping_method = c("susie_rss", "single_effect", "bayesian_conditional_regression"),
   finemapping_opts = list(init_L = 5, max_L = 20, l_step = 5, coverage = c(0.95, 0.7,
@@ -39,9 +39,15 @@ rss_analysis_pipeline(
 
 \item{n_control}{User-specified number of controls.}
 
+\item{region}{The region where tabix use to subset the input dataset.}
+
 \item{skip_region}{A character vector specifying regions to be skipped in the analysis (optional).
 Each region should be in the format "chrom:start-end" (e.g., "1:1000000-2000000").}
 
+\item{extract_region_name}{User-specified gene/phenotype name used to further subset the phenotype data.}
+
+\item{region_name_col}{Filter this specific column for the extract_region_name.}
+
 \item{qc_method}{Quality control method to use. Options are "rss_qc", "dentist", or "slalom" (default: "rss_qc").}
 
 \item{impute}{Logical; if TRUE, performs imputation for outliers identified in the analysis (default: TRUE).}