diff --git a/NAMESPACE b/NAMESPACE index 9328ad5f..0b1271ff 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -79,6 +79,7 @@ export(prs_cs_weights) export(qr_screen) export(quantile_twas_weight_pipeline) export(raiss) +export(raiss_single_matrix) export(region_to_df) export(rss_analysis_pipeline) export(rss_basic_qc) diff --git a/R/RcppExports.R b/R/RcppExports.R index 814b38fd..efa6874d 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -2,21 +2,22 @@ # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 dentist_iterative_impute <- function(LD_mat, nSample, zScore, pValueThreshold, propSVD, gcControl, nIter, gPvalueThreshold, ncpus, seed, correct_chen_et_al_bug, verbose = FALSE) { - .Call("_pecotmr_dentist_iterative_impute", PACKAGE = "pecotmr", LD_mat, nSample, zScore, pValueThreshold, propSVD, gcControl, nIter, gPvalueThreshold, ncpus, seed, correct_chen_et_al_bug, verbose) + .Call('_pecotmr_dentist_iterative_impute', PACKAGE = 'pecotmr', LD_mat, nSample, zScore, pValueThreshold, propSVD, gcControl, nIter, gPvalueThreshold, ncpus, seed, correct_chen_et_al_bug, verbose) } rcpp_mr_ash_rss <- function(bhat, shat, z, R, var_y, n, sigma2_e, s0, w0, mu1_init, tol = 1e-8, max_iter = 1e5L, update_w0 = TRUE, update_sigma = TRUE, compute_ELBO = TRUE, standardize = FALSE, ncpus = 1L) { - .Call("_pecotmr_rcpp_mr_ash_rss", PACKAGE = "pecotmr", bhat, shat, z, R, var_y, n, sigma2_e, s0, w0, mu1_init, tol, max_iter, update_w0, update_sigma, compute_ELBO, standardize, ncpus) + .Call('_pecotmr_rcpp_mr_ash_rss', PACKAGE = 'pecotmr', bhat, shat, z, R, var_y, n, sigma2_e, s0, w0, mu1_init, tol, max_iter, update_w0, update_sigma, compute_ELBO, standardize, ncpus) } prs_cs_rcpp <- function(a, b, phi, bhat, maf, n, ld_blk, n_iter, n_burnin, thin, verbose, seed) { - .Call("_pecotmr_prs_cs_rcpp", PACKAGE = "pecotmr", a, b, phi, bhat, maf, n, ld_blk, n_iter, n_burnin, thin, verbose, seed) + .Call('_pecotmr_prs_cs_rcpp', PACKAGE = 'pecotmr', a, b, phi, bhat, maf, n, ld_blk, n_iter, n_burnin, thin, verbose, seed) } qtl_enrichment_rcpp <- function(r_gwas_pip, r_qtl_susie_fit, pi_gwas = 0, pi_qtl = 0, ImpN = 25L, shrinkage_lambda = 1.0, num_threads = 1L) { - .Call("_pecotmr_qtl_enrichment_rcpp", PACKAGE = "pecotmr", r_gwas_pip, r_qtl_susie_fit, pi_gwas, pi_qtl, ImpN, shrinkage_lambda, num_threads) + .Call('_pecotmr_qtl_enrichment_rcpp', PACKAGE = 'pecotmr', r_gwas_pip, r_qtl_susie_fit, pi_gwas, pi_qtl, ImpN, shrinkage_lambda, num_threads) } sdpr_rcpp <- function(bhat, LD, n, per_variant_sample_size = NULL, array = NULL, a = 0.1, c = 1.0, M = 1000L, a0k = 0.5, b0k = 0.5, iter = 1000L, burn = 200L, thin = 5L, n_threads = 1L, opt_llk = 1L, verbose = TRUE) { - .Call("_pecotmr_sdpr_rcpp", PACKAGE = "pecotmr", bhat, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose) + .Call('_pecotmr_sdpr_rcpp', PACKAGE = 'pecotmr', bhat, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose) } + diff --git a/R/colocboost_pipeline.R b/R/colocboost_pipeline.R index 4545b184..f39083ca 100644 --- a/R/colocboost_pipeline.R +++ b/R/colocboost_pipeline.R @@ -758,7 +758,8 @@ qc_regional_data <- function(region_data, } # Perform imputation if (impute) { - impute_results <- raiss(LD_data$ref_panel, sumstat$sumstats, LD_data$combined_LD_matrix, + LD_matrix <- partition_LD_matrix(LD_data) + impute_results <- raiss(LD_data$ref_panel, sumstat$sumstats, LD_matrix, variant_indices = LD_matrix$variant_indices, rcond = impute_opts$rcond, R2_threshold = impute_opts$R2_threshold, minimum_ld = impute_opts$minimum_ld, lamb = impute_opts$lamb ) diff --git a/R/raiss.R b/R/raiss.R index 0700c8d4..313c5483 100644 --- a/R/raiss.R +++ b/R/raiss.R @@ -45,7 +45,7 @@ raiss_single_matrix <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01 } # Extract zt, sig_t, and sig_i_t - zt <- known_zscores$z + zt <- known_zscores$z[knowns] sig_t <- LD_matrix[knowns, knowns, drop = FALSE] sig_i_t <- LD_matrix[unknowns, knowns, drop = FALSE] @@ -143,7 +143,8 @@ raiss <- function(ref_panel, known_zscores, LD_matrix, variant_indices = NULL, l # Subset ref_panel and LD_matrix for this block block_indices <- match(block_variant_ids, ref_panel$variant_id) block_ref_panel <- ref_panel[block_indices, ] - block_LD_matrix <- LD_matrix[[block_id]] + block_LD_matrix <- LD_matrix$ld_matrices[[block_id]] + block_known_zscores <- known_zscores %>% filter(variant_id %in% block_variant_ids) # Check dimensions match if (nrow(block_LD_matrix) != nrow(block_ref_panel)) { @@ -152,7 +153,7 @@ raiss <- function(ref_panel, known_zscores, LD_matrix, variant_indices = NULL, l # Process the block using the core function block_result <- raiss_single_matrix( - block_ref_panel, known_zscores, block_LD_matrix, + block_ref_panel, block_known_zscores, block_LD_matrix, lamb, rcond, R2_threshold, minimum_ld, verbose = FALSE ) @@ -170,9 +171,14 @@ raiss <- function(ref_panel, known_zscores, LD_matrix, variant_indices = NULL, l } # Combine results from all blocks - nofilter_results <- lapply(results_list, function(x) x$result_nofilter) - filter_results <- lapply(results_list, function(x) x$result_filter) + nofilter_results <- results_list %>% lapply(function(x) x$result_nofilter) %>% bind_rows() + filter_results <- results_list %>% lapply(function(x) x$result_filter) %>% bind_rows() ld_filtered_list <- lapply(results_list, function(x) x$LD_mat) + variant_list <- lapply(ld_filtered_list, function(ld) data.frame(variants = colnames(ld)) ) + combined_LD_matrix <- create_combined_LD_matrix( + LD_matrices = ld_filtered_list, + variants = variant_list + ) # Combine into data frames result_nofilter <- dplyr::bind_rows(nofilter_results) %>% arrange(pos) @@ -182,7 +188,7 @@ raiss <- function(ref_panel, known_zscores, LD_matrix, variant_indices = NULL, l return(list( result_nofilter = result_nofilter, result_filter = result_filter, - LD_mat = ld_filtered_list + LD_mat = combined_LD_matrix )) } diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index b1f7eb94..28844c73 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -219,7 +219,8 @@ rss_analysis_pipeline <- function( # Perform imputation if (impute) { - impute_results <- raiss(LD_data$ref_panel, sumstats, partition_LD_matrix(LD_data), rcond = impute_opts$rcond, R2_threshold = impute_opts$R2_threshold, minimum_ld = impute_opts$minimum_ld, lamb = impute_opts$lamb) + LD_matrix <- partition_LD_matrix(LD_data) + impute_results <- raiss(LD_data$ref_panel, sumstats, LD_matrix, variant_indices = LD_matrix$variant_indices, rcond = impute_opts$rcond, R2_threshold = impute_opts$R2_threshold, minimum_ld = impute_opts$minimum_ld, lamb = impute_opts$lamb) sumstats <- impute_results$result_filter LD_mat <- impute_results$LD_mat } diff --git a/man/load_LD_matrix.Rd b/man/load_LD_matrix.Rd index 07c60a58..ec7d5d82 100644 --- a/man/load_LD_matrix.Rd +++ b/man/load_LD_matrix.Rd @@ -22,6 +22,9 @@ Each element of the list contains: \item{combined_LD_variants}{A data frame merging selected variants within each LD block in bim file format with columns "chrom", "variants", "GD", "pos", "A1", and "A2".} \item{combined_LD_matrix}{The LD matrix for each region, with row and column names matching variant identifiers.} +\item{block_indices}{A data frame tracking the indices of variants in the combined matrix by LD block, +with columns "block_id", "start_idx", "end_idx", "chrom", "block_start", "block_end" to facilitate +further partitioning if needed.} } } \description{ diff --git a/man/load_multitask_regional_data.Rd b/man/load_multitask_regional_data.Rd index cd489407..3593c9cf 100644 --- a/man/load_multitask_regional_data.Rd +++ b/man/load_multitask_regional_data.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/colocboost_pipeline.R \name{load_multitask_regional_data} \alias{load_multitask_regional_data} -\title{This function loads a mixture data sets for a specific region, including individual-level data (genotype, phenotype, covariate data) +\title{This function loads a mixture data sets for a specific region, including individual-level data (genotype, phenotype, covariate data) or summary statistics (sumstats, LD). Run \code{load_regional_univariate_data} and \code{load_rss_data} multiple times for different datasets} \usage{ load_multitask_regional_data( @@ -126,7 +126,7 @@ sumstat_data contains the following components if exist } } \description{ -This function loads a mixture data sets for a specific region, including individual-level data (genotype, phenotype, covariate data) +This function loads a mixture data sets for a specific region, including individual-level data (genotype, phenotype, covariate data) or summary statistics (sumstats, LD). Run \code{load_regional_univariate_data} and \code{load_rss_data} multiple times for different datasets } \section{Loading individual level data from multiple corhorts}{ diff --git a/man/raiss.Rd b/man/raiss.Rd index 84c83cb8..dc08c4de 100644 --- a/man/raiss.Rd +++ b/man/raiss.Rd @@ -8,6 +8,7 @@ raiss( ref_panel, known_zscores, LD_matrix, + variant_indices = NULL, lamb = 0.01, rcond = 0.01, R2_threshold = 0.6, @@ -20,18 +21,22 @@ raiss( \item{known_zscores}{A data frame containing 'chrom', 'pos', 'variant_id', 'A1', 'A2', and 'z' values.} -\item{LD_matrix}{A square matrix of dimension equal to the number of rows in ref_panel.} +\item{LD_matrix}{Either a square matrix or a list of matrices for LD blocks.} -\item{lamb}{Regularization term added to the diagonal of the LD_matrix in the RAImputation model.} +\item{variant_indices}{Optional data frame mapping variant IDs to block IDs when LD_matrix is a list.} -\item{rcond}{Threshold for filtering eigenvalues in the pseudo-inverse computation in the RAImputation model.} +\item{lamb}{Regularization term added to the diagonal of the LD_matrix.} + +\item{rcond}{Threshold for filtering eigenvalues in the pseudo-inverse computation.} \item{R2_threshold}{R square threshold below which SNPs are filtered from the output.} \item{minimum_ld}{Minimum LD score threshold for SNP filtering.} + +\item{verbose}{Logical indicating whether to print progress information.} } \value{ -A data frame that is the result of merging the imputed SNP data with known z-scores. +A list containing filtered and unfiltered results, and filtered LD matrix. } \description{ This function is a part of the statistical library for SNP imputation from: @@ -41,7 +46,7 @@ Noah Zaitlen, et al., titled "Fast and accurate imputation of summary statistics enhances evidence of functional enrichment", published in Bioinformatics in 2014. } -\examples{ -# Example usage (assuming appropriate data is available): -# result <- raiss(ref_panel, known_zscores, LD_matrix, lamb = 0.01, rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5) +\details{ +This function can process either a single LD matrix or a list of LD matrices for different blocks. +For a list of matrices, it processes each block separately and combines the results. } diff --git a/man/raiss_single_matrix.Rd b/man/raiss_single_matrix.Rd new file mode 100644 index 00000000..40c40e54 --- /dev/null +++ b/man/raiss_single_matrix.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/raiss.R +\name{raiss_single_matrix} +\alias{raiss_single_matrix} +\title{Core RAISS implementation for a single LD matrix} +\usage{ +raiss_single_matrix( + ref_panel, + known_zscores, + LD_matrix, + lamb = 0.01, + rcond = 0.01, + R2_threshold = 0.6, + minimum_ld = 5, + verbose = TRUE +) +} +\arguments{ +\item{ref_panel}{A data frame containing 'chrom', 'pos', 'variant_id', 'A1', and 'A2'.} + +\item{known_zscores}{A data frame containing 'chrom', 'pos', 'variant_id', 'A1', 'A2', and 'z' values.} + +\item{LD_matrix}{A square matrix of dimension equal to the number of rows in ref_panel.} + +\item{lamb}{Regularization term added to the diagonal of the LD_matrix.} + +\item{rcond}{Threshold for filtering eigenvalues in the pseudo-inverse computation.} + +\item{R2_threshold}{R square threshold below which SNPs are filtered from the output.} + +\item{minimum_ld}{Minimum LD score threshold for SNP filtering.} + +\item{verbose}{Logical indicating whether to print progress information.} +} +\value{ +A list containing filtered and unfiltered results, and filtered LD matrix. +} +\description{ +Core RAISS implementation for a single LD matrix +}