Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(QUAIL_rank_score_pipeline)
export(adjust_susie_weights)
export(align_variant_names)
export(allele_qc)
export(auto_decision)
export(batch_load_twas_weights)
export(bayes_a_rss_weights)
export(bayes_a_weights)
Expand All @@ -28,6 +29,8 @@ export(ctwas_ld_loader)
export(dentist)
export(dentist_single_window)
export(enet_weights)
export(extract_cs_info)
export(extract_top_pip_info)
export(filter_invalid_summary_stat)
export(filter_mixture_components)
export(filter_variants_by_ld_reference)
Expand All @@ -38,6 +41,7 @@ export(fsusie_wrapper)
export(gbayes_rss)
export(get_ctwas_meta_data)
export(get_nested_element)
export(get_susie_result)
export(glmnet_weights)
export(harmonize_twas)
export(lasso_weights)
Expand Down Expand Up @@ -70,6 +74,7 @@ export(multicontext_ld_clumping)
export(multigene_udr)
export(multivariate_analysis_pipeline)
export(mvsusie_weights)
export(parse_cs_corr)
export(parse_region)
export(parse_snp_info)
export(parse_variant_id)
Expand Down Expand Up @@ -107,6 +112,7 @@ export(twas_z)
export(univariate_analysis_pipeline)
export(wald_test_pval)
export(xqtl_enrichment_wrapper)
export(z_to_pvalue)
import(vroom)
importFrom(IRanges,IRanges)
importFrom(IRanges,end)
Expand All @@ -120,13 +126,15 @@ importFrom(bigstatsr,FBM.code256)
importFrom(coloc,coloc.bf_bf)
importFrom(data.table,as.data.table)
importFrom(data.table,fread)
importFrom(data.table,setDT)
importFrom(data.table,setnames)
importFrom(doFuture,registerDoFuture)
importFrom(dplyr,"%>%")
importFrom(dplyr,across)
importFrom(dplyr,arrange)
importFrom(dplyr,as_tibble)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,distinct)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
Expand Down
5 changes: 1 addition & 4 deletions R/LD.R
Original file line number Diff line number Diff line change
Expand Up @@ -348,9 +348,6 @@ load_LD_matrix <- function(LD_meta_file_path, region, extract_coordinates = NULL
# Use region chromosome as fallback for empty blocks
block_chroms[j] <- as.character(intersected_LD_files$region$chrom)
}

# Remove large objects to free memory
rm(LD_matrix_processed, extracted_LD_list)
}

# Create combined LD matrix
Expand Down Expand Up @@ -494,7 +491,6 @@ partition_LD_matrix <- function(ld_data, merge_small_blocks = TRUE,

# Partition the matrix based on block metadata
result <- extract_block_matrices(combined_matrix, block_metadata, variant_ids)

return(result)
}

Expand Down Expand Up @@ -708,6 +704,7 @@ extract_block_matrices <- function(matrix, block_metadata, variant_ids) {
stringsAsFactors = FALSE
)
variant_mapping <- rbind(variant_mapping, block_mapping)

}

return(list(
Expand Down
19 changes: 19 additions & 0 deletions R/file_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,11 @@ load_regional_association_data <- function(genotype, # PLINK file
))
}

#' Load Regional Univariate Association Data
#'
#' This function loads regional association data for univariate analysis.
#' It includes residual matrices, original genotype data, and additional metadata.
#'
#' @importFrom matrixStats colVars
#' @return A list
#' @export
Expand All @@ -508,6 +513,11 @@ load_regional_univariate_data <- function(...) {
))
}

#' Load Regional Data for Regression Modeling
#'
#' This function loads regional association data formatted for regression modeling.
#' It includes phenotype, genotype, and covariate matrices along with metadata.
#'
#' @return A list
#' @export
load_regional_regression_data <- function(...) {
Expand Down Expand Up @@ -543,6 +553,11 @@ pheno_list_to_mat <- function(data_list) {
return(data_list)
}

#' Load and Preprocess Regional Multivariate Data
#'
#' This function loads regional association data and processes it into a multivariate format.
#' It optionally filters out samples based on missingness thresholds in the response matrix.
#'
#' @importFrom matrixStats colVars
#' @return A list
#' @export
Expand Down Expand Up @@ -578,6 +593,10 @@ load_regional_multivariate_data <- function(matrix_y_min_complete = NULL, # when
))
}

#' Load Regional Functional Association Data
#'
#' This function loads precomputed regional functional association data.
#'
#' @return A list
#' @export
load_regional_functional_data <- function(...) {
Expand Down
30 changes: 30 additions & 0 deletions R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,36 @@ z_to_beta_se <- function(z, maf, n) {
return(data.frame(beta = beta, se = se, maf = p))
}

#' Convert Z-scores to P-values
#'
#' Computes two-sided p-values from z-scores under the standard normal
#' distribution. The input is fully vectorized, so any number of z-scores
#' can be converted in a single call.
#'
#' @param z Numeric vector. The z-scores to be converted to p-values.
#'
#' @return A numeric vector of p-values, one per input z-score.
#'
#' @details
#' The two-sided p-value is computed as
#' p-value = 2 * Φ(-|z|)
#' where Φ denotes the standard normal cumulative distribution function.
#' Here this is evaluated as the upper-tail probability of |z|, which is
#' numerically identical.
#'
#' @examples
#' z <- c(2.5, -1.8, 3.2, 0.7)
#' pvalues <- z_to_pvalue(z)
#' print(pvalues)
#'
#' @note
#' This function assumes the z-scores arise from a two-tailed test under
#' normality and returns two-sided p-values. For very large |z| (roughly
#' above 37) the p-value underflows to exactly zero in double precision.
#'
#' @export
z_to_pvalue <- function(z) {
  # Upper-tail probability of |z|, doubled for the two-sided test;
  # equivalent to 2 * pnorm(-abs(z)).
  2 * pnorm(abs(z), lower.tail = FALSE)
}

#' Filter events based on provided context name pattern
#'
#' @param events A character vector of event names
Expand Down
5 changes: 2 additions & 3 deletions R/quail_vqtl.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@
#' @param return_residuals Logical, whether to return residuals (default: FALSE).
#' @return A list containing regression results: \code{betahat}, \code{sebetahat}, \code{z_scores}, \code{p_values}, and \code{q_values}.
#' @examples
#' @noRd
#' X <- matrix(rnorm(1000), 100, 10)
#' y <- rnorm(100)
#' results <- univariate_regression(X, y)

#' @noRd
univariate_regression <- function(X, y, Z = NULL, center = TRUE,
scale = FALSE, return_residuals = FALSE) {
y_na <- which(is.na(y))
Expand Down Expand Up @@ -96,10 +95,10 @@ univariate_regression <- function(X, y, Z = NULL, center = TRUE,
#' @param covariates Optional numeric matrix of covariates (n x k), where k is the number of covariates.
#' @return A data frame containing regression results for each SNP, including \code{BETA}, \code{SE}, \code{Z}, \code{P}, and \code{Q}.
#' @examples
#' @noRd
#' genotype <- matrix(rbinom(1000 * 10, 2, 0.3), 1000, 10)
#' phenotype <- rnorm(1000)
#' results <- run_linear_regression(genotype, phenotype)
#' @noRd
run_linear_regression <- function(genotype, phenotype, covariates = NULL) {
if (!is.null(covariates)) {
covariates <- as.data.frame(lapply(covariates, as.numeric))
Expand Down
1 change: 0 additions & 1 deletion R/quantile_twas_weight.R
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,6 @@ corr_filter <- function(X, cor_thres = 0.8) {
#' @param response Optional response vector for "response_correlation" strategy
#' @param max_iterations Maximum number of iterations to attempt removing problematic columns
#' @return Cleaned matrix X with problematic columns removed
#' @importFrom stats qr
#' @noRd
check_remove_highcorr_snp <- function(X = X, C = C, strategy = c("correlation", "variance", "response_correlation"), response = NULL, max_iterations = 300, corr_thresholds = seq(0.75, 0.5, by = -0.05)) {
strategy <- match.arg(strategy)
Expand Down
32 changes: 20 additions & 12 deletions R/raiss.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ raiss_single_matrix <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01
knowns_id <- intersect(known_zscores$variant_id, ref_panel$variant_id)
knowns <- which(ref_panel$variant_id %in% knowns_id)
unknowns <- which(!ref_panel$variant_id %in% knowns_id)

# Handle edge cases
if (length(knowns) == 0) {
if (verbose) message("No known variants found, cannot perform imputation.")
Expand All @@ -51,10 +51,8 @@ raiss_single_matrix <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01

# Call raiss_model
results <- raiss_model(zt, sig_t, sig_i_t, lamb, rcond)

# Format the results
results <- format_raiss_df(results, ref_panel, unknowns)

# Filter output
results <- filter_raiss_output(results, R2_threshold, minimum_ld, verbose)

Expand All @@ -72,7 +70,6 @@ raiss_single_matrix <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01
} else {
as.matrix(LD_matrix)
}

# Return results
return(list(
result_nofilter = result_nofilter,
Expand Down Expand Up @@ -193,7 +190,6 @@ raiss <- function(ref_panel, known_zscores, LD_matrix, lamb = 0.01, rcond = 0.01
lamb, rcond, R2_threshold, minimum_ld,
verbose = FALSE
)

# Skip if block returned NULL (no known variants)
if (!is.null(block_result)) {
results_list[[block_id]] <- block_result
Expand Down Expand Up @@ -346,14 +342,26 @@ filter_raiss_output <- function(zscores, R2_threshold = 0.6, minimum_ld = 5, ver

# Print report
if (verbose) {
max_label_length <- max(nchar(c(
"Variants before filter:",
"Non-imputed variants:",
"Imputed variants:",
"Variants filtered because of low LD score:",
"Variants filtered because of low R2:",
"Remaining variants after filter:"
)))

format_line <- function(label, value) {
sprintf("%-*s %d", max_label_length, paste0(label, ":"), value)
}

message("IMPUTATION REPORT\n")
message("Number of SNPs:\n")
message("before filter:", NSNPs_bf_filt, "\n")
message("not imputed:", NSNPs_initial, "\n")
message("imputed:", NSNPs_imputed, "\n")
message("filtered because of LD:", NSNPs_ld_filt, "\n")
message("filtered because of R2:", NSNPs_R2_filt, "\n")
message("after filter:", NSNPs_af_filt, "\n")
message(format_line("Variants before filter", NSNPs_bf_filt))
message(format_line("Non-imputed variants", NSNPs_initial))
message(format_line("Imputed variants", NSNPs_imputed))
message(format_line("Variants filtered because of low LD score", NSNPs_ld_filt))
message(format_line("Variants filtered because of low R2", NSNPs_R2_filt))
message(format_line("Remaining variants after filter", NSNPs_af_filt))
}
return(zscore_list = list(zscores_nofilter = zscores_nofilter, zscores = zscores))
}
Expand Down
6 changes: 6 additions & 0 deletions R/regularized_regression.R
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,12 @@ enet_weights <- function(X, y) glmnet_weights(X, y, 0.5)
#' @export
lasso_weights <- function(X, y) glmnet_weights(X, y, 1)

#' Compute Weights Using mr.ash Shrinkage
#'
#' This function fits the `mr.ash` model (adaptive shrinkage regression) to estimate weights
#' for a given set of predictors and response. It uses optional prior standard deviation initialization
#' and can accept custom initial beta values.
#'
#' @examples
#' wgt.mr.ash <- mrash_weights(eqtl$X, eqtl$y_res, beta.init = lasso_weights(X, y))
#' @importFrom stats predict
Expand Down
Loading
Loading