From 2fcd3144e0a3f2f92a971e0fcf5542aa814557fb Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Wed, 23 Apr 2025 19:30:54 -0400 Subject: [PATCH] add wordlist --- R/colocboost.R | 2 +- inst/WORDLIST | 82 +++++++++++++++++++ man/colocboost.Rd | 2 +- vignettes/Ambiguous_Colocalization.Rmd | 16 ++-- vignettes/Interpret_ColocBoost_Output.Rmd | 8 +- vignettes/LD_Free_Colocalization.Rmd | 4 +- .../Summary_Statistics_Colocalization.Rmd | 6 +- 7 files changed, 101 insertions(+), 19 deletions(-) create mode 100644 inst/WORDLIST diff --git a/R/colocboost.R b/R/colocboost.R index 0d0fed1..4c5f651 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -8,7 +8,7 @@ #' In brief, this function fits a multiple linear regression model \eqn{Y = XB + E} in matrix form. #' ColocBoost can be generally used in multi-task variable selection regression problem. #' -#' @details The function \code{colocboost} implements the proximity smoothed gradient boosting method from Cao et al (2025). +#' @details The function \code{colocboost} implements the proximity smoothed gradient boosting method from Cao and colleagues (2025). #' There is an additional step to help merge the confidence sets with small \code{between_putiry} #' (default is 0.8) but within the same locus. This step addresses potential instabilities in linkage disequilibrium (LD) estimation #' that may arise from small sample sizes or discrepancies in minor allele frequencies (MAF) across different confidence sets. 
diff --git a/inst/WORDLIST b/inst/WORDLIST new file mode 100644 index 0000000..82fadaf --- /dev/null +++ b/inst/WORDLIST @@ -0,0 +1,82 @@ +Biobank +Bioinformatics +CFB +COLOC +CoS +Codecov +ColocBoost +Colocalization +Colocalized +Conda +FineBoost +GTEx +GWAS +HyPrColoc +INDELs +Jager +KK +LD +MAF +Mazumder +Micromamba +NPC +Najar +Nealelab +PIPs +PLINK +Pre +Recalibrate +SEL +SuSiE +Sumstat +UKBB +VCP +VPA +Xcorr +YI +bioinformatics +chrom +cis +colocalization +colocalize +colocalized +conda +de +decayrate +eQTL +grey +iteratively +jk +ld +lfsr +lth +maf +medRxiv +modularity +nd +npc +omics +phenotypes +pixi +pos +pre +probabilistically +pvalue +qc +rcond +recalibrate +recalibrated +reconciliate +rss +sQTL +subsampled +sumstat +sumstats +tabix +uCoS +uS +ucos +uncolocalized +vcp +xQTL +xQTLs diff --git a/man/colocboost.Rd b/man/colocboost.Rd index e5a7307..41c1488 100644 --- a/man/colocboost.Rd +++ b/man/colocboost.Rd @@ -211,7 +211,7 @@ In brief, this function fits a multiple linear regression model \eqn{Y = XB + E} ColocBoost can be generally used in multi-task variable selection regression problem. } \details{ -The function \code{colocboost} implements the proximity smoothed gradient boosting method from Cao et al (2025). +The function \code{colocboost} implements the proximity smoothed gradient boosting method from Cao and colleagues (2025). There is an additional step to help merge the confidence sets with small \code{between_putiry} (default is 0.8) but within the same locus. This step addresses potential instabilities in linkage disequilibrium (LD) estimation that may arise from small sample sizes or discrepancies in minor allele frequencies (MAF) across different confidence sets. 
diff --git a/vignettes/Ambiguous_Colocalization.Rmd b/vignettes/Ambiguous_Colocalization.Rmd index 0c45594..50fe8ad 100644 --- a/vignettes/Ambiguous_Colocalization.Rmd +++ b/vignettes/Ambiguous_Colocalization.Rmd @@ -16,7 +16,7 @@ knitr::opts_chunk$set( This vignette demonstrates an example of ambiguous colocalization from trait-specific effects using the `colocboost`. Specifically, we will use the `Ambiguous_Colocalization`, which is output from `colocboost` analyzing GTEx release v8 and UK Biobank summary statistics -(see more details of the original data source in Acknowledgement section). +(see more details of the original data source in Acknowledgment section). ```{r setup} library(colocboost) @@ -160,12 +160,12 @@ names(res$ambiguous_cos[[1]]) **Explanation of results** For each ambiguous colocalization, the following information is provided: -- `ambiguous_cos`: Contains variants indices and names of the original trait-specific uCoS used to construct this ambiguouse colocalization. -- `ambiguous_cos_overlap`: Contains the overlapping variants information across the uCoS used to construct this ambiguouse colocalization. -- `ambiguous_cos_union`: Contains the union of variants information across the uCoS used to construct this ambiguouse colocalization. -- `ambiguous_cos_outcomes`: Contains the outcomes indices and names for uCoS used to construct this ambiguouse colocalization. -- `ambiguous_cos_weight`: Contains the trait-specific weights of the uCoS used to construct this ambiguouse colocalization. -- `ambiguous_cos_puriry`: Contains the purity of across uCoS used to construct this ambiguouse colocalization. +- `ambiguous_cos`: Contains variants indices and names of the original trait-specific uCoS used to construct this ambiguous colocalization. +- `ambiguous_cos_overlap`: Contains the overlapping variants information across the uCoS used to construct this ambiguous colocalization. 
+- `ambiguous_cos_union`: Contains the union of variants information across the uCoS used to construct this ambiguous colocalization. +- `ambiguous_cos_outcomes`: Contains the outcomes indices and names for uCoS used to construct this ambiguous colocalization. +- `ambiguous_cos_weight`: Contains the trait-specific weights of the uCoS used to construct this ambiguous colocalization. +- `ambiguous_cos_puriry`: Contains the purity across the uCoS used to construct this ambiguous colocalization. - `recalibrated_cos_vcp`: Contains the recalibrated integrative weight to analogous to variant colocalization probability (VCP) from the ambiguous colocalization results. - `recalibrated_cos`: Contains the recalibrated 95% colocalization confidence set (CoS) from the ambiguous colocalization results. @@ -206,7 +206,7 @@ if researcher decides to investigate these ambiguous colocalization events. - While we provide recalibrated weights as a suggested approach for interpreting ambiguous results, users can still choose between recalibrated weights and trait-specific weights based on their research context. - The `colocboost_plot` function will not consider it as colocalized but still showing them as uncolocalized events, with overlapping variants color labeled. -# Acknowledgement +# Acknowledgment - The eQTL data used for the analyses described in this example results were obtained from GTEx release v8 from [GTEx Portal](https://gtexportal.org/home/downloads/adult-gtex/qtl). - The GWAS summary statistics used for the analyses described in this example results were obtained from UK Biobank (UKBB) diff --git a/vignettes/Interpret_ColocBoost_Output.Rmd b/vignettes/Interpret_ColocBoost_Output.Rmd index ee9352a..36ac3c7 100644 --- a/vignettes/Interpret_ColocBoost_Output.Rmd +++ b/vignettes/Interpret_ColocBoost_Output.Rmd @@ -298,12 +298,12 @@ res$ucos_details$cos_ucos_purity #### 3.5.4. 
Other components -- **`ucos_weight`**: Integrative weights for each trait-specific (uncolocalized) trait, used to recalibrate UCoS when traits are filtered out. +- **`ucos_weight`**: Integrative weights for each trait-specific (uncolocalized) trait, used to recalibrate uCoS when traits are filtered out. - **`ucos_top_variables`**: Indices and names of the top variable for each uCoS, which is the variable with the highest VCP. - **`ucos_purity`**: Includes three lists, each containing an $uS \times uS$ matrix, where $uS$ is the number of uCoS: - - `min_abs_cor`: Minimum absolute correlation of variables within (diagonal) UCoS or between (off-diagonal) different uCoS. - - `median_abs_cor`: Median absolute correlation of variables within or between UCoS. - - `max_abs_cor`: Maximum absolute correlation of variables within or between UCoS. + - `min_abs_cor`: Minimum absolute correlation of variables within (diagonal) uCoS or between (off-diagonal) different uCoS. + - `median_abs_cor`: Median absolute correlation of variables within or between uCoS. + - `max_abs_cor`: Maximum absolute correlation of variables within or between uCoS. By analyzing these components, you can gain a deeper understanding of trait-specific (uncolocalized) effects that are not colocalized, providing additional insights into the data. diff --git a/vignettes/LD_Free_Colocalization.Rmd b/vignettes/LD_Free_Colocalization.Rmd index 75aaf81..730558d 100644 --- a/vignettes/LD_Free_Colocalization.Rmd +++ b/vignettes/LD_Free_Colocalization.Rmd @@ -117,7 +117,7 @@ While this method is computationally efficient, it has limitations due to the st Users should interpret the results with caution, especially in regions with complex LD structures or multiple causal variants. -ColocBoost also provides a flexibility to use Hyprcoloc compatible format for summary statistics without LD matrix. +ColocBoost also provides a flexibility to use HyPrColoc compatible format for summary statistics without LD matrix. 
```{r hyprcoloc-compatible} # Loading the Dataset @@ -125,7 +125,7 @@ data(Ind_5traits) X <- Ind_5traits$X Y <- Ind_5traits$Y -# Coverting to Hyprcoloc compatible format +# Converting to HyPrColoc compatible format effect_est <- effect_se <- effect_n <- c() for (i in 1:length(X)){ x <- X[[i]] diff --git a/vignettes/Summary_Statistics_Colocalization.Rmd b/vignettes/Summary_Statistics_Colocalization.Rmd index 6af293b..30fd224 100644 --- a/vignettes/Summary_Statistics_Colocalization.Rmd +++ b/vignettes/Summary_Statistics_Colocalization.Rmd @@ -174,9 +174,9 @@ res$cos_details$cos$cos_index ``` -## 3.4. Hyprcoloc compatible format: effect size and standard error matrices +## 3.4. HyPrColoc compatible format: effect size and standard error matrices -ColocBoost also provides a flexibility to use Hyprcoloc compatible format for summary statistics with and without LD matrix. +ColocBoost also provides a flexibility to use HyPrColoc compatible format for summary statistics with and without LD matrix. ```{r hyprcoloc-compatible} # Loading the Dataset @@ -184,7 +184,7 @@ data(Ind_5traits) X <- Ind_5traits$X Y <- Ind_5traits$Y -# Coverting to Hyprcoloc compatible format +# Converting to HyPrColoc compatible format effect_est <- effect_se <- effect_n <- c() for (i in 1:length(X)){ x <- X[[i]]