From 709c222b9f7a3f5e4c2996887f4f8428962bf421 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sat, 19 Apr 2025 04:20:51 -0400 Subject: [PATCH 1/5] minor fix --- R/colocboost_plot.R | 2 +- _pkgdown.yml | 4 ++ vignettes/ColocBoost_Wrapper.Rmd | 0 vignettes/FineBoost_Special_Case.Rmd | 60 +++++++++++++++++++ vignettes/Individual_Level_Colocalization.Rmd | 2 +- vignettes/Input_Data_Format.Rmd | 10 ++-- vignettes/LD_Free_Colocalization.Rmd | 0 vignettes/Pairwise_Colocalization.Rmd | 0 vignettes/Partial_Overlap_Variants.Rmd | 4 ++ vignettes/Visualization_ColocBoost_Output.Rmd | 10 ++-- 10 files changed, 81 insertions(+), 11 deletions(-) create mode 100644 vignettes/ColocBoost_Wrapper.Rmd create mode 100644 vignettes/FineBoost_Special_Case.Rmd create mode 100644 vignettes/LD_Free_Colocalization.Rmd create mode 100644 vignettes/Pairwise_Colocalization.Rmd diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R index 8bbf3a5..8d1bc37 100644 --- a/R/colocboost_plot.R +++ b/R/colocboost_plot.R @@ -635,10 +635,10 @@ plot_initial <- function(cb_plot_input, y = "log10p", args$ylim <- c(0, 1) } else if (y == "vcp") { plot_data <- cb_plot_input$vcp + ylab <- "VCP" if (length(cb_plot_input$outcomes) == 1) { ylab <- "VPA" } - ylab <- "VCP" args$ylim <- c(0, 1) }else if (y == "coef") { plot_data <- cb_plot_input$coef diff --git a/_pkgdown.yml b/_pkgdown.yml index 9bdfa1f..6af6147 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -33,6 +33,10 @@ articles: - announcements - installation - ColocBoost_tutorial_diagnostic + - LD_Free_Colocalization + - ColocBoost_Wrapper + - Pairwise_Colocalization + - FineBoost_Special_Case reference: - title: "Example Data" diff --git a/vignettes/ColocBoost_Wrapper.Rmd b/vignettes/ColocBoost_Wrapper.Rmd new file mode 100644 index 0000000..e69de29 diff --git a/vignettes/FineBoost_Special_Case.Rmd b/vignettes/FineBoost_Special_Case.Rmd new file mode 100644 index 0000000..cf2216f --- /dev/null +++ b/vignettes/FineBoost_Special_Case.Rmd @@ -0,0 +1,60 @@ +--- +title: "Single-trait Fine-mapping with FineBoost" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Single-trait Fine-mapping with FineBoost} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + + +This vignette demonstrates how to perform single-trait fine-mapping analysis using FineBoost, a specialized single-trait version of ColocBoost, +from both individual-level data and summary statistics. Specifically focusing on the 2nd trait with 2 causal variants (644 and 2289) from +`Ind_5traits` and `Sumstat_5traits` dataset included in the package. + + +```{r setup} +library(colocboost) +``` + + +# 1. Fine-mapping with individual-level data + + +```{r load-example-indiviudal} +# Load example data +data(Ind_5traits) +X <- Ind_5traits$X[[2]] +Y <- Ind_5traits$Y[[2]] + +res <- colocboost(X = X, Y = Y) +colocboost_plot(res) +``` + + +# 2. Fine-mapping with summary statistics + +```{r load-example-sumstat} +# Load example data +data(Sumstat_5traits) +sumstat <- Sumstat_5traits$sumstat[[2]] +LD <- get_cormat(Ind_5traits$X[[2]]) + +res <- colocboost(sumstat = sumstat, LD = LD) +colocboost_plot(res) +``` + +# 3. LD-free fine-mapping with one causal variant assumption + +```{r ld-free} +# Load example data +res <- colocboost(sumstat = sumstat) +colocboost_plot(res) +``` \ No newline at end of file diff --git a/vignettes/Individual_Level_Colocalization.Rmd b/vignettes/Individual_Level_Colocalization.Rmd index b2a12f7..4a5a59e 100644 --- a/vignettes/Individual_Level_Colocalization.Rmd +++ b/vignettes/Individual_Level_Colocalization.Rmd @@ -57,7 +57,7 @@ Ind_5traits$true_effect_variants # 2. Matched individual level inputs $X$ and $Y$ -The preferred format for colocalization analysis in ColocBoost using individual level data is where genotype (X) and phenotype (Y) data are properly matched. +The preferred format for colocalization analysis in ColocBoost using individual level data is where genotype ($X$) and phenotype ($Y$) data are properly matched. - **Basic format**: `X` and `Y` are organized as lists, matched by trait index, - `(X[1], Y[1])` contains individuals for trait 1, diff --git a/vignettes/Input_Data_Format.Rmd b/vignettes/Input_Data_Format.Rmd index c6c254b..63ae925 100644 --- a/vignettes/Input_Data_Format.Rmd +++ b/vignettes/Input_Data_Format.Rmd @@ -30,15 +30,15 @@ For analyses using individual-level data, the basic format for single trait is a The input format for multiple traits is similar, but `X` matrix should be a list of matrices, each corresponding to a different trait. `Y` vector should also be a list of vectors. For example: -- `X = list(X1, X2, X3, X4, X5)` where each `Xi` is a matrix for trait `i` - with the dimension of Ni * Pi, where Ni and Pi do not need to be the same for different traits. -- `Y = list(Y1, Y2, Y3, Y4, Y5)` where each `Yi` is a vector for trait `i` - with Ni individuals. +- `X = list(X1, X2, X3, X4, X5)` where each `Xi` is a matrix for trait `i` - with the dimension of $N_i \times P_i$, where $N_i$ and $P_i$ do not need to be the same for different traits. +- `Y = list(Y1, Y2, Y3, Y4, Y5)` where each `Yi` is a vector for trait `i` - with $N_i$ individuals. `colocboost` also offers flexible input options (see detailed usage with different input formats, refer to [Individual Level Data Colocalization](https://statfungen.github.io/colocboost/articles/Individual_Level_Colocalization.html).): -- Single X matrix with $N \times P$ with Y matrix with $N \times L$ for $L$ traits. -- Multiple X matrices and unmatched Y vectors with a mapping dictionary. +- Single $X$ matrix with $N \times P$ with $Y$ matrix with $N \times L$ for $L$ traits. +- Multiple $X$ matrices and unmatched $Y$ vectors with a mapping dictionary. # 2. Summary Statistics @@ -97,7 +97,7 @@ dict_YX # 4. Hyprcoloc compatible format: effect size and standard error matrices ColocBoost also provides a flexibility to use Hyprcoloc compatible format for summary statistics with and without LD matrix. -For example, when anaylze L traits for the same P variants with the specified effect size and standard error matrices: +For example, when anaylze $L$ traits for the same $P$ variants with the specified effect size and standard error matrices: - `effect_est` (required) is $P \times L$ matrix of variable regression coefficients (i.e. regression beta values) in the genomic region. - `effect_se` (required) is $P \times L$ matrix of standard errors for the regression coefficients. diff --git a/vignettes/LD_Free_Colocalization.Rmd b/vignettes/LD_Free_Colocalization.Rmd new file mode 100644 index 0000000..e69de29 diff --git a/vignettes/Pairwise_Colocalization.Rmd b/vignettes/Pairwise_Colocalization.Rmd new file mode 100644 index 0000000..e69de29 diff --git a/vignettes/Partial_Overlap_Variants.Rmd b/vignettes/Partial_Overlap_Variants.Rmd index fc8fadf..a730497 100644 --- a/vignettes/Partial_Overlap_Variants.Rmd +++ b/vignettes/Partial_Overlap_Variants.Rmd @@ -53,6 +53,7 @@ X[[2]][1:2, 1:10] X[[3]][1:2, 1:10] ``` +## 1. Run ColocBoost with partial overlapping variants To run ColocBoost to the different genotypes with different causal variants, the variant names should be provided as the column names of the `X` matrices. Otherwise, the `colocboost` function will not be able to identify the variants correctly from different genotype matrices, @@ -69,6 +70,7 @@ res$data_info$n_variables colocboost_plot(res) ``` +## 2. Limitations of using only overlapping variables If we perform colocalization analysis using only overlapping variables, we may fail to detect any colocalization events. This is because the causal variants, which are only partially overlapping across traits, are excluded during the preprocessing step. @@ -87,6 +89,8 @@ colocboost_plot(res) ``` +## 3. Disease-prioritized colocalization analysis with variables in the focal trait + In disease-prioritized colocalization analysis with a focal trait, `ColocBoost` recommends prioritizing variants in the focal trait as the default setting. For the example above, if we consider trait 3 as the focal trait, only variants present in trait 3 will be included in the analysis. This ensures that the analysis focuses on variants relevant to the focal trait while accounting for partial overlaps across other traits. diff --git a/vignettes/Visualization_ColocBoost_Output.Rmd b/vignettes/Visualization_ColocBoost_Output.Rmd index 15b68dd..c2e9990 100644 --- a/vignettes/Visualization_ColocBoost_Output.Rmd +++ b/vignettes/Visualization_ColocBoost_Output.Rmd @@ -100,9 +100,11 @@ Following plot also shows the top variants. ```{r vertical-plot} -colocboost_plot(res, show_top_variables = TRUE, - add_vertical = TRUE, - add_vertical_idx = unique(unlist(Ind_5traits$true_effect_variants))) +colocboost_plot( + res, show_top_variables = TRUE, + add_vertical = TRUE, + add_vertical_idx = unique(unlist(Ind_5traits$true_effect_variants)) +) ``` @@ -141,7 +143,7 @@ at [Interpret ColocBoost Output](https://statfungen.github.io/colocboost/article There are three options avaiable for plotting the results from disease prioritized colocalization, considering a focal trait: - `plot_focal_only = FALSE` (default), if `TRUE` will only plot CoS with focal trait and ignoring other CoS. -- `plot_focal_cos_outocme_only = FALSE` (default) and recommend for visulization for disease prioritized colocalization. +- `plot_focal_cos_outocme_only = FALSE` (default) and **recommend** for visulization for disease prioritized colocalization. If `TRUE` will plot all CoS colocalized with at least on traits within CoS of focal traits. ```{r focal-colocalization} From a3e6115a10e6639d5cfbd5817c8bdc4650a11cdb Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sat, 19 Apr 2025 05:57:17 -0400 Subject: [PATCH 2/5] update plot --- R/colocboost_plot.R | 62 ++++++++++++++++++++++++------------------ man/colocboost_plot.Rd | 3 ++ 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R index 8d1bc37..5c3fd3c 100644 --- a/R/colocboost_plot.R +++ b/R/colocboost_plot.R @@ -9,6 +9,7 @@ #' @param grange Optional plotting range of x-axis to zoom in to a specific region. #' @param plot_cos_idx Optional indices of CoS to plot #' @param outcome_idx Optional indices of outcomes to include in the plot. \code{outcome_idx=NULL} to plot only the outcomes having colocalization. +#' @param plot_all_outcome Optional to plot all outcome in the same figure. #' @param plot_focal_only Logical, if TRUE only plots colocalization with focal outcome, default is FALSE. #' @param plot_focal_cos_outocme_only Logical, if TRUE only plots colocalization including at least on colocalized outcome with focal outcome, default is FALSE. #' @param points_color Background color for non-colocalized variables, default is "grey80". @@ -72,6 +73,7 @@ colocboost_plot <- function(cb_output, y = "log10p", grange = NULL, plot_cos_idx = NULL, outcome_idx = NULL, + plot_all_outcome = FALSE, plot_focal_only = FALSE, plot_focal_cos_outocme_only = FALSE, points_color = "grey80", @@ -124,8 +126,8 @@ colocboost_plot <- function(cb_output, y = "log10p", ) colocboost_plot_basic <- function(cb_plot_input, cb_plot_init, - outcome_idx = NULL, grange = NULL, - plot_cols = 2, + outcome_idx = NULL, plot_all_outcome = FALSE, + grange = NULL, plot_cols = 2, add_vertical = FALSE, add_vertical_idx = NULL, show_top_variables = TRUE, ...) { @@ -155,33 +157,37 @@ colocboost_plot <- function(cb_output, y = "log10p", coloc_cos <- cb_plot_input$cos outcomes <- cb_plot_input$outcomes if (length(y)==1) outcome_idx <- 1 - if (is.null(outcome_idx)) { - if (is.null(coloc_cos)) { - # - no colocalized effects, draw all outcomes in this region - if (length(cb_plot_input$outcomes) == 1) { - message("There is no fine-mapped causal effect in this region!. Showing margianl for this outcome!") - } else { - if (length(y) == 1){ - message("There is no colocalization in this region!. Showing VCP = 0!") + if (plot_all_outcome){ + outcome_idx <- 1:length(y) + } else { + if (is.null(outcome_idx)) { + if (is.null(coloc_cos)) { + # - no colocalized effects, draw all outcomes in this region + if (length(cb_plot_input$outcomes) == 1) { + message("There is no fine-mapped causal effect in this region!. Showing margianl for this outcome!") } else { - message("There is no colocalization in this region!. Showing margianl for all outcomes!") + if (length(y) == 1){ + message("There is no colocalization in this region!. Showing VCP = 0!") + } else { + message("There is no colocalization in this region!. Showing margianl for all outcomes!") + } } + outcome_idx <- 1:length(y) + } else { + n.coloc <- length(coloc_cos) + coloc_index <- cb_plot_input$coloc_index + outcome_idx <- Reduce(union, coloc_index) } - outcome_idx <- 1:length(y) - } else { - n.coloc <- length(coloc_cos) - coloc_index <- cb_plot_input$coloc_index - outcome_idx <- Reduce(union, coloc_index) - } - if (!is.null(cb_plot_input$focal_outcome)) { - p_focal <- grep(cb_plot_input$focal_outcome, outcomes) - include_focal <- sapply(cb_plot_input$coloc_index, function(ci) { - p_focal %in% ci - }) - if (any(include_focal)) { - coloc_index <- cb_plot_input$coloc_index[order(include_focal == "FALSE")] - coloc_index <- Reduce(union, coloc_index) - outcome_idx <- c(p_focal, setdiff(coloc_index, p_focal)) + if (!is.null(cb_plot_input$focal_outcome)) { + p_focal <- grep(cb_plot_input$focal_outcome, outcomes) + include_focal <- sapply(cb_plot_input$coloc_index, function(ci) { + p_focal %in% ci + }) + if (any(include_focal)) { + coloc_index <- cb_plot_input$coloc_index[order(include_focal == "FALSE")] + coloc_index <- Reduce(union, coloc_index) + outcome_idx <- c(p_focal, setdiff(coloc_index, p_focal)) + } } } } @@ -304,7 +310,9 @@ colocboost_plot <- function(cb_output, y = "log10p", colocboost_plot_basic(cb_plot_input, cb_plot_init, grange = grange, - outcome_idx = outcome_idx, plot_cols = plot_cols, + outcome_idx = outcome_idx, + plot_all_outcome = plot_all_outcome, + plot_cols = plot_cols, add_vertical = add_vertical, add_vertical_idx = add_vertical_idx, show_top_variables = show_top_variables, ... diff --git a/man/colocboost_plot.Rd b/man/colocboost_plot.Rd index 2b3da6a..156178b 100644 --- a/man/colocboost_plot.Rd +++ b/man/colocboost_plot.Rd @@ -13,6 +13,7 @@ colocboost_plot( grange = NULL, plot_cos_idx = NULL, outcome_idx = NULL, + plot_all_outcome = FALSE, plot_focal_only = FALSE, plot_focal_cos_outocme_only = FALSE, points_color = "grey80", @@ -51,6 +52,8 @@ colocboost_plot( \item{outcome_idx}{Optional indices of outcomes to include in the plot. \code{outcome_idx=NULL} to plot only the outcomes having colocalization.} +\item{plot_all_outcome}{Optional to plot all outcome in the same figure.} + \item{plot_focal_only}{Logical, if TRUE only plots colocalization with focal outcome, default is FALSE.} \item{plot_focal_cos_outocme_only}{Logical, if TRUE only plots colocalization including at least on colocalized outcome with focal outcome, default is FALSE.} From 8ccc74dd8784ad5594972c9c94fc34412b1bbe32 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sat, 19 Apr 2025 06:00:39 -0400 Subject: [PATCH 3/5] Update Visualization_ColocBoost_Output.Rmd --- vignettes/Visualization_ColocBoost_Output.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/vignettes/Visualization_ColocBoost_Output.Rmd b/vignettes/Visualization_ColocBoost_Output.Rmd index c2e9990..e05e04b 100644 --- a/vignettes/Visualization_ColocBoost_Output.Rmd +++ b/vignettes/Visualization_ColocBoost_Output.Rmd @@ -55,6 +55,7 @@ colocboost_plot(res) - `y = "cos_vcp"` for variant colocalization probabilities (multiple plots for each CoS - only draw VCP for variants in CoS to the colocalized traits). - `plot_cos_idx = NULL` (default) indicates all colocalization events are plotted. `plot_cos_idx = 1` can be specified to plot the 1st colocalization event, and so on. - `outcome_idx = NULL` (default) indicates only the traits with colocalization are plotted. `outcome_idx = c(1,2,5)` can be specified to plot the traits 1, 2, and 5. +- `plot_all_outcome = FALSE` (default) indicates only the traits with colocalization are plotted. If `TRUE` can plot all traits. - `cos_color = NULL` (default) indicates the colors of the colocalization events. Specify a vector of colors to customize the plot. From 88487a97c2fa2fc326a4f34f0305b53c1938b453 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sat, 19 Apr 2025 06:03:45 -0400 Subject: [PATCH 4/5] Update colocboost_plot.R --- R/colocboost_plot.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R index 5c3fd3c..4ee7f3b 100644 --- a/R/colocboost_plot.R +++ b/R/colocboost_plot.R @@ -159,6 +159,10 @@ colocboost_plot <- function(cb_output, y = "log10p", if (length(y)==1) outcome_idx <- 1 if (plot_all_outcome){ outcome_idx <- 1:length(y) + if (!is.null(cb_plot_input$focal_outcome)) { + p_focal <- grep(cb_plot_input$focal_outcome, outcomes) + outcome_idx <- c(p_focal, setdiff(outcome_idx, p_focal)) + } } else { if (is.null(outcome_idx)) { if (is.null(coloc_cos)) { From ba788fc28a23a15d3e06770d922fd60c02e6e8fe Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sat, 19 Apr 2025 07:31:37 -0400 Subject: [PATCH 5/5] Update colocboost_output.R --- R/colocboost_output.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 8695921..144cd8f 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -117,7 +117,7 @@ get_cos_summary <- function(cb_output, all(sapply(tt, length) != 0) }) summary_table$interest_outcome <- interest_outcome - summary_table <- summary_table[-which(if.interest == "FALSE"), ] + summary_table <- summary_table[which(if.interest), ] if (sum(if.interest) == 0) { warning("No colocalization with interest outcomes.") }