StatFunGen · xueweic · Apr 19, 2025 · Apr 19, 2025 · Apr 19, 2025 · Apr 19, 2025
diff --git a/R/colocboost_output.R b/R/colocboost_output.R
@@ -117,7 +117,7 @@ get_cos_summary <- function(cb_output,
           all(sapply(tt, length) != 0)
         })
         summary_table$interest_outcome <- interest_outcome
-        summary_table <- summary_table[-which(if.interest == "FALSE"), ]
+        summary_table <- summary_table[which(if.interest), ]
         if (sum(if.interest) == 0) {
           warning("No colocalization with interest outcomes.")
         }

diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R
@@ -9,6 +9,7 @@
 #' @param grange Optional plotting range of x-axis to zoom in to a specific region.
 #' @param plot_cos_idx Optional indices of CoS to plot
 #' @param outcome_idx Optional indices of outcomes to include in the plot. \code{outcome_idx=NULL} to plot only the outcomes having colocalization.
+#' @param plot_all_outcome Logical, if TRUE plots all outcomes in the same figure, default is FALSE.
 #' @param plot_focal_only Logical, if TRUE only plots colocalization with focal outcome, default is FALSE.
 #' @param plot_focal_cos_outocme_only Logical, if TRUE only plots colocalization including at least on colocalized outcome with focal outcome, default is FALSE.
 #' @param points_color Background color for non-colocalized variables, default is "grey80".
@@ -72,6 +73,7 @@ colocboost_plot <- function(cb_output, y = "log10p",
                             grange = NULL,
                             plot_cos_idx = NULL,
                             outcome_idx = NULL,
+                            plot_all_outcome = FALSE,
                             plot_focal_only = FALSE,
                             plot_focal_cos_outocme_only = FALSE,
                             points_color = "grey80",
@@ -124,8 +126,8 @@ colocboost_plot <- function(cb_output, y = "log10p",
   )
 
   colocboost_plot_basic <- function(cb_plot_input, cb_plot_init,
-                                    outcome_idx = NULL, grange = NULL,
-                                    plot_cols = 2,
+                                    outcome_idx = NULL, plot_all_outcome = FALSE,
+                                    grange = NULL, plot_cols = 2,
                                     add_vertical = FALSE, add_vertical_idx = NULL,
                                     show_top_variables = TRUE,
                                     ...) {
@@ -155,33 +157,41 @@ colocboost_plot <- function(cb_output, y = "log10p",
     coloc_cos <- cb_plot_input$cos
     outcomes <- cb_plot_input$outcomes
     if (length(y)==1) outcome_idx <- 1
-    if (is.null(outcome_idx)) {
-      if (is.null(coloc_cos)) {
-        # - no colocalized effects, draw all outcomes in this region
-        if (length(cb_plot_input$outcomes) == 1) {
-          message("There is no fine-mapped causal effect in this region!. Showing margianl for this outcome!")
-        } else {
-          if (length(y) == 1){
-            message("There is no colocalization in this region!. Showing VCP = 0!")
+    if (plot_all_outcome){
+      outcome_idx <- 1:length(y)
+      if (!is.null(cb_plot_input$focal_outcome)) {
+        p_focal <- grep(cb_plot_input$focal_outcome, outcomes)
+        outcome_idx <- c(p_focal, setdiff(outcome_idx, p_focal))
+      }
+    } else {
+      if (is.null(outcome_idx)) {
+        if (is.null(coloc_cos)) {
+          # - no colocalized effects, draw all outcomes in this region
+          if (length(cb_plot_input$outcomes) == 1) {
+            message("There is no fine-mapped causal effect in this region!. Showing margianl for this outcome!")
           } else {
-            message("There is no colocalization in this region!. Showing margianl for all outcomes!")
+            if (length(y) == 1){
+              message("There is no colocalization in this region!. Showing VCP = 0!")
+            } else {
+              message("There is no colocalization in this region!. Showing margianl for all outcomes!")
+            }
           }
+          outcome_idx <- 1:length(y)
+        } else {
+          n.coloc <- length(coloc_cos)
+          coloc_index <- cb_plot_input$coloc_index
+          outcome_idx <- Reduce(union, coloc_index)
         }
-        outcome_idx <- 1:length(y)
-      } else {
-        n.coloc <- length(coloc_cos)
-        coloc_index <- cb_plot_input$coloc_index
-        outcome_idx <- Reduce(union, coloc_index)
-      }
-      if (!is.null(cb_plot_input$focal_outcome)) {
-        p_focal <- grep(cb_plot_input$focal_outcome, outcomes)
-        include_focal <- sapply(cb_plot_input$coloc_index, function(ci) {
-          p_focal %in% ci
-        })
-        if (any(include_focal)) {
-          coloc_index <- cb_plot_input$coloc_index[order(include_focal == "FALSE")]
-          coloc_index <- Reduce(union, coloc_index)
-          outcome_idx <- c(p_focal, setdiff(coloc_index, p_focal))
+        if (!is.null(cb_plot_input$focal_outcome)) {
+          p_focal <- grep(cb_plot_input$focal_outcome, outcomes)
+          include_focal <- sapply(cb_plot_input$coloc_index, function(ci) {
+            p_focal %in% ci
+          })
+          if (any(include_focal)) {
+            coloc_index <- cb_plot_input$coloc_index[order(include_focal == "FALSE")]
+            coloc_index <- Reduce(union, coloc_index)
+            outcome_idx <- c(p_focal, setdiff(coloc_index, p_focal))
+          }
         }
       }
     }
@@ -304,7 +314,9 @@ colocboost_plot <- function(cb_output, y = "log10p",
 
   colocboost_plot_basic(cb_plot_input, cb_plot_init,
     grange = grange,
-    outcome_idx = outcome_idx, plot_cols = plot_cols,
+    outcome_idx = outcome_idx, 
+    plot_all_outcome = plot_all_outcome,
+    plot_cols = plot_cols,
     add_vertical = add_vertical, add_vertical_idx = add_vertical_idx,
     show_top_variables = show_top_variables,
     ...
@@ -635,10 +647,10 @@ plot_initial <- function(cb_plot_input, y = "log10p",
     args$ylim <- c(0, 1)
   } else if (y == "vcp") {
     plot_data <- cb_plot_input$vcp
+    ylab <- "VCP"
     if (length(cb_plot_input$outcomes) == 1) {
       ylab <- "VPA"
     }
-    ylab <- "VCP"
     args$ylim <- c(0, 1)
   }else if (y == "coef") {
     plot_data <- cb_plot_input$coef

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -33,6 +33,10 @@ articles:
     - announcements
     - installation
     - ColocBoost_tutorial_diagnostic
+    - LD_Free_Colocalization
+    - ColocBoost_Wrapper
+    - Pairwise_Colocalization
+    - FineBoost_Special_Case
 
 reference:
   - title: "Example Data"

diff --git a/man/colocboost_plot.Rd b/man/colocboost_plot.Rd
diff --git a/vignettes/ColocBoost_Wrapper.Rmd b/vignettes/ColocBoost_Wrapper.Rmd
diff --git a/vignettes/FineBoost_Special_Case.Rmd b/vignettes/FineBoost_Special_Case.Rmd
@@ -0,0 +1,60 @@
+---
+title: "Single-trait Fine-mapping with FineBoost"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Single-trait Fine-mapping with FineBoost}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+
+This vignette demonstrates how to perform single-trait fine-mapping analysis using FineBoost, a specialized single-trait version of ColocBoost,
+from both individual-level data and summary statistics. Specifically, it focuses on the 2nd trait, which has 2 causal variants (644 and 2289), from the
+`Ind_5traits` and `Sumstat_5traits` datasets included in the package.
+
+
+```{r setup}
+library(colocboost)
+```
+
+
+# 1. Fine-mapping with individual-level data
+
+
+```{r load-example-indiviudal}
+# Load example data
+data(Ind_5traits)
+X <- Ind_5traits$X[[2]]
+Y <- Ind_5traits$Y[[2]]
+
+res <- colocboost(X = X, Y = Y)
+colocboost_plot(res)
+```
+
+
+# 2. Fine-mapping with summary statistics
+
+```{r load-example-sumstat}
+# Load example data
+data(Sumstat_5traits) 
+sumstat <- Sumstat_5traits$sumstat[[2]]
+LD <- get_cormat(Ind_5traits$X[[2]])
+
+res <- colocboost(sumstat = sumstat, LD = LD)
+colocboost_plot(res)
+```
+
+# 3. LD-free fine-mapping with one causal variant assumption
+
+```{r ld-free}
+# Reuse the summary statistics loaded above, without providing an LD matrix
+res <- colocboost(sumstat = sumstat)
+colocboost_plot(res)
+```
diff --git a/vignettes/Individual_Level_Colocalization.Rmd b/vignettes/Individual_Level_Colocalization.Rmd
@@ -57,7 +57,7 @@ Ind_5traits$true_effect_variants
 # 2. Matched individual level inputs $X$ and $Y$
 
 
-The preferred format for colocalization analysis in ColocBoost using individual level data is where genotype (X) and phenotype (Y) data are properly matched.
+The preferred format for colocalization analysis in ColocBoost using individual level data is where genotype ($X$) and phenotype ($Y$) data are properly matched.
 
 - **Basic format**: `X` and `Y` are organized as lists, matched by trait index,
     - `(X[1], Y[1])` contains individuals for trait 1,

diff --git a/vignettes/Input_Data_Format.Rmd b/vignettes/Input_Data_Format.Rmd
@@ -30,15 +30,15 @@ For analyses using individual-level data, the basic format for single trait is a
 The input format for multiple traits is similar, but `X` matrix should be a list of matrices, each corresponding to a different trait. `Y` vector should also be a list of vectors. 
 For example:
 
-- `X = list(X1, X2, X3, X4, X5)` where each `Xi` is a matrix for trait `i` - with the dimension of Ni * Pi, where Ni and Pi do not need to be the same for different traits.
-- `Y = list(Y1, Y2, Y3, Y4, Y5)` where each `Yi` is a vector for trait `i` - with Ni individuals.
+- `X = list(X1, X2, X3, X4, X5)` where each `Xi` is a matrix for trait `i` - with the dimension of $N_i \times P_i$, where $N_i$ and $P_i$ do not need to be the same for different traits.
+- `Y = list(Y1, Y2, Y3, Y4, Y5)` where each `Yi` is a vector for trait `i` - with $N_i$ individuals.
 
 
 `colocboost` also offers flexible input options (see detailed usage with different input formats, 
 refer to [Individual Level Data Colocalization](https://statfungen.github.io/colocboost/articles/Individual_Level_Colocalization.html).):
 
-- Single X matrix with $N \times P$ with Y matrix with $N \times L$ for $L$ traits.
-- Multiple X matrices and unmatched Y vectors with a mapping dictionary.
+- A single $X$ matrix of dimension $N \times P$ together with a $Y$ matrix of dimension $N \times L$ for $L$ traits.
+- Multiple $X$ matrices and unmatched $Y$ vectors with a mapping dictionary.
 
 
 # 2. Summary Statistics
@@ -97,7 +97,7 @@ dict_YX
 # 4. Hyprcoloc compatible format: effect size and standard error matrices
 
 ColocBoost also provides a flexibility to use Hyprcoloc compatible format for summary statistics with and without LD matrix.
-For example, when anaylze L traits for the same P variants with the specified effect size and standard error matrices:
+For example, when analyzing $L$ traits for the same $P$ variants with the specified effect size and standard error matrices:
 
 - `effect_est` (required) is $P \times L$ matrix of variable regression coefficients (i.e. regression beta values) in the genomic region.
 - `effect_se` (required) is $P \times L$ matrix of standard errors for the regression coefficients.

diff --git a/vignettes/LD_Free_Colocalization.Rmd b/vignettes/LD_Free_Colocalization.Rmd
diff --git a/vignettes/Pairwise_Colocalization.Rmd b/vignettes/Pairwise_Colocalization.Rmd
diff --git a/vignettes/Partial_Overlap_Variants.Rmd b/vignettes/Partial_Overlap_Variants.Rmd
@@ -53,6 +53,7 @@ X[[2]][1:2, 1:10]
 X[[3]][1:2, 1:10]
 ```
 
+## 1. Run ColocBoost with partial overlapping variants
 
 To run ColocBoost to the different genotypes with different causal variants, the variant names should be provided as the column names of the `X` matrices.
 Otherwise, the `colocboost` function will not be able to identify the variants correctly from different genotype matrices,
@@ -69,6 +70,7 @@ res$data_info$n_variables
 colocboost_plot(res)
 ```
 
+## 2. Limitations of using only overlapping variables
 
 If we perform colocalization analysis using only overlapping variables, we may fail to detect any colocalization events. 
 This is because the causal variants, which are only partially overlapping across traits, are excluded during the preprocessing step. 
@@ -87,6 +89,8 @@ colocboost_plot(res)
 ```
 
 
+## 3. Disease-prioritized colocalization analysis with variables in the focal trait
+
 In disease-prioritized colocalization analysis with a focal trait, `ColocBoost` recommends prioritizing variants in the focal trait as the default setting. 
 For the example above, if we consider trait 3 as the focal trait, only variants present in trait 3 will be included in the analysis. 
 This ensures that the analysis focuses on variants relevant to the focal trait while accounting for partial overlaps across other traits.

diff --git a/vignettes/Visualization_ColocBoost_Output.Rmd b/vignettes/Visualization_ColocBoost_Output.Rmd
@@ -55,6 +55,7 @@ colocboost_plot(res)
     - `y = "cos_vcp"` for variant colocalization probabilities (multiple plots for each CoS - only draw VCP for variants in CoS to the colocalized traits).
 - `plot_cos_idx = NULL` (default) indicates all colocalization events are plotted. `plot_cos_idx = 1` can be specified to plot the 1st colocalization event, and so on.
 - `outcome_idx = NULL` (default) indicates only the traits with colocalization are plotted. `outcome_idx = c(1,2,5)` can be specified to plot the traits 1, 2, and 5.
+- `plot_all_outcome = FALSE` (default) indicates only the traits with colocalization are plotted. If `TRUE`, all traits are plotted.
 - `cos_color = NULL` (default) indicates the colors of the colocalization events. Specify a vector of colors to customize the plot.
 
 
@@ -100,9 +101,11 @@ Following plot also shows the top variants.
 
 
 ```{r vertical-plot}
-colocboost_plot(res, show_top_variables = TRUE, 
-                add_vertical = TRUE, 
-                add_vertical_idx = unique(unlist(Ind_5traits$true_effect_variants)))
+colocboost_plot(
+  res, show_top_variables = TRUE, 
+  add_vertical = TRUE, 
+  add_vertical_idx = unique(unlist(Ind_5traits$true_effect_variants))
+)
 ```
 
 
@@ -141,7 +144,7 @@ at [Interpret ColocBoost Output](https://statfungen.github.io/colocboost/article
 There are three options avaiable for plotting the results from disease prioritized colocalization, considering a focal trait:
 
 - `plot_focal_only = FALSE` (default), if `TRUE` will only plot CoS with focal trait and ignoring other CoS.
-- `plot_focal_cos_outocme_only = FALSE` (default) and recommend for visulization for disease prioritized colocalization.
+- `plot_focal_cos_outocme_only = FALSE` (default); this option is **recommended** for visualization of disease prioritized colocalization.
 If `TRUE` will plot all CoS colocalized with at least on traits within CoS of focal traits.
 
 ```{r focal-colocalization}