StatFunGen · xueweic · Apr 21, 2025 · Apr 20, 2025 · Apr 20, 2025 · Apr 20, 2025
diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R
@@ -11,7 +11,7 @@
 #' @param outcome_idx Optional indices of outcomes to include in the plot. \code{outcome_idx=NULL} to plot only the outcomes having colocalization.
 #' @param plot_all_outcome Optional to plot all outcome in the same figure.
 #' @param plot_focal_only Logical, if TRUE only plots colocalization with focal outcome, default is FALSE.
-#' @param plot_focal_cos_outocme_only Logical, if TRUE only plots colocalization including at least on colocalized outcome with focal outcome, default is FALSE.
+#' @param plot_focal_cos_outcome_only Logical, if TRUE only plots colocalization including at least one colocalized outcome with focal outcome, default is FALSE.
 #' @param points_color Background color for non-colocalized variables, default is "grey80".
 #' @param cos_color Optional custom colors for CoS.
 #' @param add_vertical Logical, if TRUE adds vertical lines at specified positions, default is FALSE
@@ -25,7 +25,7 @@
 #' @param show_cos_to_uncoloc_outcome Optional outcomes for showing CoS to uncolocalized outcomes
 #' @param plot_ucos Logical, if TRUE plots also trait-specific (uncolocalized) sets , default is FALSE
 #' @param plot_ucos_idx Optional indices of trait-specific (uncolocalized) sets to plot when included
-#' @param gene_name Optional gene name to display in plot title
+#' @param title_specific Optional custom title to display as the plot title
 #' @param ylim_each Logical, if TRUE uses separate y-axis limits for each plot, default is TRUE
 #' @param outcome_legend_pos Position for outcome legend, default is "top"
 #' @param outcome_legend_size Size for outcome legend text, default is 1.2
@@ -75,7 +75,7 @@ colocboost_plot <- function(cb_output, y = "log10p",
                             outcome_idx = NULL,
                             plot_all_outcome = FALSE,
                             plot_focal_only = FALSE,
-                            plot_focal_cos_outocme_only = FALSE,
+                            plot_focal_cos_outcome_only = FALSE,
                             points_color = "grey80",
                             cos_color = NULL,
                             add_vertical = FALSE,
@@ -89,7 +89,7 @@ colocboost_plot <- function(cb_output, y = "log10p",
                             show_cos_to_uncoloc_outcome = NULL,
                             plot_ucos = FALSE,
                             plot_ucos_idx = NULL,
-                            gene_name = NULL,
+                            title_specific = NULL,
                             ylim_each = TRUE,
                             outcome_legend_pos = "top",
                             outcome_legend_size = 1.8,
@@ -109,7 +109,7 @@ colocboost_plot <- function(cb_output, y = "log10p",
     variant_coord = variant_coord,
     outcome_names = outcome_names,
     plot_focal_only = plot_focal_only,
-    plot_focal_cos_outocme_only = plot_focal_cos_outocme_only,
+    plot_focal_cos_outcome_only = plot_focal_cos_outcome_only,
     show_cos_to_uncoloc = show_cos_to_uncoloc,
     show_cos_to_uncoloc_idx = show_cos_to_uncoloc_idx,
     show_cos_to_uncoloc_outcome = show_cos_to_uncoloc_outcome,
@@ -118,7 +118,7 @@ colocboost_plot <- function(cb_output, y = "log10p",
   # get initial set up of plot
   cb_plot_init <- plot_initial(cb_plot_input,
     y = y, points_color = points_color, cos_color = cos_color,
-    ylim_each = ylim_each, gene_name = gene_name,
+    ylim_each = ylim_each, title_specific = title_specific,
     outcome_legend_pos = outcome_legend_pos, outcome_legend_size = outcome_legend_size,
     cos_legend_pos = cos_legend_pos,
     show_variable = show_variable, lab_style = lab_style, axis_style = axis_style,
@@ -330,7 +330,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
                            variant_coord = FALSE,
                            outcome_names = NULL,
                            plot_focal_only = FALSE,
-                           plot_focal_cos_outocme_only = FALSE,
+                           plot_focal_cos_outcome_only = FALSE,
                            show_cos_to_uncoloc = FALSE,
                            show_cos_to_uncoloc_idx = NULL,
                            show_cos_to_uncoloc_outcome = NULL,
@@ -411,12 +411,12 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
       }
       select_cs <- plot_cos_idx
     } else {
-      if (plot_focal_only || plot_focal_cos_outocme_only) {
+      if (plot_focal_only || plot_focal_cos_outcome_only) {
         if (sum(if_focal) == 0) {
           message("No focal CoS, draw all CoS.")
         } else if (plot_focal_only) {
           select_cs <- which(if_focal)
-        } else {  # plot_focal_cos_outocme_only is true here
+        } else {  # plot_focal_cos_outcome_only is true here
           # Get all outcomes colocalized with focal CoS
           focal_outcomes <- unique(unlist(coloc_index[if_focal]))
           # Find CoS that include at least one of these focal outcomes
@@ -488,7 +488,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
           stop("Please check plot_ucos_idx!")
         }
         select_ucos <- plot_ucos_idx
-      } else if (plot_focal_cos_outocme_only && sum(if_focal) != 0) {
+      } else if (plot_focal_cos_outcome_only && sum(if_focal) != 0) {
         # Get all outcomes colocalized with focal CoS
         focal_outcomes <- unique(unlist(plot_input$coloc_index))
         # Find uCoS that include at least one of these focal outcomes
@@ -597,7 +597,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
 #' @importFrom stats pnorm
 plot_initial <- function(cb_plot_input, y = "log10p",
                          points_color = "grey80", cos_color = NULL,
-                         ylim_each = TRUE, gene_name = NULL,
+                         ylim_each = TRUE, title_specific = NULL,
                          outcome_legend_size = 1.5,
                          outcome_legend_pos = "right",
                          cos_legend_pos = "bottomleft",
@@ -664,7 +664,7 @@ plot_initial <- function(cb_plot_input, y = "log10p",
   args$lab_face <- lab_style[2]
 
   # - set title format
-  args$title <- gene_name
+  args$title <- title_specific
   args$title_size <- as.numeric(title_style[1])
   args$title_face <- title_style[2]
 

diff --git a/README.md b/README.md
@@ -35,7 +35,7 @@ Learn how to perform colocalization analysis with step-by-step examples. For det
 
 If you use ColocBoost in your research, please cite:
 
-> Cao X, Sun H, Feng R, Mazumder R, Najar CFB, Li YI, de Jager PL, Bennett D, The Alzheimer's Disease Functional Genomics Consortium, Dey KK, Wang G. (2025+). Integrative multi-omics QTL colocalization maps regulatory architecture in aging human brain. medRxiv. [https://doi.org/](https://doi.org/)
+> Cao X, Sun H, Feng R, Mazumder R, Najar CFB, Li YI, de Jager PL, Bennett D, The Alzheimer's Disease Functional Genomics Consortium, Dey KK, Wang G. (2025+). Integrative multi-omics QTL colocalization maps regulatory architecture in aging human brain. medRxiv. [https://doi.org/10.1101/2025.04.17.25326042](https://doi.org/10.1101/2025.04.17.25326042)
 
 
 ## License

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -18,44 +18,53 @@ navbar:
       href: https://github.com/StatFunGen/colocboost
 
 articles:
-  - title: Vignettes
+  - title: Performing Colocalization using ColocBoost
+    desc: "Tutorials on how to perform multi-trait colocalization analysis using ColocBoost with flexible input data formats."
     contents:
     - Input_Data_Format
     - Individual_Level_Colocalization
     - Summary_Statistics_Colocalization
     - Disease_Prioritized_Colocalization
+
+  - title: Interpretation and Visualization
+    desc: "Tutorials on how to interpret and visualize the output from ColocBoost."
+    contents:
     - Interpret_ColocBoost_Output
     - Visualization_ColocBoost_Output
+
+  - title: Advanced Topics
+    desc: "Advanced topics and special cases in colocalization and fine-mapping analysis."
+    contents:
     - Partial_Overlap_Variants
     - ColocBoost_Wrapper_Pipeline
     - LD_Free_Colocalization
-    - ColocBoost_Diagnostics
     - FineBoost_Special_Case
 
   - title: internal
     contents:
     - announcements
     - installation
     - Pairwise_Colocalization
+    - ColocBoost_Diagnostics
 
 reference:
   - title: "Example Data"
-    desc: "Example datasets for demonstration and testing"
+    desc: "Example datasets for demonstration and testing."
     contents:
     - has_concept("colocboost_data")
 
   - title: "Model fitting"
-    desc: "Functions for fitting colocalization models"
+    desc: "Main interface function for fitting a multi-trait colocalization model."
     contents:
     - has_concept("colocboost")
 
   - title: "Inference and summary"
-    desc: "Functions for inference and summary from fitted models"
+    desc: "Functions for inference and summary from a fitted model."
     contents:
     - has_concept("colocboost_inference")
 
   - title: "Visualization"
-    desc: "Functions for visualizing ColocBoost results"
+    desc: "Functions for visualizing ColocBoost results."
     contents:
     - has_concept("colocboost_plot")
 

diff --git a/inst/CITATION b/inst/CITATION
@@ -17,11 +17,11 @@ citEntry(
   journal  = "medRxiv",
   year     = "2025",
   note     = "Preprint",
-  url      = "https://doi.org/",
+  url      = "https://doi.org/10.1101/2025.04.17.25326042",
   textVersion = paste(
     "Cao X, Sun H, Feng R, Mazumder R, Najar CFB, Li YI, de Jager PL, Bennett D, The Alzheimer's Disease Functional Genomics Consortium, Dey KK, Wang G. (2025).",
     "Integrative multi-omics QTL colocalization maps regulatory architecture in aging human brain.",
     "medRxiv.",
-    "https://doi.org/[YOUR_DOI_HERE]"
+    "https://doi.org/10.1101/2025.04.17.25326042"
   )
 )
diff --git a/man/colocboost_plot.Rd b/man/colocboost_plot.Rd
diff --git a/tests/testthat/test_plot.R b/tests/testthat/test_plot.R
@@ -177,8 +177,8 @@ test_that("colocboost_plot handles layout options", {
   # When ylim_each is FALSE, we need to provide a ylim parameter
   expect_error(suppressWarnings(colocboost_plot(cb_res, ylim_each = FALSE, ylim = c(0, 10))), NA)
 
-  # Test with gene_name option
-  expect_error(suppressWarnings(colocboost_plot(cb_res, gene_name = "BRCA1")), NA)
+  # Test with title_specific option
+  expect_error(suppressWarnings(colocboost_plot(cb_res, title_specific = "BRCA1")), NA)
 
   # Test with variant_coord option
   expect_error(suppressWarnings(colocboost_plot(cb_res, variant_coord = FALSE)), NA)
@@ -305,21 +305,21 @@ test_that("colocboost_plot handles focal outcome in complex cases", {
   # Test plot_focal_only option
   expect_error(suppressWarnings(colocboost_plot(cb_res_focal, plot_focal_only = TRUE)), NA)
 
-  # Test plot_focal_cos_outocme_only option
-  expect_error(suppressWarnings(colocboost_plot(cb_res_focal, plot_focal_cos_outocme_only = TRUE)), NA)
+  # Test plot_focal_cos_outcome_only option
+  expect_error(suppressWarnings(colocboost_plot(cb_res_focal, plot_focal_cos_outcome_only = TRUE)), NA)
 
   # Combine focal outcome filtering with other options
   expect_error(suppressWarnings(colocboost_plot(cb_res_focal, 
                                                plot_focal_only = TRUE,
                                                y = "cos_vcp")), NA)
 
   expect_error(suppressWarnings(colocboost_plot(cb_res_focal, 
-                                               plot_focal_cos_outocme_only = TRUE,
+                                               plot_focal_cos_outcome_only = TRUE,
                                                plot_ucos = TRUE)), NA)
 
   # Test focusing only on outcomes colocalized with focal outcome
   expect_error(suppressWarnings(colocboost_plot(cb_res_focal, 
-                                               plot_focal_cos_outocme_only = TRUE,
+                                               plot_focal_cos_outcome_only = TRUE,
                                                outcome_idx = 1:3)), NA)
 })
 

diff --git a/vignettes/ColocBoost_Diagnostics.Rmd b/vignettes/ColocBoost_Diagnostics.Rmd
@@ -32,17 +32,17 @@ names(res)
 - **`cb_model_para`**: parameters used in fitting ColocBoost model.
 
 
-```{r cb-model}
+```{r cb-model-para}
 names(res$diagnostic_details$cb_model_para)
 ```
 
 
 # 2. Diagnostic details of the model fitting
 
 - **`cb_model`**: trait-specific proximity gradient boosting model, including proximity weight at each iteration, residual after gradient boosting, et al.
-- **`weights_paths``**: individual trait-specific weights for each iteration.
+- **`weights_paths`**: individual trait-specific weights for each iteration.
 
-```{r cb-model-para}
+```{r cb-model}
 names(res$diagnostic_details$cb_model)
 names(res$diagnostic_details$cb_model$ind_outcome_1)
 ```

diff --git a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd
@@ -15,30 +15,31 @@ knitr::opts_chunk$set(
 ```
 
 
-This vignette demonstrates how to use the ColocBoost wrapper pipeline to perform colocalization analysis using `colocboost`. 
-See more details about functions in package `pecotmr` with [link](https://github.com/StatFunGen/pecotmr/tree/main) and 
+This vignette demonstrates how to use the ColocBoost wrapper pipeline to perform colocalization analysis with `colocboost`. 
+
+- See more details about functions in the package `pecotmr` with [link](https://github.com/StatFunGen/pecotmr/tree/main) and 
 `colocboost_pipeline` with [link](https://github.com/StatFunGen/pecotmr/blob/main/R/colocboost_pipeline.R). 
-See more etails about input data preparation in `xqtl_protocal` with [https://statfungen.github.io/xqtl-protocol/code/mnm_analysis/mnm_methods/colocboost.html].
+- See more details about input data preparation in `xqtl_protocol` with [link](https://statfungen.github.io/xqtl-protocol/code/mnm_analysis/mnm_methods/colocboost.html).
 
 
-# 1. Loading Data use `colocboost_analysis_pipeline` function
+# 1. Loading Data using `load_multitask_regional_data` function
 
 
 This function harmonizes the input data and prepares it for colocalization analysis. 
 In this section, we introduce how to load the regional data required for the ColocBoost analysis using the `load_multitask_regional_data` function. 
-This function loads a mixture data sets for a specific region, including individual-level data (genotype, phenotype, covariate data)
-or summary statistics (sumstats, LD). Run \code{load_regional_univariate_data} and \code{load_rss_data} multiple times for different datasets
+This function loads mixed datasets for a specific region, including individual-level data (genotype, phenotype, covariate data)
+or summary statistics (sumstats, LD). It runs `load_regional_univariate_data` and `load_rss_data` multiple times, once for each dataset.
 
 
 Below are the input parameters for this function:
 
-## 1.1. Loading individual level data from multiple corhorts
+## 1.1. Loading individual-level data from multiple cohorts
 
 
-- **`region`** (required): A string of `chr:start-end` for the phenotype region you want to analyzed. 
-- **`genotype_list`**: A vector of path for PLINK bed files containing genotype data. 
-- **`phenotype_list`**: A vector of path for phenotype file names.
-- **`covariate_list`**: A vector of path for covariate file names corresponding to the phenotype file vector.
+- **`region`** (required): A string of `chr:start-end` for the phenotype region you want to analyze. 
+- **`genotype_list`**: A vector of paths for PLINK bed files containing genotype data. 
+- **`phenotype_list`**: A vector of paths for phenotype file names.
+- **`covariate_list`**: A vector of paths for covariate file names corresponding to the phenotype file vector.
 - **`conditions_list_individual`**: A vector of strings representing different conditions or groups.
 - **`match_geno_pheno`**: A vector of indices of phenotypes matched to genotype if multiple genotype PLINK files are used.
 - **`maf_cutoff`**: Minimum minor allele frequency (MAF) cutoff. Default is 0.
@@ -50,14 +51,10 @@ Below are the input parameters for this function:
 - **`region_name_col`**: Column name containing the region name. Default is `NULL`.
 - **`keep_indel`**: Logical indicating whether to keep insertions/deletions (INDELs). Default is `TRUE`.
 - **`keep_samples`**: A vector of sample names to keep. Default is `NULL`.
-- **`phenotype_header`**: Number of rows to skip at the beginning of the transposed phenotype file (default is 4 for `chr`, `start`, `end`, and `ID`).
-- **`scale_residuals`**: Logical indicating whether to scale residuals. Default is `FALSE`.
-- **`tabix_header`**: Logical indicating whether the tabix file has a header. Default is `TRUE`.
-
 
 **Illustrated example**
 
-The following example demonstrates how to set up an input with 3 phenotypes and 2 cohorts, where the first cohort has 2 phenotypes and the second cohort has 1 phenotype.
+The following example demonstrates how to set up input data with 3 phenotypes and 2 cohorts, where the first cohort has 2 phenotypes and the second cohort has 1 phenotype.
 
 ```{r, data-loader-individual}
 # Example of loading individual-level data
@@ -80,7 +77,7 @@ imiss_cutoff = 0.9
 
 
 
-## 1.2. Loading summary statistics from multiple corhorts or data set
+## 1.2. Loading summary statistics from multiple cohorts or datasets
 
 - **`sumstat_path_list`**: A vector of file paths to the summary statistics.
 - **`column_file_path_list`**: A vector of file paths to the column mapping files.
@@ -98,7 +95,7 @@ imiss_cutoff = 0.9
 
 **Illustrated example**
 
-The following example demonstrates how to set up an input with 2 summary and one LD reference.
+The following example demonstrates how to set up input data with two summary statistics datasets and one LD reference.
 
 ```{r, data-loader-sumstat}
 # Example of loading summary statistics
@@ -118,13 +115,13 @@ n_controls = c(20000, 40000)
 
 
 
-# Perform ColocBoost using `colocboost_analysis_pipeline` function
+# 2. Perform ColocBoost using `colocboost_analysis_pipeline` function
 
 In this section, we perform the colocalization analysis using the `colocboost_analysis_pipeline` function. Below are the input parameters for this function:
 
 
-- **`region_data`**: The output of the `load_multitask_regional_data` function, which contains harmonized summary statistics and LD matrices for the region of interest.
-- **`focal_trait`**: Name of trait if performing disease prioritized ColocBoost.
+- **`region_data`**: The output of the `load_multitask_regional_data` function.
+- **`focal_trait`**: Name of the trait if performing disease-prioritized ColocBoost.
 - **`event_filters`**: A list of patterns for filtering events based on context names. 
 Example: for sQTL, list(type_pattern = ".*clu_(\\d+_[+-?]).*", valid_pattern = "clu_(\\d+_[+-?]):PR:", exclude_pattern = "clu_(\\d+_[+-?]):IN:").
 - **`maf_cutoff`**: A scalar to remove variants with maf < maf_cutoff, default is 0.005.
@@ -135,4 +132,7 @@ Example: for sQTL, list(type_pattern = ".*clu_(\\d+_[+-?]).*", valid_pattern = "
 - **`impute_opts`**: A list of imputation options including rcond, R2_threshold, and minimum_ld (default: list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5)).
 
 
-
+```{r, colocboost-analysis}
+# region_data <- load_multitask_regional_data(...)
+# res <- colocboost_analysis_pipeline(region_data)
+```