Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions R/colocboost_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' @param outcome_idx Optional indices of outcomes to include in the plot. \code{outcome_idx=NULL} to plot only the outcomes having colocalization.
#' @param plot_all_outcome Logical, if TRUE plots all outcomes in the same figure, default is FALSE.
#' @param plot_focal_only Logical, if TRUE only plots colocalization with focal outcome, default is FALSE.
#' @param plot_focal_cos_outocme_only Logical, if TRUE only plots colocalization including at least on colocalized outcome with focal outcome, default is FALSE.
#' @param plot_focal_cos_outcome_only Logical, if TRUE only plots colocalization including at least one colocalized outcome with focal outcome, default is FALSE.
#' @param points_color Background color for non-colocalized variables, default is "grey80".
#' @param cos_color Optional custom colors for CoS.
#' @param add_vertical Logical, if TRUE adds vertical lines at specified positions, default is FALSE
Expand All @@ -25,7 +25,7 @@
#' @param show_cos_to_uncoloc_outcome Optional outcomes for showing CoS to uncolocalized outcomes
#' @param plot_ucos Logical, if TRUE also plots trait-specific (uncolocalized) sets, default is FALSE
#' @param plot_ucos_idx Optional indices of trait-specific (uncolocalized) sets to plot when included
#' @param gene_name Optional gene name to display in plot title
#' @param title_specific Optional specific title to display in plot title
#' @param ylim_each Logical, if TRUE uses separate y-axis limits for each plot, default is TRUE
#' @param outcome_legend_pos Position for outcome legend, default is "top"
#' @param outcome_legend_size Size for outcome legend text, default is 1.2
Expand Down Expand Up @@ -75,7 +75,7 @@ colocboost_plot <- function(cb_output, y = "log10p",
outcome_idx = NULL,
plot_all_outcome = FALSE,
plot_focal_only = FALSE,
plot_focal_cos_outocme_only = FALSE,
plot_focal_cos_outcome_only = FALSE,
points_color = "grey80",
cos_color = NULL,
add_vertical = FALSE,
Expand All @@ -89,7 +89,7 @@ colocboost_plot <- function(cb_output, y = "log10p",
show_cos_to_uncoloc_outcome = NULL,
plot_ucos = FALSE,
plot_ucos_idx = NULL,
gene_name = NULL,
title_specific = NULL,
ylim_each = TRUE,
outcome_legend_pos = "top",
outcome_legend_size = 1.8,
Expand All @@ -109,7 +109,7 @@ colocboost_plot <- function(cb_output, y = "log10p",
variant_coord = variant_coord,
outcome_names = outcome_names,
plot_focal_only = plot_focal_only,
plot_focal_cos_outocme_only = plot_focal_cos_outocme_only,
plot_focal_cos_outcome_only = plot_focal_cos_outcome_only,
show_cos_to_uncoloc = show_cos_to_uncoloc,
show_cos_to_uncoloc_idx = show_cos_to_uncoloc_idx,
show_cos_to_uncoloc_outcome = show_cos_to_uncoloc_outcome,
Expand All @@ -118,7 +118,7 @@ colocboost_plot <- function(cb_output, y = "log10p",
# get initial set up of plot
cb_plot_init <- plot_initial(cb_plot_input,
y = y, points_color = points_color, cos_color = cos_color,
ylim_each = ylim_each, gene_name = gene_name,
ylim_each = ylim_each, title_specific = title_specific,
outcome_legend_pos = outcome_legend_pos, outcome_legend_size = outcome_legend_size,
cos_legend_pos = cos_legend_pos,
show_variable = show_variable, lab_style = lab_style, axis_style = axis_style,
Expand Down Expand Up @@ -330,7 +330,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
variant_coord = FALSE,
outcome_names = NULL,
plot_focal_only = FALSE,
plot_focal_cos_outocme_only = FALSE,
plot_focal_cos_outcome_only = FALSE,
show_cos_to_uncoloc = FALSE,
show_cos_to_uncoloc_idx = NULL,
show_cos_to_uncoloc_outcome = NULL,
Expand Down Expand Up @@ -411,12 +411,12 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
}
select_cs <- plot_cos_idx
} else {
if (plot_focal_only || plot_focal_cos_outocme_only) {
if (plot_focal_only || plot_focal_cos_outcome_only) {
if (sum(if_focal) == 0) {
message("No focal CoS, draw all CoS.")
} else if (plot_focal_only) {
select_cs <- which(if_focal)
} else { # plot_focal_cos_outocme_only is true here
} else { # plot_focal_cos_outcome_only is true here
# Get all outcomes colocalized with focal CoS
focal_outcomes <- unique(unlist(coloc_index[if_focal]))
# Find CoS that include at least one of these focal outcomes
Expand Down Expand Up @@ -488,7 +488,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
stop("Please check plot_ucos_idx!")
}
select_ucos <- plot_ucos_idx
} else if (plot_focal_cos_outocme_only && sum(if_focal) != 0) {
} else if (plot_focal_cos_outcome_only && sum(if_focal) != 0) {
# Get all outcomes colocalized with focal CoS
focal_outcomes <- unique(unlist(plot_input$coloc_index))
# Find uCoS that include at least one of these focal outcomes
Expand Down Expand Up @@ -597,7 +597,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
#' @importFrom stats pnorm
plot_initial <- function(cb_plot_input, y = "log10p",
points_color = "grey80", cos_color = NULL,
ylim_each = TRUE, gene_name = NULL,
ylim_each = TRUE, title_specific = NULL,
outcome_legend_size = 1.5,
outcome_legend_pos = "right",
cos_legend_pos = "bottomleft",
Expand Down Expand Up @@ -664,7 +664,7 @@ plot_initial <- function(cb_plot_input, y = "log10p",
args$lab_face <- lab_style[2]

# - set title format
args$title <- gene_name
args$title <- title_specific
args$title_size <- as.numeric(title_style[1])
args$title_face <- title_style[2]

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Learn how to perform colocalization analysis with step-by-step examples. For det

If you use ColocBoost in your research, please cite:

> Cao X, Sun H, Feng R, Mazumder R, Najar CFB, Li YI, de Jager PL, Bennett D, The Alzheimer's Disease Functional Genomics Consortium, Dey KK, Wang G. (2025+). Integrative multi-omics QTL colocalization maps regulatory architecture in aging human brain. medRxiv. [https://doi.org/](https://doi.org/)
> Cao X, Sun H, Feng R, Mazumder R, Najar CFB, Li YI, de Jager PL, Bennett D, The Alzheimer's Disease Functional Genomics Consortium, Dey KK, Wang G. (2025+). Integrative multi-omics QTL colocalization maps regulatory architecture in aging human brain. medRxiv. [https://doi.org/10.1101/2025.04.17.25326042](https://doi.org/10.1101/2025.04.17.25326042)


## License
Expand Down
21 changes: 15 additions & 6 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,44 +18,53 @@ navbar:
href: https://github.com/StatFunGen/colocboost

articles:
- title: Vignettes
- title: Performing Colocalization using ColocBoost
desc: "Tutorials on how to perform multi-trait colocalization analysis using ColocBoost with flexible input data formats."
contents:
- Input_Data_Format
- Individual_Level_Colocalization
- Summary_Statistics_Colocalization
- Disease_Prioritized_Colocalization

- title: Interpretation and Visualization
desc: "Tutorials on how to interpret and visualize the output from ColocBoost."
contents:
- Interpret_ColocBoost_Output
- Visualization_ColocBoost_Output

- title: Advanced Topics
desc: "Advanced topics and special cases in colocalization and fine-mapping analysis."
contents:
- Partial_Overlap_Variants
- ColocBoost_Wrapper_Pipeline
- LD_Free_Colocalization
- ColocBoost_Diagnostics
- FineBoost_Special_Case

- title: internal
contents:
- announcements
- installation
- Pairwise_Colocalization
- ColocBoost_Diagnostics

reference:
- title: "Example Data"
desc: "Example datasets for demonstration and testing"
desc: "Example datasets for demonstration and testing."
contents:
- has_concept("colocboost_data")

- title: "Model fitting"
desc: "Functions for fitting colocalization models"
desc: "Main interface function for fitting multi-trait colocalization model."
contents:
- has_concept("colocboost")

- title: "Inference and summary"
desc: "Functions for inference and summary from fitted models"
desc: "Functions for inference and summary from fitted model."
contents:
- has_concept("colocboost_inference")

- title: "Visualization"
desc: "Functions for visualizing ColocBoost results"
desc: "Functions for visualizing ColocBoost result."
contents:
- has_concept("colocboost_plot")

Expand Down
4 changes: 2 additions & 2 deletions inst/CITATION
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ citEntry(
journal = "medRxiv",
year = "2025",
note = "Preprint",
url = "https://doi.org/",
url = "https://doi.org/10.1101/2025.04.17.25326042",
textVersion = paste(
"Cao X, Sun H, Feng R, Mazumder R, Najar CFB, Li YI, de Jager PL, Bennett D, The Alzheimer's Disease Functional Genomics Consortium, Dey KK, Wang G. (2025).",
"Integrative multi-omics QTL colocalization maps regulatory architecture in aging human brain.",
"medRxiv.",
"https://doi.org/[YOUR_DOI_HERE]"
"https://doi.org/10.1101/2025.04.17.25326042"
)
)
8 changes: 4 additions & 4 deletions man/colocboost_plot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions tests/testthat/test_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,8 @@ test_that("colocboost_plot handles layout options", {
# When ylim_each is FALSE, we need to provide a ylim parameter
expect_error(suppressWarnings(colocboost_plot(cb_res, ylim_each = FALSE, ylim = c(0, 10))), NA)

# Test with gene_name option
expect_error(suppressWarnings(colocboost_plot(cb_res, gene_name = "BRCA1")), NA)
# Test with title_specific option
expect_error(suppressWarnings(colocboost_plot(cb_res, title_specific = "BRCA1")), NA)

# Test with variant_coord option
expect_error(suppressWarnings(colocboost_plot(cb_res, variant_coord = FALSE)), NA)
Expand Down Expand Up @@ -305,21 +305,21 @@ test_that("colocboost_plot handles focal outcome in complex cases", {
# Test plot_focal_only option
expect_error(suppressWarnings(colocboost_plot(cb_res_focal, plot_focal_only = TRUE)), NA)

# Test plot_focal_cos_outocme_only option
expect_error(suppressWarnings(colocboost_plot(cb_res_focal, plot_focal_cos_outocme_only = TRUE)), NA)
# Test plot_focal_cos_outcome_only option
expect_error(suppressWarnings(colocboost_plot(cb_res_focal, plot_focal_cos_outcome_only = TRUE)), NA)

# Combine focal outcome filtering with other options
expect_error(suppressWarnings(colocboost_plot(cb_res_focal,
plot_focal_only = TRUE,
y = "cos_vcp")), NA)

expect_error(suppressWarnings(colocboost_plot(cb_res_focal,
plot_focal_cos_outocme_only = TRUE,
plot_focal_cos_outcome_only = TRUE,
plot_ucos = TRUE)), NA)

# Test focusing only on outcomes colocalized with focal outcome
expect_error(suppressWarnings(colocboost_plot(cb_res_focal,
plot_focal_cos_outocme_only = TRUE,
plot_focal_cos_outcome_only = TRUE,
outcome_idx = 1:3)), NA)
})

Expand Down
6 changes: 3 additions & 3 deletions vignettes/ColocBoost_Diagnostics.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,17 @@ names(res)
- **`cb_model_para`**: parameters used in fitting ColocBoost model.


```{r cb-model}
```{r cb-model-para}
names(res$diagnostic_details$cb_model_para)
```


# 2. Diagnostic details of the model fitting

- **`cb_model`**: trait-specific proximity gradient boosting model, including proximity weight at each iteration, residual after gradient boosting, etc.
- **`weights_paths``**: individual trait-specific weights for each iteration.
- **`weights_paths`**: individual trait-specific weights for each iteration.

```{r cb-model-para}
```{r cb-model}
names(res$diagnostic_details$cb_model)
names(res$diagnostic_details$cb_model$ind_outcome_1)
```
Expand Down
44 changes: 22 additions & 22 deletions vignettes/ColocBoost_Wrapper_Pipeline.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,31 @@ knitr::opts_chunk$set(
```


This vignette demonstrates how to use the ColocBoost wrapper pipeline to perform colocalization analysis using `colocboost`.
See more details about functions in package `pecotmr` with [link](https://github.com/StatFunGen/pecotmr/tree/main) and
This vignette demonstrates how to use the ColocBoost wrapper pipeline to perform colocalization analysis with `colocboost`.

- See more details about functions in the package `pecotmr` with [link](https://github.com/StatFunGen/pecotmr/tree/main) and
`colocboost_pipeline` with [link](https://github.com/StatFunGen/pecotmr/blob/main/R/colocboost_pipeline.R).
See more etails about input data preparation in `xqtl_protocal` with [https://statfungen.github.io/xqtl-protocol/code/mnm_analysis/mnm_methods/colocboost.html].
- See more details about input data preparation in `xqtl_protocol` with [link](https://statfungen.github.io/xqtl-protocol/code/mnm_analysis/mnm_methods/colocboost.html).


# 1. Loading Data use `colocboost_analysis_pipeline` function
# 1. Loading Data using `load_multitask_regional_data` function


This function harmonizes the input data and prepares it for colocalization analysis.
In this section, we introduce how to load the regional data required for the ColocBoost analysis using the `load_multitask_regional_data` function.
This function loads a mixture data sets for a specific region, including individual-level data (genotype, phenotype, covariate data)
or summary statistics (sumstats, LD). Run \code{load_regional_univariate_data} and \code{load_rss_data} multiple times for different datasets
This function loads mixed datasets for a specific region, including individual-level data (genotype, phenotype, covariate data)
or summary statistics (sumstats, LD). Run `load_regional_univariate_data` and `load_rss_data` multiple times for different datasets.


Below are the input parameters for this function:

## 1.1. Loading individual level data from multiple corhorts
## 1.1. Loading individual-level data from multiple cohorts


- **`region`** (required): A string of `chr:start-end` for the phenotype region you want to analyzed.
- **`genotype_list`**: A vector of path for PLINK bed files containing genotype data.
- **`phenotype_list`**: A vector of path for phenotype file names.
- **`covariate_list`**: A vector of path for covariate file names corresponding to the phenotype file vector.
- **`region`** (required): A string of `chr:start-end` for the phenotype region you want to analyze.
- **`genotype_list`**: A vector of paths for PLINK bed files containing genotype data.
- **`phenotype_list`**: A vector of paths for phenotype file names.
- **`covariate_list`**: A vector of paths for covariate file names corresponding to the phenotype file vector.
- **`conditions_list_individual`**: A vector of strings representing different conditions or groups.
- **`match_geno_pheno`**: A vector of indices of phenotypes matched to genotype if multiple genotype PLINK files are used.
- **`maf_cutoff`**: Minimum minor allele frequency (MAF) cutoff. Default is 0.
Expand All @@ -50,14 +51,10 @@ Below are the input parameters for this function:
- **`region_name_col`**: Column name containing the region name. Default is `NULL`.
- **`keep_indel`**: Logical indicating whether to keep insertions/deletions (INDELs). Default is `TRUE`.
- **`keep_samples`**: A vector of sample names to keep. Default is `NULL`.
- **`phenotype_header`**: Number of rows to skip at the beginning of the transposed phenotype file (default is 4 for `chr`, `start`, `end`, and `ID`).
- **`scale_residuals`**: Logical indicating whether to scale residuals. Default is `FALSE`.
- **`tabix_header`**: Logical indicating whether the tabix file has a header. Default is `TRUE`.


**Illustrated example**

The following example demonstrates how to set up an input with 3 phenotypes and 2 cohorts, where the first cohort has 2 phenotypes and the second cohort has 1 phenotype.
The following example demonstrates how to set up input data with 3 phenotypes and 2 cohorts, where the first cohort has 2 phenotypes and the second cohort has 1 phenotype.

```{r, data-loader-individual}
# Example of loading individual-level data
Expand All @@ -80,7 +77,7 @@ imiss_cutoff = 0.9



## 1.2. Loading summary statistics from multiple corhorts or data set
## 1.2. Loading summary statistics from multiple cohorts or datasets

- **`sumstat_path_list`**: A vector of file paths to the summary statistics.
- **`column_file_path_list`**: A vector of file paths to the column mapping files.
Expand All @@ -98,7 +95,7 @@ imiss_cutoff = 0.9

**Illustrated example**

The following example demonstrates how to set up an input with 2 summary and one LD reference.
The following example demonstrates how to set up input data with 2 summary statistics and one LD reference.

```{r, data-loader-sumstat}
# Example of loading summary statistics
Expand All @@ -118,13 +115,13 @@ n_controls = c(20000, 40000)



# Perform ColocBoost using `colocboost_analysis_pipeline` function
# 2. Perform ColocBoost using `colocboost_analysis_pipeline` function

In this section, we perform the colocalization analysis using the `colocboost_analysis_pipeline` function. Below are the input parameters for this function:


- **`region_data`**: The output of the `load_multitask_regional_data` function, which contains harmonized summary statistics and LD matrices for the region of interest.
- **`focal_trait`**: Name of trait if performing disease prioritized ColocBoost.
- **`region_data`**: The output of the `load_multitask_regional_data` function.
- **`focal_trait`**: Name of the trait if performing disease-prioritized ColocBoost.
- **`event_filters`**: A list of patterns for filtering events based on context names.
Example: for sQTL, list(type_pattern = ".*clu_(\\d+_[+-?]).*", valid_pattern = "clu_(\\d+_[+-?]):PR:", exclude_pattern = "clu_(\\d+_[+-?]):IN:").
- **`maf_cutoff`**: A scalar to remove variants with maf < maf_cutoff, default is 0.005.
Expand All @@ -135,4 +132,7 @@ Example: for sQTL, list(type_pattern = ".*clu_(\\d+_[+-?]).*", valid_pattern = "
- **`impute_opts`**: A list of imputation options including rcond, R2_threshold, and minimum_ld (default: list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5)).



```{r, colocboost-analysis}
# region_data <- load_multitask_regional_data(...)
# res <- colocboost_analysis_pipeline(region_data)
```
Loading
Loading