From e7752179c691868a0b45a6d7fe6606f1a7f39ef4 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Wed, 16 Apr 2025 20:19:02 -0400 Subject: [PATCH 1/3] Update _pkgdown.yml --- _pkgdown.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index 2ffe0fa..df7b38a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -19,6 +19,23 @@ navbar: - icon: fa-github href: https://github.com/StatFunGen/colocboost +articles: + - title: Vignettes + contents: + - Input_Data_Format + - Individual_Level_Colocalization + - Summary_Level_Colocalization + - ColocBoost_tutorial_basic + - ColocBoost_tutorial_advance + - ColocBoost_tutorial_GTEx + - ColocBoost_tutorial_strong_colocalization + - ColocBoost_tutorial_diagnostic + + - title: internal + contents: + - announcements + - installation + reference: - title: "Example Data" desc: "Example datasets for demonstration and testing" From eafcf94484c56b7395346b92ceabc26006ebf110 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Wed, 16 Apr 2025 20:21:57 -0400 Subject: [PATCH 2/3] minor change --- R/colocboost.R | 1 + R/colocboost_output.R | 2 ++ 2 files changed, 3 insertions(+) diff --git a/R/colocboost.R b/R/colocboost.R index ec8c822..43fa21d 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -114,6 +114,7 @@ #' Y <- matrix(0, N, L) #' for (l in 1:L){ Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) } #' res <- colocboost(X = X, Y = Y) +#' res$cos_details$cos$cos_index #' #' @family colocboost #' @importFrom stats na.omit diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 43fb7b0..4a77624 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -166,7 +166,9 @@ get_cos_summary <- function(cb_output, #' Y <- matrix(0, N, L) #' for (l in 1:L){ Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) } #' res <- colocboost(X = X, Y = Y) +#' res$cos_details$cos$cos_index #' filter_res <- get_strong_colocalization(res, cos_npc_cutoff = 0.5, npc_outcome_cutoff = 0.2) +#' filter_res$cos_details$cos$cos_index #' #' @family colocboost_inference #' @export From 0c24955c52df08b0aa8ac2378c83cb79d50fe45a Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Wed, 16 Apr 2025 20:45:33 -0400 Subject: [PATCH 3/3] minor fix --- R/colocboost.R | 10 ++-- R/colocboost_output.R | 24 ++++----- R/colocboost_plot.R | 9 ++-- man/colocboost.Rd | 9 ++-- man/colocboost_plot.Rd | 9 ++-- man/get_cos.Rd | 8 +-- man/get_cos_summary.Rd | 8 +-- man/get_strong_colocalization.Rd | 10 ++-- tmp_readme.md | 89 -------------------------------- 9 files changed, 46 insertions(+), 130 deletions(-) delete mode 100644 tmp_readme.md diff --git a/R/colocboost.R b/R/colocboost.R index 43fa21d..d9bc9d0 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -107,10 +107,10 @@ #' colnames(X) <- paste0("SNP", 1:P) #' L = 3 #' true_beta <- matrix(0, P, L) -#' true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -#' true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -#' true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -#' true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 #' Y <- matrix(0, N, L) #' for (l in 1:L){ Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) } #' res <- colocboost(X = X, Y = Y) @@ -552,7 +552,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data if (min_variables < 100) { warning( "Warning message about the number of variables.\n", - "The smallest number of variables across outcomes is ", min_variables, "< 100.", + "The smallest number of variables across outcomes is ", min_variables, " < 100.", " If this is what you expected, this is not a problem.", " If this is not what you expected, please check input data." ) diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 4a77624..1c70657 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -34,10 +34,10 @@ #' colnames(X) <- paste0("SNP", 1:P) #' L = 3 #' true_beta <- matrix(0, P, L) -#' true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -#' true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -#' true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -#' true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 #' Y <- matrix(0, N, L) #' for (l in 1:L){ Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) } #' res <- colocboost(X = X, Y = Y) @@ -159,10 +159,10 @@ get_cos_summary <- function(cb_output, #' colnames(X) <- paste0("SNP", 1:P) #' L = 3 #' true_beta <- matrix(0, P, L) -#' true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -#' true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -#' true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -#' true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 #' Y <- matrix(0, N, L) #' for (l in 1:L){ Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) } #' res <- colocboost(X = X, Y = Y) @@ -474,10 +474,10 @@ get_ucos_summary <- function(cb_output, outcome_names = NULL, region_name = NULL #' colnames(X) <- paste0("SNP", 1:P) #' L = 3 #' true_beta <- matrix(0, P, L) -#' true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -#' true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -#' true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -#' true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 #' Y <- matrix(0, N, L) #' for (l in 1:L){ Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) } #' res <- colocboost(X = X, Y = Y) diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R index cb423f4..c8335fa 100644 --- a/R/colocboost_plot.R +++ b/R/colocboost_plot.R @@ -45,14 +45,15 @@ #' colnames(X) <- paste0("SNP", 1:P) #' L = 3 #' true_beta <- matrix(0, P, L) -#' true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -#' true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -#' true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -#' true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 #' Y <- matrix(0, N, L) #' for (l in 1:L){ Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) } #' res <- colocboost(X = X, Y = Y) #' colocboost_plot(res, plot_cols = 1) +#' colocboost_plot(res, plot_cols = 1, outcome_idx = 1:3) #' #' #' @importFrom utils head tail diff --git a/man/colocboost.Rd b/man/colocboost.Rd index 3a20304..11ca213 100644 --- a/man/colocboost.Rd +++ b/man/colocboost.Rd @@ -221,13 +221,14 @@ X <- MASS::mvrnorm(N, rep(0, P), sigma) colnames(X) <- paste0("SNP", 1:P) L = 3 true_beta <- matrix(0, P, L) -true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 Y <- matrix(0, N, L) for (l in 1:L){ Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) } res <- colocboost(X = X, Y = Y) +res$cos_details$cos$cos_index } \concept{colocboost} diff --git a/man/colocboost_plot.Rd b/man/colocboost_plot.Rd index 6972966..0c74a54 100644 --- a/man/colocboost_plot.Rd +++ b/man/colocboost_plot.Rd @@ -106,14 +106,15 @@ X <- MASS::mvrnorm(N, rep(0, P), sigma) colnames(X) <- paste0("SNP", 1:P) L = 3 true_beta <- matrix(0, P, L) -true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 Y <- matrix(0, N, L) for (l in 1:L){ Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) } res <- colocboost(X = X, Y = Y) colocboost_plot(res, plot_cols = 1) +colocboost_plot(res, plot_cols = 1, outcome_idx = 1:3) } diff --git a/man/get_cos.Rd b/man/get_cos.Rd index 453ada6..69f3764 100644 --- a/man/get_cos.Rd +++ b/man/get_cos.Rd @@ -28,10 +28,10 @@ X <- MASS::mvrnorm(N, rep(0, P), sigma) colnames(X) <- paste0("SNP", 1:P) L = 3 true_beta <- matrix(0, P, L) -true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 Y <- matrix(0, N, L) for (l in 1:L){ Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) } res <- colocboost(X = X, Y = Y) diff --git a/man/get_cos_summary.Rd b/man/get_cos_summary.Rd index 30ee54a..4e4197b 100644 --- a/man/get_cos_summary.Rd +++ b/man/get_cos_summary.Rd @@ -49,10 +49,10 @@ X <- MASS::mvrnorm(N, rep(0, P), sigma) colnames(X) <- paste0("SNP", 1:P) L = 3 true_beta <- matrix(0, P, L) -true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 Y <- matrix(0, N, L) for (l in 1:L){ Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) } res <- colocboost(X = X, Y = Y) diff --git a/man/get_strong_colocalization.Rd b/man/get_strong_colocalization.Rd index 39636b9..0d43f37 100644 --- a/man/get_strong_colocalization.Rd +++ b/man/get_strong_colocalization.Rd @@ -49,14 +49,16 @@ X <- MASS::mvrnorm(N, rep(0, P), sigma) colnames(X) <- paste0("SNP", 1:P) L = 3 true_beta <- matrix(0, P, L) -true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 Y <- matrix(0, N, L) for (l in 1:L){ Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) } res <- colocboost(X = X, Y = Y) +res$cos_details$cos$cos_index filter_res <- get_strong_colocalization(res, cos_npc_cutoff = 0.5, npc_outcome_cutoff = 0.2) +filter_res$cos_details$cos$cos_index } \seealso{ diff --git a/tmp_readme.md b/tmp_readme.md deleted file mode 100644 index 0206b49..0000000 --- a/tmp_readme.md +++ /dev/null @@ -1,89 +0,0 @@ -# ColocBoost for multi-trait colocalization in molecular QTL and GWAS studies -[![Codecov test coverage](https://codecov.io/gh/StatFunGen/colocboost/branch/main/graph/badge.svg)](https://codecov.io/gh/StatFunGen/colocboost?branch=main) -[![CRAN Version](https://www.r-pkg.org/badges/version/colocboost)](https://cran.r-project.org/package=colocboost) - -This R package implements ColocBoost --- motivated and designed for colocalization analysis ([first formulated here](https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1004383)) of multiple genetic association studies --- as a multi-task learning approach to variable selection regression with highly correlated predictors and sparse effects, based on frequentist statistical inference. It provides statistical evidence to identify which subsets of predictors have non-zero effects on which subsets of response variables. - -## Installation - -### CRAN -Install released versions from CRAN - pre-built packages are available on macOS and Windows - -```r -install.packages("colocboost") -``` - -### GitHub -Install the development version from GitHub - -```r -devtools::install_github("StatFunGen/colocboost") -``` - -### Conda -Install major releases using pre-built conda package with a conda-compatible package manager (recommended) - -Global pixi installation is the easiest way to use the conda package -```bash -pixi global install r-base # Install r-base as a global package if not already installed -pixi global install --environment r-base r-colocboost # Inject r-colocboost into r-base global environment -``` -The package can also be added to a local pixi environment -```bash -pixi workspace channel add dnachun # Add the dnachun channel to the workspace -pixi add r-colocboost # Add r-colocboost as a dependency to the environment -``` -Micromamba is recommended instead of conda or mamba for traditional conda environments -```bash -micromamba install -c dnachun r-colocboost -mamba install -c dnachun r-colocboost -conda install -c dnachun r-colocboost -``` -## Usage - -### Multi-trait Colocalization -```r -# Basic multi-trait analysis -result <- colocboost(X=list(X), Y=list(y1, y2, y3)) - -# Using summary statistics -result <- colocboost(sumstat=list(sumstat1, sumstat2), LD=LD_matrix) - -# View colocalization summary -summary <- get_cos_summary(result) - -# Visualize results -colocboost_plot(result) - -# Filter for stronger colocalization evidence -filtered <- get_strong_colocalization(result, cos_npc_cutoff = 0.5) -``` - -For more complex analyses involving multiple datasets mixing individual level and summary statistics data, we recommend using [this pipeline wrapper](https://github.com/StatFunGen/pecotmr/blob/main/R/colocboost_pipeline.R) from the `pecotmr` package. The `pecotmr` package can be installed either from source or from our conda package at https://anaconda.org/dnachun/r-pecotmr. - -### Single-trait Fine-mapping (FineBoost) - Special Case -Run FineBoost for single-trait fine-mapping (similar interface to SuSiE) -```r -result <- colocboost(X=X, Y=y) -``` - -## Citation - -If you use ColocBoost in your research, please cite: - -Cao X, Sun H, Feng R, Mazumder R, Najar CFB, Li YI, de Jager PL, Bennett D, The Alzheimer's Disease Functional Genomics Consortium, Dey KK, Wang G. (2025+). Integrative multi-omics QTL colocalization maps regulatory architecture in aging human brain. bioRxiv. [https://doi.org/](https://doi.org/) - -## Documentation - -For detailed documentation, use the R help system: - -```r -?colocboost -?colocboost_plot -?get_cos_summary -?get_strong_colocalization -``` - -## License - -This package is released under the MIT License.