diff --git a/R/colocboost.R b/R/colocboost.R index ec8c822..d9bc9d0 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -107,13 +107,14 @@ #' colnames(X) <- paste0("SNP", 1:P) #' L = 3 #' true_beta <- matrix(0, P, L) -#' true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -#' true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -#' true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -#' true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 #' Y <- matrix(0, N, L) #' for (l in 1:L){ Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) } #' res <- colocboost(X = X, Y = Y) +#' res$cos_details$cos$cos_index #' #' @family colocboost #' @importFrom stats na.omit @@ -551,7 +552,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data if (min_variables < 100) { warning( "Warning message about the number of variables.\n", - "The smallest number of variables across outcomes is ", min_variables, "< 100.", + "The smallest number of variables across outcomes is ", min_variables, " < 100.", " If this is what you expected, this is not a problem.", " If this is not what you expected, please check input data." ) diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 43fb7b0..1c70657 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -34,10 +34,10 @@ #' colnames(X) <- paste0("SNP", 1:P) #' L = 3 #' true_beta <- matrix(0, P, L) -#' true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -#' true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -#' true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -#' true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 #' Y <- matrix(0, N, L) #' for (l in 1:L){ Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) } #' res <- colocboost(X = X, Y = Y) @@ -159,14 +159,16 @@ get_cos_summary <- function(cb_output, #' colnames(X) <- paste0("SNP", 1:P) #' L = 3 #' true_beta <- matrix(0, P, L) -#' true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -#' true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -#' true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -#' true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 #' Y <- matrix(0, N, L) #' for (l in 1:L){ Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) } #' res <- colocboost(X = X, Y = Y) +#' res$cos_details$cos$cos_index #' filter_res <- get_strong_colocalization(res, cos_npc_cutoff = 0.5, npc_outcome_cutoff = 0.2) +#' filter_res$cos_details$cos$cos_index #' #' @family colocboost_inference #' @export @@ -472,10 +474,10 @@ get_ucos_summary <- function(cb_output, outcome_names = NULL, region_name = NULL #' colnames(X) <- paste0("SNP", 1:P) #' L = 3 #' true_beta <- matrix(0, P, L) -#' true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -#' true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -#' true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -#' true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 #' Y <- matrix(0, N, L) #' for (l in 1:L){ Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) } #' res <- colocboost(X = X, Y = Y) diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R index cb423f4..c8335fa 100644 --- a/R/colocboost_plot.R +++ b/R/colocboost_plot.R @@ -45,14 +45,15 @@ #' colnames(X) <- paste0("SNP", 1:P) #' L = 3 #' true_beta <- matrix(0, P, L) -#' true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -#' true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -#' true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -#' true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 #' Y <- matrix(0, N, L) #' for (l in 1:L){ Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) } #' res <- colocboost(X = X, Y = Y) #' colocboost_plot(res, plot_cols = 1) +#' colocboost_plot(res, plot_cols = 1, outcome_idx = 1:3) #' #' #' @importFrom utils head tail diff --git a/_pkgdown.yml b/_pkgdown.yml index 2ffe0fa..df7b38a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -19,6 +19,23 @@ navbar: - icon: fa-github href: https://github.com/StatFunGen/colocboost +articles: + - title: Vignettes + contents: + - Input_Data_Format + - Individual_Level_Colocalization + - Summary_Level_Colocalization + - ColocBoost_tutorial_basic + - ColocBoost_tutorial_advance + - ColocBoost_tutorial_GTEx + - ColocBoost_tutorial_strong_colocalization + - ColocBoost_tutorial_diagnostic + + - title: internal + contents: + - announcements + - installation + reference: - title: "Example Data" desc: "Example datasets for demonstration and testing" diff --git a/man/colocboost.Rd b/man/colocboost.Rd index 3a20304..11ca213 100644 --- a/man/colocboost.Rd +++ b/man/colocboost.Rd @@ -221,13 +221,14 @@ X <- MASS::mvrnorm(N, rep(0, P), sigma) colnames(X) <- paste0("SNP", 1:P) L = 3 true_beta <- matrix(0, P, L) -true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 Y <- matrix(0, N, L) for (l in 1:L){ Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) } res <- colocboost(X = X, Y = Y) +res$cos_details$cos$cos_index } \concept{colocboost} diff --git a/man/colocboost_plot.Rd b/man/colocboost_plot.Rd index 6972966..0c74a54 100644 --- a/man/colocboost_plot.Rd +++ b/man/colocboost_plot.Rd @@ -106,14 +106,15 @@ X <- MASS::mvrnorm(N, rep(0, P), sigma) colnames(X) <- paste0("SNP", 1:P) L = 3 true_beta <- matrix(0, P, L) -true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 Y <- matrix(0, N, L) for (l in 1:L){ Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) } res <- colocboost(X = X, Y = Y) colocboost_plot(res, plot_cols = 1) +colocboost_plot(res, plot_cols = 1, outcome_idx = 1:3) } diff --git a/man/get_cos.Rd b/man/get_cos.Rd index 453ada6..69f3764 100644 --- a/man/get_cos.Rd +++ b/man/get_cos.Rd @@ -28,10 +28,10 @@ X <- MASS::mvrnorm(N, rep(0, P), sigma) colnames(X) <- paste0("SNP", 1:P) L = 3 true_beta <- matrix(0, P, L) -true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 Y <- matrix(0, N, L) for (l in 1:L){ Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) } res <- colocboost(X = X, Y = Y) diff --git a/man/get_cos_summary.Rd b/man/get_cos_summary.Rd index 30ee54a..4e4197b 100644 --- a/man/get_cos_summary.Rd +++ b/man/get_cos_summary.Rd @@ -49,10 +49,10 @@ X <- MASS::mvrnorm(N, rep(0, P), sigma) colnames(X) <- paste0("SNP", 1:P) L = 3 true_beta <- matrix(0, P, L) -true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 Y <- matrix(0, N, L) for (l in 1:L){ Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) } res <- colocboost(X = X, Y = Y) diff --git a/man/get_strong_colocalization.Rd b/man/get_strong_colocalization.Rd index 39636b9..0d43f37 100644 --- a/man/get_strong_colocalization.Rd +++ b/man/get_strong_colocalization.Rd @@ -49,14 +49,16 @@ X <- MASS::mvrnorm(N, rep(0, P), sigma) colnames(X) <- paste0("SNP", 1:P) L = 3 true_beta <- matrix(0, P, L) -true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 -true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) -true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 -true_beta[20, 3] <- 0.6 # SNP20 only affects trait 3 +true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 Y <- matrix(0, N, L) for (l in 1:L){ Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) } res <- colocboost(X = X, Y = Y) +res$cos_details$cos$cos_index filter_res <- get_strong_colocalization(res, cos_npc_cutoff = 0.5, npc_outcome_cutoff = 0.2) +filter_res$cos_details$cos$cos_index } \seealso{ diff --git a/tmp_readme.md b/tmp_readme.md deleted file mode 100644 index 0206b49..0000000 --- a/tmp_readme.md +++ /dev/null @@ -1,89 +0,0 @@ -# ColocBoost for multi-trait colocalization in molecular QTL and GWAS studies -[![Codecov test coverage](https://codecov.io/gh/StatFunGen/colocboost/branch/main/graph/badge.svg)](https://codecov.io/gh/StatFunGen/colocboost?branch=main) -[![CRAN Version](https://www.r-pkg.org/badges/version/colocboost)](https://cran.r-project.org/package=colocboost) - -This R package implements ColocBoost --- motivated and designed for colocalization analysis ([first formulated here](https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1004383)) of multiple genetic association studies --- as a multi-task learning approach to variable selection regression with highly correlated predictors and sparse effects, based on frequentist statistical inference. It provides statistical evidence to identify which subsets of predictors have non-zero effects on which subsets of response variables. - -## Installation - -### CRAN -Install released versions from CRAN - pre-built packages are available on macOS and Windows - -```r -install.packages("colocboost") -``` - -### GitHub -Install the development version from GitHub - -```r -devtools::install_github("StatFunGen/colocboost") -``` - -### Conda -Install major releases using pre-built conda package with a conda-compatible package manager (recommended) - -Global pixi installation is the easiest way to use the conda package -```bash -pixi global install r-base # Install r-base as a global package if not already installed -pixi global install --environment r-base r-colocboost # Inject r-colocboost into r-base global environment -``` -The package can also be added to a local pixi environment -```bash -pixi workspace channel add dnachun # Add the dnachun channel to the workspace -pixi add r-colocboost # Add r-colocboost as a dependency to the environment -``` -Micromamba is recommended instead of conda or mamba for traditional conda environments -```bash -micromamba install -c dnachun r-colocboost -mamba install -c dnachun r-colocboost -conda install -c dnachun r-colocboost -``` -## Usage - -### Multi-trait Colocalization -```r -# Basic multi-trait analysis -result <- colocboost(X=list(X), Y=list(y1, y2, y3)) - -# Using summary statistics -result <- colocboost(sumstat=list(sumstat1, sumstat2), LD=LD_matrix) - -# View colocalization summary -summary <- get_cos_summary(result) - -# Visualize results -colocboost_plot(result) - -# Filter for stronger colocalization evidence -filtered <- get_strong_colocalization(result, cos_npc_cutoff = 0.5) -``` - -For more complex analyses involving multiple datasets mixing individual level and summary statistics data, we recommend using [this pipeline wrapper](https://github.com/StatFunGen/pecotmr/blob/main/R/colocboost_pipeline.R) from the `pecotmr` package. The `pecotmr` package can be installed either from source or from our conda package at https://anaconda.org/dnachun/r-pecotmr. - -### Single-trait Fine-mapping (FineBoost) - Special Case -Run FineBoost for single-trait fine-mapping (similar interface to SuSiE) -```r -result <- colocboost(X=X, Y=y) -``` - -## Citation - -If you use ColocBoost in your research, please cite: - -Cao X, Sun H, Feng R, Mazumder R, Najar CFB, Li YI, de Jager PL, Bennett D, The Alzheimer's Disease Functional Genomics Consortium, Dey KK, Wang G. (2025+). Integrative multi-omics QTL colocalization maps regulatory architecture in aging human brain. bioRxiv. [https://doi.org/](https://doi.org/) - -## Documentation - -For detailed documentation, use the R help system: - -```r -?colocboost -?colocboost_plot -?get_cos_summary -?get_strong_colocalization -``` - -## License - -This package is released under the MIT License.