diff --git a/.Rbuildignore b/.Rbuildignore index 0f64d8f..40982c5 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -16,3 +16,4 @@ ^\.pixi ^cran-comments\.md$ ^LICENSE\.md$ +^README\.md$ diff --git a/.github/environment/pixi.toml b/.github/environment/pixi.toml index 691796b..53cd3f8 100644 --- a/.github/environment/pixi.toml +++ b/.github/environment/pixi.toml @@ -10,7 +10,8 @@ libc = { family="glibc", version="2.17" } devtools_document = "R -e 'devtools::document()'" devtools_test = "R -e 'devtools::test()'" codecov = "R -e 'covr::codecov(quiet = FALSE)'" -rcmdcheck = "R -e 'rcmdcheck::rcmdcheck()'" +build = "R -e 'devtools::build()'" +rcmdcheck = "R -e 'pkg <- list.files(\"..\", pattern = \".tar.gz\", full.names = TRUE); rcmdcheck::rcmdcheck(path = pkg[1], args = c(\"--as-cran\", \"--no-manual\"))'" use_major_version = "R -e 'usethis::Use_version(which = \"major\", push = FALSE)'" use_minor_version = "R -e 'usethis::use_version(which = \"minor\", push = FALSE)'" use_patch_version = "R -e 'usethis::use_version(which = \"patch\", push = FALSE)'" @@ -26,7 +27,9 @@ r43 = {features = ["r43"]} r44 = {features = ["r44"]} [dependencies] +"r-ashr" = "*" +"r-covr" = "*" "r-devtools" = "*" +"r-mass" = "*" "r-rcmdcheck" = "*" -"r-covr" = "*" "r-tidyverse" = "*" diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml deleted file mode 100644 index 3d53917..0000000 --- a/.github/workflows/R-CMD-check.yaml +++ /dev/null @@ -1,50 +0,0 @@ - -on: - push: - branches: [main, master] - pull_request: - branches: [main, master] - -name: R-CMD-check - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: "release"} - - {os: macOS-latest, r: "release"} - - {os: ubuntu-latest, r: "release"} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: 
actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v2 - - - name: Install dependencies - run: | - install.packages(c("remotes", "rcmdcheck", "covr", "testthat")) - remotes::install_deps(dependencies = TRUE) - shell: Rscript {0} - - - name: Check - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Test coverage - run: covr::codecov() - shell: Rscript {0} - diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5637fc9..45ad255 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,11 +33,16 @@ jobs: - name: Run unit tests run: pixi run --environment ${{ matrix.environment }} devtools_test - # - name: Check unit test code coverage - # run: pixi run --environment ${{ matrix.environment }} codecov + - name: Run R CMD CHECK + run: | + pixi install --environment ${{ matrix.environment }} + ln -sf $(pwd)/.pixi/envs/default/lib/libhwasan.so.0 $(pwd)/.pixi/envs/default/lib/libhwasan.so + ln -sf $(pwd)/.pixi/envs/${{ matrix.environment }}/lib/libhwasan.so.0 $(pwd)/.pixi/envs/${{ matrix.environment }}/lib/libhwasan.so + pixi run --environment ${{ matrix.environment }} build + pixi run --environment ${{ matrix.environment }} rcmdcheck - #- name: Run R CMD CHECK - #run: pixi run rcmdcheck + - name: Check unit test code coverage + run: pixi run --environment ${{ matrix.environment }} codecov ci_osx-64: name: osx-64 CI @@ -62,8 +67,10 @@ jobs: - name: Run unit tests run: pixi run --environment ${{ matrix.environment }} devtools_test - #- name: Run R CMD CHECK - #run: pixi run rcmdcheck + - name: Run R CMD CHECK + run: | + pixi run --environment ${{ matrix.environment }} build + pixi run --environment ${{ matrix.environment }} rcmdcheck ci_osx-arm64: name: osx-arm64 CI @@ -88,5 +95,7 @@ jobs: - name: Run unit tests run: pixi run --environment ${{ matrix.environment }} 
devtools_test - #- name: Run R CMD CHECK - #run: pixi run rcmdcheck + - name: Run R CMD CHECK + run: | + pixi run --environment ${{ matrix.environment }} build + pixi run --environment ${{ matrix.environment }} rcmdcheck diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml deleted file mode 100644 index 61d3681..0000000 --- a/.github/workflows/test-coverage.yaml +++ /dev/null @@ -1,28 +0,0 @@ - -on: - push: - branches: [main, master] - pull_request: - branches: [main, master] - -name: test-coverage - -jobs: - test-coverage: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v2 - - - name: Install dependencies - run: | - install.packages(c("remotes", "covr")) - remotes::install_deps(dependencies = TRUE) - shell: Rscript {0} - - - name: Test coverage - run: covr::codecov() - shell: Rscript {0} - diff --git a/DESCRIPTION b/DESCRIPTION index 0586bef..9965c63 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: colocboost Type: Package -Date: 2024-09-01 +Date: 2025-04-13 Title: Multi-Context Colocalization Analysis Tool for Molecular QTL and GWAS Studies Version: 0.1.0 Authors@R: c( @@ -28,8 +28,8 @@ Suggests: testthat (>= 3.0.0), knitr, rmarkdown, - susieR, - ashr + ashr, + MASS VignetteBuilder: knitr Roxygen: list(markdown = TRUE) Config/testthat/edition: 3 diff --git a/R/colocboost.R b/R/colocboost.R index c53bf83..17942ee 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -13,7 +13,6 @@ #' (default is 0.8) but within the same locus. This step addresses potential instabilities in linkage disequilibrium (LD) estimation #' that may arise from small sample sizes or discrepancies in minor allele frequencies (MAF) across different confidence sets. #' -#' @section Input Data: #' @param X A list of genotype matrices for different outcomes, or a single matrix if all outcomes share the same genotypes. 
#' Each matrix should have column names, if sample sizes and variables possibly differing across matrices. #' @param Y A list of vectors of outcomes or an N by L matrix if it is considered for the same X and multiple outcomes. @@ -40,7 +39,6 @@ #' @param effect_se Matrix of standard errors associated with the beta values #' @param effect_n A scalar or a vector of sample sizes for estimating regression coefficients. Highly recommended! #' -#' @section Model Parameters #' @param M The maximum number of gradient boosting rounds. If the number of outcomes are large, it will be automatically increased to a larger number. #' @param stop_thresh The stop criterion for overall profile loglikelihood function. #' @param tau The smooth parameter for proximity adaptive smoothing weights for the best update jk-star. @@ -64,7 +62,6 @@ #' @param p.adjust.methods The adjusted pvalue method in stats:p.adj when \code{func_multi_test = "fdr"} #' @param residual_correlation The residual correlation based on the sample overlap, it is diagonal if it is NULL. #' -#' @section Post Inference Parameters #' @param coverage A number between 0 and 1 specifying the \dQuote{coverage} of the estimated colocalization confidence sets (CoS) (default is 0.95). #' @param min_cluster_corr The small correlation for the weights distributions across different iterations to be decided having only one cluster. #' @param dedup If \code{dedup = TRUE}, the duplicate confidence sets will be removed in the post-processing. 
diff --git a/R/colocboost_init.R b/R/colocboost_init.R index 2cc6104..bd32b88 100644 --- a/R/colocboost_init.R +++ b/R/colocboost_init.R @@ -571,5 +571,3 @@ get_multiple_correction <- function(z, miss_idx = NULL, func_multi_test = "lfdr" } } - - diff --git a/README.md b/README.md index f0fbbed..69f2137 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,77 @@ -# ColocBoost for multi-context colocalization in molecular QTL and GWAS studies +# ColocBoost for multi-trait colocalization in molecular QTL and GWAS studies [![Codecov test coverage](https://codecov.io/gh/StatFunGen/colocboost/branch/master/graph/badge.svg)](https://codecov.io/gh/StatFunGen/colocboost?branch=master) +[![CRAN Version](https://www.r-pkg.org/badges/version/colocboost)](https://cran.r-project.org/package=colocboost) This R package implements ColocBoost --- motivated and designed for colocalization analysis ([first formulated here](https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1004383)) of multiple genetic association studies --- as a multi-task learning approach to variable selection regression with highly correlated predictors and sparse effects, based on frequentist statistical inference. It provides statistical evidence to identify which subsets of predictors have non-zero effects on which subsets of response variables. 
-Temporary usage before our cran or conda release -- clone the repo to your local folder, then +## Installation + +### Conda +Install major releases from conda (recommended) + ```bash -cd colocboost -R --slave -e "devtools::install()" +conda install -c dnachun r-colocboost +``` + +### CRAN +Install released versions from cran + +```r +install.packages("colocboost") +``` + +### GitHub +Install the development version from GitHub + +```r +devtools::install_github("StatFunGen/colocboost") +``` + +## Usage + +### Single-trait Fine-mapping (FineBoost) +Run FineBoost for single-trait fine-mapping (similar interface to SuSiE) +```r +result <- colocboost(X=X, Y=y) +``` + +### Multi-trait Colocalization +```r +# Basic multi-trait analysis +result <- colocboost(X=list(X), Y=list(y1, y2, y3)) + +# Using summary statistics +result <- colocboost(sumstat=list(sumstat1, sumstat2), LD=LD_matrix) + +# View colocalization summary +summary <- get_cos_summary(result) + +# Visualize results +colocboost_plot(result) + +# Filter for stronger colocalization evidence +filtered <- get_strong_colocalization(result, cos_npc_cutoff = 0.5) ``` -- To run FineBoost, you need `colocboost(X=X, Y=y)`, where X and y are the same as `susie(X,y)` -- To run ColocBoost we suggest using [this pipeline wrapper](https://github.com/StatFunGen/pecotmr/blob/main/R/colocboost_pipeline.R) to manage multiple data-sets mixing individual level and summary statistics data. The `pecotmr` package can be installed either from source or from our conda package at https://anaconda.org/dnachun/r-pecotmr +For more complex analyses involving multiple datasets mixing individual level and summary statistics data, we recommend using [this pipeline wrapper](https://github.com/StatFunGen/pecotmr/blob/main/R/colocboost_pipeline.R) from the `pecotmr` package. The `pecotmr` package can be installed either from source or from our conda package at https://anaconda.org/dnachun/r-pecotmr. 
+ +## Citation + +If you use ColocBoost in your research, please cite: + +Cao X, Sun H, Feng R, Mazumder R, Najar CFB, Li YI, de Jager PL, Bennett D, The Alzheimer's Disease Functional Genomics Consortium, Dey KK, Wang G. (2025+). Integrative multi-omics QTL colocalization maps regulatory architecture in aging human brain. bioRxiv. [https://doi.org/](https://doi.org/) + +## Documentation + +For detailed documentation, use the R help system: + +```r +?colocboost +?colocboost_plot +?get_cos_summary +?get_strong_colocalization +``` + +## License + +This package is released under the MIT License. diff --git a/man/colocboost.Rd b/man/colocboost.Rd index d10c19f..c712a63 100644 --- a/man/colocboost.Rd +++ b/man/colocboost.Rd @@ -208,18 +208,6 @@ There is an additional step to help merge the confidence sets with small \code{b (default is 0.8) but within the same locus. This step addresses potential instabilities in linkage disequilibrium (LD) estimation that may arise from small sample sizes or discrepancies in minor allele frequencies (MAF) across different confidence sets. } -\section{Input Data}{ - -} - -\section{Model Parameters}{ -NA -} - -\section{Post Inference Parameters}{ -NA -} - \examples{ colocboost(X=X, Y=Y) diff --git a/tests/README.md b/tests/README.md index 1ec2832..54c6810 100644 --- a/tests/README.md +++ b/tests/README.md @@ -2,39 +2,29 @@ ## Overview -This repository contains a comprehensive testing framework for the [colocboost](https://github.com/StatFunGen/colocboost) R package. The framework is designed to ensure the reliability and correctness of the package's functionality through automated testing. +This repository contains a comprehensive testing framework for the [colocboost](https://github.com/StatFunGen/colocboost) R package. The framework is designed to ensure the reliability and correctness of package functionality through automated testing. ## Quick Start -1. Navigate to test folder: - ```bash - cd tests - ``` +1. 
Our unit testing setup is managed by `pixi`. Please follow the instructions at [https://pixi.sh/latest/#installation](https://pixi.sh/latest/#installation) if you have not already installed `pixi`. -2. **First time use**: run the setup script to install required packages and configure the testing environment: - ```r - # install.packages(c("devtools", "testthat", "covr", "roxygen2")) - source("setup_testthat.R") - ``` +2. In the root of this repository, run the helper script to create a pixi.toml file. This file is deliberately ignored in `.gitignore` because it is ephemeral and will be regenerated whenever CI is run. + + ```bash + .github/workflows/create_toml_from_yaml.sh $(pwd) + ``` -3. Run all tests: - ```r - devtools::load_all() - devtools::test() - ``` - or, +3. Run all tests with a `pixi` task: ```bash - Rscript run_tests.R + pixi run devtools_test ``` - To test one file: - ```r - devtools::test_active_file("testthat/test_colocboost.R") + or test one file using `pixi run`: + ```bash + pixi run R -e 'devtools::test_active_file("tests/testthat/test_colocboost.R")' ``` ## Files and Structure -- `setup_testthat.R`: Script to set up the testthat infrastructure -- `run_tests.R`: Script to run all tests and generate test coverage reports - `testthat/`: Directory containing test files - `test_package.R`: Tests for basic package functionality - `test_colocboost.R`: Tests for the main colocalization functions @@ -42,22 +32,6 @@ This repository contains a comprehensive testing framework for the [colocboost]( - `test_model.R`: Tests for model fitting and prediction functions - `.github/workflows/`: GitHub Actions workflow configurations -## How To Use - -### Running Tests Locally - -To run the tests locally, you can use: - -```r -devtools::test() -``` - -Or run individual test files: - -```r -devtools::test_file("testthat/test_colocboost.R") -``` - ### Adding New Tests When adding new functionality to the colocboost package, corresponding tests should be added to 
maintain test coverage. Follow these steps: @@ -84,23 +58,21 @@ test_that("new_function produces expected output", { ### Test Coverage -The `covr` package is used to measure test coverage, which indicates what percentage of your code is being tested. Aim for at least 80% coverage for a reliable package. +The `covr` package is used to measure test coverage, which indicates what percentage of the code is being tested. To generate a test coverage report: -```r -library(covr) -coverage <- package_coverage() -report(coverage) +```bash +pixi run codecov ``` ## GitHub Actions Workflow -This testing framework includes GitHub Actions workflows that automatically run the tests on every push and pull request. The workflows test the package on multiple operating systems (Windows, macOS, and Linux) to ensure cross-platform compatibility. +This testing framework includes GitHub Actions workflows that automatically run the tests on every pull request. The workflows test the package on Linux and macOS to ensure cross-platform compatibility. -The workflow is defined in `.github/workflows/R-CMD-check.yaml` and automatically runs: -- R CMD check -- Test coverage reporting +The workflow is defined in `.github/workflows/ci.yml` and automatically runs: +- `R CMD check` +- Test coverage reporting with [codecov.io](https://codecov.io) -Test results and coverage statistics are available on the GitHub Actions page after each push or pull request. +Test results and coverage statistics are available on the GitHub Actions page after each pull request. For more information, check the [testthat documentation](https://testthat.r-lib.org/).
diff --git a/tests/run_tests.R b/tests/run_tests.R deleted file mode 100644 index 4ac19a2..0000000 --- a/tests/run_tests.R +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env Rscript - -# Script to install the package and run tests - -# Check for required packages -required_packages <- c("devtools", "testthat", "covr") -for (pkg in required_packages) { - if (!requireNamespace(pkg, quietly = TRUE)) { - stop(sprintf("Cannot find required package: %s\n", pkg)) - } -} - -# Path to working directory -args <- commandArgs(trailingOnly = TRUE) -if (length(args) > 0) { - work_dir <- args[1] -} else { - work_dir <- getwd() -} - -setwd(work_dir) -cat(sprintf("Working directory: %s\n", work_dir)) - -# Install the package -# cat("Installing colocboost package...\n") -# devtools::install(".", dependencies = FALSE, quiet = TRUE) - -# Run tests -cat("Running tests...\n") -devtools::load_all('../') -testthat::test_dir("testthat/") - -# Calculate test coverage -cat("Calculating test coverage...\n") -coverage <- covr::package_coverage() -print(coverage) -covr::report(coverage) - -cat("Tests completed.\n") \ No newline at end of file diff --git a/tests/setup_testthat.R b/tests/setup_testthat.R deleted file mode 100644 index caab98f..0000000 --- a/tests/setup_testthat.R +++ /dev/null @@ -1,130 +0,0 @@ -setwd("../") - -# Install necessary packages -if (!requireNamespace("devtools", quietly = TRUE)) { - stop("devtools not found") -} -if (!requireNamespace("testthat", quietly = TRUE)) { - stop("testthat not found") -} -if (!requireNamespace("covr", quietly = TRUE)) { - stop("covr not found") -} -if (!requireNamespace("roxygen2", quietly = TRUE)) { - stop("roxygen2 not found") -} - -# Set up testthat infrastructure -if (!dir.exists("tests/testthat")) { - devtools::use_testthat() -} - -# Create GitHub Actions workflow for continuous integration -if (!dir.exists(".github/workflows")) { - dir.create(".github/workflows", recursive = TRUE, showWarnings = FALSE) -} - -# Write GitHub Actions workflow 
file -workflow_file <- ".github/workflows/R-CMD-check.yaml" -writeLines( -' -on: - push: - branches: [main, master] - pull_request: - branches: [main, master] - -name: R-CMD-check - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: "release"} - - {os: macOS-latest, r: "release"} - - {os: ubuntu-latest, r: "release"} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v2 - - - name: Install dependencies - run: | - install.packages(c("remotes", "rcmdcheck", "covr", "testthat")) - remotes::install_deps(dependencies = TRUE) - shell: Rscript {0} - - - name: Check - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Test coverage - run: covr::codecov() - shell: Rscript {0} -', workflow_file) - -# Create a test coverage workflow -coverage_file <- ".github/workflows/test-coverage.yaml" -writeLines( -' -on: - push: - branches: [main, master] - pull_request: - branches: [main, master] - -name: test-coverage - -jobs: - test-coverage: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v2 - - - name: Install dependencies - run: | - install.packages(c("remotes", "covr")) - remotes::install_deps(dependencies = TRUE) - shell: Rscript {0} - - - name: Test coverage - run: covr::codecov() - shell: Rscript {0} -', coverage_file) - -# Add code coverage badge to README.md -readme_file <- "README.md" -if (file.exists(readme_file)) { - readme_content <- readLines(readme_file) - if (!any(grepl("codecov", readme_content))) { - badge <- "[![Codecov test 
coverage](https://codecov.io/gh/StatFunGen/colocboost/branch/master/graph/badge.svg)](https://codecov.io/gh/StatFunGen/colocboost?branch=master)" - # Add badge after the first line if it's a title - if (length(readme_content) > 0) { - readme_content <- c(readme_content[1], badge, readme_content[-1]) - } else { - readme_content <- c("# colocboost", badge) - } - writeLines(readme_content, readme_file) - } -} - -# Set up basic testthat structure -message("testthat setup complete. Next, create test files for each R file in the package.") \ No newline at end of file diff --git a/tests/testthat/test_colocboost.R b/tests/testthat/test_colocboost.R index 38cf29f..bf8d0bc 100644 --- a/tests/testthat/test_colocboost.R +++ b/tests/testthat/test_colocboost.R @@ -47,10 +47,11 @@ test_that("colocboost runs with individual data", { # Convert Y to list Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + X_list <- list(test_data$X, test_data$X) # Run colocboost with minimal parameters result <- colocboost( - X = test_data$X, + X = X_list, Y = Y_list, M = 10, # Small number of iterations for testing output_level = 2 # More detailed output for testing @@ -123,10 +124,11 @@ test_that("colocboost handles target outcome correctly", { # Convert Y to list Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + X_list <- list(test_data$X, test_data$X) # Run colocboost with target_outcome_idx = 1 result <- colocboost( - X = test_data$X, + X = X_list, Y = Y_list, target_outcome_idx = 1, M = 10, # Small number of iterations for testing @@ -147,10 +149,11 @@ test_that("get_cos_summary returns expected structure", { # Convert Y to list Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + X_list <- list(test_data$X, test_data$X) # Run colocboost with minimal parameters result <- colocboost( - X = test_data$X, + X = X_list, Y = Y_list, M = 10, # Small number of iterations for testing output_level = 2 # More detailed output for testing @@ -181,10 +184,11 @@ test_that("colocboost_plot runs without error", 
{ # Convert Y to list Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + X_list <- list(test_data$X, test_data$X) # Run colocboost with minimal parameters result <- colocboost( - X = test_data$X, + X = X_list, Y = Y_list, M = 10, # Small number of iterations for testing output_level = 2 # More detailed output for testing @@ -201,10 +205,11 @@ test_that("get_strong_colocalization maintains colocboost structure", { # Convert Y to list Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + X_list <- list(test_data$X, test_data$X) # Run colocboost with minimal parameters result <- colocboost( - X = test_data$X, + X = X_list, Y = Y_list, M = 10, # Small number of iterations for testing output_level = 2 # More detailed output for testing @@ -234,6 +239,7 @@ test_that("colocboost handles missing/invalid inputs appropriately", { # Test mismatched dimensions X_bad <- test_data$X[1:(nrow(test_data$X) - 10), ] Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + X_list <- list(X_bad, X_bad) - expect_error(colocboost(X = X_bad, Y = Y_list), "do not have the same sample size") + expect_error(colocboost(X = X_list, Y = Y_list), "do not have the same sample size") }) \ No newline at end of file diff --git a/tests/testthat/test_corner_cases.R b/tests/testthat/test_corner_cases.R index a3a5d6b..8f0c823 100644 --- a/tests/testthat/test_corner_cases.R +++ b/tests/testthat/test_corner_cases.R @@ -84,11 +84,12 @@ test_that("colocboost handles missing values in Y", { # Convert Y to list Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + X_list <- list(test_data$X, test_data$X) # Run colocboost - should handle NAs automatically expect_warning( result <- colocboost( - X = test_data$X, + X = X_list, Y = Y_list, M = 5 # Small number of iterations for testing ), @@ -161,11 +162,12 @@ test_that("colocboost correctly identifies absence of colocalization", { # Convert Y to list Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + X_list <- list(test_data$X, test_data$X) # Run colocboost 
suppressWarnings({ result <- colocboost( - X = test_data$X, + X = X_list, Y = Y_list, M = 10 # Need more iterations for this test ) @@ -192,6 +194,7 @@ test_that("colocboost handles highly correlated traits", { # Convert Y to list Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + X_list <- list(test_data$X, test_data$X) # Create correlation matrix for residuals residual_corr <- matrix(c(1, 0.9, 0.9, 1), nrow=2) @@ -199,7 +202,7 @@ test_that("colocboost handles highly correlated traits", { # Run colocboost with residual correlation suppressWarnings({ result <- colocboost( - X = test_data$X, + X = X_list, Y = Y_list, residual_correlation = residual_corr, M = 10 # Need more iterations for this test @@ -220,11 +223,12 @@ test_that("colocboost handles very small datasets", { colnames(X_small) <- paste0("SNP", 1:5) Y_small <- matrix(rnorm(10*2), 10, 2) Y_list_small <- list(Y_small[,1], Y_small[,2]) + X_list_small <- list(X_small, X_small) # Run colocboost suppressWarnings({ result <- colocboost( - X = X_small, + X = X_list_small, Y = Y_list_small, M = 5 # Small number of iterations for testing ) @@ -244,11 +248,12 @@ test_that("colocboost works with custom parameters", { colnames(X) <- paste0("SNP", 1:10) Y <- matrix(rnorm(50*2), 50, 2) Y_list <- list(Y[,1], Y[,2]) + X_list <- list(X, X) # Run colocboost with custom parameters suppressWarnings({ result <- colocboost( - X = X, + X = X_list, Y = Y_list, M = 5, # Small number of iterations for testing lambda = 0.7, # Custom lambda @@ -287,11 +292,12 @@ test_that("colocboost prioritizes target outcome correctly", { Y2 <- X %*% b2 + rnorm(100, 0, 1) Y_list <- list(Y1, Y2) + X_list <- list(X, X) # Run colocboost with Y1 as target suppressWarnings({ result_target1 <- colocboost( - X = X, + X = X_list, Y = Y_list, target_outcome_idx = 1, lambda_target_outcome = 0.9, # Higher lambda for target @@ -302,7 +308,7 @@ test_that("colocboost prioritizes target outcome correctly", { # Run colocboost with Y2 as target
suppressWarnings({ result_target2 <- colocboost( - X = X, + X = X_list, Y = Y_list, target_outcome_idx = 2, lambda_target_outcome = 0.9, # Higher lambda for target diff --git a/tests/testthat/test_model.R b/tests/testthat/test_model.R index c0eeb94..f8ec18f 100644 --- a/tests/testthat/test_model.R +++ b/tests/testthat/test_model.R @@ -28,11 +28,12 @@ generate_test_model <- function(n = 100, p = 20, L = 2, seed = 42) { # Convert Y to list Y_list <- list(Y[,1], Y[,2]) + X_list <- list(X, X) # Run colocboost with minimal parameters to get a model object suppressWarnings({ result <- colocboost( - X = X, + X = X_list, Y = Y_list, M = 5, # Small number of iterations for faster testing output_level = 3 # Include full model details @@ -157,11 +158,12 @@ test_that("colocboost_workhorse performs boosting iterations", { colnames(X) <- paste0("SNP", 1:p) Y <- matrix(rnorm(n*2), n, 2) Y_list <- list(Y[,1], Y[,2]) + X_list <- list(X, X) # Initialize CB objects suppressWarnings({ # First get the data object by running colocboost with M=0 - temp <- colocboost(X = X, Y = Y_list, M = 0) + temp <- colocboost(X = X_list, Y = Y_list, M = 0) # If the workhorse function is exported if (exists("colocboost_workhorse")) { diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/ColocBoost_tutorial_GTEx.Rmd b/vignettes/ColocBoost_tutorial_GTEx.Rmd new file mode 100644 index 0000000..bbfec9e --- /dev/null +++ b/vignettes/ColocBoost_tutorial_GTEx.Rmd @@ -0,0 +1,29 @@ +--- +title: "ColocBoost Tutorial (GTEx tissues)" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{ColocBoost Tutorial (GTEx tissues)} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +ColocBoost: Multi-omics xQTL colocalization improves the discovery of
causal variants for complex diseases. + + + +```{r setup} +library(colocboost) +``` + + +## GTEx data preparations + + + diff --git a/vignettes/ColocBoost_tutorial_advance.Rmd b/vignettes/ColocBoost_tutorial_advance.Rmd new file mode 100644 index 0000000..22c93cc --- /dev/null +++ b/vignettes/ColocBoost_tutorial_advance.Rmd @@ -0,0 +1,74 @@ +--- +title: "ColocBoost Tutorial (Advanced Usage)" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{ColocBoost Tutorial (Advanced Usage)} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(colocboost) +``` + +## Leveraging a single genotype or LD matrix across multiple phenotypes or summary statistics + +In large-scale genetic studies, particularly those involving colocalization analysis of gene expression across different tissues or cell types (such as GTEx or ROSMAP), it is common to encounter scenarios where a single genotype matrix applies to multiple phenotypes. Similarly, for summary statistics analyses involving multiple complex diseases (like those from UK Biobank), researchers often use a single LD matrix across various datasets. Traditional methods/usage that require duplicating these matrices for each phenotype or summary statistics not only increase computational load but also significantly waste memory resources. + +To address these inefficiencies, ColocBoost offers a streamlined option that supports using a single genotype matrix alongside a matrix of phenotypes or a single LD matrix with a list of summary statistics dataframes. This approach leverages the consistency across genetic structures to reduce redundancy and enhance computational efficiency. + + +**Example of using individual-level data** + +Consider a scenario where all phenotypes share the same genotype matrix, as demonstrated with the `Ind_5traits` dataset.
Instead of loading multiple copies of the genotype matrix, we use the first genotype matrix for all phenotypes and convert the list of phenotypes into a single matrix. + +```{r oneGeno} +data("Ind_5traits") +X <- Ind_5traits$X[[1]] +Y <- do.call(cbind, Ind_5traits$Y) +res <- colocboost(X = X, Y = Y) +res$summary_table +``` + +**Example of using summary statistics** + +For summary statistics, a similar optimization is applied using the `Sumstat_5traits` dataset, where all summary statistics share the same LD matrix. + +```{r oneLD} +data("Sumstat_5traits") +sumstat <- Sumstat_5traits$sumstat +LD <- Sumstat_5traits$LD +res <- colocboost(sumstat = sumstat, LD = LD) +res$summary_table +``` + +**Example of combining individual-level data and summary statistics** + +Consider a scenario where several individual-level phenotypes share a common genotype matrix, while multiple summary statistics share the same LD matrix. Our goal is to identify the colocalization across this mixture of individual-level phenotypes and multiple summary statistics. + + +```{r oneMixture} +data("Ind_5traits") +X <- Ind_5traits$X[[1]] +Y <- do.call(cbind, Ind_5traits$Y[1:3]) +data("Sumstat_5traits") +sumstat <- Sumstat_5traits$sumstat[4:5] +LD <- Sumstat_5traits$LD +res <- colocboost(X = X, Y = Y, sumstat = sumstat, LD = LD) +res$summary_table +``` + + +## Fewer genotype matrices or LD matrices than phenotypes and summary statistics (dict_YX and dict_sumstat) + +## Different phenotypes have different SNPs.
+ + + diff --git a/vignettes/ColocBoost_tutorial_basic.Rmd b/vignettes/ColocBoost_tutorial_basic.Rmd new file mode 100644 index 0000000..99c37fc --- /dev/null +++ b/vignettes/ColocBoost_tutorial_basic.Rmd @@ -0,0 +1,107 @@ +--- +title: "ColocBoost Tutorial (Basic Usage)" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{ColocBoost Tutorial (Basic Usage)} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +This tutorial will guide you through using `ColocBoost` with individual-level data, summary statistics data, or the combination of individual-level data and summary statistics. + + +```{r setup} +library(colocboost) +``` + +## Individual-level data only + +This tutorial demonstrates how to analyze individual-level data using the `colocboost` package, specifically with the `Ind_5traits` dataset. Detailed information about the `Ind_5traits` dataset, which includes 5 simulated phenotypes alongside corresponding genotype matrices, is available at `url`. The dataset is specifically designed to facilitate the identification of causal variants for complex traits. + +**Loading and Analyzing Data** + +To get started, load the `Ind_5traits` dataset into your R session. Once loaded, you can proceed with the analysis using the `colocboost` function. This function requires specifying genotypes `X` and phenotypes `Y` from the dataset: +```{r individual} +data("Ind_5traits") +res <- colocboost(X = Ind_5traits$X, Y = Ind_5traits$Y) +``` +This command initiates the colocalization analysis, applying the ColocBoost methodology to identify potential genetic intersections between phenotypes and their respective genotypes. + +**Results Exploration** + +After running the analysis, you can explore the results to identify colocalized variants and review the summary statistics.
This output will provide insights into which variants are colocalized across the different phenotypes and offer a comprehensive overview of the statistical results from the colocalization analysis. + +```{r indResults} +res$coloc_results$coloc_csets$csets_index + +res$summary_table + +``` + +## Summary statistics only + +This tutorial demonstrates how to analyze summary statistics data using the `colocboost` package, specifically with the `Sumstat_5traits` dataset. Detailed information about the `Sumstat_5traits` dataset, which includes the summary data for 5 simulated summary statistics and one LD matrix, where the summary data is directly calculated using the marginal association from the `Ind_5traits` data, is available at `url`. This dataset is designed to facilitate the identification of causal variants for complex traits using summary statistics. + + +**Loading and Analyzing Data** +To get started, load the `Sumstat_5traits` dataset into your R session. Note: The `Sumstat_5traits` dataset includes only one LD matrix that applies to all traits. To demonstrate handling multiple traits, we replicate this single LD matrix for each trait as follows. To analyze the data using summary statistics, apply the colocboost function specifying the summary statistics and LD matrices. +```{r sumstat} +data("Sumstat_5traits") +LD <- lapply(1:5, function(i) Sumstat_5traits$LD) +res <- colocboost(sumstat = Sumstat_5traits$sumstat, LD = LD) +``` +*Note*: This step duplicates the single LD matrix into a list of five matrices, one for each trait. This is to mimic scenarios where different traits might have different LD structures. ColocBoost allows for the input of a single LD matrix if the LD across traits is consistent. For more advanced usage involving different LD matrices or more complex setups, please refer to the advanced tutorial (URL). 
+ +**Results Exploration** (consistent with the results obtained from individual-level data) + +After running the analysis, you can explore the results to identify colocalized variants and review the summary statistics. This output will provide insights into which variants are colocalized across the different phenotypes and offer a comprehensive overview of the statistical results from the colocalization analysis. + +```{r sumstatResults} +res$coloc_results$coloc_csets$csets_index + +res$summary_table +``` +This section of the analysis provides insights into which variants are colocalized across the different phenotypes and offers a comprehensive overview of the statistical results from the colocalization analysis. + + + +## Mixture usage of individual-level data and summary statistics + +This tutorial provides a step-by-step guide on using both individual-level data and summary statistics within the `colocboost` package to perform multi-trait colocalization analysis. This approach is especially beneficial when comprehensive individual-level genotype and phenotype data are not available for all traits. + +**Loading and Analyzing Datasets** + +To get started, load both `Ind_5traits` and `Sumstat_5traits` datasets into your R session. Once loaded, we create a mixed dataset. For example, for traits 1, 2, and 3, we use individual-level genotype and phenotype data; for traits 4 and 5, we use summary statistics and duplicated LD matrices. +```{r mixture} +data("Ind_5traits") +data("Sumstat_5traits") +X <- Ind_5traits$X[1:3] +Y <- Ind_5traits$Y[1:3] +sumstat <- Sumstat_5traits$sumstat[4:5] +LD <- lapply(1:2, function(i) Sumstat_5traits$LD) +``` +*Note*: This step duplicates the single LD matrix into a list of two matrices, one for each trait. This is to mimic scenarios where different traits might have different LD structures. ColocBoost allows for the input of a single LD matrix if the LD across traits is consistent. 
For more advanced usage involving different LD matrices or more complex setups, please refer to the advanced tutorial (URL). + +Once loaded, you can proceed with the analysis using the `colocboost` function. This function requires specifying genotypes `X` and phenotypes `Y` from the individual-level dataset and summary statistics `sumstat` and LD matrices `LD` from the summary dataset: +```{r mixRun} +res <- colocboost(X = X, Y = Y, sumstat = sumstat, LD = LD) +``` + +**Results Exploration** (Consistent results obtained from both individual-level only and summary statistics only) + +After running the analysis, you can explore the results to identify colocalized variants and review the summary statistics. This output will provide insights into which variants are colocalized across the different phenotypes and offer a comprehensive overview of the statistical results from the colocalization analysis. +```{r mixResults} +res$coloc_results$coloc_csets$csets_index + +res$summary_table +``` +This section of the analysis provides insights into which variants are colocalized across the different phenotypes and offers a comprehensive overview of the statistical results from the colocalization analysis. + + diff --git a/vignettes/ColocBoost_tutorial_cbsummary.Rmd b/vignettes/ColocBoost_tutorial_cbsummary.Rmd new file mode 100644 index 0000000..d34d360 --- /dev/null +++ b/vignettes/ColocBoost_tutorial_cbsummary.Rmd @@ -0,0 +1,35 @@ +--- +title: "ColocBoost Tutorial (Summary of ColocBoost results)" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{ColocBoost Tutorial (Summary of ColocBoost results)} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +ColocBoost: Multi-omics xQTL colocalization improves the discovery of causal variants for complex diseases. 
+ + + +```{r setup} +library(colocboost) +``` + + +## Get summary table (with or without target phenotype) + +## Plot ColocBoost results + +## Check others .....??? + + + + + diff --git a/vignettes/ColocBoost_tutorial_diagnostic.Rmd b/vignettes/ColocBoost_tutorial_diagnostic.Rmd new file mode 100644 index 0000000..7ed04b9 --- /dev/null +++ b/vignettes/ColocBoost_tutorial_diagnostic.Rmd @@ -0,0 +1,34 @@ +--- +title: "ColocBoost Tutorial (Diagnostic)" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{ColocBoost Tutorial (Diagnostic)} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +ColocBoost: Multi-omics xQTL colocalization improves the discovery of causal variants for complex diseases. + + + +```{r setup} +library(colocboost) +``` + + +## Diagnostic for the post-processing to filter the small effects + +## Diagnostic for ad hoc colocalized sets based on between-purity + +## Diagnostic for checking the updates in each gradient-boosting iteration + + + + diff --git a/vignettes/Install.Rmd b/vignettes/Install.Rmd new file mode 100644 index 0000000..af85f41 --- /dev/null +++ b/vignettes/Install.Rmd @@ -0,0 +1,61 @@ +--- +title: "Installation Instructions for ColocBoost" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Installation Instructions for ColocBoost} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +To install ColocBoost, [R](https://www.r-project.org/) version 4.0 or greater is required. We also recommend installing [RStudio](https://www.rstudio.com/). + + +ColocBoost is available on [CRAN](https://cran.r-project.org/package=colocboost)??? for all platforms. 
To install, run: + +```{r, eval = FALSE} +# Enter commands in R (or R studio, if installed) +install.packages('colocboost') +library(colocboost) +``` + +The required dependencies are: + +```{r, eval = FALSE} +if (!require("Rfast", quietly = TRUE)) + install.packages("Rfast") +if (!require("matrixStats", quietly = TRUE)) + install.packages("matrixStats") +if (!require("BiocManager", quietly = TRUE)) + install.packages("BiocManager") +BiocManager::install("qvalue") +``` + + +# Install the development version of ColocBoost from GitHub + +Install the development version of ColocBoost - directly from [GitHub](https://github.com/xueweic/colocboost). + +```{r eval = FALSE} +# Enter commands in R (or R studio, if installed) +# Install the remotes package +install.packages('remotes') +remotes::install_github(repo = 'xueweic/colocboost') +library(colocboost) +``` + + +# Docker??? + +We provide docker images for ColocBoost via [dockerhub](???). + +To pull the latest image from the command line: + +```sh +docker pull xueweic/colocboost:latest +``` + +To use as a base image in a new Dockerfile: + +```sh +FROM xueweic/colocboost:latest +``` diff --git a/vignettes/announcements.Rmd b/vignettes/announcements.Rmd new file mode 100644 index 0000000..b42258f --- /dev/null +++ b/vignettes/announcements.Rmd @@ -0,0 +1,12 @@ +--- +title: "News" +output: + html_document: + theme: united + df_print: kable +--- + +## **Initial release of ColocBoost** + +We are excited to release ColocBoost, where it is now the default version for new installs. +