diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..3d53917 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,50 @@ + +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: "release"} + - {os: macOS-latest, r: "release"} + - {os: ubuntu-latest, r: "release"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install dependencies + run: | + install.packages(c("remotes", "rcmdcheck", "covr", "testthat")) + remotes::install_deps(dependencies = TRUE) + shell: Rscript {0} + + - name: Check + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") + shell: Rscript {0} + + - name: Test coverage + run: covr::codecov() + shell: Rscript {0} + diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 0000000..61d3681 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,28 @@ + +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage + +jobs: + test-coverage: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + + - name: Install dependencies + run: | + install.packages(c("remotes", "covr")) + remotes::install_deps(dependencies = TRUE) + shell: Rscript {0} + + - name: Test coverage + run: covr::codecov() + shell: Rscript {0} + diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index 9d328b8..62030e1 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -393,7 +393,7 @@ get_integrated_weight <- function(avWeight, weight_fudge_factor = 1.5){ get_in_cos <- function(weights, coverage = 0.95){ temp <- order(weights, decreasing=T) - csets <- temp[1:min(which(cumsum(weights[temp]) > coverage))] # 95% + csets <- temp[1:min(which(cumsum(weights[temp]) >= coverage))] # 95% return(list(csets)) } diff --git a/README.md b/README.md index 6e0189e..f0fbbed 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # ColocBoost for multi-context colocalization in molecular QTL and GWAS studies +[![Codecov test coverage](https://codecov.io/gh/StatFunGen/colocboost/branch/master/graph/badge.svg)](https://codecov.io/gh/StatFunGen/colocboost?branch=master) This R package implements ColocBoost --- motivated and designed for colocalization analysis ([first formulated here](https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1004383)) of multiple genetic association studies --- as a multi-task learning approach to variable selection regression with highly correlated predictors and sparse effects, based on frequentist statistical inference. It provides statistical evidence to identify which subsets of predictors have non-zero effects on which subsets of response variables. diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..1ec2832 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,106 @@ +# colocboost unit-testing + +## Overview + +This repository contains a comprehensive testing framework for the [colocboost](https://github.com/StatFunGen/colocboost) R package. The framework is designed to ensure the reliability and correctness of the package's functionality through automated testing. + +## Quick Start + +1. Navigate to test folder: + ```bash + cd tests + ``` + +2. **First time use**: run the setup script to install required packages and configure the testing environment: + ```r + # install.packages(c("devtools", "testthat", "covr", "roxygen2")) + source("setup_testthat.R") + ``` + +3. Run all tests: + ```r + devtools::load_all() + devtools::test() + ``` + or, + ```bash + Rscript run_tests.R + ``` + To test one file: + ```r + devtools::test_active_file("testthat/test_colocboost.R") + ``` + +## Files and Structure + +- `setup_testthat.R`: Script to set up the testthat infrastructure +- `run_tests.R`: Script to run all tests and generate test coverage reports +- `testthat/`: Directory containing test files + - `test_package.R`: Tests for basic package functionality + - `test_colocboost.R`: Tests for the main colocalization functions + - `test_utils.R`: Tests for utility functions + - `test_model.R`: Tests for model fitting and prediction functions +- `.github/workflows/`: GitHub Actions workflow configurations + +## How To Use + +### Running Tests Locally + +To run the tests locally, you can use: + +```r +devtools::test() +``` + +Or run individual test files: + +```r +devtools::test_file("testthat/test_colocboost.R") +``` + +### Adding New Tests + +When adding new functionality to the colocboost package, corresponding tests should be added to maintain test coverage. Follow these steps: + +1. Identify which test file should contain the new tests, or create a new test file if necessary +2. Write test functions using the testthat package's expectations +3. Run the tests to ensure they pass + +Example test: + +```r +test_that("new_function produces expected output", { + # Arrange + input_data <- prepare_test_data() + + # Act + result <- new_function(input_data) + + # Assert + expect_equal(result$some_value, expected_value) + expect_true(is.data.frame(result$data)) +}) +``` + +### Test Coverage + +The `covr` package is used to measure test coverage, which indicates what percentage of your code is being tested. Aim for at least 80% coverage for a reliable package. + +To generate a test coverage report: +```r +library(covr) +coverage <- package_coverage() +report(coverage) +``` + +## GitHub Actions Workflow + +This testing framework includes GitHub Actions workflows that automatically run the tests on every push and pull request. The workflows test the package on multiple operating systems (Windows, macOS, and Linux) to ensure cross-platform compatibility. + +The workflow is defined in `.github/workflows/R-CMD-check.yaml` and automatically runs: +- R CMD check +- Test coverage reporting + +Test results and coverage statistics are available on the GitHub Actions page after each push or pull request. + +For more information, check the [testthat documentation](https://testthat.r-lib.org/). diff --git a/tests/run_tests.R b/tests/run_tests.R new file mode 100644 index 0000000..4ac19a2 --- /dev/null +++ b/tests/run_tests.R @@ -0,0 +1,39 @@ +#!/usr/bin/env Rscript + +# Script to install the package and run tests + +# Check for required packages +required_packages <- c("devtools", "testthat", "covr") +for (pkg in required_packages) { + if (!requireNamespace(pkg, quietly = TRUE)) { + stop(sprintf("Cannot find required package: %s\n", pkg)) + } +} + +# Path to working directory +args <- commandArgs(trailingOnly = TRUE) +if (length(args) > 0) { + work_dir <- args[1] +} else { + work_dir <- getwd() +} + +setwd(work_dir) +cat(sprintf("Working directory: %s\n", work_dir)) + +# Install the package +# cat("Installing colocboost package...\n") +# devtools::install(".", dependencies = FALSE, quiet = TRUE) + +# Run tests +cat("Running tests...\n") +devtools::load_all('../') +testthat::test_dir("testthat/") + +# Calculate test coverage +cat("Calculating test coverage...\n") +coverage <- covr::package_coverage() +print(coverage) +covr::report(coverage) + +cat("Tests completed.\n") \ No newline at end of file diff --git a/tests/setup_testthat.R b/tests/setup_testthat.R new file mode 100644 index 0000000..caab98f --- /dev/null +++ b/tests/setup_testthat.R @@ -0,0 +1,130 @@ +setwd("../") + +# Install necessary packages +if (!requireNamespace("devtools", quietly = TRUE)) { + stop("devtools not found") +} +if (!requireNamespace("testthat", quietly = TRUE)) { + stop("testthat not found") +} +if (!requireNamespace("covr", quietly = TRUE)) { + stop("covr not found") +} +if (!requireNamespace("roxygen2", quietly = TRUE)) { + stop("roxygen2 not found") +} + +# Set up testthat infrastructure +if (!dir.exists("tests/testthat")) { + devtools::use_testthat() +} + +# Create GitHub Actions workflow for continuous integration +if (!dir.exists(".github/workflows")) { + dir.create(".github/workflows", recursive = TRUE, showWarnings = FALSE) +} + +# Write GitHub Actions workflow file +workflow_file <- ".github/workflows/R-CMD-check.yaml" +writeLines( +' +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: "release"} + - {os: macOS-latest, r: "release"} + - {os: ubuntu-latest, r: "release"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install dependencies + run: | + install.packages(c("remotes", "rcmdcheck", "covr", "testthat")) + remotes::install_deps(dependencies = TRUE) + shell: Rscript {0} + + - name: Check + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") + shell: Rscript {0} + + - name: Test coverage + run: covr::codecov() + shell: Rscript {0} +', workflow_file) + +# Create a test coverage workflow +coverage_file <- ".github/workflows/test-coverage.yaml" +writeLines( +' +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage + +jobs: + test-coverage: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + + - name: Install dependencies + run: | + install.packages(c("remotes", "covr")) + remotes::install_deps(dependencies = TRUE) + shell: Rscript {0} + + - name: Test coverage + run: covr::codecov() + shell: Rscript {0} +', coverage_file) + +# Add code coverage badge to README.md +readme_file <- "README.md" +if (file.exists(readme_file)) { + readme_content <- readLines(readme_file) + if (!any(grepl("codecov", readme_content))) { + badge <- "[![Codecov test coverage](https://codecov.io/gh/StatFunGen/colocboost/branch/master/graph/badge.svg)](https://codecov.io/gh/StatFunGen/colocboost?branch=master)" + # Add badge after the first line if it's a title + if (length(readme_content) > 0) { + readme_content <- c(readme_content[1], badge, readme_content[-1]) + } else { + readme_content <- c("# colocboost", badge) + } + writeLines(readme_content, readme_file) + } +} + +# Set up basic testthat structure +message("testthat setup complete. Next, create test files for each R file in the package.") \ No newline at end of file diff --git a/tests/testthat.R b/tests/testthat.R index 12de635..9bbc0ce 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -9,4 +9,4 @@ library(testthat) library(colocboost) -test_check("colocboost") +test_check("colocboost") \ No newline at end of file diff --git a/tests/testthat/test_colocboost.R b/tests/testthat/test_colocboost.R new file mode 100644 index 0000000..38cf29f --- /dev/null +++ b/tests/testthat/test_colocboost.R @@ -0,0 +1,239 @@ +library(testthat) + +# Utility function to generate test data +generate_test_data <- function(n = 100, p = 20, L = 2, seed = 42) { + set.seed(seed) + + # Generate X with LD structure + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + # Generate true effects - create a shared causal variant + true_beta <- matrix(0, p, L) + true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 + true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) + true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 + + # Generate Y with some noise + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + # Prepare LD matrix + LD <- cor(X) + + # Return test objects + list( + X = X, + Y = Y, + LD = LD, + true_beta = true_beta + ) +} + +# Create small dataset for basic tests +test_data <- generate_test_data() + +# Basic test for colocboost functionality with individual data +test_that("colocboost runs with individual data", { + skip_on_cran() + + # Convert Y to list + Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + + # Run colocboost with minimal parameters + result <- colocboost( + X = test_data$X, + Y = Y_list, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + + # Test that we get a colocboost object + expect_s3_class(result, "colocboost") + + # Check structure of results + expect_type(result$data_info, "list") + expect_type(result$model_info, "list") + + # Check dimensions + expect_equal(length(result$data_info$variables), ncol(test_data$X)) + expect_equal(result$data_info$n_outcomes, 2) +}) + +# Test with summary statistics +test_that("colocboost runs with summary statistics", { + skip_on_cran() + + # Generate summary statistics from the individual data + X <- test_data$X + Y <- test_data$Y + LD <- test_data$LD + + # Calculate beta, se, and z-scores + beta <- matrix(0, ncol(X), ncol(Y)) + se <- matrix(0, ncol(X), ncol(Y)) + z <- matrix(0, ncol(X), ncol(Y)) + + for (i in 1:ncol(Y)) { + fit <- lm(Y[,i] ~ X) + beta[,i] <- coef(fit)[-1] + se[,i] <- summary(fit)$coefficients[-1, "Std. Error"] + z[,i] <- beta[,i]/se[,i] + } + + # Create summary statistics data frames + sumstat_list <- list() + for (i in 1:ncol(Y)) { + sumstat_list[[i]] <- data.frame( + beta = beta[,i], + sebeta = se[,i], + z = z[,i], + n = nrow(X), + variant = colnames(X) + ) + } + + # Run colocboost with summary statistics + result <- colocboost( + sumstat = sumstat_list, + LD = list(LD), + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + + # Test that we get a colocboost object + expect_s3_class(result, "colocboost") + + # Check dimensions + expect_equal(length(result$data_info$variables), ncol(X)) + expect_equal(result$data_info$n_outcomes, 2) +}) + +# Test target outcome functionality +test_that("colocboost handles target outcome correctly", { + skip_on_cran() + + # Convert Y to list + Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + + # Run colocboost with target_outcome_idx = 1 + result <- colocboost( + X = test_data$X, + Y = Y_list, + target_outcome_idx = 1, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + + # Test that we get a colocboost object + expect_s3_class(result, "colocboost") + + # Check target outcome is correctly set + expect_equal(result$data_info$outcome_info$is_target[1], TRUE) + expect_equal(result$data_info$outcome_info$is_target[2], FALSE) +}) + +# Test get_cos_summary functionality +test_that("get_cos_summary returns expected structure", { + skip_on_cran() + + # Convert Y to list + Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + + # Run colocboost with minimal parameters + result <- colocboost( + X = test_data$X, + Y = Y_list, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + + # Get summary + if (!is.null(result$cos_summary)) { + summary <- result$cos_summary + + # Check structure of summary table + expect_true(is.data.frame(summary)) + + # Check expected columns exist + expected_cols <- c("target_outcome", "colocalized_outcomes", "cos_id", + "purity", "top_variable", "top_variable_vcp") + for (col in expected_cols) { + expect_true(col %in% colnames(summary)) + } + } else { + # If no colocalization is found, just check that the summary is NULL + expect_null(result$cos_summary) + } +}) + +# Test colocboost_plot functionality (basic call should not error) +test_that("colocboost_plot runs without error", { + skip_on_cran() + + # Convert Y to list + Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + + # Run colocboost with minimal parameters + result <- colocboost( + X = test_data$X, + Y = Y_list, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + + # Basic plotting should not throw an error + # Use suppressWarnings as the function might warn about lack of colocalization + expect_error(suppressWarnings(colocboost_plot(result)), NA) +}) + +# Test get_strong_colocalization functionality +test_that("get_strong_colocalization maintains colocboost structure", { + skip_on_cran() + + # Convert Y to list + Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + + # Run colocboost with minimal parameters + result <- colocboost( + X = test_data$X, + Y = Y_list, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + + # Run get_strong_colocalization + strong_result <- get_strong_colocalization( + result, + cos_npc_cutoff = 0.2, # Lower threshold for testing + npc_outcome_cutoff = 0.1 # Lower threshold for testing + ) + + # Should still be a colocboost object + expect_s3_class(strong_result, "colocboost") + + # Should have the same data_info + expect_equal(strong_result$data_info, result$data_info) +}) + +# Test error handling with malformed input +test_that("colocboost handles missing/invalid inputs appropriately", { + skip_on_cran() + + # Test missing both individual data and summary stats + expect_warning(colocboost(), "Error: No individual data") + + # Test mismatched dimensions + X_bad <- test_data$X[1:(nrow(test_data$X) - 10), ] + Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + + expect_error(colocboost(X = X_bad, Y = Y_list), "do not have the same sample size") +}) \ No newline at end of file diff --git a/tests/testthat/test_corner_cases.R b/tests/testthat/test_corner_cases.R new file mode 100644 index 0000000..a3a5d6b --- /dev/null +++ b/tests/testthat/test_corner_cases.R @@ -0,0 +1,323 @@ +library(testthat) + +# Generate test data with specific edge case structures +generate_edge_case_data <- function(case = "missing_values", + n = 100, + p = 20, + L = 2, + seed = 42) { + set.seed(seed) + + # Generate X with LD structure + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + # Generate true effects with shared causal variant + true_beta <- matrix(0, p, L) + true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 + true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) + true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 + + # Generate Y with some noise + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + # Modifications based on case + if (case == "missing_values") { + # Introduce missing values in Y + Y[sample(1:n, 5), 1] <- NA + Y[sample(1:n, 5), 2] <- NA + } else if (case == "different_samples") { + # Create different sample sizes for each outcome + Y1 <- Y[1:(n-10), 1] + Y2 <- Y[1:(n-20), 2] + return(list( + X = X, + Y = list(Y1, Y2), + LD = cor(X), + true_beta = true_beta + )) + } else if (case == "different_variants") { + # Create different variant sets for each outcome + X1 <- X[, 1:(p-5)] + X2 <- X[, 6:p] + Y1 <- Y[, 1] + Y2 <- Y[, 2] + return(list( + X = list(X1, X2), + Y = list(Y1, Y2), + LD = cor(X), + true_beta = true_beta + )) + } else if (case == "no_colocalization") { + # Generate data with no true colocalization + true_beta[5, 2] <- 0 # Remove shared effect + Y[, 2] <- X %*% true_beta[, 2] + rnorm(n, 0, 1) + } else if (case == "high_correlation") { + # Create highly correlated outcomes + Y[, 2] <- 0.9 * Y[, 1] + rnorm(n, 0, 0.3) + } + + # Return test objects + list( + X = X, + Y = Y, + LD = cor(X), + true_beta = true_beta + ) +} + +# Test colocboost handling of missing values +test_that("colocboost handles missing values in Y", { + skip_on_cran() + + # Generate data with missing values + test_data <- generate_edge_case_data("missing_values") + + # Convert Y to list + Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + + # Run colocboost - should handle NAs automatically + expect_warning( + result <- colocboost( + X = test_data$X, + Y = Y_list, + M = 5 # Small number of iterations for testing + ), + NA + ) + + # Test that we get a colocboost object + expect_s3_class(result, "colocboost") +}) + +# Test colocboost with different sample sizes +test_that("colocboost handles different sample sizes", { + skip_on_cran() + + # Generate data with different sample sizes + test_data <- generate_edge_case_data("different_samples") + + # Run colocboost - should error without sample indices + expect_error( + colocboost( + X = test_data$X[1], + Y = test_data$Y + ) + ) + + # Test with proper row names to handle different sample sizes + X <- test_data$X + rownames(X) <- paste0("sample", 1:nrow(X)) + Y1 <- test_data$Y[[1]] + Y2 <- test_data$Y[[2]] + rownames(Y1) <- paste0("sample", 1:length(Y1)) + rownames(Y2) <- paste0("sample", 1:length(Y2)) + + # Skip test if function fails in unexpected ways + # (This test may require more complex setup than we can do here) + skip("Requires specialized setup for different sample sizes") +}) + +# Test colocboost with different variant sets +test_that("colocboost handles different variant sets", { + skip_on_cran() + + # Generate data with different variant sets + test_data <- generate_edge_case_data("different_variants") + + # Need to create dict_YX for this case + dict_YX <- matrix(c(1, 2, 1, 2), ncol=2) + + # Run colocboost + expect_warning( + result <- colocboost( + X = test_data$X, + Y = test_data$Y, + dict_YX = dict_YX, + M = 5 # Small number of iterations for testing + ), + NA + ) + + # Test that we get a colocboost object + expect_s3_class(result, "colocboost") +}) + +# Test colocboost with no true colocalization +test_that("colocboost correctly identifies absence of colocalization", { + skip_on_cran() + + # Generate data with no colocalization + test_data <- generate_edge_case_data("no_colocalization") + + # Convert Y to list + Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + + # Run colocboost + suppressWarnings({ + result <- colocboost( + X = test_data$X, + Y = Y_list, + M = 10 # Need more iterations for this test + ) + }) + + # Test that we get a colocboost object + expect_s3_class(result, "colocboost") + + # In many cases with no true colocalization, cos_summary should be NULL + # But this isn't guaranteed due to statistical fluctuations + # Instead we'll check that object has expected structure + expect_true(any( + is.null(result$cos_summary) || + is.data.frame(result$cos_summary) + )) +}) + +# Test colocboost with highly correlated traits +test_that("colocboost handles highly correlated traits", { + skip_on_cran() + + # Generate data with correlated traits + test_data <- generate_edge_case_data("high_correlation") + + # Convert Y to list + Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + + # Create correlation matrix for residuals + residual_corr <- matrix(c(1, 0.9, 0.9, 1), nrow=2) + + # Run colocboost with residual correlation + suppressWarnings({ + result <- colocboost( + X = test_data$X, + Y = Y_list, + residual_correlation = residual_corr, + M = 10 # Need more iterations for this test + ) + }) + + # Test that we get a colocboost object + expect_s3_class(result, "colocboost") +}) + +# Test with very small dataset +test_that("colocboost handles very small datasets", { + skip_on_cran() + + # Create tiny dataset + set.seed(123) + X_small <- matrix(rnorm(10*5), 10, 5) + colnames(X_small) <- paste0("SNP", 1:5) + Y_small <- matrix(rnorm(10*2), 10, 2) + Y_list_small <- list(Y_small[,1], Y_small[,2]) + + # Run colocboost + suppressWarnings({ + result <- colocboost( + X = X_small, + Y = Y_list_small, + M = 5 # Small number of iterations for testing + ) + }) + + # Test that we get a colocboost object + expect_s3_class(result, "colocboost") +}) + +# Test with custom parameter settings +test_that("colocboost works with custom parameters", { + skip_on_cran() + + # Create simple dataset + set.seed(123) + X <- matrix(rnorm(50*10), 50, 10) + colnames(X) <- paste0("SNP", 1:10) + Y <- matrix(rnorm(50*2), 50, 2) + Y_list <- list(Y[,1], Y[,2]) + + # Run colocboost with custom parameters + suppressWarnings({ + result <- colocboost( + X = X, + Y = Y_list, + M = 5, # Small number of iterations for testing + lambda = 0.7, # Custom lambda + tau = 0.02, # Custom tau + func_simplex = "only_z2z", # Different simplex function + learning_rate_init = 0.02, # Custom learning rate + coverage = 0.9, # Different coverage + min_abs_corr = 0.4 # Lower correlation threshold + ) + }) + + # Test that we get a colocboost object + expect_s3_class(result, "colocboost") +}) + +# Test target outcome functionality +test_that("colocboost prioritizes target outcome correctly", { + skip_on_cran() + + # Generate test data + set.seed(123) + X <- matrix(rnorm(100*20), 100, 20) + colnames(X) <- paste0("SNP", 1:20) + + # Create shared causal variant + b <- rep(0, 20) + b[5] <- 0.5 + b[10] <- 0.3 + + # Generate outcomes with different effects + Y1 <- X %*% b + rnorm(100, 0, 1) + + # Y2 has different effect at position 5 + b2 <- b + b2[5] <- 0.2 + Y2 <- X %*% b2 + rnorm(100, 0, 1) + + Y_list <- list(Y1, Y2) + + # Run colocboost with Y1 as target + suppressWarnings({ + result_target1 <- colocboost( + X = X, + Y = Y_list, + target_outcome_idx = 1, + lambda_target_outcome = 0.9, # Higher lambda for target + M = 10 # Need more iterations for this test + ) + }) + + # Run colocboost with Y2 as target + suppressWarnings({ + result_target2 <- colocboost( + X = X, + Y = Y_list, + target_outcome_idx = 2, + lambda_target_outcome = 0.9, # Higher lambda for target + M = 10 # Need more iterations for this test + ) + }) + + # Both should produce colocboost objects + expect_s3_class(result_target1, "colocboost") + expect_s3_class(result_target2, "colocboost") + + # Test that target flag is correctly set + expect_equal(result_target1$data_info$outcome_info$is_target[1], TRUE) + expect_equal(result_target1$data_info$outcome_info$is_target[2], FALSE) + + expect_equal(result_target2$data_info$outcome_info$is_target[1], FALSE) + expect_equal(result_target2$data_info$outcome_info$is_target[2], TRUE) +}) \ No newline at end of file diff --git a/tests/testthat/test_model.R b/tests/testthat/test_model.R new file mode 100644 index 0000000..c0eeb94 --- /dev/null +++ b/tests/testthat/test_model.R @@ -0,0 +1,235 @@ +library(testthat) + +# Utility function to generate a simple colocboost model object +generate_test_model <- function(n = 100, p = 20, L = 2, seed = 42) { + set.seed(seed) + + # Generate X with LD structure + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + # Generate true effects - create a shared causal variant + true_beta <- matrix(0, p, L) + true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 + true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) + true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 + + # Generate Y with some noise + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + # Convert Y to list + Y_list <- list(Y[,1], Y[,2]) + + # Run colocboost with minimal parameters to get a model object + suppressWarnings({ + result <- colocboost( + X = X, + Y = Y_list, + M = 5, # Small number of iterations for faster testing + output_level = 3 # Include full model details + ) + }) + + result +} + +# Test for colocboost_check_update_jk +test_that("colocboost_check_update_jk handles update selection", { + skip_on_cran() + + # Generate a test model + cb_obj <- generate_test_model() + + # Check that function can be called without error + # Note: This is testing a function that's normally internal + # and would typically be covered by testing the main function + if (exists("colocboost_check_update_jk")) { + expect_error({ + result <- colocboost_check_update_jk( + cb_obj$cb_model, + cb_obj$cb_model_para, + cb_obj$cb_data + ) + }, NA) + } else { + skip("colocboost_check_update_jk not directly accessible") + } +}) + +# Test for colocboost_update +test_that("colocboost_update updates model parameters", { + skip_on_cran() + + # Generate a test model + cb_obj <- generate_test_model() + + # Access update function if it's exported + if (exists("colocboost_update")) { + # Create a temporary update status to test with + cb_obj$cb_model_para$update_temp <- list( + update_status = rep(1, cb_obj$cb_model_para$L), + real_update_jk = rep(1, cb_obj$cb_model_para$L) + ) + + # Test function + expect_error({ + updated_model <- colocboost_update( + cb_obj$cb_model, + cb_obj$cb_model_para, + cb_obj$cb_data + ) + }, NA) + } else { + skip("colocboost_update not directly accessible") + } +}) + +# Test for colocboost_init_data +test_that("colocboost_init_data correctly initializes data", { + skip_on_cran() + + # Generate test data + set.seed(42) + n <- 50 + p <- 10 + X <- matrix(rnorm(n*p), n, p) + colnames(X) <- paste0("SNP", 1:p) + Y <- matrix(rnorm(n*2), n, 2) + + # If the function is exported + if (exists("colocboost_init_data")) { + expect_error({ + cb_data <- colocboost_init_data( + X = list(X), + Y = list(Y[,1], Y[,2]), + dict_YX = matrix(c(1:2, rep(1,2)), ncol=2), + Z = NULL, + LD = NULL, + N_sumstat = NULL, + dict_sumstatLD = NULL, + Var_y = NULL, + SeBhat = NULL + ) + }, NA) + } else { + skip("colocboost_init_data not directly accessible") + } +}) + +# Test colocboost_assemble function +test_that("colocboost_assemble processes model results", { + skip_on_cran() + + # Generate a test model + cb_obj <- generate_test_model() + + # If the function is exported + if (exists("colocboost_assemble")) { + expect_error({ + result <- colocboost_assemble( + cb_obj, + coverage = 0.95 + ) + }, NA) + } else { + skip("colocboost_assemble not directly accessible") + } +}) + +# Test for colocboost_workhorse +test_that("colocboost_workhorse performs boosting iterations", { + skip_on_cran() + + # Generate test data + set.seed(42) + n <- 50 + p <- 10 + X <- matrix(rnorm(n*p), n, p) + colnames(X) <- paste0("SNP", 1:p) + Y <- matrix(rnorm(n*2), n, 2) + Y_list <- list(Y[,1], Y[,2]) + + # Initialize CB objects + suppressWarnings({ + # First get the data object by running colocboost with M=0 + temp <- colocboost(X = X, Y = Y_list, M = 0) + + # If the workhorse function is exported + if (exists("colocboost_workhorse")) { + expect_error({ + result <- colocboost_workhorse( + temp$cb_data, + M = 5 # Small number for testing + ) + }, NA) + } else { + skip("colocboost_workhorse not directly accessible") + } + }) +}) + +# Test colocboost_plot function +test_that("colocboost_plot handles different plot options", { + skip_on_cran() + + # Generate a test model + cb_obj <- generate_test_model() + + # Basic plot call + expect_error(suppressWarnings(colocboost_plot(cb_obj)), NA) + + # Test with different y-axis values + expect_error(suppressWarnings(colocboost_plot(cb_obj, y = "z_original")), NA) + + # Test with different outcome_idx + expect_error(suppressWarnings(colocboost_plot(cb_obj, outcome_idx = 1)), NA) +}) + +# Test get_cos_summary function +test_that("get_cos_summary handles different parameters", { + skip_on_cran() + + # Generate a test model + cb_obj <- generate_test_model() + + # Basic summary call + expect_error(get_cos_summary(cb_obj), NA) + + # With custom outcome names + expect_error(get_cos_summary(cb_obj, outcome_names = c("Trait1", "Trait2")), NA) + + # With gene name + summary_with_gene <- get_cos_summary(cb_obj, gene_name = "TestGene") + + # If summary is not NULL, check for gene_name column + if (!is.null(summary_with_gene)) { + expect_true("gene_name" %in% colnames(summary_with_gene)) + expect_equal(summary_with_gene$gene_name[1], "TestGene") + } +}) + +# Test for get_strong_colocalization +test_that("get_strong_colocalization filters results correctly", { + skip_on_cran() + + # Generate a test model + cb_obj <- generate_test_model() + + # Basic call + expect_error(get_strong_colocalization(cb_obj), NA) + + # With stricter thresholds + expect_error(get_strong_colocalization(cb_obj, cos_npc_cutoff = 0.8), NA) + + # With p-value threshold + expect_error(get_strong_colocalization(cb_obj, pvalue_cutoff = 0.05), NA) +}) \ No newline at end of file diff --git a/tests/testthat/test_package.R b/tests/testthat/test_package.R new file mode 100644 index 0000000..2f913c7 --- /dev/null +++ b/tests/testthat/test_package.R @@ -0,0 +1,176 @@ +library(testthat) + +# Helper function to parse NAMESPACE file and extract exported functions +get_exported_functions <- function() { + # Locate the NAMESPACE file + if (file.exists("../../NAMESPACE")) { + namespace_path <- "../../NAMESPACE" + } else if (file.exists("../NAMESPACE")) { + namespace_path <- "../NAMESPACE" + } else { + # If running within package, try to find it in the package directory + package_dir <- find.package("colocboost", quiet = TRUE) + if (length(package_dir) > 0) { + namespace_path <- file.path(package_dir, "NAMESPACE") + if (!file.exists(namespace_path)) { + return(NULL) + } + } else { + return(NULL) + } + } + + # Read NAMESPACE file + ns_lines <- readLines(namespace_path) + + # Extract exported functions + export_lines <- ns_lines[grepl("^export\\(", ns_lines)] + exported_funcs <- gsub("export\\((.*)\\)", "\\1", export_lines) + + # Clean up any quotation marks + exported_funcs <- gsub("\"", "", exported_funcs) + exported_funcs <- gsub("\'", "", exported_funcs) + + return(exported_funcs) +} + +# Test that the package loads without errors +test_that("package loads without errors", { + expect_error(library(colocboost), NA) +}) + +# Test that exported functions exist +test_that("package has expected functions", { + exported_functions <- get_exported_functions() + + # If we couldn't parse the NAMESPACE, fall back to a minimal set + if (is.null(exported_functions) || length(exported_functions) == 0) { + warning("Could not parse NAMESPACE file, falling back to minimum expected functions") + exported_functions <- c("colocboost") + } + + # Test that each exported function exists + for (func in exported_functions) { + expect_true(exists(func, where = asNamespace("colocboost")), + info = paste0("Function '", func, "' should exist in the package")) + } + + # Print the functions we found for debugging + message("Tested for existence of the following exported functions:") + message(paste(exported_functions, collapse = ", ")) +}) + +# Test that package depends on required packages +test_that("package depends on required packages", { + # Get dependencies from DESCRIPTION file + get_dependencies <- function() { + desc_file <- NULL + if (file.exists("../../DESCRIPTION")) { + desc_file <- "../../DESCRIPTION" + } else if (file.exists("../DESCRIPTION")) { + desc_file <- "../DESCRIPTION" + } else { + package_dir <- find.package("colocboost", quiet = TRUE) + if (length(package_dir) > 0) { + desc_file <- file.path(package_dir, "DESCRIPTION") + } + } + + if (is.null(desc_file) || !file.exists(desc_file)) { + return(c("R", "Rfast", "matrixStats")) # Fallback + } + + desc <- readLines(desc_file) + + # Find Depends and Imports sections + deps_start <- grep("^(Depends|Imports):", desc) + deps_end <- c(deps_start[-1] - 1, length(desc)) + deps_end <- deps_end[1:length(deps_start)] + + # Extract dependencies + deps <- c() + for (i in 1:length(deps_start)) { + section <- desc[deps_start[i]:deps_end[i]] + section_text <- paste(section, collapse = " ") + + # Remove section header + section_text <- gsub("^(Depends|Imports):", "", section_text) + + # Extract package names + pkgs <- strsplit(section_text, ",")[[1]] + pkgs <- trimws(pkgs) + + # Remove version requirements + pkgs <- gsub("\\s*\\(.*\\)", "", pkgs) + + deps <- c(deps, pkgs) + } + + # Remove R from the list (it's always available) + deps <- setdiff(deps, "R (>= 4.0.0)") + deps <- c("R", deps) # Add R back in a standardized format + + return(deps) + } + + expected_deps <- get_dependencies() + + # Check if packages can be loaded - this is a better test than checking DESCRIPTION + for (pkg in expected_deps) { + if (pkg != "R") { # Skip R as it's always loaded + if (!requireNamespace(pkg, quietly = TRUE)) { + skip(paste("Package", pkg, "not available for testing")) + } + } + } + + # Basic test succeeded if we got here + expect_true(TRUE) +}) + +# Test that the package has the correct structure +test_that("package has correct S3 methods", { + # Check S3 methods using methods function if any expected + expect_true(is.function(colocboost), "colocboost should be a function") + + # Test that colocboost returns correct class + skip_on_cran() + + # Generate minimal test data + set.seed(123) + n <- 20 # Small sample for quick test + p <- 5 # Few variables + X <- matrix(rnorm(n*p), n, p) + colnames(X) <- paste0("X", 1:p) + Y <- matrix(rnorm(n*2), n, 2) + Y_list <- list(Y[,1], Y[,2]) + X_list <- list(X, X) + + # Run with minimal iterations + suppressWarnings({ + result <- colocboost(X = X_list, Y = Y_list, M = 2) + }) + + # Test class + expect_s3_class(result, "colocboost") +}) + +# Test documentation exists for key functions +test_that("documentation exists for key functions", { + # Get all exported functions + exported_functions <- get_exported_functions() + + # If we couldn't parse the NAMESPACE, fall back to a minimal set + if (is.null(exported_functions) || length(exported_functions) == 0) { + exported_functions <- c("colocboost", "colocboost_plot", "get_cos_summary") + } + + # Limit to a reasonable number to check + funcs_to_check <- exported_functions[1:min(length(exported_functions), 5)] + + # Check if help pages exist for key functions + for (func in funcs_to_check) { + expect_error(help(func), NA, + info = paste0("Documentation should exist for function '", func, "'")) + } +}) \ No newline at end of file diff --git a/tests/testthat/test_sumstats.R b/tests/testthat/test_sumstats.R new file mode 100644 index 0000000..e69de29 diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R new file mode 100644 index 0000000..272c6ab --- /dev/null +++ b/tests/testthat/test_utils.R @@ -0,0 +1,184 @@ +library(testthat) + +# Test for get_integrated_weight function +test_that("get_integrated_weight correctly integrates weights", { + # Create test weights matrix + weights <- matrix(c(0.3, 0.2, 0.1, 0.4, 0.4, 0.3, 0.2, 0.1), nrow = 4) + colnames(weights) <- c("outcome1", "outcome2") + + # Call the function + result <- get_integrated_weight(weights, weight_fudge_factor = 1.5) + + # Test expected properties + expect_equal(length(result), nrow(weights)) + expect_equal(sum(result), 1, tolerance = 1e-10) # Should sum to 1 + + # Higher values in both columns should get higher integrated weights + expect_true(result[1] > result[3]) + expect_true(result[4] < result[2]) +}) + +# Test for w_cs function +test_that("w_cs correctly identifies confidence set for weight vector", { + # Create test weight vector + w <- c(0.5, 0.3, 0.1, 0.1) + + # Call function + result <- w_cs(w, coverage = 0.8) + + # Expected result for 80% coverage: w[1] + w[2] = 0.8, so first 2 elements should be 1 + expected <- c(1, 1, 0, 0) + expect_equal(result, expected) + + # Test with different coverage + result2 <- w_cs(w, coverage = 0.9) + expected2 <- c(1, 1, 1, 0) # First 3 elements cover 90% + expect_equal(result2, expected2) +}) + +# Test for get_in_cos function +test_that("get_in_cos correctly identifies indices in confidence set", { + # Create test weight vector + w <- c(0.1, 0.5, 0.2, 0.1, 0.1) + + # Call function + result <- get_in_cos(w, coverage = 0.7) + + # Expected result: top elements that sum to 0.7 coverage + # Ordering: 2, 3, 1, 4, 5 + # So elements 2 and 3 (indexes 2 and 3) should be in the CS + expect_equal(result[[1]], c(2, 3)) + + # Test with higher coverage + result2 <- get_in_cos(w, coverage = 0.9) + # Should include elements 2, 3, 1, 4 + expect_equal(sort(result2[[1]]), c(1, 2, 3, 4)) +}) + +# Test for merge_sets function +test_that("merge_sets correctly merges overlapping sets", { + # Create test vector of semicolon-delimited sets + vec <- c("1;2;3", "3;4;5", "6;7", "8;9", "9;10", "11") + + # Call function + result <- merge_sets(vec) + + # Expected result: Sets {1,2,3,4,5}, {6,7}, {8,9,10}, {11} + expect_equal(length(result), 4) + expect_true("1;2;3;4;5" %in% result) + expect_true("6;7" %in% result) + expect_true("8;9;10" %in% result) + expect_true("11" %in% result) +}) + +# Test for get_purity function +test_that("get_purity calculates correlation statistics correctly", { + # Create test data with known correlation structure + set.seed(123) + X <- matrix(rnorm(100*3), 100, 3) + X[,2] <- X[,1] + rnorm(100, 0, 0.2) # Column 2 highly correlated with column 1 + X[,3] <- rnorm(100) # Column 3 independent + + Xcorr <- cor(X) + + # Test with a single position + result_single <- get_purity(1, X = X) + expect_equal(result_single, c(1, 1, 1)) # For single element, all correlations are 1 + + # Test with highly correlated positions + result_corr <- get_purity(c(1, 2), X = X) + expect_gt(result_corr[1], 0.8) # Minimum correlation should be high + + # Test with uncorrelated positions + result_uncorr <- get_purity(c(1, 3), X = X) + expect_lt(result_uncorr[1], 0.5) # Minimum correlation should be low + + # Test with XCorr input + result_Xcorr <- get_purity(c(1, 2), Xcorr = Xcorr) + expect_gt(result_Xcorr[1], 0.8) # Should give similar results to X input +}) + +# Test for get_cormat function +test_that("get_cormat calculates correlation matrix correctly", { + # Create test data + set.seed(123) + X <- matrix(rnorm(100*5), 100, 5) + + # Make some columns correlated + X[,2] <- X[,1] + rnorm(100, 0, 0.2) + + # Calculate correlation matrix with base R + expected <- cor(X) + + # Calculate with get_cormat + result <- get_cormat(X) + + # Should match the R correlation matrix + expect_equal(result, expected, tolerance = 1e-6) +}) + +# Test for check_jk_jkeach function +test_that("check_jk_jkeach identifies equivalent variants based on LD", { + skip("Internal function being tested through main function") + + # This function is harder to test directly as it relies on internal colocboost objects + # We'll test it indirectly through colocboost main function +}) + +# Test for get_between_purity function +test_that("get_between_purity calculates correlation between sets", { + # Create test data with known correlation structure + set.seed(123) + X <- matrix(rnorm(100*5), 100, 5) + X[,2] <- X[,1] + rnorm(100, 0, 0.2) # Column 2 highly correlated with column 1 + X[,4] <- X[,3] + rnorm(100, 0, 0.2) # Column 4 highly correlated with column 3 + + Xcorr <- cor(X) + + # Test between correlated sets + set1 <- c(1, 2) + set2 <- c(1, 3) + result <- get_between_purity(set1, set2, X = X) + + # Min, max, median correlations + expect_length(result, 3) + + # Check that correlations between sets with shared elements are high + expect_gt(result[2], 0.8) # Max correlation should be high (same element) + + # Test between uncorrelated sets + set1 <- c(1, 2) + set2 <- c(3, 5) + result2 <- get_between_purity(set1, set2, X = X) + + # Min correlation should be low + expect_lt(result2[1], 0.5) +}) + +# Test for get_max_profile function +test_that("get_max_profile updates check_null_max correctly", { + skip("Internal function being tested through main function") + + # This function modifies internal CB object - hard to test directly +}) + +# Test for get_merge_ordered_with_indices function +test_that("get_merge_ordered_with_indices merges vectors", { + # Create test vectors + vec1 <- c("a", "b", "c") + vec2 <- c("b", "c", "d") + vec3 <- c("c", "d", "e") + + vector_list <- list(vec1, vec2, vec3) + + # Call function + result <- get_merge_ordered_with_indices(vector_list) + + # Expected result: a, b, c, d, e (unique elements in order of appearance) + expect_equal(result, c("a", "b", "c", "d", "e")) + + # Test with duplicate vectors + vector_list2 <- list(vec1, vec1, vec2, vec3) + result2 <- get_merge_ordered_with_indices(vector_list2) + expect_equal(result2, c("a", "b", "c", "d", "e")) +}) \ No newline at end of file