diff --git a/.Rbuildignore b/.Rbuildignore index e9c6505..0f64d8f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -7,6 +7,7 @@ ^\.git ^\.github ^\.gitignore +^\.covrignore ^codecov.yml ^src/Makevars$ ^tests/testthat/__snaps$ diff --git a/.github/recipe/recipe.yaml b/.github/recipe/recipe.yaml index 6067918..45f1046 100644 --- a/.github/recipe/recipe.yaml +++ b/.github/recipe/recipe.yaml @@ -17,8 +17,12 @@ build: requirements: host: - r-base + - r-matrixstats + - r-rfast run: - r-base + - r-matrixstats + - r-rfast tests: - script: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f7193d8..5637fc9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: run: .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE} - name: Setup pixi - uses: prefix-dev/setup-pixi@v0.8.3 + uses: prefix-dev/setup-pixi@v0.8.4 - name: Run unit tests run: pixi run --environment ${{ matrix.environment }} devtools_test @@ -57,7 +57,7 @@ jobs: run: .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE} - name: Setup pixi - uses: prefix-dev/setup-pixi@v0.8.3 + uses: prefix-dev/setup-pixi@v0.8.4 - name: Run unit tests run: pixi run --environment ${{ matrix.environment }} devtools_test @@ -83,7 +83,7 @@ jobs: run: .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE} - name: Setup pixi - uses: prefix-dev/setup-pixi@v0.8.3 + uses: prefix-dev/setup-pixi@v0.8.4 - name: Run unit tests run: pixi run --environment ${{ matrix.environment }} devtools_test diff --git a/.github/workflows/conda_build.yml b/.github/workflows/conda_build.yml index 9a8fa5f..0bab60e 100644 --- a/.github/workflows/conda_build.yml +++ b/.github/workflows/conda_build.yml @@ -58,7 +58,7 @@ jobs: tarBall: true - name: Setup pixi - uses: prefix-dev/setup-pixi@v0.8.3 + uses: prefix-dev/setup-pixi@v0.8.4 with: run-install: false diff --git a/.github/workflows/merge.yml b/.github/workflows/merge.yml index 8087afe..06a664c 100644 --- 
a/.github/workflows/merge.yml +++ b/.github/workflows/merge.yml @@ -27,7 +27,7 @@ jobs: run: .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE} - name: Setup pixi - uses: prefix-dev/setup-pixi@v0.8.3 + uses: prefix-dev/setup-pixi@v0.8.4 # - name: Check unit test code coverage # run: pixi run codecov diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9404d87..8cece18 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -43,7 +43,7 @@ jobs: run: .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE} - name: Setup pixi - uses: prefix-dev/setup-pixi@v0.8.3 + uses: prefix-dev/setup-pixi@v0.8.4 - name: Update version run: | diff --git a/.github/workflows/update-documentation.yml b/.github/workflows/update-documentation.yml index 2d90d66..30118d9 100644 --- a/.github/workflows/update-documentation.yml +++ b/.github/workflows/update-documentation.yml @@ -24,7 +24,7 @@ jobs: run: .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE} - name: Setup pixi - uses: prefix-dev/setup-pixi@v0.8.3 + uses: prefix-dev/setup-pixi@v0.8.4 - name: Run unit tests run: pixi run devtools_document diff --git a/DESCRIPTION b/DESCRIPTION index 4dd63af..0586bef 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: colocboost Type: Package Date: 2024-09-01 -Title: Multi-context colocalization analysis tool for molecular QTL and GWAS studies +Title: Multi-Context Colocalization Analysis Tool for Molecular QTL and GWAS Studies Version: 0.1.0 Authors@R: c( person(given = "Xuewei", family = "Cao", email = "xc2270@cumc.columbia.edu", role = c("cre", "aut")), @@ -12,7 +12,7 @@ Authors@R: c( person(given = "Kushal", family = "Dey", email = "deyk@mskcc.org", role = c("aut")), person(given = "Gao", family = "Wang", email = "gw2411@cumc.columbia.edu", role = c("aut")) ) -Maintainer: The package maintainer +Maintainer: Xuewei Cao <xc2270@cumc.columbia.edu> Description: This R package implements ColocBoost --- motivated and designed
for colocalization analysis of multiple genetic association studies --- as a multi-task learning approach to variable selection regression with highly correlated predictors and sparse effects, based on frequentist statistical inference. It provides statistical evidence to identify which subsets of predictors have non-zero effects on which subsets of response variables. Encoding: UTF-8 LazyData: true @@ -33,4 +33,4 @@ Suggests: VignetteBuilder: knitr Roxygen: list(markdown = TRUE) Config/testthat/edition: 3 -License: MIT +License: MIT + file LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..412e5b6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2025 +COPYRIGHT HOLDER: StatFunGen authors diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index 3143746..0000000 --- a/LICENSE.md +++ /dev/null @@ -1,21 +0,0 @@ -# MIT License - -Copyright (c) 2025 StatFunGen - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/NAMESPACE b/NAMESPACE index a0db141..e77ecf7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,3 +7,28 @@ export(colocboost_get_methods) export(colocboost_inits) export(colocboost_update) export(colocboost_workhorse) +importFrom(grDevices,adjustcolor) +importFrom(graphics,abline) +importFrom(graphics,axis) +importFrom(graphics,legend) +importFrom(graphics,mtext) +importFrom(graphics,par) +importFrom(graphics,points) +importFrom(graphics,text) +importFrom(stats,as.dist) +importFrom(stats,cor) +importFrom(stats,cutree) +importFrom(stats,density) +importFrom(stats,dnorm) +importFrom(stats,hclust) +importFrom(stats,median) +importFrom(stats,na.omit) +importFrom(stats,pchisq) +importFrom(stats,pnorm) +importFrom(stats,predict) +importFrom(stats,qnorm) +importFrom(stats,quantile) +importFrom(stats,smooth.spline) +importFrom(stats,var) +importFrom(utils,head) +importFrom(utils,tail) diff --git a/R/colocboost.R b/R/colocboost.R index ed9e022..1bb1245 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -98,6 +98,7 @@ #' \item{vcp}{The variable colocalized probability for each variable.} #' \item{data_info}{A object with detailed information from input data} #' +#' @importFrom stats na.omit #' @export #' #' @examples diff --git a/R/colocboost_addhoc_utils.R b/R/colocboost_addhoc_utils.R index 3956528..77c477a 100644 --- a/R/colocboost_addhoc_utils.R +++ b/R/colocboost_addhoc_utils.R @@ -108,7 +108,7 @@ merge_cos_ucos <- function(cb_obj, out_cos, out_ucos, coverage = 0.95, } - +#' @importFrom stats na.omit merge_ucos <- function(cb_obj, past_out, min_abs_corr = 0.5, median_abs_corr = NULL, diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R index fb3e79f..cbee657 100644 --- a/R/colocboost_assemble.R +++ b/R/colocboost_assemble.R @@ -39,7 +39,7 @@ colocboost_assemble <- function(cb_obj, tol = 1e-9, output_level = 1){ - if (class(cb_obj) != "colocboost"){ + if (!inherits(cb_obj, "colocboost")){ stop("Input must from colocboost object!")} # - data information 
diff --git a/R/colocboost_assemble_cos.R b/R/colocboost_assemble_cos.R index f25b1ad..4ed9996 100644 --- a/R/colocboost_assemble_cos.R +++ b/R/colocboost_assemble_cos.R @@ -1,3 +1,4 @@ +#' @importFrom stats as.dist cutree hclust colocboost_assemble_cos <- function(cb_obj, coverage = 0.95, func_intw = "fun_R", @@ -14,7 +15,7 @@ colocboost_assemble_cos <- function(cb_obj, between_purity = 0.8, tol = 1e-9){ - if (class(cb_obj) != "colocboost"){ + if (!inherits(cb_obj, "colocboost")){ stop("Input must from colocboost function!")} cb_model <- cb_obj$cb_model diff --git a/R/colocboost_assemble_ucos.R b/R/colocboost_assemble_ucos.R index 9aa10a6..7339852 100644 --- a/R/colocboost_assemble_ucos.R +++ b/R/colocboost_assemble_ucos.R @@ -1,7 +1,4 @@ - - - - +#' @importFrom stats as.dist cutree hclust colocboost_assemble_ucos <- function(cb_obj_single, coverage = 0.95, check_null = 0.1, @@ -17,7 +14,7 @@ colocboost_assemble_ucos <- function(cb_obj_single, weaker_ucos = TRUE, tol = 1e-9){ - if (class(cb_obj_single) != "colocboost"){ + if (!inherits(cb_obj_single, "colocboost")){ stop("Input must from colocboost function!")} cb_data <- cb_obj_single$cb_data diff --git a/R/colocboost_check_update_jk.R b/R/colocboost_check_update_jk.R index f0fa100..ce922ac 100644 --- a/R/colocboost_check_update_jk.R +++ b/R/colocboost_check_update_jk.R @@ -50,7 +50,7 @@ colocboost_check_update_jk <- function(cb_model, cb_model_para, cb_data, - +#' @importFrom stats median boost_check_update_jk_notarget <- function(cb_model, cb_model_para, cb_data, prioritize_jkstar = TRUE, jk_equiv_cor = 0.8, ##### more than 2 traits @@ -534,7 +534,7 @@ boost_check_update_jk_target <- function(cb_model, cb_model_para, cb_data, return(cb_model_para) } - +#' @importFrom stats cor get_LD_jk1_jk2 <- function(jk1, jk2, X = NULL, XtX = NULL, N = NULL, remain_jk = NULL){ @@ -660,4 +660,4 @@ estimate_change_profile_res <- function(jk, } - \ No newline at end of file + diff --git a/R/colocboost_init.R 
b/R/colocboost_init.R index 980d271..6b80aa0 100644 --- a/R/colocboost_init.R +++ b/R/colocboost_init.R @@ -335,6 +335,7 @@ colocboost_init_model <- function(cb_data, #' @noRd #' @keywords cb_objects +#' @importFrom utils tail colocboost_init_para <- function(cb_data, cb_model,tau=0.01, func_prior = "z2z", lambda = 0.5, lambda_target = 1, @@ -508,7 +509,7 @@ get_lfsr <- function(z, miss_idx = NULL, ash_prior = "normal"){ return(lfsr) } - +#' @importFrom stats pchisq get_lfdr <- function(z, miss_idx = NULL){ P <- length(z) lambda_max <- 0.95 @@ -519,7 +520,7 @@ get_lfdr <- function(z, miss_idx = NULL){ result <- try({ lfdr_nomissing <- qvalue(pchisq(drop(z^2), 1, lower.tail = FALSE), lambda = seq(0.05, lambda_max, 0.05))$lfdr }, silent = TRUE) - if(class(result) == "try-error") { + if(inherits(result, "try-error")) { lambda_max <- lambda_max - 0.05 # Decrement lambda_max if error occurs } else {try_run = 0} } @@ -532,7 +533,7 @@ get_lfdr <- function(z, miss_idx = NULL){ result <- try({ lfdr <- qvalue(pchisq(drop(z^2), 1, lower.tail = FALSE), lambda = seq(0.05, lambda_max, 0.05))$lfdr }, silent = TRUE) - if(class(result) == "try-error") { + if(inherits(result, "try-error")) { lambda_max <- lambda_max - 0.05 # Decrement lambda_max if error occurs } else {try_run = 0} } @@ -541,7 +542,7 @@ get_lfdr <- function(z, miss_idx = NULL){ return(lfdr) } - +#' @importFrom stats pchisq get_padj <- function(z, miss_idx = NULL, p.adjust.methods = "fdr"){ # test also p.adjust.methods = "BY" P <- length(z) if (length(miss_idx)!=0){ diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 8a84e9b..7cfd51c 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -594,7 +594,7 @@ get_summary_table_fm <- function(cb_output, outcome_names = NULL, gene_name = NU } - +#' @importFrom stats pchisq cos_pvalue_filter <- function(cos_results, data_info = NULL, pvalue_cutoff = 1e-4){ if (is.null(data_info)) diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R index 
736a5fb..740db9e 100644 --- a/R/colocboost_plot.R +++ b/R/colocboost_plot.R @@ -1,5 +1,6 @@ - - +#' @importFrom utils head tail +#' @importFrom graphics abline axis legend mtext par points text +#' @importFrom grDevices adjustcolor colocboost_plot <- function(cb_output, y = "log10p", gene_name = NULL, outcome_idx = NULL, @@ -28,7 +29,7 @@ colocboost_plot <- function(cb_output, y = "log10p", ...){ - if (class(cb_output) != "colocboost"){ + if (!inherits(cb_output, "colocboost")){ stop("Input of colocboost_plot must be a 'colocboost' object!")} # get cb_plot_input data from colocboost results @@ -372,7 +373,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, } - +#' @importFrom stats pnorm plot_initial <- function(cb_plot_input, y = "log10p", points_color = "grey90", cos_color = NULL, ylim_each = TRUE, gene_name = NULL, diff --git a/R/colocboost_update.R b/R/colocboost_update.R index cc143af..efda4ce 100644 --- a/R/colocboost_update.R +++ b/R/colocboost_update.R @@ -5,6 +5,7 @@ #' @details #' The gradient boosting algorithm for multiple outcomes #' +#' @importFrom utils head tail #' @return colocboost object after gradient boosting update #' @export colocboost_update <- function(cb_model, cb_model_para, cb_data, diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index 4b5731b..736fdd3 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -36,6 +36,7 @@ get_cormat <- function(X, intercepte = FALSE){ } +#' @importFrom utils head tail check_null_post <- function(cb_obj, coloc_sets_temp, coloc_outcomes, @@ -191,8 +192,8 @@ check_null_post <- function(cb_obj, return(ll) } - -get_purity = function (pos, X=NULL, Xcorr=NULL, N = NULL, n = 100) { +#' @importFrom stats na.omit +get_purity <- function(pos, X=NULL, Xcorr=NULL, N = NULL, n = 100) { get_upper_tri = Rfast::upper_tri get_median = Rfast::med @@ -256,7 +257,7 @@ get_modularity <- function(Weight, B){ } } - +#' @importFrom stats cutree get_n_cluster <- function(hc, Sigma, m=ncol(Sigma), 
between_cluster = 0.8){ if (min(Sigma) > between_cluster){ IND = 1 @@ -300,8 +301,9 @@ w_purity <- function(weights, X=NULL, Xcorr=NULL, N = NULL, n = 100, coverage = } +#' @importFrom stats na.omit # - Calculate purity between two confidence sets -get_between_purity = function (pos1, pos2, X=NULL, Xcorr=NULL, N = NULL, miss_idx = NULL, P = NULL){ +get_between_purity <- function(pos1, pos2, X=NULL, Xcorr=NULL, N = NULL, miss_idx = NULL, P = NULL){ get_matrix_mult <- function(X_sub1, X_sub2){ @@ -338,7 +340,8 @@ get_between_purity = function (pos1, pos2, X=NULL, Xcorr=NULL, N = NULL, miss_id return(c(min(value), max(value), get_median(value))) } - +#' @importFrom stats var +#' @importFrom utils tail get_cos_evidence <- function(cb_obj, coloc_out, data_info){ get_cos_config <- function(w, config_idx, alpha = 1.5, coverage = 0.95){ diff --git a/R/colocboost_workhorse.R b/R/colocboost_workhorse.R index 77d9859..d65db13 100644 --- a/R/colocboost_workhorse.R +++ b/R/colocboost_workhorse.R @@ -37,7 +37,7 @@ colocboost_workhorse <- function(cb_data, outcome_names = NULL){ - if (class(cb_data) != "colocboost"){ + if (!inherits(cb_data, "colocboost")){ stop("Input must from colocboost function!")} cb_model <- colocboost_init_model(cb_data, diff --git a/R/qval.R b/R/qval.R index 1fc7947..0fb6ca2 100644 --- a/R/qval.R +++ b/R/qval.R @@ -162,6 +162,8 @@ qvalue <- function(p, fdr.level = NULL, pfdr = FALSE, lfdr.out = TRUE, pi0 = NUL #' against the tuning parameter \eqn{\lambda}{lambda}. Optional. #' @param \dots Arguments passed from \code{\link{qvalue}} function. #' +#' @importFrom stats predict quantile smooth.spline +#' #' @details #' If no options are selected, then the method used to estimate \eqn{\pi_0}{pi_0} is #' the smoother method described in Storey and Tibshirani (2003). The @@ -296,6 +298,8 @@ pi0est <- function(p, lambda = seq(0.05,0.95,0.05), pi0.method = c("smoother", " #' @description #' Estimate the local FDR values from p-values. 
#' +#' @importFrom stats density dnorm predict qnorm smooth.spline +#' #' @param p A vector of p-values (only necessary input). #' @param pi0 Estimated proportion of true null p-values. If NULL, then \code{\link{pi0est}} is called. #' @param trunc If TRUE, local FDR values >1 are set to 1. Default is TRUE. diff --git a/README.md b/README.md index 85aa4fe..6e0189e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,12 @@ This R package implements ColocBoost --- motivated and designed for colocalization analysis ([first formulated here](https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1004383)) of multiple genetic association studies --- as a multi-task learning approach to variable selection regression with highly correlated predictors and sparse effects, based on frequentist statistical inference. It provides statistical evidence to identify which subsets of predictors have non-zero effects on which subsets of response variables. -Temporary usage before packaging (v0.1.0-alpha) -- Download the source code in `R/` folder. -- In R, you should source code using `for(file in list.files("R/", full.names = T)) {source(file)}`. +Temporary usage before our CRAN or conda release +- Clone the repo to your local folder, then +```bash +cd colocboost +R --slave -e "devtools::install()" +``` - To run FineBoost, you need `colocboost(X=X, Y=y)`, where X and y are the same as `susie(X,y)` +- To run ColocBoost, we suggest using [this pipeline wrapper](https://github.com/StatFunGen/pecotmr/blob/main/R/colocboost_pipeline.R) to manage multiple datasets that mix individual-level and summary statistics data.
The `pecotmr` package can be installed either from source or from our conda package at https://anaconda.org/dnachun/r-pecotmr + diff --git a/man/colocboost.Rd b/man/colocboost.Rd index 5793230..2729875 100644 --- a/man/colocboost.Rd +++ b/man/colocboost.Rd @@ -57,6 +57,7 @@ colocboost( check_null_max = 0.02, residual_correlation = NULL, LD_obj = FALSE, + dynamic_step = TRUE, weaker_ucos = TRUE, output_level = 1 ) diff --git a/man/colocboost_update.Rd b/man/colocboost_update.Rd index a4a8d62..22cb5cd 100644 --- a/man/colocboost_update.Rd +++ b/man/colocboost_update.Rd @@ -13,7 +13,8 @@ colocboost_update( func_prior = "z2z", lambda = 0.5, lambda_target = 1, - LD_obj = FALSE + LD_obj = FALSE, + dynamic_step = TRUE ) } \value{ diff --git a/man/colocboost_workhorse.Rd b/man/colocboost_workhorse.Rd index 62a53ec..951e83f 100644 --- a/man/colocboost_workhorse.Rd +++ b/man/colocboost_workhorse.Rd @@ -26,6 +26,7 @@ colocboost_workhorse( func_compare = "min_max", coloc_thres = 0.1, LD_obj = FALSE, + dynamic_step = TRUE, target_idx = NULL, outcome_names = NULL )