From 6718b826df56ddb7048f43e150d7ada17b439e9d Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Fri, 18 Apr 2025 13:01:48 -0400 Subject: [PATCH 1/4] more documentation and minor fix --- NAMESPACE | 3 + R/colocboost_assemble_cos.R | 1 - R/colocboost_assemble_ucos.R | 1 - R/colocboost_inference.R | 262 +++++++++++++--------- R/colocboost_init.R | 26 ++- R/colocboost_output.R | 206 ++++++++++++++--- R/colocboost_utils.R | 3 - man/get_cormat.Rd | 6 +- man/get_cos.Rd | 28 ++- man/get_cos_purity.Rd | 56 +++++ man/get_cos_summary.Rd | 3 +- man/get_hierarchical_clusters.Rd | 44 ++++ man/get_robust_colocalization.Rd | 3 +- man/get_ucos_summary.Rd | 59 +++++ tests/testthat/test_corner_cases.R | 26 +-- tests/testthat/test_inference.R | 175 ++++++++++++++- tests/testthat/test_model.R | 3 - tests/testthat/test_utils.R | 69 ++++++ vignettes/Interpret_ColocBoost_Output.Rmd | 3 + vignettes/Partial_Overlap_Variants.Rmd | 18 ++ 20 files changed, 815 insertions(+), 180 deletions(-) create mode 100644 man/get_cos_purity.Rd create mode 100644 man/get_hierarchical_clusters.Rd create mode 100644 man/get_ucos_summary.Rd diff --git a/NAMESPACE b/NAMESPACE index 0d3d4dd..4650e0f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,8 +4,11 @@ export(colocboost) export(colocboost_plot) export(get_cormat) export(get_cos) +export(get_cos_purity) export(get_cos_summary) +export(get_hierarchical_clusters) export(get_robust_colocalization) +export(get_ucos_summary) importFrom(grDevices,adjustcolor) importFrom(graphics,abline) importFrom(graphics,axis) diff --git a/R/colocboost_assemble_cos.R b/R/colocboost_assemble_cos.R index c604122..6a1243a 100644 --- a/R/colocboost_assemble_cos.R +++ b/R/colocboost_assemble_cos.R @@ -350,7 +350,6 @@ colocboost_assemble_cos <- function(cb_obj, res[[i]] <- get_between_purity(cset1, cset2, X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX, - N = cb_data$data[[i]]$N, miss_idx = cb_data$data[[i]]$variable_miss, P = cb_model_para$P ) diff --git a/R/colocboost_assemble_ucos.R b/R/colocboost_assemble_ucos.R index 4298ac1..db695a6 100644 --- a/R/colocboost_assemble_ucos.R +++ b/R/colocboost_assemble_ucos.R @@ -236,7 +236,6 @@ colocboost_assemble_ucos <- function(cb_obj_single, res <- get_between_purity(cset1, cset2, X = cb_data$data[[1]]$X, Xcorr = cb_data$data[[1]]$XtX, - N = cb_data$data[[1]]$N, miss_idx = cb_data$data[[1]]$variable_miss, P = cb_model_para$P ) diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R index 6180bc0..c9a42f5 100644 --- a/R/colocboost_inference.R +++ b/R/colocboost_inference.R @@ -46,10 +46,10 @@ colocboost_post_inference <- function() { #' #' @family colocboost_utilities #' @export -get_cormat <- function(X, intercepte = FALSE) { +get_cormat <- function(X, intercepte = TRUE) { X <- t(X) # Center each variable - if (!intercepte) { + if (intercepte) { X <- X - rowMeans(X) } # Standardize each variable @@ -59,6 +59,154 @@ get_cormat <- function(X, intercepte = FALSE) { return(cr) } +#' @title Perform modularity-based hierarchical clustering +#' @description +#' This function performs a modularity-based hierarchical clustering approach to identify clusters from a correlation matrix (LD matrix). +#' +#' @param cormat A correlation matrix. +#' @param min_cluster_corr The small correlation for the weights distributions across different iterations to be decided having only one cluster. Default is 0.8. +#' +#' @return A list containing: +#' \item{cluster}{A binary matrix indicating the cluster membership of each variable.} +#' \item{Q_modularity}{The modularity values for the identified clusters.} +#' +#' @examples +#' # Example usage +#' set.seed(1) +#' N <- 100 +#' P <- 4 +#' sigma <- matrix(0.2, nrow = P, ncol = P) +#' diag(sigma) <- 1 +#' sigma[1:2, 1:2] <- 0.9 +#' sigma[3:4, 3:4] <- 0.9 +#' X <- MASS::mvrnorm(N, rep(0, P), sigma) +#' cormat <- get_cormat(X) +#' clusters <- get_hierarchical_clusters(cormat) +#' clusters$cluster +#' clusters$Q_modularity +#' +#' @family colocboost_utilities +#' @export +get_hierarchical_clusters <- function(cormat, min_cluster_corr = 0.8) { + # Handle edge case: single element matrix + if (nrow(cormat) == 1 || ncol(cormat) == 1) { + # Return a single cluster with one element and zero modularity + return(list( + "cluster" = matrix(1, nrow = nrow(cormat), ncol = 1), + "Q_modularity" = 0 + )) + } + + # Perform hierarchical clustering approach + hc <- hclust(as.dist(1 - cormat)) + # Get the optimal number of clusters + opt_cluster <- get_n_cluster(hc, cormat, min_cluster_corr = min_cluster_corr) + n_cluster <- opt_cluster$n_cluster + Q_modularity <- opt_cluster$Qmodularity + # Obtain the final clusters + index <- cutree(hc, n_cluster) + B <- sapply(1:n_cluster, function(t) as.numeric(index == t)) + B <- as.matrix(B) + return(list("cluster" = B, "Q_modularity" = Q_modularity)) +} + +#' Function to calculate modularity for a given number of clusters +#' @param Weight A matrix of weights +#' @param B A matrix of binary values indicating the clusters +#' @return The modularity value +#' @keywords cb_post_inference +#' @noRd +get_modularity <- function(Weight, B) { + if (dim(Weight)[1] == 1) return(0) + + W_pos <- Weight * (Weight > 0) + W_neg <- Weight * (Weight < 0) + N <- dim(Weight)[1] + K_pos <- colSums(W_pos) + K_neg <- colSums(W_neg) + m_pos <- sum(K_pos) + m_neg <- sum(K_neg) + m <- m_pos + m_neg + + if (m == 0) return(0) + + cate <- B %*% t(B) + + if (m_pos == 0 & m_neg == 0) return(0) + + if (m_pos == 0) { + Q_positive <- 0 + Q_negative <- sum((W_neg - K_neg %*% t(K_neg) / m_neg) * cate) / m_neg + } else if (m_neg == 0) { + Q_positive <- sum((W_pos - K_pos %*% t(K_pos) / m_pos) * cate) / m_pos + Q_negative <- 0 + } else { + Q_positive <- sum((W_pos - K_pos %*% t(K_pos) / m_pos) * cate) / m_pos + Q_negative <- sum((W_neg - K_neg %*% t(K_neg) / m_neg) * cate) / m_neg + } + Q <- m_pos / m * Q_positive - m_neg / m * Q_negative + return(Q) +} + +#' Function to get the number of clusters based on modularity-based hierarchical clustering method +#' @param hc A hierarchical clustering object +#' @param Sigma A matrix of weights +#' @param m The number of clusters +#' @param min_cluster_corr The minimum correlation threshold for clustering within the same cluster +#' @return A list containing the number of clusters and modularity values +#' @keywords cb_post_inference +#' @noRd +#' @importFrom stats cutree +get_n_cluster <- function(hc, Sigma, m = ncol(Sigma), min_cluster_corr = 0.8) { + if (min(Sigma) > min_cluster_corr) { + IND <- 1 + Q <- 1 + } else { + Q <- c() + if (ncol(Sigma) < 10) { + m <- ncol(Sigma) + } + for (i in 1:m) + { + index <- cutree(hc, i) + B <- sapply(1:i, function(t) as.numeric(index == t)) + Q[i] <- get_modularity(Sigma, B) + } + + IND <- which(Q == max(Q)) + L <- length(IND) + if (L > 1) IND <- IND[1] + } + return(list( + "n_cluster" = IND, + "Qmodularity" = Q + )) +} + +#' Function to calculate within-CoS purity for each single weight from one SEC +#' @keywords cb_post_inference +#' @noRd +w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverage = 0.95, + min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL) { + tmp <- apply(weights, 2, w_cs, coverage = coverage) + tmp_purity <- apply(tmp, 2, function(tt) { + pos <- which(tt == 1) + # deal with missing snp here + if (!is.null(Xcorr)) { + pos <- match(pos, setdiff(1:length(tmp), miss_idx)) + } + get_purity(pos, X = X, Xcorr = Xcorr, N = N, n = n) + }) + if (is.null(median_abs_corr)) { + is_pure <- which(tmp_purity[1, ] >= min_abs_corr) + } else { + is_pure <- which(tmp_purity[1, ] >= min_abs_corr | tmp_purity[3, ] >= median_abs_corr) + } + return(is_pure) +} + + + #' Function to remove the spurious signals #' @importFrom utils head tail @@ -270,106 +418,11 @@ get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) { } } -# ------ Calculate modularity ---------- -#' Function to calculate modularity for a given number of clusters -#' @param Weight A matrix of weights -#' @param B A matrix of binary values indicating the clusters -#' @return The modularity value -#' @keywords cb_post_inference -#' @noRd -get_modularity <- function(Weight, B) { - if (dim(Weight)[1] == 1) { - Q <- 0 - } else { - W_pos <- Weight * (Weight > 0) - W_neg <- Weight * (Weight < 0) - N <- dim(Weight)[1] - K_pos <- colSums(W_pos) - K_neg <- colSums(W_neg) - m_pos <- sum(K_pos) - m_neg <- sum(K_neg) - m <- m_pos + m_neg - cate <- B %*% t(B) - if (m_pos == 0 & m_neg == 0) { - Q <- 0 - } else { - if (m_pos == 0) { - Q_positive <- 0 - Q_negative <- sum((W_neg - K_neg %*% t(K_neg) / m_neg) * cate) / m_neg - } else if (m_neg == 0) { - Q_positive <- sum((W_pos - K_pos %*% t(K_pos) / m_pos) * cate) / m_pos - Q_negative <- 0 - } else { - Q_positive <- sum((W_pos - K_pos %*% t(K_pos) / m_pos) * cate) / m_pos - Q_negative <- sum((W_neg - K_neg %*% t(K_neg) / m_neg) * cate) / m_neg - } - } - Q <- m_pos / m * Q_positive - m_neg / m * Q_negative - } -} - -#' Function to get the number of clusters based on modularity-based hierarchical clustering method -#' @param hc A hierarchical clustering object -#' @param Sigma A matrix of weights -#' @param m The number of clusters -#' @param min_cluster_corr The minimum correlation threshold for clustering within the same cluster -#' @return A list containing the number of clusters and modularity values -#' @keywords cb_post_inference -#' @noRd -#' @importFrom stats cutree -get_n_cluster <- function(hc, Sigma, m = ncol(Sigma), min_cluster_corr = 0.8) { - if (min(Sigma) > min_cluster_corr) { - IND <- 1 - Q <- 1 - } else { - Q <- c() - if (ncol(Sigma) < 10) { - m <- ncol(Sigma) - } - for (i in 1:m) - { - index <- cutree(hc, i) - B <- sapply(1:i, function(t) as.numeric(index == t)) - Q[i] <- get_modularity(Sigma, B) - } - - IND <- which(Q == max(Q)) - L <- length(IND) - if (L > 1) IND <- IND[1] - } - return(list( - "n_cluster" = IND, - "Qmodularity" = Q - )) -} - -#' Function to calculate within-CoS purity for each single weight from one SEC -#' @keywords cb_post_inference -#' @noRd -w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverage = 0.95, - min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL) { - tmp <- apply(weights, 2, w_cs, coverage = coverage) - tmp_purity <- apply(tmp, 2, function(tt) { - pos <- which(tt == 1) - # deal with missing snp here - if (!is.null(Xcorr)) { - pos <- match(pos, setdiff(1:length(tmp), miss_idx)) - } - get_purity(pos, X = X, Xcorr = Xcorr, N = N, n = n) - }) - if (is.null(median_abs_corr)) { - is_pure <- which(tmp_purity[1, ] >= min_abs_corr) - } else { - is_pure <- which(tmp_purity[1, ] >= min_abs_corr | tmp_purity[3, ] >= median_abs_corr) - } - return(is_pure) -} - #' Calculate purity between two CoS #' @keywords cb_post_inference #' @noRd #' @importFrom stats na.omit -get_between_purity <- function(pos1, pos2, X = NULL, Xcorr = NULL, N = NULL, miss_idx = NULL, P = NULL) { +get_between_purity <- function(pos1, pos2, X = NULL, Xcorr = NULL, miss_idx = NULL, P = NULL) { get_matrix_mult <- function(X_sub1, X_sub2) { X_sub1 <- t(X_sub1) X_sub2 <- t(X_sub2) @@ -385,15 +438,16 @@ get_between_purity <- function(pos1, pos2, X = NULL, Xcorr = NULL, N = NULL, mis get_median <- Rfast::med if (is.null(Xcorr)) { - X_sub1 <- X[, pos1, drop = FALSE] - X_sub2 <- X[, pos2, drop = FALSE] + X_sub1 <- scale(X[, pos1, drop = FALSE], center = T, scale = F) + X_sub2 <- scale(X[, pos2, drop = FALSE], center = T, scale = F) value <- abs(get_matrix_mult(X_sub1, X_sub2)) } else { - pos1 <- na.omit(match(pos1, setdiff(1:P, miss_idx))) - pos2 <- na.omit(match(pos2, setdiff(1:P, miss_idx))) - # scaling_factor <- if (!is.null(N)) N-1 else 1 + if (length(miss_idx)!=0){ + pos1 <- na.omit(match(pos1, setdiff(1:P, miss_idx))) + pos2 <- na.omit(match(pos2, setdiff(1:P, miss_idx))) + } if (length(pos1) != 0 & length(pos2) != 0) { - value <- abs(Xcorr[pos1, pos2]) # / scaling_factor + value <- abs(Xcorr[pos1, pos2]) } else { value <- 0 } diff --git a/R/colocboost_init.R b/R/colocboost_init.R index f44da5d..2cbcbda 100644 --- a/R/colocboost_init.R +++ b/R/colocboost_init.R @@ -496,12 +496,14 @@ get_multiple_testing_correction <- function(z, miss_idx = NULL, func_multi_test #' @return List containing processed data with optimized LD submatrix storage #' @noRd process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dict, target_variants) { + + # Step 1: Identify unique combinations of (variant list, LD matrix) unified_dict <- integer(length(variant_lists)) # First item is always assigned its own position unified_dict[1] <- 1 - + # Process remaining items if (length(variant_lists) > 1) { for (i in 2:length(variant_lists)) { @@ -641,15 +643,19 @@ process_individual_data <- function(X, Y, dict_YX, target_variants, } # Step 1: Update dictionary to handle duplicates samples - sample_lists <- lapply(Y, rownames) - new_dict <- 1:length(dict_YX) - # For each pair of Y indices - for (i in 1:(length(dict_YX)-1)) { - for (j in (i+1):length(dict_YX)) { - # Check if they map to the same X matrix AND have the same samples - if (dict_YX[i] == dict_YX[j] && identical(sample_lists[[i]], sample_lists[[j]])) { - # If same matrix and same samples, use the smaller index - new_dict[j] <- new_dict[i] + if (length(Y) == 1){ + new_dict = 1 + } else { + sample_lists <- lapply(Y, rownames) + new_dict <- 1:length(dict_YX) + # For each pair of Y indices + for (i in 1:(length(dict_YX)-1)) { + for (j in (i+1):length(dict_YX)) { + # Check if they map to the same X matrix AND have the same samples + if (dict_YX[i] == dict_YX[j] && identical(sample_lists[[i]], sample_lists[[j]])) { + # If same matrix and same samples, use the smaller index + new_dict[j] <- new_dict[i] + } } } } diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 2073949..4ed888f 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -102,7 +102,7 @@ get_cos_summary <- function(cb_output, summary_table$focal_outcome <- ifelse(if.focal, focal_outcome, FALSE) summary_table <- summary_table[order(summary_table$focal_outcome == "FALSE"), ] if (sum(if.focal) == 0) { - warnings("No colocalization with focal outcomes.") + warning("No colocalization with focal outcomes.") } } # - if extract only interest outcome colocalization @@ -116,10 +116,10 @@ get_cos_summary <- function(cb_output, summary_table$interest_outcome <- interest_outcome summary_table <- summary_table[-which(if.interest == "FALSE"), ] if (sum(if.interest) == 0) { - warnings("No colocalization with interest outcomes.") + warning("No colocalization with interest outcomes.") } } else { - warnings("Interest outcome is not in the analysis outcomes, please check.") + warning("Interest outcome is not in the analysis outcomes, please check.") } } } else { @@ -187,7 +187,7 @@ get_robust_colocalization <- function(cb_output, } if (is.null(cb_output$cos_details)) { - warnings("No colocalization results in this region!") + warning("No colocalization results in this region!") return(cb_output) } @@ -202,7 +202,7 @@ get_robust_colocalization <- function(cb_output, )) } else { if (pvalue_cutoff > 1 | pvalue_cutoff < 0) { - warnings("Please check the pvalue cutoff in [0,1].") + warning("Please check the pvalue cutoff in [0,1].") return(cb_output) } if (npc_outcome_cutoff == 0 && cos_npc_cutoff == 0) { @@ -399,27 +399,49 @@ get_robust_colocalization <- function(cb_output, #' @rdname get_ucos_summary #' -#' @title Get fine-mapping summary table from a ColocBoost output with only one single trait fine-mapping analysis. +#' @title Get trait-specific summary table from a ColocBoost output. #' -#' @description `get_ucos_summary` get the fine-mapping summary table +#' @description `get_ucos_summary` produces a trait-specific summary table for uncolocalized (single-trait) +#' associations from ColocBoost results. This is particularly useful for examining trait-specific signals +#' or for summarizing results from single-trait FineBoost analyses. #' #' @param cb_output Output object from `colocboost` analysis #' @param outcome_names Optional vector of names of outcomes, which has the same order as Y in the original analysis. #' @param region_name Optional character string. When provided, adds a column with this gene name to the output table for easier filtering in downstream analyses. #' -#' @return A summary table for fine-mapped events with the following columns: -#' \item{outcomes}{Outcomes analyzed } -#' \item{ucos_id}{Unique identifier for fine-mapped confidence sets } -#' \item{purity}{Minimum absolute correlation of variables with in fine-mapped confidence sets } -#' \item{top_variable}{The variable with highest variant-level probability of association (VPA) } +#' @return A summary table for trait-specific, uncolocalized associations with the following columns: +#' \item{outcomes}{Outcome being analyzed} +#' \item{ucos_id}{Unique identifier for trait-specific confidence sets} +#' \item{purity}{Minimum absolute correlation of variables within trait-specific confidence sets} +#' \item{top_variable}{The variable with highest variant-level probability of association (VPA)} #' \item{top_variable_vpa}{Variant-level probability of association (VPA) for the top variable} -#' \item{n_variables}{Number of variables in colocalization confidence set (CoS)} -#' \item{ucos_index}{Indices of fine-mapped variables} -#' \item{ucos_variables}{List of fine-mapped variables} -#' \item{ucos_variables_vpa}{Variant-level probability of association (VPA) for all fine-mapped variables} +#' \item{ucos_npc}{Normalized probability of causal association for the trait-specific confidence set} +#' \item{n_variables}{Number of variables in trait-specific confidence set} +#' \item{ucos_index}{Indices of variables in the trait-specific confidence set} +#' \item{ucos_variables}{List of variables in the trait-specific confidence set} +#' \item{ucos_variables_vpa}{Variant-level probability of association (VPA) for all variables in the confidence set} +#' \item{region_name}{Region name if provided through the region_name parameter} +#' +#' @examples +#' # colocboost example with single trait analysis +#' set.seed(1) +#' N <- 1000 +#' P <- 100 +#' # Generate X with LD structure +#' sigma <- 0.9^abs(outer(1:P, 1:P, "-")) +#' X <- MASS::mvrnorm(N, rep(0, P), sigma) +#' colnames(X) <- paste0("SNP", 1:P) +#' L <- 1 # Only one trait for single-trait analysis +#' true_beta <- matrix(0, P, L) +#' true_beta[10, 1] <- 0.5 # SNP10 affects the trait +#' true_beta[50, 1] <- 0.2 # SNP11 also affects the trait but with lower effect +#' Y <- X %*% true_beta + rnorm(N, 0, 1) +#' res <- colocboost(X = X, Y = Y, output_level = 2) +#' # Get the trait-specifc effect summary +#' get_ucos_summary(res) #' #' @family colocboost_inference -#' @noRd +#' @export get_ucos_summary <- function(cb_output, outcome_names = NULL, region_name = NULL) { if (!inherits(cb_output, "colocboost")) { stop("Input must from colocboost object!") @@ -432,6 +454,10 @@ get_ucos_summary <- function(cb_output, outcome_names = NULL, region_name = NULL cs_outcome <- outcome_names } vpa <- as.numeric(cb_output$vpa) + if (length(vpa) == 0){ + w <- do.call(cbind, specific_cs$ucos_weight) + vpa <- as.vector(1 - apply(1 - w, 1, prod)) + } summary_table <- matrix(NA, nrow = length(specific_cs$ucos$ucos_index), ncol = 9) colnames(summary_table) <- c( @@ -458,12 +484,16 @@ get_ucos_summary <- function(cb_output, outcome_names = NULL, region_name = NULL return(summary_table) } -#' Extract CoS at different coverages, without filtering by purity +#' Extract CoS at different coverages #' -#' @description `get_cos` get the colocalization confidence sets (CoS) with different coverage. +#' @description `get_cos` get the colocalization confidence sets (CoS) with different coverage. If Genotype...provided...check purity #' #' @param cb_output Output object from `colocboost` analysis #' @param coverage A number between 0 and 1 specifying the \dQuote{coverage} of the estimated colocalization confidence sets (CoS) (default is 0.95). +#' @param X Genotype matrix of values of the p variables. Used to compute correlations if Xcorr is not provided. +#' @param Xcorr Correlation matrix of correlations between variables. Alternative to X. +#' @param n_purity The maximum number of CoS variables used in calculating the correlation (\dQuote{purity}) statistics. +#' When the number of variables included in the CoS is greater than this number, the CoS variables are randomly subsampled. #' #' @return A list of indices of variables in each CoS. #' @@ -487,16 +517,54 @@ get_ucos_summary <- function(cb_output, outcome_names = NULL, region_name = NULL #' Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) #' } #' res <- colocboost(X = X, Y = Y) -#' get_cos(res, coverage = 0.75) +#' get_cos(res, coverage = 0.99, X = X) +#' get_cos(res, coverage = 0.99, X = X, min_abs_corr = 0.95) #' #' @family colocboost_utilities #' @export -get_cos <- function(cb_output, coverage = 0.95) { +get_cos <- function(cb_output, coverage = 0.95, X = NULL, Xcorr = NULL, n_purity = 100, min_abs_corr = 0.5, median_abs_corr = NULL) { + + if (is.null(cb_output$cos_details$cos)){ + warning("No colocalization results in this region!") + return(list("cos" = NULL, "cos_purity" = NULL)) + } + + # Refine CoS based on different coverage cos_vcp <- cb_output$cos_details$cos_vcp cos_diff_coverage <- lapply(cos_vcp, function(w) { unlist(get_in_cos(w, coverage = coverage)) }) - return(cos_diff_coverage) + names(cos_diff_coverage) <- paste0(names(cos_diff_coverage), "_coverage_", coverage) + + # Check purity if X or Xcorr exist + cos_purity <- NULL + if (!is.null(X) || !is.null(Xcorr)){ + cos_purity <- get_cos_purity(cos_diff_coverage, X = X, Xcorr = Xcorr, n_purity = n_purity) + # Extract within-CoS purity + ncos <- nrow(cos_purity[[1]]) + within_purity <- matrix(NA, nrow = ncos, ncol = 3) + colnames(within_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") + rownames(within_purity) <- names(cos_diff_coverage) + within_purity[, 1] <- diag(cos_purity$min_abs_cor) + within_purity[, 2] <- diag(cos_purity$max_abs_cor) + within_purity[, 3] <- diag(cos_purity$median_abs_cor) + # Check purity + if (is.null(median_abs_corr)) { + is_pure <- which(within_purity[, 1] >= min_abs_corr) + } else { + is_pure <- which(within_purity[, 1] >= min_abs_corr | purity[, 3] >= median_abs_corr) + } + # Filter impured CoS + if (length(is_pure) == 0) { + cos_diff_coverage <- NULL + cos_purity <- NULL + } else if (length(is_pure) < ncos){ + cos_diff_coverage <- cos_diff_coverage[is_pure] + cos_purity <- lapply(cos_purity, function(tmp) tmp[is_pure, is_pure, drop = FALSE] ) + } + } + cos_refined <- list("cos" = cos_diff_coverage, "cos_purity" = cos_purity) + return(cos_refined) } #' Get integrated weight from different outcomes @@ -526,6 +594,97 @@ get_in_cos <- function(weights, coverage = 0.95) { } +#' Calculate purity within and in-between CoS +#' +#' @description Calculate purity statistics between all pairs of colocalization confidence sets (CoS) +#' +#' @param cos List of variables in CoS +#' @param X Genotype matrix of values of the p variables. Used to compute correlations if Xcorr is not provided. +#' @param Xcorr Correlation matrix of correlations between variables. Alternative to X. +#' @param n_purity The maximum number of CoS variables used in calculating the correlation (\dQuote{purity}) statistics. +#' When the number of variables included in the CoS is greater than this number, the CoS variables are randomly subsampled. +#' +#' @return A list containing three matrices (min_abs_cor, max_abs_cor, median_abs_cor) with +#' purity statistics for all pairs of CoS. Diagonal elements represent within-CoS purity. +#' +#' @examples +#' # colocboost example +#' set.seed(1) +#' N <- 1000 +#' P <- 100 +#' # Generate X with LD structure +#' sigma <- 0.9^abs(outer(1:P, 1:P, "-")) +#' X <- MASS::mvrnorm(N, rep(0, P), sigma) +#' colnames(X) <- paste0("SNP", 1:P) +#' L <- 3 +#' true_beta <- matrix(0, P, L) +#' true_beta[10, 1] <- 0.5 +#' true_beta[10, 2] <- 0.4 +#' true_beta[50, 2] <- 0.3 +#' true_beta[80, 3] <- 0.6 +#' Y <- matrix(0, N, L) +#' for (l in 1:L) { +#' Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) +#' } +#' res <- colocboost(X = X, Y = Y) +#' cos_res <- get_cos(res, coverage = 0.8) +#' get_cos_purity(cos_res$cos, X = X) +#' +#' @family colocboost_utilities +#' @export +get_cos_purity <- function(cos, X = NULL, Xcorr = NULL, n_purity = 100) { + # Check inputs + if (is.null(cos) || length(cos) == 0) return(NULL) + if (is.null(X) && is.null(Xcorr)) stop("Either X or Xcorr must be provided") + if (is.numeric(cos)) cos <- list(cos) + + # Get CoS names + cos_names <- names(cos) + ncos <- length(cos) + if (is.null(cos_names)) cos_names <- paste0("cos_", 1:ncos) + + # If only one CoS, just return the purity as a matrix + if (ncos == 1) { + purity_stats <- get_purity(cos[[1]], X = X, Xcorr = Xcorr, n = n_purity) + cos_purity <- lapply(1:3, function(ii) { + mat <- matrix(purity_stats[ii], 1, 1) + rownames(mat) <- colnames(mat) <- cos_names + return(mat) + }) + names(cos_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") + return(cos_purity) + } + + # Initialize empty matrices for purity values + empty_matrix <- matrix(NA, ncos, ncos) + colnames(empty_matrix) <- rownames(empty_matrix) <- cos_names + # Initialize purity matrices with diagonal values (within-CoS purity) + cos_purity <- lapply(1:3, function(ii) empty_matrix ) + # Compute within-CoS purity + for (i in 1:ncos){ + purity_stats <- get_purity(cos[[i]], X = X, Xcorr = Xcorr, n = n_purity) + for (ii in 1:3) { + cos_purity[[ii]][i, i] <- purity_stats[ii] + } + } + # Compute between-CoS purity for every pair + for (i in 1:(ncos - 1)) { + for (j in (i + 1):ncos) { + cos1 <- cos[[i]] + cos2 <- cos[[j]] + purity_stats <- get_between_purity(cos1, cos2, X = X, Xcorr = Xcorr) + # Update all three purity matrices + for (ii in 1:3) { + cos_purity[[ii]][i, j] <- cos_purity[[ii]][j, i] <- purity_stats[ii] + } + } + } + names(cos_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") + + return(cos_purity) +} + + #' @title Set of internal functions to re-organize ColocBoost output format #' #' @description @@ -695,7 +854,6 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) { res[[flag]] <- get_between_purity(cset1, cset2, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - N = cb_obj$cb_data$data[[ii]]$N, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, P = cb_obj$cb_model_para$P ) @@ -945,7 +1103,6 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output res[[flag]] <- get_between_purity(cset1, cset2, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - N = cb_obj$cb_data$data[[ii]]$N, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, P = cb_obj$cb_model_para$P ) @@ -986,7 +1143,6 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output res[[flag]] <- get_between_purity(cset1, cset2, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - N = cb_obj$cb_data$data[[ii]]$N, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, P = cb_obj$cb_model_para$P ) diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index 3176775..c5571f6 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -42,7 +42,6 @@ merge_cos_ucos <- function(cb_obj, out_cos, out_ucos, coverage = 0.95, res <- get_between_purity(cset1, cset2, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - N = cb_obj$cb_data$data[[fine_outcome]]$N, miss_idx = cb_obj$cb_data$data[[fine_outcome]]$variable_miss, P = cb_obj$cb_model_para$P ) @@ -66,7 +65,6 @@ merge_cos_ucos <- function(cb_obj, out_cos, out_ucos, coverage = 0.95, res[[ii]] <- get_between_purity(cset1, cset2, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - N = cb_obj$cb_data$data[[ii]]$N, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, P = cb_obj$cb_model_para$P ) @@ -152,7 +150,6 @@ merge_ucos <- function(cb_obj, past_out, res[[flag]] <- get_between_purity(cset1, cset2, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - N = cb_obj$cb_data$data[[ii]]$N, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, P = cb_obj$cb_model_para$P ) diff --git a/man/get_cormat.Rd b/man/get_cormat.Rd index e089c1c..9aae1a8 100644 --- a/man/get_cormat.Rd +++ b/man/get_cormat.Rd @@ -4,7 +4,7 @@ \alias{get_cormat} \title{A fast function to calculate correlation matrix (LD matrix) from individual level data} \usage{ -get_cormat(X, intercepte = FALSE) +get_cormat(X, intercepte = TRUE) } \arguments{ \item{X}{A matrix of individual level data.} @@ -30,6 +30,8 @@ cormat <- get_cormat(X) } \seealso{ Other colocboost_utilities: -\code{\link{get_cos}()} +\code{\link{get_cos}()}, +\code{\link{get_cos_purity}()}, +\code{\link{get_hierarchical_clusters}()} } \concept{colocboost_utilities} diff --git a/man/get_cos.Rd b/man/get_cos.Rd index 6b53e23..3b277b6 100644 --- a/man/get_cos.Rd +++ b/man/get_cos.Rd @@ -2,20 +2,35 @@ % Please edit documentation in R/colocboost_output.R \name{get_cos} \alias{get_cos} -\title{Extract CoS at different coverages, without filtering by purity} +\title{Extract CoS at different coverages} \usage{ -get_cos(cb_output, coverage = 0.95) +get_cos( + cb_output, + coverage = 0.95, + X = NULL, + Xcorr = NULL, + n_purity = 100, + min_abs_corr = 0.5, + median_abs_corr = NULL +) } \arguments{ \item{cb_output}{Output object from \code{colocboost} analysis} \item{coverage}{A number between 0 and 1 specifying the \dQuote{coverage} of the estimated colocalization confidence sets (CoS) (default is 0.95).} + +\item{X}{Genotype matrix of values of the p variables. Used to compute correlations if Xcorr is not provided.} + +\item{Xcorr}{Correlation matrix of correlations between variables. Alternative to X.} + +\item{n_purity}{The maximum number of CoS variables used in calculating the correlation (\dQuote{purity}) statistics. +When the number of variables included in the CoS is greater than this number, the CoS variables are randomly subsampled.} } \value{ A list of indices of variables in each CoS. } \description{ -\code{get_cos} get the colocalization confidence sets (CoS) with different coverage. +\code{get_cos} get the colocalization confidence sets (CoS) with different coverage. If Genotype...provided...check purity } \examples{ # colocboost example @@ -37,11 +52,14 @@ for (l in 1:L) { Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) } res <- colocboost(X = X, Y = Y) -get_cos(res, coverage = 0.75) +get_cos(res, coverage = 0.99, X = X) +get_cos(res, coverage = 0.99, X = X, min_abs_corr = 0.95) } \seealso{ Other colocboost_utilities: -\code{\link{get_cormat}()} +\code{\link{get_cormat}()}, +\code{\link{get_cos_purity}()}, +\code{\link{get_hierarchical_clusters}()} } \concept{colocboost_utilities} diff --git a/man/get_cos_purity.Rd b/man/get_cos_purity.Rd new file mode 100644 index 0000000..724ef91 --- /dev/null +++ b/man/get_cos_purity.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/colocboost_output.R +\name{get_cos_purity} +\alias{get_cos_purity} +\title{Calculate purity within and in-between CoS} +\usage{ +get_cos_purity(cos, X = NULL, Xcorr = NULL, n_purity = 100) +} +\arguments{ +\item{cos}{List of variables in CoS} + +\item{X}{Genotype matrix of values of the p variables. Used to compute correlations if Xcorr is not provided.} + +\item{Xcorr}{Correlation matrix of correlations between variables. Alternative to X.} + +\item{n_purity}{The maximum number of CoS variables used in calculating the correlation (\dQuote{purity}) statistics. +When the number of variables included in the CoS is greater than this number, the CoS variables are randomly subsampled.} +} +\value{ +A list containing three matrices (min_abs_cor, max_abs_cor, median_abs_cor) with +purity statistics for all pairs of CoS. Diagonal elements represent within-CoS purity. +} +\description{ +Calculate purity statistics between all pairs of colocalization confidence sets (CoS) +} +\examples{ +# colocboost example +set.seed(1) +N <- 1000 +P <- 100 +# Generate X with LD structure +sigma <- 0.9^abs(outer(1:P, 1:P, "-")) +X <- MASS::mvrnorm(N, rep(0, P), sigma) +colnames(X) <- paste0("SNP", 1:P) +L <- 3 +true_beta <- matrix(0, P, L) +true_beta[10, 1] <- 0.5 +true_beta[10, 2] <- 0.4 +true_beta[50, 2] <- 0.3 +true_beta[80, 3] <- 0.6 +Y <- matrix(0, N, L) +for (l in 1:L) { + Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) +} +res <- colocboost(X = X, Y = Y) +cos_res <- get_cos(res, coverage = 0.8) +get_cos_purity(cos_res$cos, X = X) + +} +\seealso{ +Other colocboost_utilities: +\code{\link{get_cormat}()}, +\code{\link{get_cos}()}, +\code{\link{get_hierarchical_clusters}()} +} +\concept{colocboost_utilities} diff --git a/man/get_cos_summary.Rd b/man/get_cos_summary.Rd index 88e9525..f40e57a 100644 --- a/man/get_cos_summary.Rd +++ b/man/get_cos_summary.Rd @@ -63,6 +63,7 @@ get_cos_summary(res) } \seealso{ Other colocboost_inference: -\code{\link{get_robust_colocalization}()} +\code{\link{get_robust_colocalization}()}, +\code{\link{get_ucos_summary}()} } \concept{colocboost_inference} diff --git a/man/get_hierarchical_clusters.Rd b/man/get_hierarchical_clusters.Rd new file mode 100644 index 0000000..b63e01b --- /dev/null +++ b/man/get_hierarchical_clusters.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/colocboost_inference.R +\name{get_hierarchical_clusters} +\alias{get_hierarchical_clusters} +\title{Perform modularity-based hierarchical clustering} +\usage{ +get_hierarchical_clusters(cormat, min_cluster_corr = 0.8) +} +\arguments{ +\item{cormat}{A correlation matrix.} + +\item{min_cluster_corr}{The small correlation for the weights distributions across different iterations to be decided having only one cluster. Default is 0.8.} +} +\value{ +A list containing: +\item{cluster}{A binary matrix indicating the cluster membership of each variable.} +\item{Q_modularity}{The modularity values for the identified clusters.} +} +\description{ +This function performs a modularity-based hierarchical clustering approach to identify clusters from a correlation matrix (LD matrix). +} +\examples{ +# Example usage +set.seed(1) +N <- 100 +P <- 4 +sigma <- matrix(0.2, nrow = P, ncol = P) +diag(sigma) <- 1 +sigma[1:2, 1:2] <- 0.9 +sigma[3:4, 3:4] <- 0.9 +X <- MASS::mvrnorm(N, rep(0, P), sigma) +cormat <- get_cormat(X) +clusters <- get_hierarchical_clusters(cormat) +clusters$cluster +clusters$Q_modularity + +} +\seealso{ +Other colocboost_utilities: +\code{\link{get_cormat}()}, +\code{\link{get_cos}()}, +\code{\link{get_cos_purity}()} +} +\concept{colocboost_utilities} diff --git a/man/get_robust_colocalization.Rd b/man/get_robust_colocalization.Rd index d5d9b68..7f17ec3 100644 --- a/man/get_robust_colocalization.Rd +++ b/man/get_robust_colocalization.Rd @@ -65,6 +65,7 @@ filter_res$cos_details$cos$cos_index } \seealso{ Other colocboost_inference: -\code{\link{get_cos_summary}()} +\code{\link{get_cos_summary}()}, +\code{\link{get_ucos_summary}()} } \concept{colocboost_inference} diff --git a/man/get_ucos_summary.Rd b/man/get_ucos_summary.Rd new file mode 100644 index 0000000..d2d676e --- /dev/null +++ b/man/get_ucos_summary.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/colocboost_output.R +\name{get_ucos_summary} +\alias{get_ucos_summary} +\title{Get trait-specific summary table from a ColocBoost output.} +\usage{ +get_ucos_summary(cb_output, outcome_names = NULL, region_name = NULL) +} +\arguments{ +\item{cb_output}{Output object from \code{colocboost} analysis} + +\item{outcome_names}{Optional vector of names of outcomes, which has the same order as Y in the original analysis.} + +\item{region_name}{Optional character string. When provided, adds a column with this gene name to the output table for easier filtering in downstream analyses.} +} +\value{ +A summary table for trait-specific, uncolocalized associations with the following columns: +\item{outcomes}{Outcome being analyzed} +\item{ucos_id}{Unique identifier for trait-specific confidence sets} +\item{purity}{Minimum absolute correlation of variables within trait-specific confidence sets} +\item{top_variable}{The variable with highest variant-level probability of association (VPA)} +\item{top_variable_vpa}{Variant-level probability of association (VPA) for the top variable} +\item{ucos_npc}{Normalized probability of causal association for the trait-specific confidence set} +\item{n_variables}{Number of variables in trait-specific confidence set} +\item{ucos_index}{Indices of variables in the trait-specific confidence set} +\item{ucos_variables}{List of variables in the trait-specific confidence set} +\item{ucos_variables_vpa}{Variant-level probability of association (VPA) for all variables in the confidence set} +\item{region_name}{Region name if provided through the region_name parameter} +} +\description{ +\code{get_ucos_summary} produces a trait-specific summary table for uncolocalized (single-trait) +associations from ColocBoost results. This is particularly useful for examining trait-specific signals +or for summarizing results from single-trait FineBoost analyses. +} +\examples{ +# colocboost example with single trait analysis +set.seed(1) +N <- 1000 +P <- 100 +# Generate X with LD structure +sigma <- 0.9^abs(outer(1:P, 1:P, "-")) +X <- MASS::mvrnorm(N, rep(0, P), sigma) +colnames(X) <- paste0("SNP", 1:P) +L <- 1 # Only one trait for single-trait analysis +true_beta <- matrix(0, P, L) +true_beta[10, 1] <- 0.5 # SNP10 affects the trait +true_beta[50, 1] <- 0.2 # SNP11 also affects the trait but with lower effect +Y <- X \%*\% true_beta + rnorm(N, 0, 1) +res <- colocboost(X = X, Y = Y, output_level = 2) +# Get the trait-specifc effect summary +get_ucos_summary(res) + +} +\seealso{ +Other colocboost_inference: +\code{\link{get_cos_summary}()}, +\code{\link{get_robust_colocalization}()} +} +\concept{colocboost_inference} diff --git a/tests/testthat/test_corner_cases.R b/tests/testthat/test_corner_cases.R index 037198d..dcd9d68 100644 --- a/tests/testthat/test_corner_cases.R +++ b/tests/testthat/test_corner_cases.R @@ -20,9 +20,16 @@ generate_edge_case_data <- function(case = "missing_values", # Generate true effects with shared causal variant true_beta <- matrix(0, p, L) - true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 - true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) - true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 + if (L == 1) { + # Single trait case + true_beta[5, 1] <- 0.5 # SNP5 affects the trait + } else { + # Multi-trait case + true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 + true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) + true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 + } + # Generate Y with some noise Y <- matrix(0, n, L) @@ -64,7 +71,7 @@ generate_edge_case_data <- function(case = "missing_values", } else if (case == "high_correlation") { # Create highly correlated outcomes Y[, 2] <- 0.9 * Y[, 1] + rnorm(n, 0, 0.3) - } + } # Return test objects list( @@ -77,7 +84,6 @@ generate_edge_case_data <- function(case = "missing_values", # Test colocboost handling of missing values test_that("colocboost handles missing values in Y", { - skip_on_cran() # Generate data with missing values test_data <- generate_edge_case_data("missing_values") @@ -102,7 +108,6 @@ test_that("colocboost handles missing values in Y", { # Test colocboost with different sample sizes test_that("colocboost handles different sample sizes", { - skip_on_cran() # Generate data with different sample sizes test_data <- generate_edge_case_data("different_samples") @@ -134,7 +139,6 @@ test_that("colocboost handles different sample sizes", { # Test colocboost with different variant sets test_that("colocboost handles different variant sets", { - skip_on_cran() # Generate data with different variant sets test_data <- generate_edge_case_data("different_variants") @@ -155,8 +159,7 @@ test_that("colocboost handles different variant sets", { # Test colocboost with no true colocalization test_that("colocboost correctly identifies absence of colocalization", { - skip_on_cran() - + # Generate data with no colocalization test_data <- generate_edge_case_data("no_colocalization") @@ -185,9 +188,9 @@ test_that("colocboost correctly identifies absence of colocalization", { )) }) + # Test colocboost with highly correlated traits test_that("colocboost handles highly correlated traits", { - skip_on_cran() # Generate data with correlated traits test_data <- generate_edge_case_data("high_correlation") @@ -215,7 +218,6 @@ test_that("colocboost handles highly correlated traits", { # Test with very small dataset test_that("colocboost handles very small datasets", { - skip_on_cran() # Create tiny dataset set.seed(123) @@ -240,7 +242,6 @@ test_that("colocboost handles very small datasets", { # Test with custom parameter settings test_that("colocboost works with custom parameters", { - skip_on_cran() # Create simple dataset set.seed(123) @@ -271,7 +272,6 @@ test_that("colocboost works with custom parameters", { # Test focal outcome functionality test_that("colocboost prioritizes focal outcome correctly", { - skip_on_cran() # Generate test data set.seed(123) diff --git a/tests/testthat/test_inference.R b/tests/testthat/test_inference.R index 863456a..ce65e10 100644 --- a/tests/testthat/test_inference.R +++ b/tests/testthat/test_inference.R @@ -14,11 +14,19 @@ generate_test_result <- function(n = 100, p = 20, L = 2, seed = 42) { X <- MASS::mvrnorm(n, rep(0, p), sigma) colnames(X) <- paste0("SNP", 1:p) - # Generate true effects - create a shared causal variant + # Generate true effects based on the number of traits true_beta <- matrix(0, p, L) - true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 - true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) - true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 + + if (L == 1) { + # Single trait case + true_beta[5, 1] <- 0.5 # SNP5 affects the trait + true_beta[10, 1] <- 0.3 # SNP10 also affects the trait + } else { + # Multi-trait case + true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 + true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) + true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 + } # Generate Y with some noise Y <- matrix(0, n, L) @@ -26,23 +34,30 @@ generate_test_result <- function(n = 100, p = 20, L = 2, seed = 42) { Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) } - # Convert Y to list - Y_list <- list(Y[,1], Y[,2]) - X_list <- list(X, X) + # Prepare input for colocboost based on L + if (L == 1) { + # For single trait, Y should be a vector + Y_input <- Y[,1] + X_input <- X + } else { + # For multiple traits, convert to list format + Y_input <- lapply(1:L, function(l) Y[,l]) + X_input <- replicate(L, X, simplify = FALSE) + } # Run colocboost with minimal parameters to get a model object suppressWarnings({ result <- colocboost( - X = X_list, - Y = Y_list, + X = X_input, + Y = Y_input, M = 5, # Small number of iterations for faster testing output_level = 3 # Include full model details ) }) - result + + return(result) } - # Test colocboost_plot function test_that("colocboost_plot handles different plot options", { @@ -95,4 +110,142 @@ test_that("get_robust_colocalization filters results correctly", { # With p-value threshold expect_error(get_robust_colocalization(cb_res, pvalue_cutoff = 0.05), NA) +}) + +# Test for get_hierarchical_clusters +test_that("get_hierarchical_clusters functions correctly", { + # Test case 1: Simple 2x2 correlation matrix with high correlation + set.seed(123) + cormat_high <- matrix(c(1, 0.95, 0.95, 1), nrow = 2) + result_high <- get_hierarchical_clusters(cormat_high) + + # Should return a single cluster since correlation > min_cluster_corr + expect_equal(ncol(result_high$cluster), 1) + expect_equal(nrow(result_high$cluster), 2) + expect_equal(sum(result_high$cluster), 2) # All items in one cluster + + # Test case 2: Matrix with two distinct clusters + set.seed(456) + N <- 100 + P <- 4 + # Generate correlation structure with two distinct clusters + sigma <- matrix(0.2, nrow = P, ncol = P) + diag(sigma) <- 1 + sigma[1:2, 1:2] <- 0.9 + sigma[3:4, 3:4] <- 0.9 + X <- MASS::mvrnorm(N, rep(0, P), sigma) + cormat_two <- cor(X) + + result_two <- get_hierarchical_clusters(cormat_two) + + # Should identify two clusters + expect_equal(ncol(result_two$cluster), 2) + expect_equal(nrow(result_two$cluster), P) + + # Test case 3: Single variable edge case + cormat_single <- matrix(1, nrow = 1) + result_single <- get_hierarchical_clusters(cormat_single) + + expect_equal(ncol(result_single$cluster), 1) + expect_equal(nrow(result_single$cluster), 1) + + # Test case 4: Larger matrix with multiple clusters + set.seed(789) + P <- 10 + sigma_large <- matrix(0.1, nrow = P, ncol = P) + diag(sigma_large) <- 1 + # Create 3 distinct clusters + sigma_large[1:3, 1:3] <- 0.8 + sigma_large[4:6, 4:6] <- 0.8 + sigma_large[7:10, 7:10] <- 0.8 + X_large <- MASS::mvrnorm(N, rep(0, P), sigma_large) + cormat_large <- cor(X_large) + + result_large <- get_hierarchical_clusters(cormat_large, min_cluster_corr = 0.7) + + # Should identify 3 clusters + expect_equal(ncol(result_large$cluster), 3) + expect_equal(nrow(result_large$cluster), P) + + # Test case 5: Test with different min_cluster_corr + result_diff_threshold <- get_hierarchical_clusters(cormat_two, min_cluster_corr = 0.5) + + # With lower threshold, might merge clusters + expect_true(ncol(result_diff_threshold$cluster) <= 2) + + # Test get_modularity directly + # Test case 6: Test modularity calculation + B <- matrix(c(1, 1, 0, 0, 0, 0, 1, 1), ncol = 2) + W <- matrix(c( + 1.0, 0.9, 0.2, 0.1, + 0.9, 1.0, 0.3, 0.2, + 0.2, 0.3, 1.0, 0.8, + 0.1, 0.2, 0.8, 1.0 + ), nrow = 4) + + modularity <- get_modularity(W, B) + expect_type(modularity, "double") + expect_true(!is.na(modularity)) + + # Test case 7: Edge cases for get_modularity + # Empty matrix + W_empty <- matrix(0, nrow = 2, ncol = 2) + B_empty <- matrix(c(1, 1), ncol = 1) + mod_empty <- get_modularity(W_empty, B_empty) + expect_equal(mod_empty, 0) + + # Matrix with only positive weights + W_pos <- matrix(c(1, 0.5, 0.5, 1), nrow = 2) + B_pos <- matrix(c(1, 1), ncol = 1) + mod_pos <- get_modularity(W_pos, B_pos) + expect_true(!is.na(mod_pos)) + + # Matrix with only negative weights + W_neg <- matrix(c(1, -0.5, -0.5, 1), nrow = 2) + B_neg <- matrix(c(1, 1), ncol = 1) + mod_neg <- get_modularity(W_neg, B_neg) + expect_true(!is.na(mod_neg)) + + # Test get_n_cluster + # Test case 8: Test cluster number determination + hc <- hclust(as.dist(1 - cormat_two)) + cluster_result <- get_n_cluster(hc, cormat_two) + + expect_type(cluster_result, "list") + expect_true("n_cluster" %in% names(cluster_result)) + expect_true("Qmodularity" %in% names(cluster_result)) + + # Test case 9: All correlations above threshold + cormat_all_high <- matrix(0.9, nrow = 3, ncol = 3) + diag(cormat_all_high) <- 1 + hc_high <- hclust(as.dist(1 - cormat_all_high)) + result_all_high <- get_n_cluster(hc_high, cormat_all_high, min_cluster_corr = 0.85) + + expect_equal(result_all_high$n_cluster, 1) +}) + +# Test get_ucos_summary function +test_that("get_ucos_summary handles different parameters", { + + # Generate a test colocboost results + cb_res <- generate_test_result() + + # Basic summary call + expect_error(get_ucos_summary(cb_res), NA) + + # With custom outcome names + expect_error(get_ucos_summary(cb_res, outcome_names = c("Trait1", "Trait2", "Trait3")), NA) + + # With region name + summary_with_region <- get_ucos_summary(cb_res, region_name = "TestGene") + + # If summary is not NULL, check for region column + if (!is.null(summary_with_region)) { + expect_true("region_name" %in% colnames(summary_with_region)) + expect_equal(summary_with_region$region_name[1], "TestGene") + } + + # Test with single trait analysis result + cb_single <- generate_test_result(L=1) + single_summary <- get_ucos_summary(cb_single, outcome_names = "SingleTrait") }) \ No newline at end of file diff --git a/tests/testthat/test_model.R b/tests/testthat/test_model.R index 68420ab..23041ed 100644 --- a/tests/testthat/test_model.R +++ b/tests/testthat/test_model.R @@ -59,7 +59,6 @@ generate_test_model <- function(n = 100, p = 20, L = 2, seed = 42) { # Test for colocboost_init_data test_that("colocboost_init_data correctly initializes data", { - skip_on_cran() # Generate test data set.seed(42) @@ -92,7 +91,6 @@ test_that("colocboost_init_data correctly initializes data", { # Test colocboost_assemble function test_that("colocboost_assemble processes model results", { - skip_on_cran() # Generate a test model cb_obj <- generate_test_model() @@ -112,7 +110,6 @@ test_that("colocboost_assemble processes model results", { # Test for colocboost_workhorse test_that("colocboost_workhorse performs boosting iterations", { - skip_on_cran() # Generate a test model cb_obj <- generate_test_model() diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index f401ce9..dccc5c2 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -182,3 +182,72 @@ test_that("get_merge_ordered_with_indices merges vectors", { result2 <- get_merge_ordered_with_indices(vector_list2) expect_equal(result2, c("a", "b", "c", "d", "e")) }) + +# Test for get_cos_purity function +test_that("get_cos_purity calculates correct purity statistics", { + # Set up test data + set.seed(123) + N <- 100 + P <- 20 + + # Generate X with correlation structure + sigma <- 0.8^abs(outer(1:P, 1:P, "-")) + X <- MASS::mvrnorm(N, rep(0, P), sigma) + colnames(X) <- paste0("SNP", 1:P) + + # Create test CoS lists + cos1 <- c(1, 2, 3) + cos2 <- c(10, 11, 12) + cos3 <- c(5, 6, 7, 8) + cos_list <- list(trait1 = cos1, trait2 = cos2, trait3 = cos3) + + # Calculate X correlation matrix for testing both methods + Xcorr <- cor(X) + + # Test with X matrix input + result_X <- get_cos_purity(cos_list, X = X) + + # Test with correlation matrix input + result_Xcorr <- get_cos_purity(cos_list, Xcorr = Xcorr) + + # Basic structure tests + expect_type(result_X, "list") + expect_equal(length(result_X), 3) + expect_named(result_X, c("min_abs_cor", "max_abs_cor", "median_abs_cor")) + + # Check dimensions + expect_equal(dim(result_X$min_abs_cor), c(3, 3)) + expect_equal(rownames(result_X$min_abs_cor), c("trait1", "trait2", "trait3")) + + # Test symmetry of results + expect_equal(result_X$min_abs_cor[1, 2], result_X$min_abs_cor[2, 1]) + expect_equal(result_X$max_abs_cor[1, 3], result_X$max_abs_cor[3, 1]) + + # Test that X and Xcorr methods give same results (within numerical precision) + expect_equal(result_X$min_abs_cor, result_Xcorr$min_abs_cor, tolerance = 1e-10) + expect_equal(result_X$max_abs_cor, result_Xcorr$max_abs_cor, tolerance = 1e-10) + expect_equal(result_X$median_abs_cor, result_Xcorr$median_abs_cor, tolerance = 1e-10) + + # Test single CoS case + single_cos <- list(single = cos1) + result_single <- get_cos_purity(single_cos, X = X) + expect_equal(dim(result_single$min_abs_cor), c(1, 1)) + expect_named(result_single, c("min_abs_cor", "max_abs_cor", "median_abs_cor")) + + # Test empty CoS case + expect_null(get_cos_purity(list(), X = X)) + + # Test n_purity parameter + result_limited <- get_cos_purity(cos_list, X = X, n_purity = 2) + # The result still has the same structure + expect_type(result_limited, "list") + expect_equal(length(result_limited), 3) + + # Test error cases + expect_error(get_cos_purity(cos_list), "Either X or Xcorr must be provided") + + # Test converting numeric input to list + num_cos <- 1:5 + result_num <- get_cos_purity(num_cos, X = X) + expect_equal(dim(result_num$min_abs_cor), c(1, 1)) +}) diff --git a/vignettes/Interpret_ColocBoost_Output.Rmd b/vignettes/Interpret_ColocBoost_Output.Rmd index fdf29b2..f734faf 100644 --- a/vignettes/Interpret_ColocBoost_Output.Rmd +++ b/vignettes/Interpret_ColocBoost_Output.Rmd @@ -213,6 +213,7 @@ res$model_info$jk_update[c(5:10,36:38), ] - **`outcome_profile_loglik`**: trait-specifc profile log-likelihood changes over boosting rounds. ```{r profile_loglik} +# Plotting joint profile log-likelihood (blue) and trait-specific profile log-likelihood (red). par(mfrow=c(2,3),mar=c(4,4,2,1)) plot(res$model_info$profile_loglik, type="p", col="#3366CC", lwd=2, xlab="", ylab="Joint Profile") for(i in 1:5){ @@ -225,6 +226,7 @@ plot(res$model_info$outcome_profile_loglik[[i]], type="p", col="#CC3333", lwd=2, ```{r objetive-proximity} +# Plotting trait-specific proximity objective par(mfrow=c(2,3), mar=c(4,4,2,1)) for(i in 1:5){ plot(res$model_info$outcome_proximity_obj[[i]], type="p", col="#3366CC", lwd=2, xlab="", ylab="Trait-specific Objective", main = paste0("Trait ", i)) @@ -232,6 +234,7 @@ plot(res$model_info$outcome_proximity_obj[[i]], type="p", col="#3366CC", lwd=2, ``` ```{r objetive-best} +# Plotting trait-specific objective at the best update variant par(mfrow=c(2,3), mar=c(4,4,2,1)) for(i in 1:5){ plot(res$model_info$outcome_coupled_best_update_obj[[i]], type="p", col="#CC3333", lwd=2, xlab="", ylab=paste0("Objetive at best update variant"), main = paste0("Trait ", i)) diff --git a/vignettes/Partial_Overlap_Variants.Rmd b/vignettes/Partial_Overlap_Variants.Rmd index 91c2346..fc8fadf 100644 --- a/vignettes/Partial_Overlap_Variants.Rmd +++ b/vignettes/Partial_Overlap_Variants.Rmd @@ -85,3 +85,21 @@ res$data_info$n_variables # Plotting the results colocboost_plot(res) ``` + + +In disease-prioritized colocalization analysis with a focal trait, `ColocBoost` recommends prioritizing variants in the focal trait as the default setting. +For the example above, if we consider trait 3 as the focal trait, only variants present in trait 3 will be included in the analysis. +This ensures that the analysis focuses on variants relevant to the focal trait while accounting for partial overlaps across other traits. +If you want to include all variants across traits, you can set `focal_outcome_variables = FALSE` to disable this default behavior. + +```{r run-code-focal} +# Run colocboost +res <- colocboost(X = X, Y = Y, focal_outcome_idx = 3) + +# The number of variants in the analysis +res$data_info$n_variables + +# Plotting the results +colocboost_plot(res) +``` + From 1a30861b16fcdca8d23e3ef3ddcdb10c1e7319b4 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Fri, 18 Apr 2025 13:14:01 -0400 Subject: [PATCH 2/4] unit test pass --- R/colocboost_output.R | 4 +- man/get_ucos_summary.Rd | 2 +- tests/testthat/test_utils.R | 173 ++++++++++++++++++++++ vignettes/Interpret_ColocBoost_Output.Rmd | 2 +- 4 files changed, 177 insertions(+), 4 deletions(-) diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 4ed888f..2e7733d 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -434,7 +434,7 @@ get_robust_colocalization <- function(cb_output, #' L <- 1 # Only one trait for single-trait analysis #' true_beta <- matrix(0, P, L) #' true_beta[10, 1] <- 0.5 # SNP10 affects the trait -#' true_beta[50, 1] <- 0.2 # SNP11 also affects the trait but with lower effect +#' true_beta[80, 1] <- 0.2 # SNP11 also affects the trait but with lower effect #' Y <- X %*% true_beta + rnorm(N, 0, 1) #' res <- colocboost(X = X, Y = Y, output_level = 2) #' # Get the trait-specifc effect summary @@ -552,7 +552,7 @@ get_cos <- function(cb_output, coverage = 0.95, X = NULL, Xcorr = NULL, n_purity if (is.null(median_abs_corr)) { is_pure <- which(within_purity[, 1] >= min_abs_corr) } else { - is_pure <- which(within_purity[, 1] >= min_abs_corr | purity[, 3] >= median_abs_corr) + is_pure <- which(within_purity[, 1] >= min_abs_corr | within_purity[, 3] >= median_abs_corr) } # Filter impured CoS if (length(is_pure) == 0) { diff --git a/man/get_ucos_summary.Rd b/man/get_ucos_summary.Rd index d2d676e..00e4b66 100644 --- a/man/get_ucos_summary.Rd +++ b/man/get_ucos_summary.Rd @@ -44,7 +44,7 @@ colnames(X) <- paste0("SNP", 1:P) L <- 1 # Only one trait for single-trait analysis true_beta <- matrix(0, P, L) true_beta[10, 1] <- 0.5 # SNP10 affects the trait -true_beta[50, 1] <- 0.2 # SNP11 also affects the trait but with lower effect +true_beta[80, 1] <- 0.2 # SNP11 also affects the trait but with lower effect Y <- X \%*\% true_beta + rnorm(N, 0, 1) res <- colocboost(X = X, Y = Y, output_level = 2) # Get the trait-specifc effect summary diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index dccc5c2..2a75356 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -1,5 +1,63 @@ library(testthat) +# Utility function to generate a simple colocboost results +generate_test_result <- function(n = 100, p = 20, L = 2, seed = 42) { + set.seed(seed) + + # Generate X with LD structure + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + # Generate true effects based on the number of traits + true_beta <- matrix(0, p, L) + + if (L == 1) { + # Single trait case + true_beta[5, 1] <- 0.5 # SNP5 affects the trait + true_beta[10, 1] <- 0.3 # SNP10 also affects the trait + } else { + # Multi-trait case + true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 + true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) + true_beta[10, 2] <- 0.3 # SNP10 only affects trait 2 + } + + # Generate Y with some noise + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + # Prepare input for colocboost based on L + if (L == 1) { + # For single trait, Y should be a vector + Y_input <- Y[,1] + X_input <- X + } else { + # For multiple traits, convert to list format + Y_input <- lapply(1:L, function(l) Y[,l]) + X_input <- replicate(L, X, simplify = FALSE) + } + + # Run colocboost with minimal parameters to get a model object + suppressWarnings({ + result <- colocboost( + X = X_input, + Y = Y_input, + M = 5, # Small number of iterations for faster testing + output_level = 3 # Include full model details + ) + }) + + return(result) +} + # Test for get_integrated_weight function test_that("get_integrated_weight correctly integrates weights", { # Create test weights matrix @@ -251,3 +309,118 @@ test_that("get_cos_purity calculates correct purity statistics", { result_num <- get_cos_purity(num_cos, X = X) expect_equal(dim(result_num$min_abs_cor), c(1, 1)) }) + +# Test for get_cos function +test_that("get_cos extracts CoS correctly with generated test results", { + # Generate test data + cb_output <- generate_test_result() + + # Test basic functionality with default coverage + result_default <- get_cos(cb_output, coverage = 0.95) + + # Check structure + expect_type(result_default, "list") + expect_named(result_default, c("cos", "cos_purity")) + expect_type(result_default$cos, "list") + expect_null(result_default$cos_purity) # Should be NULL since X and Xcorr not provided + + # Get names of CoS in the result + cos_names <- names(result_default$cos) + expect_true(all(grepl("_coverage_0.95$", cos_names))) # Names should include coverage value + + # Test with different coverage values + result_high <- get_cos(cb_output, coverage = 0.99) + result_low <- get_cos(cb_output, coverage = 0.75) + + # Higher coverage should include more SNPs + for (cos_name_base in gsub("_coverage_0.95$", "", cos_names)) { + high_cos_name <- paste0(cos_name_base, "_coverage_0.99") + low_cos_name <- paste0(cos_name_base, "_coverage_0.75") + default_cos_name <- paste0(cos_name_base, "_coverage_0.95") + + # Check if names exist in all results + if (high_cos_name %in% names(result_high$cos) && + low_cos_name %in% names(result_low$cos)) { + # Higher coverage should have equal or more SNPs + expect_true( + length(result_high$cos[[high_cos_name]]) >= + length(result_default$cos[[default_cos_name]]) + ) + # Lower coverage should have equal or fewer SNPs + expect_true( + length(result_low$cos[[low_cos_name]]) <= + length(result_default$cos[[default_cos_name]]) + ) + } + } + + # Create X and Xcorr for further testing + # Extract SNP names from the first CoS + first_cos <- result_default$cos[[1]] + all_snps <- unique(unlist(result_default$cos)) + max_snp_index <- max(as.integer(gsub("SNP", "", all_snps))) + + # Create a small genotype matrix with the necessary SNPs + set.seed(123) + N <- 100 + P <- max(max_snp_index + 10, 50) # Ensure P is large enough + + # Generate X with LD structure + sigma <- 0.8^abs(outer(1:P, 1:P, "-")) + X <- MASS::mvrnorm(N, rep(0, P), sigma) + colnames(X) <- paste0("SNP", 1:P) + + # Calculate correlation matrix + Xcorr <- cor(X) + + # Test with X provided + result_with_X <- get_cos(cb_output, coverage = 0.95, X = X) + + # Check structure with X provided + expect_type(result_with_X, "list") + expect_named(result_with_X, c("cos", "cos_purity")) + expect_type(result_with_X$cos, "list") + expect_type(result_with_X$cos_purity, "list") + + # Check purity matrices + expect_named(result_with_X$cos_purity, c("min_abs_cor", "max_abs_cor", "median_abs_cor")) + expect_true(all(sapply(result_with_X$cos_purity, is.matrix))) + + # Test with Xcorr provided + result_with_Xcorr <- get_cos(cb_output, coverage = 0.95, Xcorr = Xcorr) + + # Check structure with Xcorr provided + expect_type(result_with_Xcorr, "list") + expect_named(result_with_Xcorr, c("cos", "cos_purity")) + expect_type(result_with_Xcorr$cos, "list") + expect_type(result_with_Xcorr$cos_purity, "list") + + # Test with min_abs_corr filtering + result_high_purity <- get_cos(cb_output, coverage = 0.95, X = X, min_abs_corr = 0.8) + + # High purity threshold might remove some CoS + expect_type(result_high_purity, "list") + expect_named(result_high_purity, c("cos", "cos_purity")) + + # If result_high_purity$cos is not NULL, it should have fewer or equal CoS compared to result_with_X + if (!is.null(result_high_purity$cos)) { + expect_true(length(result_high_purity$cos) <= length(result_with_X$cos)) + } + + # Test with median_abs_corr filtering + result_median_purity <- get_cos(cb_output, coverage = 0.95, X = X, min_abs_corr = 0.4, median_abs_corr = 0.7) + + # Should have structure similar to other results + expect_type(result_median_purity, "list") + expect_named(result_median_purity, c("cos", "cos_purity")) + + # Test empty colocalization results + empty_cb_output <- cb_output + empty_cb_output$cos_details$cos <- NULL + + expect_warning( + result_empty <- get_cos(empty_cb_output, coverage = 0.95), + "No colocalization results in this region!" + ) + expect_equal(result_empty, list("cos" = NULL, "cos_purity" = NULL)) +}) \ No newline at end of file diff --git a/vignettes/Interpret_ColocBoost_Output.Rmd b/vignettes/Interpret_ColocBoost_Output.Rmd index f734faf..a424277 100644 --- a/vignettes/Interpret_ColocBoost_Output.Rmd +++ b/vignettes/Interpret_ColocBoost_Output.Rmd @@ -206,7 +206,7 @@ res$cos_details$cos_top_variables ```{r jk_update} # Pick arbitrary SEC updates, see entire update in advance -res$model_info$jk_update[c(5:10,36:38), ] +res$model_info$jk_star[c(5:10,36:38), ] ``` - **`profile_loglik`**: joint profile log-likelihood changes over boosting rounds. From f95fb1e641e81dc65b68646e95ca1db32661a2a0 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Fri, 18 Apr 2025 18:12:42 -0400 Subject: [PATCH 3/4] update colocboost plot --- R/colocboost_inference.R | 7 +- R/colocboost_output.R | 15 +- R/colocboost_plot.R | 150 ++++++-- man/colocboost_plot.Rd | 21 +- man/get_cormat.Rd | 1 + man/get_hierarchical_clusters.Rd | 1 + tests/testthat/test_inference.R | 38 -- tests/testthat/test_plot.R | 324 ++++++++++++++++++ .../Disease_Prioritized_Colocalization.Rmd | 8 +- vignettes/Interpret_ColocBoost_Output.Rmd | 11 +- vignettes/Visualization_ColocBoost_Output.Rmd | 65 +++- 11 files changed, 555 insertions(+), 86 deletions(-) create mode 100644 tests/testthat/test_plot.R diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R index c9a42f5..e89acfa 100644 --- a/R/colocboost_inference.R +++ b/R/colocboost_inference.R @@ -44,6 +44,7 @@ colocboost_post_inference <- function() { #' X <- MASS::mvrnorm(N, rep(0, P), sigma) #' cormat <- get_cormat(X) #' +#' @keywords cb_post_inference #' @family colocboost_utilities #' @export get_cormat <- function(X, intercepte = TRUE) { @@ -85,6 +86,7 @@ get_cormat <- function(X, intercepte = TRUE) { #' clusters$cluster #' clusters$Q_modularity #' +#' @keywords cb_post_inference #' @family colocboost_utilities #' @export get_hierarchical_clusters <- function(cormat, min_cluster_corr = 0.8) { @@ -206,8 +208,6 @@ w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverag } - - #' Function to remove the spurious signals #' @importFrom utils head tail #' @keywords cb_post_inference @@ -295,7 +295,7 @@ check_null_post <- function(cb_obj, return(res.tmp) } } - # - add hoc + # - add hoc - for single-trait fine-mapping results cut <- if (length(cb_obj$cb_data) == 1) 0.2 else 1 # ----- null filtering @@ -323,7 +323,6 @@ check_null_post <- function(cb_obj, if (min(cb_obj$cb_model[[j]]$multi_correction_univariate[cs_variants]) >= cut) { change <- 0 } - # ------ # - check_null check_cs_change[i, j] <- change / diff(range(cb_obj$cb_model[[j]]$profile_loglike_each)) cs_change[i, j] <- change # / diff(range(cb_obj$cb_model[[j]]$profile_loglike_each)) diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 2e7733d..3d7a0c8 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -249,10 +249,14 @@ get_robust_colocalization <- function(cb_output, cb_output$cos_details <- cos_details return(cb_output) } + get_npuc <- function(npc_outcome) { + max_idx <- which.max(npc_outcome) + npc_outcome[max_idx] * prod(1 - npc_outcome[-max_idx]) + } cos_details <- cb_output$cos_details coloc_outcome_index <- coloc_outcome <- list() - colocset_names <- cos_min_npc_outcome <- c() + colocset_names <- cos_min_npc_outcome <- cos_npc <- c() for (i in 1:length(cos_details$cos$cos_index)) { cos_npc_config <- cos_details$cos_outcomes_npc[[i]] npc_outcome <- cos_npc_config$npc_outcome @@ -277,9 +281,15 @@ get_robust_colocalization <- function(cb_output, coloc_outcome_index[[i]] <- 0 coloc_outcome[[i]] <- 0 cos_min_npc_outcome[i] <- 0 + cos_npc[i] <- 0 colocset_names[i] <- paste0("remove", i) } else { cos_min_npc_outcome[i] <- min(npc_outcome[pos_pass]) + if (length(pos_pass) > 1){ + cos_npc[i] <- 1 - get_npuc(npc_outcome[pos_pass]) + } else { + cos_npc[i] <- 0 # since single-trait remain + } coloc_outcome_index[[i]] <- sort(cos_npc_config$outcomes_index[pos_pass]) coloc_outcome[[i]] <- rownames(cos_npc_config)[pos_pass] colocset_names[i] <- paste0("cos", i, ":", paste0(paste0("y", coloc_outcome_index[[i]]), collapse = "_")) @@ -288,9 +298,10 @@ get_robust_colocalization <- function(cb_output, } } } - names(coloc_outcome) <- names(coloc_outcome_index) <- names(cos_min_npc_outcome) <- colocset_names + names(coloc_outcome) <- names(coloc_outcome_index) <- names(cos_min_npc_outcome) <- names(cos_npc) <- colocset_names cos_details$cos_outcomes <- list("outcome_index" = coloc_outcome_index, "outcome_name" = coloc_outcome) cos_details$cos_min_npc_outcome <- cos_min_npc_outcome + cos_details$cos_npc <- cos_npc # - VCP cos_weights <- lapply(1:length(cos_details$cos_outcomes$outcome_index), function(idx) { diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R index 1087cdb..7a107bb 100644 --- a/R/colocboost_plot.R +++ b/R/colocboost_plot.R @@ -7,9 +7,10 @@ #' @param cb_output Output object from `colocboost` analysis #' @param y Specifies the y-axis values, default is "log10p" for -log10 transformed marginal association p-values. #' @param grange Optional plotting range of x-axis to zoom in to a specific region. -#' @param plot_focal_only Logical, if TRUE only plots colocalization with focal outcome, default is FALSE. #' @param plot_cos_idx Optional indices of CoS to plot #' @param outcome_idx Optional indices of outcomes to include in the plot. \code{outcome_idx=NULL} to plot only the outcomes having colocalization. +#' @param plot_focal_only Logical, if TRUE only plots colocalization with focal outcome, default is FALSE. +#' @param plot_focal_cos_outocme_only Logical, if TRUE only plots colocalization including at least on colocalized outcome with focal outcome, default is FALSE. #' @param points_color Background color for non-colocalized variables, default is "grey80". #' @param cos_color Optional custom colors for CoS. #' @param add_vertical Logical, if TRUE adds vertical lines at specified positions, default is FALSE @@ -21,11 +22,13 @@ #' @param show_cos_to_uncoloc Logical, if TRUE shows colocalization to uncolocalized outcomes to diagnose, default is FALSE #' @param show_cos_to_uncoloc_idx Optional indices for showing CoS to all uncolocalized outcomes #' @param show_cos_to_uncoloc_outcome Optional outcomes for showing CoS to uncolocalized outcomes +#' @param plot_ucos Logical, if TRUE plots also trait-specific (uncolocalized) sets , default is FALSE +#' @param plot_ucos_idx Optional indices of trait-specific (uncolocalized) sets to plot when included #' @param gene_name Optional gene name to display in plot title #' @param ylim_each Logical, if TRUE uses separate y-axis limits for each plot, default is TRUE #' @param outcome_legend_pos Position for outcome legend, default is "top" #' @param outcome_legend_size Size for outcome legend text, default is 1.2 -#' @param cos_legend_pos Position for colocalization set legend, default is "bottomleft" +#' @param cos_legend_pos Proportion of the legend from (left edge, bottom edge), default as (0.05, 0.4) at the left - median position #' @param show_variable Logical, if TRUE displays variant IDs, default is FALSE #' @param lab_style Vector of two numbers for label style (size, boldness), default is c(2, 1) #' @param axis_style Vector of two numbers for axis style (size, boldness), default is c(2, 1) @@ -65,9 +68,10 @@ #' @export colocboost_plot <- function(cb_output, y = "log10p", grange = NULL, - plot_focal_only = FALSE, plot_cos_idx = NULL, outcome_idx = NULL, + plot_focal_only = FALSE, + plot_focal_cos_outocme_only = FALSE, points_color = "grey80", cos_color = NULL, add_vertical = FALSE, @@ -79,11 +83,13 @@ colocboost_plot <- function(cb_output, y = "log10p", show_cos_to_uncoloc = FALSE, show_cos_to_uncoloc_idx = NULL, show_cos_to_uncoloc_outcome = NULL, + plot_ucos = FALSE, + plot_ucos_idx = NULL, gene_name = NULL, ylim_each = TRUE, outcome_legend_pos = "top", - outcome_legend_size = 1.2, - cos_legend_pos = "bottomleft", + outcome_legend_size = 1.8, + cos_legend_pos = c(0.05, 0.4), show_variable = FALSE, lab_style = c(2, 1), axis_style = c(2, 1), @@ -96,12 +102,14 @@ colocboost_plot <- function(cb_output, y = "log10p", # get cb_plot_input data from colocboost results cb_plot_input <- get_input_plot(cb_output, plot_cos_idx = plot_cos_idx, - plot_focal_only = plot_focal_only, variant_coord = variant_coord, outcome_names = outcome_names, + plot_focal_only = plot_focal_only, + plot_focal_cos_outocme_only = plot_focal_cos_outocme_only, show_cos_to_uncoloc = show_cos_to_uncoloc, show_cos_to_uncoloc_idx = show_cos_to_uncoloc_idx, - show_cos_to_uncoloc_outcome = show_cos_to_uncoloc_outcome + show_cos_to_uncoloc_outcome = show_cos_to_uncoloc_outcome, + plot_ucos = plot_ucos, plot_ucos_idx = plot_ucos_idx ) # get initial set up of plot cb_plot_init <- plot_initial(cb_plot_input, @@ -244,7 +252,7 @@ colocboost_plot <- function(cb_output, y = "log10p", if (!is.null(uncoloc)) { p.uncoloc <- sapply(uncoloc$outcome_to_uncoloc, function(idx) sum(idx == iy) != 0) p.uncoloc <- which(p.uncoloc) - texts <- shape_col <- texts_col <- c() + shape_col <- texts_col <- c() for (i.uncoloc in p.uncoloc) { uncoloc_outcome <- uncoloc$outcome_to_uncoloc[[i.uncoloc]] if (iy %in% uncoloc_outcome) { @@ -253,20 +261,32 @@ colocboost_plot <- function(cb_output, y = "log10p", x0 <- intersect(args$x, cs) y1 <- args$y[match(x0, args$x)] points(x0, y1, - pch = 4, col = adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 0.3), + pch = 4, col = adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 0.4), cex = 1.5, lwd = 1.5 ) - texts <- c(texts, uncoloc$cos_uncoloc_texts[i.cs]) shape_col <- c(shape_col, adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 1)) texts_col <- c(texts_col, adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 0.8)) } } + texts <- unlist(sapply(names(uncoloc$outcome_to_uncoloc), function(name) { + positions <- which(uncoloc$outcome_to_uncoloc[[name]] == iy) + if(length(positions) > 0) { + return(uncoloc$cos_uncoloc_texts[[name]][positions]) + } else { + return(NULL) + } + })) if (length(texts) == 0) { next } - legend(cb_plot_init$cos_legend_pos, texts, + + # Get current plot area coordinates + usr <- par("usr") + x_pos <- usr[1] + cb_plot_init$cos_legend_pos[1] * (usr[2] - usr[1]) # 5% from left edge + y_pos <- usr[3] + cb_plot_init$cos_legend_pos[2] * (usr[4] - usr[3]) # 50% from bottom edge + legend(x = x_pos, y = y_pos, texts, bty = "n", col = shape_col, text.col = texts_col, - cex = 1.5, pt.cex = 1.5, pch = 4, x.intersp = 0.1, y.intersp = 0.3 + cex = 1.5, pt.cex = 1.5, pch = 4, x.intersp = 0.1, y.intersp = 0.5 ) } } @@ -293,12 +313,25 @@ colocboost_plot <- function(cb_output, y = "log10p", # get input data for cb_plot get_input_plot <- function(cb_output, plot_cos_idx = NULL, - plot_focal_only = FALSE, variant_coord = FALSE, outcome_names = NULL, + plot_focal_only = FALSE, + plot_focal_cos_outocme_only = FALSE, show_cos_to_uncoloc = FALSE, show_cos_to_uncoloc_idx = NULL, - show_cos_to_uncoloc_outcome = NULL) { + show_cos_to_uncoloc_outcome = NULL, + plot_ucos = FALSE, + plot_ucos_idx = NULL) { + + # check ucos exists + if (plot_ucos && !"ucos_details" %in% names(cb_output)) { + warning( + "Since you want to plot trait-specific (uncolocalized) sets with plot_ucos = TRUE,", + " but there is no output of ucos from colocboost.\n", + " Please run colocboost model with output_level=2!", + " Only show colocalization results if exists." + ) + } # redefined outcome names if (!is.null(outcome_names)) { cb_output$data_info$outcome_info$outcome_names <- outcome_names @@ -330,6 +363,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, cb_output$cos_details$cos_outcomes <- cb_output$ucos_details$ucos_outcomes if (!is.null(cb_output$cos_details$cos$cos_variables)) { cb_output$cos_details$cos_vcp <- cb_output$ucos_details$ucos_weight + vcp <- list(as.numeric(cb_output$vpa)) } if_focal <- rep(TRUE, length(cb_output$cos_details$cos$cos_variables)) } @@ -357,18 +391,27 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, ncos <- length(cb_output$cos_details$cos$cos_index) select_cs <- 1:ncos - if (plot_focal_only) { - if (sum(if_focal) == 0) { - message("No focal CoS, draw all CoS.") - } else { - select_cs <- which(if_focal) + if (!is.null(plot_cos_idx)) { + if (length(setdiff(plot_cos_idx, c(1:ncos))) != 0) { + stop("Please check plot_cos_idx!") } + select_cs <- plot_cos_idx } else { - if (!is.null(plot_cos_idx)) { - if (length(setdiff(plot_cos_idx, c(1:ncos))) != 0) { - stop("please check plot_cos_idx!") + if (plot_focal_only || plot_focal_cos_outocme_only) { + if (sum(if_focal) == 0) { + message("No focal CoS, draw all CoS.") + } else if (plot_focal_only) { + select_cs <- which(if_focal) + } else { # plot_focal_cos_outocme_only is true here + # Get all outcomes colocalized with focal CoS + focal_outcomes <- unique(unlist(coloc_index[if_focal])) + # Find CoS that include at least one of these focal outcomes + cos_with_focal_outcomes <- sapply(coloc_index, function(cos_outcomes) { + length(intersect(cos_outcomes, focal_outcomes)) > 0 + }) + # Extract the indices of these CoS + select_cs <- which(cos_with_focal_outcomes) } - select_cs <- plot_cos_idx } } coloc_variables <- coloc_variables[select_cs] @@ -381,6 +424,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, } else { warnings("No colocalized effects in this region!") } + ncos <- 0 coloc_index <- select_cs <- NULL vcp <- list(rep(0, cb_output$data_info$n_variables)) cos_vcp <- lapply(1:length(analysis_outcome), function(iy) rep(0, cb_output$data_info$n_variables) ) @@ -416,7 +460,42 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, "select_cos" = select_cs ) + # check plot uncolocalizaed confidence sets from ucos_details + if (plot_ucos & cb_output$data_info$n_outcomes > 1){ + ucos_details <- cb_output$ucos_details + if (!is.null(ucos_details)){ + ucos <- ucos_details$ucos$ucos_index + ucos_outcome_index <- ucos_details$ucos_outcomes$outcome_index + ucos_hits <- lapply(ucos, function(x) x[[1]]) + # check inclusion of other options + select_ucos <- 1:length(ucos) + if (!is.null(plot_ucos_idx)) { + if (length(setdiff(plot_cos_idx, select_ucos)) != 0) { + stop("Please check plot_ucos_idx!") + } + select_ucos <- plot_ucos_idx + } else if (plot_focal_cos_outocme_only && sum(if_focal) != 0) { + # Get all outcomes colocalized with focal CoS + focal_outcomes <- unique(unlist(plot_input$coloc_index)) + # Find uCoS that include at least one of these focal outcomes + ucos_with_focal_outcomes <- sapply(ucos_outcome_index, function(ucos_outcomes) { + length(intersect(ucos_outcomes, focal_outcomes)) > 0 + }) + # Extract the indices of these uCoS + select_ucos <- which(ucos_with_focal_outcomes) + } + plot_input$cos <- c(plot_input$cos, ucos[select_ucos]) + plot_input$cos_hits <- c(plot_input$cos_hits, ucos_hits[select_ucos]) + plot_input$coloc_index <- c(plot_input$coloc_index, ucos_outcome_index[select_ucos]) + plot_input$select_cos <- c(plot_input$select_cos, ncos + select_ucos) + } + } + # check if plot cos to uncolocalized outcome + # use the updated coloc_cos and related components from plot_input if available + coloc_cos <- plot_input$cos + coloc_index <- plot_input$coloc_index + coloc_hits <- plot_input$cos_hits if (show_cos_to_uncoloc & !is.null(coloc_cos)) { if (is.null(show_cos_to_uncoloc_idx)) { cos_to_uncoloc <- coloc_cos @@ -467,7 +546,16 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, } } if (!is.null(outcome_to_uncoloc)) { - cos_uncoloc_texts <- rep("Uncolocalized effect", length(outcome_to_uncoloc)) + cos_uncoloc_texts <- lapply(1:length(outcome_to_uncoloc), function(idx){ + cname <- names(outcome_to_uncoloc)[idx] + if (grepl("^ucos", cname)){ + texts_tmp <- "Uncolocalized effect (uCoS)" + } else { + texts_tmp <- "Uncolocalized effect (CoS)" + } + return(rep(texts_tmp, length(outcome_to_uncoloc[[idx]]))) + }) + names(cos_uncoloc_texts) <- names(outcome_to_uncoloc) } else { cos_uncoloc_texts <- NULL } @@ -583,6 +671,13 @@ plot_initial <- function(cb_plot_input, y = "log10p", } else { ymax <- NULL ymin <- rep(0, length(args$y)) + + # Check if ylim_each is FALSE but no ylim is provided + if (!ylim_each) { + warning("Since you want to plot with the same y-axis range (ylim_each=FALSE), but ylim was not provided, ", + "still plotting with trait-specific y ranges (equivalent to ylim_each=TRUE).") + ylim_each <- TRUE + } } if (ylim_each & is.null(ymax)) { ymax <- sapply(plot_data, function(p) { @@ -611,13 +706,6 @@ plot_initial <- function(cb_plot_input, y = "log10p", } else { args$outcome_legend_angle <- 0 } - - if (!(cos_legend_pos %in% c( - "bottomright", "bottom", "bottomleft", "left", - "topleft", "top", "topright", "right", "center" - ))) { - cos_legend_pos <- "bottomleft" - } args$cos_legend_pos <- cos_legend_pos return(args) diff --git a/man/colocboost_plot.Rd b/man/colocboost_plot.Rd index bff458a..8229a47 100644 --- a/man/colocboost_plot.Rd +++ b/man/colocboost_plot.Rd @@ -8,9 +8,10 @@ colocboost_plot( cb_output, y = "log10p", grange = NULL, - plot_focal_only = FALSE, plot_cos_idx = NULL, outcome_idx = NULL, + plot_focal_only = FALSE, + plot_focal_cos_outocme_only = FALSE, points_color = "grey80", cos_color = NULL, add_vertical = FALSE, @@ -22,11 +23,13 @@ colocboost_plot( show_cos_to_uncoloc = FALSE, show_cos_to_uncoloc_idx = NULL, show_cos_to_uncoloc_outcome = NULL, + plot_ucos = FALSE, + plot_ucos_idx = NULL, gene_name = NULL, ylim_each = TRUE, outcome_legend_pos = "top", - outcome_legend_size = 1.2, - cos_legend_pos = "bottomleft", + outcome_legend_size = 1.8, + cos_legend_pos = c(0.05, 0.4), show_variable = FALSE, lab_style = c(2, 1), axis_style = c(2, 1), @@ -41,12 +44,14 @@ colocboost_plot( \item{grange}{Optional plotting range of x-axis to zoom in to a specific region.} -\item{plot_focal_only}{Logical, if TRUE only plots colocalization with focal outcome, default is FALSE.} - \item{plot_cos_idx}{Optional indices of CoS to plot} \item{outcome_idx}{Optional indices of outcomes to include in the plot. \code{outcome_idx=NULL} to plot only the outcomes having colocalization.} +\item{plot_focal_only}{Logical, if TRUE only plots colocalization with focal outcome, default is FALSE.} + +\item{plot_focal_cos_outocme_only}{Logical, if TRUE only plots colocalization including at least on colocalized outcome with focal outcome, default is FALSE.} + \item{points_color}{Background color for non-colocalized variables, default is "grey80".} \item{cos_color}{Optional custom colors for CoS.} @@ -69,6 +74,10 @@ colocboost_plot( \item{show_cos_to_uncoloc_outcome}{Optional outcomes for showing CoS to uncolocalized outcomes} +\item{plot_ucos}{Logical, if TRUE plots also trait-specific (uncolocalized) sets , default is FALSE} + +\item{plot_ucos_idx}{Optional indices of trait-specific (uncolocalized) sets to plot when included} + \item{gene_name}{Optional gene name to display in plot title} \item{ylim_each}{Logical, if TRUE uses separate y-axis limits for each plot, default is TRUE} @@ -77,7 +86,7 @@ colocboost_plot( \item{outcome_legend_size}{Size for outcome legend text, default is 1.2} -\item{cos_legend_pos}{Position for colocalization set legend, default is "bottomleft"} +\item{cos_legend_pos}{Proportion of the legend from (left edge, bottom edge), default as (0.05, 0.4) at the left - median position} \item{show_variable}{Logical, if TRUE displays variant IDs, default is FALSE} diff --git a/man/get_cormat.Rd b/man/get_cormat.Rd index 9aae1a8..9ace142 100644 --- a/man/get_cormat.Rd +++ b/man/get_cormat.Rd @@ -35,3 +35,4 @@ Other colocboost_utilities: \code{\link{get_hierarchical_clusters}()} } \concept{colocboost_utilities} +\keyword{cb_post_inference} diff --git a/man/get_hierarchical_clusters.Rd b/man/get_hierarchical_clusters.Rd index b63e01b..24f8a22 100644 --- a/man/get_hierarchical_clusters.Rd +++ b/man/get_hierarchical_clusters.Rd @@ -42,3 +42,4 @@ Other colocboost_utilities: \code{\link{get_cos_purity}()} } \concept{colocboost_utilities} +\keyword{cb_post_inference} diff --git a/tests/testthat/test_inference.R b/tests/testthat/test_inference.R index ce65e10..e433eb7 100644 --- a/tests/testthat/test_inference.R +++ b/tests/testthat/test_inference.R @@ -58,44 +58,6 @@ generate_test_result <- function(n = 100, p = 20, L = 2, seed = 42) { return(result) } -# Test colocboost_plot function -test_that("colocboost_plot handles different plot options", { - - # Generate a test colocboost results - cb_res <- generate_test_result() - - # Basic plot call - expect_error(suppressWarnings(colocboost_plot(cb_res)), NA) - - # Test with different y-axis values - expect_error(suppressWarnings(colocboost_plot(cb_res, y = "z_original")), NA) - - # Test with different outcome_idx - expect_error(suppressWarnings(colocboost_plot(cb_res, outcome_idx = 1)), NA) -}) - -# Test get_cos_summary function -test_that("get_cos_summary handles different parameters", { - - # Generate a test colocboost results - cb_res <- generate_test_result() - - # Basic summary call - expect_error(get_cos_summary(cb_res), NA) - - # With custom outcome names - expect_error(get_cos_summary(cb_res, outcome_names = c("Trait1", "Trait2")), NA) - - # With gene name - summary_with_gene <- get_cos_summary(cb_res, region_name = "TestGene") - - # If summary is not NULL, check for region_name column - if (!is.null(summary_with_gene)) { - expect_true("region_name" %in% colnames(summary_with_gene)) - expect_equal(summary_with_gene$region_name[1], "TestGene") - } -}) - # Test for get_strong_colocalization test_that("get_robust_colocalization filters results correctly", { diff --git a/tests/testthat/test_plot.R b/tests/testthat/test_plot.R new file mode 100644 index 0000000..d1a51dc --- /dev/null +++ b/tests/testthat/test_plot.R @@ -0,0 +1,324 @@ +library(testthat) + +# Utility function to generate a simple colocboost results +generate_test_result <- function(n = 500, p = 60, L = 4, seed = 42, output_level = 3) { + set.seed(seed) + + # Generate X with LD structure + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + # Generate true effects based on the number of traits + true_beta <- matrix(0, p, L) + + if (L == 1) { + # Single trait case + true_beta[5, 1] <- 0.5 # SNP5 affects the trait + true_beta[30, 1] <- 0.3 # SNP10 also affects the trait + } else if (L == 2) { + # Simple multi-trait case + true_beta[5, 1] <- 0.5 # SNP5 affects trait 1 + true_beta[5, 2] <- 0.4 # SNP5 also affects trait 2 (colocalized) + true_beta[30, 2] <- 0.3 # SNP10 only affects trait 2 + } else if (L == 4) { + # Complex multi-trait case with multiple colocalization patterns + # SNP5 affects traits 1, 2, and 3 (colocalized across 3 traits) + true_beta[5, 1] <- 0.5 + true_beta[5, 2] <- 0.5 + true_beta[5, 3] <- 0.5 + + # SNP10 affects traits 2 and 4 (colocalized across 2 traits) + true_beta[30, 2] <- 0.5 + true_beta[30, 4] <- 0.5 + + # SNP15 only affects trait 3 (trait-specific effect) + true_beta[40, 3] <- 0.6 + + # SNP18 only affects trait 4 (trait-specific effect) + true_beta[55, 4] <- 0.5 + } + + # Generate Y with some noise + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + # Prepare input for colocboost + if (L == 1) { + # For single trait, Y should be a vector + Y_input <- Y[,1] + X_input <- X + } else { + # For multiple traits, convert to list format + Y_input <- lapply(1:L, function(l) Y[,l]) + X_input <- replicate(L, X, simplify = FALSE) + } + + + # Run colocboost with minimal parameters to get a model object + suppressWarnings({ + result <- colocboost( + X = X_input, + Y = Y_input, + focal_outcome_idx = L, + output_level = output_level + ) + }) + + return(result) +} + +# Test colocboost_plot function with basic options +test_that("colocboost_plot basic functionality works", { + + # Generate a test colocboost result + cb_res <- generate_test_result() + + # Basic plot call + expect_error(suppressWarnings(colocboost_plot(cb_res)), NA) + + # Test with non-colocboost object + expect_error(colocboost_plot("not_a_colocboost_object"), + "Input of colocboost_plot must be a 'colocboost' object!") +}) + +# Test colocboost_plot with different y-axis options +test_that("colocboost_plot handles different y-axis options", { + + # Generate a test colocboost result + cb_res <- generate_test_result() + + # Test with different y-axis values + expect_error(suppressWarnings(colocboost_plot(cb_res, y = "log10p")), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, y = "z_original")), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, y = "cos_vcp")), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, y = "vcp")), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, y = "coef")), NA) + + # Test with invalid y-axis value + expect_error(suppressWarnings(colocboost_plot(cb_res, y = "invalid")), + "Invalid y value! Choose from 'log10p', 'z_original', 'vcp', 'coef', or 'cos_vcp'!") +}) + +# Test colocboost_plot with plot filtering options +test_that("colocboost_plot handles filtering options", { + + # Generate a test colocboost result + cb_res <- generate_test_result() + + # Test with outcome index filtering + expect_error(suppressWarnings(colocboost_plot(cb_res, outcome_idx = 1)), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, outcome_idx = 2)), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, outcome_idx = 1:2)), NA) + + # Test with plot_cos_idx filtering + cos_count <- length(cb_res$cos_details$cos$cos_variables) + if (cos_count > 0) { + expect_error(suppressWarnings(colocboost_plot(cb_res, plot_cos_idx = 1)), NA) + + # Test with invalid plot_cos_idx + if (cos_count < 10) { + expect_error(suppressWarnings(colocboost_plot(cb_res, plot_cos_idx = 10)), + "Please check plot_cos_idx!") + } + } + + # Test with plot_focal_only + expect_error(suppressWarnings(colocboost_plot(cb_res, plot_focal_only = TRUE)), NA) + + # Test with plot_focal_cos_outocme_only + expect_error(suppressWarnings(colocboost_plot(cb_res, plot_focal_cos_outocme_only = TRUE)), NA) +}) + +# Test colocboost_plot with visual customization options +test_that("colocboost_plot handles visual customization options", { + + # Generate a test colocboost result + cb_res <- generate_test_result() + + # Test with custom colors + expect_error(suppressWarnings(colocboost_plot(cb_res, points_color = "red")), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, cos_color = c("blue", "green", "orange"))), NA) + + # Test with custom styling + expect_error(suppressWarnings(colocboost_plot(cb_res, lab_style = c(3, 2))), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, axis_style = c(2.5, 2))), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, title_style = c(3, 3))), NA) + + # Test with legend position options + expect_error(suppressWarnings(colocboost_plot(cb_res, outcome_legend_pos = "top")), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, outcome_legend_pos = "bottom")), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, outcome_legend_pos = "left")), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, outcome_legend_pos = "right")), NA) + + # Test with custom legend sizes + expect_error(suppressWarnings(colocboost_plot(cb_res, outcome_legend_size = 2.0)), NA) +}) + +# Test colocboost_plot with layout options +test_that("colocboost_plot handles layout options", { + + # Generate a test colocboost result + cb_res <- generate_test_result() + + # Test with different plot_cols values + expect_error(suppressWarnings(colocboost_plot(cb_res, plot_cols = 1)), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, plot_cols = 3)), NA) + + # Test with ylim_each option + expect_error(suppressWarnings(colocboost_plot(cb_res, ylim_each = TRUE)), NA) + # When ylim_each is FALSE, we need to provide a ylim parameter + expect_error(suppressWarnings(colocboost_plot(cb_res, ylim_each = FALSE, ylim = c(0, 10))), NA) + + # Test with gene_name option + expect_error(suppressWarnings(colocboost_plot(cb_res, gene_name = "BRCA1")), NA) + + # Test with variant_coord option + expect_error(suppressWarnings(colocboost_plot(cb_res, variant_coord = FALSE)), NA) +}) + +# Test colocboost_plot with additional visualization options +test_that("colocboost_plot handles additional visualization options", { + + # Generate a test colocboost result + cb_res <- generate_test_result() + + # Test with vertical line options + expect_error(suppressWarnings(colocboost_plot(cb_res, add_vertical = TRUE, add_vertical_idx = c(5, 10))), NA) + + # Test with show_top_variables option + expect_error(suppressWarnings(colocboost_plot(cb_res, show_top_variables = TRUE)), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, show_top_variables = FALSE)), NA) + + # Test with show_variable option + expect_error(suppressWarnings(colocboost_plot(cb_res, show_variable = TRUE)), NA) +}) + +# Test colocboost_plot with single trait (finemapping) results +test_that("colocboost_plot handles single trait results", { + + # Generate a single trait colocboost result + cb_res_single <- generate_test_result(L = 1) + + # Basic plot call for single trait + expect_error(suppressWarnings(colocboost_plot(cb_res_single)), NA) + + # Test custom options with single trait + expect_error(suppressWarnings(colocboost_plot(cb_res_single, y = "vcp")), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res_single, plot_cols = 1)), NA) +}) + +# Test colocboost_plot with uncolocalized visualization options +test_that("colocboost_plot handles uncolocalized visualization options", { + + # Generate a test colocboost result with high output level to include ucos details + cb_res <- generate_test_result(output_level = 3) + + # Test with plot_ucos options + expect_error(suppressWarnings(colocboost_plot(cb_res, plot_ucos = TRUE)), NA) + + # Test with show_cos_to_uncoloc options + expect_error(suppressWarnings(colocboost_plot(cb_res, show_cos_to_uncoloc = TRUE)), NA) + + # Generate a different colocboost result to test the warning for plot_ucos + cb_res_low <- generate_test_result(output_level = 1) + # This should give a warning but not an error + expect_warning(colocboost_plot(cb_res_low, plot_ucos = TRUE), + "Since you want to plot trait-specific \\(uncolocalized\\) sets with plot_ucos = TRUE") +}) + +# Test colocboost_plot with L=4 case for complex colocalization and trait-specific effects +test_that("colocboost_plot handles L=4 case with complex colocalization patterns", { + + # Generate a test colocboost result with 4 traits and high output level + cb_res_complex <- generate_test_result(L = 4, output_level = 3) + + # Basic plot call for complex case + expect_error(suppressWarnings(colocboost_plot(cb_res_complex)), NA) + + # Test y-axis options for complex case + expect_error(suppressWarnings(colocboost_plot(cb_res_complex, y = "log10p")), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res_complex, y = "cos_vcp")), NA) + + # Test filtering for specific outcomes + expect_error(suppressWarnings(colocboost_plot(cb_res_complex, outcome_idx = 1:2)), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res_complex, outcome_idx = c(1,3))), NA) + + # Test plot_ucos for visualizing trait-specific effects + expect_error(suppressWarnings(colocboost_plot(cb_res_complex, plot_ucos = TRUE)), NA) + + # Test with specific plot_ucos_idx if available + # This is conditional because it depends on the actual number of ucos detected + if (!is.null(cb_res_complex$ucos_details) && + !is.null(cb_res_complex$ucos_details$ucos) && + length(cb_res_complex$ucos_details$ucos$ucos_index) > 0) { + n_ucos <- length(cb_res_complex$ucos_details$ucos$ucos_index) + if (n_ucos > 0) { + expect_error(suppressWarnings(colocboost_plot(cb_res_complex, + plot_ucos = TRUE, + plot_ucos_idx = 1:min(n_ucos, 2))), NA) + } + } + + # Test visualization of both colocalization and trait-specific effects together + expect_error(suppressWarnings(colocboost_plot(cb_res_complex, + plot_ucos = TRUE, + show_cos_to_uncoloc = TRUE)), NA) +}) + +# Test colocboost_plot with custom outcome names +test_that("colocboost_plot handles custom outcome names", { + + # Generate a test colocboost result + cb_res <- generate_test_result() + + # Test with custom outcome names + expect_error(suppressWarnings(colocboost_plot(cb_res, outcome_names = c("Trait1", "Trait2"))), NA) +}) + +# Test colocboost_plot with a specific range +test_that("colocboost_plot handles zoom-in with grange", { + + # Generate a test colocboost result + cb_res <- generate_test_result() + + # Test with grange option to zoom in + expect_error(suppressWarnings(colocboost_plot(cb_res, grange = 5:15)), NA) +}) + +# Test colocboost_plot with focal outcome in L=4 case +test_that("colocboost_plot handles focal outcome in complex cases", { + + # Generate a test colocboost result with 4 traits and focal outcome set + cb_res_focal <- generate_test_result(L = 4, output_level = 3) + + # Basic plot call with focal outcome + expect_error(suppressWarnings(colocboost_plot(cb_res_focal)), NA) + + # Test plot_focal_only option + expect_error(suppressWarnings(colocboost_plot(cb_res_focal, plot_focal_only = TRUE)), NA) + + # Test plot_focal_cos_outocme_only option + expect_error(suppressWarnings(colocboost_plot(cb_res_focal, plot_focal_cos_outocme_only = TRUE)), NA) + + # Combine focal outcome filtering with other options + expect_error(suppressWarnings(colocboost_plot(cb_res_focal, + plot_focal_only = TRUE, + y = "cos_vcp")), NA) + + expect_error(suppressWarnings(colocboost_plot(cb_res_focal, + plot_focal_cos_outocme_only = TRUE, + plot_ucos = TRUE)), NA) + + # Test focusing only on outcomes colocalized with focal outcome + expect_error(suppressWarnings(colocboost_plot(cb_res_focal, + plot_focal_cos_outocme_only = TRUE, + outcome_idx = 1:3)), NA) +}) \ No newline at end of file diff --git a/vignettes/Disease_Prioritized_Colocalization.Rmd b/vignettes/Disease_Prioritized_Colocalization.Rmd index a97e29a..844ac51 100644 --- a/vignettes/Disease_Prioritized_Colocalization.Rmd +++ b/vignettes/Disease_Prioritized_Colocalization.Rmd @@ -51,7 +51,7 @@ LD <- get_cormat(Ind_5traits$X[[1]]) ``` For analyze the specific one type of data, you can refer to the following -tutorals [Individual Level Data Colocalization](https://statfungen.github.io/colocboost/articles/Individual_Level_Colocalization.html) and +tutorials [Individual Level Data Colocalization](https://statfungen.github.io/colocboost/articles/Individual_Level_Colocalization.html) and [Summary Level Data Colocalization](https://statfungen.github.io/colocboost/articles/Summary_Level_Colocalization.html). @@ -116,6 +116,7 @@ res <- colocboost(X = X, Y = Y, colocboost_plot(res, plot_focal_only = TRUE) ``` + Unlike the other existing methods, the disease-prioritized mode of ColocBoost only used a soft prioritization strategy. Therefore, it does not only identify the colocalization of the focal trait, but also the colocalization across the other traits without the focal trait. To extract all CoS and visualization of all colocalization results, you can use the following code: @@ -128,8 +129,11 @@ res$cos_details$cos$cos_index colocboost_plot(res) ``` + ### Results Interpretation For comprehensive tutorials on result interpretation and advanced visualization techniques, please visit our documentation portal at [Visualization of ColocBoost Results](https://statfungen.github.io/colocboost/articles/Visualization_ColocBoost_Output.html) and -[Interpret ColocBoost Output](https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html). \ No newline at end of file +[Interpret ColocBoost Output](https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html). + + diff --git a/vignettes/Interpret_ColocBoost_Output.Rmd b/vignettes/Interpret_ColocBoost_Output.Rmd index a424277..76b8920 100644 --- a/vignettes/Interpret_ColocBoost_Output.Rmd +++ b/vignettes/Interpret_ColocBoost_Output.Rmd @@ -245,7 +245,11 @@ for(i in 1:5){ ## 4. Trait-specific effects information (UCoS) -TO-DO-LIST +There is `ucos_details` in ColocBoost output when setting `output_level = 2`, including the trait-specific (uncolocalized) information from single-effect learner (SEL). + +- **`ucos`**: +- **`cb_model_para`**: + ```{r ucos-details} res <- colocboost(X = X, Y = Y, sumstat = sumstat, LD = LD, output_level = 2) @@ -253,6 +257,9 @@ res <- colocboost(X = X, Y = Y, sumstat = sumstat, LD = LD, output_level = 2) ## 5. Diagnostic details -TO-DO-LIST +There is `diagnostic_details` in ColocBoost output when setting `output_level = 3`: + +- **`cb_model`**: trait-specific proximity gradient boosting model, including proximity weight at each iteration, residual after gradient boosting, et al. +- **`cb_model_para`**: parameters used in fitting ColocBoost model. For more diagnosis of ColocBoost model fitting, please refer to our Tutorial (TO-DO-LIST). diff --git a/vignettes/Visualization_ColocBoost_Output.Rmd b/vignettes/Visualization_ColocBoost_Output.Rmd index 2551c71..15b68dd 100644 --- a/vignettes/Visualization_ColocBoost_Output.Rmd +++ b/vignettes/Visualization_ColocBoost_Output.Rmd @@ -100,5 +100,68 @@ Following plot also shows the top variants. ```{r vertical-plot} -colocboost_plot(res, show_top_variables = TRUE, add_vertical = TRUE, add_vertical_idx = unique(unlist(Ind_5traits$true_effect_variants))) +colocboost_plot(res, show_top_variables = TRUE, + add_vertical = TRUE, + add_vertical_idx = unique(unlist(Ind_5traits$true_effect_variants))) ``` + + +### 2.5. Plot with trait-specific sets if exists + +There are two options avaiable for plotting the trait-specific (uncolocalized) variants: + +- `plot_ucos = FALSE` (default), if `TRUE` will plot all trait-specific (uncolocalized) sets. +- `plot_ucos_idx = NULL` (default) indicates all confidence sets are plotted. `plot_cos_idx = 1` can be specified to plot the 1st confidence sets, and so on. + +*Important Note*: You should use `colocboost(..., output_level = 2)` to obtain the trait-specific (uncolocalized) information. + +```{r trait-specific} +# Create a mixed dataset +data(Ind_5traits) +data(Heterogeneous_Effect) +X <- Ind_5traits$X[1:3] +Y <- Ind_5traits$Y[1:3] +X1 <- Heterogeneous_Effect$X[,1:3000] +Y1 <- Heterogeneous_Effect$Y[,1,drop=F] + +# Run colocboost +res <- colocboost(X = c(X, list(X1)), Y = c(Y, list(Y1)), output_level = 2) +colocboost_plot(res, plot_ucos = TRUE) +``` + +In this example, there are two colocalized sets (blue and orange) and +two trait-specific sets for trait 4 only (green and purple). +For comprehensive tutorials on result interpretation, please please visit our documentation portal +at [Interpret ColocBoost Output](https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html). + + + +### 2.6 Plot with focal trait for disease prioritized colocalization + +There are three options avaiable for plotting the results from disease prioritized colocalization, considering a focal trait: + +- `plot_focal_only = FALSE` (default), if `TRUE` will only plot CoS with focal trait and ignoring other CoS. +- `plot_focal_cos_outocme_only = FALSE` (default) and recommend for visulization for disease prioritized colocalization. +If `TRUE` will plot all CoS colocalized with at least on traits within CoS of focal traits. + +```{r focal-colocalization} +# Create a mixed dataset +data(Ind_5traits) +data(Sumstat_5traits) +X <- Ind_5traits$X[1:3] +Y <- Ind_5traits$Y[1:3] +sumstat <- Sumstat_5traits$sumstat[4] +LD <- get_cormat(Ind_5traits$X[[1]]) + +# Run colocboost +res <- colocboost(X = X, Y = Y, + sumstat = sumstat, LD = LD, + focal_outcome_idx = 4) + +# Only plot CoS with focal trait +colocboost_plot(res, plot_focal_only = TRUE) +# Plot all CoS including at least one traits colocalized with focal trait +colocboost_plot(res, plot_focal_cos_outocme_only = TRUE) +``` + + From 99199333c4db8329095349f6f10abada9fad320d Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Fri, 18 Apr 2025 19:03:04 -0400 Subject: [PATCH 4/4] release draft --- R/colocboost.R | 2 + R/colocboost_output.R | 11 ++++- R/colocboost_plot.R | 2 + man/colocboost.Rd | 3 ++ man/colocboost_plot.Rd | 3 ++ man/get_cos_summary.Rd | 4 ++ man/get_robust_colocalization.Rd | 4 ++ man/get_ucos_summary.Rd | 4 ++ tests/testthat/test_plot.R | 48 ++++++++++++++++++ vignettes/Interpret_ColocBoost_Output.Rmd | 60 ++++++++++++++++++++--- 10 files changed, 133 insertions(+), 8 deletions(-) diff --git a/R/colocboost.R b/R/colocboost.R index d2b0313..d2f994f 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -117,6 +117,8 @@ #' } #' res <- colocboost(X = X, Y = Y) #' res$cos_details$cos$cos_index +#' +#' @source See detailed instructions in our tutorial portal: \url{https://statfungen.github.io/colocboost/index.html} #' #' @family colocboost #' @importFrom stats na.omit diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 3d7a0c8..8695921 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -45,6 +45,9 @@ #' res <- colocboost(X = X, Y = Y) #' get_cos_summary(res) #' +#' @source See detailed instructions in our tutorial portal: +#' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} +#' #' @family colocboost_inference #' @export get_cos_summary <- function(cb_output, @@ -173,6 +176,9 @@ get_cos_summary <- function(cb_output, #' res$cos_details$cos$cos_index #' filter_res <- get_robust_colocalization(res, cos_npc_cutoff = 0.5, npc_outcome_cutoff = 0.2) #' filter_res$cos_details$cos$cos_index +#' +#' @source See detailed instructions in our tutorial portal: +#' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} #' #' @family colocboost_inference #' @export @@ -198,7 +204,7 @@ get_robust_colocalization <- function(cb_output, if (is.null(pvalue_cutoff)) { message(paste0( "Extracting colocalization results with cos_npc_cutoff = ", cos_npc_cutoff, " and npc_outcome_cutoff = ", npc_outcome_cutoff, ".\n", - "For each CoS, keep the outcomes configurations that the npc_outcome > ", npc_outcome_cutoff, "." + "For each CoS, keep the outcomes configurations that the npc_outcome >= ", npc_outcome_cutoff, "." )) } else { if (pvalue_cutoff > 1 | pvalue_cutoff < 0) { @@ -450,6 +456,9 @@ get_robust_colocalization <- function(cb_output, #' res <- colocboost(X = X, Y = Y, output_level = 2) #' # Get the trait-specifc effect summary #' get_ucos_summary(res) +#' +#' @source See detailed instructions in our tutorial portal: +#' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} #' #' @family colocboost_inference #' @export diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R index 7a107bb..8bbf3a5 100644 --- a/R/colocboost_plot.R +++ b/R/colocboost_plot.R @@ -63,6 +63,8 @@ #' @importFrom utils head tail #' @importFrom graphics abline axis legend mtext par points text #' @importFrom grDevices adjustcolor +#' +#' @source See detailed instructions in our tutorial portal: \url{https://statfungen.github.io/colocboost/articles/Visualization_ColocBoost_Output.html} #' #' @family colocboost_plot #' @export diff --git a/man/colocboost.Rd b/man/colocboost.Rd index 28c9d4f..84811ab 100644 --- a/man/colocboost.Rd +++ b/man/colocboost.Rd @@ -3,6 +3,9 @@ \name{colocboost} \alias{colocboost} \title{ColocBoost: A gradient boosting informed multi-omics xQTL colocalization method} +\source{ +See detailed instructions in our tutorial portal: \url{https://statfungen.github.io/colocboost/index.html} +} \usage{ colocboost( X = NULL, diff --git a/man/colocboost_plot.Rd b/man/colocboost_plot.Rd index 8229a47..2b3da6a 100644 --- a/man/colocboost_plot.Rd +++ b/man/colocboost_plot.Rd @@ -3,6 +3,9 @@ \name{colocboost_plot} \alias{colocboost_plot} \title{Plot visualization plot from a ColocBoost output.} +\source{ +See detailed instructions in our tutorial portal: \url{https://statfungen.github.io/colocboost/articles/Visualization_ColocBoost_Output.html} +} \usage{ colocboost_plot( cb_output, diff --git a/man/get_cos_summary.Rd b/man/get_cos_summary.Rd index f40e57a..c6af013 100644 --- a/man/get_cos_summary.Rd +++ b/man/get_cos_summary.Rd @@ -3,6 +3,10 @@ \name{get_cos_summary} \alias{get_cos_summary} \title{Get colocalization summary table from a ColocBoost output.} +\source{ +See detailed instructions in our tutorial portal: +\url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} +} \usage{ get_cos_summary( cb_output, diff --git a/man/get_robust_colocalization.Rd b/man/get_robust_colocalization.Rd index 7f17ec3..caa0193 100644 --- a/man/get_robust_colocalization.Rd +++ b/man/get_robust_colocalization.Rd @@ -3,6 +3,10 @@ \name{get_robust_colocalization} \alias{get_robust_colocalization} \title{Recalibrate and summarize robust colocalization events.} +\source{ +See detailed instructions in our tutorial portal: +\url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} +} \usage{ get_robust_colocalization( cb_output, diff --git a/man/get_ucos_summary.Rd b/man/get_ucos_summary.Rd index 00e4b66..4aaaa5c 100644 --- a/man/get_ucos_summary.Rd +++ b/man/get_ucos_summary.Rd @@ -3,6 +3,10 @@ \name{get_ucos_summary} \alias{get_ucos_summary} \title{Get trait-specific summary table from a ColocBoost output.} +\source{ +See detailed instructions in our tutorial portal: +\url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} +} \usage{ get_ucos_summary(cb_output, outcome_names = NULL, region_name = NULL) } diff --git a/tests/testthat/test_plot.R b/tests/testthat/test_plot.R index d1a51dc..0331c3f 100644 --- a/tests/testthat/test_plot.R +++ b/tests/testthat/test_plot.R @@ -321,4 +321,52 @@ test_that("colocboost_plot handles focal outcome in complex cases", { expect_error(suppressWarnings(colocboost_plot(cb_res_focal, plot_focal_cos_outocme_only = TRUE, outcome_idx = 1:3)), NA) +}) + +# Test colocboost_plot with varying cutoff settings from get_robust_colocalization +test_that("colocboost_plot handles varying cutoff settings", { + + # Generate a test colocboost result + cb_res <- generate_test_result(output_level = 3) + + # Test with different cutoff settings + cutoff_settings <- list( + list(cos_npc_cutoff = 0.5, npc_outcome_cutoff = 0.2), + list(cos_npc_cutoff = 0.7, npc_outcome_cutoff = 0.3), + list(cos_npc_cutoff = 0.9, npc_outcome_cutoff = 0.5), + list(cos_npc_cutoff = 1.0, npc_outcome_cutoff = 0.5), # Corner case: cos_npc_cutoff = 1 + list(cos_npc_cutoff = 0.5, npc_outcome_cutoff = 1.0), # Corner case: npc_outcome_cutoff = 1 + list(cos_npc_cutoff = 1.0, npc_outcome_cutoff = 1.0) # Corner case: both cutoffs = 1 + ) + + for (cutoff in cutoff_settings) { + # Apply robust colocalization filtering + filter_res <- get_robust_colocalization( + cb_res, + cos_npc_cutoff = cutoff$cos_npc_cutoff, + npc_outcome_cutoff = cutoff$npc_outcome_cutoff + ) + + # Test basic plot call with filtered results + expect_error(suppressWarnings(colocboost_plot(filter_res)), NA) + + # Test y-axis options with filtered results + expect_error(suppressWarnings(colocboost_plot(filter_res, y = "log10p")), NA) + expect_error(suppressWarnings(colocboost_plot(filter_res, y = "cos_vcp")), NA) + + # Test plot_focal_only option + expect_error(suppressWarnings(colocboost_plot(filter_res, plot_focal_only = TRUE)), NA) + + # Test plot_ucos option + expect_error(suppressWarnings(colocboost_plot(filter_res, plot_ucos = TRUE)), NA) + + # Test show_cos_to_uncoloc option + expect_error(suppressWarnings(colocboost_plot(filter_res, show_cos_to_uncoloc = TRUE)), NA) + + # Test combined options + expect_error(suppressWarnings(colocboost_plot(filter_res, + plot_focal_only = TRUE, + plot_ucos = TRUE, + y = "cos_vcp")), NA) + } }) \ No newline at end of file diff --git a/vignettes/Interpret_ColocBoost_Output.Rmd b/vignettes/Interpret_ColocBoost_Output.Rmd index 76b8920..c7315fb 100644 --- a/vignettes/Interpret_ColocBoost_Output.Rmd +++ b/vignettes/Interpret_ColocBoost_Output.Rmd @@ -241,21 +241,67 @@ for(i in 1:5){ } ``` +### 3.5. Trait-specific effects information (**`ucos_details`**) +There is `ucos_details` in ColocBoost output when setting `output_level = 2`, including the trait-specific (uncolocalized) information from the single-effect learner (SEL). -## 4. Trait-specific effects information (UCoS) +```{r ucos-details} +# Create a mixed dataset +data(Ind_5traits) +data(Heterogeneous_Effect) +X <- Ind_5traits$X[1:3] +Y <- Ind_5traits$Y[1:3] +X1 <- Heterogeneous_Effect$X[,1:3000] +Y1 <- Heterogeneous_Effect$Y[,1,drop=F] +res <- colocboost(X = c(X, list(X1)), Y = c(Y, list(Y1)), output_level = 2) +``` -There is `ucos_details` in ColocBoost output when setting `output_level = 2`, including the trait-specific (uncolocalized) information from single-effect learner (SEL). +#### 3.5.1. Trait-specific (uncolocalized) traits (**`ucos`**) -- **`ucos`**: -- **`cb_model_para`**: +- **`ucos`**: A list containing detailed information about trait-specific (uncolocalized) traits for each uCoS. + - **`ucos_index`**: Indices of trait-specific (uncolocalized) variables. + - **`ucos_variables`**: Names of trait-specific (uncolocalized) variables. +```{r ucos} +res$ucos_details$ucos +``` -```{r ucos-details} -res <- colocboost(X = X, Y = Y, sumstat = sumstat, LD = LD, output_level = 2) +#### 3.5.2. Trait-specific (uncolocalized) outcomes (**`ucos_outcomes`**) + +- **`ucos_outcomes`**: A list with detailed information about trait-specific (uncolocalized) outcomes for each uCoS. + - **`outcome_index`**: Indices of trait-specific (uncolocalized) outcomes. + - **`outcome_name`**: Names of trait-specific (uncolocalized) outcomes. + +```{r ucos-outcomes} +res$ucos_details$ucos_outcomes ``` -## 5. Diagnostic details +#### 3.5.3. Purity across CoS and uCoS (**`cos_ucos_purity`**) + +- **`cos_ucos_purity`**: Includes three lists, each containing an $S \times uS$ matrix, where $S$ is the number of CoS and $uS$ is the number of uCoS: + - `min_abs_cor`: Minimum absolute correlation of variables across each pair of CoS and uCoS. + - `median_abs_cor`: Median absolute correlation of variables across each pair of CoS and uCoS. + - `max_abs_cor`: Maximum absolute correlation of variables across each pair of CoS and uCoS. + +```{r cos-ucos-purity} +res$ucos_details$cos_ucos_purity +``` + + +#### 3.5.4. Other components + +- **`ucos_weight`**: Integrative weights for each trait-specific (uncolocalized) trait, used to recalibrate UCoS when traits are filtered out. +- **`ucos_top_variables`**: Indices and names of the top variable for each UCoS, which is the variable with the highest VCP. +- **`ucos_purity`**: Includes three lists, each containing an $uS \times uS$ matrix, where $uS$ is the number of uCoS: + - `min_abs_cor`: Minimum absolute correlation of variables within (diagonal) UCoS or between (off-diagonal) different uCoS. + - `median_abs_cor`: Median absolute correlation of variables within or between UCoS. + - `max_abs_cor`: Maximum absolute correlation of variables within or between UCoS. + +By analyzing these components, you can gain a deeper understanding of trait-specific (uncolocalized) effects that are not colocalized, +providing additional insights into the dataset. + + +### 3.6. Diagnostic details (**`diagnostic_details`**) There is `diagnostic_details` in ColocBoost output when setting `output_level = 3`: