diff --git a/NAMESPACE b/NAMESPACE index 4650e0f..ae0fd67 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,8 @@ export(colocboost) export(colocboost_plot) +export(get_ambiguous_colocalization) +export(get_colocboost_summary) export(get_cormat) export(get_cos) export(get_cos_purity) diff --git a/R/colocboost.R b/R/colocboost.R index dcc1802..25fc181 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -85,8 +85,9 @@ #' @param check_null_max The smallest value of change of profile loglikelihood for each outcome. #' @param weaker_effect If \code{weaker_effect = TRUE}, consider the weaker single effect due to coupling effects #' @param LD_free When \code{LD_free = FALSE}, objective function doesn't include LD information. -#' @param output_level When \code{output_level = 2}, return the ucos details for the single specific effects. -#' When \code{output_level = 3}, return the entire Colocboost model to diagnostic results (more space). +#' @param output_level When \code{output_level = 1}, return basic cos details for colocalization results +#' When \code{output_level = 2}, return the ucos details for the single specific effects. +#' When \code{output_level = 3}, return the entire Colocboost model to diagnostic results (more space). #' #' @return A \code{"colocboost"} object with some or all of the following elements: #' @@ -95,7 +96,9 @@ #' \item{cos_details}{A object with all information for colocalization results.} #' \item{data_info}{A object with detailed information from input data} #' \item{model_info}{A object with detailed information for colocboost model} -#' +#' \item{ucos_details}{A object with all information for trait-specific effects when \code{output_level = 2}.} +#' \item{diagnositci_details}{A object with diagnostic details for ColocBoost model when \code{output_level = 3}.} +#' #' @examples #' # colocboost example #' set.seed(1) diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R index 0648bd8..567d43d 100644 --- a/R/colocboost_inference.R +++ b/R/colocboost_inference.R @@ -568,3 +568,4 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) { names(normalization_evidence) <- names(npc) <- names(coloc_out$cos) return(list(normalization_evidence = normalization_evidence, npc = npc)) } + diff --git a/R/colocboost_output.R b/R/colocboost_output.R index b7a0265..7214863 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -1,28 +1,53 @@ -#' @rdname get_cos_summary +#' @rdname get_colocboost_summary #' -#' @title Get colocalization summary table from a ColocBoost output. +#' @title Get summary tables from a ColocBoost output. #' -#' @description `get_cos_summary` get the colocalization summary table with or without the outcomes of interest. +#' @description `get_colocboost_summary` get colocalization and trait-specific summary table +#' with or without the outcomes of interest. #' #' @param cb_output Output object from `colocboost` analysis +#' @param summary_level When \code{summary_level = 1}, return basic sumamry table for colocalization results. See details in `get_ucos_summary` function when \code{summary_level = 2}. #' @param outcome_names Optional vector of names of outcomes, which has the same order as Y in the original analysis. #' @param interest_outcome Optional vector specifying a subset of outcomes from \code{outcome_names} to focus on. When provided, only colocalization events that include at least one of these outcomes will be returned. #' @param region_name Optional character string. When provided, adds a column with this gene name to the output table for easier filtering in downstream analyses. -#' -#' @return A summary table for colocalization events with the following columns: -#' \item{focal_outcome}{The focal outcome being analyzed if exists. Otherwise, it is \code{FALSE}.} -#' \item{colocalized_outcomes}{Colocalized outcomes for colocalization confidence set (CoS) } -#' \item{cos_id}{Unique identifier for colocalization confidence set (CoS) } -#' \item{purity}{Minimum absolute correlation of variables with in colocalization confidence set (CoS) } -#' \item{top_variable}{The variable with highest variant colocalization probability (VCP) } -#' \item{top_variable_vcp}{Variant colocalization probability for the top variable} -#' \item{cos_npc}{Normalized probability of colocalization} -#' \item{min_npc_outcome}{Minimum normalized probability of colocalized traits} -#' \item{n_variables}{Number of variables in colocalization confidence set (CoS)} -#' \item{colocalized_index}{Indices of colocalized variables} -#' \item{colocalized_variables}{List of colocalized variables} -#' \item{colocalized_variables_vcp}{Variant colocalization probabilities for all colocalized variables} -#' +#' @param min_abs_corr_between_ucos Minimum absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.5. +#' @param median_abs_corr_between_ucos Median absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.8. +#' +#' @return A list containing results from the ColocBoost analysis: +#' \itemize{ +#' \item When \code{summary_level = 1} (default): +#' \itemize{ +#' \item \code{cos_summary}: A summary table for colocalization events with the following columns: +#' \itemize{ +#' \item \code{focal_outcome}: The focal outcome being analyzed if exists. Otherwise, it is \code{FALSE}. +#' \item \code{colocalized_outcomes}: Colocalized outcomes for colocalization confidence set (CoS) +#' \item \code{cos_id}: Unique identifier for colocalization confidence set (CoS) +#' \item \code{purity}: Minimum absolute correlation of variables within colocalization confidence set (CoS) +#' \item \code{top_variable}: The variable with highest variant colocalization probability (VCP) +#' \item \code{top_variable_vcp}: Variant colocalization probability for the top variable +#' \item \code{cos_npc}: Normalized probability of colocalization +#' \item \code{min_npc_outcome}: Minimum normalized probability of colocalized traits +#' \item \code{n_variables}: Number of variables in colocalization confidence set (CoS) +#' \item \code{colocalized_index}: Indices of colocalized variables +#' \item \code{colocalized_variables}: List of colocalized variables +#' \item \code{colocalized_variables_vcp}: Variant colocalization probabilities for all colocalized variables +#' } +#' } +#' \item When \code{summary_level = 2}: +#' \itemize{ +#' \item \code{cos_summary}: As described above +#' \item \code{ucos_summary}: A summary table for trait-specific (uncolocalized) effects +#' } +#' \item When \code{summary_level = 3}: +#' \itemize{ +#' \item \code{cos_summary}: As described above +#' \item \code{ucos_summary}: A summary table for trait-specific (uncolocalized) effects +#' \item \code{ambiguous_ucos_summary}: A summary table for ambiguous colocalization events from trait-specific effects +#' } +#' } +#' @details When \code{summary_level = 2} or \code{summary_level = 3}, additional details for trait-specific effects and ambiguous +#' colocalization events are included. See \code{\link{get_ucos_summary}} for details on these tables. +#' #' @examples #' # colocboost example #' set.seed(1) @@ -43,92 +68,48 @@ #' Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) #' } #' res <- colocboost(X = X, Y = Y) -#' get_cos_summary(res) +#' get_colocboost_summary(res) #' #' @source See detailed instructions in our tutorial portal: #' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} #' #' @family colocboost_inference #' @export -get_cos_summary <- function(cb_output, - outcome_names = NULL, - interest_outcome = NULL, - region_name = NULL) { - if (!inherits(cb_output, "colocboost")) { - stop("Input must from colocboost output!") - } - - coloc_csets <- cb_output$cos_details$cos$cos_index - if (length(coloc_csets) != 0) { - analysis_outcome <- cb_output$data_info$outcome_info$outcome_names - if (!is.null(outcome_names)) { - analysis_outcome <- outcome_names +#' +get_colocboost_summary <- function(cb_output, + summary_level = 1, + outcome_names = NULL, + interest_outcome = NULL, + region_name = NULL, + min_abs_corr_between_ucos = 0.5, + median_abs_corr_between_ucos = 0.8) { + + if (!inherits(cb_output, "colocboost")) { + stop("Input must from colocboost output!") } - coloc_outcome <- lapply(cb_output$cos_details$cos_outcomes$outcome_index, function(idx) analysis_outcome[idx]) - coloc_sets <- cb_output$cos_details$cos$cos_index - if (!is.null(cb_output$cos_warnings)) { - cos_warnings + cos_summary <- get_cos_summary(cb_output, outcome_names, interest_outcome, region_name) + if (summary_level == 1) { + return(list("cos_summary" = cos_summary)) } - vcp <- as.numeric(cb_output$vcp) - summary_table <- matrix(NA, nrow = length(coloc_sets), ncol = 12) - colnames(summary_table) <- c( - "focal_outcome", "colocalized_outcomes", "cos_id", "purity", - "top_variable", "top_variable_vcp", "cos_npc", "min_npc_outcome", "n_variables", - "colocalized_index", "colocalized_variables", "colocalized_variables_vcp" - ) - summary_table <- as.data.frame(summary_table) - summary_table[, 1] <- FALSE - summary_table[, 2] <- unlist(sapply(coloc_outcome, function(tmp) paste0(tmp, collapse = "; "))) - summary_table[, 3] <- names(coloc_sets) - summary_table[, 4] <- as.numeric(diag(as.matrix(cb_output$cos_details$cos_purity$min_abs_cor))) - summary_table[, 5] <- unlist(sapply(cb_output$cos_details$cos$cos_variables, function(tmp) tmp[1])) - summary_table[, 6] <- sapply(coloc_sets, function(tmp) max(vcp[tmp])) - summary_table[, 7] <- round(as.numeric(cb_output$cos_details$cos_npc), 4) - summary_table[, 8] <- round(as.numeric(cb_output$cos_details$cos_min_npc_outcome), 4) - summary_table[, 9] <- as.numeric(sapply(coloc_sets, length)) - summary_table[, 10] <- unlist(sapply(coloc_sets, function(tmp) paste0(tmp, collapse = "; "))) - summary_table[, 11] <- unlist(sapply(cb_output$cos_details$cos$cos_variables, function(tmp) paste0(tmp, collapse = "; "))) - summary_table[, 12] <- unlist(sapply(coloc_sets, function(tmp) paste0(vcp[tmp], collapse = "; "))) - if (!is.null(region_name)) { - summary_table$region_name <- region_name - } - # - if focal colocalization - focal_outcome_idx <- which(cb_output$data_info$outcome_info$is_focal) - if (length(focal_outcome_idx) != 0) { - focal_outcome <- analysis_outcome[focal_outcome_idx] - tmp <- sapply(focal_outcome, function(tmp) grep(paste0(tmp, "\\b"), analysis_outcome)) - if.focal <- sapply(coloc_outcome, function(cp) { - tt <- sapply(focal_outcome, function(tmp) grep(paste0(tmp, "\\b"), cp)) - all(sapply(tt, length) != 0) - }) - summary_table$focal_outcome <- ifelse(if.focal, focal_outcome, FALSE) - summary_table <- summary_table[order(summary_table$focal_outcome == "FALSE"), ] - if (sum(if.focal) == 0) { - warning("No colocalization with focal outcomes.") - } + if (summary_level == 2){ + ucos_summary <- get_ucos_summary(cb_output, outcome_names, region_name) + return(list("cos_summary" = cos_summary, + "ucos_summary" = ucos_summary)) } - # - if extract only interest outcome colocalization - if (!is.null(interest_outcome)) { - tmp <- sapply(interest_outcome, function(tmp) grep(paste0(tmp, "\\b"), analysis_outcome)) - if (all(sapply(tmp, length) != 0)) { - if.interest <- sapply(coloc_outcome, function(cp) { - tt <- sapply(interest_outcome, function(tmp) grep(paste0(tmp, "\\b"), cp)) - all(sapply(tt, length) != 0) - }) - summary_table$interest_outcome <- paste0(interest_outcome, collapse = "; ") - summary_table <- summary_table[which(if.interest), ] - if (sum(if.interest) == 0) { - warning("No colocalization with interest outcomes.") - } - } else { - warning("Interest outcome is not in the analysis outcomes, please check.") - } + + if (summary_level == 3){ + ucos_summary <- get_ucos_summary( + cb_output, outcome_names, region_name, + ambiguous_ucos = TRUE, + min_abs_corr_between_ucos = min_abs_corr_between_ucos, + median_abs_corr_between_ucos = median_abs_corr_between_ucos + ) + return(list("cos_summary" = cos_summary, + "ucos_summary" = ucos_summary$ucos_summary, + "ambiguous_ucos_summary" = ucos_summary$ambiguous_ucos_summary)) } - } else { - summary_table <- NULL - } - return(summary_table) + } @@ -152,6 +133,7 @@ get_cos_summary <- function(cb_output, #' \item{cos_details}{A object with all information for colocalization results.} #' \item{data_info}{A object with detailed information from input data} #' \item{model_info}{A object with detailed information for colocboost model} +#' \item{ucos_from_cos}{A object with information for trait-specific effects if exists after removing weaker signals.} #' #' @examples #' # colocboost example @@ -414,6 +396,297 @@ get_robust_colocalization <- function(cb_output, return(cb_output) } +#' @rdname get_ambiguous_colocalization +#' +#' @title Get ambiguous colocalization events from trait-specific (uncolocalized) effects. +#' +#' @description `get_ambiguous_colocalization` get the colocalization by discarding the weaker colocalization events or colocalized outcomes +#' +#' @param cb_output Output object from `colocboost` analysis +#' @param min_abs_corr_between_ucos Minimum absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.5. +#' @param median_abs_corr_between_ucos Median absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.8. +#' @param tol A small, non-negative number specifying the convergence tolerance for checking the overlap of the variables in different sets. +#' +#' @return A \code{"colocboost"} object of colocboost output with additional elements: +#' \item{ambiguous_ucos}{If exists, a list of ambiguous trait-specific (uncolocalized) effects.} +#' +#' @examples +#' data(Ambiguous_Colocalization) +#' test_colocboost_results <- Ambiguous_Colocalization$ColocBoost_Results +#' res <- get_ambiguous_colocalization(test_colocboost_results) +#' names(res$ambigous_ucos) +#' +#' @source See detailed instructions in our tutorial portal: +#' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} +#' +#' @family colocboost_inference +#' @export +get_ambiguous_colocalization <- function(cb_output, + min_abs_corr_between_ucos = 0.5, + median_abs_corr_between_ucos = 0.8, + tol = 1e-9) { + + if (!inherits(cb_output, "colocboost")) { + stop("Input must from colocboost output!") + } + + if (!("ucos_details" %in% names(cb_output))) { + warning( + "Since you want to extract ambiguous colocalization from trait-specific (uncolocalized) sets,", + " but there is no output of ucos_details from colocboost.\n", + " Please run colocboost model with output_level=2!" + ) + return(cb_output) + } + + if (is.null(cb_output$ucos_details)){ + message("No trait-specific (uncolocalized) effects in this region!") + return(cb_output) + } + + ucos_details <- cb_output$ucos_details + nucos <- length(ucos_details$ucos$ucos_index) + if (nucos == 1) { + message("Only one trait-specific (uncolocalized) effect in this region!") + return(cb_output) + } + + # Function to merge ambiguous ucos + merge_sets <- function(vec) { + split_lists <- lapply(vec, function(x) as.numeric(unlist(strsplit(x, ";")))) + result <- list() + while (length(split_lists) > 0) { + current <- split_lists[[1]] + split_lists <- split_lists[-1] + repeat { + overlap_index <- NULL + for (i in seq_along(split_lists)) { + if (length(intersect(current, split_lists[[i]])) > 0) { + overlap_index <- i + break + } + } + if (!is.null(overlap_index)) { + current <- union(current, split_lists[[overlap_index]]) + split_lists <- split_lists[-overlap_index] + } else { + break + } + } + result <- c(result, list(paste(sort(current), collapse = ";"))) + } + return(result) + } + + + purity <- ucos_details$ucos_purity + min_abs_cor <- purity$min_abs_cor + median_abs_cor <- purity$median_abs_cor + max_abs_cor <- purity$max_abs_cor + is_ambiguous <- (min_abs_cor > min_abs_corr_between_ucos) * + (abs(max_abs_cor - 1) < tol) * + (median_abs_cor > median_abs_corr_between_ucos) + diag(is_ambiguous) <- 0 # no need to check within ucos + + if (sum(is_ambiguous) == 0){ + message("No ambiguous colocalization events!") + return(cb_output) + } else { + message("There exists the ambiguous colocalization events from trait-specific effects. Extracting!") + } + + temp <- sapply(1:nrow(is_ambiguous), function(x) { + tt <- c(x, which(is_ambiguous[x, ] != 0)) + return(paste0(sort(tt), collapse = ";")) + }) + temp <- merge_sets(temp) + potential_merged <- lapply(temp, function(x) as.numeric(unlist(strsplit(x, ";")))) + potential_merged <- potential_merged[which(sapply(potential_merged, length) >= 2)] + + ambigous_events <- list() + ambigouse_ucos_names <- c() + for (i in 1:length(potential_merged)) { + idx <- potential_merged[[i]] + test_outcome <- unique(unlist(ucos_details$ucos_outcomes$outcome_index[idx])) + if (length(test_outcome) == 1) next + ambigouse_ucos_names[i] <- paste0(names(ucos_details$ucos$ucos_index)[idx], collapse = ";") + tmp <- list( + ambigouse_ucos = list( + ucos_index = ucos_details$ucos$ucos_index[idx], + ucos_variables = ucos_details$ucos$ucos_variables[idx] + ), + ambigouse_ucos_overlap = list( + ucos_index = Reduce(intersect, ucos_details$ucos$ucos_index[idx]), + ucos_variables = Reduce(intersect, ucos_details$ucos$ucos_variables[idx]) + ), + ambigouse_ucos_union = list( + ucos_index = Reduce(union, ucos_details$ucos$ucos_index[idx]), + ucos_variables = Reduce(union, ucos_details$ucos$ucos_variables[idx]) + ), + ambigouse_ucos_outcomes = list( + outcome_idx = unique(unlist(ucos_details$ucos_outcomes$outcome_index[idx])), + outcome_name = unique(unlist(ucos_details$ucos_outcomes$outcome_name[idx])) + ), + ambigous_ucos_weight = ucos_details$ucos_weight[idx], + ambigous_ucos_puriry = list( + min_abs_cor = min_abs_cor[idx, idx], + median_abs_cor = median_abs_cor[idx, idx], + max_abs_cor = max_abs_cor[idx, idx] + ) + ) + w <- tmp$ambigous_ucos_weight + w <- do.call(cbind, w) + tmp$recalibrated_cos_vcp <- get_integrated_weight(w) + tmp$recalibrated_cos <- list( + "cos_index" = unlist(get_in_cos(tmp$recalibrated_cos_vcp)), + "cos_variables" = lapply(unlist(get_in_cos(tmp$recalibrated_cos_vcp)), function(idx) cb_output$data_info$variables[idx]) + ) + ambigous_events[[i]] <- tmp + } + names(ambigous_events) <- ambigouse_ucos_names + message(paste("There are", length(ambigous_events), "ambiguous trait-specific effects.")) + + cb_output$ambigous_ucos <- ambigous_events + return(cb_output) + +} + + + + +#' @rdname get_cos_summary +#' +#' @title Get colocalization summary table from a ColocBoost output. +#' +#' @description `get_cos_summary` get the colocalization summary table with or without the outcomes of interest. +#' +#' @param cb_output Output object from `colocboost` analysis +#' @param outcome_names Optional vector of names of outcomes, which has the same order as Y in the original analysis. +#' @param interest_outcome Optional vector specifying a subset of outcomes from \code{outcome_names} to focus on. When provided, only colocalization events that include at least one of these outcomes will be returned. +#' @param region_name Optional character string. When provided, adds a column with this gene name to the output table for easier filtering in downstream analyses. +#' +#' @return A summary table for colocalization events with the following columns: +#' \item{focal_outcome}{The focal outcome being analyzed if exists. Otherwise, it is \code{FALSE}.} +#' \item{colocalized_outcomes}{Colocalized outcomes for colocalization confidence set (CoS) } +#' \item{cos_id}{Unique identifier for colocalization confidence set (CoS) } +#' \item{purity}{Minimum absolute correlation of variables with in colocalization confidence set (CoS) } +#' \item{top_variable}{The variable with highest variant colocalization probability (VCP) } +#' \item{top_variable_vcp}{Variant colocalization probability for the top variable} +#' \item{cos_npc}{Normalized probability of colocalization} +#' \item{min_npc_outcome}{Minimum normalized probability of colocalized traits} +#' \item{n_variables}{Number of variables in colocalization confidence set (CoS)} +#' \item{colocalized_index}{Indices of colocalized variables} +#' \item{colocalized_variables}{List of colocalized variables} +#' \item{colocalized_variables_vcp}{Variant colocalization probabilities for all colocalized variables} +#' +#' @examples +#' # colocboost example +#' set.seed(1) +#' N <- 1000 +#' P <- 100 +#' # Generate X with LD structure +#' sigma <- 0.9^abs(outer(1:P, 1:P, "-")) +#' X <- MASS::mvrnorm(N, rep(0, P), sigma) +#' colnames(X) <- paste0("SNP", 1:P) +#' L <- 3 +#' true_beta <- matrix(0, P, L) +#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 +#' Y <- matrix(0, N, L) +#' for (l in 1:L) { +#' Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) +#' } +#' res <- colocboost(X = X, Y = Y) +#' get_cos_summary(res) +#' +#' @source See detailed instructions in our tutorial portal: +#' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} +#' +#' @keywords colocboost_inference +#' @family colocboost_utilities +#' @export +get_cos_summary <- function(cb_output, + outcome_names = NULL, + interest_outcome = NULL, + region_name = NULL) { + if (!inherits(cb_output, "colocboost")) { + stop("Input must from colocboost output!") + } + + coloc_csets <- cb_output$cos_details$cos$cos_index + if (length(coloc_csets) != 0) { + analysis_outcome <- cb_output$data_info$outcome_info$outcome_names + if (!is.null(outcome_names)) { + analysis_outcome <- outcome_names + } + coloc_outcome <- lapply(cb_output$cos_details$cos_outcomes$outcome_index, function(idx) analysis_outcome[idx]) + coloc_sets <- cb_output$cos_details$cos$cos_index + if (!is.null(cb_output$cos_warnings)) { + cos_warnings + } + vcp <- as.numeric(cb_output$vcp) + + summary_table <- matrix(NA, nrow = length(coloc_sets), ncol = 12) + colnames(summary_table) <- c( + "focal_outcome", "colocalized_outcomes", "cos_id", "purity", + "top_variable", "top_variable_vcp", "cos_npc", "min_npc_outcome", "n_variables", + "colocalized_index", "colocalized_variables", "colocalized_variables_vcp" + ) + summary_table <- as.data.frame(summary_table) + summary_table[, 1] <- FALSE + summary_table[, 2] <- unlist(sapply(coloc_outcome, function(tmp) paste0(tmp, collapse = "; "))) + summary_table[, 3] <- names(coloc_sets) + summary_table[, 4] <- as.numeric(diag(as.matrix(cb_output$cos_details$cos_purity$min_abs_cor))) + summary_table[, 5] <- unlist(sapply(cb_output$cos_details$cos$cos_variables, function(tmp) tmp[1])) + summary_table[, 6] <- sapply(coloc_sets, function(tmp) max(vcp[tmp])) + summary_table[, 7] <- round(as.numeric(cb_output$cos_details$cos_npc), 4) + summary_table[, 8] <- round(as.numeric(cb_output$cos_details$cos_min_npc_outcome), 4) + summary_table[, 9] <- as.numeric(sapply(coloc_sets, length)) + summary_table[, 10] <- unlist(sapply(coloc_sets, function(tmp) paste0(tmp, collapse = "; "))) + summary_table[, 11] <- unlist(sapply(cb_output$cos_details$cos$cos_variables, function(tmp) paste0(tmp, collapse = "; "))) + summary_table[, 12] <- unlist(sapply(coloc_sets, function(tmp) paste0(vcp[tmp], collapse = "; "))) + if (!is.null(region_name)) { + summary_table$region_name <- region_name + } + # - if focal colocalization + focal_outcome_idx <- which(cb_output$data_info$outcome_info$is_focal) + if (length(focal_outcome_idx) != 0) { + focal_outcome <- analysis_outcome[focal_outcome_idx] + tmp <- sapply(focal_outcome, function(tmp) grep(paste0(tmp, "\\b"), analysis_outcome)) + if.focal <- sapply(coloc_outcome, function(cp) { + tt <- sapply(focal_outcome, function(tmp) grep(paste0(tmp, "\\b"), cp)) + all(sapply(tt, length) != 0) + }) + summary_table$focal_outcome <- ifelse(if.focal, focal_outcome, FALSE) + summary_table <- summary_table[order(summary_table$focal_outcome == "FALSE"), ] + if (sum(if.focal) == 0) { + warning("No colocalization with focal outcomes.") + } + } + # - if extract only interest outcome colocalization + if (!is.null(interest_outcome)) { + tmp <- sapply(interest_outcome, function(tmp) grep(paste0(tmp, "\\b"), analysis_outcome)) + if (all(sapply(tmp, length) != 0)) { + if.interest <- sapply(coloc_outcome, function(cp) { + tt <- sapply(interest_outcome, function(tmp) grep(paste0(tmp, "\\b"), cp)) + all(sapply(tt, length) != 0) + }) + summary_table$interest_outcome <- paste0(interest_outcome, collapse = "; ") + summary_table <- summary_table[which(if.interest), ] + if (sum(if.interest) == 0) { + warning("No colocalization with interest outcomes.") + } + } else { + warning("Interest outcome is not in the analysis outcomes, please check.") + } + } + } else { + summary_table <- NULL + } + return(summary_table) +} #' @rdname get_ucos_summary @@ -427,19 +700,41 @@ get_robust_colocalization <- function(cb_output, #' @param cb_output Output object from `colocboost` analysis #' @param outcome_names Optional vector of names of outcomes, which has the same order as Y in the original analysis. #' @param region_name Optional character string. When provided, adds a column with this gene name to the output table for easier filtering in downstream analyses. -#' -#' @return A summary table for trait-specific, uncolocalized associations with the following columns: -#' \item{outcomes}{Outcome being analyzed} -#' \item{ucos_id}{Unique identifier for trait-specific confidence sets} -#' \item{purity}{Minimum absolute correlation of variables within trait-specific confidence sets} -#' \item{top_variable}{The variable with highest variant-level probability of association (VPA)} -#' \item{top_variable_vpa}{Variant-level probability of association (VPA) for the top variable} -#' \item{ucos_npc}{Normalized probability of causal association for the trait-specific confidence set} -#' \item{n_variables}{Number of variables in trait-specific confidence set} -#' \item{ucos_index}{Indices of variables in the trait-specific confidence set} -#' \item{ucos_variables}{List of variables in the trait-specific confidence set} -#' \item{ucos_variables_vpa}{Variant-level probability of association (VPA) for all variables in the confidence set} -#' \item{region_name}{Region name if provided through the region_name parameter} +#' @param ambiguous_ucos Logical indicating whether to include ambiguous colocalization events. The default is FALSE. +#' @param min_abs_corr_between_ucos Minimum absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.5. +#' @param median_abs_corr_between_ucos Median absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.8. +#' +#' @return A list containing: +#' \itemize{ +#' \item \code{ucos_summary}: A summary table for trait-specific, uncolocalized associations with the following columns: +#' \itemize{ +#' \item \code{outcomes}: Outcome being analyzed +#' \item \code{ucos_id}: Unique identifier for trait-specific confidence sets +#' \item \code{purity}: Minimum absolute correlation of variables within trait-specific confidence sets +#' \item \code{top_variable}: The variable with highest variant-level probability of association (VPA) +#' \item \code{top_variable_vpa}: Variant-level probability of association (VPA) for the top variable +#' \item \code{ucos_npc}: Normalized probability of causal association for the trait-specific confidence set +#' \item \code{n_variables}: Number of variables in trait-specific confidence set +#' \item \code{ucos_index}: Indices of variables in the trait-specific confidence set +#' \item \code{ucos_variables}: List of variables in the trait-specific confidence set +#' \item \code{ucos_variables_vpa}: Variant-level probability of association (VPA) for all variables in the confidence set +#' \item \code{region_name}: Region name if provided through the region_name parameter +#' } +#' \item \code{ambiguous_ucos_summary}: A summary table for ambiguous colocalization events with the following columns: +#' \itemize{ +#' \item \code{outcomes}: Outcome in the ambiguous colocalization event +#' \item \code{ucos_id}: Unique identifiers for the ambiguous event +#' \item \code{min_between_purity}: Minimum absolute correlation between variables across trait-specific sets in the ambiguous event +#' \item \code{median_between_purity}: Median absolute correlation between variables across trait-specific sets in the ambiguous event +#' \item \code{overlap_idx}: Indices of variables that overlap between ambiguous trait-specific sets +#' \item \code{overlap_variables}: Names of variables that overlap between ambiguous trait-specific sets +#' \item \code{n_recalibrated_variables}: Number of variables in the recalibrated colocalization set from an ambiguous event +#' \item \code{recalibrated_index}: Indices of variables in the recalibrated colocalization set from an ambiguous event +#' \item \code{recalibrated_variables}: Names of variables in the recalibrated colocalization set from an ambiguous event +#' \item \code{recalibrated_variables_vcp}: Variant colocalization probabilities for recalibrated variables from an ambiguous event +#' \item \code{region_name}: Region name if provided through the region_name parameter +#' } +#' } #' #' @examples #' # colocboost example with single trait analysis @@ -462,48 +757,103 @@ get_robust_colocalization <- function(cb_output, #' @source See detailed instructions in our tutorial portal: #' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} #' -#' @family colocboost_inference +#' @keywords colocboost_inference +#' @family colocboost_utilities #' @export -get_ucos_summary <- function(cb_output, outcome_names = NULL, region_name = NULL) { +get_ucos_summary <- function(cb_output, outcome_names = NULL, region_name = NULL, + ambiguous_ucos = FALSE, + min_abs_corr_between_ucos = 0.5, + median_abs_corr_between_ucos = 0.8) { + + # - check input if (!inherits(cb_output, "colocboost")) { stop("Input must from colocboost object!") } + if (!("ucos_details" %in% names(cb_output))) { + warning( + "Since you want to extract trait-specific (uncolocalized) sets,", + " but there is no output of ucos_details from colocboost.\n", + " Please run colocboost model with output_level=2!" + ) + return(NULL) + } + + if (is.null(cb_output$ucos_details)){ + message("No trait-specific (uncolocalized) effects in this region!") + return(NULL) + } + specific_cs <- cb_output$ucos_details - if (length(specific_cs$ucos$ucos_index) != 0) { - cs_outcome <- cb_output$data_info$outcome_info$outcome_names - if (!is.null(outcome_names)) { - cs_outcome <- outcome_names - } - vpa <- as.numeric(cb_output$vpa) - if (length(vpa) == 0){ - w <- do.call(cbind, specific_cs$ucos_weight) - vpa <- as.vector(1 - apply(1 - w, 1, prod)) - } + cs_outcome <- cb_output$data_info$outcome_info$outcome_names + if (!is.null(outcome_names)) { + cs_outcome <- outcome_names + } + vpa <- as.numeric(cb_output$vpa) + if (length(vpa) == 0){ + w <- do.call(cbind, specific_cs$ucos_weight) + vpa <- as.vector(1 - apply(1 - w, 1, prod)) + } - summary_table <- matrix(NA, nrow = length(specific_cs$ucos$ucos_index), ncol = 9) - colnames(summary_table) <- c( - "outcomes", "ucos_id", "purity", - "top_variable", "top_variable_vpa", "n_variables", "ucos_index", - "ucos_variables", "ucos_variables_vpa" - ) - summary_table <- as.data.frame(summary_table) - summary_table[, 1] <- cs_outcome[unlist(specific_cs$ucos_outcomes$outcome_index)] - summary_table[, 2] <- names(specific_cs$ucos$ucos_index) - summary_table[, 3] <- as.numeric(diag(as.matrix(specific_cs$ucos_purity$min_abs_cor))) - summary_table[, 4] <- unlist(sapply(specific_cs$ucos$ucos_variables, function(tmp) tmp[1])) - summary_table[, 5] <- sapply(specific_cs$ucos$ucos_index, function(tmp) max(vpa[tmp])) - summary_table[, 6] <- as.numeric(sapply(specific_cs$ucos$ucos_index, length)) - summary_table[, 7] <- unlist(sapply(specific_cs$ucos$ucos_index, function(tmp) paste0(tmp, collapse = "; "))) - summary_table[, 8] <- unlist(sapply(specific_cs$ucos$ucos_variables, function(tmp) paste0(tmp, collapse = "; "))) - summary_table[, 9] <- unlist(sapply(specific_cs$ucos$ucos_index, function(tmp) paste0(vpa[tmp], collapse = "; "))) - if (!is.null(region_name)) { - summary_table$region_name <- region_name - } - } else { - summary_table <- NULL + summary_table <- matrix(NA, nrow = length(specific_cs$ucos$ucos_index), ncol = 9) + colnames(summary_table) <- c( + "outcomes", "ucos_id", "purity", + "top_variable", "top_variable_vpa", "n_variables", "ucos_index", + "ucos_variables", "ucos_variables_vpa" + ) + summary_table <- as.data.frame(summary_table) + summary_table[, 1] <- cs_outcome[unlist(specific_cs$ucos_outcomes$outcome_index)] + summary_table[, 2] <- names(specific_cs$ucos$ucos_index) + summary_table[, 3] <- as.numeric(diag(as.matrix(specific_cs$ucos_purity$min_abs_cor))) + summary_table[, 4] <- unlist(sapply(specific_cs$ucos$ucos_variables, function(tmp) tmp[1])) + summary_table[, 5] <- sapply(specific_cs$ucos$ucos_index, function(tmp) max(vpa[tmp])) + summary_table[, 6] <- as.numeric(sapply(specific_cs$ucos$ucos_index, length)) + summary_table[, 7] <- unlist(sapply(specific_cs$ucos$ucos_index, function(tmp) paste0(tmp, collapse = "; "))) + summary_table[, 8] <- unlist(sapply(specific_cs$ucos$ucos_variables, function(tmp) paste0(tmp, collapse = "; "))) + summary_table[, 9] <- unlist(sapply(specific_cs$ucos$ucos_index, function(tmp) paste0(vpa[tmp], collapse = "; "))) + if (!is.null(region_name)) { + summary_table$region_name <- region_name } - return(summary_table) + summary_table <- as.data.frame(summary_table) + if (!ambiguous_ucos) return(summary_table) + + + # advanced summary for ambiguous colocalization at post-processing + output_summary <- list( + ucos_summary = summary_table, + ambiguous_ucos_summary = NULL + ) + test <- get_ambiguous_colocalization( + cb_output, + min_abs_corr_between_ucos = min_abs_corr_between_ucos, + median_abs_corr_between_ucos = median_abs_corr_between_ucos + ) + if (length(test$ambigous_ucos) == 0) return(output_summary) + + ambiguous_results <- test$ambigous_ucos + ambiguous_summary <- matrix(NA, nrow = length(ambiguous_results), ncol = 10) + colnames(ambiguous_summary) <- c( + "outcomes", "ucos_id", "min_between_purity", "median_between_purity", + "overlap_idx", "overlap_variables", "n_recalibrated_variables", + "recalibrated_index", "recalibrated_variables", "recalibrated_variables_vcp" + ) + + ambiguous_summary[, 1] <- unlist(sapply(ambiguous_results, function(tmp) paste0(tmp$ambigouse_ucos_outcomes$outcome_name, collapse = "; "))) + ambiguous_summary[, 2] <- names(ambiguous_results) + ambiguous_summary[, 3] <- sapply(ambiguous_results, function(tmp) max(tmp$ambigous_ucos_puriry$min_abs_cor[lower.tri(tmp$ambigous_ucos_puriry$min_abs_cor)]) ) + ambiguous_summary[, 4] <- sapply(ambiguous_results, function(tmp) max(tmp$ambigous_ucos_puriry$median_abs_cor[lower.tri(tmp$ambigous_ucos_puriry$median_abs_cor)]) ) + ambiguous_summary[, 5] <- unlist(sapply(ambiguous_results, function(tmp) paste0(tmp$ambigouse_ucos_overlap$ucos_index, collapse = "; "))) + ambiguous_summary[, 6] <- unlist(sapply(ambiguous_results, function(tmp) paste0(tmp$ambigouse_ucos_overlap$ucos_variables, collapse = "; "))) + ambiguous_summary[, 7] <- as.numeric(sapply(ambiguous_results, function(tmp) length(tmp$recalibrated_cos$cos_index))) + ambiguous_summary[, 8] <- unlist(sapply(ambiguous_results, function(tmp) paste0(tmp$recalibrated_cos$cos_index, collapse = "; "))) + ambiguous_summary[, 9] <- unlist(sapply(ambiguous_results, function(tmp) paste0(tmp$recalibrated_cos$cos_variables, collapse = "; "))) + ambiguous_summary[, 10] <- unlist(sapply(ambiguous_results, function(tmp) paste0(tmp$recalibrated_cos_vcp[tmp$recalibrated_cos$cos_index], collapse = "; "))) + if (!is.null(region_name)) { + ambiguous_summary$region_name <- region_name + } + output_summary$ambiguous_ucos_summary <- as.data.frame(ambiguous_summary) + + return(output_summary) } #' Extract CoS at different coverages @@ -594,6 +944,7 @@ get_cos <- function(cb_output, coverage = 0.95, X = NULL, Xcorr = NULL, n_purity return(cos_refined) } + #' Get integrated weight from different outcomes #' @keywords cb_get_functions #' @noRd @@ -711,528 +1062,3 @@ get_cos_purity <- function(cos, X = NULL, Xcorr = NULL, n_purity = 100) { return(cos_purity) } - -#' @title Set of internal functions to re-organize ColocBoost output format -#' -#' @description -#' The `colocboost_output_reorganization` functions access basic properties inferences from a fitted ColocBoost model. This documentation serves as a summary for all related post-inference functions. -#' -#' @details -#' The following functions are included in this set: -#' `get_data_info` get formatted \code{data_info} in ColocBoost output -#' `get_cos_details` get formatted \code{cos_details} in ColocBoost output -#' `get_model_info` get formatted \code{model_info} in ColocBoost output -#' `get_full_output` get formatted additional information in ColocBoost output when \code{output_level!=1} -#' -#' These functions are not exported individually and are accessed via `colocboost_output_reorganization`. -#' -#' @rdname colocboost_output_reorganization -#' @keywords cb_reorganization -#' @noRd -colocboost_output_reorganization <- function() { - message("This function re-formats colocboost output as internal used. See details for more information.") -} - -#' Get formatted \code{data_info} in ColocBoost output -#' @noRd -#' @keywords cb_reorganization -get_data_info <- function(cb_obj) { - ## - analysis data information - n_outcome <- cb_obj$cb_model_para$L - n_variables <- cb_obj$cb_model_para$P - analysis_outcome <- cb_obj$cb_model_para$outcome_names - variables <- cb_obj$cb_data$variable.names - focal_outcome <- NULL - is_focal <- rep(FALSE, n_outcome) - if (!is.null(cb_obj$cb_model_para$focal_outcome_idx)) { - focal_outcome <- analysis_outcome[cb_obj$cb_model_para$focal_outcome_idx] - is_focal[cb_obj$cb_model_para$focal_outcome_idx] <- TRUE - } - is_sumstat <- grepl("sumstat_outcome", names(cb_obj$cb_data$data)) - N <- cb_obj$cb_model_para$N - check_no_N <- sapply(cb_obj$cb_model_para$N, is.null) - if (sum(check_no_N)!=0){ - N[which(check_no_N)] <- "NA" - N <- unlist(N) - } - outcome_info <- data.frame( - "outcome_names" = analysis_outcome, "sample_size" = N, - "is_sumstats" = is_sumstat, "is_focal" = is_focal - ) - rownames(outcome_info) <- paste0("y", 1:n_outcome) - - ## - marginal associations - z_scores <- lapply(cb_obj$cb_model, function(cb) { - as.numeric(cb$z_univariate) - }) - betas <- lapply(cb_obj$cb_model, function(cb) { - as.numeric(cb$beta) - }) - names(z_scores) <- names(betas) <- analysis_outcome - ## - output data info - data.info <- list( - "n_outcomes" = n_outcome, - "n_variables" = n_variables, - "outcome_info" = outcome_info, - "variables" = variables, - "coef" = betas, - "z" = z_scores - ) - return(data.info) -} - -#' Get formatted \code{cos_details} in ColocBoost output -#' @noRd -#' @keywords cb_reorganization -get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) { - if (is.null(data_info)) { - data_info <- get_data_info(cb_obj) - } - - - ### ----- Define the colocalization results - coloc_sets <- coloc_out$cos - if (length(coloc_sets) != 0) { - # - colocalization outcome configurations - tmp <- get_cos_evidence(cb_obj, coloc_out, data_info) - normalization_evidence <- tmp$normalization_evidence - npc <- tmp$npc - cos_min_npc_outcome <- sapply(normalization_evidence, function(cp) min(cp$npc_outcome)) - - # - colocalized outcomes - analysis_outcome <- cb_obj$cb_model_para$outcome_names - coloc_outcome_index <- coloc_outcome <- list() - colocset_names <- c() - for (i in 1:length(coloc_out$cos)) { - coloc_outcome_index[[i]] <- coloc_out$coloc_outcomes[[i]] - coloc_outcome[[i]] <- analysis_outcome[coloc_outcome_index[[i]]] - colocset_names[i] <- paste0("cos", i, ":", paste0(paste0("y", coloc_outcome_index[[i]]), collapse = "_")) - if (grepl("merged", names(coloc_sets)[i])) { - colocset_names[i] <- paste0(colocset_names[i], ":merged") - } - } - names(coloc_outcome) <- names(coloc_outcome_index) <- colocset_names - names(npc) <- names(normalization_evidence) <- names(cos_min_npc_outcome) <- colocset_names - coloc_outcomes <- list("outcome_index" = coloc_outcome_index, "outcome_name" = coloc_outcome) - - # - colocalized sets for variables - coloc_csets_variableidx <- coloc_out$cos - coloc_csets_variablenames <- lapply(coloc_csets_variableidx, function(coloc_tmp) { - cb_obj$cb_model_para$variables[coloc_tmp] - }) - coloc_csets_variableidx <- lapply(coloc_csets_variablenames, function(variable) match(variable, data_info$variables)) - names(coloc_csets_variableidx) <- names(coloc_csets_variablenames) <- colocset_names - coloc_csets_original <- list("cos_index" = coloc_csets_variableidx, "cos_variables" = coloc_csets_variablenames) - - # - colocalized set cs_change - cs_change <- coloc_out$cs_change - rownames(cs_change) <- colocset_names - colnames(cs_change) <- analysis_outcome - - # - VCP - cos_weights <- lapply(coloc_out$avWeight, function(w) { - pos <- match(data_info$variables, cb_obj$cb_model_para$variables) - return(w[pos, , drop = FALSE]) - }) - int_weight <- lapply(cos_weights, get_integrated_weight, weight_fudge_factor = cb_obj$cb_model_para$weight_fudge_factor) - names(int_weight) <- names(cos_weights) <- colocset_names - vcp <- as.vector(1 - apply(1 - do.call(cbind, int_weight), 1, prod)) - names(vcp) <- data_info$variables - - # - resummary results - cos_re_idx <- lapply(int_weight, function(w) { - unlist(get_in_cos(w, coverage = cb_obj$cb_model_para$coverage)) - }) - cos_re_var <- lapply(cos_re_idx, function(idx) { - data_info$variables[idx] - }) - coloc_csets <- list("cos_index" = cos_re_idx, "cos_variables" = cos_re_var) - - # - hits variables in each csets - coloc_hits <- coloc_hits_variablenames <- coloc_hits_names <- c() - for (i in 1:length(int_weight)) { - inw <- int_weight[[i]] - pp <- which(inw == max(inw)) - coloc_hits <- c(coloc_hits, pp) - coloc_hits_variablenames <- c(coloc_hits_variablenames, data_info$variables[pp]) - if (length(pp) == 1) { - coloc_hits_names <- c(coloc_hits_names, names(int_weight)[i]) - } else { - coloc_hits_names <- c(coloc_hits_names, paste0(names(int_weight)[i], ".", 1:length(pp))) - } - } - coloc_hits <- data.frame("top_index" = coloc_hits, "top_variables" = coloc_hits_variablenames) - rownames(coloc_hits) <- coloc_hits_names - - # - purity - ncos <- length(coloc_csets$cos_index) - if (ncos >= 2) { - empty_matrix <- matrix(NA, ncos, ncos) - colnames(empty_matrix) <- rownames(empty_matrix) <- colocset_names - csets_purity <- lapply(1:3, function(ii) { - diag(empty_matrix) <- coloc_out$purity[, ii] - return(empty_matrix) - }) - for (i in 1:(ncos - 1)) { - for (j in (i + 1):ncos) { - cset1 <- coloc_csets$cos_index[[i]] - cset2 <- coloc_csets$cos_index[[j]] - y.i <- coloc_outcomes$outcome_index[[i]] - y.j <- coloc_outcomes$outcome_index[[j]] - yy <- unique(y.i, y.j) - res <- list() - flag <- 1 - for (ii in yy) { - X_dict <- cb_obj$cb_data$dict[ii] - res[[flag]] <- get_between_purity(cset1, cset2, - X = cb_obj$cb_data$data[[X_dict]]$X, - Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P - ) - flag <- flag + 1 - } - res <- Reduce(pmax, res) - csets_purity <- lapply(1:3, function(ii) { - csets_purity[[ii]][i, j] <- csets_purity[[ii]][j, i] <- res[ii] - return(csets_purity[[ii]]) - }) - } - } - names(csets_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") - } else { - csets_purity <- lapply(1:length(coloc_out$purity), function(i) { - tmp <- as.matrix(coloc_out$purity[i]) - rownames(tmp) <- colnames(tmp) <- colocset_names - tmp - }) - names(csets_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") - } - - # - save coloc_results - coloc_results <- list( - "cos" = coloc_csets, - "cos_outcomes" = coloc_outcomes, - "cos_vcp" = int_weight, - "cos_outcomes_npc" = normalization_evidence, - "cos_npc" = npc, - "cos_min_npc_outcome" = cos_min_npc_outcome, - "cos_purity" = csets_purity, - "cos_top_variables" = coloc_hits, - "cos_weights" = cos_weights - ) - - - # - missing variable and warning message - missing_variables_idx <- Reduce(union, lapply(cb_obj$cb_data$data, function(cb) cb$variable_miss)) - missing_variables <- cb_obj$cb_model_para$variables[missing_variables_idx] - cos_missing_variables_idx <- lapply(coloc_csets_original$cos_variables, function(variable) { - missing <- intersect(variable, missing_variables) - if (length(missing) != 0) { - match(missing, data_info$variables) - } else { - NULL - } - }) - cos_missing_variables <- lapply(cos_missing_variables_idx, function(variable) { - if (!is.null(variable)) { - data_info$variables[variable] - } else { - NULL - } - }) - warning_needed <- any(!sapply(cos_missing_variables, is.null)) - if (warning_needed) { - is_missing <- which(!sapply(cos_missing_variables, is.null)) - cos_missing_variables_idx <- cos_missing_variables_idx[is_missing] - cos_missing_variables <- cos_missing_variables[is_missing] - cos_missing_vcp <- lapply(cos_missing_variables_idx, function(idx) { - vcp[idx] - }) - warning_message <- paste( - "CoS", paste(names(cos_missing_variables_idx), collapse = ","), - "contains missing variables in at least one outcome.", - "The missing variables will cause the ~0 VCP scores." - ) - cos_warnings <- list( - "cos_missing_info" = list( - "index" = cos_missing_variables_idx, - "variables" = cos_missing_variables, - "vcp" = cos_missing_vcp - ), - "warning_message" = warning_message - ) - coloc_results$cos_warnings <- cos_warnings - } - } else { - coloc_results <- NULL - vcp <- NULL - } - return(list("cos_results" = coloc_results, "vcp" = vcp)) -} - -#' Get formatted \code{model_info} in ColocBoost output -#' @noRd -#' @keywords cb_reorganization -get_model_info <- function(cb_obj, outcome_names = NULL) { - if (is.null(outcome_names)) { - data_info <- get_data_info(cb_obj) - outcome_names <- data_info$outcome_info$outcome_names - } - - profile_loglik <- cb_obj$cb_model_para$profile_loglike - n_updates <- cb_obj$cb_model_para$num_updates - n_updates_outcome <- cb_obj$cb_model_para$num_updates_outcome - model_coveraged <- cb_obj$cb_model_para$coveraged - model_coveraged_outcome <- cb_obj$cb_model_para$coveraged_outcome - jk_update <- cb_obj$cb_model_para$real_update_jk - if (!is.null(jk_update)){ - rownames(jk_update) <- paste0("jk_star_", 1:nrow(jk_update)) - colnames(jk_update) <- outcome_names - } - outcome_proximity_obj <- lapply(cb_obj$cb_model, function(cb) cb$obj_path) - outcome_coupled_best_update_obj <- lapply(cb_obj$cb_model, function(cb) cb$obj_single) - outcome_profile_loglik <- lapply(cb_obj$cb_model, function(cb) cb$profile_loglike_each) - names(outcome_proximity_obj) <- names(outcome_coupled_best_update_obj) <- - names(outcome_profile_loglik) <- names(n_updates_outcome) <- - names(model_coveraged_outcome) <- outcome_names - ll <- list( - "model_coveraged" = model_coveraged, - "n_updates" = n_updates, - "profile_loglik" = profile_loglik, - "outcome_profile_loglik" = outcome_profile_loglik, - "outcome_proximity_obj" = outcome_proximity_obj, - "outcome_coupled_best_update_obj" = outcome_coupled_best_update_obj, - "outcome_model_coveraged" = model_coveraged_outcome, - "outcome_n_updates" = n_updates_outcome, - "jk_star" = jk_update - ) - return(ll) -} - -#' Get formatted additional information in ColocBoost output when \code{output_level!=1} -#' @noRd -#' @keywords cb_reorganization -get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output = NULL, weaker_ucos = FALSE) { - cb_model <- cb_obj$cb_model - cb_model_para <- cb_obj$cb_model_para - - ## - obtain the order of variables based on the variables names if it has position information - if (!is.null(variables)) { - ordered <- 1:length(cb_obj$cb_model_para$variables) - } else { - ordered <- match(variables, cb_obj$cb_model_para$variables) - } - - ## - reorder all output - # - cb_model - tmp <- lapply(cb_model, function(cb) { - cb$beta <- cb$beta[ordered] - cb$weights_path <- cb$weights_path[, ordered] - cb$change_loglike <- cb$change_loglike[ordered] - cb$correlation <- as.numeric(cb$correlation[ordered]) - cb$z <- as.numeric(cb$z[ordered]) - cb$ld_jk <- cb$ld_jk[, ordered] - cb$z_univariate <- as.numeric(cb$z_univariate[ordered]) - cb$beta_hat <- as.numeric(cb$beta_hat[ordered]) - cb$multi_correction <- as.numeric(cb$multi_correction[ordered]) - cb$multi_correction_univariate <- as.numeric(cb$multi_correction_univariate[ordered]) - return(cb) - }) - cb_model <- tmp - - # - sets - if (!is.null(past_out)) { - out_ucos <- past_out$ucos - # - single sets - if (!is.null(out_ucos$ucos_each)) { - out_ucos$ucos_each <- lapply(out_ucos$ucos_each, function(cs) { - match(cb_model_para$variables[cs], variables) - }) - out_ucos$avW_ucos_each <- out_ucos$avW_ucos_each[ordered, , drop = FALSE] - - # - re-orginize specific results - analysis_outcome <- cb_obj$cb_model_para$outcome_names - specific_outcome_index <- specific_outcome <- list() - specific_cs_names <- c() - for (i in 1:length(out_ucos$ucos_each)) { - cc <- out_ucos$avW_ucos_each[, i, drop = FALSE] - tmp_names <- colnames(cc) - specific_outcome_index[[i]] <- as.numeric(gsub(".*Y([0-9]+).*", "\\1", tmp_names)) - specific_outcome[[i]] <- analysis_outcome[specific_outcome_index[[i]]] - specific_cs_names[i] <- paste0("ucos", i, ":y", specific_outcome_index[[i]]) - } - names(specific_outcome) <- names(specific_outcome_index) <- specific_cs_names - specific_outcomes <- list("outcome_index" = specific_outcome_index, "outcome_name" = specific_outcome) - - # - specific sets for variables - specific_cs_variableidx <- out_ucos$ucos_each - specific_cs_variablenames <- lapply(specific_cs_variableidx, function(specific_tmp) { - cb_obj$cb_model_para$variables[specific_tmp] - }) - specific_cs_variableidx <- lapply(specific_cs_variablenames, function(variable) match(variable, variables)) - names(specific_cs_variableidx) <- names(specific_cs_variablenames) <- specific_cs_names - specific_css <- list("ucos_index" = specific_cs_variableidx, "ucos_variables" = specific_cs_variablenames) - - # - specific set cs_change - cs_change <- out_ucos$change_obj_each - rownames(cs_change) <- specific_cs_names - colnames(cs_change) <- analysis_outcome - index_change <- as.data.frame(which(cs_change != 0, arr.ind = TRUE)) - change_outcomes <- analysis_outcome[index_change$col] - change_values <- diag(as.matrix(cs_change[index_change$row, index_change$col])) - cs_change <- data.frame("ucos_outcome" = change_outcomes, "ucos_delta" = change_values) - - # - filter weak ucos - check_null_max <- sapply(cb_model, function(cb) cb$check_null_max) - remove_weak <- sapply(1:nrow(cs_change), function(ic) { - outcome_tmp <- cs_change$ucos_outcome[ic] - delta_tmp <- cs_change$ucos_delta[ic] - pp <- which(cb_obj$cb_model_para$outcome_names == outcome_tmp) - check_tmp <- check_null_max[pp] - delta_tmp >= check_tmp - }) - keep_ucos <- which(remove_weak) - if (length(keep_ucos) == 0) { - specific_results <- NULL - } else { - specific_outcomes$outcome_index <- specific_outcomes$outcome_index[keep_ucos] - specific_outcomes$outcome_name <- specific_outcomes$outcome_name[keep_ucos] - specific_css$ucos_index <- specific_css$ucos_index[keep_ucos] - specific_css$ucos_variables <- specific_css$ucos_variables[keep_ucos] - cs_change <- cs_change[keep_ucos, , drop = FALSE] - out_ucos$avW_ucos_each <- out_ucos$avW_ucos_each[, keep_ucos, drop = FALSE] - specific_cs_names <- specific_cs_names[keep_ucos] - out_ucos$purity_each <- out_ucos$purity_each[keep_ucos, , drop = FALSE] - - # - ucos_weight - specific_w <- lapply(1:ncol(out_ucos$avW_ucos_each), function(ii) out_ucos$avW_ucos_each[, ii, drop = FALSE]) - names(specific_w) <- specific_cs_names - - # - hits variables in each csets - cs_hits <- sapply(1:length(specific_w), function(jj) { - inw <- specific_w[[jj]] - sample(which(inw == max(inw)), 1) - }) - cs_hits_variablenames <- sapply(cs_hits, function(ch) variables[ch]) - specific_cs_hits <- data.frame("top_index" = cs_hits, "top_variables" = cs_hits_variablenames) # save - rownames(specific_cs_hits) <- specific_cs_names - - # - purity - nucos <- length(specific_css$ucos_index) - if (nucos >= 2) { - empty_matrix <- matrix(NA, nucos, nucos) - colnames(empty_matrix) <- rownames(empty_matrix) <- specific_cs_names - specific_cs_purity <- lapply(1:3, function(ii) { - diag(empty_matrix) <- out_ucos$purity_each[, ii] - return(empty_matrix) - }) - for (i in 1:(nucos - 1)) { - for (j in (i + 1):nucos) { - cset1 <- specific_css$ucos_index[[i]] - cset2 <- specific_css$ucos_index[[j]] - y.i <- specific_outcomes$outcome_index[[i]] - y.j <- specific_outcomes$outcome_index[[j]] - yy <- unique(y.i, y.j) - res <- list() - flag <- 1 - for (ii in yy) { - X_dict <- cb_obj$cb_data$dict[ii] - res[[flag]] <- get_between_purity(cset1, cset2, - X = cb_obj$cb_data$data[[X_dict]]$X, - Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P - ) - flag <- flag + 1 - } - res <- Reduce(pmax, res) - specific_cs_purity <- lapply(1:3, function(ii) { - specific_cs_purity[[ii]][i, j] <- specific_cs_purity[[ii]][j, i] <- res[ii] - return(specific_cs_purity[[ii]]) - }) - } - } - names(specific_cs_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") - } else { - specific_cs_purity <- out_ucos$purity_each - rownames(specific_cs_purity) <- specific_cs_names - } - - # - cos&ucos purity - cos <- cb_output$cos_details$cos$cos_index - ncos <- length(cos) - if (ncos != 0) { - empty_matrix <- matrix(NA, ncos, nucos) - colnames(empty_matrix) <- specific_cs_names - rownames(empty_matrix) <- names(cos) - cos_ucos_purity <- lapply(1:3, function(ii) empty_matrix) - for (i in 1:ncos) { - for (j in 1:nucos) { - cset1 <- cos[[i]] - cset2 <- specific_css$ucos_index[[j]] - y.i <- cb_output$cos_details$cos_outcomes$outcome_index[[i]] - y.j <- specific_outcomes$outcome_index[[j]] - yy <- unique(y.i, y.j) - res <- list() - flag <- 1 - for (ii in yy) { - X_dict <- cb_obj$cb_data$dict[ii] - res[[flag]] <- get_between_purity(cset1, cset2, - X = cb_obj$cb_data$data[[X_dict]]$X, - Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P - ) - flag <- flag + 1 - } - res <- Reduce(pmax, res) - cos_ucos_purity <- lapply(1:3, function(ii) { - cos_ucos_purity[[ii]][i, j] <- res[ii] - return(cos_ucos_purity[[ii]]) - }) - } - } - names(cos_ucos_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") - } else { - cos_ucos_purity <- NULL - } - - - # - save coloc_results - specific_results <- list( - "ucos" = specific_css, - "ucos_outcomes" = specific_outcomes, - "ucos_weight" = specific_w, - "ucos_top_variables" = specific_cs_hits, - "ucos_purity" = specific_cs_purity, - "cos_ucos_purity" = cos_ucos_purity, - "ucos_outcomes_delta" = cs_change - ) - } - } else { - specific_results <- NULL - } - - # - cb_model_para - cb_model_para$N <- as.numeric(unlist(cb_model_para$N)) - cb_model_para$variables <- variables - - ll <- list( - "ucos_details" = specific_results, - "cb_model" = cb_model, - "cb_model_para" = cb_model_para - ) - } else { - # - cb_model_para - cb_model_para$N <- as.numeric(unlist(cb_model_para$N)) - cb_model_para$variables <- variables - ll <- list( - "ucos_detials" = NULL, - "cb_model" = cb_model, - "cb_model_para" = cb_model_para - ) - } - - return(ll) -} diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R index 962d12f..fcc56d4 100644 --- a/R/colocboost_plot.R +++ b/R/colocboost_plot.R @@ -589,6 +589,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, ) plot_input$uncoloc <- uncoloc } + class(plot_input) <- "colocboost" return(plot_input) } diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index c5571f6..702590e 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -504,3 +504,531 @@ get_merge_ordered_with_indices <- function(vector_list) { result } + + + +#' @title Set of internal functions to re-organize ColocBoost output format +#' +#' @description +#' The `colocboost_output_reorganization` functions access basic properties inferences from a fitted ColocBoost model. This documentation serves as a summary for all related post-inference functions. +#' +#' @details +#' The following functions are included in this set: +#' `get_data_info` get formatted \code{data_info} in ColocBoost output +#' `get_cos_details` get formatted \code{cos_details} in ColocBoost output +#' `get_model_info` get formatted \code{model_info} in ColocBoost output +#' `get_full_output` get formatted additional information in ColocBoost output when \code{output_level!=1} +#' +#' These functions are not exported individually and are accessed via `colocboost_output_reorganization`. +#' +#' @rdname colocboost_output_reorganization +#' @keywords cb_reorganization +#' @noRd +colocboost_output_reorganization <- function() { + message("This function re-formats colocboost output as internal used. See details for more information.") +} + +#' Get formatted \code{data_info} in ColocBoost output +#' @noRd +#' @keywords cb_reorganization +get_data_info <- function(cb_obj) { + ## - analysis data information + n_outcome <- cb_obj$cb_model_para$L + n_variables <- cb_obj$cb_model_para$P + analysis_outcome <- cb_obj$cb_model_para$outcome_names + variables <- cb_obj$cb_data$variable.names + focal_outcome <- NULL + is_focal <- rep(FALSE, n_outcome) + if (!is.null(cb_obj$cb_model_para$focal_outcome_idx)) { + focal_outcome <- analysis_outcome[cb_obj$cb_model_para$focal_outcome_idx] + is_focal[cb_obj$cb_model_para$focal_outcome_idx] <- TRUE + } + is_sumstat <- grepl("sumstat_outcome", names(cb_obj$cb_data$data)) + N <- cb_obj$cb_model_para$N + check_no_N <- sapply(cb_obj$cb_model_para$N, is.null) + if (sum(check_no_N)!=0){ + N[which(check_no_N)] <- "NA" + N <- unlist(N) + } + outcome_info <- data.frame( + "outcome_names" = analysis_outcome, "sample_size" = N, + "is_sumstats" = is_sumstat, "is_focal" = is_focal + ) + rownames(outcome_info) <- paste0("y", 1:n_outcome) + + ## - marginal associations + z_scores <- lapply(cb_obj$cb_model, function(cb) { + as.numeric(cb$z_univariate) + }) + betas <- lapply(cb_obj$cb_model, function(cb) { + as.numeric(cb$beta) + }) + names(z_scores) <- names(betas) <- analysis_outcome + ## - output data info + data.info <- list( + "n_outcomes" = n_outcome, + "n_variables" = n_variables, + "outcome_info" = outcome_info, + "variables" = variables, + "coef" = betas, + "z" = z_scores + ) + return(data.info) +} + +#' Get formatted \code{cos_details} in ColocBoost output +#' @noRd +#' @keywords cb_reorganization +get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) { + if (is.null(data_info)) { + data_info <- get_data_info(cb_obj) + } + + + ### ----- Define the colocalization results + coloc_sets <- coloc_out$cos + if (length(coloc_sets) != 0) { + # - colocalization outcome configurations + tmp <- get_cos_evidence(cb_obj, coloc_out, data_info) + normalization_evidence <- tmp$normalization_evidence + npc <- tmp$npc + cos_min_npc_outcome <- sapply(normalization_evidence, function(cp) min(cp$npc_outcome)) + + # - colocalized outcomes + analysis_outcome <- cb_obj$cb_model_para$outcome_names + coloc_outcome_index <- coloc_outcome <- list() + colocset_names <- c() + for (i in 1:length(coloc_out$cos)) { + coloc_outcome_index[[i]] <- coloc_out$coloc_outcomes[[i]] + coloc_outcome[[i]] <- analysis_outcome[coloc_outcome_index[[i]]] + colocset_names[i] <- paste0("cos", i, ":", paste0(paste0("y", coloc_outcome_index[[i]]), collapse = "_")) + if (grepl("merged", names(coloc_sets)[i])) { + colocset_names[i] <- paste0(colocset_names[i], ":merged") + } + } + names(coloc_outcome) <- names(coloc_outcome_index) <- colocset_names + names(npc) <- names(normalization_evidence) <- names(cos_min_npc_outcome) <- colocset_names + coloc_outcomes <- list("outcome_index" = coloc_outcome_index, "outcome_name" = coloc_outcome) + + # - colocalized sets for variables + coloc_csets_variableidx <- coloc_out$cos + coloc_csets_variablenames <- lapply(coloc_csets_variableidx, function(coloc_tmp) { + cb_obj$cb_model_para$variables[coloc_tmp] + }) + coloc_csets_variableidx <- lapply(coloc_csets_variablenames, function(variable) match(variable, data_info$variables)) + names(coloc_csets_variableidx) <- names(coloc_csets_variablenames) <- colocset_names + coloc_csets_original <- list("cos_index" = coloc_csets_variableidx, "cos_variables" = coloc_csets_variablenames) + + # - colocalized set cs_change + cs_change <- coloc_out$cs_change + rownames(cs_change) <- colocset_names + colnames(cs_change) <- analysis_outcome + + # - VCP + cos_weights <- lapply(coloc_out$avWeight, function(w) { + pos <- match(data_info$variables, cb_obj$cb_model_para$variables) + return(w[pos, , drop = FALSE]) + }) + int_weight <- lapply(cos_weights, get_integrated_weight, weight_fudge_factor = cb_obj$cb_model_para$weight_fudge_factor) + names(int_weight) <- names(cos_weights) <- colocset_names + vcp <- as.vector(1 - apply(1 - do.call(cbind, int_weight), 1, prod)) + names(vcp) <- data_info$variables + + # - resummary results + cos_re_idx <- lapply(int_weight, function(w) { + unlist(get_in_cos(w, coverage = cb_obj$cb_model_para$coverage)) + }) + cos_re_var <- lapply(cos_re_idx, function(idx) { + data_info$variables[idx] + }) + coloc_csets <- list("cos_index" = cos_re_idx, "cos_variables" = cos_re_var) + + # - hits variables in each csets + coloc_hits <- coloc_hits_variablenames <- coloc_hits_names <- c() + for (i in 1:length(int_weight)) { + inw <- int_weight[[i]] + pp <- which(inw == max(inw)) + coloc_hits <- c(coloc_hits, pp) + coloc_hits_variablenames <- c(coloc_hits_variablenames, data_info$variables[pp]) + if (length(pp) == 1) { + coloc_hits_names <- c(coloc_hits_names, names(int_weight)[i]) + } else { + coloc_hits_names <- c(coloc_hits_names, paste0(names(int_weight)[i], ".", 1:length(pp))) + } + } + coloc_hits <- data.frame("top_index" = coloc_hits, "top_variables" = coloc_hits_variablenames) + rownames(coloc_hits) <- coloc_hits_names + + # - purity + ncos <- length(coloc_csets$cos_index) + if (ncos >= 2) { + empty_matrix <- matrix(NA, ncos, ncos) + colnames(empty_matrix) <- rownames(empty_matrix) <- colocset_names + csets_purity <- lapply(1:3, function(ii) { + diag(empty_matrix) <- coloc_out$purity[, ii] + return(empty_matrix) + }) + for (i in 1:(ncos - 1)) { + for (j in (i + 1):ncos) { + cset1 <- coloc_csets$cos_index[[i]] + cset2 <- coloc_csets$cos_index[[j]] + y.i <- coloc_outcomes$outcome_index[[i]] + y.j <- coloc_outcomes$outcome_index[[j]] + yy <- unique(c(y.i, y.j)) + res <- list() + flag <- 1 + for (ii in yy) { + X_dict <- cb_obj$cb_data$dict[ii] + res[[flag]] <- get_between_purity(cset1, cset2, + X = cb_obj$cb_data$data[[X_dict]]$X, + Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, + miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, + P = cb_obj$cb_model_para$P + ) + flag <- flag + 1 + } + res <- Reduce(pmax, res) + csets_purity <- lapply(1:3, function(ii) { + csets_purity[[ii]][i, j] <- csets_purity[[ii]][j, i] <- res[ii] + return(csets_purity[[ii]]) + }) + } + } + names(csets_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") + } else { + csets_purity <- lapply(1:length(coloc_out$purity), function(i) { + tmp <- as.matrix(coloc_out$purity[i]) + rownames(tmp) <- colnames(tmp) <- colocset_names + tmp + }) + names(csets_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") + } + + # - save coloc_results + coloc_results <- list( + "cos" = coloc_csets, + "cos_outcomes" = coloc_outcomes, + "cos_vcp" = int_weight, + "cos_outcomes_npc" = normalization_evidence, + "cos_npc" = npc, + "cos_min_npc_outcome" = cos_min_npc_outcome, + "cos_purity" = csets_purity, + "cos_top_variables" = coloc_hits, + "cos_weights" = cos_weights + ) + + + # - missing variable and warning message + missing_variables_idx <- Reduce(union, lapply(cb_obj$cb_data$data, function(cb) cb$variable_miss)) + missing_variables <- cb_obj$cb_model_para$variables[missing_variables_idx] + cos_missing_variables_idx <- lapply(coloc_csets_original$cos_variables, function(variable) { + missing <- intersect(variable, missing_variables) + if (length(missing) != 0) { + match(missing, data_info$variables) + } else { + NULL + } + }) + cos_missing_variables <- lapply(cos_missing_variables_idx, function(variable) { + if (!is.null(variable)) { + data_info$variables[variable] + } else { + NULL + } + }) + warning_needed <- any(!sapply(cos_missing_variables, is.null)) + if (warning_needed) { + is_missing <- which(!sapply(cos_missing_variables, is.null)) + cos_missing_variables_idx <- cos_missing_variables_idx[is_missing] + cos_missing_variables <- cos_missing_variables[is_missing] + cos_missing_vcp <- lapply(cos_missing_variables_idx, function(idx) { + vcp[idx] + }) + warning_message <- paste( + "CoS", paste(names(cos_missing_variables_idx), collapse = ","), + "contains missing variables in at least one outcome.", + "The missing variables will cause the ~0 VCP scores." + ) + cos_warnings <- list( + "cos_missing_info" = list( + "index" = cos_missing_variables_idx, + "variables" = cos_missing_variables, + "vcp" = cos_missing_vcp + ), + "warning_message" = warning_message + ) + coloc_results$cos_warnings <- cos_warnings + } + } else { + coloc_results <- NULL + vcp <- NULL + } + return(list("cos_results" = coloc_results, "vcp" = vcp)) +} + +#' Get formatted \code{model_info} in ColocBoost output +#' @noRd +#' @keywords cb_reorganization +get_model_info <- function(cb_obj, outcome_names = NULL) { + if (is.null(outcome_names)) { + data_info <- get_data_info(cb_obj) + outcome_names <- data_info$outcome_info$outcome_names + } + + profile_loglik <- cb_obj$cb_model_para$profile_loglike + n_updates <- cb_obj$cb_model_para$num_updates + n_updates_outcome <- cb_obj$cb_model_para$num_updates_outcome + model_coveraged <- cb_obj$cb_model_para$coveraged + model_coveraged_outcome <- cb_obj$cb_model_para$coveraged_outcome + jk_update <- cb_obj$cb_model_para$real_update_jk + if (!is.null(jk_update)){ + rownames(jk_update) <- paste0("jk_star_", 1:nrow(jk_update)) + colnames(jk_update) <- outcome_names + } + outcome_proximity_obj <- lapply(cb_obj$cb_model, function(cb) cb$obj_path) + outcome_coupled_best_update_obj <- lapply(cb_obj$cb_model, function(cb) cb$obj_single) + outcome_profile_loglik <- lapply(cb_obj$cb_model, function(cb) cb$profile_loglike_each) + names(outcome_proximity_obj) <- names(outcome_coupled_best_update_obj) <- + names(outcome_profile_loglik) <- names(n_updates_outcome) <- + names(model_coveraged_outcome) <- outcome_names + ll <- list( + "model_coveraged" = model_coveraged, + "n_updates" = n_updates, + "profile_loglik" = profile_loglik, + "outcome_profile_loglik" = outcome_profile_loglik, + "outcome_proximity_obj" = outcome_proximity_obj, + "outcome_coupled_best_update_obj" = outcome_coupled_best_update_obj, + "outcome_model_coveraged" = model_coveraged_outcome, + "outcome_n_updates" = n_updates_outcome, + "jk_star" = jk_update + ) + return(ll) +} + +#' Get formatted additional information in ColocBoost output when \code{output_level!=1} +#' @noRd +#' @keywords cb_reorganization +get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output = NULL, weaker_ucos = FALSE) { + cb_model <- cb_obj$cb_model + cb_model_para <- cb_obj$cb_model_para + + ## - obtain the order of variables based on the variables names if it has position information + if (!is.null(variables)) { + ordered <- 1:length(cb_obj$cb_model_para$variables) + } else { + ordered <- match(variables, cb_obj$cb_model_para$variables) + } + + ## - reorder all output + # - cb_model + tmp <- lapply(cb_model, function(cb) { + cb$beta <- cb$beta[ordered] + cb$weights_path <- cb$weights_path[, ordered] + cb$change_loglike <- cb$change_loglike[ordered] + cb$correlation <- as.numeric(cb$correlation[ordered]) + cb$z <- as.numeric(cb$z[ordered]) + cb$ld_jk <- cb$ld_jk[, ordered] + cb$z_univariate <- as.numeric(cb$z_univariate[ordered]) + cb$beta_hat <- as.numeric(cb$beta_hat[ordered]) + cb$multi_correction <- as.numeric(cb$multi_correction[ordered]) + cb$multi_correction_univariate <- as.numeric(cb$multi_correction_univariate[ordered]) + return(cb) + }) + cb_model <- tmp + + # - sets + if (!is.null(past_out)) { + out_ucos <- past_out$ucos + # - single sets + if (!is.null(out_ucos$ucos_each)) { + out_ucos$ucos_each <- lapply(out_ucos$ucos_each, function(cs) { + match(cb_model_para$variables[cs], variables) + }) + out_ucos$avW_ucos_each <- out_ucos$avW_ucos_each[ordered, , drop = FALSE] + + # - re-orginize specific results + analysis_outcome <- cb_obj$cb_model_para$outcome_names + specific_outcome_index <- specific_outcome <- list() + specific_cs_names <- c() + for (i in 1:length(out_ucos$ucos_each)) { + cc <- out_ucos$avW_ucos_each[, i, drop = FALSE] + tmp_names <- colnames(cc) + specific_outcome_index[[i]] <- as.numeric(gsub(".*Y([0-9]+).*", "\\1", tmp_names)) + specific_outcome[[i]] <- analysis_outcome[specific_outcome_index[[i]]] + specific_cs_names[i] <- paste0("ucos", i, ":y", specific_outcome_index[[i]]) + } + names(specific_outcome) <- names(specific_outcome_index) <- specific_cs_names + specific_outcomes <- list("outcome_index" = specific_outcome_index, "outcome_name" = specific_outcome) + + # - specific sets for variables + specific_cs_variableidx <- out_ucos$ucos_each + specific_cs_variablenames <- lapply(specific_cs_variableidx, function(specific_tmp) { + cb_obj$cb_model_para$variables[specific_tmp] + }) + specific_cs_variableidx <- lapply(specific_cs_variablenames, function(variable) match(variable, variables)) + names(specific_cs_variableidx) <- names(specific_cs_variablenames) <- specific_cs_names + specific_css <- list("ucos_index" = specific_cs_variableidx, "ucos_variables" = specific_cs_variablenames) + + # - specific set cs_change + cs_change <- out_ucos$change_obj_each + rownames(cs_change) <- specific_cs_names + colnames(cs_change) <- analysis_outcome + index_change <- as.data.frame(which(cs_change != 0, arr.ind = TRUE)) + change_outcomes <- analysis_outcome[index_change$col] + change_values <- diag(as.matrix(cs_change[index_change$row, index_change$col])) + cs_change <- data.frame("ucos_outcome" = change_outcomes, "ucos_delta" = change_values) + + # - filter weak ucos + check_null_max <- sapply(cb_model, function(cb) cb$check_null_max) + remove_weak <- sapply(1:nrow(cs_change), function(ic) { + outcome_tmp <- cs_change$ucos_outcome[ic] + delta_tmp <- cs_change$ucos_delta[ic] + pp <- which(cb_obj$cb_model_para$outcome_names == outcome_tmp) + check_tmp <- check_null_max[pp] + delta_tmp >= check_tmp + }) + keep_ucos <- which(remove_weak) + if (length(keep_ucos) == 0) { + specific_results <- NULL + } else { + specific_outcomes$outcome_index <- specific_outcomes$outcome_index[keep_ucos] + specific_outcomes$outcome_name <- specific_outcomes$outcome_name[keep_ucos] + specific_css$ucos_index <- specific_css$ucos_index[keep_ucos] + specific_css$ucos_variables <- specific_css$ucos_variables[keep_ucos] + cs_change <- cs_change[keep_ucos, , drop = FALSE] + out_ucos$avW_ucos_each <- out_ucos$avW_ucos_each[, keep_ucos, drop = FALSE] + specific_cs_names <- specific_cs_names[keep_ucos] + out_ucos$purity_each <- out_ucos$purity_each[keep_ucos, , drop = FALSE] + + # - ucos_weight + specific_w <- lapply(1:ncol(out_ucos$avW_ucos_each), function(ii) out_ucos$avW_ucos_each[, ii, drop = FALSE]) + names(specific_w) <- specific_cs_names + + # - hits variables in each csets + cs_hits <- sapply(1:length(specific_w), function(jj) { + inw <- specific_w[[jj]] + sample(which(inw == max(inw)), 1) + }) + cs_hits_variablenames <- sapply(cs_hits, function(ch) variables[ch]) + specific_cs_hits <- data.frame("top_index" = cs_hits, "top_variables" = cs_hits_variablenames) # save + rownames(specific_cs_hits) <- specific_cs_names + + # - purity + nucos <- length(specific_css$ucos_index) + if (nucos >= 2) { + empty_matrix <- matrix(NA, nucos, nucos) + colnames(empty_matrix) <- rownames(empty_matrix) <- specific_cs_names + specific_cs_purity <- lapply(1:3, function(ii) { + diag(empty_matrix) <- out_ucos$purity_each[, ii] + return(empty_matrix) + }) + for (i in 1:(nucos - 1)) { + for (j in (i + 1):nucos) { + cset1 <- specific_css$ucos_index[[i]] + cset2 <- specific_css$ucos_index[[j]] + y.i <- specific_outcomes$outcome_index[[i]] + y.j <- specific_outcomes$outcome_index[[j]] + yy <- unique(c(y.i, y.j)) + res <- list() + flag <- 1 + for (ii in yy) { + X_dict <- cb_obj$cb_data$dict[ii] + res[[flag]] <- get_between_purity(cset1, cset2, + X = cb_obj$cb_data$data[[X_dict]]$X, + Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, + miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, + P = cb_obj$cb_model_para$P + ) + flag <- flag + 1 + } + res <- Reduce(pmax, res) + specific_cs_purity <- lapply(1:3, function(ii) { + specific_cs_purity[[ii]][i, j] <- specific_cs_purity[[ii]][j, i] <- res[ii] + return(specific_cs_purity[[ii]]) + }) + } + } + names(specific_cs_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") + } else { + specific_cs_purity <- out_ucos$purity_each + rownames(specific_cs_purity) <- specific_cs_names + } + + # - cos&ucos purity + cos <- cb_output$cos_details$cos$cos_index + ncos <- length(cos) + if (ncos != 0) { + empty_matrix <- matrix(NA, ncos, nucos) + colnames(empty_matrix) <- specific_cs_names + rownames(empty_matrix) <- names(cos) + cos_ucos_purity <- lapply(1:3, function(ii) empty_matrix) + for (i in 1:ncos) { + for (j in 1:nucos) { + cset1 <- cos[[i]] + cset2 <- specific_css$ucos_index[[j]] + y.i <- cb_output$cos_details$cos_outcomes$outcome_index[[i]] + y.j <- specific_outcomes$outcome_index[[j]] + yy <- unique(c(y.i, y.j)) + res <- list() + flag <- 1 + for (ii in yy) { + X_dict <- cb_obj$cb_data$dict[ii] + res[[flag]] <- get_between_purity(cset1, cset2, + X = cb_obj$cb_data$data[[X_dict]]$X, + Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, + miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, + P = cb_obj$cb_model_para$P + ) + flag <- flag + 1 + } + res <- Reduce(pmax, res) + cos_ucos_purity <- lapply(1:3, function(ii) { + cos_ucos_purity[[ii]][i, j] <- res[ii] + return(cos_ucos_purity[[ii]]) + }) + } + } + names(cos_ucos_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") + } else { + cos_ucos_purity <- NULL + } + + + # - save coloc_results + specific_results <- list( + "ucos" = specific_css, + "ucos_outcomes" = specific_outcomes, + "ucos_weight" = specific_w, + "ucos_top_variables" = specific_cs_hits, + "ucos_purity" = specific_cs_purity, + "cos_ucos_purity" = cos_ucos_purity, + "ucos_outcomes_delta" = cs_change + ) + } + } else { + specific_results <- NULL + } + + # - cb_model_para + cb_model_para$N <- as.numeric(unlist(cb_model_para$N)) + cb_model_para$variables <- variables + + ll <- list( + "ucos_details" = specific_results, + "cb_model" = cb_model, + "cb_model_para" = cb_model_para + ) + } else { + # - cb_model_para + cb_model_para$N <- as.numeric(unlist(cb_model_para$N)) + cb_model_para$variables <- variables + ll <- list( + "ucos_detials" = NULL, + "cb_model" = cb_model, + "cb_model_para" = cb_model_para + ) + } + + return(ll) +} + diff --git a/R/data.R b/R/data.R index 8be1ab2..32950da 100644 --- a/R/data.R +++ b/R/data.R @@ -90,3 +90,23 @@ #' #' @family colocboost_data "Non_Causal_Strongest_Marginal" + + + +#' A real data example includes an ambiguous colocalization between eQTL and GWAS +#' +#' An example result from one of our real data applications, which shows an ambiguous colocalization between eQTL and GWAS. +#' +#' @format ## `Ambiguous_Colocalization` +#' A list with 2 elements +#' \describe{ +#' \item{ColocBoost_Results}{A `colocboost` output objective} +#' \item{SuSiE_Results}{Two `susie` output objective for eQTL and GWAS} +#' } +#' @source The Ambiguous_Colocalization dataset contains a real data example from one of our real data applications, +#' which shows an ambiguous colocalization between eQTL and GWAS. +#' The dataset is specifically designed for evaluating and demonstrating the capabilities of ColocBoost in real data applications. +#' See details in tutorial vignette \url{https://statfungen.github.io/colocboost/articles/index.html}. +#' +#' @family colocboost_data +"Ambiguous_Colocalization" diff --git a/_pkgdown.yml b/_pkgdown.yml index 5354be6..ca0dea6 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -38,6 +38,7 @@ articles: - Partial_Overlap_Variants - ColocBoost_Wrapper_Pipeline - LD_Free_Colocalization + - Ambiguous_Colocalization - FineBoost_Special_Case - title: internal @@ -45,7 +46,6 @@ articles: - announcements - installation - Pairwise_Colocalization - - ColocBoost_Diagnostics reference: - title: "Example Data" @@ -61,6 +61,8 @@ reference: - title: "Inference and summary" desc: "Functions for inference and summary from fitted model." contents: + - get_robust_colocalization + - get_colocboost_summary - has_concept("colocboost_inference") - title: "Visualization" diff --git a/data/Ambiguous_Colocalization.rda b/data/Ambiguous_Colocalization.rda new file mode 100644 index 0000000..0063776 Binary files /dev/null and b/data/Ambiguous_Colocalization.rda differ diff --git a/man/Ambiguous_Colocalization.Rd b/man/Ambiguous_Colocalization.Rd new file mode 100644 index 0000000..28dbccc --- /dev/null +++ b/man/Ambiguous_Colocalization.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{Ambiguous_Colocalization} +\alias{Ambiguous_Colocalization} +\title{A real data example includes an ambiguous colocalization between eQTL and GWAS} +\format{ +\subsection{\code{Ambiguous_Colocalization}}{ + +A list with 2 elements +\describe{ +\item{ColocBoost_Results}{A \code{colocboost} output objective} +\item{SuSiE_Results}{Two \code{susie} output objective for eQTL and GWAS} +} +} +} +\source{ +The Ambiguous_Colocalization dataset contains a real data example from one of our real data applications, +which shows an ambiguous colocalization between eQTL and GWAS. +The dataset is specifically designed for evaluating and demonstrating the capabilities of ColocBoost in real data applications. +See details in tutorial vignette \url{https://statfungen.github.io/colocboost/articles/index.html}. +} +\usage{ +Ambiguous_Colocalization +} +\description{ +An example result from one of our real data applications, which shows an ambiguous colocalization between eQTL and GWAS. +} +\seealso{ +Other colocboost_data: +\code{\link{Heterogeneous_Effect}}, +\code{\link{Ind_5traits}}, +\code{\link{Non_Causal_Strongest_Marginal}}, +\code{\link{Sumstat_5traits}}, +\code{\link{Weaker_GWAS_Effect}} +} +\concept{colocboost_data} +\keyword{datasets} diff --git a/man/Heterogeneous_Effect.Rd b/man/Heterogeneous_Effect.Rd index d4b7d29..4708a7c 100644 --- a/man/Heterogeneous_Effect.Rd +++ b/man/Heterogeneous_Effect.Rd @@ -28,6 +28,7 @@ An example dataset with simulated genotypes and traits for 2 traits and 2 common } \seealso{ Other colocboost_data: +\code{\link{Ambiguous_Colocalization}}, \code{\link{Ind_5traits}}, \code{\link{Non_Causal_Strongest_Marginal}}, \code{\link{Sumstat_5traits}}, diff --git a/man/Ind_5traits.Rd b/man/Ind_5traits.Rd index a1cc697..6347f4a 100644 --- a/man/Ind_5traits.Rd +++ b/man/Ind_5traits.Rd @@ -28,6 +28,7 @@ An example dataset with simulated genotypes and traits for 5 traits } \seealso{ Other colocboost_data: +\code{\link{Ambiguous_Colocalization}}, \code{\link{Heterogeneous_Effect}}, \code{\link{Non_Causal_Strongest_Marginal}}, \code{\link{Sumstat_5traits}}, diff --git a/man/Non_Causal_Strongest_Marginal.Rd b/man/Non_Causal_Strongest_Marginal.Rd index 7ecffa9..8fba152 100644 --- a/man/Non_Causal_Strongest_Marginal.Rd +++ b/man/Non_Causal_Strongest_Marginal.Rd @@ -28,6 +28,7 @@ An example dataset with simulated genotypes and traits for 2 traits and 2 common } \seealso{ Other colocboost_data: +\code{\link{Ambiguous_Colocalization}}, \code{\link{Heterogeneous_Effect}}, \code{\link{Ind_5traits}}, \code{\link{Sumstat_5traits}}, diff --git a/man/Sumstat_5traits.Rd b/man/Sumstat_5traits.Rd index bdf8ee5..c024bc9 100644 --- a/man/Sumstat_5traits.Rd +++ b/man/Sumstat_5traits.Rd @@ -28,6 +28,7 @@ An example dataset with simulated statistics for 5 traits } \seealso{ Other colocboost_data: +\code{\link{Ambiguous_Colocalization}}, \code{\link{Heterogeneous_Effect}}, \code{\link{Ind_5traits}}, \code{\link{Non_Causal_Strongest_Marginal}}, diff --git a/man/Weaker_GWAS_Effect.Rd b/man/Weaker_GWAS_Effect.Rd index 5819ada..14db5a3 100644 --- a/man/Weaker_GWAS_Effect.Rd +++ b/man/Weaker_GWAS_Effect.Rd @@ -28,6 +28,7 @@ An example dataset with simulated genotypes and traits for 2 traits and 2 common } \seealso{ Other colocboost_data: +\code{\link{Ambiguous_Colocalization}}, \code{\link{Heterogeneous_Effect}}, \code{\link{Ind_5traits}}, \code{\link{Non_Causal_Strongest_Marginal}}, diff --git a/man/colocboost.Rd b/man/colocboost.Rd index 6b71d2f..5cfbacc 100644 --- a/man/colocboost.Rd +++ b/man/colocboost.Rd @@ -188,7 +188,8 @@ to merge colocalized sets, which may resulting in a huge set.} \item{LD_free}{When \code{LD_free = FALSE}, objective function doesn't include LD information.} -\item{output_level}{When \code{output_level = 2}, return the ucos details for the single specific effects. +\item{output_level}{When \code{output_level = 1}, return basic cos details for colocalization results +When \code{output_level = 2}, return the ucos details for the single specific effects. When \code{output_level = 3}, return the entire Colocboost model to diagnostic results (more space).} } \value{ @@ -199,6 +200,8 @@ A \code{"colocboost"} object with some or all of the following elements: \item{cos_details}{A object with all information for colocalization results.} \item{data_info}{A object with detailed information from input data} \item{model_info}{A object with detailed information for colocboost model} +\item{ucos_details}{A object with all information for trait-specific effects when \code{output_level = 2}.} +\item{diagnositci_details}{A object with diagnostic details for ColocBoost model when \code{output_level = 3}.} } \description{ \code{colocboost} implements a proximity adaptive smoothing gradient boosting approach for multi-trait colocalization at gene loci, diff --git a/man/get_ambiguous_colocalization.Rd b/man/get_ambiguous_colocalization.Rd new file mode 100644 index 0000000..44d64a8 --- /dev/null +++ b/man/get_ambiguous_colocalization.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/colocboost_output.R +\name{get_ambiguous_colocalization} +\alias{get_ambiguous_colocalization} +\title{Get ambiguous colocalization events from trait-specific (uncolocalized) effects.} +\source{ +See detailed instructions in our tutorial portal: +\url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} +} +\usage{ +get_ambiguous_colocalization( + cb_output, + min_abs_corr_between_ucos = 0.5, + median_abs_corr_between_ucos = 0.8, + tol = 1e-09 +) +} +\arguments{ +\item{cb_output}{Output object from \code{colocboost} analysis} + +\item{min_abs_corr_between_ucos}{Minimum absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.5.} + +\item{median_abs_corr_between_ucos}{Median absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.8.} + +\item{tol}{A small, non-negative number specifying the convergence tolerance for checking the overlap of the variables in different sets.} +} +\value{ +A \code{"colocboost"} object of colocboost output with additional elements: +\item{ambiguous_ucos}{If exists, a list of ambiguous trait-specific (uncolocalized) effects.} +} +\description{ +\code{get_ambiguous_colocalization} get the colocalization by discarding the weaker colocalization events or colocalized outcomes +} +\examples{ +data(Ambiguous_Colocalization) +test_colocboost_results <- Ambiguous_Colocalization$ColocBoost_Results +res <- get_ambiguous_colocalization(test_colocboost_results) +names(res$ambigous_ucos) + +} +\seealso{ +Other colocboost_inference: +\code{\link{get_colocboost_summary}()}, +\code{\link{get_robust_colocalization}()} +} +\concept{colocboost_inference} diff --git a/man/get_colocboost_summary.Rd b/man/get_colocboost_summary.Rd new file mode 100644 index 0000000..255e832 --- /dev/null +++ b/man/get_colocboost_summary.Rd @@ -0,0 +1,106 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/colocboost_output.R +\name{get_colocboost_summary} +\alias{get_colocboost_summary} +\title{Get summary tables from a ColocBoost output.} +\source{ +See detailed instructions in our tutorial portal: +\url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} +} +\usage{ +get_colocboost_summary( + cb_output, + summary_level = 1, + outcome_names = NULL, + interest_outcome = NULL, + region_name = NULL, + min_abs_corr_between_ucos = 0.5, + median_abs_corr_between_ucos = 0.8 +) +} +\arguments{ +\item{cb_output}{Output object from \code{colocboost} analysis} + +\item{summary_level}{When \code{summary_level = 1}, return basic sumamry table for colocalization results. See details in \code{get_ucos_summary} function when \code{summary_level = 2}.} + +\item{outcome_names}{Optional vector of names of outcomes, which has the same order as Y in the original analysis.} + +\item{interest_outcome}{Optional vector specifying a subset of outcomes from \code{outcome_names} to focus on. When provided, only colocalization events that include at least one of these outcomes will be returned.} + +\item{region_name}{Optional character string. When provided, adds a column with this gene name to the output table for easier filtering in downstream analyses.} + +\item{min_abs_corr_between_ucos}{Minimum absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.5.} + +\item{median_abs_corr_between_ucos}{Median absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.8.} +} +\value{ +A list containing results from the ColocBoost analysis: +\itemize{ +\item When \code{summary_level = 1} (default): +\itemize{ +\item \code{cos_summary}: A summary table for colocalization events with the following columns: +\itemize{ +\item \code{focal_outcome}: The focal outcome being analyzed if exists. Otherwise, it is \code{FALSE}. +\item \code{colocalized_outcomes}: Colocalized outcomes for colocalization confidence set (CoS) +\item \code{cos_id}: Unique identifier for colocalization confidence set (CoS) +\item \code{purity}: Minimum absolute correlation of variables within colocalization confidence set (CoS) +\item \code{top_variable}: The variable with highest variant colocalization probability (VCP) +\item \code{top_variable_vcp}: Variant colocalization probability for the top variable +\item \code{cos_npc}: Normalized probability of colocalization +\item \code{min_npc_outcome}: Minimum normalized probability of colocalized traits +\item \code{n_variables}: Number of variables in colocalization confidence set (CoS) +\item \code{colocalized_index}: Indices of colocalized variables +\item \code{colocalized_variables}: List of colocalized variables +\item \code{colocalized_variables_vcp}: Variant colocalization probabilities for all colocalized variables +} +} +\item When \code{summary_level = 2}: +\itemize{ +\item \code{cos_summary}: As described above +\item \code{ucos_summary}: A summary table for trait-specific (uncolocalized) effects +} +\item When \code{summary_level = 3}: +\itemize{ +\item \code{cos_summary}: As described above +\item \code{ucos_summary}: A summary table for trait-specific (uncolocalized) effects +\item \code{ambiguous_ucos_summary}: A summary table for ambiguous colocalization events from trait-specific effects +} +} +} +\description{ +\code{get_colocboost_summary} get colocalization and trait-specific summary table +with or without the outcomes of interest. +} +\details{ +When \code{summary_level = 2} or \code{summary_level = 3}, additional details for trait-specific effects and ambiguous +colocalization events are included. See \code{\link{get_ucos_summary}} for details on these tables. +} +\examples{ +# colocboost example +set.seed(1) +N <- 1000 +P <- 100 +# Generate X with LD structure +sigma <- 0.9^abs(outer(1:P, 1:P, "-")) +X <- MASS::mvrnorm(N, rep(0, P), sigma) +colnames(X) <- paste0("SNP", 1:P) +L <- 3 +true_beta <- matrix(0, P, L) +true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 +true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) +true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 +true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 +Y <- matrix(0, N, L) +for (l in 1:L) { + Y[, l] <- X \%*\% true_beta[, l] + rnorm(N, 0, 1) +} +res <- colocboost(X = X, Y = Y) +get_colocboost_summary(res) + +} +\seealso{ +Other colocboost_inference: +\code{\link{get_ambiguous_colocalization}()}, +\code{\link{get_robust_colocalization}()} +} +\concept{colocboost_inference} diff --git a/man/get_cormat.Rd b/man/get_cormat.Rd index 9ace142..72bf230 100644 --- a/man/get_cormat.Rd +++ b/man/get_cormat.Rd @@ -32,7 +32,9 @@ cormat <- get_cormat(X) Other colocboost_utilities: \code{\link{get_cos}()}, \code{\link{get_cos_purity}()}, -\code{\link{get_hierarchical_clusters}()} +\code{\link{get_cos_summary}()}, +\code{\link{get_hierarchical_clusters}()}, +\code{\link{get_ucos_summary}()} } \concept{colocboost_utilities} \keyword{cb_post_inference} diff --git a/man/get_cos.Rd b/man/get_cos.Rd index 9dbcee0..f064a46 100644 --- a/man/get_cos.Rd +++ b/man/get_cos.Rd @@ -67,6 +67,8 @@ get_cos(res, coverage = 0.99, X = X, min_abs_corr = 0.95) Other colocboost_utilities: \code{\link{get_cormat}()}, \code{\link{get_cos_purity}()}, -\code{\link{get_hierarchical_clusters}()} +\code{\link{get_cos_summary}()}, +\code{\link{get_hierarchical_clusters}()}, +\code{\link{get_ucos_summary}()} } \concept{colocboost_utilities} diff --git a/man/get_cos_purity.Rd b/man/get_cos_purity.Rd index 724ef91..31de449 100644 --- a/man/get_cos_purity.Rd +++ b/man/get_cos_purity.Rd @@ -51,6 +51,8 @@ get_cos_purity(cos_res$cos, X = X) Other colocboost_utilities: \code{\link{get_cormat}()}, \code{\link{get_cos}()}, -\code{\link{get_hierarchical_clusters}()} +\code{\link{get_cos_summary}()}, +\code{\link{get_hierarchical_clusters}()}, +\code{\link{get_ucos_summary}()} } \concept{colocboost_utilities} diff --git a/man/get_cos_summary.Rd b/man/get_cos_summary.Rd index c6af013..4c14017 100644 --- a/man/get_cos_summary.Rd +++ b/man/get_cos_summary.Rd @@ -66,8 +66,12 @@ get_cos_summary(res) } \seealso{ -Other colocboost_inference: -\code{\link{get_robust_colocalization}()}, +Other colocboost_utilities: +\code{\link{get_cormat}()}, +\code{\link{get_cos}()}, +\code{\link{get_cos_purity}()}, +\code{\link{get_hierarchical_clusters}()}, \code{\link{get_ucos_summary}()} } -\concept{colocboost_inference} +\concept{colocboost_utilities} +\keyword{colocboost_inference} diff --git a/man/get_hierarchical_clusters.Rd b/man/get_hierarchical_clusters.Rd index cd69fa9..f1e4a49 100644 --- a/man/get_hierarchical_clusters.Rd +++ b/man/get_hierarchical_clusters.Rd @@ -39,7 +39,9 @@ clusters$Q_modularity Other colocboost_utilities: \code{\link{get_cormat}()}, \code{\link{get_cos}()}, -\code{\link{get_cos_purity}()} +\code{\link{get_cos_purity}()}, +\code{\link{get_cos_summary}()}, +\code{\link{get_ucos_summary}()} } \concept{colocboost_utilities} \keyword{cb_post_inference} diff --git a/man/get_robust_colocalization.Rd b/man/get_robust_colocalization.Rd index caa0193..b898f54 100644 --- a/man/get_robust_colocalization.Rd +++ b/man/get_robust_colocalization.Rd @@ -38,6 +38,7 @@ A \code{"colocboost"} object with some or all of the following elements: \item{cos_details}{A object with all information for colocalization results.} \item{data_info}{A object with detailed information from input data} \item{model_info}{A object with detailed information for colocboost model} +\item{ucos_from_cos}{A object with information for trait-specific effects if exists after removing weaker signals.} } \description{ \code{get_robust_colocalization} get the colocalization by discarding the weaker colocalization events or colocalized outcomes @@ -69,7 +70,7 @@ filter_res$cos_details$cos$cos_index } \seealso{ Other colocboost_inference: -\code{\link{get_cos_summary}()}, -\code{\link{get_ucos_summary}()} +\code{\link{get_ambiguous_colocalization}()}, +\code{\link{get_colocboost_summary}()} } \concept{colocboost_inference} diff --git a/man/get_ucos_summary.Rd b/man/get_ucos_summary.Rd index 4aaaa5c..97cc419 100644 --- a/man/get_ucos_summary.Rd +++ b/man/get_ucos_summary.Rd @@ -8,7 +8,14 @@ See detailed instructions in our tutorial portal: \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html} } \usage{ -get_ucos_summary(cb_output, outcome_names = NULL, region_name = NULL) +get_ucos_summary( + cb_output, + outcome_names = NULL, + region_name = NULL, + ambiguous_ucos = FALSE, + min_abs_corr_between_ucos = 0.5, + median_abs_corr_between_ucos = 0.8 +) } \arguments{ \item{cb_output}{Output object from \code{colocboost} analysis} @@ -16,20 +23,45 @@ get_ucos_summary(cb_output, outcome_names = NULL, region_name = NULL) \item{outcome_names}{Optional vector of names of outcomes, which has the same order as Y in the original analysis.} \item{region_name}{Optional character string. When provided, adds a column with this gene name to the output table for easier filtering in downstream analyses.} + +\item{ambiguous_ucos}{Logical indicating whether to include ambiguous colocalization events. The default is FALSE.} + +\item{min_abs_corr_between_ucos}{Minimum absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.5.} + +\item{median_abs_corr_between_ucos}{Median absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.8.} } \value{ -A summary table for trait-specific, uncolocalized associations with the following columns: -\item{outcomes}{Outcome being analyzed} -\item{ucos_id}{Unique identifier for trait-specific confidence sets} -\item{purity}{Minimum absolute correlation of variables within trait-specific confidence sets} -\item{top_variable}{The variable with highest variant-level probability of association (VPA)} -\item{top_variable_vpa}{Variant-level probability of association (VPA) for the top variable} -\item{ucos_npc}{Normalized probability of causal association for the trait-specific confidence set} -\item{n_variables}{Number of variables in trait-specific confidence set} -\item{ucos_index}{Indices of variables in the trait-specific confidence set} -\item{ucos_variables}{List of variables in the trait-specific confidence set} -\item{ucos_variables_vpa}{Variant-level probability of association (VPA) for all variables in the confidence set} -\item{region_name}{Region name if provided through the region_name parameter} +A list containing: +\itemize{ +\item \code{ucos_summary}: A summary table for trait-specific, uncolocalized associations with the following columns: +\itemize{ +\item \code{outcomes}: Outcome being analyzed +\item \code{ucos_id}: Unique identifier for trait-specific confidence sets +\item \code{purity}: Minimum absolute correlation of variables within trait-specific confidence sets +\item \code{top_variable}: The variable with highest variant-level probability of association (VPA) +\item \code{top_variable_vpa}: Variant-level probability of association (VPA) for the top variable +\item \code{ucos_npc}: Normalized probability of causal association for the trait-specific confidence set +\item \code{n_variables}: Number of variables in trait-specific confidence set +\item \code{ucos_index}: Indices of variables in the trait-specific confidence set +\item \code{ucos_variables}: List of variables in the trait-specific confidence set +\item \code{ucos_variables_vpa}: Variant-level probability of association (VPA) for all variables in the confidence set +\item \code{region_name}: Region name if provided through the region_name parameter +} +\item \code{ambiguous_ucos_summary}: A summary table for ambiguous colocalization events with the following columns: +\itemize{ +\item \code{outcomes}: Outcome in the ambiguous colocalization event +\item \code{ucos_id}: Unique identifiers for the ambiguous event +\item \code{min_between_purity}: Minimum absolute correlation between variables across trait-specific sets in the ambiguous event +\item \code{median_between_purity}: Median absolute correlation between variables across trait-specific sets in the ambiguous event +\item \code{overlap_idx}: Indices of variables that overlap between ambiguous trait-specific sets +\item \code{overlap_variables}: Names of variables that overlap between ambiguous trait-specific sets +\item \code{n_recalibrated_variables}: Number of variables in the recalibrated colocalization set from an ambiguous event +\item \code{recalibrated_index}: Indices of variables in the recalibrated colocalization set from an ambiguous event +\item \code{recalibrated_variables}: Names of variables in the recalibrated colocalization set from an ambiguous event +\item \code{recalibrated_variables_vcp}: Variant colocalization probabilities for recalibrated variables from an ambiguous event +\item \code{region_name}: Region name if provided through the region_name parameter +} +} } \description{ \code{get_ucos_summary} produces a trait-specific summary table for uncolocalized (single-trait) @@ -56,8 +88,12 @@ get_ucos_summary(res) } \seealso{ -Other colocboost_inference: +Other colocboost_utilities: +\code{\link{get_cormat}()}, +\code{\link{get_cos}()}, +\code{\link{get_cos_purity}()}, \code{\link{get_cos_summary}()}, -\code{\link{get_robust_colocalization}()} +\code{\link{get_hierarchical_clusters}()} } -\concept{colocboost_inference} +\concept{colocboost_utilities} +\keyword{colocboost_inference} diff --git a/tests/testthat/test_colocboost.R b/tests/testthat/test_colocboost.R index 7e2afab..c31d914 100644 --- a/tests/testthat/test_colocboost.R +++ b/tests/testthat/test_colocboost.R @@ -132,37 +132,7 @@ test_that("colocboost runs with summary statistics", { expect_equal(result$data_info$n_outcomes, 2) }) -# Test focal outcome functionality -test_that("colocboost handles focal outcome correctly", { - - # Convert Y to list - Y_list <- list(test_data$Y[,1], test_data$Y[,2]) - X_list <- list(test_data$X, test_data$X) - - # Run colocboost with focal_outcome_idx = 1 - warnings <- capture_warnings({ - result <- colocboost( - X = X_list, - Y = Y_list, - focal_outcome_idx = 1, - M = 10, # Small number of iterations for testing - output_level = 2 # More detailed output for testing - ) - }) - # Check if any of the expected warning patterns are present - expect_true( - any(grepl("smallest number of variables", warnings)) || - any(grepl("did not coverage", warnings)) - ) - - # Test that we get a colocboost object - expect_s3_class(result, "colocboost") - - # Check focal outcome is correctly set - expect_equal(result$data_info$outcome_info$is_focal[1], TRUE) - expect_equal(result$data_info$outcome_info$is_focal[2], FALSE) -}) # Test get_cos_summary functionality test_that("get_cos_summary returns expected structure", { @@ -206,6 +176,39 @@ test_that("get_cos_summary returns expected structure", { } }) + +# Test focal outcome functionality +test_that("colocboost handles focal outcome correctly", { + + # Convert Y to list + Y_list <- list(test_data$Y[,1], test_data$Y[,2]) + X_list <- list(test_data$X, test_data$X) + + # Run colocboost with focal_outcome_idx = 1 + warnings <- capture_warnings({ + result <- colocboost( + X = X_list, + Y = Y_list, + focal_outcome_idx = 1, + M = 10, # Small number of iterations for testing + output_level = 2 # More detailed output for testing + ) + }) + + # Check if any of the expected warning patterns are present + expect_true( + any(grepl("smallest number of variables", warnings)) || + any(grepl("did not coverage", warnings)) + ) + + # Test that we get a colocboost object + expect_s3_class(result, "colocboost") + + # Check focal outcome is correctly set + expect_equal(result$data_info$outcome_info$is_focal[1], TRUE) + expect_equal(result$data_info$outcome_info$is_focal[2], FALSE) +}) + # Test colocboost_plot functionality (basic call should not error) test_that("colocboost_plot runs without error", { diff --git a/tests/testthat/test_corner_cases.R b/tests/testthat/test_corner_cases.R index 78756cf..0ba15bd 100644 --- a/tests/testthat/test_corner_cases.R +++ b/tests/testthat/test_corner_cases.R @@ -334,4 +334,59 @@ test_that("colocboost prioritizes focal outcome correctly", { expect_equal(result_focal2$data_info$outcome_info$is_focal[1], FALSE) expect_equal(result_focal2$data_info$outcome_info$is_focal[2], TRUE) -}) \ No newline at end of file +}) + + +# Test with ambiguous corner cases +test_that("get_ambiguous_colocalization handles edge cases with correlation thresholds", { + + data(Ambiguous_Colocalization) + test_colocboost_results <- Ambiguous_Colocalization$ColocBoost_Results + + # Test with very high correlation thresholds (should find fewer ambiguities) + result_high_thresh <- get_ambiguous_colocalization( + test_colocboost_results, + min_abs_corr_between_ucos = 0.95, + median_abs_corr_between_ucos = 0.98 + ) + + # Test with very low correlation thresholds (should find more ambiguities) + result_low_thresh <- get_ambiguous_colocalization( + test_colocboost_results, + min_abs_corr_between_ucos = 0.1, + median_abs_corr_between_ucos = 0.3 + ) + + # Compare number of ambiguous events found with different thresholds + # Generally expect: n_high_thresh <= n_default <= n_low_thresh + n_high <- length(result_high_thresh$ambigous_ucos) + n_default <- length(get_ambiguous_colocalization(test_colocboost_results)$ambigous_ucos) + n_low <- length(result_low_thresh$ambigous_ucos) + + # Higher thresholds should find equal or fewer ambiguities than default + expect_true(n_high <= n_default) + + # Lower thresholds should find equal or more ambiguities than default + expect_true(n_low >= n_default) + + # Test with extreme threshold (min=1.0, median=1.0) - should find very few or no ambiguities + expect_message( + result_extreme <- get_ambiguous_colocalization( + test_colocboost_results, + min_abs_corr_between_ucos = 1.0, + median_abs_corr_between_ucos = 1.0 + ), + "No ambiguous colocalization events!" + ) + + # Test with threshold at 0 - should find many/all potential ambiguities + result_zero <- get_ambiguous_colocalization( + test_colocboost_results, + min_abs_corr_between_ucos = 0.0, + median_abs_corr_between_ucos = 0.0 + ) + expect_true(length(result_zero$ambigous_ucos) >= n_low) + + +}) + diff --git a/tests/testthat/test_inference.R b/tests/testthat/test_inference.R index 7442feb..34a26a2 100644 --- a/tests/testthat/test_inference.R +++ b/tests/testthat/test_inference.R @@ -1,7 +1,7 @@ library(testthat) # Utility function to generate a simple colocboost results -generate_test_result <- function(n = 100, p = 20, L = 2, seed = 42) { +generate_test_result <- function(n = 100, p = 50, L = 2, seed = 42) { set.seed(seed) # Generate X with LD structure @@ -20,12 +20,12 @@ generate_test_result <- function(n = 100, p = 20, L = 2, seed = 42) { if (L == 1) { # Single trait case true_beta[5, 1] <- 0.7 # SNP5 affects the trait - true_beta[10, 1] <- 0.6 # SNP10 also affects the trait + true_beta[40, 1] <- 0.6 # SNP10 also affects the trait } else { # Multi-trait case true_beta[5, 1] <- 0.7 # SNP5 affects trait 1 true_beta[5, 2] <- 0.6 # SNP5 also affects trait 2 (colocalized) - true_beta[10, 2] <- 0.5 # SNP10 only affects trait 2 + true_beta[40, 2] <- 0.8 # SNP10 only affects trait 2 } # Generate Y with some noise @@ -186,28 +186,189 @@ test_that("get_hierarchical_clusters functions correctly", { expect_equal(result_all_high$n_cluster, 1) }) -# Test get_ucos_summary function -test_that("get_ucos_summary handles different parameters", { + +# Test get_ambiguous_colocalization function +test_that("get_ambiguous_colocalization identifies ambiguous colocalizations correctly", { + # The function expects a specialized test dataset that has ambiguous colocalizations + # There's a reference in the example to a dataset named "Ambiguous_Colocalization" + data(Ambiguous_Colocalization) + test_colocboost_results <- Ambiguous_Colocalization$ColocBoost_Results + + # Basic call with default parameters + result <- get_ambiguous_colocalization(test_colocboost_results) + + # Check that the returned object is of class "colocboost" + expect_s3_class(result, "colocboost") + + # Check that the ambiguous_ucos field exists in the result + expect_true("ambigous_ucos" %in% names(result)) + + # If ambiguous colocalizations were found, test their structure + if (length(result$ambigous_ucos) > 0) { + # There should be fields for the ambiguous UCOs details + expect_true("ambigouse_ucos" %in% names(result$ambigous_ucos[[1]])) + expect_true("ambigouse_ucos_outcomes" %in% names(result$ambigous_ucos[[1]])) + expect_true("ambigous_ucos_weight" %in% names(result$ambigous_ucos[[1]])) + expect_true("ambigous_ucos_puriry" %in% names(result$ambigous_ucos[[1]])) + expect_true("ambigouse_ucos_union" %in% names(result$ambigous_ucos[[1]])) + expect_true("ambigouse_ucos_overlap" %in% names(result$ambigous_ucos[[1]])) + expect_true("recalibrated_cos_vcp" %in% names(result$ambigous_ucos[[1]])) + expect_true("recalibrated_cos" %in% names(result$ambigous_ucos[[1]])) + } - # Generate a test colocboost results - cb_res <- generate_test_result() + # Test with custom correlation thresholds + result_custom <- get_ambiguous_colocalization( + test_colocboost_results, + min_abs_corr_between_ucos = 0.7, + median_abs_corr_between_ucos = 0.9 + ) + + # The result should still be a colocboost object + expect_s3_class(result_custom, "colocboost") + + # Test with input that has no ucos_details + # Create a modified object without ucos_details + test_no_ucos <- test_colocboost_results + test_no_ucos$ucos_details <- NULL - # Basic summary call - expect_error(get_ucos_summary(cb_res), NA) + # Should show a warning but not error + expect_warning(get_ambiguous_colocalization(test_no_ucos)) - # With custom outcome names - expect_error(get_ucos_summary(cb_res, outcome_names = c("Trait1", "Trait2", "Trait3")), NA) + # Test with input that has only one trait-specific effect + cb_res <- generate_test_result(n=500) - # With region name - summary_with_region <- get_ucos_summary(cb_res, region_name = "TestGene") + expect_message( + result <- get_ambiguous_colocalization(cb_res), + "Only one trait-specific \\(uncolocalized\\) effect in this region!" + ) - # If summary is not NULL, check for region column - if (!is.null(summary_with_region)) { - expect_true("region_name" %in% colnames(summary_with_region)) - expect_equal(summary_with_region$region_name[1], "TestGene") + # The result should be unchanged from the input + expect_equal(result, cb_res) + + # There should be no ambiguous_ucos field added + expect_false("ambigous_ucos" %in% names(result)) + +}) + + + +# Test get_ucos_summary function +test_that("get_ucos_summary funtionality", { + # The function expects a specialized test dataset that has ambiguous colocalizations + # There's a reference in the example to a dataset named "Ambiguous_Colocalization" + data(Ambiguous_Colocalization) + test_colocboost_results <- Ambiguous_Colocalization$ColocBoost_Results + + # Basic call with default parameters + summary <- get_ucos_summary(test_colocboost_results) + + # Check structure of summary table + expect_true(is.data.frame(summary)) + + # Check expected columns exist + expected_cols <- c( + "outcomes", "ucos_id", "purity", + "top_variable", "top_variable_vpa", "n_variables", "ucos_index", + "ucos_variables", "ucos_variables_vpa" + ) + for (col in expected_cols) { + expect_true(col %in% colnames(summary)) + } + + # Basic call with default parameters + summary_ambiguous <- get_ucos_summary(test_colocboost_results, ambiguous_ucos = TRUE) + expect_true(all.equal(names(summary_ambiguous), c("ucos_summary", "ambiguous_ucos_summary"))) + + # Check expected columns exist + expected_cols <- c( + "outcomes", "ucos_id", "min_between_purity", "median_between_purity", + "overlap_idx", "overlap_variables", "n_recalibrated_variables", + "recalibrated_index", "recalibrated_variables", "recalibrated_variables_vcp" + ) + for (col in expected_cols) { + expect_true(col %in% colnames(summary_ambiguous$ambiguous_ucos_summary)) + } + +}) + + +test_that("get_colocboost_summary works correctly", { + # Setup mock data + set.seed(1) + N <- 1000 + P <- 100 + # Generate X with LD structure + sigma <- 0.9^abs(outer(1:P, 1:P, "-")) + X <- MASS::mvrnorm(N, rep(0, P), sigma) + colnames(X) <- paste0("SNP", 1:P) + L <- 3 + true_beta <- matrix(0, P, L) + true_beta[10, 1] <- 0.5 # SNP10 affects trait 1 + true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized) + true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2 + true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3 + Y <- matrix(0, N, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1) + } + + # Run colocboost + cb_output <- colocboost(X = X, Y = Y) + + # Test summary_level = 1 (default) + summary1 <- get_colocboost_summary(cb_output) + + # Check structure + expect_type(summary1, "list") + expect_named(summary1, "cos_summary") + expect_s3_class(summary1$cos_summary, "data.frame") + + # Check required columns in cos_summary + expected_cols <- c("focal_outcome", "colocalized_outcomes", "cos_id", + "purity", "top_variable", "top_variable_vcp", + "cos_npc", "min_npc_outcome", "n_variables") + expect_true(all(expected_cols %in% colnames(summary1$cos_summary))) + + # Test with outcome_names + outcome_names <- c("Trait1", "Trait2", "Trait3") + summary_with_names <- get_colocboost_summary(cb_output, outcome_names = outcome_names) + coloc_outcome <- strsplit(summary_with_names$cos_summary$colocalized_outcomes, "; ")[[1]] + expect_true(all( coloc_outcome %in% + c("Trait1", "Trait2", "Trait3", paste(outcome_names, collapse = ", ")))) + + # Test with region_name + region_summary <- get_colocboost_summary(cb_output, region_name = "TestRegion") + expect_true("region_name" %in% colnames(region_summary$cos_summary)) + expect_equal(unique(region_summary$cos_summary$region), "TestRegion") + + # Test summary_level = 2 + expect_warning(summary2 <- get_colocboost_summary(cb_output, summary_level = 2), + "Please run colocboost model with output_level=2") + + cb_output <- colocboost(X = X, Y = Y, output_level = 2) + summary2 <- get_colocboost_summary(cb_output, summary_level = 2) + expect_named(summary2, c("cos_summary", "ucos_summary")) + expect_s3_class(summary2$ucos_summary, "data.frame") + + # Test summary_level = 3 + summary3 <- get_colocboost_summary(cb_output, + summary_level = 3, + min_abs_corr_between_ucos = 0.4, + median_abs_corr_between_ucos = 0.7) + expect_named(summary3, c("cos_summary", "ucos_summary", "ambiguous_ucos_summary")) + expect_s3_class(summary3$ucos_summary, "data.frame") + + # Test with interest_outcome + interest_summary <- get_colocboost_summary(cb_output, + outcome_names = outcome_names, + interest_outcome = c("Trait1")) + # Should only contain colocalization events involving Trait1 + if(nrow(interest_summary$cos_summary) > 0) { + expect_true(all(sapply(interest_summary$cos_summary$colocalized_outcomes, + function(x) grepl("Trait1", x)))) } - # Test with single trait analysis result - cb_single <- generate_test_result(L=1) - single_summary <- get_ucos_summary(cb_single, outcome_names = "SingleTrait") + # Test error handling + expect_error(get_colocboost_summary("not_a_colocboost_object"), + "Input must from colocboost output!") }) \ No newline at end of file diff --git a/vignettes/Ambiguous_Colocalization.Rmd b/vignettes/Ambiguous_Colocalization.Rmd new file mode 100644 index 0000000..c9b3995 --- /dev/null +++ b/vignettes/Ambiguous_Colocalization.Rmd @@ -0,0 +1,146 @@ +--- +title: "Ambiguous Colocalization from Trait-Specific Effects" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Ambiguous Colocalization from Trait-Specific Effects} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + + +This vignette demonstrates an example of ambiguous colocalization from trait-specific effects using the `colocboost`. +Specifically, we will use the `Ambiguous_Colocalization`, which is an example result from one of our real data applications, +showing an ambiguous colocalization between eQTL and GWAS. + + +```{r setup} +library(colocboost) +# Run colocboost with diagnostic details +data(Ambiguous_Colocalization) +names(Ambiguous_Colocalization) +``` + +# 1. The `Ambiguous_Colocalization` Dataset + +The `Ambiguous_Colocalization` dataset contains results from a colocboost analysis of a real genomic region showing ambiguous trait-specific effects between eQTL +(expression quantitative trait loci) and GWAS (genome-wide association study) signals. +Ambiguous colocalization occurs when there appears to be shared causal variants between traits, +but the evidence is complicated by the presence of trait-specific effects. This dataset is structured as a list with two main components: + +1. `ColocBoost_Results`: Contains the output from running the ColocBoost algorithm. + +2. `SuSiE_Results`: Contains fine-mapping results from the SuSiE algorithm for both eQTL and GWAS data separately. + + + +# 2. ColocBoost results + +In this example, there are two trait-specifc effects for the eQTL and GWAS signals, respectively. But two uCoS have overlapping variants, +which indicates that the two uCoS are not independent. ColocBoost identifies two uCoS: + +- `ucos1:y1`: eQTL trait-specific effect has 6 variants. +- `ucos2:y2`: GWAS trait-specific effect has 22 variants. +- There are 3 variants that are overlapping between the two uCoS. + +```{r colocboost-results} +# Trait-specific effects for both eQTL and GWAS +Ambiguous_Colocalization$ColocBoost_Results$ucos_details$ucos$ucos_index + +# Intersection of eQTL and GWAS variants +Reduce(intersect, Ambiguous_Colocalization$ColocBoost_Results$ucos_details$ucos$ucos_index) +``` + +After checking the correlation of variants between the two uCoS, we can see the high correlation between the two uCoS. + +- The minimum absolute correlation between the two uCoS is 0.636 (off-diagonal `purity$min_abs_corr`). +- The median absolute correlation between the two uCoS is 0.837 (off-diagonal `purity$median_abs_corr`). +- The maximum absolute correlation between the two uCoS is 1 (off-diagonal `purity$max_abs_corr`), indicating overlapping variants exists. + +```{r colocboost-purity} +# With-in and between purity +Ambiguous_Colocalization$ColocBoost_Results$ucos_details$ucos_purity +``` + +Based on the results, we can see that the two uCoS are not independent, but they are not fully overlapping. + +```{r plot-ambiguous} +n_variables <- Ambiguous_Colocalization$ColocBoost_Results$data_info$n_variables +colocboost_plot( + Ambiguous_Colocalization$ColocBoost_Results, + plot_cols = 1, + grange = c(2000:n_variables), + plot_ucos = TRUE, + show_cos_to_uncoloc = TRUE +) +``` + + + +# 2. Fine-mapping results from SuSiE + +In this example, we also have fine-mapping results from SuSiE for both eQTL and GWAS data separately. + +- For eQTL, SuSiE shows 40 variants in 95% credible set (CS). +- For GWAS, SuSiE shows 57 variants in 95% credible set (CS). +- There are 24 variants are overlapping between the two credible sets. + +```{r susie-results} +susie_eQTL <- Ambiguous_Colocalization$SuSiE_Results$eQTL +susie_GWAS <- Ambiguous_Colocalization$SuSiE_Results$GWAS + +# Fine-mapped eQTL +susie_eQTL$sets$cs$L1 + +# Fine-mapped GWAS variants +susie_GWAS$sets$cs$L1 + +# Intersection of fine-mapped eQTL and GWAS variants +intersect(susie_eQTL$sets$cs$L1, susie_GWAS$sets$cs$L1) +``` + + +# 3. Get the ambiguous colocalization results and summary + +ColocBoost provides a function to get the ambiguous colocalization results and summary from trait-specifc effects, by considering the correlation of variants between the two uCoS. + +The `get_ambiguous_colocalization` function will return the ambiguous results in `ambigous_ucos` object, if the following conditions are met: + +- The two uCoS should have at least one overlapping variant. +- The minimum absolute correlation between the two uCoS is greater than `min_abs_corr_between_ucos` (default is 0.5). +- The median absolute correlation between the two uCoS is greater than `median_abs_corr_between_ucos` (default is 0.8). + + +```{r ambiguous-results} +colocboost_results <- Ambiguous_Colocalization$ColocBoost_Results +res <- get_ambiguous_colocalization(colocboost_results, min_abs_corr_between_ucos = 0.5, median_abs_corr_between_ucos = 0.8) +names(res) +names(res$ambigous_ucos) +``` + +To get the summary of ambiguous colocalization results, we can use the `get_colocboost_summary` function. + +- `summary_level = 1` (default): get the summary table for only the colocalization results, same as `cos_summary` in ColocBoost output. +- `summary_level = 2`: get the summary table for both colocalization and trait-specific effects if exists. +- `summary_level = 3`: get the summary table for colocalization, trait-specific effects and ambiguous colocalization results if exists. + +```{r ambiguous-summary} +# Get the full summary results from colocboost +full_summary <- get_colocboost_summary(colocboost_results, summary_level = 3) +names(full_summary) + +# Get the summary of ambiguous colocalization results +summary_ambiguous <- full_summary$ambiguous_ucos_summary +colnames(summary_ambiguous) +``` + + +- `recalibrated_*`: giving the recalibrated weigths and recalibrated 95% colocalization confidece sets (CoS) from the trait-specific effects. + +See details of function usage in the [Functions](https://statfungen.github.io/colocboost/reference/index.html). \ No newline at end of file diff --git a/vignettes/ColocBoost_Diagnostics.Rmd b/vignettes/ColocBoost_Diagnostics.Rmd deleted file mode 100644 index 0344b32..0000000 --- a/vignettes/ColocBoost_Diagnostics.Rmd +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: "Diagnostic for ColocBoost" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Diagnostic for ColocBoost} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - - -This vignette demonstrates how to perform multi-trait colocalization analysis using diagnosic details in ColocBoost, -including model parameters, model fitting, and model performance metrics. There is `diagnostic_details` in ColocBoost output when setting `output_level = 3`: - - -```{r setup} -library(colocboost) -# Run colocboost with diagnostic details -data(Ind_5traits) -res <- colocboost(X = Ind_5traits$X, Y = Ind_5traits$Y, output_level = 3) -names(res) -``` - -# 1. Diagnostic details of the model parameters - -- **`cb_model_para`**: parameters used in fitting ColocBoost model. - - -```{r cb-model-para} -names(res$diagnostic_details$cb_model_para) -``` - - -# 2. Diagnostic details of the model fitting - -- **`cb_model`**: trait-specific proximity gradient boosting model, including proximity weight at each iteration, residual after gradient boosting, et al. -- **`weights_paths`**: individual trait-specific weights for each iteration. - -```{r cb-model} -names(res$diagnostic_details$cb_model) -names(res$diagnostic_details$cb_model$ind_outcome_1) -``` -