Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions R/colocboost_assemble.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ colocboost_assemble <- function(cb_obj,
cb_output <- cb_output[c("cos_summary", "vcp", "cos_details", "data_info", "model_info", "ucos_details", "diagnostic_details")]
}
if (data_info$n_outcome == 1){
cb_output <- list("ucos_summary", "pip" = NULL, "ucos_details" = NULL, "data_info" = data_info)
cb_output <- list("ucos_summary", "vpa" = NULL, "ucos_details" = NULL, "data_info" = data_info)
}
}

Expand Down Expand Up @@ -223,14 +223,14 @@ colocboost_assemble <- function(cb_obj,
cb_output <- cb_output[-match(c("cos_summary","vcp","cos_details"), names(cb_output))]
remain_obj <- names(cb_output)
if (!is.null(cb_output$ucos_details$ucos)){
cb_output$pip <- apply(do.call(cbind,cb_output$ucos_details$ucos_weight), 1, function(w0) 1-prod(1-w0))
names(cb_output$pip) <- data_info$variables
cb_output$vpa <- apply(do.call(cbind,cb_output$ucos_details$ucos_weight), 1, function(w0) 1-prod(1-w0))
names(cb_output$vpa) <- data_info$variables
cb_output$ucos_summary <- get_ucos_summary(cb_output)
} else {
tmp <- list("pip" = NULL, "ucos_summary" = NULL)
tmp <- list("vpa" = NULL, "ucos_summary" = NULL)
cb_output <- c(cb_output, tmp)
}
cb_output <- cb_output[c("ucos_summary", "pip", remain_obj)]
cb_output <- cb_output[c("ucos_summary", "vpa", remain_obj)]
}
}
}
Expand Down
43 changes: 38 additions & 5 deletions R/colocboost_inference.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@
#'
#' @details
#' The following functions are included in this set:
#' `get_abc` get the colocalization summary table with or without the specific outcomes.
#' `get_cormat` a fast function to calculate correlation matrix from individual level data.
#' `check_null_post` a function to remove the spurious signals.
#' `get_purity` a function to calculate within-CoS purity.
#' `get_modularity` a function to calculate modularity for a given number of clusters.
#' `get_n_cluster` a function to get the number of clusters based on modularity-based hierarchical clustering method.
#' `get_between_purity` a function to calculate purity between two CoS.
#' `get_cos_evidence` a function to get the evidence of colocalization.
#' `w_purity` a function to calculate within-CoS purity for each single weight from one SEC.
#'
#' These functions are not exported individually and are accessed via `colocboost_post_inference`.
#'
Expand All @@ -33,7 +40,10 @@ get_cormat <- function(X, intercepte = FALSE){
}


#' Function to remove the spurious signals
#' @importFrom utils head tail
#' @keywords cb_post_inference
#' @noRd
check_null_post <- function(cb_obj,
coloc_sets_temp,
coloc_outcomes,
Expand Down Expand Up @@ -181,6 +191,9 @@ check_null_post <- function(cb_obj,
return(ll)
}

#' Function to calculate within-CoS purity
#' @keywords cb_post_inference
#' @noRd
#' @importFrom stats na.omit
get_purity <- function(pos, X=NULL, Xcorr=NULL, N = NULL, n = 100) {
get_upper_tri = Rfast::upper_tri
Expand Down Expand Up @@ -215,6 +228,12 @@ get_purity <- function(pos, X=NULL, Xcorr=NULL, N = NULL, n = 100) {
}

# ------ Calculate modularity ----------
#' Function to calculate modularity for a given number of clusters
#' @param Weight A matrix of weights
#' @param B A matrix of binary values indicating the clusters
#' @return The modularity value
#' @keywords cb_post_inference
#' @noRd
get_modularity <- function(Weight, B){
if (dim(Weight)[1] == 1){
Q <- 0
Expand Down Expand Up @@ -246,6 +265,14 @@ get_modularity <- function(Weight, B){
}
}

#' Function to get the number of clusters based on modularity-based hierarchical clustering method
#' @param hc A hierarchical clustering object
#' @param Sigma A matrix of weights
#' @param m The number of clusters
#' @param min_cluster_corr The minimum correlation threshold for clustering within the same cluster
#' @return A list containing the number of clusters and modularity values
#' @keywords cb_post_inference
#' @noRd
#' @importFrom stats cutree
get_n_cluster <- function(hc, Sigma, m=ncol(Sigma), min_cluster_corr = 0.8){
if (min(Sigma) > min_cluster_corr){
Expand All @@ -269,6 +296,9 @@ get_n_cluster <- function(hc, Sigma, m=ncol(Sigma), min_cluster_corr = 0.8){
"Qmodularity" = Q))
}

#' Function to calculate within-CoS purity for each single weight from one SEC
#' @keywords cb_post_inference
#' @noRd
w_purity <- function(weights, X=NULL, Xcorr=NULL, N = NULL, n = 100, coverage = 0.95,
min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL){

Expand All @@ -289,9 +319,10 @@ w_purity <- function(weights, X=NULL, Xcorr=NULL, N = NULL, n = 100, coverage =
return(is_pure)
}


#' Calculate purity between two CoS
#' @keywords cb_post_inference
#' @noRd
#' @importFrom stats na.omit
# - Calculate purity between two confidence sets
get_between_purity <- function(pos1, pos2, X=NULL, Xcorr=NULL, N = NULL, miss_idx = NULL, P = NULL){

get_matrix_mult <- function(X_sub1, X_sub2){
Expand Down Expand Up @@ -329,6 +360,9 @@ get_between_purity <- function(pos1, pos2, X=NULL, Xcorr=NULL, N = NULL, miss_id
return(c(min(value), max(value), get_median(value)))
}

#' Function to get the evidence of colocalization
#' @keywords cb_post_inference
#' @noRd
#' @importFrom stats var
#' @importFrom utils tail
get_cos_evidence <- function(cb_obj, coloc_out, data_info){
Expand Down Expand Up @@ -369,7 +403,6 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info){
cos_profile
}

# -
get_outcome_profile_change <- function(outcome_idx, cos, cb_obj, check_null_max){
extract_last <- function(lst) { tail(lst, n = 1)}
cb_data <- cb_obj$cb_data
Expand All @@ -386,7 +419,7 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info){
ifelse(max_profile < cos_profile, 0, max_profile - cos_profile)
}

# - calculate best configuration likelihood explained by minimal configuration
# - Calculate best configuration likelihood explained by minimal configuration
get_normalization_evidence <- function(profile_change, null_max, outcomes, outcome_names) {
# Define the exponential likelihood ratio normalization (ELRN)
logLR_normalization <- function(ratio) { 1 - exp( - 2*ratio ) }
Expand Down
59 changes: 53 additions & 6 deletions R/colocboost_one_causal.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@


#' @title Set of functions for ColocBoost based on per-trait-one-causal assumption, including LD-free mode and one iteration mode.
#'
#' @description
#' The `colocboost_one_causal` functions access the set of functions for ColocBoost based on per-trait-one-causal assumption.
#'
#'
#' @details
#' The following functions are included in this set:
#' `get_abc` get the colocalization summary table with or without the specific outcomes.
#'
#' These functions are not exported individually and are accessed via `colocboost_one_causal`.
#'
#' @rdname colocboost_one_causal
#' @keywords cb_one_causal
#' @noRd
colocboost_one_causal <- function(cb_model, cb_model_para, cb_data,
jk_equiv_corr = 0.8,
jk_equiv_loglik = 1,
Expand Down Expand Up @@ -37,8 +50,20 @@ colocboost_one_causal <- function(cb_model, cb_model_para, cb_data,




# under one causal per trait assumption with one iteration
#' Under one causal per trait assumption with one iteration mode
#'
#' @description
#' Executes one iteration of the ColocBoost algorithm under the one-causal-variant-per-trait
#' assumption. This function provides a simplified approach to examine colocalization patterns
#' for traits with strong marginal associations.
#'
#' @details
#' This version implements an LD-dependent mode that can handle scenarios where LD structures
#' may not perfectly match between datasets but still contain informative signals. The function
#' identifies the most strongly associated variants and evaluates their colocalization evidence.
#'
#' @keywords cb_one_causal
#' @noRd
colocboost_one_iteration <- function(cb_model, cb_model_para, cb_data,
jk_equiv_corr = 0.8,
jk_equiv_loglik = 1,
Expand Down Expand Up @@ -91,7 +116,14 @@ colocboost_one_iteration <- function(cb_model, cb_model_para, cb_data,
}



#' Identify and best update trait group for one causal variant - one iteration mode
#'
#' @details
#' The function operates by partitioning traits into different equivalence groups based on
#' correlation patterns and log-likelihood differences.
#'
#' @keywords cb_one_causal
#' @noRd
boost_check_update_jk_one_causal <- function(cb_model, cb_model_para, cb_data,
prioritize_jkstar = TRUE,
jk_equiv_corr = 0.8,
Expand Down Expand Up @@ -166,7 +198,14 @@ boost_check_update_jk_one_causal <- function(cb_model, cb_model_para, cb_data,



# under one causal per trait assumption with no LD
#' ColocBoost under one causal per trait assumption - LD-free mode
#' @description
#' Executes one iteration of the ColocBoost algorithm under the one-causal-variant-per-trait
#' assumption. This function provides a simplified approach to examine colocalization patterns
#' for traits without using any LD information.
#'
#' @keywords cb_one_causal
#' @noRd
colocboost_diagLD <- function(cb_model, cb_model_para, cb_data,
jk_equiv_corr = 0,
jk_equiv_loglik = 0.1,
Expand Down Expand Up @@ -300,6 +339,14 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data,
}


#' Check for overlapping causal effects between trait pairs
#'
#' @description
#' This function evaluates pairs of traits to determine if they share overlapping
#' causal effects. It will be used to create the trait group when LD information is not available.
#'
#' @keywords cb_one_causal
#' @noRd
check_pair_overlap <- function(weights, coverage = 0.95){

overlap_pair <- matrix(NA, nrow = nrow(weights), ncol = nrow(weights))
Expand Down
41 changes: 29 additions & 12 deletions R/colocboost_output.R
Original file line number Diff line number Diff line change
Expand Up @@ -396,12 +396,37 @@ get_cos_different_coverage <- function(cb_output, coverage = 0.95){

cos_vcp <- cb_output$cos_details$cos_vcp
cos_diff_coverage <- lapply(cos_vcp, function(w){
temp <- order(w, decreasing=T)
temp[1:min(which(cumsum(w[temp]) > coverage))] # 95%
unlist(get_in_cos(w, coverage = coverage))
})
return(cos_diff_coverage)
}

#' Get integrated weight from different outcomes
#'
#' Combines per-outcome weight vectors (columns of `weights`) into a single
#' normalized weight per variant via a fudge-factor-tempered geometric product.
#'
#' @param weights A matrix of weights; rows are variants, columns are outcomes.
#' @param weight_fudge_factor Exponent scaling applied across outcomes.
#' @return A numeric vector of integrated weights summing to 1.
#' @keywords cb_get_functions
#' @noRd
get_integrated_weight <- function(weights, weight_fudge_factor = 1.5){
  # Per-outcome exponent so the product acts as a tempered geometric mean
  pw <- weight_fudge_factor / ncol(weights)
  raw <- apply(weights, 1, function(w) prod(w^pw))
  # Normalize to a proper probability-like weight vector
  raw / sum(raw)
}

#' Extract CoS based on coverage
#'
#' Returns the smallest set of variants whose cumulative weight reaches
#' `coverage`, extended to include all variants tied at the threshold weight.
#'
#' @param weights A numeric vector of per-variant weights.
#' @param coverage Cumulative weight level the set must reach.
#' @return A list containing one integer vector of variant indices,
#'   ordered by decreasing weight.
#' @keywords cb_get_functions
#' @noRd
get_in_cos <- function(weights, coverage = 0.95){
  # Rank variants by weight, largest first
  ranking <- order(weights, decreasing = TRUE)
  # First position at which the cumulative weight reaches the coverage level
  cutoff <- min(which(cumsum(weights[ranking]) >= coverage))
  # Weight of the last variant needed to reach coverage
  min_weight <- weights[ranking][cutoff]
  # Keep every variant at or above that weight so ties are not dropped
  kept <- which(weights >= min_weight)
  # Report members sorted by decreasing weight
  list(kept[order(weights[kept], decreasing = TRUE)])
}


#' @title Set of internal functions to re-organize ColocBoost output format
#'
Expand Down Expand Up @@ -669,16 +694,8 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output
cb_model_para <- cb_obj$cb_model_para

## - obtain the order of variables based on the variables names if it has position information
if (is.null(variables)){
variables <- cb_obj$cb_model_para$variables
if (all(grepl("chr", variables))){
# - if variables_name has position informtaion, re-order all_info based on positions
position <- as.numeric(gsub(".*:(\\d+).*", "\\1", variables))
# position <- as.numeric(sapply(variables_name, function(tmp) strsplit(tmp, ":")[[1]][2]))
ordered <- order(position, decreasing = FALSE)
} else {
ordered <- 1:length(variables)
}
if (!is.null(variables)){
ordered <- 1:length(cb_obj$cb_model_para$variables)
} else {
ordered <- match(variables, cb_obj$cb_model_para$variables)
}
Expand Down
56 changes: 34 additions & 22 deletions R/colocboost_utils.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
#' @title Set of internal functions to refine colocalization confidence sets
#'
#' @description
#' The `colocboost_refine_cos` functions serve as a summary for the following two refine functions.
#'
#' @details
#' The following functions are included in this set:
#' `merge_cos_ucos` merge a trait-specific confidence set CS into a colocalization set CoS.
#' `merge_ucos` merge two trait-specific confidence sets.
#'
#' These functions are not exported individually and are accessed via `colocboost_refine_cos`.
#'
#' @rdname colocboost_refine_cos
#' @keywords cb_refine_cos
#' @noRd
merge_cos_ucos <- function(cb_obj, out_cos, out_ucos, coverage = 0.95,
min_abs_corr = 0.5, tol = 1e-9,
median_cos_abs_corr = 0.8){
Expand Down Expand Up @@ -255,7 +270,21 @@ merge_ucos <- function(cb_obj, past_out,
return(ll)
}


#' @title Set of internal utils functions
#'
#' @description
#' The `colocboost_utils` functions serve as a summary for the following two refine functions.
#'
#' @details
#' The following functions are included in this set:
#' `merge_cos_ucos` merge a trait-specific confidence set CS into a colocalization set CoS.
#' `merge_ucos` merge two trait-specific confidence sets.
#'
#' These functions are not exported individually and are accessed via `colocboost_utils`.
#'
#' @rdname colocboost_utils
#' @keywords cb_utils
#' @noRd
get_vcp <- function(past_out, P){
if (!is.null(past_out$cos$cos$cos)){
avW_coloc_vcp <- sapply(past_out$cos$cos$avWeight, get_integrated_weight)
Expand Down Expand Up @@ -315,7 +344,6 @@ check_two_overlap_sets <- function(total, i, j){
}
}


# Function to merge overlapping sets
merge_sets <- function(vec) {
split_lists <- lapply(vec, function(x) as.numeric(unlist(strsplit(x, ";"))))
Expand Down Expand Up @@ -344,7 +372,6 @@ merge_sets <- function(vec) {
}



get_avWeigth <- function(cb_model, coloc_outcomes, update, pos.coloc, name_weight=FALSE){

avWeight <- lapply(coloc_outcomes, function(i){
Expand All @@ -360,7 +387,6 @@ get_avWeigth <- function(cb_model, coloc_outcomes, update, pos.coloc, name_weigh
}



get_max_profile <- function(cb_obj, check_null_max=0.02, check_null_method = "profile"){
for (i in 1:cb_obj$cb_model_para$L){
cb <- cb_obj$cb_model[[i]]
Expand All @@ -377,27 +403,13 @@ get_max_profile <- function(cb_obj, check_null_max=0.02, check_null_method = "pr


### Function for check cs for each weight
w_cs <- function(w, coverage = 0.95){
n <- sum(cumsum(sort(w,decreasing = TRUE)) < coverage) + 1
o <- order(w,decreasing = TRUE)
result = rep(0,length(w))
result[o[1:n]] = 1
w_cs <- function(weights, coverage = 0.95){
  # Indices in the coverage-level confidence set for this weight vector
  members <- unlist(get_in_cos(weights, coverage = coverage))
  # 0/1 membership indicator over all variants
  indicator <- rep(0, length(weights))
  indicator[members] <- 1
  return(indicator)
}

get_integrated_weight <- function(avWeight, weight_fudge_factor = 1.5){
av <- apply(avWeight, 1, function(w) prod(w^(weight_fudge_factor/ncol(avWeight))))
return(av / sum(av))
}

get_in_cos <- function(weights, coverage = 0.95){

temp <- order(weights, decreasing=T)
csets <- temp[1:min(which(cumsum(weights[temp]) >= coverage))] # 95%
return(list(csets))

}


#' Pure R implementation (fallback)
#' @noRd
Expand Down
Loading
Loading