Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions R/colocboost_assemble.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ colocboost_assemble <- function(cb_obj,
cb_output <- cb_output[c("cos_summary", "vcp", "cos_details", "data_info", "model_info", "ucos_details", "diagnostic_details")]
}
if (data_info$n_outcome == 1){
cb_output <- list("ucos_summary", "pip" = NULL, "ucos_details" = NULL, "data_info" = data_info)
cb_output <- list("ucos_summary", "vpa" = NULL, "ucos_details" = NULL, "data_info" = data_info)
}
}

Expand Down Expand Up @@ -223,14 +223,14 @@ colocboost_assemble <- function(cb_obj,
cb_output <- cb_output[-match(c("cos_summary","vcp","cos_details"), names(cb_output))]
remain_obj <- names(cb_output)
if (!is.null(cb_output$ucos_details$ucos)){
cb_output$pip <- apply(do.call(cbind,cb_output$ucos_details$ucos_weight), 1, function(w0) 1-prod(1-w0))
names(cb_output$pip) <- data_info$variables
cb_output$vpa <- apply(do.call(cbind,cb_output$ucos_details$ucos_weight), 1, function(w0) 1-prod(1-w0))
names(cb_output$vpa) <- data_info$variables
cb_output$ucos_summary <- get_ucos_summary(cb_output)
} else {
tmp <- list("pip" = NULL, "ucos_summary" = NULL)
tmp <- list("vpa" = NULL, "ucos_summary" = NULL)
cb_output <- c(cb_output, tmp)
}
cb_output <- cb_output[c("ucos_summary", "pip", remain_obj)]
cb_output <- cb_output[c("ucos_summary", "vpa", remain_obj)]
}
}
}
Expand Down
43 changes: 38 additions & 5 deletions R/colocboost_inference.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@
#'
#' @details
#' The following functions are included in this set:
#' `get_abc` get the colocalization summary table with or without the specific outcomes.
#' `get_cormat` a fast function to calculate correlation matrix from individual level data.
#' `check_null_post` a function to remove the spurious signals.
#' `get_purity` a function to calculate within-CoS purity.
#' `get_modularity` a function to calculate modularity for a given number of clusters.
#' `get_n_cluster` a function to get the number of clusters based on modularity-based hierarchical clustering method.
#' `get_between_purity` a function to calculate purity between two CoS.
#' `get_cos_evidence` a function to get the evidence of colocalization.
#' `w_purity` a function to calculate within-CoS purity for each single weight from one SEC.
#'
#' These functions are not exported individually and are accessed via `colocboost_post_inference`.
#'
Expand All @@ -33,7 +40,10 @@ get_cormat <- function(X, intercepte = FALSE){
}


#' Function to remove the spurious signals
#' @importFrom utils head tail
#' @keywords cb_post_inference
#' @noRd
check_null_post <- function(cb_obj,
coloc_sets_temp,
coloc_outcomes,
Expand Down Expand Up @@ -181,6 +191,9 @@ check_null_post <- function(cb_obj,
return(ll)
}

#' Function to calculate within-CoS purity
#' @keywords cb_post_inference
#' @noRd
#' @importFrom stats na.omit
get_purity <- function(pos, X=NULL, Xcorr=NULL, N = NULL, n = 100) {
get_upper_tri = Rfast::upper_tri
Expand Down Expand Up @@ -215,6 +228,12 @@ get_purity <- function(pos, X=NULL, Xcorr=NULL, N = NULL, n = 100) {
}

# ------ Calculate modularity ----------
#' Function to calculate modularity for a given number of clusters
#' @param Weight A matrix of weights
#' @param B A matrix of binary values indicating the clusters
#' @return The modularity value
#' @keywords cb_post_inference
#' @noRd
get_modularity <- function(Weight, B){
if (dim(Weight)[1] == 1){
Q <- 0
Expand Down Expand Up @@ -246,6 +265,14 @@ get_modularity <- function(Weight, B){
}
}

#' Function to get the number of clusters based on modularity-based hierarchical clustering method
#' @param hc A hierarchical clustering object
#' @param Sigma A matrix of weights
#' @param m The number of clusters
#' @param min_cluster_corr The minimum correlation threshold for clustering within the same cluster
#' @return A list containing the number of clusters and modularity values
#' @keywords cb_post_inference
#' @noRd
#' @importFrom stats cutree
get_n_cluster <- function(hc, Sigma, m=ncol(Sigma), min_cluster_corr = 0.8){
if (min(Sigma) > min_cluster_corr){
Expand All @@ -269,6 +296,9 @@ get_n_cluster <- function(hc, Sigma, m=ncol(Sigma), min_cluster_corr = 0.8){
"Qmodularity" = Q))
}

#' Function to calculate within-CoS purity for each single weight from one SEC
#' @keywords cb_post_inference
#' @noRd
w_purity <- function(weights, X=NULL, Xcorr=NULL, N = NULL, n = 100, coverage = 0.95,
min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL){

Expand All @@ -289,9 +319,10 @@ w_purity <- function(weights, X=NULL, Xcorr=NULL, N = NULL, n = 100, coverage =
return(is_pure)
}


#' Calculate purity between two CoS
#' @keywords cb_post_inference
#' @noRd
#' @importFrom stats na.omit
# - Calculate purity between two confidence sets
get_between_purity <- function(pos1, pos2, X=NULL, Xcorr=NULL, N = NULL, miss_idx = NULL, P = NULL){

get_matrix_mult <- function(X_sub1, X_sub2){
Expand Down Expand Up @@ -329,6 +360,9 @@ get_between_purity <- function(pos1, pos2, X=NULL, Xcorr=NULL, N = NULL, miss_id
return(c(min(value), max(value), get_median(value)))
}

#' Function to get the evidence of colocalization
#' @keywords cb_post_inference
#' @noRd
#' @importFrom stats var
#' @importFrom utils tail
get_cos_evidence <- function(cb_obj, coloc_out, data_info){
Expand Down Expand Up @@ -369,7 +403,6 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info){
cos_profile
}

# -
get_outcome_profile_change <- function(outcome_idx, cos, cb_obj, check_null_max){
extract_last <- function(lst) { tail(lst, n = 1)}
cb_data <- cb_obj$cb_data
Expand All @@ -386,7 +419,7 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info){
ifelse(max_profile < cos_profile, 0, max_profile - cos_profile)
}

# - calculate best configuration likelihood explained by minimal configuration
# - Calculate best configuration likelihood explained by minimal configuration
get_normalization_evidence <- function(profile_change, null_max, outcomes, outcome_names) {
# Define the exponential likelihood ratio normalization (ELRN)
logLR_normalization <- function(ratio) { 1 - exp( - 2*ratio ) }
Expand Down
59 changes: 53 additions & 6 deletions R/colocboost_one_causal.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@


#' @title Set of functions for ColocBoost based on per-trait-one-causal assumption, including LD-free mode and one iteration mode.
#'
#' @description
#' The `colocboost_one_causal` functions access the set of functions for ColocBoost based on per-trait-one-causal assumption.
#'
#'
#' @details
#' The following functions are included in this set:
#' `get_abc` get the colocalization summary table with or without the specific outcomes.
#'
#' These functions are not exported individually and are accessed via `colocboost_one_causal`.
#'
#' @rdname colocboost_one_causal
#' @keywords cb_one_causal
#' @noRd
colocboost_one_causal <- function(cb_model, cb_model_para, cb_data,
jk_equiv_corr = 0.8,
jk_equiv_loglik = 1,
Expand Down Expand Up @@ -37,8 +50,20 @@ colocboost_one_causal <- function(cb_model, cb_model_para, cb_data,




# under one causal per trait assumption with one iteration
#' Under one causal per trait assumption with one iteration mode
#'
#' @description
#' Executes one iteration of the ColocBoost algorithm under the one-causal-variant-per-trait
#' assumption. This function provides a simplified approach to examine colocalization patterns
#' for traits with strong marginal associations.
#'
#' @details
#' This version implements an LD-dependent mode that can handle scenarios where LD structures
#' may not perfectly match between datasets but still contain informative signals. The function
#' identifies the most strongly associated variants and evaluates their colocalization evidence.
#'
#' @keywords cb_one_causal
#' @noRd
colocboost_one_iteration <- function(cb_model, cb_model_para, cb_data,
jk_equiv_corr = 0.8,
jk_equiv_loglik = 1,
Expand Down Expand Up @@ -91,7 +116,14 @@ colocboost_one_iteration <- function(cb_model, cb_model_para, cb_data,
}



#' Identify and best update trait group for one causal variant - one iteration mode
#'
#' @details
#' The function operates by partitioning traits into different equivalence groups based on
#' correlation patterns and log-likelihood differences.
#'
#' @keywords cb_one_causal
#' @noRd
boost_check_update_jk_one_causal <- function(cb_model, cb_model_para, cb_data,
prioritize_jkstar = TRUE,
jk_equiv_corr = 0.8,
Expand Down Expand Up @@ -166,7 +198,14 @@ boost_check_update_jk_one_causal <- function(cb_model, cb_model_para, cb_data,



# under one causal per trait assumption with no LD
#' ColocBoost under one causal per trait assumption - LD-free mode
#' @description
#' Executes one iteration of the ColocBoost algorithm under the one-causal-variant-per-trait
#' assumption. This function provides a simplified approach to examine colocalization patterns
#' for traits without using any LD information.
#'
#' @keywords cb_one_causal
#' @noRd
colocboost_diagLD <- function(cb_model, cb_model_para, cb_data,
jk_equiv_corr = 0,
jk_equiv_loglik = 0.1,
Expand Down Expand Up @@ -300,6 +339,14 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data,
}


#' Check for overlapping causal effects between trait pairs
#'
#' @description
#' This function evaluates pairs of traits to determine if they share overlapping
#' causal effects. It will be used to create the trait group when LD information is not available.
#'
#' @keywords cb_one_causal
#' @noRd
check_pair_overlap <- function(weights, coverage = 0.95){

overlap_pair <- matrix(NA, nrow = nrow(weights), ncol = nrow(weights))
Expand Down
41 changes: 29 additions & 12 deletions R/colocboost_output.R
Original file line number Diff line number Diff line change
Expand Up @@ -396,12 +396,37 @@ get_cos_different_coverage <- function(cb_output, coverage = 0.95){

cos_vcp <- cb_output$cos_details$cos_vcp
cos_diff_coverage <- lapply(cos_vcp, function(w){
temp <- order(w, decreasing=T)
temp[1:min(which(cumsum(w[temp]) > coverage))] # 95%
unlist(get_in_cos(w, coverage = coverage))
})
return(cos_diff_coverage)
}

#' Get integrated weight from different outcomes
#'
#' Combines per-outcome weight vectors (columns of `weights`) into a single
#' normalized weight per variant via a fudge-factor-tempered geometric product.
#'
#' @param weights A matrix of weights; rows are variants, columns are outcomes.
#' @param weight_fudge_factor Exponent scaling applied across outcomes.
#' @return A numeric vector of integrated weights summing to 1.
#' @keywords cb_get_functions
#' @noRd
get_integrated_weight <- function(weights, weight_fudge_factor = 1.5){
  # Per-outcome exponent so the product acts as a tempered geometric mean
  pw <- weight_fudge_factor / ncol(weights)
  raw <- apply(weights, 1, function(w) prod(w^pw))
  # Normalize to a proper probability-like weight vector
  raw / sum(raw)
}

#' Extract CoS based on coverage
#'
#' Returns the smallest set of variants whose cumulative weight reaches
#' `coverage`, extended to include all variants tied at the threshold weight.
#'
#' @param weights A numeric vector of per-variant weights.
#' @param coverage Cumulative weight level the set must reach.
#' @return A list containing one integer vector of variant indices,
#'   ordered by decreasing weight.
#' @keywords cb_get_functions
#' @noRd
get_in_cos <- function(weights, coverage = 0.95){
  # Rank variants by weight, largest first
  ranking <- order(weights, decreasing = TRUE)
  # First position at which the cumulative weight reaches the coverage level
  cutoff <- min(which(cumsum(weights[ranking]) >= coverage))
  # Weight of the last variant needed to reach coverage
  min_weight <- weights[ranking][cutoff]
  # Keep every variant at or above that weight so ties are not dropped
  kept <- which(weights >= min_weight)
  # Report members sorted by decreasing weight
  list(kept[order(weights[kept], decreasing = TRUE)])
}


#' @title Set of internal functions to re-organize ColocBoost output format
#'
Expand Down Expand Up @@ -669,16 +694,8 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output
cb_model_para <- cb_obj$cb_model_para

## - obtain the order of variables based on the variables names if it has position information
if (is.null(variables)){
variables <- cb_obj$cb_model_para$variables
if (all(grepl("chr", variables))){
# - if variables_name has position informtaion, re-order all_info based on positions
position <- as.numeric(gsub(".*:(\\d+).*", "\\1", variables))
# position <- as.numeric(sapply(variables_name, function(tmp) strsplit(tmp, ":")[[1]][2]))
ordered <- order(position, decreasing = FALSE)
} else {
ordered <- 1:length(variables)
}
if (!is.null(variables)){
ordered <- 1:length(cb_obj$cb_model_para$variables)
} else {
ordered <- match(variables, cb_obj$cb_model_para$variables)
}
Expand Down
56 changes: 34 additions & 22 deletions R/colocboost_utils.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
#' @title Set of internal functions to refine colocalization confidence sets
#'
#' @description
#' The `colocboost_refine_cos` functions serve as a summary for the following two refine functions.
#'
#' @details
#' The following functions are included in this set:
#' `merge_cos_ucos` merge a trait-specific confidence set CS into a colocalization set CoS.
#' `merge_ucos` merge two trait-specific confidence sets.
#'
#' These functions are not exported individually and are accessed via `colocboost_refine_cos`.
#'
#' @rdname colocboost_refine_cos
#' @keywords cb_refine_cos
#' @noRd
merge_cos_ucos <- function(cb_obj, out_cos, out_ucos, coverage = 0.95,
min_abs_corr = 0.5, tol = 1e-9,
median_cos_abs_corr = 0.8){
Expand Down Expand Up @@ -255,7 +270,21 @@ merge_ucos <- function(cb_obj, past_out,
return(ll)
}


#' @title Set of internal utils functions
#'
#' @description
#' The `colocboost_utils` functions serve as a summary for the following two refine functions.
#'
#' @details
#' The following functions are included in this set:
#' `merge_cos_ucos` merge a trait-specific confidence set CS into a colocalization set CoS.
#' `merge_ucos` merge two trait-specific confidence sets.
#'
#' These functions are not exported individually and are accessed via `colocboost_utils`.
#'
#' @rdname colocboost_utils
#' @keywords cb_utils
#' @noRd
get_vcp <- function(past_out, P){
if (!is.null(past_out$cos$cos$cos)){
avW_coloc_vcp <- sapply(past_out$cos$cos$avWeight, get_integrated_weight)
Expand Down Expand Up @@ -315,7 +344,6 @@ check_two_overlap_sets <- function(total, i, j){
}
}


# Function to merge overlapping sets
merge_sets <- function(vec) {
split_lists <- lapply(vec, function(x) as.numeric(unlist(strsplit(x, ";"))))
Expand Down Expand Up @@ -344,7 +372,6 @@ merge_sets <- function(vec) {
}



get_avWeigth <- function(cb_model, coloc_outcomes, update, pos.coloc, name_weight=FALSE){

avWeight <- lapply(coloc_outcomes, function(i){
Expand All @@ -360,7 +387,6 @@ get_avWeigth <- function(cb_model, coloc_outcomes, update, pos.coloc, name_weigh
}



get_max_profile <- function(cb_obj, check_null_max=0.02, check_null_method = "profile"){
for (i in 1:cb_obj$cb_model_para$L){
cb <- cb_obj$cb_model[[i]]
Expand All @@ -377,27 +403,13 @@ get_max_profile <- function(cb_obj, check_null_max=0.02, check_null_method = "pr


### Function for check cs for each weight
w_cs <- function(w, coverage = 0.95){
n <- sum(cumsum(sort(w,decreasing = TRUE)) < coverage) + 1
o <- order(w,decreasing = TRUE)
result = rep(0,length(w))
result[o[1:n]] = 1
w_cs <- function(weights, coverage = 0.95){
  # Indices in the coverage-level confidence set for this weight vector
  members <- unlist(get_in_cos(weights, coverage = coverage))
  # 0/1 membership indicator over all variants
  indicator <- rep(0, length(weights))
  indicator[members] <- 1
  return(indicator)
}

get_integrated_weight <- function(avWeight, weight_fudge_factor = 1.5){
av <- apply(avWeight, 1, function(w) prod(w^(weight_fudge_factor/ncol(avWeight))))
return(av / sum(av))
}

get_in_cos <- function(weights, coverage = 0.95){

temp <- order(weights, decreasing=T)
csets <- temp[1:min(which(cumsum(weights[temp]) >= coverage))] # 95%
return(list(csets))

}


#' Pure R implementation (fallback)
#' @noRd
Expand Down
Loading
Loading