StatFunGen · gaow · Aug 27, 2025 · May 6, 2025 · May 6, 2025 · May 10, 2025
diff --git a/R/colocboost.R b/R/colocboost.R
@@ -82,7 +82,8 @@
 #' @param weight_fudge_factor The strength to integrate weight from different outcomes, default is 1.5
 #' @param check_null The cut off value for change conditional objective function. Default is 0.1.
 #' @param check_null_method The metric to check the null sets. Default is "profile"
-#' @param check_null_max The smallest value of change of profile loglikelihood for each outcome.
+#' @param check_null_max The smallest value of change of profile loglikelihood for each outcome in CoS.
+#' @param check_null_max_ucos The smallest value of change of profile loglikelihood for each outcome in uCoS.
 #' @param weaker_effect If \code{weaker_effect = TRUE}, consider the weaker single effect due to coupling effects
 #' @param LD_free When \code{LD_free = FALSE}, objective function doesn't include LD information.
 #' @param output_level When \code{output_level = 1}, return basic cos details for colocalization results
@@ -180,6 +181,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
                        check_null = 0.1, # the cut off value for change conditional objective function
                        check_null_method = "profile", # the metric to check the null sets.
                        check_null_max = 0.025, # the smallest value of change of profile loglikelihood for each outcome.
+                       check_null_max_ucos = 0.015, # the smallest value of change of profile loglikelihood for each outcome in uCoS.
                        weaker_effect = TRUE,
                        LD_free = FALSE,
                        output_level = 1) {
@@ -398,23 +400,9 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
         "Without LD, only a single iteration will be performed under the assumption of one causal variable per outcome. ",
         "Additionally, the purity of CoS cannot be evaluated!"
       )
-
-      p.sumstat <- sapply(keep_variable_sumstat, length)
-      p.unique <- unique(p.sumstat)
-      if (length(p.unique) == 1) {
-        ld <- diag(1, nrow = p.unique)
-        colnames(ld) <- rownames(ld) <- keep_variable_sumstat[[1]]
-        LD <- list(ld)
-        sumstatLD_dict <- rep(1, length(sumstat))
-      } else {
-        LD <- lapply(keep_variable_sumstat, function(sn) {
-          ld <- diag(1, nrow = length(sn))
-          colnames(ld) <- rownames(ld) <- sn
-          return(ld)
-        })
-        sumstatLD_dict <- 1:length(sumstat)
-      }
-
+
+      LD <- 1
+      sumstatLD_dict <- rep(1, length(sumstat))
       # change some algorithm parameters
       M <- 1 # one iteration
       min_abs_corr <- 0 # remove purity checking
@@ -428,6 +416,12 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
       if (is.matrix(LD)) {
         LD <- list(LD)
       }
+      # - check if NA in LD matrix
+      num_na <- sapply(LD, sum)
+      if (any(is.na(num_na))){
+        warning("Error: Input LD must not contain missing values (NA).")
+        return(NULL)
+      }
       if (length(LD) == 1) {
         sumstatLD_dict <- rep(1, length(sumstat))
       } else if (length(LD) == length(sumstat)) {
@@ -616,6 +610,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
     check_null = check_null,
     check_null_method = check_null_method,
     check_null_max = check_null_max,
+    check_null_max_ucos = check_null_max_ucos,
     dedup = dedup,
     overlap = overlap,
     n_purity = n_purity,
@@ -632,3 +627,4 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
   class(cb_output) <- "colocboost"
   return(cb_output)
 }
+
diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R
@@ -25,6 +25,7 @@ colocboost_assemble <- function(cb_obj,
                                 check_null = 0.1,
                                 check_null_method = "profile",
                                 check_null_max = 0.025,
+                                check_null_max_ucos = 0.015,
                                 dedup = TRUE,
                                 overlap = TRUE,
                                 n_purity = 100,
@@ -71,11 +72,17 @@ colocboost_assemble <- function(cb_obj,
       }
     }
   } else {
-    if (cb_obj$cb_model_para$M == 1) {
+    if (cb_obj$cb_model_para$model_used == "LD_free") {
+      # fixme later
       check_null_method <- "obj"
-      check_null <- check_null_max <- 0.1
+      check_null_max <- check_null * check_null
+    } else if (cb_obj$cb_model_para$model_used == "one_causal"){
+      # fixme later
+      check_null_max <- check_null_max * check_null
     }
-    cb_obj <- get_max_profile(cb_obj, check_null_max = check_null_max, check_null_method = check_null_method)
+    cb_obj <- get_max_profile(cb_obj, check_null_max = check_null_max, 
+                              check_null_max_ucos = check_null_max_ucos, 
+                              check_null_method = check_null_method)
     # --------- about colocalized confidence sets ---------------------------------
     out_cos <- colocboost_assemble_cos(cb_obj,
       coverage = coverage,
@@ -202,10 +209,6 @@ colocboost_assemble <- function(cb_obj,
     # - colocalization results
     cb_obj$cb_model_para$weight_fudge_factor <- weight_fudge_factor
     cb_obj$cb_model_para$coverage <- coverage
-    if (cb_obj$cb_model_para$M == 1) {
-      # fixme for single iteration model
-      cb_obj <- get_max_profile(cb_obj, check_null_max = 0.01, check_null_method = "profile")
-    }
     cos_results <- get_cos_details(cb_obj, coloc_out = past_out$cos$cos, data_info = data_info)
     cb_output <- list(
       "vcp" = cos_results$vcp,

diff --git a/R/colocboost_check_update_jk.R b/R/colocboost_check_update_jk.R
@@ -498,10 +498,13 @@ get_LD_jk1_jk2 <- function(jk1, jk2,
     LD_temp[which(is.na(LD_temp))] <- 0
     LD_temp <- LD_temp[1, 2]
   } else if (!is.null(XtX)) {
-    jk1.remain <- which(remain_jk == jk1)
-    jk2.remain <- which(remain_jk == jk2)
-    # scaling <- if (!is.null(N)) N-1 else 1
-    LD_temp <- XtX[jk1.remain, jk2.remain] # / scaling
+    if (sum(XtX) == 1){
+      LD_temp <- 0
+    } else {
+      jk1.remain <- which(remain_jk == jk1)
+      jk2.remain <- which(remain_jk == jk2)
+      LD_temp <- XtX[jk1.remain, jk2.remain]
+    }
   }
   return(LD_temp)
 }
@@ -595,7 +598,11 @@ estimate_change_profile_res <- function(jk,
     } else {
       xty <- XtY / scaling_factor
     }
-    rtr <- yty - 2 * sum(beta_k * xty) + sum((xtx %*% as.matrix(beta_k)) * beta_k)
+    if (sum(xtx) == 1){
+      rtr <- yty - 2 * sum(beta_k * xty) + sum(beta_k^2)
+    } else {
+      rtr <- yty - 2 * sum(beta_k * xty) + sum((xtx %*% as.matrix(beta_k)) * beta_k)
+    }
   }
   numerator <- xtr^2 / (2 * rtr)
   denominator <- 0.5 * log(2 * pi * rtr) + 0.5

diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R
@@ -207,6 +207,7 @@ w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverag
   return(is_pure)
 }
 
+
 #' Function to remove the spurious signals
 #' @importFrom utils head tail
 #' @keywords cb_post_inference
@@ -240,8 +241,11 @@ check_null_post <- function(cb_obj,
       } else {
         xty <- XtY / scaling_factor
       }
-
-      (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep
+      if (sum(xtx) == 1){
+        (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) * adj_dep
+      } else {
+        (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep
+      }
     }
   }
 
@@ -289,10 +293,18 @@ check_null_post <- function(cb_obj,
       if (length(miss_idx) != 0) {
         xty <- XtY[-miss_idx] / scaling.factor
         res.tmp <- rep(0, length(XtY))
-        res.tmp[-miss_idx] <- xty - xtx %*% (cs_beta[-miss_idx] / beta_scaling)
+        if (sum(xtx) == 1){
+          res.tmp[-miss_idx] <- xty - cs_beta[-miss_idx] / beta_scaling
+        } else {
+          res.tmp[-miss_idx] <- xty - xtx %*% (cs_beta[-miss_idx] / beta_scaling)
+        }
       } else {
         xty <- XtY / scaling.factor
-        res.tmp <- xty - xtx %*% (cs_beta / beta_scaling)
+        if (sum(xtx) == 1){
+          res.tmp <- xty - (cs_beta / beta_scaling)
+        } else {
+          res.tmp <- xty - xtx %*% (cs_beta / beta_scaling)
+        }
       }
       return(res.tmp)
     }
@@ -349,7 +361,7 @@ check_null_post <- function(cb_obj,
         last_obj <- min(cb_obj$cb_model[[j]]$obj_path)
         change <- abs(cs_obj - last_obj)
         if (length(cb_obj$cb_model[[j]]$obj_path) == 1) {
-          total_obj <- 1
+          total_obj <- change
         } else {
           total_obj <- diff(range(cb_obj$cb_model[[j]]$obj_path))
         }
@@ -408,8 +420,12 @@ get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) {
       corr[which(is.na(corr))] <- 0
       value <- abs(get_upper_tri(corr))
     } else {
-      Xcorr <- Xcorr # if (!is.null(N)) Xcorr/(N-1) else Xcorr
-      value <- abs(get_upper_tri(Xcorr[pos, pos]))
+      if (sum(Xcorr) == 1){
+        value <- 0
+      } else {
+        Xcorr <- Xcorr # if (!is.null(N)) Xcorr/(N-1) else Xcorr
+        value <- abs(get_upper_tri(Xcorr[pos, pos]))
+      }
     }
     return(c(
       min(value),
@@ -443,14 +459,18 @@ get_between_purity <- function(pos1, pos2, X = NULL, Xcorr = NULL, miss_idx = NU
     X_sub2 <- scale(X[, pos2, drop = FALSE], center = T, scale = F)
     value <- abs(get_matrix_mult(X_sub1, X_sub2))
   } else {
-    if (length(miss_idx)!=0){
-      pos1 <- na.omit(match(pos1, setdiff(1:P, miss_idx)))
-      pos2 <- na.omit(match(pos2, setdiff(1:P, miss_idx)))
-    }
-    if (length(pos1) != 0 & length(pos2) != 0) {
-      value <- abs(Xcorr[pos1, pos2])
-    } else {
+    if (sum(Xcorr)==1){
       value <- 0
+    } else {
+      if (length(miss_idx)!=0){
+        pos1 <- na.omit(match(pos1, setdiff(1:P, miss_idx)))
+        pos2 <- na.omit(match(pos2, setdiff(1:P, miss_idx)))
+      }
+      if (length(pos1) != 0 & length(pos2) != 0) {
+        value <- abs(Xcorr[pos1, pos2])
+      } else {
+        value <- 0
+      }
     }
   }
   return(c(min(value), max(value), get_median(value)))
@@ -488,8 +508,11 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) {
       } else {
         xty <- XtY / scaling_factor
       }
-
-      cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep
+      if (sum(xtx) == 1){
+        cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) * adj_dep
+      } else {
+        cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep
+      }
     }
     delta <- yty - cos_profile
     if (delta <= 0) {

diff --git a/R/colocboost_init.R b/R/colocboost_init.R
@@ -39,22 +39,43 @@ colocboost_init_data <- function(X, Y, dict_YX,
   #################  initialization #######################################
   cb_data <- list("data" = list())
   class(cb_data) <- "colocboost"
+  # if (!is.null(dict_YX) & !is.null(dict_sumstatLD)) {
+  #   dict <- c(dict_YX, max(dict_YX) + dict_sumstatLD)
+  #   n_ind_variable <- max(dict_YX)
+  # } else if (!is.null(dict_YX) & is.null(dict_sumstatLD)) {
+  #   dict <- dict_YX
+  # } else if (is.null(dict_YX) & !is.null(dict_sumstatLD)) {
+  #   dict <- dict_sumstatLD
+  #   n_ind_variable <- 0
+  # }
+
+  # note here and remove later: dict is designed by X and LD, but we need additional dictionary for keep variables.
+  # keep variables for individual level data is based on X - there is no issue.
+  # keep variables for sumstat data is based on sumstat (not LD) - there is a issue to index the focal outcome based on dict later.
   if (!is.null(dict_YX) & !is.null(dict_sumstatLD)) {
     dict <- c(dict_YX, max(dict_YX) + dict_sumstatLD)
     n_ind_variable <- max(dict_YX)
+    dict_keep_variables <- c(dict_YX, 1:length(dict_sumstatLD) + n_ind_variable)
   } else if (!is.null(dict_YX) & is.null(dict_sumstatLD)) {
     dict <- dict_YX
+    dict_keep_variables <- dict_YX
   } else if (is.null(dict_YX) & !is.null(dict_sumstatLD)) {
     dict <- dict_sumstatLD
     n_ind_variable <- 0
+    dict_keep_variables <- 1:length(dict_sumstatLD)
   }
+
   if (focal_outcome_variables & !is.null(focal_outcome_idx)) {
     if (focal_outcome_idx > length(dict)) {
       stop("Target outcome index is over the total number of outcomes! please check!")
     }
-    keep_variable_names <- keep_variables[[dict[focal_outcome_idx]]]
+    # keep_variable_names <- keep_variables[[dict[focal_outcome_idx]]]
+    keep_variable_names <- keep_variables[[dict_keep_variables[focal_outcome_idx]]]
     if (overlap_variables) {
-      keep_tmp <- lapply(keep_variables[-dict[focal_outcome_idx]], function(tm) {
+      # keep_tmp <- lapply(keep_variables[-dict[focal_outcome_idx]], function(tm) {
+      #   intersect(keep_variable_names, tm)
+      # })
+      keep_tmp <- lapply(keep_variables[-dict_keep_variables[focal_outcome_idx]], function(tm) {
         intersect(keep_variable_names, tm)
       })
       keep_variable_names <- Reduce(union, keep_tmp)
@@ -299,7 +320,8 @@ colocboost_init_para <- function(cb_data, cb_model, tau = 0.01,
     "num_updates_outcome" = num_updates_outcome,
     "func_multi_test" = func_multi_test,
     "multi_test_thresh" = multi_test_thresh,
-    "multi_test_max" = multi_test_max
+    "multi_test_max" = multi_test_max,
+    "model_used" = "original"
   )
   class(cb_model_para) <- "colocboost"
 
@@ -374,7 +396,11 @@ get_correlation <- function(X = NULL, res = NULL, XtY = NULL, N = NULL,
       Xtr <- res / scaling_factor
       XtY <- XtY / scaling_factor
     }
-    var_r <- YtY - 2 * sum(beta_k * XtY) + sum((XtX %*% as.matrix(beta_k)) * beta_k)
+    if (sum(XtX) == 1){
+      var_r <- YtY - 2 * sum(beta_k * XtY) + sum(beta_k^2)
+    } else {
+      var_r <- YtY - 2 * sum(beta_k * XtY) + sum((XtX %*% as.matrix(beta_k)) * beta_k)
+    }
     if (var_r > 1e-6) {
       corr_nomiss <- Xtr / sqrt(var_r)
       if (length(miss_idx) != 0) {
@@ -425,15 +451,35 @@ get_lfdr <- function(z, miss_idx = NULL) {
     z <- z[-miss_idx]
     try_run <- 1
     while (try_run == 1 && lambda_max >= 0.05) {
-      result <- try(
+      # result <- try(
+      #   {
+      #     lfdr_nomissing <- qvalue(pchisq(drop(z^2), 1, lower.tail = FALSE), lambda = seq(0.05, lambda_max, 0.05))$lfdr
+      #   },
+      #   silent = TRUE
+      # )
+      # if (inherits(result, "try-error")) {
+      #   lambda_max <- lambda_max - 0.05 # Decrement lambda_max if error occurs
+      # } else {
+      #   try_run <- 0
+      # }
+      result <- tryCatch(
         {
-          lfdr_nomissing <- qvalue(pchisq(drop(z^2), 1, lower.tail = FALSE), lambda = seq(0.05, lambda_max, 0.05))$lfdr
+          lfdr_nomissing <- qvalue(pchisq(drop(z^2), 1, lower.tail = FALSE), 
+                                   lambda = seq(0.05, lambda_max, 0.05))$lfdr
+          list(success = TRUE, value = lfdr_nomissing)
         },
-        silent = TRUE
+        warning = function(w) {
+          list(success = FALSE, message = w$message)
+        },
+        error = function(e) {
+          list(success = FALSE, message = e$message)
+        }
       )
-      if (inherits(result, "try-error")) {
-        lambda_max <- lambda_max - 0.05 # Decrement lambda_max if error occurs
+
+      if (!result$success) {
+        lambda_max <- lambda_max - 0.05
       } else {
+        lfdr_nomissing <- result$value
         try_run <- 0
       }
     }
@@ -573,21 +619,24 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic
     Z_extend[pos_target] <- current_z[pos_z]
 
     # Calculate submatrix for each unique entry (not duplicates)
-    ld_submatrix <- NULL
-
-    if (length(common_variants) > 0) {
-      # Only include the submatrix if this entry is unique or is the first occurrence
-      if (i == unified_dict[i]) {
-        # Check if common_variants and rownames have identical order
-        if (identical(common_variants, rownames(current_ld_matrix))) {
-          # If order is identical, use the matrix directly without reordering
-          ld_submatrix <- current_ld_matrix
-        } else {
-          # If order is different, reorder using matched indices
-          matched_indices <- match(common_variants, rownames(current_ld_matrix))
-          ld_submatrix <- current_ld_matrix[matched_indices, matched_indices, drop = FALSE]
-          rownames(ld_submatrix) <- common_variants
-          colnames(ld_submatrix) <- common_variants
+    if (sum(current_ld_matrix) == 1){
+      ld_submatrix <- current_ld_matrix
+    } else {
+      ld_submatrix <- NULL
+      if (length(common_variants) > 0) {
+        # Only include the submatrix if this entry is unique or is the first occurrence
+        if (i == unified_dict[i]) {
+          # Check if common_variants and rownames have identical order
+          if (identical(common_variants, rownames(current_ld_matrix))) {
+            # If order is identical, use the matrix directly without reordering
+            ld_submatrix <- current_ld_matrix
+          } else {
+            # If order is different, reorder using matched indices
+            matched_indices <- match(common_variants, rownames(current_ld_matrix))
+            ld_submatrix <- current_ld_matrix[matched_indices, matched_indices, drop = FALSE]
+            rownames(ld_submatrix) <- common_variants
+            colnames(ld_submatrix) <- common_variants
+          }
         }
       }
     }