StatFunGen · xueweic · Apr 21, 2025 · Apr 21, 2025 · Apr 21, 2025
diff --git a/R/colocboost.R b/R/colocboost.R
@@ -468,8 +468,8 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
       warning(
         "Providing the sample size (n), or even a rough estimate of n, ",
         "is highly recommended. Without n, the implicit assumption is ",
-        "n is large (Inf) and the effect sizes are small (close to zero).",
-        "outcome ", paste(p_no, collapse = ","), " in sumstat don't contain 'n'!"
+        "n is large (Inf) and the effect sizes are small (close to zero). ",
+        "Outcome ", paste(p_no, collapse = ", "), " in sumstat don't contain 'n'!"
       )
     }
 

diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R
@@ -126,7 +126,7 @@ colocboost_assemble <- function(cb_obj,
         if (!is.null(cb_obj_single$cb_data$data[[1]][["XtY"]])) {
           if (is.null(cb_obj_single$cb_data$data[[1]]$XtX)) {
             X_dict <- cb_obj$cb_data$dict[i]
-            cb_obj_single$cb_data$data[[1]]$X <- cb_obj$cb_data$data[[X_dict]]$XtX
+            cb_obj_single$cb_data$data[[1]]$XtX <- cb_obj$cb_data$data[[X_dict]]$XtX
           }
         }
         class(cb_obj_single) <- "colocboost"

diff --git a/R/colocboost_check_update_jk.R b/R/colocboost_check_update_jk.R
@@ -584,6 +584,8 @@ estimate_change_profile_res <- function(jk,
     xtr <- t(X[, jk]) %*% res / (N - 1)
   } else if (!is.null(XtY)) {
     scaling_factor <- if (!is.null(N)) (N - 1) else 1
+    beta_scaling <- if (!is.null(N)) 1 else 100
+    beta_k <- beta_k / beta_scaling
     yty <- YtY / scaling_factor
     xtr <- res[jk] / scaling_factor
     xtx <- XtX # / scaling_factor

diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R
@@ -207,7 +207,6 @@ w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverag
   return(is_pure)
 }
 
-
 #' Function to remove the spurious signals
 #' @importFrom utils head tail
 #' @keywords cb_post_inference
@@ -231,6 +230,8 @@ check_null_post <- function(cb_obj,
       mean((Y - X %*% as.matrix(cs_beta))^2) * N / (N - 1)
     } else if (!is.null(XtY)) {
       scaling_factor <- if (!is.null(N)) (N - 1) else 1
+      beta_scaling <- if (!is.null(N)) 1 else 100
+      cs_beta <- cs_beta / beta_scaling
       yty <- YtY / scaling_factor
       xtx <- XtX
       if (length(miss_idx) != 0) {
@@ -283,14 +284,15 @@ check_null_post <- function(cb_obj,
       return(Y - X %*% cs_beta)
     } else if (!is.null(XtX)) {
       scaling.factor <- if (!is.null(N)) N - 1 else 1
+      beta_scaling <- if (!is.null(N)) 1 else 100
       xtx <- XtX / scaling.factor
       if (length(miss_idx) != 0) {
         xty <- XtY[-miss_idx] / scaling.factor
         res.tmp <- rep(0, length(XtY))
-        res.tmp[-miss_idx] <- xty - xtx %*% cs_beta[-miss_idx]
+        res.tmp[-miss_idx] <- xty - xtx %*% (cs_beta[-miss_idx] / beta_scaling)
       } else {
         xty <- XtY / scaling.factor
-        res.tmp <- xty - xtx %*% cs_beta
+        res.tmp <- xty - xtx %*% (cs_beta / beta_scaling)
       }
       return(res.tmp)
     }
@@ -476,6 +478,8 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) {
       yty <- var(Y)
     } else if (!is.null(XtY)) {
       scaling_factor <- if (!is.null(N)) (N - 1) else 1
+      beta_scaling <- if (!is.null(N)) 1 else 100
+      cs_beta <- cs_beta / beta_scaling
       yty <- YtY / scaling_factor
       xtx <- XtX
       if (length(miss_idx) != 0) {

diff --git a/R/colocboost_init.R b/R/colocboost_init.R
@@ -41,10 +41,12 @@ colocboost_init_data <- function(X, Y, dict_YX,
   class(cb_data) <- "colocboost"
   if (!is.null(dict_YX) & !is.null(dict_sumstatLD)) {
     dict <- c(dict_YX, max(dict_YX) + dict_sumstatLD)
+    n_ind_variable <- max(dict_YX)
   } else if (!is.null(dict_YX) & is.null(dict_sumstatLD)) {
     dict <- dict_YX
   } else if (is.null(dict_YX) & !is.null(dict_sumstatLD)) {
     dict <- dict_sumstatLD
+    n_ind_variable <- 0
   }
   if (focal_outcome_variables & !is.null(focal_outcome_idx)) {
     if (focal_outcome_idx > length(dict)) {
@@ -85,7 +87,7 @@ colocboost_init_data <- function(X, Y, dict_YX,
   if (!is.null(Z) & !is.null(LD)) {
     ####################### need to consider more #########################
     # ------ only code up one sumstat
-    variant_lists <- keep_variables[c(flag:length(keep_variables))]
+    variant_lists <- keep_variables[c((n_ind_variable+1):length(keep_variables))]
     sumstat_formated <- process_sumstat(
       Z, N_sumstat, Var_y, SeBhat, LD,
       variant_lists, dict_sumstatLD,
@@ -350,6 +352,8 @@ get_correlation <- function(X = NULL, res = NULL, XtY = NULL, N = NULL,
   } else if (!is.null(XtY)) {
     corr <- rep(0, length(XtY))
     scaling_factor <- if (!is.null(N)) (N - 1) else 1
+    beta_scaling <- if (!is.null(N)) 1 else 100
+    beta_k <- beta_k / beta_scaling
     YtY <- YtY / scaling_factor
     XtX <- XtX
     if (length(miss_idx) != 0) {

diff --git a/R/colocboost_output.R b/R/colocboost_output.R
@@ -744,8 +744,14 @@ get_data_info <- function(cb_obj) {
     is_focal[cb_obj$cb_model_para$focal_outcome_idx] <- TRUE
   }
   is_sumstat <- grepl("sumstat_outcome", names(cb_obj$cb_data$data))
+  N <- cb_obj$cb_model_para$N
+  check_no_N <- sapply(cb_obj$cb_model_para$N, is.null)
+  if (sum(check_no_N)!=0){
+    N[which(check_no_N)] <- "NA"
+    N <- unlist(N)
+  }
   outcome_info <- data.frame(
-    "outcome_names" = analysis_outcome, "sample_size" = cb_obj$cb_model_para$N,
+    "outcome_names" = analysis_outcome, "sample_size" = N,
     "is_sumstats" = is_sumstat, "is_focal" = is_focal
   )
   rownames(outcome_info) <- paste0("y", 1:n_outcome)

diff --git a/R/colocboost_update.R b/R/colocboost_update.R
@@ -104,15 +104,16 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) {
       profile_log <- mean((y - x %*% beta)^2) * adj_dep
     } else if (!is.null(cb_data$data[[X_dict]]$XtX)) {
       scaling_factor <- if (!is.null(cb_data$data[[i]]$N)) cb_data$data[[i]]$N - 1 else 1
+      beta_scaling <- if (!is.null(cb_data$data[[i]]$N)) 1 else 100
       # - summary statistics
       xtx <- cb_data$data[[X_dict]]$XtX
       cb_model[[i]]$res <- rep(0, cb_model_para$P)
       if (length(cb_data$data[[i]]$variable_miss) != 0) {
-        beta <- cb_model[[i]]$beta[-cb_data$data[[i]]$variable_miss]
+        beta <- beta[-cb_data$data[[i]]$variable_miss]  / beta_scaling
         xty <- cb_data$data[[i]]$XtY[-cb_data$data[[i]]$variable_miss]
         cb_model[[i]]$res[-cb_data$data[[i]]$variable_miss] <- xty - scaling_factor * xtx %*% beta
       } else {
-        beta <- cb_model[[i]]$beta
+        beta <- cb_model[[i]]$beta / beta_scaling
         xty <- cb_data$data[[i]]$XtY
         cb_model[[i]]$res <- xty - scaling_factor * xtx %*% beta
       }

diff --git a/R/colocboost_workhorse.R b/R/colocboost_workhorse.R
@@ -69,9 +69,10 @@ colocboost_workhorse <- function(cb_data,
     func_compare = func_compare,
     coloc_thresh = coloc_thresh
   )
-
+
+  M_single_outcome <- 200      
   if (is.null(M)) {
-    M <- cb_model_para$L * 200
+    M <- cb_model_para$L * M_single_outcome
   }
   cb_model_para$M <- M
   # - if multi_test value > multi_test cutoff for some outcomes, we will not update them.
@@ -158,9 +159,10 @@ colocboost_workhorse <- function(cb_data,
             )
             cb_model[[i]]$multi_correction <- multiple_testing_correction
             stop2 <- all(multiple_testing_correction >= cb_model[[i]]$stop_null)
+            stop3 <- (M_i > M_single_outcome)
             # -- to ensure if some outcomes do not update previously
             if (length(cb_model[[i]]$profile_loglike_each) >= 2) {
-              stop[i] <- (stop1 | stop2) # (stop2 | stop3) # (stop1 | stop2 | stop3)
+              stop[i] <- (stop1 | stop2 | stop3) # (stop2 | stop3) # (stop1 | stop2 | stop3)
             } else {
               stop[i] <- FALSE
             }