Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 14 additions & 18 deletions R/colocboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@
#' @param weight_fudge_factor The strength to integrate weight from different outcomes, default is 1.5
#' @param check_null The cut off value for change conditional objective function. Default is 0.1.
#' @param check_null_method The metric to check the null sets. Default is "profile"
#' @param check_null_max The smallest value of change of profile loglikelihood for each outcome.
#' @param check_null_max The smallest value of change of profile loglikelihood for each outcome in CoS.
#' @param check_null_max_ucos The smallest value of change of profile loglikelihood for each outcome in uCoS.
#' @param weaker_effect If \code{weaker_effect = TRUE}, consider the weaker single effect due to coupling effects
#' @param LD_free When \code{LD_free = FALSE}, objective function doesn't include LD information.
#' @param output_level When \code{output_level = 1}, return basic cos details for colocalization results
Expand Down Expand Up @@ -180,6 +181,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
check_null = 0.1, # the cut off value for change conditional objective function
check_null_method = "profile", # the metric to check the null sets.
check_null_max = 0.025, # the smallest value of change of profile loglikelihood for each outcome.
check_null_max_ucos = 0.015, # the smallest value of change of profile loglikelihood for each outcome in uCoS.
weaker_effect = TRUE,
LD_free = FALSE,
output_level = 1) {
Expand Down Expand Up @@ -398,23 +400,9 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
"Without LD, only a single iteration will be performed under the assumption of one causal variable per outcome. ",
"Additionally, the purity of CoS cannot be evaluated!"
)

p.sumstat <- sapply(keep_variable_sumstat, length)
p.unique <- unique(p.sumstat)
if (length(p.unique) == 1) {
ld <- diag(1, nrow = p.unique)
colnames(ld) <- rownames(ld) <- keep_variable_sumstat[[1]]
LD <- list(ld)
sumstatLD_dict <- rep(1, length(sumstat))
} else {
LD <- lapply(keep_variable_sumstat, function(sn) {
ld <- diag(1, nrow = length(sn))
colnames(ld) <- rownames(ld) <- sn
return(ld)
})
sumstatLD_dict <- 1:length(sumstat)
}


LD <- 1
sumstatLD_dict <- rep(1, length(sumstat))
# change some algorithm parameters
M <- 1 # one iteration
min_abs_corr <- 0 # remove purity checking
Expand All @@ -428,6 +416,12 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
if (is.matrix(LD)) {
LD <- list(LD)
}
# - check if NA in LD matrix
num_na <- sapply(LD, sum)
if (any(is.na(num_na))){
warning("Error: Input LD must not contain missing values (NA).")
return(NULL)
}
if (length(LD) == 1) {
sumstatLD_dict <- rep(1, length(sumstat))
} else if (length(LD) == length(sumstat)) {
Expand Down Expand Up @@ -616,6 +610,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
check_null = check_null,
check_null_method = check_null_method,
check_null_max = check_null_max,
check_null_max_ucos = check_null_max_ucos,
dedup = dedup,
overlap = overlap,
n_purity = n_purity,
Expand All @@ -632,3 +627,4 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
class(cb_output) <- "colocboost"
return(cb_output)
}

17 changes: 10 additions & 7 deletions R/colocboost_assemble.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ colocboost_assemble <- function(cb_obj,
check_null = 0.1,
check_null_method = "profile",
check_null_max = 0.025,
check_null_max_ucos = 0.015,
dedup = TRUE,
overlap = TRUE,
n_purity = 100,
Expand Down Expand Up @@ -71,11 +72,17 @@ colocboost_assemble <- function(cb_obj,
}
}
} else {
if (cb_obj$cb_model_para$M == 1) {
if (cb_obj$cb_model_para$model_used == "LD_free") {
# fixme later
check_null_method <- "obj"
check_null <- check_null_max <- 0.1
check_null_max <- check_null * check_null
} else if (cb_obj$cb_model_para$model_used == "one_causal"){
# fixme later
check_null_max <- check_null_max * check_null
}
cb_obj <- get_max_profile(cb_obj, check_null_max = check_null_max, check_null_method = check_null_method)
cb_obj <- get_max_profile(cb_obj, check_null_max = check_null_max,
check_null_max_ucos = check_null_max_ucos,
check_null_method = check_null_method)
# --------- about colocalized confidence sets ---------------------------------
out_cos <- colocboost_assemble_cos(cb_obj,
coverage = coverage,
Expand Down Expand Up @@ -202,10 +209,6 @@ colocboost_assemble <- function(cb_obj,
# - colocalization results
cb_obj$cb_model_para$weight_fudge_factor <- weight_fudge_factor
cb_obj$cb_model_para$coverage <- coverage
if (cb_obj$cb_model_para$M == 1) {
# fixme for single iteration model
cb_obj <- get_max_profile(cb_obj, check_null_max = 0.01, check_null_method = "profile")
}
cos_results <- get_cos_details(cb_obj, coloc_out = past_out$cos$cos, data_info = data_info)
cb_output <- list(
"vcp" = cos_results$vcp,
Expand Down
17 changes: 12 additions & 5 deletions R/colocboost_check_update_jk.R
Original file line number Diff line number Diff line change
Expand Up @@ -498,10 +498,13 @@ get_LD_jk1_jk2 <- function(jk1, jk2,
LD_temp[which(is.na(LD_temp))] <- 0
LD_temp <- LD_temp[1, 2]
} else if (!is.null(XtX)) {
jk1.remain <- which(remain_jk == jk1)
jk2.remain <- which(remain_jk == jk2)
# scaling <- if (!is.null(N)) N-1 else 1
LD_temp <- XtX[jk1.remain, jk2.remain] # / scaling
if (sum(XtX) == 1){
LD_temp <- 0
} else {
jk1.remain <- which(remain_jk == jk1)
jk2.remain <- which(remain_jk == jk2)
LD_temp <- XtX[jk1.remain, jk2.remain]
}
}
return(LD_temp)
}
Expand Down Expand Up @@ -595,7 +598,11 @@ estimate_change_profile_res <- function(jk,
} else {
xty <- XtY / scaling_factor
}
rtr <- yty - 2 * sum(beta_k * xty) + sum((xtx %*% as.matrix(beta_k)) * beta_k)
if (sum(xtx) == 1){
rtr <- yty - 2 * sum(beta_k * xty) + sum(beta_k^2)
} else {
rtr <- yty - 2 * sum(beta_k * xty) + sum((xtx %*% as.matrix(beta_k)) * beta_k)
}
}
numerator <- xtr^2 / (2 * rtr)
denominator <- 0.5 * log(2 * pi * rtr) + 0.5
Expand Down
55 changes: 39 additions & 16 deletions R/colocboost_inference.R
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverag
return(is_pure)
}


#' Function to remove the spurious signals
#' @importFrom utils head tail
#' @keywords cb_post_inference
Expand Down Expand Up @@ -240,8 +241,11 @@ check_null_post <- function(cb_obj,
} else {
xty <- XtY / scaling_factor
}

(yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep
if (sum(xtx) == 1){
(yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) * adj_dep
} else {
(yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep
}
}
}

Expand Down Expand Up @@ -289,10 +293,18 @@ check_null_post <- function(cb_obj,
if (length(miss_idx) != 0) {
xty <- XtY[-miss_idx] / scaling.factor
res.tmp <- rep(0, length(XtY))
res.tmp[-miss_idx] <- xty - xtx %*% (cs_beta[-miss_idx] / beta_scaling)
if (sum(xtx) == 1){
res.tmp[-miss_idx] <- xty - cs_beta[-miss_idx] / beta_scaling
} else {
res.tmp[-miss_idx] <- xty - xtx %*% (cs_beta[-miss_idx] / beta_scaling)
}
} else {
xty <- XtY / scaling.factor
res.tmp <- xty - xtx %*% (cs_beta / beta_scaling)
if (sum(xtx) == 1){
res.tmp <- xty - (cs_beta / beta_scaling)
} else {
res.tmp <- xty - xtx %*% (cs_beta / beta_scaling)
}
}
return(res.tmp)
}
Expand Down Expand Up @@ -349,7 +361,7 @@ check_null_post <- function(cb_obj,
last_obj <- min(cb_obj$cb_model[[j]]$obj_path)
change <- abs(cs_obj - last_obj)
if (length(cb_obj$cb_model[[j]]$obj_path) == 1) {
total_obj <- 1
total_obj <- change
} else {
total_obj <- diff(range(cb_obj$cb_model[[j]]$obj_path))
}
Expand Down Expand Up @@ -408,8 +420,12 @@ get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) {
corr[which(is.na(corr))] <- 0
value <- abs(get_upper_tri(corr))
} else {
Xcorr <- Xcorr # if (!is.null(N)) Xcorr/(N-1) else Xcorr
value <- abs(get_upper_tri(Xcorr[pos, pos]))
if (sum(Xcorr) == 1){
value <- 0
} else {
Xcorr <- Xcorr # if (!is.null(N)) Xcorr/(N-1) else Xcorr
value <- abs(get_upper_tri(Xcorr[pos, pos]))
}
}
return(c(
min(value),
Expand Down Expand Up @@ -443,14 +459,18 @@ get_between_purity <- function(pos1, pos2, X = NULL, Xcorr = NULL, miss_idx = NU
X_sub2 <- scale(X[, pos2, drop = FALSE], center = T, scale = F)
value <- abs(get_matrix_mult(X_sub1, X_sub2))
} else {
if (length(miss_idx)!=0){
pos1 <- na.omit(match(pos1, setdiff(1:P, miss_idx)))
pos2 <- na.omit(match(pos2, setdiff(1:P, miss_idx)))
}
if (length(pos1) != 0 & length(pos2) != 0) {
value <- abs(Xcorr[pos1, pos2])
} else {
if (sum(Xcorr)==1){
value <- 0
} else {
if (length(miss_idx)!=0){
pos1 <- na.omit(match(pos1, setdiff(1:P, miss_idx)))
pos2 <- na.omit(match(pos2, setdiff(1:P, miss_idx)))
}
if (length(pos1) != 0 & length(pos2) != 0) {
value <- abs(Xcorr[pos1, pos2])
} else {
value <- 0
}
}
}
return(c(min(value), max(value), get_median(value)))
Expand Down Expand Up @@ -488,8 +508,11 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) {
} else {
xty <- XtY / scaling_factor
}

cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep
if (sum(xtx) == 1){
cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) * adj_dep
} else {
cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep
}
}
delta <- yty - cos_profile
if (delta <= 0) {
Expand Down
97 changes: 73 additions & 24 deletions R/colocboost_init.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,43 @@ colocboost_init_data <- function(X, Y, dict_YX,
################# initialization #######################################
cb_data <- list("data" = list())
class(cb_data) <- "colocboost"
# if (!is.null(dict_YX) & !is.null(dict_sumstatLD)) {
# dict <- c(dict_YX, max(dict_YX) + dict_sumstatLD)
# n_ind_variable <- max(dict_YX)
# } else if (!is.null(dict_YX) & is.null(dict_sumstatLD)) {
# dict <- dict_YX
# } else if (is.null(dict_YX) & !is.null(dict_sumstatLD)) {
# dict <- dict_sumstatLD
# n_ind_variable <- 0
# }

# note here and remove later: dict is designed by X and LD, but we need additional dictionary for keep variables.
# keep variables for individual level data is based on X - there is no issue.
# keep variables for sumstat data is based on sumstat (not LD) - there is a issue to index the focal outcome based on dict later.
if (!is.null(dict_YX) & !is.null(dict_sumstatLD)) {
dict <- c(dict_YX, max(dict_YX) + dict_sumstatLD)
n_ind_variable <- max(dict_YX)
dict_keep_variables <- c(dict_YX, 1:length(dict_sumstatLD) + n_ind_variable)
} else if (!is.null(dict_YX) & is.null(dict_sumstatLD)) {
dict <- dict_YX
dict_keep_variables <- dict_YX
} else if (is.null(dict_YX) & !is.null(dict_sumstatLD)) {
dict <- dict_sumstatLD
n_ind_variable <- 0
dict_keep_variables <- 1:length(dict_sumstatLD)
}

if (focal_outcome_variables & !is.null(focal_outcome_idx)) {
if (focal_outcome_idx > length(dict)) {
stop("Target outcome index is over the total number of outcomes! please check!")
}
keep_variable_names <- keep_variables[[dict[focal_outcome_idx]]]
# keep_variable_names <- keep_variables[[dict[focal_outcome_idx]]]
keep_variable_names <- keep_variables[[dict_keep_variables[focal_outcome_idx]]]
if (overlap_variables) {
keep_tmp <- lapply(keep_variables[-dict[focal_outcome_idx]], function(tm) {
# keep_tmp <- lapply(keep_variables[-dict[focal_outcome_idx]], function(tm) {
# intersect(keep_variable_names, tm)
# })
keep_tmp <- lapply(keep_variables[-dict_keep_variables[focal_outcome_idx]], function(tm) {
intersect(keep_variable_names, tm)
})
keep_variable_names <- Reduce(union, keep_tmp)
Expand Down Expand Up @@ -299,7 +320,8 @@ colocboost_init_para <- function(cb_data, cb_model, tau = 0.01,
"num_updates_outcome" = num_updates_outcome,
"func_multi_test" = func_multi_test,
"multi_test_thresh" = multi_test_thresh,
"multi_test_max" = multi_test_max
"multi_test_max" = multi_test_max,
"model_used" = "original"
)
class(cb_model_para) <- "colocboost"

Expand Down Expand Up @@ -374,7 +396,11 @@ get_correlation <- function(X = NULL, res = NULL, XtY = NULL, N = NULL,
Xtr <- res / scaling_factor
XtY <- XtY / scaling_factor
}
var_r <- YtY - 2 * sum(beta_k * XtY) + sum((XtX %*% as.matrix(beta_k)) * beta_k)
if (sum(XtX) == 1){
var_r <- YtY - 2 * sum(beta_k * XtY) + sum(beta_k^2)
} else {
var_r <- YtY - 2 * sum(beta_k * XtY) + sum((XtX %*% as.matrix(beta_k)) * beta_k)
}
if (var_r > 1e-6) {
corr_nomiss <- Xtr / sqrt(var_r)
if (length(miss_idx) != 0) {
Expand Down Expand Up @@ -425,15 +451,35 @@ get_lfdr <- function(z, miss_idx = NULL) {
z <- z[-miss_idx]
try_run <- 1
while (try_run == 1 && lambda_max >= 0.05) {
result <- try(
# result <- try(
# {
# lfdr_nomissing <- qvalue(pchisq(drop(z^2), 1, lower.tail = FALSE), lambda = seq(0.05, lambda_max, 0.05))$lfdr
# },
# silent = TRUE
# )
# if (inherits(result, "try-error")) {
# lambda_max <- lambda_max - 0.05 # Decrement lambda_max if error occurs
# } else {
# try_run <- 0
# }
result <- tryCatch(
{
lfdr_nomissing <- qvalue(pchisq(drop(z^2), 1, lower.tail = FALSE), lambda = seq(0.05, lambda_max, 0.05))$lfdr
lfdr_nomissing <- qvalue(pchisq(drop(z^2), 1, lower.tail = FALSE),
lambda = seq(0.05, lambda_max, 0.05))$lfdr
list(success = TRUE, value = lfdr_nomissing)
},
silent = TRUE
warning = function(w) {
list(success = FALSE, message = w$message)
},
error = function(e) {
list(success = FALSE, message = e$message)
}
)
if (inherits(result, "try-error")) {
lambda_max <- lambda_max - 0.05 # Decrement lambda_max if error occurs

if (!result$success) {
lambda_max <- lambda_max - 0.05
} else {
lfdr_nomissing <- result$value
try_run <- 0
}
}
Expand Down Expand Up @@ -573,21 +619,24 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic
Z_extend[pos_target] <- current_z[pos_z]

# Calculate submatrix for each unique entry (not duplicates)
ld_submatrix <- NULL

if (length(common_variants) > 0) {
# Only include the submatrix if this entry is unique or is the first occurrence
if (i == unified_dict[i]) {
# Check if common_variants and rownames have identical order
if (identical(common_variants, rownames(current_ld_matrix))) {
# If order is identical, use the matrix directly without reordering
ld_submatrix <- current_ld_matrix
} else {
# If order is different, reorder using matched indices
matched_indices <- match(common_variants, rownames(current_ld_matrix))
ld_submatrix <- current_ld_matrix[matched_indices, matched_indices, drop = FALSE]
rownames(ld_submatrix) <- common_variants
colnames(ld_submatrix) <- common_variants
if (sum(current_ld_matrix) == 1){
ld_submatrix <- current_ld_matrix
} else {
ld_submatrix <- NULL
if (length(common_variants) > 0) {
# Only include the submatrix if this entry is unique or is the first occurrence
if (i == unified_dict[i]) {
# Check if common_variants and rownames have identical order
if (identical(common_variants, rownames(current_ld_matrix))) {
# If order is identical, use the matrix directly without reordering
ld_submatrix <- current_ld_matrix
} else {
# If order is different, reorder using matched indices
matched_indices <- match(common_variants, rownames(current_ld_matrix))
ld_submatrix <- current_ld_matrix[matched_indices, matched_indices, drop = FALSE]
rownames(ld_submatrix) <- common_variants
colnames(ld_submatrix) <- common_variants
}
}
}
}
Expand Down
Loading