diff --git a/R/colocboost.R b/R/colocboost.R index 54b0b9c..91890f1 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -157,12 +157,13 @@ colocboost <- function(X = NULL, Y = NULL, # individual data multicorrection_cut = 1, ash_prior = "normal", # only applicable if func_multicorrection = lfsr p.adjust.methods = "fdr", - pv_cutoff = 1e-4, + npc_cutoff = 0.7, check_null = 0.1, # the cut off value for change conditional objective function check_null_method = "profile", check_null_max = 0.02, residual_correlation = NULL, # sample overlap, it is diagonal if it is NULL LD_obj = FALSE, + dynamic_step = TRUE, weaker_ucos = TRUE, output_level = 1){ @@ -557,6 +558,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data func_compare = func_compare, coloc_thres = coloc_thres, LD_obj = LD_obj, + dynamic_step = dynamic_step, target_idx = target_idx, outcome_names = outcome_names) @@ -566,7 +568,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data coverage = coverage, func_intw = func_intw, alpha = alpha, - pv_cutoff = pv_cutoff, + npc_cutoff = npc_cutoff, check_null = check_null, check_null_method = check_null_method, check_null_max = check_null_max, diff --git a/R/colocboost_addhoc_utils.R b/R/colocboost_addhoc_utils.R index 1d1248d..3956528 100644 --- a/R/colocboost_addhoc_utils.R +++ b/R/colocboost_addhoc_utils.R @@ -61,8 +61,8 @@ merge_cos_ucos <- function(cb_obj, out_cos, out_ucos, coverage = 0.95, min_between <- res[1] max_between <- res[2] ave_between <- res[3] - # is.between <- ((min_between>min_abs_corr) & (abs(max_between-1) < tol)) - is.between <- (min_between>min_abs_corr) * (abs(max_between-1)between_purity) + is.between <- ((min_between>between_purity) & (abs(max_between-1) < tol)) + # is.between <- (min_between>min_abs_corr) * (abs(max_between-1)between_purity) if (is.between){ is_overlap = c(is_overlap, j) # -- add weight @@ -154,7 +154,8 @@ merge_ucos <- function(cb_obj, past_out, ave_between[i.between, j.between] <- ave_between[j.between, i.between] <- res[3] } } - is.between <- (min_between>min_abs_corr) * (abs(max_between-1)between_purity) + # is.between <- (min_between>min_abs_corr) * (abs(max_between-1)between_purity) + is.between <- ((min_between>between_purity) & (abs(max_between-1) < tol)) if (sum(is.between) != 0){ temp <- sapply(1:nrow(is.between), function(x){ tt <- c(x, which(is.between[x,] != 0)) diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R index 96d0ed6..9de7f39 100644 --- a/R/colocboost_assemble.R +++ b/R/colocboost_assemble.R @@ -26,7 +26,7 @@ colocboost_assemble <- function(cb_obj, check_null = 0.1, check_null_method = "profile", check_null_max=2e-5, - pv_cutoff = 1e-4, + npc_cutoff = 0.7, dedup = TRUE, overlap = TRUE, n_purity = 100, @@ -193,10 +193,7 @@ colocboost_assemble <- function(cb_obj, # - colocalization results cb_obj$cb_model_para$alpha <- alpha cb_obj$cb_model_para$coverage <- coverage - cos_results <- get_cos_details(cb_obj, coloc_out = past_out$cos$cos, data_info = data_info) - if (!is.null(pv_cutoff) & !is.null(cos_results$cos_results)){ - cos_results <- cos_pvalue_filter(cos_results, data_info = data_info, pv_cutoff = pv_cutoff) - } + cos_results <- get_cos_details(cb_obj, coloc_out = past_out$cos$cos, data_info = data_info, npc_cutoff = npc_cutoff) cb_output <- list("vcp" = cos_results$vcp, "cos_details" = cos_results$cos_results, "data_info" = data_info, @@ -210,7 +207,8 @@ colocboost_assemble <- function(cb_obj, # - save model and all coloc and single information for diagnostic if (output_level != 1){ - tmp <- get_full_output(cb_obj = cb_obj, past_out = past_out, variables = data_info$variables, cb_output = cb_output) + tmp <- get_full_output(cb_obj = cb_obj, past_out = past_out, variables = data_info$variables, + cb_output = cb_output) if (output_level == 2){ cb_output <- c(cb_output, list("ucos_details" = tmp$ucos_details)) cb_output <- cb_output[c("cos_summary", "vcp", "cos_details", "data_info", "model_info", "ucos_details")] diff --git a/R/colocboost_assemble_cos.R b/R/colocboost_assemble_cos.R index fb2156f..f25b1ad 100644 --- a/R/colocboost_assemble_cos.R +++ b/R/colocboost_assemble_cos.R @@ -180,8 +180,8 @@ colocboost_assemble_cos <- function(cb_obj, } else { - av <- lapply(coloc_outcomes, function(i){ - get_avWeigth(cb_model, i, update, pos.coloc[pos_temp_coloc_each]) + av <- lapply(coloc_outcomes, function(ii){ + get_avWeigth(cb_model, ii, update, pos.coloc[pos_temp_coloc_each]) }) weight_coloc <- do.call(cbind, av) diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 075ddf4..7f2899f 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -75,7 +75,7 @@ get_data_info <- function(cb_obj){ #' @noRd #' @keywords cb_post_inference -get_cos_details <- function(cb_obj, coloc_out, data_info = NULL, pv_cutoff = 1e-4){ +get_cos_details <- function(cb_obj, coloc_out, data_info = NULL, npc_cutoff = 0.7){ if (is.null(data_info)) data_info <- get_data_info(cb_obj) @@ -85,7 +85,12 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL, pv_cutoff = 1e- coloc_sets <- coloc_out$cos if (length(coloc_sets)!=0){ - # - colocalized outcomes + # - colocalization outcome configurations + tmp <- get_cos_evidence(cb_obj, coloc_out, data_info, npc_cutoff = npc_cutoff) + normalization_evidence <- tmp$normalization_evidence + npc <- tmp$npc + + # - colocalized outcomes analysis_outcome <- cb_obj$cb_model_para$outcome_names coloc_outcome_index <- coloc_outcome <- list() colocset_names <- c() @@ -98,6 +103,7 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL, pv_cutoff = 1e- } } names(coloc_outcome) <- names(coloc_outcome_index) <- colocset_names + names(npc) <- names(normalization_evidence) <- colocset_names coloc_outcomes <- list("outcome_index" = coloc_outcome_index, "outcome_name" = coloc_outcome) # - colocalized sets for variables @@ -113,12 +119,12 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL, pv_cutoff = 1e- colnames(cs_change) <- analysis_outcome # - VCP - int_weight <- lapply(coloc_out$avWeight, get_integrated_weight, alpha = cb_obj$cb_model_para$alpha) - names(int_weight) <- colocset_names - int_weight <- lapply(int_weight, function(inw) { - pos <- match(data_info$variables, cb_obj$cb_model_para$variables) - return(inw[pos]) + cos_weights <- lapply(coloc_out$avWeight, function(w){ + pos <- match(data_info$variables, cb_obj$cb_model_para$variables) + return(w[pos, , drop = FALSE]) }) + int_weight <- lapply(cos_weights, get_integrated_weight, alpha = cb_obj$cb_model_para$alpha) + names(int_weight) <- names(cos_weights) <- colocset_names vcp <- as.vector(1 - apply(1 - do.call(cbind, int_weight),1,prod)) names(vcp) <- data_info$variables @@ -144,7 +150,7 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL, pv_cutoff = 1e- rownames(coloc_hits) <- coloc_hits_names # - purity - ncos <- length(coloc_sets) + ncos <- length(coloc_csets$cos_index) if (ncos >= 2){ empty_matrix <- matrix(NA, ncos, ncos) colnames(empty_matrix) <- rownames(empty_matrix) <- colocset_names @@ -154,8 +160,8 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL, pv_cutoff = 1e- }) for (i in 1:(ncos-1)){ for (j in (i+1):ncos){ - cset1 <- coloc_sets[[i]] - cset2 <- coloc_sets[[j]] + cset1 <- coloc_csets$cos_index[[i]] + cset2 <- coloc_csets$cos_index[[j]] y.i <- coloc_outcomes$outcome_index[[i]] y.j <- coloc_outcomes$outcome_index[[j]] yy <- unique(y.i, y.j) @@ -188,9 +194,11 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL, pv_cutoff = 1e- coloc_results <- list("cos" = coloc_csets, "cos_outcomes" = coloc_outcomes, "cos_vcp" = int_weight, + "cos_npc" = npc, "cos_purity" = csets_purity, "cos_top_variables" = coloc_hits, - "cos_outcomes_delta" = cs_change) + "cos_outcomes_npc" = normalization_evidence, + "cos_weights" = cos_weights) # - missing variable and warning message @@ -270,7 +278,8 @@ get_model_info <- function(cb_obj, outcome_names = NULL){ #' #' @noRd #' @keywords cb_post_inference -get_cos_summary <- function(cb_output, outcome_names = NULL, gene_name = NULL, target_outcome = NULL){ +get_cos_summary <- function(cb_output, outcome_names = NULL, gene_name = NULL, + target_outcome = NULL){ coloc_csets <- cb_output$cos_details$cos$cos_index if (length(coloc_csets) != 0){ @@ -284,21 +293,22 @@ get_cos_summary <- function(cb_output, outcome_names = NULL, gene_name = NULL, t } vcp <- as.numeric(cb_output$vcp) - summary_table <- matrix(NA, nrow = length(coloc_sets), ncol = 10) + summary_table <- matrix(NA, nrow = length(coloc_sets), ncol = 11) colnames(summary_table) <- c("target_outcome", "colocalized_outcomes", "cos_id", "purity", - "top_variable", "top_variable_vcp", "n_variables", "colocalized_index", - "colocalized_variables", "colocalized_variables_vcp") + "top_variable", "top_variable_vcp", "cos_npc", "n_variables", + "colocalized_index", "colocalized_variables", "colocalized_variables_vcp") summary_table <- as.data.frame(summary_table) summary_table[,1] <- FALSE summary_table[,2] <- unlist(sapply(coloc_outcome, function(tmp) paste0(tmp, collapse = "; "))) summary_table[,3] <- names(coloc_sets) summary_table[,4] <- as.numeric(diag(as.matrix(cb_output$cos_details$cos_purity$min_abs_cor))) summary_table[,5] <- unlist(sapply(cb_output$cos_details$cos$cos_variables, function(tmp) tmp[1])) - summary_table[,6] <- sapply(coloc_sets, function(tmp) max(vcp[tmp])) - summary_table[,7] <- as.numeric(sapply(coloc_sets, length)) - summary_table[,8] <- unlist(sapply(coloc_sets, function(tmp) paste0(tmp, collapse = "; "))) - summary_table[,9] <- unlist(sapply(cb_output$cos_details$cos$cos_variables, function(tmp) paste0(tmp, collapse = "; "))) - summary_table[,10] <- unlist(sapply(coloc_sets, function(tmp) paste0(vcp[tmp], collapse = "; "))) + summary_table[,6] <- sapply(coloc_sets, function(tmp) max(vcp[tmp])) + summary_table[,7] <- as.numeric(cb_output$cos_details$cos_npc) + summary_table[,8] <- as.numeric(sapply(coloc_sets, length)) + summary_table[,9] <- unlist(sapply(coloc_sets, function(tmp) paste0(tmp, collapse = "; "))) + summary_table[,10] <- unlist(sapply(cb_output$cos_details$cos$cos_variables, function(tmp) paste0(tmp, collapse = "; "))) + summary_table[,11] <- unlist(sapply(coloc_sets, function(tmp) paste0(vcp[tmp], collapse = "; "))) if (!is.null(gene_name)){ summary_table$gene_name <- gene_name } if (!is.null(target_outcome)){ @@ -325,7 +335,7 @@ get_cos_summary <- function(cb_output, outcome_names = NULL, gene_name = NULL, t #' @noRd #' @keywords cb_post_inference -get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output = NULL){ +get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output = NULL, weaker_ucos = FALSE){ cb_model <- cb_obj$cb_model cb_model_para <- cb_obj$cb_model_para @@ -367,6 +377,7 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output out_ucos <- past_out$ucos # - single sets if (!is.null(out_ucos$ucos_each)){ + out_ucos$ucos_each <- lapply(out_ucos$ucos_each, function(cs){ match(cb_model_para$variables[cs], variables) }) @@ -402,106 +413,128 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output change_values <- diag(as.matrix(cs_change[index_change$row, index_change$col])) cs_change <- data.frame("ucos_outcome" = change_outcomes, "ucos_delta" = change_values) - # - ucos_weight - specific_w <- lapply(1:ncol(out_ucos$avW_ucos_each), function(ii) out_ucos$avW_ucos_each[,ii,drop=FALSE]) - names(specific_w) <- specific_cs_names - - # - hits variables in each csets - cs_hits <- sapply(1:length(specific_w), function(jj){ inw=specific_w[[jj]]; sample(which(inw == max(inw)),1) }) - cs_hits_variablenames <- sapply(cs_hits, function(ch) variables[ch] ) - specific_cs_hits <- data.frame("top_index" = cs_hits, "top_variables" = cs_hits_variablenames) # save - rownames(specific_cs_hits) <- specific_cs_names - - # - purity - nucos <- length(out_ucos$ucos_each) - if (nucos >= 2){ - empty_matrix <- matrix(NA, nucos, nucos) - colnames(empty_matrix) <- rownames(empty_matrix) <- specific_cs_names - specific_cs_purity <- lapply(1:3, function(ii){ - diag(empty_matrix) <- out_ucos$purity_each[,ii] - return(empty_matrix) - }) - for (i in 1:(nucos-1)){ - for (j in (i+1):nucos){ - cset1 <- out_ucos$ucos_each[[i]] - cset2 <- out_ucos$ucos_each[[j]] - y.i <- specific_outcomes$outcome_index[[i]] - y.j <- specific_outcomes$outcome_index[[j]] - yy <- unique(y.i, y.j) - res <- list() - flag <- 1 - for (ii in yy){ - X_dict <- cb_obj$cb_data$dict[ii] - res[[flag]] <- get_between_purity(cset1, cset2, X = cb_obj$cb_data$data[[X_dict]]$X, - Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - N = cb_obj$cb_data$data[[ii]]$N, - miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P) - flag <- flag + 1 + # - filter weak ucos + check_null_max <- sapply(cb_model, function(cb) cb$check_null_max) + remove_weak <- sapply(1:nrow(cs_change), function(ic){ + outcome_tmp <- cs_change$ucos_outcome[ic] + delta_tmp <- cs_change$ucos_delta[ic] + pp <- which(cb_obj$cb_model_para$outcome_names == outcome_tmp) + check_tmp <- check_null_max[pp] + delta_tmp >= check_tmp + }) + keep_ucos <- which(remove_weak) + if (length(keep_ucos)==0){ + specific_results <- NULL + } else { + specific_outcomes$outcome_index <- specific_outcomes$outcome_index[keep_ucos] + specific_outcomes$outcome_name <- specific_outcomes$outcome_name[keep_ucos] + specific_css$ucos_index <- specific_css$ucos_index[keep_ucos] + specific_css$ucos_variables <- specific_css$ucos_variables[keep_ucos] + cs_change <- cs_change[keep_ucos, , drop = FALSE] + out_ucos$avW_ucos_each <- out_ucos$avW_ucos_each[, keep_ucos, drop = FALSE] + specific_cs_names <- specific_cs_names[keep_ucos] + out_ucos$purity_each <- out_ucos$purity_each[keep_ucos, , drop = FALSE] + + # - ucos_weight + specific_w <- lapply(1:ncol(out_ucos$avW_ucos_each), function(ii) out_ucos$avW_ucos_each[,ii,drop=FALSE]) + names(specific_w) <- specific_cs_names + + # - hits variables in each csets + cs_hits <- sapply(1:length(specific_w), function(jj){ inw=specific_w[[jj]]; sample(which(inw == max(inw)),1) }) + cs_hits_variablenames <- sapply(cs_hits, function(ch) variables[ch] ) + specific_cs_hits <- data.frame("top_index" = cs_hits, "top_variables" = cs_hits_variablenames) # save + rownames(specific_cs_hits) <- specific_cs_names + + # - purity + nucos <- length(specific_css$ucos_index) + if (nucos >= 2){ + empty_matrix <- matrix(NA, nucos, nucos) + colnames(empty_matrix) <- rownames(empty_matrix) <- specific_cs_names + specific_cs_purity <- lapply(1:3, function(ii){ + diag(empty_matrix) <- out_ucos$purity_each[,ii] + return(empty_matrix) + }) + for (i in 1:(nucos-1)){ + for (j in (i+1):nucos){ + cset1 <- specific_css$ucos_index[[i]] + cset2 <- specific_css$ucos_index[[j]] + y.i <- specific_outcomes$outcome_index[[i]] + y.j <- specific_outcomes$outcome_index[[j]] + yy <- unique(y.i, y.j) + res <- list() + flag <- 1 + for (ii in yy){ + X_dict <- cb_obj$cb_data$dict[ii] + res[[flag]] <- get_between_purity(cset1, cset2, X = cb_obj$cb_data$data[[X_dict]]$X, + Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, + N = cb_obj$cb_data$data[[ii]]$N, + miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, + P = cb_obj$cb_model_para$P) + flag <- flag + 1 + } + res <- Reduce(pmax, res) + specific_cs_purity <- lapply(1:3, function(ii){ + specific_cs_purity[[ii]][i,j] <- specific_cs_purity[[ii]][j,i] <- res[ii] + return(specific_cs_purity[[ii]]) + }) } - res <- Reduce(pmax, res) - specific_cs_purity <- lapply(1:3, function(ii){ - specific_cs_purity[[ii]][i,j] <- specific_cs_purity[[ii]][j,i] <- res[ii] - return(specific_cs_purity[[ii]]) - }) } + names(specific_cs_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") + + } else { + specific_cs_purity <- out_ucos$purity_each + rownames(specific_cs_purity) <- specific_cs_names } - names(specific_cs_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") - } else { - specific_cs_purity <- out_ucos$purity_each - rownames(specific_cs_purity) <- specific_cs_names - } - - # - cos&ucos purity - cos <- cb_output$cos_details$cos$cos_index - ucos <- - ncos <- length(cos) - if (ncos!=0){ - empty_matrix <- matrix(NA, ncos, nucos) - colnames(empty_matrix) <- specific_cs_names - rownames(empty_matrix) <- names(cos) - cos_ucos_purity <- lapply(1:3, function(ii) empty_matrix ) - for (i in 1:ncos){ - for (j in 1:nucos){ - cset1 <- cos[[i]] - cset2 <- specific_cs_variableidx[[j]] - y.i <- cb_output$cos_details$cos_outcomes$outcome_index[[i]] - y.j <- specific_outcomes$outcome_index[[j]] - yy <- unique(y.i, y.j) - res <- list() - flag <- 1 - for (ii in yy){ - X_dict <- cb_obj$cb_data$dict[ii] - res[[flag]] <- get_between_purity(cset1, cset2, X = cb_obj$cb_data$data[[X_dict]]$X, - Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - N = cb_obj$cb_data$data[[ii]]$N, - miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P) - flag <- flag + 1 + # - cos&ucos purity + cos <- cb_output$cos_details$cos$cos_index + ncos <- length(cos) + if (ncos!=0){ + empty_matrix <- matrix(NA, ncos, nucos) + colnames(empty_matrix) <- specific_cs_names + rownames(empty_matrix) <- names(cos) + cos_ucos_purity <- lapply(1:3, function(ii) empty_matrix ) + for (i in 1:ncos){ + for (j in 1:nucos){ + cset1 <- cos[[i]] + cset2 <- specific_css$ucos_index[[j]] + y.i <- cb_output$cos_details$cos_outcomes$outcome_index[[i]] + y.j <- specific_outcomes$outcome_index[[j]] + yy <- unique(y.i, y.j) + res <- list() + flag <- 1 + for (ii in yy){ + X_dict <- cb_obj$cb_data$dict[ii] + res[[flag]] <- get_between_purity(cset1, cset2, X = cb_obj$cb_data$data[[X_dict]]$X, + Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, + N = cb_obj$cb_data$data[[ii]]$N, + miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, + P = cb_obj$cb_model_para$P) + flag <- flag + 1 + } + res <- Reduce(pmax, res) + cos_ucos_purity <- lapply(1:3, function(ii){ + cos_ucos_purity[[ii]][i,j] <- res[ii] + return(cos_ucos_purity[[ii]]) + }) } - res <- Reduce(pmax, res) - cos_ucos_purity <- lapply(1:3, function(ii){ - cos_ucos_purity[[ii]][i,j] <- res[ii] - return(cos_ucos_purity[[ii]]) - }) } + names(cos_ucos_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") + } else { + cos_ucos_purity <- NULL } - names(cos_ucos_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor") - } else { - cos_ucos_purity <- NULL + + + # - save coloc_results + specific_results <- list("ucos" = specific_css, + "ucos_outcomes" = specific_outcomes, + "ucos_weight" = specific_w, + "ucos_top_variables" = specific_cs_hits, + "ucos_purity" = specific_cs_purity, + "cos_ucos_purity" = cos_ucos_purity, + "ucos_outcomes_delta" = cs_change) } - - # - save coloc_results - specific_results <- list("ucos" = specific_css, - "ucos_outcomes" = specific_outcomes, - "ucos_weight" = specific_w, - "ucos_top_variables" = specific_cs_hits, - "ucos_purity" = specific_cs_purity, - "cos_ucos_purity" = cos_ucos_purity, - "ucos_outcomes_delta" = cs_change) - } else { specific_results <- NULL } @@ -562,7 +595,7 @@ get_summary_table_fm <- function(cb_output, outcome_names = NULL, gene_name = NU -cos_pvalue_filter <- function(cos_results, data_info = NULL, pv_cutoff = 1e-4){ +cos_pvalue_filter <- function(cos_results, data_info = NULL, pvalue_cutoff = 1e-4){ if (is.null(data_info)) data_info <- get_data_info(cb_obj) @@ -583,7 +616,7 @@ cos_pvalue_filter <- function(cos_results, data_info = NULL, pv_cutoff = 1e-4){ pv <- pchisq(z^2, 1, lower.tail = FALSE) min(pv) }) - pp <- which(minPV < 1e-4) + pp <- which(minPV < pvalue_cutoff) if (length(pp) == 0 | length(pp) == 1) { pp_remove <- c(pp_remove, i) next @@ -598,7 +631,6 @@ cos_pvalue_filter <- function(cos_results, data_info = NULL, pv_cutoff = 1e-4){ cos_results$cos_results$cos_outcomes$outcome_index[i]) filtered_traits$outcome_name <- c(filtered_traits$outcome_name, cos_results$cos_results$cos_outcomes$outcome_name[i]) - } else { filtered_traits$outcome_index <- c(filtered_traits$outcome_index, list(cos_results$cos_results$cos_outcomes$outcome_index[[i]][pp])) @@ -635,6 +667,7 @@ cos_pvalue_filter <- function(cos_results, data_info = NULL, pv_cutoff = 1e-4){ cos_results$cos_results$cos_purity$min_abs_cor <- as.matrix(cos_results$cos_results$cos_purity$min_abs_cor)[-pp_remove, -pp_remove, drop=FALSE] cos_results$cos_results$cos_purity$median_abs_cor <- as.matrix(cos_results$cos_results$cos_purity$median_abs_cor)[-pp_remove, -pp_remove, drop=FALSE] cos_results$cos_results$cos_purity$max_abs_cor <- as.matrix(cos_results$cos_results$cos_purity$max_abs_cor)[-pp_remove, -pp_remove, drop=FALSE] + cos_results$cos_results$cos_PPC <- cos_results$cos_results$cos_PPC[-pp_remove] } } @@ -643,3 +676,179 @@ cos_pvalue_filter <- function(cos_results, data_info = NULL, pv_cutoff = 1e-4){ } +colocboost_get_npc_configs <- function(cb_results, npc_cutoff = 0.7, alpha = 1.5, coverage = 0.95){ + + if (is.null(cb_results$cos_details)){ + warnings("No colocalization results in this region!") + return(cb_results) + } + if (npc_cutoff == 0){ + warnings("All possible colocalization events are reported regardless of their relative evidence compared to uncolocalized events (npc_cutoff = 0).") + return(cb_results) + } + message(paste("Extracting colocalization results with npc_cutoff =", npc_cutoff, ".\n", + "For each CoS, keep the outcomes configurations that the npc_outcome >", npc_cutoff, ".")) + + remove_cos <- function(cb_results, remove_idx = NULL){ + if (length(remove_idx)==0){ + return(cb_results) + } + ncos <- length(cos_details$cos_top_variables) + if (length(remove_idx) == ncos){ + cb_results$vcp <- NULL + cb_results$cos_details <- NULL + cb_results <- c(cb_results, list(vcp = NULL, cos_details = NULL)) + return(cb_results) + } + cos_details <- cb_results$cos_details + cos_details$cos_top_variables <- cos_details$cos_top_variables[-remove_idx, , drop = FALSE] + cos_details$cos$cos_index <- cos_details$cos$cos_index[-remove_idx] + cos_details$cos$cos_variables <- cos_details$cos$cos_variables[-remove_idx] + cos_details$cos_outcomes$outcome_index <- cos_details$cos_outcomes$outcome_index[-remove_idx] + cos_details$cos_outcomes$outcome_name <- cos_details$cos_outcomes$outcome_name[-remove_idx] + cos_details$cos_vcp <- cos_details$cos_vcp[-remove_idx] + cos_details$cos_weights <- cos_details$cos_weights[-remove_idx] + cos_details$cos_npc <- cos_details$cos_npc[-remove_idx] + cos_details$cos_purity$min_abs_cor <- as.matrix(cos_details$cos_purity$min_abs_cor)[-remove_idx, -remove_idx, drop=FALSE] + cos_details$cos_purity$median_abs_cor <- as.matrix(cos_details$cos_purity$median_abs_cor)[-remove_idx, -remove_idx, drop=FALSE] + cos_details$cos_purity$max_abs_cor <- as.matrix(cos_details$cos_purity$max_abs_cor)[-remove_idx, -remove_idx, drop=FALSE] + vcp <- as.vector(1 - apply(1 - do.call(cbind, cos_details$cos_vcp),1,prod)) + names(vcp) <- cb_results$data_info$variables + cb_results$vcp <- vcp + cb_results$cos_details <- cos_details + return(cb_results) + } + + cos_details <- cb_results$cos_details + + coloc_outcome_index <- coloc_outcome <- list() + colocset_names <- c() + for (i in 1:length(cos_details$cos$cos_index)){ + cos_npc_config <- cos_details$cos_normalization_evidence[[i]] + npc_outcome <- cos_npc_config$npc_outcome + pos_pass <- which(npc_outcome>=npc_cutoff) + if (length(pos_pass) == 0){ + coloc_outcome_index[[i]] <- 0 + coloc_outcome[[i]] <- 0 + colocset_names[i] <- paste0("remove", i) + } else { + coloc_outcome_index[[i]] <- sort(cos_npc_config$outcomes_index[pos_pass]) + coloc_outcome[[i]] <- rownames(cos_npc_config)[pos_pass] + colocset_names[i] <- paste0("cos", i, ":", paste0(paste0("y", coloc_outcome_index[[i]]), collapse = "_")) + if (grepl("merged", names(cos_details$cos$cos_index)[i])) { + colocset_names[i] <- paste0(colocset_names[i], ":merged") + } + } + + } + names(coloc_outcome) <- names(coloc_outcome_index) <- colocset_names + cos_details$cos_outcomes <- list("outcome_index" = coloc_outcome_index, "outcome_name" = coloc_outcome) + + # - VCP + cos_weights <- lapply(1:length(cos_details$cos_outcomes$outcome_index), function(idx){ + w <- cos_details$cos_weights[[idx]] + config_idx <- cos_details$cos_outcomes$outcome_index[[idx]] + if (length(config_idx)==1){ + if (config_idx == 0){ + return(matrix(1, nrow=nrow(cos_details$cos_weights[[idx]]))) + } + } + w_outcome <- colnames(w) + config_outcome <- paste0("outcome", config_idx) + pos <- which(w_outcome %in% config_outcome) + w[, pos, drop=FALSE] + }) + cos_details$cos_weights <- cos_weights + int_weight <- lapply(cos_weights, get_integrated_weight, alpha = alpha) + names(int_weight) <- names(cos_weights) <- colocset_names + vcp <- as.vector(1 - apply(1 - do.call(cbind, int_weight),1,prod)) + names(vcp) <- cb_results$data_info$variables + cb_results$vcp <- vcp + cos_details$cos_vcp = int_weight + + # - resummary results + cos_re_idx <- lapply(int_weight, function(w){ unlist(get_in_csets(w, coverage = coverage))}) + cos_re_var <- lapply(cos_re_idx, function(idx){ cb_results$data_info$variables[idx] }) + coloc_csets <- list("cos_index" = cos_re_idx, "cos_variables" = cos_re_var) + cos_details$cos <- coloc_csets + + # - hits variables in each csets + coloc_hits <- coloc_hits_variablenames <- coloc_hits_names <- c() + for (i in 1:length(int_weight)){ + inw <- int_weight[[i]] + if (length(unique(inw))==1){ + coloc_hits <- c(coloc_hits, 0) + coloc_hits_names <- c(coloc_hits_names, names(int_weight)[i]) + } else { + pp <- which(inw == max(inw)) + coloc_hits <- c(coloc_hits, pp) + coloc_hits_variablenames <- c(coloc_hits_variablenames, cb_results$data_info$variables[pp]) + if (length(pp)==1){ + coloc_hits_names <- c(coloc_hits_names, names(int_weight)[i]) + } else { + coloc_hits_names <- c(coloc_hits_names, paste0(names(int_weight)[i], ".", 1:length(pp))) + } + } + } + coloc_hits <- data.frame("top_index" = coloc_hits, "top_variables" = coloc_hits_variablenames) + rownames(coloc_hits) <- coloc_hits_names + cos_details$cos_top_variables <- coloc_hits + cb_results$cos_details <- cos_details + + + # remove CoS does not include outcomes pass npc_cutoff + remove <- grep("remove", colocset_names) + cb_results <- remove_cos(cb_results, remove_idx = remove) + cos_details <- cb_results$cos_details + + # remove CoS only with one trait + n_outcome <- sapply(cos_details$cos_outcomes$outcome_index, length) + single <- which(n_outcome == 1) + if (length(single)==length(n_outcome)){ + + # - all remaining the single outcome + cb_results$cos_details <- cb_results$vcp <- NULL + cb_results <- c(cb_results, list(vcp = NULL, cos_details = NULL)) + + } else if (length(single)!=0 & length(single)!=length(n_outcome)){ + + # - partial remaining the single outcome + ucos_from_cos <- list("ucos" = list("ucos_index" = cos_details$cos$cos_index[single], + "ucos_variables" = cos_details$cos$cos_variables[single]), + "ucos_outcomes" = list("outcome_idx" = cos_details$cos_outcomes$outcome_index[single], + "outcome_name" = cos_details$cos_outcomes$outcome_name[single]), + "ucos_weight" = cos_details$cos_weights[single], + "ucos_purity" = list("min_abs_cor" = as.matrix(cos_details$cos_purity$min_abs_cor)[single, single, drop=FALSE], + "median_abs_cor" = as.matrix(cos_details$cos_purity$median_abs_cor)[single, single, drop=FALSE], + "max_abs_cor" = as.matrix(cos_details$cos_purity$max_abs_cor)[single, single, drop=FALSE]), + "ucos_top_variables" = cos_details$cos_top_variables[single, , drop = FALSE]) + cb_results$ucos_from_cos <- ucos_from_cos + cb_results <- remove_cos(cb_results, remove_idx = single) + + } + + # - refine and output + target_outcome <- NULL + target_idx <- which(cb_results$data_info$outcome_info$is_target) + if (length(target_idx)!=0){ + target_outcome <- cb_results$data_info$outcome_info$outcome_names[target_idx] + } + cb_results$cos_summary <- get_cos_summary(cb_results, target_outcome = target_outcome) + + return(cb_results) + +} + + + + + + + + + + + + + + diff --git a/R/colocboost_update.R b/R/colocboost_update.R index 3682fac..cc143af 100644 --- a/R/colocboost_update.R +++ b/R/colocboost_update.R @@ -13,7 +13,8 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data, func_prior = "z2z", lambda = 0.5, lambda_target = 1, - LD_obj = FALSE){ + LD_obj = FALSE, + dynamic_step = TRUE){ # - clear which outcome need to be updated at which jk pos.update <- which(cb_model_para$update_temp$update_status != 0) @@ -82,10 +83,14 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data, ########## BEGIN: MAIN UPDATE ###################### # - Gradient ascent on beta beta_grad <- weights * sign(cb_model[[i]]$correlation) - if (tail(cb_model[[i]]$obj_path, n=1) > 0.5){ + if (dynamic_step){ + if (tail(cb_model[[i]]$obj_path, n=1) > 0.5){ step1 = max(0.5 * (1 / ( 1 + decayrate*(length(cb_model[[i]]$obj_path)-1) )), cb_model[[i]]$step) - } else { + } else { step1 = cb_model[[i]]$step + } + } else { + step1 = cb_model[[i]]$step } cb_model[[i]]$beta <- cb_model[[i]]$beta + step1 * beta_grad diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index 4f5db98..000bea0 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -1,5 +1,3 @@ - - get_integrated_weight <- function(avWeight, func_intw = "fun_R", alpha = 1.5){ if (func_intw == "prod"){ @@ -37,147 +35,160 @@ get_cormat <- function(X, intercepte = FALSE){ return(cr) } + check_null_post <- function(cb_obj, coloc_sets_temp, coloc_outcomes, check_null = 0.1, check_null_method = "profile", weaker_ucos = TRUE){ + + extract_last <- function(lst) { tail(lst, n = 1)} + extract_first <- function(lst) { head(lst, n = 1)} + + get_profile <- function(cs_beta, X = NULL, Y = NULL, N = NULL, + XtX = NULL, YtY = NULL, XtY = NULL, miss_idx, adj_dep = 1){ - extract_last <- function(lst) { tail(lst, n = 1)} - extract_first <- function(lst) { head(lst, n = 1)} - - get_profile <- function(cs_beta, X = NULL, Y = NULL, N = NULL, - XtX = NULL, YtY = NULL, XtY = NULL, miss_idx){ - - if (!is.null(X)){ - mean((Y-X%*%as.matrix(cs_beta))^2) *N/(N-1) - } else if (!is.null(XtY)){ - scaling_factor <- if (!is.null(N)) (N - 1) else 1 - yty <- YtY / scaling_factor - xtx <- XtX - if (length(miss_idx)!=0){ - xty <- XtY[-miss_idx] / scaling_factor - cs_beta <- cs_beta[-miss_idx] - } else { xty <- XtY / scaling_factor } - - yty - 2*sum(cs_beta*xty) + sum( (xtx %*% as.matrix(cs_beta)) * cs_beta ) - } - + if (!is.null(X)){ + mean((Y-X%*%as.matrix(cs_beta))^2) *N/(N-1) + } else if (!is.null(XtY)){ + scaling_factor <- if (!is.null(N)) (N - 1) else 1 + yty <- YtY / scaling_factor + xtx <- XtX + if (length(miss_idx)!=0){ + xty <- XtY[-miss_idx] / scaling_factor + cs_beta <- cs_beta[-miss_idx] + } else { xty <- XtY / scaling_factor } + + ( yty - 2*sum(cs_beta*xty) + sum( (xtx %*% as.matrix(cs_beta)) * cs_beta ) ) * adj_dep } - get_cs_obj <- function(cs_beta, res, tau, func_prior, lambda, adj_dep, LD_obj, - X = NULL, Y = NULL, N = NULL, - XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL){ - - correlation <- get_correlation(X = X, res = res, XtY = XtY, N = N, YtY = YtY, - XtX = XtX, beta_k = cs_beta, miss_idx = miss_idx) - z <- get_z(correlation, n=N, res) - abs_cor <- abs(correlation) - jk <- which(abs_cor == max(abs_cor)) - jk <- ifelse(length(jk) == 1, jk, sample(jk,1)) - P <- length(z) - ld_jk <- get_LD_jk(jk, X = X, XtX = XtX, N = N, - remain_idx = setdiff(1:P, miss_idx), P = P) - ld_feature <- sqrt(abs(ld_jk)) - # - calculate delta - delta <- boost_KL_delta(z = z, ld_feature = ld_feature, adj_dep=adj_dep, - func_prior = func_prior, lambda = lambda) - scaling_factor <- if (!is.null(N)) (N - 1) else 1 - cov_Xtr <- if(!is.null(X)){ - t(X)%*%res / scaling_factor - } else { - res / scaling_factor - } - obj_ld <- if (LD_obj) ld_feature else rep(1, length(ld_feature)) - obj_ld[miss_idx] <- 0 - exp_term <- adj_dep * obj_ld * (abs(cov_Xtr)) - return(tau * matrixStats::logSumExp(exp_term / tau + log(delta))) - - } + } + + get_cs_obj <- function(cs_beta, res, tau, func_prior, lambda, adj_dep, LD_obj, + X = NULL, Y = NULL, N = NULL, + XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL){ - update_res <- function(X = NULL, Y = NULL, XtX = NULL, XtY = NULL, N = NULL, cs_beta, miss_idx){ - if (!is.null(X)){ - return(Y - X%*%cs_beta) - } else if (!is.null(XtX)){ - scaling.factor <- if (!is.null(N)) N-1 else 1 - xtx <- XtX / scaling.factor - if (length(miss_idx)!=0){ - xty <- XtY[-miss_idx] / scaling.factor - res.tmp <- rep(0, length(XtY)) - res.tmp[-miss_idx] <- xty - xtx%*%cs_beta[-miss_idx] - } else { - xty <- XtY / scaling.factor - res.tmp <- xty - xtx%*%cs_beta - } - return(res.tmp) - } + correlation <- get_correlation(X = X, res = res, XtY = XtY, N = N, YtY = YtY, + XtX = XtX, beta_k = cs_beta, miss_idx = miss_idx) + z <- get_z(correlation, n=N, res) + abs_cor <- abs(correlation) + jk <- which(abs_cor == max(abs_cor)) + jk <- ifelse(length(jk) == 1, jk, sample(jk,1)) + P <- length(z) + ld_jk <- get_LD_jk(jk, X = X, XtX = XtX, N = N, + remain_idx = setdiff(1:P, miss_idx), P = P) + ld_feature <- sqrt(abs(ld_jk)) + # - calculate delta + delta <- boost_KL_delta(z = z, ld_feature = ld_feature, adj_dep=adj_dep, + func_prior = func_prior, lambda = lambda) + scaling_factor <- if (!is.null(N)) (N - 1) else 1 + cov_Xtr <- if(!is.null(X)){ + t(X)%*%res / scaling_factor + } else { + res / scaling_factor } - # - add hoc - cut <- if(length(cb_obj$cb_data)==1) 0.2 else 1 + obj_ld <- if (LD_obj) ld_feature else rep(1, length(ld_feature)) + obj_ld[miss_idx] <- 0 + exp_term <- adj_dep * obj_ld * (abs(cov_Xtr)) + return(tau * matrixStats::logSumExp(exp_term / tau + log(delta))) - # ----- null filtering - cb_data <- cb_obj$cb_data - cs_change <- check_cs_change <- matrix(0, nrow = length(coloc_sets_temp), ncol = cb_obj$cb_model_para$L) - colnames(cs_change) <- colnames(check_cs_change) <- paste0("change_obj_", 1:cb_obj$cb_model_para$L) - max_change <- matrix(0, nrow = length(coloc_sets_temp), ncol = cb_obj$cb_model_para$L) - for (i in 1:length(coloc_sets_temp)){ - cs_variants <- as.numeric(unlist(coloc_sets_temp[[i]])) - for (j in coloc_outcomes){ - cs_beta <- cb_obj$cb_model[[j]]$beta - cs_beta[cs_variants] <- 0 - X_dict <- cb_data$dict[j] - adj_dep <- cb_data$data[[j]]$dependency - if (check_null_method == "profile"){ - cs_profile <- get_profile(cs_beta, X = cb_data$data[[X_dict]]$X, Y = cb_data$data[[j]]$Y, - XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[j]]$XtY, - YtY = cb_data$data[[j]]$YtY, N = cb_data$data[[j]]$N, - miss_idx = cb_data$data[[j]]$variable_miss) - last_profile <- extract_last(cb_obj$cb_model[[j]]$profile_loglike_each) - change <- abs(cs_profile - last_profile) - # - add hoc - if (min(cb_obj$cb_model[[j]]$multi_correction_univariate[cs_variants]) >= cut){ change <- 0 } - # ------ - # - check_null - check_cs_change[i,j] <- change / diff(range(cb_obj$cb_model[[j]]$profile_loglike_each)) - cs_change[i,j] <- change # / diff(range(cb_obj$cb_model[[j]]$profile_loglike_each)) - first_profile <- extract_first(cb_obj$cb_model[[j]]$profile_loglike_each) - max_change[i,j] <- (first_profile - last_profile) >= cb_obj$cb_model[[j]]$check_null_max - } else if (check_null_method == "obj"){ - res <- update_res(X = cb_data$data[[X_dict]]$X, Y = cb_data$data[[j]]$Y, + } + + update_res <- function(X = NULL, Y = NULL, XtX = NULL, XtY = NULL, N = NULL, cs_beta, miss_idx){ + if (!is.null(X)){ + return(Y - X%*%cs_beta) + } else if (!is.null(XtX)){ + scaling.factor <- if (!is.null(N)) N-1 else 1 + xtx <- XtX / scaling.factor + if (length(miss_idx)!=0){ + xty <- XtY[-miss_idx] / scaling.factor + res.tmp <- rep(0, length(XtY)) + res.tmp[-miss_idx] <- xty - xtx%*%cs_beta[-miss_idx] + } else { + xty <- XtY / scaling.factor + res.tmp <- xty - xtx%*%cs_beta + } + return(res.tmp) + } + } + # - add hoc + cut <- if(length(cb_obj$cb_data)==1) 0.2 else 1 + + # ----- null filtering + cb_data <- cb_obj$cb_data + cs_change <- check_cs_change <- matrix(0, nrow = length(coloc_sets_temp), ncol = cb_obj$cb_model_para$L) + colnames(cs_change) <- colnames(check_cs_change) <- paste0("change_obj_", 1:cb_obj$cb_model_para$L) + max_change <- matrix(0, nrow = length(coloc_sets_temp), ncol = cb_obj$cb_model_para$L) + for (i in 1:length(coloc_sets_temp)){ + cs_variants <- as.numeric(unlist(coloc_sets_temp[[i]])) + for (j in coloc_outcomes){ + cs_beta <- cb_obj$cb_model[[j]]$beta + cs_beta[cs_variants] <- 0 + X_dict <- cb_data$dict[j] + adj_dep <- cb_data$data[[j]]$dependency + if (check_null_method == "profile"){ + cs_profile <- get_profile(cs_beta, X = cb_data$data[[X_dict]]$X, Y = cb_data$data[[j]]$Y, XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[j]]$XtY, - N = cb_data$data[[j]]$N, cs_beta, - miss_idx = cb_data$data[[j]]$variable_miss) - cs_obj <- get_cs_obj(cs_beta, res, cb_obj$cb_model_para$tau, cb_obj$cb_model_para$func_prior, - cb_obj$cb_model_para$lambda, adj_dep = adj_dep, - LD_obj = cb_obj$cb_model_para$LD_obj, - X = cb_data$data[[X_dict]]$X, N = cb_data$data[[j]]$N, - XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[X_dict]]$XtY, - YtY = cb_data$data[[X_dict]]$YtY, - miss_idx = cb_data$data[[j]]$variable_miss) - last_obj <- min(cb_obj$cb_model[[j]]$obj_path) - change <- abs(cs_obj - last_obj) - if (length(cb_obj$cb_model[[j]]$obj_path)==1){ - total_obj <- 1 - } else { - total_obj <- diff(range(cb_obj$cb_model[[j]]$obj_path)) - } - check_cs_change[i,j] <- change / total_obj - cs_change[i,j] <- change - max_change[i,j] <- total_obj >= cb_obj$cb_model[[j]]$check_null_max - } + YtY = cb_data$data[[j]]$YtY, N = cb_data$data[[j]]$N, + miss_idx = cb_data$data[[j]]$variable_miss, adj_dep = adj_dep) + last_profile <- extract_last(cb_obj$cb_model[[j]]$profile_loglike_each) + change <- abs(cs_profile - last_profile) + # - add hoc + if (min(cb_obj$cb_model[[j]]$multi_correction_univariate[cs_variants]) >= cut){ change <- 0 } + # ------ + # - check_null + check_cs_change[i,j] <- change / diff(range(cb_obj$cb_model[[j]]$profile_loglike_each)) + cs_change[i,j] <- change # / diff(range(cb_obj$cb_model[[j]]$profile_loglike_each)) + first_profile <- extract_first(cb_obj$cb_model[[j]]$profile_loglike_each) + max_change[i,j] <- (first_profile - last_profile) >= cb_obj$cb_model[[j]]$check_null_max + } else if (check_null_method == "obj"){ + res <- update_res(X = cb_data$data[[X_dict]]$X, Y = cb_data$data[[j]]$Y, + XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[j]]$XtY, + N = cb_data$data[[j]]$N, cs_beta, + miss_idx = cb_data$data[[j]]$variable_miss) + cs_obj <- get_cs_obj(cs_beta, res, cb_obj$cb_model_para$tau, cb_obj$cb_model_para$func_prior, + cb_obj$cb_model_para$lambda, adj_dep = adj_dep, + LD_obj = cb_obj$cb_model_para$LD_obj, + X = cb_data$data[[X_dict]]$X, N = cb_data$data[[j]]$N, + XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[X_dict]]$XtY, + YtY = cb_data$data[[X_dict]]$YtY, + miss_idx = cb_data$data[[j]]$variable_miss) + last_obj <- min(cb_obj$cb_model[[j]]$obj_path) + change <- abs(cs_obj - last_obj) + if (length(cb_obj$cb_model[[j]]$obj_path)==1){ + total_obj <- 1 + } else { + total_obj <- diff(range(cb_obj$cb_model[[j]]$obj_path)) } + check_cs_change[i,j] <- change / total_obj + cs_change[i,j] <- change + max_change[i,j] <- total_obj >= cb_obj$cb_model[[j]]$check_null_max + } } - if (!weaker_ucos){ - check_cs_change <- cs_change - check_null <- cb_obj$cb_model[[j]]$check_null_max - } - cs_change <- as.data.frame(cs_change) - is_non_null <- which(rowSums( (check_cs_change >= check_null) * max_change ) != 0) - - ll = list("cs_change" = cs_change, - "is_non_null" = is_non_null) - return(ll) + } + if (!weaker_ucos){ + check_cs_change <- cs_change + check_null_tmp <- sapply(1:cb_obj$cb_model_para$L, function(j) cb_obj$cb_model[[j]]$check_null_max) + } else { check_null_tmp <- rep(check_null, cb_obj$cb_model_para$L) } + # check_tmp <- sapply(1:nrow(check_cs_change), function(ii) { + # check_cs_tmp <- check_cs_change[ii, ] > check_null + # check_max_change <- max_change[ii, ] + # pos <- which(check_max_change!=0) + # min(check_cs_tmp[pos]) + # }) + cs_change <- as.data.frame(cs_change * max_change) # changed + for (ii in 1:nrow(check_cs_change)){ + cs_tmp <- check_cs_change[ii,] + check_cs_change[ii,] <- (cs_tmp>check_null_tmp) + } + is_non_null <- which(rowSums( check_cs_change * max_change ) != 0) + # is_non_null <- which(rowSums( (cs_change >= check_null) * (max_change >= check_null_max) ) != 0) + # is_non_null <- which(rowSums(cs_change >= check_null) != 0) + + ll = list("cs_change" = cs_change, + "is_non_null" = is_non_null) + return(ll) } @@ -328,6 +339,104 @@ get_between_purity = function (pos1, pos2, X=NULL, Xcorr=NULL, N = NULL, miss_id } +get_cos_evidence <- function(cb_obj, coloc_out, data_info, npc_cutoff = 0.7){ + + get_cos_config <- function(w, config_idx, alpha = 1.5, coverage = 0.95){ + + w_outcome <- colnames(w) + config_outcome <- paste0("outcome", config_idx) + pos <- which(w_outcome %in% config_outcome) + w_config <- w[, pos, drop=FALSE] + int_w <- get_integrated_weight(w_config, alpha = alpha) + unlist(get_in_csets(int_w, coverage = coverage)) + + } + + get_cos_profile <- function(cs_beta, outcome_idx, X = NULL, Y = NULL, N = NULL, + XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, adj_dep = 1){ + + if (!is.null(X)){ + cos_profile <- mean((Y-X%*%as.matrix(cs_beta))^2) *N/(N-1) + yty <- var(Y) + } else if (!is.null(XtY)){ + scaling_factor <- if (!is.null(N)) (N - 1) else 1 + yty <- YtY / scaling_factor + xtx <- XtX + if (length(miss_idx)!=0){ + xty <- XtY[-miss_idx] / scaling_factor + cs_beta <- cs_beta[-miss_idx] + } else { xty <- XtY / scaling_factor } + + cos_profile <- ( yty - 2*sum(cs_beta*xty) + sum( (xtx %*% as.matrix(cs_beta)) * cs_beta ) ) * adj_dep + } + delta <- yty - cos_profile + if (delta <= 0){ + warning(paste("Warning message: potential sumstat & LD mismatch may happens for outcome", outcome_idx, + ". Using logLR = CoS(profile) - max(profile). Please check our website.")) + } + cos_profile + } + + # - + get_outcome_profile_change <- function(outcome_idx, cos, cb_obj, check_null_max){ + extract_last <- function(lst) { tail(lst, n = 1)} + cb_data <- cb_obj$cb_data + cs_beta <- rep(0, cb_obj$cb_model_para$P) + cs_beta[cos] <- cb_obj$cb_model[[outcome_idx]]$beta[cos] + X_dict <- cb_data$dict[outcome_idx] + cos_profile <- get_cos_profile(cs_beta, outcome_idx, + X = cb_data$data[[X_dict]]$X, Y = cb_data$data[[outcome_idx]]$Y, + XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[outcome_idx]]$XtY, + YtY = cb_data$data[[outcome_idx]]$YtY, N = cb_data$data[[outcome_idx]]$N, + miss_idx = cb_data$data[[outcome_idx]]$variable_miss, + adj_dep = cb_data$data[[outcome_idx]]$dependency) + max_profile <- max(cb_obj$cb_model[[outcome_idx]]$profile_loglike_each) + ifelse(max_profile < cos_profile, 0, max_profile - cos_profile) + } + + # - calculate best configuration likelihood explained by minimal configuration + get_normalization_evidence <- function(profile_change, null_max, outcomes, outcome_names, npc_cutoff = 0.7) { + # Define the exponential likelihood ratio normalization (ELRN) + logLR_normalization <- function(ratio) { 1 - exp( - 2*ratio ) } + + ratio <- profile_change / null_max + prob <- logLR_normalization(ratio) + df <- data.frame(outcomes_index = outcomes, relative_logLR = ratio, npc_outcome = prob) + rownames(df) <- outcome_names[outcomes] + sorted_df <- df[order(-df$relative_logLR), ] + sorted_df$strong_effect <- sorted_df$relative_logLR >= 1 + sorted_df$suggestive_effect <- sorted_df$npc_outcome >= npc_cutoff + return(sorted_df) + } + + avWeight <- coloc_out$avWeight + cs_change <- coloc_out$cs_change + check_null_max <- sapply(cb_obj$cb_model, function(cb) cb$check_null_max) + outcome_names <- data_info$outcome_info$outcome_names + n_cos <- length(avWeight) + npc <- c() # normalized probability for / of colocalization + normalization_evidence <- list() + for (i in 1:n_cos){ + w <- avWeight[[i]] + outcomes <- coloc_out$coloc_outcomes[[i]] + # most likely cos + cos <- get_cos_config(w, outcomes,alpha = cb_obj$cb_model_para$alpha,coverage = cb_obj$cb_model_para$coverage) + profile_change_outcome <- sapply(outcomes, function(outcome_idx){ + get_outcome_profile_change(outcome_idx, cos, cb_obj, check_null_max) + }) + normalization_evidence[[i]] <- get_normalization_evidence(profile_change = profile_change_outcome, + null_max = check_null_max[outcomes], + outcomes, outcome_names, npc_cutoff = npc_cutoff) + + # - calcualte CoS colocalization probability (CCP) and CoS uncolocalization probability (UCCP) + npc_outcome <- normalization_evidence[[i]]$npc_outcome + max_idx <- which.max(npc_outcome) + npuc <- npc_outcome[max_idx] * prod(1 - npc_outcome[-max_idx]) + npc[i] <- 1 - npuc + } + names(normalization_evidence) <- names(npc) <- names(coloc_out$cos) + return(list(normalization_evidence=normalization_evidence, npc=npc)) +} diff --git a/R/colocboost_workhorse.R b/R/colocboost_workhorse.R index 6577e18..77d9859 100644 --- a/R/colocboost_workhorse.R +++ b/R/colocboost_workhorse.R @@ -32,6 +32,7 @@ colocboost_workhorse <- function(cb_data, func_compare = "min_max", coloc_thres = 0.1, LD_obj = FALSE, + dynamic_step = TRUE, target_idx = NULL, outcome_names = NULL){ @@ -126,7 +127,8 @@ colocboost_workhorse <- function(cb_data, func_prior = func_prior, lambda = lambda, lambda_target = lambda_target, - LD_obj = LD_obj) + LD_obj = LD_obj, + dynamic_step = dynamic_step) # print(paste("m: update", which(cb_model_para$update_temp$update_status != 0), "at", cb_model_para$update_temp$real_update_jk[which(cb_model_para$update_temp$update_status != 0)]))