Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export(coloc_wrapper)
export(colocboost_analysis_pipeline)
export(compute_LD)
export(compute_qtl_enrichment)
export(ctwas_bimfile_loader)
export(dentist)
export(dentist_single_window)
export(dpr_weights)
Expand All @@ -35,6 +36,7 @@ export(find_data)
export(find_duplicate_variants)
export(fsusie_get_cs)
export(fsusie_wrapper)
export(get_ctwas_meta_data)
export(get_filter_lbf_index)
export(get_nested_element)
export(get_ref_variant_info)
Expand Down
29 changes: 21 additions & 8 deletions R/colocboost_pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -457,13 +457,22 @@ qc_regional_data <- function(region_data,
impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01)) {
qc_method <- match.arg(qc_method)

# Validate and recycle pip_cutoff_to_skip_ind: scalar -> recycled to n_contexts
# Validate and recycle pip_cutoff_to_skip_ind: scalar -> named vector for all contexts
if (!is.null(region_data$individual_data)) {
n_ind_contexts <- length(region_data$individual_data$residual_Y)
if (length(pip_cutoff_to_skip_ind) == 1) {
pip_cutoff_to_skip_ind <- rep(pip_cutoff_to_skip_ind, n_ind_contexts)
} else if (length(pip_cutoff_to_skip_ind) != n_ind_contexts) {
stop("pip_cutoff_to_skip_ind must be a scalar or a vector with length equal to the number of individual contexts (", n_ind_contexts, ").")
ind_context_names <- names(region_data$individual_data$residual_Y)
n_ind_contexts <- length(ind_context_names)
if (length(pip_cutoff_to_skip_ind) == 1 && is.null(names(pip_cutoff_to_skip_ind))) {
pip_cutoff_to_skip_ind <- setNames(rep(pip_cutoff_to_skip_ind, n_ind_contexts), ind_context_names)
} else if (!is.null(names(pip_cutoff_to_skip_ind))) {
# Named vector: fill missing contexts with 0
missing <- setdiff(ind_context_names, names(pip_cutoff_to_skip_ind))
if (length(missing) > 0) {
pip_cutoff_to_skip_ind <- c(pip_cutoff_to_skip_ind, setNames(rep(0, length(missing)), missing))
}
} else if (length(pip_cutoff_to_skip_ind) == n_ind_contexts) {
names(pip_cutoff_to_skip_ind) <- ind_context_names
} else {
stop("pip_cutoff_to_skip_ind must be a scalar, a named vector, or a vector with length equal to the number of individual contexts (", n_ind_contexts, ").")
}
}

Expand Down Expand Up @@ -543,10 +552,14 @@ qc_regional_data <- function(region_data,
results <- purrr::imap(X, function(resX, context) {
resY <- Y[[context]]
maf <- MAF[[context]]
i_context <- match(context, names(X))
pip_cutoff <- if (context %in% names(pip_cutoff_to_skip_ind)) {
pip_cutoff_to_skip_ind[[context]]
} else {
0
}
if (is.null(resY)) return(NULL)
resX <- filter_X(resX, missing_rate_thresh = NULL, maf_thresh = maf_cutoff, maf = maf)
resY <- filter_resY_pip(resX, resY, pip_cutoff = pip_cutoff_to_skip_ind[i_context], context = context)
resY <- filter_resY_pip(resX, resY, pip_cutoff = pip_cutoff, context = context)
if (is.null(resY)) return(NULL)
list(X = resX, Y = resY)
}) %>% purrr::compact()
Expand Down
84 changes: 82 additions & 2 deletions R/ctwas_wrapper.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,85 @@
### File-I/O functions (ctwas_bimfile_loader, get_ctwas_meta_data) have been
### removed. Use ld_loader() and read_bim() from the standard I/O path instead.
#' Load a PLINK .bim file for cTWAS
#'
#' @description
#' \strong{Deprecated.} Use [read_bim()] via the standard I/O path
#' instead. This wrapper remains for backwards compatibility and calls
#' [read_bim()] internally, mapping its output to the legacy column names.
#'
#' @param bim_file_path Path to a PLINK \code{.bim} file (or a \code{.bed}
#' file — the \code{.bim} extension is resolved automatically).
#'
#' @return A data.frame with columns \code{chrom}, \code{id}, \code{GD},
#' \code{pos}, \code{A1}, \code{A2}. Variant IDs are normalised via
#' [normalize_variant_id()].
#'
#' @export
ctwas_bimfile_loader <- function(bim_file_path) {
.Deprecated("read_bim", package = "pecotmr",
msg = "ctwas_bimfile_loader() is deprecated. Use read_bim() instead.")
# read_bim() expects a .bed path and derives .bim from it.
# Accept either .bim or .bed and normalise to .bed.
bed_path <- sub("\\.bim$", ".bed", bim_file_path)
bim <- read_bim(bed_path)
# Map new column names back to legacy names
snp_info <- data.frame(
chrom = bim$chrom,
id = normalize_variant_id(bim$id),
GD = bim$gpos,
pos = bim$pos,
A1 = bim$a1,
A2 = bim$a0,
stringsAsFactors = FALSE
)
return(snp_info)
}

#' Load cTWAS LD meta-data
#'
#' @description
#' \strong{Deprecated.} Use [ld_loader()] with its \code{LD_info}
#' argument instead. This wrapper remains for backwards compatibility and
#' produces the same \code{list(LD_info, region_info)} output as the original.
#'
#' @param ld_meta_data_file Path to the LD meta-data TSV file.
#' @param subset_region_ids Optional character vector of region IDs
#' (\code{"chrom_start_end"}) to subset to.
#'
#' @return A list with components:
#' \describe{
#' \item{LD_info}{Data.frame with columns \code{region_id}, \code{LD_file},
#' \code{SNP_file}.}
#' \item{region_info}{Data.frame with columns \code{chrom}, \code{start},
#' \code{stop}, \code{region_id}.}
#' }
#'
#' @importFrom vroom vroom
#' @export
get_ctwas_meta_data <- function(ld_meta_data_file, subset_region_ids = NULL) {
.Deprecated("ld_loader", package = "pecotmr",
msg = "get_ctwas_meta_data() is deprecated. Use ld_loader() with LD_info instead.")
LD_info <- as.data.frame(vroom(ld_meta_data_file))
colnames(LD_info)[1] <- "chrom"
LD_info$region_id <- paste(as.integer(strip_chr_prefix(LD_info$chrom)),
LD_info$start, LD_info$end, sep = "_")
LD_info$LD_file <- paste0(dirname(ld_meta_data_file), "/",
gsub(",.*$", "", LD_info$path))
LD_info$SNP_file <- paste0(LD_info$LD_file, ".bim")
LD_info <- LD_info[, c("region_id", "LD_file", "SNP_file")]
region_info <- LD_info[, "region_id", drop = FALSE]
region_info$chrom <- as.integer(gsub("\\_.*$", "", region_info$region_id))
region_info$start <- as.integer(gsub("\\_.*$", "",
sub("^.*?\\_", "", region_info$region_id)))
region_info$stop <- as.integer(sub("^.*?\\_", "",
sub("^.*?\\_", "", region_info$region_id)))
region_info$region_id <- paste0(region_info$chrom, "_",
region_info$start, "_",
region_info$stop)
region_info <- region_info[, c("chrom", "start", "stop", "region_id")]
if (!is.null(subset_region_ids)) {
region_info <- region_info[region_info$region_id %in% subset_region_ids, ]
}
return(list(LD_info = LD_info, region_info = region_info))
}

#' Function to select variants for ctwas weights input
#' @param region_data A list of list containing weights list and snp_info list data for multiple genes/events within a single LD block region.
Expand Down
22 changes: 22 additions & 0 deletions man/ctwas_bimfile_loader.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions man/get_ctwas_meta_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

72 changes: 72 additions & 0 deletions tests/testthat/test_colocboost_pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -1768,6 +1768,78 @@ test_that("qc_regional_data: mismatched pip_cutoff_to_skip_ind length errors", {
)
})

test_that("qc_regional_data: named pip_cutoff_to_skip_ind works with context names", {
region_data <- make_individual_region_data(n = 20, p = 8, n_contexts = 2, n_events = 2)

# Named vector matching context names
result <- pecotmr:::qc_regional_data(
region_data,
pip_cutoff_to_skip_ind = c(ctx1 = 0, ctx2 = 0),
qc_method = "slalom"
)
expect_type(result, "list")
})

test_that("qc_regional_data: named pip_cutoff_to_skip_ind fills missing contexts with 0", {
region_data <- make_individual_region_data(n = 20, p = 8, n_contexts = 3, n_events = 2)

# Only specify cutoff for ctx1 — ctx2 and ctx3 should default to 0
result <- pecotmr:::qc_regional_data(
region_data,
pip_cutoff_to_skip_ind = c(ctx1 = 0),
qc_method = "slalom"
)
expect_type(result, "list")
})

test_that("qc_regional_data: scalar pip_cutoff_to_skip_ind becomes named vector", {
region_data <- make_individual_region_data(n = 20, p = 8, n_contexts = 2, n_events = 2)

# This exercises the scalar -> named vector recycling path
result <- pecotmr:::qc_regional_data(
region_data,
pip_cutoff_to_skip_ind = 0,
qc_method = "slalom"
)
expect_type(result, "list")
})

test_that("qc_regional_data: pip_cutoff_to_skip_ind lookup works when X and Y have different contexts", {
# Simulate a case where residual_X has a context not in residual_Y
set.seed(303)
n <- 20; p <- 8; n_events <- 2
make_ctx <- function() {
X <- matrix(rnorm(n * p), n, p)
colnames(X) <- paste0("chr1:", seq_len(p) * 100, ":A:G")
Y <- matrix(rnorm(n * n_events), n, n_events)
colnames(Y) <- paste0("event", seq_len(n_events))
maf <- runif(p, 0.05, 0.45)
list(X = X, Y = Y, maf = maf)
}
ctx1 <- make_ctx()
ctx2 <- make_ctx()
ctx3 <- make_ctx()

# residual_X has 3 contexts, residual_Y only has 2
# pip_cutoff_to_skip_ind is recycled from residual_Y (length 2)
region_data <- list(
individual_data = list(
residual_Y = list(ctx1 = ctx1$Y, ctx2 = ctx2$Y),
residual_X = list(ctx1 = ctx1$X, ctx2 = ctx2$X, ctx3 = ctx3$X),
maf = list(ctx1 = ctx1$maf, ctx2 = ctx2$maf, ctx3 = ctx3$maf)
),
sumstat_data = NULL
)

# Should not error — ctx3 in X has no pip_cutoff entry, defaults to 0
result <- pecotmr:::qc_regional_data(
region_data,
pip_cutoff_to_skip_ind = 0,
qc_method = "slalom"
)
expect_type(result, "list")
})

# ===========================================================================
# SECTION U: colocboost_analysis_pipeline - sumstat with NA z filtering (line 252-258)
# ===========================================================================
Expand Down
Loading