diff --git a/NAMESPACE b/NAMESPACE index d7b1baaa..65bc5db7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export(coloc_wrapper) export(colocboost_analysis_pipeline) export(compute_LD) export(compute_qtl_enrichment) +export(ctwas_bimfile_loader) export(dentist) export(dentist_single_window) export(dpr_weights) @@ -35,6 +36,7 @@ export(find_data) export(find_duplicate_variants) export(fsusie_get_cs) export(fsusie_wrapper) +export(get_ctwas_meta_data) export(get_filter_lbf_index) export(get_nested_element) export(get_ref_variant_info) diff --git a/R/colocboost_pipeline.R b/R/colocboost_pipeline.R index e9e4e30e..c5f9f90b 100644 --- a/R/colocboost_pipeline.R +++ b/R/colocboost_pipeline.R @@ -457,13 +457,22 @@ qc_regional_data <- function(region_data, impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01)) { qc_method <- match.arg(qc_method) - # Validate and recycle pip_cutoff_to_skip_ind: scalar -> recycled to n_contexts + # Validate and recycle pip_cutoff_to_skip_ind: scalar -> named vector for all contexts if (!is.null(region_data$individual_data)) { - n_ind_contexts <- length(region_data$individual_data$residual_Y) - if (length(pip_cutoff_to_skip_ind) == 1) { - pip_cutoff_to_skip_ind <- rep(pip_cutoff_to_skip_ind, n_ind_contexts) - } else if (length(pip_cutoff_to_skip_ind) != n_ind_contexts) { - stop("pip_cutoff_to_skip_ind must be a scalar or a vector with length equal to the number of individual contexts (", n_ind_contexts, ").") + ind_context_names <- names(region_data$individual_data$residual_Y) + n_ind_contexts <- length(ind_context_names) + if (length(pip_cutoff_to_skip_ind) == 1 && is.null(names(pip_cutoff_to_skip_ind))) { + pip_cutoff_to_skip_ind <- setNames(rep(pip_cutoff_to_skip_ind, n_ind_contexts), ind_context_names) + } else if (!is.null(names(pip_cutoff_to_skip_ind))) { + # Named vector: fill missing contexts with 0 + missing <- setdiff(ind_context_names, names(pip_cutoff_to_skip_ind)) + if (length(missing) > 0) { + pip_cutoff_to_skip_ind <- c(pip_cutoff_to_skip_ind, setNames(rep(0, length(missing)), missing)) + } + } else if (length(pip_cutoff_to_skip_ind) == n_ind_contexts) { + names(pip_cutoff_to_skip_ind) <- ind_context_names + } else { + stop("pip_cutoff_to_skip_ind must be a scalar, a named vector, or a vector with length equal to the number of individual contexts (", n_ind_contexts, ").") } } @@ -543,10 +552,14 @@ qc_regional_data <- function(region_data, results <- purrr::imap(X, function(resX, context) { resY <- Y[[context]] maf <- MAF[[context]] - i_context <- match(context, names(X)) + pip_cutoff <- if (context %in% names(pip_cutoff_to_skip_ind)) { + pip_cutoff_to_skip_ind[[context]] + } else { + 0 + } if (is.null(resY)) return(NULL) resX <- filter_X(resX, missing_rate_thresh = NULL, maf_thresh = maf_cutoff, maf = maf) - resY <- filter_resY_pip(resX, resY, pip_cutoff = pip_cutoff_to_skip_ind[i_context], context = context) + resY <- filter_resY_pip(resX, resY, pip_cutoff = pip_cutoff, context = context) if (is.null(resY)) return(NULL) list(X = resX, Y = resY) }) %>% purrr::compact() diff --git a/R/ctwas_wrapper.R b/R/ctwas_wrapper.R index 76d5c7c3..bdc0639f 100644 --- a/R/ctwas_wrapper.R +++ b/R/ctwas_wrapper.R @@ -1,5 +1,85 @@ -### File-I/O functions (ctwas_bimfile_loader, get_ctwas_meta_data) have been -### removed. Use ld_loader() and read_bim() from the standard I/O path instead. +#' Load a PLINK .bim file for cTWAS +#' +#' @description +#' \strong{Deprecated.} Use [read_bim()] via the standard I/O path +#' instead. This wrapper remains for backwards compatibility and calls +#' [read_bim()] internally, mapping its output to the legacy column names. +#' +#' @param bim_file_path Path to a PLINK \code{.bim} file (or a \code{.bed} +#' file — the \code{.bim} extension is resolved automatically). +#' +#' @return A data.frame with columns \code{chrom}, \code{id}, \code{GD}, +#' \code{pos}, \code{A1}, \code{A2}. Variant IDs are normalised via +#' [normalize_variant_id()]. +#' +#' @export +ctwas_bimfile_loader <- function(bim_file_path) { + .Deprecated("read_bim", package = "pecotmr", + msg = "ctwas_bimfile_loader() is deprecated. Use read_bim() instead.") + # read_bim() expects a .bed path and derives .bim from it. + # Accept either .bim or .bed and normalise to .bed. + bed_path <- sub("\\.bim$", ".bed", bim_file_path) + bim <- read_bim(bed_path) + # Map new column names back to legacy names + snp_info <- data.frame( + chrom = bim$chrom, + id = normalize_variant_id(bim$id), + GD = bim$gpos, + pos = bim$pos, + A1 = bim$a1, + A2 = bim$a0, + stringsAsFactors = FALSE + ) + return(snp_info) +} + +#' Load cTWAS LD meta-data +#' +#' @description +#' \strong{Deprecated.} Use [ld_loader()] with its \code{LD_info} +#' argument instead. This wrapper remains for backwards compatibility and +#' produces the same \code{list(LD_info, region_info)} output as the original. +#' +#' @param ld_meta_data_file Path to the LD meta-data TSV file. +#' @param subset_region_ids Optional character vector of region IDs +#' (\code{"chrom_start_end"}) to subset to. +#' +#' @return A list with components: +#' \describe{ +#' \item{LD_info}{Data.frame with columns \code{region_id}, \code{LD_file}, +#' \code{SNP_file}.} +#' \item{region_info}{Data.frame with columns \code{chrom}, \code{start}, +#' \code{stop}, \code{region_id}.} +#' } +#' +#' @importFrom vroom vroom +#' @export +get_ctwas_meta_data <- function(ld_meta_data_file, subset_region_ids = NULL) { + .Deprecated("ld_loader", package = "pecotmr", + msg = "get_ctwas_meta_data() is deprecated. Use ld_loader() with LD_info instead.") + LD_info <- as.data.frame(vroom(ld_meta_data_file)) + colnames(LD_info)[1] <- "chrom" + LD_info$region_id <- paste(as.integer(strip_chr_prefix(LD_info$chrom)), + LD_info$start, LD_info$end, sep = "_") + LD_info$LD_file <- paste0(dirname(ld_meta_data_file), "/", + gsub(",.*$", "", LD_info$path)) + LD_info$SNP_file <- paste0(LD_info$LD_file, ".bim") + LD_info <- LD_info[, c("region_id", "LD_file", "SNP_file")] + region_info <- LD_info[, "region_id", drop = FALSE] + region_info$chrom <- as.integer(gsub("\\_.*$", "", region_info$region_id)) + region_info$start <- as.integer(gsub("\\_.*$", "", + sub("^.*?\\_", "", region_info$region_id))) + region_info$stop <- as.integer(sub("^.*?\\_", "", + sub("^.*?\\_", "", region_info$region_id))) + region_info$region_id <- paste0(region_info$chrom, "_", + region_info$start, "_", + region_info$stop) + region_info <- region_info[, c("chrom", "start", "stop", "region_id")] + if (!is.null(subset_region_ids)) { + region_info <- region_info[region_info$region_id %in% subset_region_ids, ] + } + return(list(LD_info = LD_info, region_info = region_info)) +} #' Function to select variants for ctwas weights input #' @param region_data A list of list containing weights list and snp_info list data for multiple genes/events within a single LD block region. diff --git a/man/ctwas_bimfile_loader.Rd b/man/ctwas_bimfile_loader.Rd new file mode 100644 index 00000000..c2e34be7 --- /dev/null +++ b/man/ctwas_bimfile_loader.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ctwas_wrapper.R +\name{ctwas_bimfile_loader} +\alias{ctwas_bimfile_loader} +\title{Load a PLINK .bim file for cTWAS} +\usage{ +ctwas_bimfile_loader(bim_file_path) +} +\arguments{ +\item{bim_file_path}{Path to a PLINK \code{.bim} file (or a \code{.bed} +file — the \code{.bim} extension is resolved automatically).} +} +\value{ +A data.frame with columns \code{chrom}, \code{id}, \code{GD}, + \code{pos}, \code{A1}, \code{A2}. Variant IDs are normalised via + [normalize_variant_id()]. +} +\description{ +\strong{Deprecated.} Use [read_bim()] via the standard I/O path +instead. This wrapper remains for backwards compatibility and calls +[read_bim()] internally, mapping its output to the legacy column names. +} diff --git a/man/get_ctwas_meta_data.Rd b/man/get_ctwas_meta_data.Rd new file mode 100644 index 00000000..367a4365 --- /dev/null +++ b/man/get_ctwas_meta_data.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ctwas_wrapper.R +\name{get_ctwas_meta_data} +\alias{get_ctwas_meta_data} +\title{Load cTWAS LD meta-data} +\usage{ +get_ctwas_meta_data(ld_meta_data_file, subset_region_ids = NULL) +} +\arguments{ +\item{ld_meta_data_file}{Path to the LD meta-data TSV file.} + +\item{subset_region_ids}{Optional character vector of region IDs +(\code{"chrom_start_end"}) to subset to.} +} +\value{ +A list with components: +\describe{ + \item{LD_info}{Data.frame with columns \code{region_id}, \code{LD_file}, + \code{SNP_file}.} + \item{region_info}{Data.frame with columns \code{chrom}, \code{start}, + \code{stop}, \code{region_id}.} +} +} +\description{ +\strong{Deprecated.} Use [ld_loader()] with its \code{LD_info} +argument instead. This wrapper remains for backwards compatibility and +produces the same \code{list(LD_info, region_info)} output as the original. +} diff --git a/tests/testthat/test_colocboost_pipeline.R b/tests/testthat/test_colocboost_pipeline.R index c76ebb78..fe0eafd0 100644 --- a/tests/testthat/test_colocboost_pipeline.R +++ b/tests/testthat/test_colocboost_pipeline.R @@ -1768,6 +1768,78 @@ test_that("qc_regional_data: mismatched pip_cutoff_to_skip_ind length errors", { ) }) +test_that("qc_regional_data: named pip_cutoff_to_skip_ind works with context names", { + region_data <- make_individual_region_data(n = 20, p = 8, n_contexts = 2, n_events = 2) + + # Named vector matching context names + result <- pecotmr:::qc_regional_data( + region_data, + pip_cutoff_to_skip_ind = c(ctx1 = 0, ctx2 = 0), + qc_method = "slalom" + ) + expect_type(result, "list") +}) + +test_that("qc_regional_data: named pip_cutoff_to_skip_ind fills missing contexts with 0", { + region_data <- make_individual_region_data(n = 20, p = 8, n_contexts = 3, n_events = 2) + + # Only specify cutoff for ctx1 — ctx2 and ctx3 should default to 0 + result <- pecotmr:::qc_regional_data( + region_data, + pip_cutoff_to_skip_ind = c(ctx1 = 0), + qc_method = "slalom" + ) + expect_type(result, "list") +}) + +test_that("qc_regional_data: scalar pip_cutoff_to_skip_ind becomes named vector", { + region_data <- make_individual_region_data(n = 20, p = 8, n_contexts = 2, n_events = 2) + + # This exercises the scalar -> named vector recycling path + result <- pecotmr:::qc_regional_data( + region_data, + pip_cutoff_to_skip_ind = 0, + qc_method = "slalom" + ) + expect_type(result, "list") +}) + +test_that("qc_regional_data: pip_cutoff_to_skip_ind lookup works when X and Y have different contexts", { + # Simulate a case where residual_X has a context not in residual_Y + set.seed(303) + n <- 20; p <- 8; n_events <- 2 + make_ctx <- function() { + X <- matrix(rnorm(n * p), n, p) + colnames(X) <- paste0("chr1:", seq_len(p) * 100, ":A:G") + Y <- matrix(rnorm(n * n_events), n, n_events) + colnames(Y) <- paste0("event", seq_len(n_events)) + maf <- runif(p, 0.05, 0.45) + list(X = X, Y = Y, maf = maf) + } + ctx1 <- make_ctx() + ctx2 <- make_ctx() + ctx3 <- make_ctx() + + # residual_X has 3 contexts, residual_Y only has 2 + # pip_cutoff_to_skip_ind is recycled from residual_Y (length 2) + region_data <- list( + individual_data = list( + residual_Y = list(ctx1 = ctx1$Y, ctx2 = ctx2$Y), + residual_X = list(ctx1 = ctx1$X, ctx2 = ctx2$X, ctx3 = ctx3$X), + maf = list(ctx1 = ctx1$maf, ctx2 = ctx2$maf, ctx3 = ctx3$maf) + ), + sumstat_data = NULL + ) + + # Should not error — ctx3 in X has no pip_cutoff entry, defaults to 0 + result <- pecotmr:::qc_regional_data( + region_data, + pip_cutoff_to_skip_ind = 0, + qc_method = "slalom" + ) + expect_type(result, "list") +}) + # =========================================================================== # SECTION U: colocboost_analysis_pipeline - sumstat with NA z filtering (line 252-258) # =========================================================================== diff --git a/tests/testthat/test_ctwas.R b/tests/testthat/test_ctwas.R index 8284a566..09a65258 100644 --- a/tests/testthat/test_ctwas.R +++ b/tests/testthat/test_ctwas.R @@ -158,3 +158,139 @@ test_that("trim_ctwas_variants select_variants uses cs_min_cor to include CS var expect_true("chr1:1000:A:G" %in% included) expect_true("chr1:3000:G:A" %in% included) }) + +# =========================================================================== +# Deprecated wrapper: ctwas_bimfile_loader +# =========================================================================== + +test_that("ctwas_bimfile_loader reads .bim and returns legacy column names", { + bim_path <- tempfile(fileext = ".bim") + on.exit(unlink(bim_path), add = TRUE) + cat("1\tchr1:1000:A:G\t0\t1000\tA\tG\n", file = bim_path) + cat("1\tchr1:2000:C:T\t0\t2000\tC\tT\n", file = bim_path, append = TRUE) + + expect_warning( + res <- ctwas_bimfile_loader(bim_path), + "deprecated" + ) + expect_equal(colnames(res), c("chrom", "id", "GD", "pos", "A1", "A2")) + expect_equal(nrow(res), 2) + expect_equal(res$pos, c(1000, 2000)) +}) + +test_that("ctwas_bimfile_loader accepts .bed path and resolves .bim", { + bim_path <- tempfile(fileext = ".bim") + bed_path <- sub("\\.bim$", ".bed", bim_path) + on.exit(unlink(c(bim_path, bed_path)), add = TRUE) + cat("22\trs100\t0\t50000\tA\tG\n", file = bim_path) + + expect_warning( + res <- ctwas_bimfile_loader(bed_path), + "deprecated" + ) + expect_equal(nrow(res), 1) + expect_equal(res$pos, 50000) +}) + +test_that("ctwas_bimfile_loader normalizes variant IDs", { + bim_path <- tempfile(fileext = ".bim") + on.exit(unlink(bim_path), add = TRUE) + cat("1\tchr1:1000:A:G\t0\t1000\tA\tG\n", file = bim_path) + + expect_warning( + res <- ctwas_bimfile_loader(bim_path), + "deprecated" + ) + # normalize_variant_id should have been applied + expect_equal(res$id, normalize_variant_id("chr1:1000:A:G")) +}) + +test_that("ctwas_bimfile_loader works with real test fixture", { + bim_path <- test_path("test_data", "protocol_example.genotype.bim") + skip_if_not(file.exists(bim_path), "Test fixture not available") + + expect_warning( + res <- ctwas_bimfile_loader(bim_path), + "deprecated" + ) + expect_equal(colnames(res), c("chrom", "id", "GD", "pos", "A1", "A2")) + expect_equal(nrow(res), 100) +}) + +# =========================================================================== +# Deprecated wrapper: get_ctwas_meta_data +# =========================================================================== + +test_that("get_ctwas_meta_data reads LD metadata and returns LD_info + region_info", { + meta_file <- tempfile(fileext = ".tsv") + on.exit(unlink(meta_file), add = TRUE) + writeLines( + paste("chrom", "start", "end", "path", sep = "\t"), + meta_file + ) + cat(paste("chr1", "1000", "2000", "block1.cor.xz", sep = "\t"), "\n", + file = meta_file, append = TRUE) + cat(paste("chr1", "2000", "3000", "block2.cor.xz", sep = "\t"), "\n", + file = meta_file, append = TRUE) + + expect_warning( + res <- get_ctwas_meta_data(meta_file), + "deprecated" + ) + expect_true(is.list(res)) + expect_true("LD_info" %in% names(res)) + expect_true("region_info" %in% names(res)) + + expect_equal(nrow(res$LD_info), 2) + expect_equal(colnames(res$LD_info), c("region_id", "LD_file", "SNP_file")) + expect_equal(res$LD_info$region_id, c("1_1000_2000", "1_2000_3000")) + + expect_equal(nrow(res$region_info), 2) + expect_equal(colnames(res$region_info), c("chrom", "start", "stop", "region_id")) + expect_equal(res$region_info$chrom, c(1L, 1L)) + expect_equal(res$region_info$start, c(1000L, 2000L)) + expect_equal(res$region_info$stop, c(2000L, 3000L)) +}) + +test_that("get_ctwas_meta_data subset_region_ids filters correctly", { + meta_file <- tempfile(fileext = ".tsv") + on.exit(unlink(meta_file), add = TRUE) + writeLines( + paste("chrom", "start", "end", "path", sep = "\t"), + meta_file + ) + cat(paste("chr1", "1000", "2000", "block1.cor.xz", sep = "\t"), "\n", + file = meta_file, append = TRUE) + cat(paste("chr1", "2000", "3000", "block2.cor.xz", sep = "\t"), "\n", + file = meta_file, append = TRUE) + cat(paste("chr2", "5000", "6000", "block3.cor.xz", sep = "\t"), "\n", + file = meta_file, append = TRUE) + + expect_warning( + res <- get_ctwas_meta_data(meta_file, subset_region_ids = "1_1000_2000"), + "deprecated" + ) + expect_equal(nrow(res$region_info), 1) + expect_equal(res$region_info$region_id, "1_1000_2000") + # LD_info is not subsetted (matches original behavior) + expect_equal(nrow(res$LD_info), 3) +}) + +test_that("get_ctwas_meta_data LD_file paths are relative to metadata directory", { + tmpdir <- tempdir() + meta_file <- file.path(tmpdir, "ld_meta.tsv") + on.exit(unlink(meta_file), add = TRUE) + writeLines( + paste("chrom", "start", "end", "path", sep = "\t"), + meta_file + ) + cat(paste("chr1", "100", "200", "subdir/block.cor.xz", sep = "\t"), "\n", + file = meta_file, append = TRUE) + + expect_warning( + res <- get_ctwas_meta_data(meta_file), + "deprecated" + ) + expect_equal(res$LD_info$LD_file, file.path(tmpdir, "subdir/block.cor.xz")) + expect_equal(res$LD_info$SNP_file, paste0(file.path(tmpdir, "subdir/block.cor.xz"), ".bim")) +})