Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 26 additions & 12 deletions R/epidish.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,8 @@
#'
#' @param beta_matrix a beta matrix with CpGs in rows and samples in columns
#' @param mode Choice of a reference-based method ('RPC','CBS','CP')
#' @param reference A matrix of reference 'centroids', i.e. representative molecular profiles,
#' for a number of cell subtypes. rows label molecular features (e.g. CpGs,...)
#' and columns label the cell-type. IDs need to be provided as rownames and
#' colnames, respectively. Missing value is not allowed, and all values in
#' this matrix should be positive or zero. For DNAm data, values should be
#' beta-values.
#' @param reference Either a string (one of 'blood', 'breast', 'epithelial') or a data.frame of reference CpGs, i.e. representative sites,
#' for a number of cell subtypes. If a matrix is provided, it will be used directly as the signature matrix.
#' @param maxit Only used in RPC mode, the limit of the number of IWLS iterations
#' @param nu.v Only used in CBS mode. It is a vector of several candidate nu values. nu is
#' parameter needed for nu-classification, nu-regression, and
Expand Down Expand Up @@ -51,17 +47,35 @@ run_epidish <- function(beta_matrix,
mode <- mode[1]
message(paste0(mode, " was chosen as default for \"mode\""))
}
if (length(reference) > 1) {
# If reference is a character, use built-in, else use as matrix
if (is.character(reference) && length(reference) > 1) {
reference <- reference[1]
message(paste0(reference, " was chosen as default for \"reference\""))
}
message(paste0("Starting EpiDISH deconvolution with mode ", mode, " ..."))

ref_mat <- switch(reference,
'blood' = EpiDISH::centDHSbloodDMC.m,
'breast' = EpiDISH::centEpiFibFatIC.m,
'epithelial' = EpiDISH::centEpiFibIC.m
)
if (is.character(reference)) {
ref_mat <- switch(reference,
'blood' = EpiDISH::centDHSbloodDMC.m,
'breast' = EpiDISH::centEpiFibFatIC.m,
'epithelial' = EpiDISH::centEpiFibIC.m,
stop("Unknown reference type: ", reference)
)
} else if (is.data.frame(reference)) {

# check if 'CpGs' column exists
if(!'CpGs' %in% colnames(reference)){
stop("No CpG column in custom reference.")
}

reference_cpgs <- reference[['CpGs']]
ref_mat <- reference |> dplyr::select(-CpGs) |> as.matrix()
rownames(ref_mat) <- reference_cpgs

} else {
stop("reference must be either a character string or a data.frame")
}

if (!is.null(cpg_subset)) {
overlap <- intersect(cpg_subset, rownames(ref_mat))
missing <- setdiff(cpg_subset, rownames(ref_mat))
Expand Down
43 changes: 32 additions & 11 deletions R/methatlas.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
#' run MethAtlas
#'
#' @param beta_matrix a beta matrix with CpGs in rows and samples in columns
#' @param reference_atlas Path to a csv file that saves a reference matrix with CpGs as rows and cell types as columns.
#' The default (tissue-wide) reference file is stored in 'inst/reference_atlas.csv'.
#' @param reference Either a path to a csv file that saves a reference matrix with CpGs as rows and cell types as columns,
#' or a data.frame of reference CpGs with a 'CpGs' column and cell types as other columns.
#' The default (tissue-wide) reference file is stored in 'inst/reference_atlas.csv'.
#' @param temp_dir Path to directory where the beta matrix will be saved as a csv file.
#' @param out_dir Path to output directory. Output will be a csv file and a png representing the cell type fractions.
#' @param use_epic_reference The MethAtlas has a whole-tissue reference or a immunecell-specific reference that is optimized for EPIC arrays (which is a subset of the whole-tissue reference)
#' @param cpg_subset Optional character vector of CpGs to subset the signature matrix. Default: NULL (use all CpGs in the signature).
#'
#' @export
#'
run_methatlas <- function(beta_matrix, reference_atlas = system.file("reference_atlas.csv", package = "deconvMe"), temp_dir = NULL, out_dir = NULL, use_epic_reference=FALSE, cpg_subset = NULL){
run_methatlas <- function(beta_matrix, reference = system.file("reference_atlas.csv", package = "deconvMe"), temp_dir = NULL, out_dir = NULL, use_epic_reference=FALSE, cpg_subset = NULL){
# check if python is installed, else install
init_python()

Expand All @@ -34,12 +35,28 @@ run_methatlas <- function(beta_matrix, reference_atlas = system.file("reference_
beta_path = paste0(tmp_dir, "/beta.csv")
write.csv(beta_matrix, beta_path)

# subset reference if applicable
if(use_epic_reference){
reference_atlas <- system.file("reference_atlas_epic.csv", package = "deconvMe")
# Handle reference parameter
if (is.character(reference)) {
# If it's a character, treat as file path and check if it exists
if(!file.exists(reference)) {
stop("Reference file does not exist: ", reference)
}

# If it's epic reference, use the epic reference file
if(use_epic_reference){
reference <- system.file("reference_atlas_epic.csv", package = "deconvMe")
}
ref_df <- read.csv(reference, check.names = FALSE)
} else if (is.data.frame(reference)) {
# If it's a data.frame, use it directly
if (!'CpGs' %in% colnames(reference)) {
stop("No 'CpGs' column in custom reference data.frame.")
}
ref_df <- reference
} else {
stop("reference must be either a character string (file path) or a data.frame")
}

ref_df <- read.csv(reference_atlas, check.names = FALSE)
if (!is.null(cpg_subset)) {
overlap <- intersect(cpg_subset, ref_df$CpGs)
missing <- setdiff(cpg_subset, ref_df$CpGs)
Expand All @@ -54,12 +71,16 @@ run_methatlas <- function(beta_matrix, reference_atlas = system.file("reference_
}
ref_df <- ref_df[ref_df$CpGs %in% overlap, , drop = FALSE]
# Write the subsetted reference to a temp file
reference_atlas <- paste0(tmp_dir, "/reference_atlas_subset.csv")
write.csv(ref_df, reference_atlas, row.names = FALSE)
reference_path <- paste0(tmp_dir, "/reference_atlas_subset.csv")
write.csv(ref_df, reference_path, row.names = FALSE)
}else{
# Write the reference to a temp file
reference_path <- paste0(tmp_dir, "/reference_atlas_temp.csv")
write.csv(ref_df, reference_path, row.names = FALSE)
}

# run meth_atlas
system(paste("python", system.file("deconvolve.py", package = "deconvMe")," -a", reference_atlas, beta_path, "--out", out_dir))
system(paste("python", system.file("deconvolve.py", package = "deconvMe")," -a", reference_path, beta_path, "--out", out_dir))

# read the results to provide as data frame
as.matrix(t(read.csv(paste0(out_dir, "/beta_deconv_output.csv"),
Expand All @@ -75,6 +96,6 @@ run_methatlas <- function(beta_matrix, reference_atlas = system.file("reference_
#' @export
get_methatlas_signature_matrix <- function() {
reference_atlas = system.file("reference_atlas.csv", package = "deconvMe")
read.csv(reference_atlas, check.names = FALSE)
unique(read.csv(reference_atlas, check.names = FALSE))
}

34 changes: 28 additions & 6 deletions R/methylResolver.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
#' @param purityModel Random Forest model to predict mixture purity (unknown content) which allows the calculation of absolute cell type fractions. Required if absolute is TRUE. Default is our RF model trained on the consensus purity estimate (CPE) using TCGA data.
#' @param seed fixed seed to account for RNG influences
#' @param cpg_subset Optional character vector of CpGs to subset the signature matrix. Default: NULL (use all CpGs in the signature).
#' @param reference Either the built-in MethylResolver signature matrix or a data.frame of reference CpGs with a 'CpGs' column and cell types as other columns.
#'
#' @export
#'
run_methylresolver <- function(beta_matrix, doPar = F, numCores = 1, alpha = seq(0.5,0.9,by = 0.05),
absolute = TRUE, purityModel = MethylResolver::RFmodel, seed = 1, cpg_subset = NULL){
absolute = TRUE, purityModel = MethylResolver::RFmodel, seed = 1, cpg_subset = NULL, reference = NULL){

set.seed(seed)

Expand All @@ -23,7 +24,23 @@ run_methylresolver <- function(beta_matrix, doPar = F, numCores = 1, alpha = seq
immediate. = TRUE)
}

sig <- MethylResolver::MethylSig
# Handle reference parameter
if (is.null(reference)) {
sig <- MethylResolver::MethylSig
} else if (is.data.frame(reference)) {
if (!'CpGs' %in% colnames(reference)) {
stop("No 'CpGs' column in custom reference data.frame.")
}
sig <- reference[, -which(colnames(reference) == 'CpGs')]
rownames(sig) <- reference$CpGs
colnames(sig) <- gsub(pattern = '-', replacement = '_', x = colnames(sig))

warning('When a external signature matrix is provided, MethylResolver can no longer estimate tumor purity and absolute will be automatically set to FALSE.')
absolute <- FALSE
} else {
stop("reference must be either NULL (use built-in) or a data.frame")
}

if (!is.null(cpg_subset)) {
overlap <- intersect(cpg_subset, rownames(sig))
missing <- setdiff(cpg_subset, rownames(sig))
Expand All @@ -48,10 +65,15 @@ run_methylresolver <- function(beta_matrix, doPar = F, numCores = 1, alpha = seq
absolute = absolute,
purityModel = purityModel)

result_metrics <- result_methylresolver[,1:4]
result_fractions <- result_methylresolver[,5:15]
result_absolute <- result_methylresolver[,16:26]
result_purity <- result_methylresolver[,27]
result_metrics <- result_methylresolver[,c('RMSE1', 'R1', 'RMSE2', 'R2')]
result_fractions <- result_methylresolver[,colnames(sig)]
if(absolute){
result_absolute <- result_methylresolver[,paste0('abs_',colnames(sig))]
result_purity <- result_methylresolver[['Purity']]
}else{
result_absolute <- NULL
result_purity <- NULL
}

return(list(result_metrics=result_metrics,
result_fractions=result_fractions,
Expand Down
69 changes: 69 additions & 0 deletions tests/testthat/test_signature_matrix.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
suppressMessages(library(deconvMe))
options(matrixStats.useNames.NA = "deprecated")

methyl_set <- minfiData::MsetEx
ratio_set <- minfi::ratioConvert(methyl_set)
beta_matrix <- minfi::getBeta(ratio_set)


test_that("Houseman, EpiDISH, MethAtlas, and MethylResolver signatures have same design",{
houseman_sig <- deconvMe::get_houseman_signature_matrix()
epidish_sig <- deconvMe::get_epidish_signature_matrix("blood")
methatlas_sig <- deconvMe::get_methatlas_signature_matrix()
methylresolver_sig <- deconvMe::get_methylresolver_signature_matrix()

expect_true('CpGs' %in% colnames(houseman_sig))
expect_true('CpGs' %in% colnames(epidish_sig))
expect_true('CpGs' %in% colnames(methatlas_sig))
expect_true('CpGs' %in% colnames(methylresolver_sig))

expect_true(is.data.frame(houseman_sig))
expect_true(is.data.frame(epidish_sig))
expect_true(is.data.frame(methatlas_sig))
expect_true(is.data.frame(methylresolver_sig))

expect_true(nrow(houseman_sig) > 0)
expect_true(nrow(epidish_sig) > 0)
expect_true(nrow(methatlas_sig) > 0)
expect_true(nrow(methylresolver_sig) > 0)
})

test_that("run_epidish works with Houseman signature as external reference", {
houseman_sig <- deconvMe::get_houseman_signature_matrix()
res <- suppressWarnings(deconvMe::run_epidish(beta_matrix, reference = houseman_sig))

expect_true(is.list(res))
expect_true("estF" %in% names(res))
expect_true(nrow(res$estF) > 0)
expect_true(ncol(res$estF) > 0)
})

test_that("run_methatlas works with Houseman signature as external reference", {
houseman_sig <- deconvMe::get_houseman_signature_matrix()
res <- suppressWarnings(deconvMe::run_methatlas(beta_matrix, reference = houseman_sig))

expect_true(is.matrix(res) || is.data.frame(res))
expect_true(nrow(res) > 0)
expect_true(ncol(res) > 0)
})

test_that("run_methylresolver works with Houseman signature as external reference", {
houseman_sig <- deconvMe::get_houseman_signature_matrix()
res <- suppressWarnings(deconvMe::run_methylresolver(beta_matrix, reference = houseman_sig, alpha = 1))

expect_true(is.list(res))
expect_true("result_fractions" %in% names(res))
expect_true(nrow(res$result_fractions) > 0)
expect_true(ncol(res$result_fractions) > 0)
})

test_that("run_methylresolver works with Methatlas signature as external reference", {
methatlas_sig <- deconvMe::get_methatlas_signature_matrix()
res <- suppressWarnings(deconvMe::run_methylresolver(beta_matrix, reference = methatlas_sig, alpha = 1))

expect_true(is.list(res))
expect_true("result_fractions" %in% names(res))
expect_true(nrow(res$result_fractions) > 0)
expect_true(ncol(res$result_fractions) > 0)
})

Loading
Loading