From 4790d153f801f88173de6818045058c566b87dca Mon Sep 17 00:00:00 2001 From: sandrews5 Date: Thu, 14 Nov 2019 19:20:41 -0800 Subject: [PATCH 1/8] Fix warning in DataFrame call; remove stringsAsFactors argument --- R/estimateCellCounts.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/estimateCellCounts.R b/R/estimateCellCounts.R index aac00ea..cf7a184 100644 --- a/R/estimateCellCounts.R +++ b/R/estimateCellCounts.R @@ -297,8 +297,7 @@ estimateCellCounts <- function(rgSet, compositeCellType = "Blood", sampleNames = c(colnames(rgSet), colnames(referenceRGset)), studyIndex = rep( x = c("user", "reference"), - times = c(ncol(rgSet), ncol(referenceRGset))), - stringsAsFactors = FALSE) + times = c(ncol(rgSet), ncol(referenceRGset)))) referencePd <- colData(referenceRGset) combinedRGset <- combineArrays( object1 = rgSet, From f6d27d734ae605314fa9ea2438393f2154009fa5 Mon Sep 17 00:00:00 2001 From: sandrews5 Date: Thu, 14 Nov 2019 19:23:24 -0800 Subject: [PATCH 2/8] Add Bayesian measurement error function to internal functions of estimateCellCounts --- R/estimateCellCounts.R | 85 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/R/estimateCellCounts.R b/R/estimateCellCounts.R index cf7a184..f873aad 100644 --- a/R/estimateCellCounts.R +++ b/R/estimateCellCounts.R @@ -218,6 +218,91 @@ validationCellType <- function(Y, pheno, modelFix, modelBatch=NULL, degFree = degFree) } +Bayes.estimate <- function(rgSet, counts, referencePkg, compositeCellType, + inferReference = "Blood", inferCellType = "Eos", minDiff = 0.028, maxDiff = 0.168, verbose = verbose){ + + if (verbose) message("[estimateCellCounts] Generating ",inferReference," estimates to predict cell type ",inferCellType," in ",compositeCellType,".\n") + cellTypes <- colnames(counts) + + referencePkg <- sub(compositeCellType, inferReference, referencePkg) + if (!require(referencePkg, character.only = TRUE)) { + stop(sprintf("Could not find reference data package for compositeCellType '%s' and referencePlatform '%s' (inferred package name is '%s')", + compositeCellType, platform, referencePkg)) + } + + data(list = referencePkg) + referenceRGset <- get(referencePkg) + + if (!all(cellTypes %in% referenceRGset$CellType)) { + cellTypes <- cellTypes[cellTypes %in% referenceRGset$CellType] + } + + processMethod <- get("preprocessQuantile") + probeSelect <- "both" + + newpd <- DataFrame( + sampleNames = c(colnames(rgSet), colnames(referenceRGset)), + studyIndex = rep( + x = c("user", "reference"), + times = c(ncol(rgSet), ncol(referenceRGset)))) + referencePd <- colData(referenceRGset) + combinedRGset <- combineArrays( + object1 = rgSet, + object2 = referenceRGset, + outType = "IlluminaHumanMethylation450k") + colData(combinedRGset) <- newpd + colnames(combinedRGset) <- newpd$sampleNames + rm(referenceRGset) + + combinedMset <- processMethod(combinedRGset) + referenceMset <- combinedMset[, combinedMset$studyIndex == "reference"] + colData(referenceMset) <- as(referencePd, "DataFrame") + mSet <- combinedMset[, combinedMset$studyIndex == "user"] + colData(mSet) <- as(colData(rgSet), "DataFrame") + rm(combinedMset) + + compData <- pickCompProbes( + mSet = referenceMset, + cellTypes = c(cellTypes, inferCellType), + compositeCellType = inferReference, + probeSelect = probeSelect) + coefs <- compData$coefEsts + + inferprop <- projectCellType(getBeta(mSet)[rownames(coefs), ], coefs) + + if (verbose) message("[estimateCellCounts] Performing ",inferCellType," estimation in ", compositeCellType,".\n") + counts[counts < 0.000001] = 0.000001 + counts.logit = log(counts/(1-counts)) + + inferprop[inferprop < 0.000001] = 0.000001 + inferprop.logit = log(inferprop/(1-inferprop)) + + diff = inferprop.logit[,cellTypes] - counts.logit[,cellTypes] + + y = inferprop.logit[,inferCellType] + taumaxVAR = 1/min(apply(diff, 2, var)) + tauminVAR = 1/max(apply(diff, 2, var)) + S2inv = 1/10^4 + n = dim(inferprop.logit)[1] + max.steps = 50000 + mu = matrix(NA, nrow = n, ncol = max.steps) + ###Sample for posterior mu + for (i in 1:max.steps){ + v = runif(1, minDiff, maxDiff) + sigmainv = runif(1, tauminVAR, taumaxVAR) + mu.mean = sigmainv*(y-v)/(sigmainv+S2inv) + mu.var = 1/(sigmainv+S2inv) + mu[,i] = rnorm(n, mu.mean, mu.var) + } + + EOS = apply(mu, 1, quantile, probs = 0.5) + EOS.prop = round(exp(EOS)/(1+exp(EOS)), digit = 4) + + cell.prop = cbind(counts, Eos = EOS.prop) + cell.prop +} + + # Exported functions ----------------------------------------------------------- estimateCellCounts <- function(rgSet, compositeCellType = "Blood", From 3e1f77e4d80bca5ecead60f6f67cec6fe55655ac Mon Sep 17 00:00:00 2001 From: sandrews5 Date: Thu, 14 Nov 2019 19:30:23 -0800 Subject: [PATCH 3/8] Add check for invocation of bayesian measurement error function it not all requested cell types are in reference panel --- R/estimateCellCounts.R | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/R/estimateCellCounts.R b/R/estimateCellCounts.R index f873aad..c601f4a 100644 --- a/R/estimateCellCounts.R +++ b/R/estimateCellCounts.R @@ -351,9 +351,27 @@ estimateCellCounts <- function(rgSet, compositeCellType = "Blood", stop("the sample/column names in the user set must not be in the ", "reference data ") } + bayesEst <- FALSE + bayesParams <- data.frame(compositeCellType = c("CordBlood"), inferCellType = c("Eos"), inferReference = c("Blood"), minDiff = c(-0.85), maxDiff = c(0.26)) #Can be modified when additional priors are estimated. if (!all(cellTypes %in% referenceRGset$CellType)) { - stop(sprintf("all elements of argument 'cellTypes' needs to be part of the reference phenoData columns 'CellType' (containg the following elements: '%s')", - paste(unique(referenceRGset$cellType), collapse = "', '"))) + inferCellType <- cellTypes[!cellTypes%in%referenceRGset$CellType] + validrow <- which(bayesParams$inferCellType %in% inferCellType) + if (length(validrow) == 1){ + if(bayesParams$compositeCellType[validrow] %in% compositeCellType){ + bayesEst <- TRUE + bayesParams <- bayesParams[validrow,] + validrow <- match(compositeCellType, bayesParams$compositeCellType) + + inferCellType <- as.character(with(bayesParams, inferCellType[validrow])) + cellTypes <- cellTypes[!cellTypes %in% inferCellType] + inferReference <- as.character(with(bayesParams, inferReference[validrow])) + minDiff <- with(bayesParams, minDiff[validrow]) + maxDiff <- with(bayesParams, maxDiff[validrow]) + } + } else{ + stop(paste0("all elements of argument 'cellTypes' need to be part of the reference phenoData columns 'CellType' (containg the following elements: ", + paste(unique(referenceRGset$CellType), collapse = "', '"),") or have known priors to supply to Bayesian measurement error inference function.")) + } } if (length(unique(cellTypes)) < 2) { stop("At least 2 cell types must be provided.") From b139e2f14bd0721f9652b938773185562608ce10 Mon Sep 17 00:00:00 2001 From: sandrews5 Date: Thu, 14 Nov 2019 19:33:39 -0800 Subject: [PATCH 4/8] Add call to measurement error function after initial composition estimation --- R/estimateCellCounts.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/R/estimateCellCounts.R b/R/estimateCellCounts.R index c601f4a..5bb0e8b 100644 --- a/R/estimateCellCounts.R +++ b/R/estimateCellCounts.R @@ -452,6 +452,19 @@ estimateCellCounts <- function(rgSet, compositeCellType = "Blood", counts <- projectCellType(getBeta(mSet)[rownames(coefs), ], coefs) rownames(counts) <- colnames(rgSet) + if (bayesEst == TRUE){ + if (verbose) message("[estimateCellCounts] Estimating ",inferCellType," composition using Bayesian measurement error model.\n") + if (!("Gran" %in% cellTypes) && inferCellType == "Eos"){ + message("[estimateCellCounts] It is recommended for 'Gran' to be included in cellTypes argument for eosinophil estimation.\n") + } else { + if (length(cellTypes) < 3){ + warning("[estimateCellCounts] At least 3 requested cell types recommended for Bayesian measurement error model.\n") + } + counts <- Bayes.estimate(rgSet, counts, referencePkg, compositeCellType, inferReference = inferReference, + inferCellType = inferCellType, minDiff = minDiff, maxDiff = maxDiff, verbose = verbose) + } + } + if (meanPlot) { smeans <- compData$sampleMeans smeans <- smeans[order(names(smeans))] From a790be20b84fdf10921487d87af481e1c6d3a5f0 Mon Sep 17 00:00:00 2001 From: sandrews5 Date: Thu, 14 Nov 2019 20:36:30 -0800 Subject: [PATCH 5/8] Add eosinophil estimation paper citation --- man/estimateCellCounts.Rd | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/man/estimateCellCounts.Rd b/man/estimateCellCounts.Rd index 6e73e93..8e59fc7 100644 --- a/man/estimateCellCounts.Rd +++ b/man/estimateCellCounts.Rd @@ -103,6 +103,11 @@ batch effects. \emph{DNA methylation of cord blood cell types: Applications for mixed cell birth studies.} Epigenetics (2016) 11:5. doi:\href{http://dx.doi.org/10.1080/15592294.2016.1161875}{10.1080/15592294.2016.1161875}. + + Y Jiang, H Zhang, SV Andrews, H Arshad, S Ewart, JW Holloway, MD Fallin, KM Bakulski, and W Karmaus. + \emph{Estimation of Eosinophil Cells in Cord Blood with References Based on Blood in Adults via Bayesian Measurement Error Modeling.} + Bioinformatics (2019). + doi:\href{https://doi.org/10.1093/bioinformatics/btz839}{10.1093/bioinformatics/btz839}. } \author{ From f852df5bc2cf5c4c97a9a4f8d5543e26b98cc165 Mon Sep 17 00:00:00 2001 From: sandrews5 Date: Sun, 17 Nov 2019 12:57:05 -0800 Subject: [PATCH 6/8] Update documentation to reflect new argument and explain default behavior of measurement error method --- man/estimateCellCounts.Rd | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/man/estimateCellCounts.Rd b/man/estimateCellCounts.Rd index 8e59fc7..fff1e0a 100644 --- a/man/estimateCellCounts.Rd +++ b/man/estimateCellCounts.Rd @@ -15,6 +15,7 @@ estimateCellCounts(rgSet, compositeCellType = "Blood", referencePlatform = c("IlluminaHumanMethylation450k", "IlluminaHumanMethylationEPIC", "IlluminaHumanMethylation27k"), + bayesMethod = TRUE, returnAll = FALSE, meanPlot = FALSE, verbose = TRUE, \dots) } \arguments{ @@ -39,6 +40,7 @@ estimateCellCounts(rgSet, compositeCellType = "Blood", \item{referencePlatform}{The platform for the reference dataset; if the input \code{rgSet} belongs to another platform, it will be converted using \code{\link{convertArray}}.} + \item{bayesMethod}{Should the Bayesian measurement error inference method be used to estimate elements of cellTypes that are not included in the specified reference platform? See details.} \item{returnAll}{Should the composition table and the normalized user supplied data be return?} \item{verbose}{Should the function be verbose?} @@ -67,6 +69,8 @@ For cord blood, these are "Bcell", "CD4T", "CD8T", "Gran", "Mono", "Neu", and "n cortex, these are "NeuN_neg" and "NeuN_pos". See documentation of individual reference packages for more details. +When \code{bayesMethod} is TRUE, and elements of \code{cellTypes} are not in the specified reference data package, the function will attempt to use a Bayesian measurement error procedure to estimate the additional cell type. Currently, this feature only supports eosinophil estimation when \code{compositeCellType} is "CordBlood", but additional cell type - tissue combinations will be added in the future. See Jiang and Zhang et al. for additional details. + The \code{meanPlot} should be used to check for large batch effects in the data, reducing the confidence placed in the composition estimates. This plot depicts the average DNA methylation across the cell-type discrimating probes From bbff99bee64b42a6326e3c92d11d9f7f2755ddc4 Mon Sep 17 00:00:00 2001 From: sandrews5 Date: Sun, 17 Nov 2019 12:57:53 -0800 Subject: [PATCH 7/8] Add new bayesMethod logical argument --- R/estimateCellCounts.R | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/R/estimateCellCounts.R b/R/estimateCellCounts.R index 5bb0e8b..610cd81 100644 --- a/R/estimateCellCounts.R +++ b/R/estimateCellCounts.R @@ -313,6 +313,7 @@ estimateCellCounts <- function(rgSet, compositeCellType = "Blood", "IlluminaHumanMethylation450k", "IlluminaHumanMethylationEPIC", "IlluminaHumanMethylation27k"), + bayesMethod = TRUE, returnAll = FALSE, meanPlot = FALSE, verbose = TRUE, ...) { @@ -352,27 +353,31 @@ estimateCellCounts <- function(rgSet, compositeCellType = "Blood", "reference data ") } bayesEst <- FALSE - bayesParams <- data.frame(compositeCellType = c("CordBlood"), inferCellType = c("Eos"), inferReference = c("Blood"), minDiff = c(-0.85), maxDiff = c(0.26)) #Can be modified when additional priors are estimated. + bayesParams <- data.frame(compositeCellType = c("CordBlood"), inferCellType = c("Eos"), inferReference = c("Blood"), minDiff = c(0.028), maxDiff = c(0.168)) #Can be modified when additional priors are estimated. if (!all(cellTypes %in% referenceRGset$CellType)) { - inferCellType <- cellTypes[!cellTypes%in%referenceRGset$CellType] - validrow <- which(bayesParams$inferCellType %in% inferCellType) - if (length(validrow) == 1){ - if(bayesParams$compositeCellType[validrow] %in% compositeCellType){ - bayesEst <- TRUE - bayesParams <- bayesParams[validrow,] - validrow <- match(compositeCellType, bayesParams$compositeCellType) - - inferCellType <- as.character(with(bayesParams, inferCellType[validrow])) - cellTypes <- cellTypes[!cellTypes %in% inferCellType] - inferReference <- as.character(with(bayesParams, inferReference[validrow])) - minDiff <- with(bayesParams, minDiff[validrow]) - maxDiff <- with(bayesParams, maxDiff[validrow]) + if(bayesMethod == TRUE){ + inferCellType <- cellTypes[!cellTypes%in%referenceRGset$CellType] + validrow <- which(bayesParams$inferCellType %in% inferCellType) + if (length(validrow) == 1){ + if(bayesParams$compositeCellType[validrow] %in% compositeCellType){ + bayesEst <- TRUE + bayesParams <- bayesParams[validrow,] + validrow <- match(compositeCellType, bayesParams$compositeCellType) + + inferCellType <- as.character(with(bayesParams, inferCellType[validrow])) + cellTypes <- cellTypes[!cellTypes %in% inferCellType] + inferReference <- as.character(with(bayesParams, inferReference[validrow])) + minDiff <- with(bayesParams, minDiff[validrow]) + maxDiff <- with(bayesParams, maxDiff[validrow]) + } + } else{ + stop("If element of argument 'cellTypes' is not part of reference phenoData columns and bayesMethod = TRUE, must specify cell type/tissue combination with known priors for Bayesian measurement error inference function. See documentation for additional details.") } } else{ stop(paste0("all elements of argument 'cellTypes' need to be part of the reference phenoData columns 'CellType' (containg the following elements: ", - paste(unique(referenceRGset$CellType), collapse = "', '"),") or have known priors to supply to Bayesian measurement error inference function.")) + paste(unique(referenceRGset$CellType), collapse = "', '"),") or set bayesMethod = TRUE.")) } - } + } if (length(unique(cellTypes)) < 2) { stop("At least 2 cell types must be provided.") } @@ -464,7 +469,7 @@ estimateCellCounts <- function(rgSet, compositeCellType = "Blood", inferCellType = inferCellType, minDiff = minDiff, maxDiff = maxDiff, verbose = verbose) } } - + if (meanPlot) { smeans <- compData$sampleMeans smeans <- smeans[order(names(smeans))] From 79d57e5508bc4bb187e1f78da163af47f5595bec Mon Sep 17 00:00:00 2001 From: sandrews5 Date: Tue, 30 Mar 2021 18:23:13 -0700 Subject: [PATCH 8/8] Update local copy --- DESCRIPTION | 2 +- R/qc.R | 2 +- R/read.geo.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 89ae720..bf6e80e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: minfi -Version: 1.29.5 +Version: 1.33.1 Title: Analyze Illumina Infinium DNA methylation arrays Description: Tools to analyze & visualize Illumina Infinium methylation arrays. Authors@R: c(person(c("Kasper", "Daniel"), "Hansen", role = c("cre", "aut"), diff --git a/R/qc.R b/R/qc.R index 6c6954c..c12db14 100644 --- a/R/qc.R +++ b/R/qc.R @@ -68,7 +68,7 @@ controlStripPlot <- function(rgSet, ctl <- rbind( cbind(channel = "Red", ctlR), cbind(channel = "Green", ctlG)) - if (any((ctl$value < xlim[1]) || (ctl$value > xlim[2]))) { + if (any((ctl$value < xlim[1]) | (ctl$value > xlim[2]))) { message("Warning: ", controlType, " probes outside plot range") } fig <- xyplot( diff --git a/R/read.geo.R b/R/read.geo.R index 4774ea4..aee3607 100644 --- a/R/read.geo.R +++ b/R/read.geo.R @@ -60,7 +60,7 @@ makeGenomicRatioSetFromMatrix <- function(mat,rownames = NULL, pData = NULL, if (is.null(pData)) { pData <- DataFrame(X1 = seq_len(ncol(mat)), row.names = colnames(mat)) } - if (class(pData) != "DataFrame") { + if (!is(pData, "DataFrame")) { stop(sprintf( "'pData' must be DataFrame or data.frame. It is a %s.", class(pData)))