Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
248 changes: 137 additions & 111 deletions R/RMT.R
Original file line number Diff line number Diff line change
@@ -1,133 +1,146 @@
#' Denoising of Covariance matrix using Random Matrix Theory
#'
#'
#' @name estRMT
#'
#'
#' @details
#' This method takes in data as a matrix object. It then
#' fits a marchenko pastur density to eigenvalues of the correlation matrix. All
#' eigenvalues above the cutoff are retained and ones below the cutoff are
#' replaced such that the trace of the correlation matrix is 1 or non-significant
#' eigenvalues are deleted and diagonal of correlation matrix is changed to 1.
#' eigenvalues are deleted and diagonal of correlation matrix is changed to 1.
#' Finally, correlation matrix is converted to covariance matrix. This function
#' was taken and modified from the covmat package (https://github.com/cran/covmat)
#' which has since been deprecated on CRAN.
#'
#' @param R input matrix
#' @param Q ratio of rows/size. Can be supplied externally or fit using data
#' @param cutoff takes two values max/each. If cutoff is max, Q is fitted and
#' @param mat input matrix
#' @param dim_ratio ratio of rows/size. Can be supplied externally or fit using data
#' @param cutoff takes two values max/each. If cutoff is max, Q is fitted and
#' cutoff for eigenvalues is calculated. If cutoff is each, Q is set to
#' row/size. Individual cutoff for each eigenvalue is calculated and used
#' for filteration.
#' @param eigenTreat takes 2 values, average/delete. If average then the noisy
#' for filteration.
#' @param eigenTreat takes 2 values, average/delete. If average then the noisy
#' eigenvalues are averged and each value is replaced by average. If delete
#' then noisy eigenvalues are ignored and the diagonal entries of the
#' then noisy eigenvalues are ignored and the diagonal entries of the
#' correlation matrix are replaced with 1 to make the matrix psd.
#' @param numEig number of eigenvalues that are known for variance calculation.
#' Default is set to 1. If numEig = 0 then variance is assumed to be 1.
#'
#' @importFrom stats optim cov
#'
#' @return A denoised RMT object
#'
#' @examples
#'
#' @examples
#' rand_cor_mat <- cor(matrix(rnorm(100), nrow = 10))
#' denoised_rand_cor_mat <- estRMT(rand_cor_mat)$cov
#'
#'
#' @author Rohit Arora
#'
#'
#' @export
#'
estRMT <- function(R, Q = NA, cutoff = c("max", "each"),
eigenTreat = c("average", "delete") , numEig = 1) {

.data <- as.matrix(R)
T <- nrow(.data)
M <- ncol(.data)
if (T < M) stop("Does not work when nrow < ncol")

if(!is.na(Q)) if(Q < 1) stop("Does not work for Q<1")
cutoff <- cutoff[1]; if(!cutoff %in% c("max", "each")) stop("Invalid cutoff")
if(cutoff == "each") Q <- T/M

eigenTreat <- eigenTreat[1];
if(!eigenTreat %in% c("average", "delete")) stop("Invalid eigenTreat option")

if (numEig < 0) stop("Number of eigenvalues must be non-negative")

#eigenvalues can be negative. To avoid this e need a positive-definite matrix
S <- cov(.data); S <- as.matrix(Matrix::nearPD(S)$mat)
D <- diag(diag(S)); C <- Matrix::cov2cor(S);

estRMT <- function(mat, dim_ratio = NA, cutoff = c("max", "each"), eigenTreat = c("average", "delete"), numEig = 1) {
.data <- as.matrix(mat)
n_row <- nrow(.data)
n_col <- ncol(.data)
cutoff <- match.arg(cutoff)
eigenTreat <- match.arg(eigenTreat)

if (cutoff == "each") {
dim_ratio <- n_row / n_col
}

stopifnot("Does not work when nrow < ncol" = n_row >= n_col)
if (!is.na(dim_ratio)) {
stopifnot("Does not work for dim_ration < 1" = dim_ratio >= 1)
}

stopifnot("Number of eigenvalues must be non-negative" = numEig > 0)

#eigenvalues can be negative. To avoid this e need a positive-definite matrix
S <- cov(.data)
S <- as.matrix(Matrix::nearPD(S)$mat)
D <- diag(diag(S))
C <- Matrix::cov2cor(S)

# Marchenko Pastur density is defined for eigenvalues of correlation matrix
eigen.C <- eigen(C,symmetric=T)
lambdas <- eigen.C$values; sigma.sq <- mean(lambdas)

sigma.sq <- 1 - sum(head(lambdas,numEig))/M

#minimize log-likelihood.
eigen.C <- eigen(C, symmetric = n_row)
lambdas <- eigen.C$values
sigma.sq <- mean(lambdas)

sigma.sq <- 1 - sum(head(lambdas, numEig)) / n_col

#minimize log-likelihood.
loglik.marpas <- function(theta, sigma.sq) {
Q <- theta
val <- sapply(lambdas, function(x) RMTstat::dmp(x,svr = Q, var=sigma.sq))
dim_ratio <- theta
val <- sapply(lambdas, function(x) RMTstat::dmp(x, svr = dim_ratio, var = sigma.sq))
val <- val[val > 0]
ifelse(is.infinite(-sum(log(val))), .Machine$double.xmax, -sum(log(val)))
}
if( is.na(Q) && cutoff != "each") {

if (is.na(dim_ratio) && cutoff != "each") {
lb <- 1
ub <- max(T/M,5)
ub <- max(n_row / n_col, 5)
starts <- seq(lb, ub, length.out = 50)
# this would be a logical place to use BiocParallel::bpapply
fit.marpas <- do.call(rbind,
lapply(starts,
function(start) {
optim(par=start,
fn=loglik.marpas,
method="L-BFGS-B",
lower=lb,
upper=ub,
sigma.sq=sigma.sq)
}))
idx <- grep("CONVERGENCE",unlist(fit.marpas[,"message"]))
vals <- fit.marpas[idx,c("par","value")] # wtf is going on here
Q <- unlist(vals[which.min(vals[,"value"]),"par"])
fit.marpas <- do.call(
rbind,
lapply(
starts,
function(start) {
optim(par = start, fn = loglik.marpas, method = "L-BFGS-B", lower = lb, upper = ub, sigma.sq = sigma.sq)
}
)
)
idx <- grep("CONVERGENCE", unlist(fit.marpas[, "message"]))
vals <- fit.marpas[idx, c("par", "value")] # wtf is going on here
dim_ratio <- unlist(vals[which.min(vals[, "value"]), "par"])
}
lambda.max <- RMTstat::qmp(1, svr=Q, var = sigma.sq)

lambda.max <- RMTstat::qmp(1, svr = dim_ratio, var = sigma.sq)
# now that we have a fit. lets denoise eigenvalues below the cutoff
if(cutoff == "max") {

if (cutoff == "max") {
idx <- which(lambdas > lambda.max)
} else if(cutoff == "each") {
} else if (cutoff == "each") {
cutoff.each <- sapply(2:length(lambdas), function(i) {
eigr <- lambdas[i:M]
mean(eigr)*(1 + (M - i + 1)/T + 2*sqrt((M - i + 1)/T))
eigr <- lambdas[i:n_col]
mean(eigr) * (1 + (n_col - i + 1) / n_row + 2 * sqrt((n_col - i + 1) / n_row))
})
idx <- c(1, 1 + which(lambdas[-1] > cutoff.each))
idx <- c(1, 1 + which(lambdas[-1] > cutoff.each))
}

if (length(idx) == 0) {
return(S)
}

val <- eigen.C$values[idx]
vec <- eigen.C$vectors[, idx, drop = FALSE]
sum <- 0
for (i in seq_len(ncol(vec))) {
sum <- sum + val[i] * vec[, i] %*% t(vec[, i])
}

if (length(idx) == 0) return(S)

val <- eigen.C$values[idx]; vec <- eigen.C$vectors[,idx,drop=FALSE]
sum <- 0; for (i in 1:ncol(vec)) sum <- sum + val[i]*vec[,i] %*% t(vec[,i])

# trace of correlation matrix is 1. Use this to determine all the remaining
# eigenvalues


# trace of correlation matrix is 1. Use this to determine all the remaining eigenvalues

lambdas.cleaned <- c()
clean.C <- if (eigenTreat == "average") {
lambdas.cleaned <- c(val, rep(1,M))
sum + sum(eigen.C$values[-idx])/M * diag(rep(1,M))
lambdas.cleaned <- c(val, rep(1, n_col))
sum + sum(eigen.C$values[-idx]) / n_col * diag(rep(1, n_col))
} else if (eigenTreat == "delete") {
lambdas.cleaned <- c(val, rep(0,M))
lambdas.cleaned <- c(val, rep(0, n_col))
diag(sum) <- 1
sum
}

# convert correlation to covariance matrix and return
clean.S <- D^0.5 %*% clean.C %*% D^0.5
fit <- list(cov = clean.S, Q = Q, var = sigma.sq, eigVals = lambdas,
eigVals.cleaned = lambdas.cleaned, lambdascutoff = lambda.max)

fit <- list(
cov = clean.S,
dim_ratio = dim_ratio,
var = sigma.sq,
eigVals = lambdas,
eigVals.cleaned = lambdas.cleaned,
lambdascutoff = lambda.max
)

class(fit) <- "RMT"
fit
}
Expand All @@ -146,7 +159,7 @@ estRMT <- function(R, Q = NA, cutoff = c("max", "each"),
#' @param assay What assay type this is ("rna", "atac")
#'
#' @return A denoised correlation matrix object for plotting with plotCorMatrix
#'
#'
#' @import SummarizedExperiment
#' @import GenomicRanges
#'
Expand All @@ -156,10 +169,16 @@ estRMT <- function(R, Q = NA, cutoff = c("max", "each"),
#' data("k562_scrna_chr14", package = "compartmap")
#' denoised_cor_mat <- getDenoisedCorMatrix(k562_scrna_chr14, genome = "hg19", assay = "rna")

getDenoisedCorMatrix <- function(obj, res = 1e6, chr = "chr14",
genome = c("hg19", "hg38", "mm9", "mm10"),
iter = 2, targets = NULL, prior.means = NULL,
assay = c("rna", "atac", "array")) {
getDenoisedCorMatrix <- function(
obj,
res = 1e6,
chr = "chr14",
genome = c("hg19", "hg38", "mm9", "mm10"),
iter = 2,
targets = NULL,
prior.means = NULL,
assay = c("rna", "atac", "array")
) {
## this is a wrapper to give back a denoised correlation matrix to plot
#match the assay args
assay <- match.arg(assay)
Expand All @@ -172,20 +191,27 @@ getDenoisedCorMatrix <- function(obj, res = 1e6, chr = "chr14",

#subset to selected chromosome(s)
obj <- keepSeqlevels(obj, chr, pruning.mode = "coarse")

#get the prior means
if (is.null(prior.means)) {
prior.means <- getGlobalMeans(obj=obj, targets=targets,
assay=assay)
prior.means <- getGlobalMeans(obj = obj, targets = targets, assay = assay)
} else {
message("Assuming the prior means passed were derived from the full sample set.")
}

#shrink bins
message("Shrinking bins with the JSE.")
bin.mat <- shrinkBins(x = obj, original.x = obj, prior.means = prior.means,
chr = chr, res = res, targets = targets,
jse = TRUE, assay = assay, genome = genome)
bin.mat <- shrinkBins(
x = obj,
original.x = obj,
prior.means = prior.means,
chr = chr,
res = res,
targets = targets,
jse = TRUE,
assay = assay,
genome = genome
)
#get the raw correlation matrix
cor.mat <- getCorMatrix(binmat = bin.mat, squeeze = FALSE)
#denoise with RMT
Expand All @@ -198,13 +224,12 @@ getDenoisedCorMatrix <- function(obj, res = 1e6, chr = "chr14",
cor.mat.denoise <- compartmap::estRMT(cor.mat.denoise)$cov
}
}

#rescale
cor.mat.denoise <- scales::rescale(cor.mat.denoise,
to = c(0,1))
cor.mat.denoise <- scales::rescale(cor.mat.denoise, to = c(0, 1))
#tidy up
colnames(cor.mat.denoise) <- rownames(cor.mat.denoise) <- as.character(granges(cor.mat$gr.cor))

return(cor.mat.denoise)
}

Expand All @@ -219,22 +244,24 @@ getDenoisedCorMatrix <- function(obj, res = 1e6, chr = "chr14",
#' @param lowertri Whether to keep the lower triangle of the matrix
#'
#' @return Either a ggplot object or plot
#'
#'
#' @importFrom rlang .data
#' @importFrom ggplot2 ggplot aes geom_raster scale_fill_gradient2 theme_minimal theme element_blank
#'
#'
#' @export
#'
#' @examples
#' dummy <- matrix(rnorm(10000), ncol=25)
#' set.seed(1000)
#' my_plot <- plotCorMatrix(dummy, return.plot.obj = TRUE)

plotCorMatrix <- function(denoised.cor.mat,
midpoint = 0.3,
return.plot.obj = FALSE,
uppertri = FALSE,
lowertri = FALSE) {
plotCorMatrix <- function(
denoised.cor.mat,
midpoint = 0.3,
return.plot.obj = FALSE,
uppertri = FALSE,
lowertri = FALSE
) {
## upper tri
if (uppertri) {
denoised.cor.mat[upper.tri(denoised.cor.mat)] <- 0
Expand All @@ -248,16 +275,15 @@ plotCorMatrix <- function(denoised.cor.mat,
## plot
p <- ggplot(cor.mat.melt, aes(x = .data$Var2, y = .data$Var1, fill = .data$value)) +
geom_raster() +
scale_fill_gradient2(low = "white", mid = "white", high = "red3", midpoint = {{ midpoint }},
name = "Correlation") +
scale_fill_gradient2(low = "white", mid = "white", high = "red3", midpoint = {{ midpoint }}, name = "Correlation") +
theme_minimal() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
panel.grid = element_blank(),
plot.margin= grid::unit(c(0, 0, 0, 0), "in")
)
plot.margin = grid::unit(c(0, 0, 0, 0), "in")
)

if (return.plot.obj) {
return(p)
} else {
Expand Down
10 changes: 5 additions & 5 deletions man/estRMT.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading