huishenlab · jamespeapen · Feb 25, 2026 · Jan 13, 2026 · Feb 25, 2026
diff --git a/R/RMT.R b/R/RMT.R
@@ -1,133 +1,146 @@
 #' Denoising of Covariance matrix using Random Matrix Theory
-#' 
+#'
 #' @name estRMT
-#' 
+#'
 #' @details
 #' This method takes in data as a matrix object. It then
 #' fits a marchenko pastur density to eigenvalues of the correlation matrix. All
 #' eigenvalues above the cutoff are retained and ones below the cutoff are
 #' replaced such that the trace of the correlation matrix is 1 or non-significant
-#' eigenvalues are deleted and diagonal of correlation matrix is changed to 1. 
+#' eigenvalues are deleted and diagonal of correlation matrix is changed to 1.
 #' Finally, correlation matrix is converted to covariance matrix. This function
 #' was taken and modified from the covmat package (https://github.com/cran/covmat)
 #' which has since been deprecated on CRAN.
 #'
-#' @param  R input matrix
-#' @param  Q ratio of rows/size. Can be supplied externally or fit using data
-#' @param  cutoff takes two values max/each. If cutoff is max, Q is fitted and 
+#' @param  mat input matrix
+#' @param  dim_ratio ratio of rows/size. Can be supplied externally or fit using data
+#' @param  cutoff takes two values max/each. If cutoff is max, Q is fitted and
 #'          cutoff for eigenvalues is calculated. If cutoff is each, Q is set to
 #'          row/size. Individual cutoff for each eigenvalue is calculated and used
-#'          for filteration. 
-#' @param eigenTreat takes 2 values, average/delete. If average then the noisy 
+#'          for filteration.
+#' @param eigenTreat takes 2 values, average/delete. If average then the noisy
 #'        eigenvalues are averged and each value is replaced by average. If delete
-#'        then noisy eigenvalues are ignored and the diagonal entries of the 
+#'        then noisy eigenvalues are ignored and the diagonal entries of the
 #'        correlation matrix are replaced with 1 to make the matrix psd.
 #' @param numEig number of eigenvalues that are known for variance calculation.
 #'        Default is set to 1. If numEig = 0 then variance is assumed to be 1.
 #'
 #' @importFrom stats optim cov
 #'
 #' @return A denoised RMT object
-#' 
-#' @examples 
+#'
+#' @examples
 #' rand_cor_mat <- cor(matrix(rnorm(100), nrow = 10))
 #' denoised_rand_cor_mat <- estRMT(rand_cor_mat)$cov
-#'        
+#'
 #' @author Rohit Arora
-#' 
+#'
 #' @export
-#' 
-estRMT <- function(R, Q = NA, cutoff = c("max", "each"), 
-                   eigenTreat = c("average", "delete") , numEig = 1) {  
-
-  .data <- as.matrix(R)
-  T <- nrow(.data)
-  M <- ncol(.data) 
-  if (T < M) stop("Does not work when nrow < ncol")
-
-  if(!is.na(Q)) if(Q < 1) stop("Does not work for Q<1")
-  cutoff <- cutoff[1]; if(!cutoff %in% c("max", "each")) stop("Invalid cutoff")
-  if(cutoff == "each") Q <- T/M
-
-  eigenTreat <- eigenTreat[1]; 
-  if(!eigenTreat %in% c("average", "delete")) stop("Invalid eigenTreat option")
-
-  if (numEig < 0) stop("Number of eigenvalues must be non-negative")
-
-  #eigenvalues can be negative. To avoid this e need a positive-definite matrix 
-  S <- cov(.data); S <- as.matrix(Matrix::nearPD(S)$mat)
-  D <- diag(diag(S)); C <- Matrix::cov2cor(S);
-
+estRMT <- function(mat, dim_ratio = NA, cutoff = c("max", "each"), eigenTreat = c("average", "delete"), numEig = 1) {
+  .data <- as.matrix(mat)
+  n_row <- nrow(.data)
+  n_col <- ncol(.data)
+  cutoff <- match.arg(cutoff)
+  eigenTreat <- match.arg(eigenTreat)
+
+  if (cutoff == "each") {
+    dim_ratio <- n_row / n_col
+  }
+
+  stopifnot("Does not work when nrow < ncol" = n_row >= n_col)
+  if (!is.na(dim_ratio)) {
+    stopifnot("Does not work for dim_ration < 1" = dim_ratio >= 1)
+  }
+
+  stopifnot("Number of eigenvalues must be non-negative" = numEig > 0)
+
+  #eigenvalues can be negative. To avoid this e need a positive-definite matrix
+  S <- cov(.data)
+  S <- as.matrix(Matrix::nearPD(S)$mat)
+  D <- diag(diag(S))
+  C <- Matrix::cov2cor(S)
+
   # Marchenko Pastur density is defined for eigenvalues of correlation matrix
-  eigen.C <- eigen(C,symmetric=T)
-  lambdas <- eigen.C$values; sigma.sq <- mean(lambdas)
-
-  sigma.sq <- 1 - sum(head(lambdas,numEig))/M
-
-  #minimize log-likelihood. 
+  eigen.C <- eigen(C, symmetric = n_row)
+  lambdas <- eigen.C$values
+  sigma.sq <- mean(lambdas)
+
+  sigma.sq <- 1 - sum(head(lambdas, numEig)) / n_col
+
+  #minimize log-likelihood.
   loglik.marpas <- function(theta, sigma.sq) {
-    Q <- theta
-    val <- sapply(lambdas, function(x) RMTstat::dmp(x,svr = Q, var=sigma.sq))
+    dim_ratio <- theta
+    val <- sapply(lambdas, function(x) RMTstat::dmp(x, svr = dim_ratio, var = sigma.sq))
     val <- val[val > 0]
     ifelse(is.infinite(-sum(log(val))), .Machine$double.xmax, -sum(log(val)))
   }
-  
-  if( is.na(Q) && cutoff != "each") {
+
+  if (is.na(dim_ratio) && cutoff != "each") {
     lb <- 1
-    ub <- max(T/M,5)
+    ub <- max(n_row / n_col, 5)
     starts <- seq(lb, ub, length.out = 50)
     # this would be a logical place to use BiocParallel::bpapply
-    fit.marpas <- do.call(rbind, 
-                          lapply(starts,
-                                 function(start) { 
-                                   optim(par=start, 
-                                         fn=loglik.marpas, 
-                                         method="L-BFGS-B", 
-                                         lower=lb, 
-                                         upper=ub,
-                                         sigma.sq=sigma.sq)
-                                 }))
-    idx <- grep("CONVERGENCE",unlist(fit.marpas[,"message"]))
-    vals <- fit.marpas[idx,c("par","value")] # wtf is going on here
-    Q <- unlist(vals[which.min(vals[,"value"]),"par"])    
+    fit.marpas <- do.call(
+      rbind,
+      lapply(
+        starts,
+        function(start) {
+          optim(par = start, fn = loglik.marpas, method = "L-BFGS-B", lower = lb, upper = ub, sigma.sq = sigma.sq)
+        }
+      )
+    )
+    idx <- grep("CONVERGENCE", unlist(fit.marpas[, "message"]))
+    vals <- fit.marpas[idx, c("par", "value")] # wtf is going on here
+    dim_ratio <- unlist(vals[which.min(vals[, "value"]), "par"])
   }
-  
-  lambda.max <- RMTstat::qmp(1, svr=Q, var = sigma.sq)  
+
+  lambda.max <- RMTstat::qmp(1, svr = dim_ratio, var = sigma.sq)
   # now that we have a fit. lets denoise eigenvalues below the cutoff
-  
-  if(cutoff == "max")  {
+
+  if (cutoff == "max") {
     idx <- which(lambdas > lambda.max)
-  } else if(cutoff == "each") {
+  } else if (cutoff == "each") {
     cutoff.each <- sapply(2:length(lambdas), function(i) {
-      eigr <- lambdas[i:M]
-      mean(eigr)*(1 + (M - i + 1)/T + 2*sqrt((M - i + 1)/T))
+      eigr <- lambdas[i:n_col]
+      mean(eigr) * (1 + (n_col - i + 1) / n_row + 2 * sqrt((n_col - i + 1) / n_row))
     })
-    idx <- c(1, 1 + which(lambdas[-1] > cutoff.each))    
+    idx <- c(1, 1 + which(lambdas[-1] > cutoff.each))
+  }
+
+  if (length(idx) == 0) {
+    return(S)
+  }
+
+  val <- eigen.C$values[idx]
+  vec <- eigen.C$vectors[, idx, drop = FALSE]
+  sum <- 0
+  for (i in seq_len(ncol(vec))) {
+    sum <- sum + val[i] * vec[, i] %*% t(vec[, i])
   }
-
-  if (length(idx) == 0) return(S)
-
-  val <- eigen.C$values[idx]; vec <- eigen.C$vectors[,idx,drop=FALSE]
-  sum <- 0; for (i in 1:ncol(vec)) sum <- sum + val[i]*vec[,i] %*% t(vec[,i])
-
-  # trace of correlation matrix is 1. Use this to determine all the remaining
-  # eigenvalues
-
+
+  # trace of correlation matrix is 1. Use this to determine all the remaining eigenvalues
+
   lambdas.cleaned <- c()
   clean.C <- if (eigenTreat == "average") {
-    lambdas.cleaned <- c(val, rep(1,M))
-    sum + sum(eigen.C$values[-idx])/M * diag(rep(1,M))
+    lambdas.cleaned <- c(val, rep(1, n_col))
+    sum + sum(eigen.C$values[-idx]) / n_col * diag(rep(1, n_col))
   } else if (eigenTreat == "delete") {
-    lambdas.cleaned <- c(val, rep(0,M))
+    lambdas.cleaned <- c(val, rep(0, n_col))
     diag(sum) <- 1
     sum
   }
-  
+
   # convert correlation to covariance matrix and return
   clean.S <- D^0.5 %*% clean.C %*% D^0.5
-  fit <- list(cov = clean.S, Q = Q, var = sigma.sq, eigVals = lambdas, 
-              eigVals.cleaned = lambdas.cleaned, lambdascutoff = lambda.max)
-
+  fit <- list(
+    cov = clean.S,
+    dim_ratio = dim_ratio,
+    var = sigma.sq,
+    eigVals = lambdas,
+    eigVals.cleaned = lambdas.cleaned,
+    lambdascutoff = lambda.max
+  )
+
   class(fit) <- "RMT"
   fit
 }
@@ -146,7 +159,7 @@ estRMT <- function(R, Q = NA, cutoff = c("max", "each"),
 #' @param assay What assay type this is ("rna", "atac")
 #'
 #' @return A denoised correlation matrix object for plotting with plotCorMatrix
-#' 
+#'
 #' @import SummarizedExperiment
 #' @import GenomicRanges
 #'
@@ -156,10 +169,16 @@ estRMT <- function(R, Q = NA, cutoff = c("max", "each"),
 #' data("k562_scrna_chr14", package = "compartmap")
 #' denoised_cor_mat <- getDenoisedCorMatrix(k562_scrna_chr14, genome = "hg19", assay = "rna")
 
-getDenoisedCorMatrix <- function(obj, res = 1e6, chr = "chr14",
-                                 genome = c("hg19", "hg38", "mm9", "mm10"),
-                                 iter = 2, targets = NULL, prior.means = NULL,
-                                 assay = c("rna", "atac", "array")) {
+getDenoisedCorMatrix <- function(
+  obj,
+  res = 1e6,
+  chr = "chr14",
+  genome = c("hg19", "hg38", "mm9", "mm10"),
+  iter = 2,
+  targets = NULL,
+  prior.means = NULL,
+  assay = c("rna", "atac", "array")
+) {
   ## this is a wrapper to give back a denoised correlation matrix to plot
   #match the assay args
   assay <- match.arg(assay)
@@ -172,20 +191,27 @@ getDenoisedCorMatrix <- function(obj, res = 1e6, chr = "chr14",
 
   #subset to selected chromosome(s)
   obj <- keepSeqlevels(obj, chr, pruning.mode = "coarse")
-  
+
   #get the prior means
   if (is.null(prior.means)) {
-    prior.means <- getGlobalMeans(obj=obj, targets=targets,
-                                  assay=assay)
+    prior.means <- getGlobalMeans(obj = obj, targets = targets, assay = assay)
   } else {
     message("Assuming the prior means passed were derived from the full sample set.")
   }
-  
+
   #shrink bins
   message("Shrinking bins with the JSE.")
-  bin.mat <- shrinkBins(x = obj, original.x = obj, prior.means = prior.means,
-                        chr = chr, res = res, targets = targets,
-                        jse = TRUE, assay = assay, genome = genome)
+  bin.mat <- shrinkBins(
+    x = obj,
+    original.x = obj,
+    prior.means = prior.means,
+    chr = chr,
+    res = res,
+    targets = targets,
+    jse = TRUE,
+    assay = assay,
+    genome = genome
+  )
   #get the raw correlation matrix
   cor.mat <- getCorMatrix(binmat = bin.mat, squeeze = FALSE)
   #denoise with RMT
@@ -198,13 +224,12 @@ getDenoisedCorMatrix <- function(obj, res = 1e6, chr = "chr14",
       cor.mat.denoise <- compartmap::estRMT(cor.mat.denoise)$cov
     }
   }
-  
+
   #rescale
-  cor.mat.denoise <- scales::rescale(cor.mat.denoise,
-                                     to = c(0,1))
+  cor.mat.denoise <- scales::rescale(cor.mat.denoise, to = c(0, 1))
   #tidy up
   colnames(cor.mat.denoise) <- rownames(cor.mat.denoise) <- as.character(granges(cor.mat$gr.cor))
-  
+
   return(cor.mat.denoise)
 }
 
@@ -219,22 +244,24 @@ getDenoisedCorMatrix <- function(obj, res = 1e6, chr = "chr14",
 #' @param lowertri Whether to keep the lower triangle of the matrix
 #'
 #' @return Either a ggplot object or plot
-#' 
+#'
 #' @importFrom rlang .data
 #' @importFrom ggplot2 ggplot aes geom_raster scale_fill_gradient2 theme_minimal theme element_blank
-#' 
+#'
 #' @export
 #'
 #' @examples
 #' dummy <- matrix(rnorm(10000), ncol=25)
 #' set.seed(1000)
 #' my_plot <- plotCorMatrix(dummy, return.plot.obj = TRUE)
 
-plotCorMatrix <- function(denoised.cor.mat,
-                          midpoint = 0.3,
-                          return.plot.obj = FALSE,
-                          uppertri = FALSE,
-                          lowertri = FALSE) {
+plotCorMatrix <- function(
+  denoised.cor.mat,
+  midpoint = 0.3,
+  return.plot.obj = FALSE,
+  uppertri = FALSE,
+  lowertri = FALSE
+) {
   ## upper tri
   if (uppertri) {
     denoised.cor.mat[upper.tri(denoised.cor.mat)] <- 0
@@ -248,16 +275,15 @@ plotCorMatrix <- function(denoised.cor.mat,
   ## plot
   p <- ggplot(cor.mat.melt, aes(x = .data$Var2, y = .data$Var1, fill = .data$value)) +
     geom_raster() +
-    scale_fill_gradient2(low = "white", mid = "white", high = "red3", midpoint = {{ midpoint }},
-                         name = "Correlation") +
+    scale_fill_gradient2(low = "white", mid = "white", high = "red3", midpoint = {{ midpoint }}, name = "Correlation") +
     theme_minimal() +
     theme(
       axis.title = element_blank(),
       axis.text = element_blank(),
       panel.grid = element_blank(),
-      plot.margin= grid::unit(c(0, 0, 0, 0), "in")
-      )
-  
+      plot.margin = grid::unit(c(0, 0, 0, 0), "in")
+    )
+
   if (return.plot.obj) {
     return(p)
   } else {

diff --git a/man/estRMT.Rd b/man/estRMT.Rd