|
11 | 11 | #' |
12 | 12 | #' |
13 | 13 | #' @param exprData the expression data matrix |
14 | | -#' @param cellsPerPartition control over the minimum number of cells to put into each supercell |
| 14 | +#' @param cellsPerPartition control over the target number of cells to put into each supercell |
15 | 15 | #' @param filterInput name of filtering method ('threshold' or 'fano') or list of |
16 | 16 | #' genes to use when computing projections. |
17 | | -#' @param filterThreshold Threshold to apply when using the 'threshold' projection genes filter. |
| 17 | +#' @param filterThreshold Threshold to apply when using the 'threshold' or 'fano' projection genes filter. |
18 | 18 | #' If greater than 1, this specifies the number of cells in which a gene must be detected |
19 | 19 | #' for it to be used when computing PCA. If less than 1, this instead specifies the proportion of cells needed |
20 | 20 | #' @param latentSpace (Optional) Latent space to be used instead of PCA: a numeric matrix of cells x components |
|
23 | 23 | #' @return pooled cells - named list of vectors - cells in each supercell |
24 | 24 | #' @export |
25 | 25 | applyMicroClustering <- function( |
26 | | - exprData, cellsPerPartition=100, |
| 26 | + exprData, cellsPerPartition=10, |
27 | 27 | filterInput = "fano", |
28 | | - filterThreshold = round(ncol(exprData) * 0.2), |
| 28 | + filterThreshold = round(ncol(exprData) * 0.05), |
29 | 29 | latentSpace = NULL) { |
30 | 30 |
|
| 31 | + if (is.data.frame(exprData)){ |
| 32 | + exprData <- data.matrix(exprData) |
| 33 | + } |
| 34 | + |
31 | 35 | if (is.null(latentSpace) || all(dim(latentSpace) == c(1, 1))) { |
32 | 36 | exprData <- matLog2(exprData) |
33 | 37 |
|
34 | 38 |
|
35 | | - if (length(filterInput > 1)){ |
36 | | - gene_passes <- filterInput |
| 39 | + message(" Computing a latent space for microclustering using PCA...") |
| 40 | + if (length(filterInput) > 1){ |
| 41 | + gene_passes <- intersect(filterInput, rownames(exprData)) |
| 42 | + if (length(gene_passes) == 0){ |
| 43 | + stop("Supplied list of genes in `filterInput` does not match any rows of `exprData`") |
| 44 | + } else { |
| 45 | + message( |
| 46 | + sprintf(" Using supplied list of genes: Found %i/%i matches", length(gene_passes), length(filterInput)) |
| 47 | + ) |
| 48 | + } |
37 | 49 | } else { |
| 50 | + message(" Determining latent space genes...") |
38 | 51 | gene_passes <- applyFilters(exprData, filterThreshold, filterInput) |
| 52 | + |
| 53 | + if (length(gene_passes) == 0){ |
| 54 | + stop( |
| 55 | + sprintf("Filtering with (filterInput=\"%s\", filterThreshold=%i) results in 0 genes\n Set a lower threshold and re-run", filterInput, filterThreshold) |
| 56 | + ) |
| 57 | + } |
39 | 58 | } |
40 | 59 |
|
41 | | - fexpr <- exprData[gene_passes, ] |
| 60 | + fexpr <- exprData[gene_passes, , drop = FALSE] |
42 | 61 |
|
43 | | - message(" Computing a latent space for microclustering using PCA...") |
44 | 62 | # Compute wcov using matrix operations to avoid |
45 | 63 | # creating a large dense matrix |
46 | 64 |
|
| 65 | + message(" Performing PCA...") |
47 | 66 | N <- ncol(fexpr) |
48 | 67 | wcov <- tcrossprod(fexpr) / N |
49 | 68 |
|
@@ -82,7 +101,7 @@ applyMicroClustering <- function( |
82 | 101 | pools <- readjust_clusters(cl, res, cellsPerPartition = cellsPerPartition) |
83 | 102 |
|
84 | 103 | # Rename clusters |
85 | | - cn <- paste0("microcluster ", 1:length(pools)) |
| 104 | + cn <- paste0("microcluster_", 1:length(pools)) |
86 | 105 | names(pools) <- cn |
87 | 106 |
|
88 | 107 | message( |
|
0 commit comments