From 68f64f597245a0c0068bbe7bb8af595b2f3c9370 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 24 Mar 2025 11:41:02 -0700 Subject: [PATCH 1/5] Initialize repo --- .Rbuildignore | 9 + .github/workflows/R-CMD-check.yaml | 40 +++ .gitignore | 4 + .idea/.gitignore | 8 + .idea/misc.xml | 6 + .idea/modules.xml | 8 + .idea/nimbleR.iml | 9 + .idea/vcs.xml | 6 + DESCRIPTION | 25 ++ NAMESPACE | 8 + R/NimbleAppend.R | 321 +++++++++++++++++++++++++ R/Normalization.R | 58 +++++ README.md | 28 ++- man/AppendNimbleCounts.Rd | 58 +++++ man/LogNormalizeUsingAlternateAssay.Rd | 28 +++ tests/testdata/12345_nimbleCounts.tsv | 17 ++ tests/testdata/nimbleTest.rds | Bin 0 -> 714 bytes tests/testthat.R | 4 + tests/testthat/test-nimbleappend.R | 112 +++++++++ tests/testthat/test-normalization.R | 18 ++ 20 files changed, 766 insertions(+), 1 deletion(-) create mode 100644 .Rbuildignore create mode 100644 .github/workflows/R-CMD-check.yaml create mode 100644 .idea/.gitignore create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/nimbleR.iml create mode 100644 .idea/vcs.xml create mode 100644 DESCRIPTION create mode 100644 NAMESPACE create mode 100644 R/NimbleAppend.R create mode 100644 R/Normalization.R create mode 100644 man/AppendNimbleCounts.Rd create mode 100644 man/LogNormalizeUsingAlternateAssay.Rd create mode 100644 tests/testdata/12345_nimbleCounts.tsv create mode 100644 tests/testdata/nimbleTest.rds create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test-nimbleappend.R create mode 100644 tests/testthat/test-normalization.R diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..bc67620 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,9 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^\.git* +^\.idea* +^nimbleR.iml$ +^nimbleR.*\.tar\.gz$ +^nimbleR.*\.zip$ +^\.github$ +.dockerignore \ No newline at end of file diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..5d02de0 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,40 @@ +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + +name: R Build and Checks + +jobs: + R-CMD-check: + runs-on: ubuntu-${{ matrix.config.os }} + + name: ubuntu-${{ matrix.config.os }} (${{ matrix.config.r }} / ${{ matrix.config.bioc }}) + + strategy: + fail-fast: false + matrix: + config: + - { os: 24.04, r: '4.4', bioc: '3.20' } + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: bimberlabinternal/DevOps/githubActions/r-gh-setup@master + with: + r_version: ${{ matrix.config.r }} + bioc_version: ${{ matrix.config.bioc }} + cache_version: ${{ secrets.CACHE_VERSION }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + - uses: r-lib/actions/check-r-package@v2 + with: + args: 'c("--no-manual")' + env: + _R_CHECK_CRAN_INCOMING_: false diff --git a/.gitignore b/.gitignore index e75435c..57d7a7d 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,7 @@ po/*~ # RStudio Connect folder rsconnect/ + +*.tar.gz +*.zip +install.bat \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..07115cd --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..5315fbb --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/nimbleR.iml b/.idea/nimbleR.iml new file mode 100644 index 0000000..d6ebd48 --- /dev/null +++ b/.idea/nimbleR.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..88bd2b2 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,25 @@ +Package: nimbleR +Type: Package +Title: +Version: 1.0.0 +Author: Bimber Lab +Maintainer: The package maintainer +Description: +License: MIT +Encoding: UTF-8 +Depends: + R (>= 4.3.0) +Imports: + Seurat, + tidyr, + dplyr, + ggplot2 +Suggests: + RIRA, + devtools, + testthat (>= 2.1.0) +Remotes: + bimberlab/RIRA +RoxygenNote: 7.3.2 +URL: https://github.com/BimberLab/nimbleR +BugReports: https://github.com/BimberLab/nimbleR/issues diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..ec00965 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,8 @@ +# Generated by roxygen2: do not edit by hand + +export(AppendNimbleCounts) +export(LogNormalizeUsingAlternateAssay) +import(Seurat) +import(dplyr) +import(ggplot2) +importFrom(tidyr,pivot_wider) diff --git a/R/NimbleAppend.R b/R/NimbleAppend.R new file mode 100644 index 0000000..637d30f --- /dev/null +++ b/R/NimbleAppend.R @@ -0,0 +1,321 @@ +# @include Normalization.R + +#' @import Seurat +#' @importFrom tidyr pivot_wider + +#' @title AppendNimbleCounts +#' @description Reads a given seurat object and a nimble file, and appends the nimble data to the object. +#' +#' @param seuratObj A Seurat object. +#' @param nimbleFile A nimble file, which is a TSV of feature counts created by nimble +#' @param maxAmbiguityAllowed If provided, any features representing more than ths value will be discarded. For example, 'Feat1,Feat2,Feat3' represents 3 features. maxAmbiguityAllowed=1 results in removal of all ambiguous features. +#' @param targetAssayName The target assay. If this assay exists, features will be appended (and an error thrown if there are duplicates). Otherwise a new assay will be created. +#' @param renameConflictingFeatures If true, when appending to an existing assay, any conflicting feature names will be renamed, appending the value of duplicateFeatureSuffix +#' @param duplicateFeatureSuffix If renameConflictingFeatures is true, this string will be appended to duplicated feature names +#' @param normalizeData If true, data will be normalized after appending/creating the assay. This will default to CellMembrane::LogNormalizeUsingAlternateAssay; however, if assayForLibrarySize equals targetAssayName then Seurat::NormalizeData is used. +#' @param performDietSeurat If true, DietSeurat will be run, which removes existing reductions. This may or may not be required based on your usage, but the default is to perform this if the targetAssay exists. +#' @param assayForLibrarySize If normalizeData is true, then this is the assay used for librarySize when normalizing. If assayForLibrarySize equals targetAssayName, Seurat::NormalizeData is used. +#' @param maxLibrarySizeRatio If normalizeData is true, then this is passed to CellMembrane::LogNormalizeUsingAlternateAssay +#' @param doPlot If true, FeaturePlots will be generated for the appended features +#' @param maxFeaturesToPlot If doPlot is true, this is the maximum number of features to plot +#' @param replaceExistingAssayData If true, any existing data in the targetAssay will be deleted +#' @param featureRenameList An optional named list in the format = . If any are present, the will be renamed to . The intention of this is to recover specific ambiguous classes. +#' @return A modified Seurat object. +#' +#' @import dplyr +#' @export +AppendNimbleCounts <- function(seuratObj, nimbleFile, targetAssayName, maxAmbiguityAllowed = 0, renameConflictingFeatures = TRUE, duplicateFeatureSuffix = ".Nimble", normalizeData = TRUE, performDietSeurat = (targetAssayName %in% names(seuratObj@assays)), assayForLibrarySize = 'RNA', maxLibrarySizeRatio = 0.05, doPlot = FALSE, maxFeaturesToPlot = 40, replaceExistingAssayData = TRUE, featureRenameList = NULL, doPlot = FALSE) { + if (!file.exists(nimbleFile)) { + stop(paste0("Nimble file does not exist: ", nimbleFile)) + } + + # Read file and construct df + df <- NULL + tryCatch({ + df <- read.table(nimbleFile, sep="\t", header=FALSE) + }, error = function(e){ + if (conditionMessage(e) != 'no lines available in input') { + stop(e) + } else { + print(paste0('No lines in nimble file: ', nimbleFile)) + df <- data.frame(V1 = character(), V2 = character(), V3 = character()) + } + }) + + # Indicates no data in TSV + if (all(is.null(df))){ + return(seuratObj) + } + + if (sum(df$V1 == "") > 0) { + stop("The nimble data contains blank feature names. This should not occur.") + } + + if (sum(grepl(df$V1, pattern = "^,")) > 0) { + stop("The nimble data contains features with leading commas. This should not occur.") + } + + if (sum(grepl(df$V1, pattern = ",$")) > 0) { + stop("The nimble data contains features with trailing commas. This should not occur.") + } + + d <- as.integer(df$V2) + if (any(is.na(d))){ + stop(paste0('Non-integer count values found, were: ', paste0(head(df$V2[is.na(d)]), collapse = ','))) + } + + if (is.na(maxAmbiguityAllowed) || is.null(maxAmbiguityAllowed)){ + maxAmbiguityAllowed <- Inf + } else if (maxAmbiguityAllowed == 0) { + maxAmbiguityAllowed <- 1 + } + + # Ensure consistent sorting of ambiguous features, and re-group if needed: + if (any(grepl(df$V1, pattern = ','))) { + print('Ensuring consistent feature sort within ambiguous features:') + df$V1 <- unlist(sapply(df$V1, function(y){ + return(paste0(sort(unlist(strsplit(y, split = ','))), collapse = ',')) + })) + + df <- df %>% + group_by(V1, V3) %>% + summarize(V2 = sum(V2)) + + df <- df[c('V1', 'V2', 'V3')] + + paste0('Distinct features after re-grouping: ', length(unique(df$V1))) + } + + if (!all(is.null(featureRenameList))) { + print('Potentially renaming features:') + df$V1 <- as.character(df$V1) + totalRenamed <- 0 + for (featName in names(featureRenameList)) { + if (featName %in% df$V1) { + df$V1[df$V1 == featName] <- featureRenameList[[featName]] + totalRenamed <- totalRenamed + 1 + } + } + + print(paste0('Total features renamed: ', totalRenamed)) + } + + #Remove ambiguous features + totalHitsByRow <- sapply(df$V1, function(y){ + return(length(unlist(strsplit(y, split = ',')))) + }) + + ambigFeatRows <- totalHitsByRow > maxAmbiguityAllowed + if (sum(ambigFeatRows) > 0) { + print(paste0('Dropping ', sum(ambigFeatRows), ' rows with ambiguous features (>', maxAmbiguityAllowed, '), ', sum(ambigFeatRows),' of ', nrow(df))) + totalUMI <- sum(df$V2) + x <- df$V1[ambigFeatRows] + totalHitsByRow <- totalHitsByRow[ambigFeatRows] + x[totalHitsByRow > 3] <- 'ManyHits' + + x <- sort(table(x), decreasing = T) + x <- data.frame(Feature = names(x), Total = as.numeric(unname(x))) + + x$Fraction <- x$Total / totalUMI + x <- x[x$Fraction > 0.005,] + + if (nrow(x) > 0) { + x$Name <- substr(x$Feature, start = 1, stop = 40) + x$Name[x$Name != x$Feature] <- paste0(x$Name[x$Name != x$Feature], '..') + x$Total <- paste0(x$Total, ' (', scales::percent(x$Total / totalUMI, accuracy = 0.001), ')') + + print('Top ambiguous combinations:') + print(head(x[c('Name', 'Total')], n = 100)) + } + + df <- df[!ambigFeatRows, , drop = F] + paste0('Distinct features after pruning: ', length(unique(df$V1))) + } + + if (any(duplicated(df[c('V1','V3')]))) { + print(paste0('Duplicate cell/features found. Rows at start: ', nrow(df))) + df <- df %>% + group_by(V1, V3) %>% + summarize(V2 = sum(V2)) + + df <- df[c('V1', 'V2', 'V3')] + + print(paste0('After re-grouping: ', nrow(df))) + } + + tryCatch({ + # Group to ensure we have one value per combination: + d <- as.integer(df$V2) + if (any(is.na(d))){ + stop(paste0('Non-integer count values found, were: ', paste0(df$V2[is.na(d)], collapse = ','))) + } + rm(d) + + paste0('Distinct features: ', length(unique(df$V1))) + + df <- tidyr::pivot_wider(df, names_from=V3, values_from=V2, values_fill=0) + }, error = function(e){ + write.table(df, file = 'debug.nimble.txt.gz', sep = '\t', quote = F, row.names = F) + + print(paste0('Error pivoting input data for assay:', targetAssayName, ', results saved to: debug.nimble.txt.gz')) + print(conditionMessage(e)) + traceback() + e$message <- paste0('Error pivoting nimble data. target assay: ', targetAssayName) + stop(e) + }) + + if (replaceExistingAssayData && targetAssayName %in% names(seuratObj@assays)) { + print('Replacing existing assay') + seuratObj@assays[[targetAssayName]] <- NULL + } + + appendToExistingAssay <- targetAssayName %in% names(seuratObj@assays) + + # Remove barcodes from nimble that aren't in seurat + seuratBarcodes <- colnames(seuratObj@assays[[Seurat::DefaultAssay(seuratObj)]]) + barcodeDiff <- colnames(df) %in% seuratBarcodes + barcodeDiff[1] <- TRUE # retain feature name + numColsToDrop <- length(barcodeDiff) - sum(barcodeDiff) + if (numColsToDrop > 0) { + print(paste0('Dropping ', numColsToDrop, ' cell barcodes not in the seurat object (out of ', (ncol(df)-1), ')')) + } + df <- df[barcodeDiff] + + # Fill zeroed barcodes that are in seurat but not in nimble + zeroedBarcodes <- setdiff(seuratBarcodes, colnames(df)[-1]) + print(paste0('Total cells lacking nimble data: ', length(zeroedBarcodes), ' of ', length(seuratBarcodes), ' cells')) + for (barcode in zeroedBarcodes) { + df[barcode] <- 0 + } + + # Cast nimble df to matrix + featureNames <- df$V1 + if (any(duplicated(featureNames))) { + stop('Error, there were duplicate feature names') + } + + df <- subset(df, select=-(V1)) + m <- Seurat::as.sparse(df) + dimnames(m) <- list(featureNames, colnames(df)) + if (is.null(colnames(m))) { + stop(paste0('Error: no column names in nimble count matrix, size: ', paste0(dim(m), collapse = ' by '))) + } + + m <- m[,seuratBarcodes, drop=FALSE] # Ensure column order matches + if (appendToExistingAssay && ncol(m) != ncol(seuratObj@assays[[targetAssayName]])) { + stop(paste0('Error parsing nimble data, ncol not equal after subset, was ', ncol(m))) + } + + if (is.null(colnames(m))) { + stop(paste0('Error: no column names in matrix after subset, size: ', paste0(dim(m), collapse = ' by '))) + } + + if (appendToExistingAssay) { + if (any(rownames(m) %in% rownames(seuratObj@assays[[targetAssayName]]))) { + conflicting <- rownames(m)[rownames(m) %in% rownames(seuratObj@assays[[targetAssayName]])] + + if (renameConflictingFeatures) { + print(paste0('The following nimble features have conflicts in the existing assay and will be renamed: ', paste0(conflicting, collapse = ','))) + newNames <- rownames(m) + names(newNames) <- newNames + newNames[conflicting] <- paste0(conflicting, duplicateFeatureSuffix) + newNames <- unname(newNames) + rownames(m) <- newNames + } else { + stop(paste0('The following nimble features conflict with features in the seuratObj: ', paste0(conflicting, collapse = ','))) + } + } + + if (performDietSeurat) { + print('Running DietSeurat') + seuratObj <- Seurat::DietSeurat(seuratObj) + } + + # Append nimble matrix to seurat count matrix + existingBarcodes <- colnames(Seurat::GetAssayData(seuratObj, assay = targetAssayName, slot = 'counts')) + if (sum(colnames(m) != existingBarcodes) > 0) { + stop('cellbarcodes do not match on matrices') + } + + # If feature source exists, retain it. Otherwise assume these are from cellranger: + slotName <- .GetAssayMetaSlotName(seuratObj[[targetAssayName]]) + if ('FeatureSource' %in% names(methods::slot(seuratObj@assays[[targetAssayName]], slotName))) { + fs <- methods::slot(seuratObj@assays[[targetAssayName]], slotName)$FeatureSource + } else { + fs <- rep('CellRanger', nrow(seuratObj@assays[[targetAssayName]])) + } + + fs <- c(fs, rep('Nimble', nrow(m))) + + # perform in two steps to avoid warnings: + ad <- Seurat::CreateAssayObject(counts = Seurat::as.sparse(rbind(Seurat::GetAssayData(seuratObj, assay = targetAssayName, slot = 'counts'), m))) + if (targetAssayName != Seurat::DefaultAssay(seuratObj)) { + seuratObj[[targetAssayName]] <- NULL + } + seuratObj[[targetAssayName]] <- ad + + names(fs) <- rownames(seuratObj@assays[[targetAssayName]]) + seuratObj@assays[[targetAssayName]] <- Seurat::AddMetaData(seuratObj@assays[[targetAssayName]], metadata = fs, col.name = 'FeatureSource') + + if (sum(colnames(Seurat::GetAssayData(seuratObj, assay = targetAssayName, slot = 'counts')) != existingBarcodes) > 0) { + stop('cellbarcodes do not match on matrices after assay replacement') + } + } else { + # Add nimble as separate assay + if (any(duplicated(rownames(m)))) { + stop('Error: The count matrix had duplicate rownames') + } + seuratObj[[targetAssayName]] <- Seurat::CreateAssayObject(counts = m, min.features = 0, min.cells = 0) + + fs <- rep('Nimble', nrow(seuratObj@assays[[targetAssayName]])) + names(fs) <- rownames(seuratObj@assays[[targetAssayName]]) + seuratObj@assays[[targetAssayName]] <- Seurat::AddMetaData(seuratObj@assays[[targetAssayName]], metadata = fs, col.name = 'FeatureSource') + } + + if (normalizeData) { + if (targetAssayName == assayForLibrarySize) { + print('Normalizing using Seurat::NormalizeData') + seuratObj <- Seurat::NormalizeData(seuratObj, assay = targetAssayName, verbose = FALSE) + } else { + print(paste0('Normalizing using LogNormalizeUsingAlternateAssay with ', assayForLibrarySize)) + seuratObj <- LogNormalizeUsingAlternateAssay(seuratObj, assay = targetAssayName, assayForLibrarySize = assayForLibrarySize, maxLibrarySizeRatio = maxLibrarySizeRatio) + } + } + + if (doPlot) { + if (!requireNamespace("RIRA", quietly = TRUE)) { + warning("The RIRA package must be installed to perform plotting") + } else { + print('Plotting features') + reductions <- intersect(names(seuratObj@reductions), c('umap', 'tsne', 'wnn')) + if (length(reductions) == 0){ + print('No reductions, cannot plot') + } else { + feats <- paste0(seuratObj[[targetAssayName]]@key, rownames(seuratObj[[targetAssayName]])) + rowSums <- Matrix::rowSums(Seurat::GetAssayData(seuratObj, assay = targetAssayName, layer = 'counts')) + feats <- feats[rowSums > 0] + if (length(feats) == 0) { + print('All features are zero, skipping plot') + } else { + if (length(feats) > maxFeaturesToPlot){ + print(paste0('Too many features, will plot the first: ', maxFeaturesToPlot)) + feats <- feats[1:maxFeaturesToPlot] + } + + RIRA::PlotMarkerSeries(seuratObj, reductions = reductions, features = feats, title = targetAssayName) + } + } + } + } + + return(seuratObj) +} + +.GetAssayMetaSlotName <- function(assayObj) { + slotName <- ifelse('meta.features' %in% methods::slotNames(assayObj), yes = 'meta.features', no = 'meta.data') + if (! slotName %in% methods::slotNames(assayObj)) { + stop(paste0('Assay object lacks slot: ', slotName)) + } + + return(slotName) +} \ No newline at end of file diff --git a/R/Normalization.R b/R/Normalization.R new file mode 100644 index 0000000..ecc3e32 --- /dev/null +++ b/R/Normalization.R @@ -0,0 +1,58 @@ + +#' @title LogNormalizeUsingAlternateAssay +#' +#' @param seuratObj The seurat object +#' @param assayToNormalize The name of the assay to normalize +#' @param assayForLibrarySize The name of the assay from which to derive library sizes. This will be added to the library size of assayToNormalize. +#' @param scale.factor A scale factor to be applied in normalization +#' @param maxLibrarySizeRatio This normalization relies on the assumption that the library size of the assay being normalized in negligible relative to the assayForLibrarySize. To verify this holds true, the method will error if librarySize(assayToNormalize)/librarySize(assayForLibrarySize) exceeds this value +#' +#' @import ggplot2 +#' @export +LogNormalizeUsingAlternateAssay <- function(seuratObj, assayToNormalize, assayForLibrarySize = 'RNA', scale.factor = 1e4, maxLibrarySizeRatio = 0.01) { + toNormalize <- Seurat::GetAssayData(seuratObj, assayToNormalize, layer = 'counts') + assayForLibrarySizeObj <- Seurat::GetAssayData(seuratObj, assay = assayForLibrarySize, layer = 'counts') + + if (any(colnames(toNormalize) != colnames(assayForLibrarySize))) { + stop(paste0('The assayToNormalize and assayForLibrarySize do not have the same cell names!')) + } + + if (is.null(maxLibrarySizeRatio) || is.na(maxLibrarySizeRatio)) { + maxLibrarySizeRatio <- Inf + } + + margin <- 2 + ncells <- dim(x = toNormalize)[margin] + + start_time <- Sys.time() + assayForLibrarySizeData <- Matrix::colSums(assayForLibrarySizeObj) + ratios <- unlist(sapply(seq_len(length.out = ncells), function(i){ + x <- toNormalize[, i] + sumX <- sum(x) + librarySize <- sumX + assayForLibrarySizeData[i] + + lsr <- (sumX / librarySize) + if (lsr > maxLibrarySizeRatio) { + stop(paste0('The ratio of library sizes was above maxLibrarySizeRatio for cell: ', colnames(assayForLibrarySizeObj)[i], ', on assay: ', assayToNormalize, '. Ratio was: ', lsr, ' (', sumX, ' / ', librarySize, ')')) + } + + xnorm <- log1p(x = x / librarySize * scale.factor) + toNormalize[, i] <<- xnorm + + return(lsr) + })) + end_time <- Sys.time() + + print('Normalization time:') + print(end_time - start_time) + + print(ggplot(data.frame(lsr = ratios), aes(x = lsr)) + + geom_histogram() + + ggtitle(paste0("Library size ratios: ", assayToNormalize)) + + egg::theme_article() + ) + + seuratObj <- Seurat::SetAssayData(seuratObj, assay = assayToNormalize, layer = 'data', new.data = toNormalize) + + return(seuratObj) +} \ No newline at end of file diff --git a/README.md b/README.md index cbe2146..2be3f83 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,28 @@ +![R Build and Checks](https://github.com/BimberLab/nimbleR/workflows/R%20Build%20and%20Checks/badge.svg) + # nimbleR -Contains helper functions to append nimble data to Seurat objects +The nimbleR package is a companion to the [nimble aligner](https://github.com/BimberLab/nimble/). The R package is primarily designed to process and append the per-cell alignment data produced by nimble (in TSV format), and append it to a seurat object either as a new assay, or merged with an existing assay. + +It also contains several specialized normalization functions, developed to work with nimble's supplemental alignment data and also MHC/HLA data. + +### Installation + +```{r} +# Install requirements. Other dependencies will be downloaded automatically +install.packages(c('devtools', 'BiocManager', 'remotes'), dependencies = TRUE, ask = FALSE) + +# Updating your Rprofile (i.e. ~/.Rprofile), with the following line will ensure install.packages() pulls from Bioconductor repos: +local({options(repos = BiocManager::repositories())}) + +# Install latest version: +devtools::install_github(repo = 'bimberlab/nimbleR', dependencies = TRUE) +``` + +### Basic Usage + +Append the output from nimble to a Seurat object as a new assay: +```{r} +library(nimbleR) + +seuratObj <- +``` \ No newline at end of file diff --git a/man/AppendNimbleCounts.Rd b/man/AppendNimbleCounts.Rd new file mode 100644 index 0000000..6347992 --- /dev/null +++ b/man/AppendNimbleCounts.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/NimbleAppend.R +\name{AppendNimbleCounts} +\alias{AppendNimbleCounts} +\title{AppendNimbleCounts} +\usage{ +AppendNimbleCounts( + seuratObj, + nimbleFile, + targetAssayName, + maxAmbiguityAllowed = 0, + renameConflictingFeatures = TRUE, + duplicateFeatureSuffix = ".Nimble", + normalizeData = TRUE, + performDietSeurat = (targetAssayName \%in\% names(seuratObj@assays)), + assayForLibrarySize = "RNA", + maxLibrarySizeRatio = 0.05, + doPlot = TRUE, + maxFeaturesToPlot = 40, + replaceExistingAssayData = TRUE, + featureRenameList = NULL +) +} +\arguments{ +\item{seuratObj}{A Seurat object.} + +\item{nimbleFile}{A nimble file, which is a TSV of feature counts created by nimble} + +\item{targetAssayName}{The target assay. If this assay exists, features will be appended (and an error thrown if there are duplicates). Otherwise a new assay will be created.} + +\item{maxAmbiguityAllowed}{If provided, any features representing more than ths value will be discarded. For example, 'Feat1,Feat2,Feat3' represents 3 features. maxAmbiguityAllowed=1 results in removal of all ambiguous features.} + +\item{renameConflictingFeatures}{If true, when appending to an existing assay, any conflicting feature names will be renamed, appending the value of duplicateFeatureSuffix} + +\item{duplicateFeatureSuffix}{If renameConflictingFeatures is true, this string will be appended to duplicated feature names} + +\item{normalizeData}{If true, data will be normalized after appending/creating the assay. This will default to CellMembrane::LogNormalizeUsingAlternateAssay; however, if assayForLibrarySize equals targetAssayName then Seurat::NormalizeData is used.} + +\item{performDietSeurat}{If true, DietSeurat will be run, which removes existing reductions. This may or may not be required based on your usage, but the default is to perform this if the targetAssay exists.} + +\item{assayForLibrarySize}{If normalizeData is true, then this is the assay used for librarySize when normalizing. If assayForLibrarySize equals targetAssayName, Seurat::NormalizeData is used.} + +\item{maxLibrarySizeRatio}{If normalizeData is true, then this is passed to CellMembrane::LogNormalizeUsingAlternateAssay} + +\item{doPlot}{If true, FeaturePlots will be generated for the appended features} + +\item{maxFeaturesToPlot}{If doPlot is true, this is the maximum number of features to plot} + +\item{replaceExistingAssayData}{If true, any existing data in the targetAssay will be deleted} + +\item{featureRenameList}{An optional named list in the format = . If any are present, the will be renamed to . The intention of this is to recover specific ambiguous classes.} +} +\value{ +A modified Seurat object. +} +\description{ +Reads a given seurat object and a nimble file, and appends the nimble data to the object. +} diff --git a/man/LogNormalizeUsingAlternateAssay.Rd b/man/LogNormalizeUsingAlternateAssay.Rd new file mode 100644 index 0000000..c03cde0 --- /dev/null +++ b/man/LogNormalizeUsingAlternateAssay.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Normalization.R +\name{LogNormalizeUsingAlternateAssay} +\alias{LogNormalizeUsingAlternateAssay} +\title{LogNormalizeUsingAlternateAssay} +\usage{ +LogNormalizeUsingAlternateAssay( + seuratObj, + assayToNormalize, + assayForLibrarySize = "RNA", + scale.factor = 10000, + maxLibrarySizeRatio = 0.01 +) +} +\arguments{ +\item{seuratObj}{The seurat object} + +\item{assayToNormalize}{The name of the assay to normalize} + +\item{assayForLibrarySize}{The name of the assay from which to derive library sizes. This will be added to the library size of assayToNormalize.} + +\item{scale.factor}{A scale factor to be applied in normalization} + +\item{maxLibrarySizeRatio}{This normalization relies on the assumption that the library size of the assay being normalized in negligible relative to the assayForLibrarySize. To verify this holds true, the method will error if librarySize(assayToNormalize)/librarySize(assayForLibrarySize) exceeds this value} +} +\description{ +LogNormalizeUsingAlternateAssay +} diff --git a/tests/testdata/12345_nimbleCounts.tsv b/tests/testdata/12345_nimbleCounts.tsv new file mode 100644 index 0000000..448500c --- /dev/null +++ b/tests/testdata/12345_nimbleCounts.tsv @@ -0,0 +1,17 @@ +E 3 12345_CCAGCGAAGTCCGTAT +F 3 12345_CCAGCGAAGTCCGTAT +G 3 12345_CCAGCGAAGTCCGTAT +H 3 12345_CCAGCGAAGTCCGTAT +Z,D 3 12345_CCAGCGAAGTCCGTAC +E 1 12345_AAAAAAAAAAAAAAAA +F 1 12345_AAAAAAAAAAAAAAAA +G 1 12345_AAAAAAAAAAAAAAAA +H 1 12345_AAAAAAAAAAAAAAAA +E 2 12345_CAAAAAAAAAAAAAA +F 2 12345_CAAAAAAAAAAAAAA +G 2 12345_CAAAAAAAAAAAAAA +H 2 12345_CAAAAAAAAAAAAAA +E 4 12345_GAAAAAAAAAAAAAAA +F 4 12345_GAAAAAAAAAAAAAAA +G 4 12345_GAAAAAAAAAAAAAAA +H 4 12345_GAAAAAAAAAAAAAAA \ No newline at end of file diff --git a/tests/testdata/nimbleTest.rds b/tests/testdata/nimbleTest.rds new file mode 100644 index 0000000000000000000000000000000000000000..aac96985ed73fa09da7a783e542cac0de451736e GIT binary patch literal 714 zcmV;*0yX^~iwFP!000002Gv&WZqqOnbrLsiqIOg&1Mw1(R@iUoT7i(pnD~(RS9yus zg*8d#xMh25o`7Pff}5t!B7oG8ur;q|K+RG@jz?RdMvC% zqfst>XI?6;Uv#TR%`N*m;%9pOQ*iT8PwimM2?}XO6veu^5zd&>81>!Kq!&$x9}p$+ zt-jeDg*Rx5CHHG7DlqsZdSwxe5JelAIxFx`)d|&pTSVDV7)^=p3jD9iYx1K%BEhv3 zmGa$N?GTl}0*9!`&qFlBL7^pJA8j0M3QklhII8IzIXC!D^9)If#=5_v00$X~r{yzR z7hliMEe+dVA`(n6q6<>gGT(5zPJ$^afgEEts%qFZ7(J5x2+Dvx#j2uF`DC9H8B7S< z&cG2&;ym0D&&GfgR1?egXS^VYe!TjQLt2e!Psci+E(V4(cm-i1-UV4Mdv<`xU#r@y ze~dG{#Is!U(kPOth7DH~NqJB%F8D@m%m$kB>nGboPno`frl+( zYc04+mOS5rIKnAaGwG1ubH-dGQd{X;kwvAs&KV+M7U4^RC#-}>1&x?^a!lVxD$MgB ziD~vnDbZ#MiI+G)LiGj(8{y(pwhFiRIAZZ!=VSkjsixnQp!s#(eZxy}JiWfEzmWI@ zM+?SXmewKMIAK?iI%|7QGG26wO1b5Wj#o7sG?$p<%)P8SJWxD%PAR#dMv5($K9Sd> wvD!jTr;7xWI9yNPC#@A`NrKX-_L@esY^E9SRb*Eb-9t+9Z}KCknUV|u05L631ONa4 literal 0 HcmV?d00001 diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..b60d808 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(nimbleR) + +test_check("nimbleR", reporter = "progress") \ No newline at end of file diff --git a/tests/testthat/test-nimbleappend.R b/tests/testthat/test-nimbleappend.R new file mode 100644 index 0000000..3b57bf5 --- /dev/null +++ b/tests/testthat/test-nimbleappend.R @@ -0,0 +1,112 @@ +context("Nimble") + +test_that("Nimble Append detects missing input file", { + seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + expect_error(AppendNimbleCounts(seuratObj, "../testdata/nonexistent.tsv", targetAssayName = 'RNA'), "Nimble file does not exist: ../testdata/nonexistent.tsv", fixed=TRUE) +}) + +test_that("Nimble Append deletes blank feature names when appending", { + seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'Nimble', maxLibrarySizeRatio = 1) + expect_false('' %in% rownames(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts'))) + expect_true('FeatureSource' %in% names(seuratObj@assays$Nimble@meta.features)) + expect_equal(unique(seuratObj@assays$Nimble@meta.features$FeatureSource), c('Nimble')) +}) + +test_that("Nimble Append works with empty input", { + seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + fn <- tempfile(fileext = '.txt') + file.create(fn) + seuratObj <- AppendNimbleCounts(seuratObj, fn, targetAssayName = 'Nimble') + unlink(fn) + expect_false('Nimble' %in% names(seuratObj@assays)) +}) + +test_that("Nimble Append deletes all barcodes not in Seurat when appending", { + seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + nimbleExclusiveBarcodes <- c("12345_CCAGCGAAGTCCGTAT", "12345_CCAGCGAAGTCCGTAC") + seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'RNA', maxLibrarySizeRatio = 1, replaceExistingAssayData = FALSE) + expect_equal(nimbleExclusiveBarcodes %in% colnames(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')), c(FALSE, FALSE)) + + expect_true('FeatureSource' %in% names(seuratObj@assays$RNA@meta.features)) + expect_equal(seuratObj@assays$RNA@meta.features$FeatureSource, c("CellRanger", "CellRanger", "CellRanger", "CellRanger", "Nimble", "Nimble", "Nimble", "Nimble")) +}) + +test_that("Nimble Append supports rename", { + seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + nimbleExclusiveBarcodes <- c("12345_CCAGCGAAGTCCGTAT", "12345_CCAGCGAAGTCCGTAC") + seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'RNA', + maxLibrarySizeRatio = 1, + featureRenameList = list( + 'E' = 'AA' + ), + replaceExistingAssayData = FALSE, + performDietSeurat = TRUE + ) + + expect_equal(nimbleExclusiveBarcodes %in% colnames(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')), c(FALSE, FALSE)) + + expect_true('FeatureSource' %in% names(seuratObj@assays$RNA@meta.features)) + expect_equal(seuratObj@assays$RNA@meta.features$FeatureSource, c("CellRanger", "CellRanger", "CellRanger", "CellRanger", "Nimble", "Nimble", "Nimble", "Nimble")) + + expect_equal(rownames(GetAssayData(seuratObj, assay = 'RNA')), c('A', 'B', 'C', 'D', 'AA', 'F', 'G', 'H')) +}) + +test_that("Nimble Append fills all barcodes in Seurat but not in Nimble when appending", { + seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + seuratExclusiveBarcode <- "12345_TAAAAAAAAAAAAAAA" + seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'RNA', maxLibrarySizeRatio = 1, replaceExistingAssayData = FALSE) + nimbleFeatureCounts <- GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, seuratExclusiveBarcode][c('E', 'F', 'G', 'H')] + expect_false(FALSE %in% (nimbleFeatureCounts == c(0, 0, 0, 0))) +}) + +test_that("Nimble Append output Seurat object is valid when appending", { + seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + expectedBarcodes <- c("12345_AAAAAAAAAAAAAAAA", "12345_CAAAAAAAAAAAAAA", "12345_GAAAAAAAAAAAAAAA", "12345_TAAAAAAAAAAAAAAA") + expectedFeatures <- c("A", "B", "C", "D", "E", "F", "G", "H") + expectedValues <- list(c(1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 2, 2, 2, 2), c(1, 1, 1, 1, 4, 4, 4, 4), c(1, 1, 1, 1, 0, 0, 0, 0)) + seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'RNA', maxLibrarySizeRatio = 1, replaceExistingAssayData = FALSE) + expect_equal(colnames(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')), expectedBarcodes) + expect_equal(rownames(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')), expectedFeatures) + + expect_equal(unname(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, expectedBarcodes[1]]), expectedValues[[1]]) + expect_equal(unname(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, expectedBarcodes[2]]), expectedValues[[2]]) + expect_equal(unname(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, expectedBarcodes[3]]), expectedValues[[3]]) + expect_equal(unname(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, expectedBarcodes[4]]), expectedValues[[4]]) +}) + +test_that("Nimble Append deletes blank feature names when creating new assay", { + seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'Nimble', maxLibrarySizeRatio = 1) + expect_false('' %in% rownames(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts'))) +}) + +test_that("Nimble Append deletes all barcodes not in Seurat when creating new assay", { + seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + nimbleExclusiveBarcodes <- c("12345_CCAGCGAAGTCCGTAT", "12345_CCAGCGAAGTCCGTAC") + seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'Nimble', maxLibrarySizeRatio = 1) + expect_equal(nimbleExclusiveBarcodes %in% colnames(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')), c(FALSE, FALSE)) +}) + +test_that("Nimble Append fills all barcodes in Seurat but not in Nimble when creating new assay", { + seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + seuratExclusiveBarcode <- "12345_TAAAAAAAAAAAAAAA" + seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'Nimble', maxLibrarySizeRatio = 1) + nimbleFeatureCounts <- GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, seuratExclusiveBarcode] + expect_equal(FALSE %in% (nimbleFeatureCounts == c(0, 0, 0, 0)), FALSE) +}) + +test_that("Nimble Append output Seurat object is valid when creating new assay", { + seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + expectedBarcodes <- c("12345_AAAAAAAAAAAAAAAA", "12345_CAAAAAAAAAAAAAA", "12345_GAAAAAAAAAAAAAAA", "12345_TAAAAAAAAAAAAAAA") + expectedFeatures <- c("E", "F", "G", "H") + expectedValues <- list(c(1, 1, 1, 1), c(2, 2, 2, 2), c(4, 4, 4, 4), c(0, 0, 0, 0)) + seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'Nimble', maxLibrarySizeRatio = 1) + expect_equal(colnames(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')), expectedBarcodes) + expect_equal(rownames(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')), expectedFeatures) + + expect_equal(unname(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, expectedBarcodes[1]]), expectedValues[[1]]) + expect_equal(unname(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, expectedBarcodes[2]]), expectedValues[[2]]) + expect_equal(unname(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, expectedBarcodes[3]]), expectedValues[[3]]) + expect_equal(unname(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, expectedBarcodes[4]]), expectedValues[[4]]) +}) diff --git a/tests/testthat/test-normalization.R b/tests/testthat/test-normalization.R new file mode 100644 index 0000000..080e773 --- /dev/null +++ b/tests/testthat/test-normalization.R @@ -0,0 +1,18 @@ +context("Normalization") + +test_that("LogNormalizeUsingAlternateAssay works as expected", { + seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + + assayToAdd <- Seurat::GetAssayData(seuratObj, assay = 'RNA', layer = 'counts') + assayToAdd <- floor(assayToAdd[1:10,] / 5) + + rownames(assayToAdd) <- paste0('Feature', LETTERS[1:10]) + + seuratObj[['Norm']] <- Seurat::CreateAssayObject(assayToAdd) + + seuratObj <- LogNormalizeUsingAlternateAssay(seuratObj, assayToNormalize = 'Norm', assayForLibrarySize = 'RNA') + + nd <- Seurat::GetAssayData(seuratObj, assay = 'Norm', layer = 'data') + expect_equal(max(nd[,4]), 3.442982, tolerance = 0.000001) + expect_equal(max(nd[,101]), 2.823479, tolerance = 0.000001) +}) \ No newline at end of file From a0f040a0fe40b4078602d5cfdf3641ee83b88928 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 25 Mar 2025 01:36:57 -0700 Subject: [PATCH 2/5] Bugfix --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 88bd2b2..0594f74 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: nimbleR Type: Package -Title: +Title: Tools to use nimble data with seurat objects Version: 1.0.0 Author: Bimber Lab Maintainer: The package maintainer -Description: +Description: A R package to format and append nimble RNA-seq count data to seurat objects License: MIT Encoding: UTF-8 Depends: From 295e28d11647fa817514f23462074268b8387b36 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 25 Mar 2025 02:09:36 -0700 Subject: [PATCH 3/5] Bugfix --- R/NimbleAppend.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/NimbleAppend.R b/R/NimbleAppend.R index 637d30f..9b90c12 100644 --- a/R/NimbleAppend.R +++ b/R/NimbleAppend.R @@ -24,7 +24,7 @@ #' #' @import dplyr #' @export -AppendNimbleCounts <- function(seuratObj, nimbleFile, targetAssayName, maxAmbiguityAllowed = 0, renameConflictingFeatures = TRUE, duplicateFeatureSuffix = ".Nimble", normalizeData = TRUE, performDietSeurat = (targetAssayName %in% names(seuratObj@assays)), assayForLibrarySize = 'RNA', maxLibrarySizeRatio = 0.05, doPlot = FALSE, maxFeaturesToPlot = 40, replaceExistingAssayData = TRUE, featureRenameList = NULL, doPlot = FALSE) { +AppendNimbleCounts <- function(seuratObj, nimbleFile, targetAssayName, maxAmbiguityAllowed = 0, renameConflictingFeatures = TRUE, duplicateFeatureSuffix = ".Nimble", normalizeData = TRUE, performDietSeurat = (targetAssayName %in% names(seuratObj@assays)), assayForLibrarySize = 'RNA', maxLibrarySizeRatio = 0.05, doPlot = FALSE, maxFeaturesToPlot = 40, replaceExistingAssayData = TRUE, featureRenameList = NULL) { if (!file.exists(nimbleFile)) { stop(paste0("Nimble file does not exist: ", nimbleFile)) } From 4319e5fa4fc0fe4cd27aec02e4f17fad1750d2e7 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 25 Mar 2025 12:03:48 -0700 Subject: [PATCH 4/5] Bugfix --- DESCRIPTION | 2 ++ tests/testthat/test-nimbleappend.R | 40 ++++++++++++++--------------- tests/testthat/test-normalization.R | 22 +++++++++++----- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0594f74..bbd6f5d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,9 +16,11 @@ Imports: ggplot2 Suggests: RIRA, + SeuratData, devtools, testthat (>= 2.1.0) Remotes: + SeuratData=satijalab/seurat-data, bimberlab/RIRA RoxygenNote: 7.3.2 URL: https://github.com/BimberLab/nimbleR diff --git a/tests/testthat/test-nimbleappend.R b/tests/testthat/test-nimbleappend.R index 3b57bf5..9e52919 100644 --- a/tests/testthat/test-nimbleappend.R +++ b/tests/testthat/test-nimbleappend.R @@ -8,7 +8,7 @@ test_that("Nimble Append detects missing input file", { test_that("Nimble Append deletes blank feature names when appending", { seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'Nimble', maxLibrarySizeRatio = 1) - expect_false('' %in% rownames(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts'))) + expect_false('' %in% rownames(Seurat::GetAssayData(seuratObj, assay = 'RNA', layer = 'counts'))) expect_true('FeatureSource' %in% names(seuratObj@assays$Nimble@meta.features)) expect_equal(unique(seuratObj@assays$Nimble@meta.features$FeatureSource), c('Nimble')) }) @@ -26,7 +26,7 @@ test_that("Nimble Append deletes all barcodes not in Seurat when appending", { seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) nimbleExclusiveBarcodes <- c("12345_CCAGCGAAGTCCGTAT", "12345_CCAGCGAAGTCCGTAC") seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'RNA', maxLibrarySizeRatio = 1, replaceExistingAssayData = FALSE) - expect_equal(nimbleExclusiveBarcodes %in% colnames(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')), c(FALSE, FALSE)) + expect_equal(nimbleExclusiveBarcodes %in% colnames(Seurat::GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')), c(FALSE, FALSE)) expect_true('FeatureSource' %in% names(seuratObj@assays$RNA@meta.features)) expect_equal(seuratObj@assays$RNA@meta.features$FeatureSource, c("CellRanger", "CellRanger", "CellRanger", "CellRanger", "Nimble", "Nimble", "Nimble", "Nimble")) @@ -44,19 +44,19 @@ test_that("Nimble Append supports rename", { performDietSeurat = TRUE ) - expect_equal(nimbleExclusiveBarcodes %in% colnames(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')), c(FALSE, FALSE)) + expect_equal(nimbleExclusiveBarcodes %in% colnames(Seurat::GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')), c(FALSE, FALSE)) expect_true('FeatureSource' %in% names(seuratObj@assays$RNA@meta.features)) expect_equal(seuratObj@assays$RNA@meta.features$FeatureSource, c("CellRanger", "CellRanger", "CellRanger", "CellRanger", "Nimble", "Nimble", "Nimble", "Nimble")) - expect_equal(rownames(GetAssayData(seuratObj, assay = 'RNA')), c('A', 'B', 'C', 'D', 'AA', 'F', 'G', 'H')) + expect_equal(rownames(Seurat::GetAssayData(seuratObj, assay = 'RNA')), c('A', 'B', 'C', 'D', 'AA', 'F', 'G', 'H')) }) test_that("Nimble Append fills all barcodes in Seurat but not in Nimble when appending", { seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) seuratExclusiveBarcode <- "12345_TAAAAAAAAAAAAAAA" seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'RNA', maxLibrarySizeRatio = 1, replaceExistingAssayData = FALSE) - nimbleFeatureCounts <- GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, seuratExclusiveBarcode][c('E', 'F', 'G', 'H')] + nimbleFeatureCounts <- Seurat::GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, seuratExclusiveBarcode][c('E', 'F', 'G', 'H')] expect_false(FALSE %in% (nimbleFeatureCounts == c(0, 0, 0, 0))) }) @@ -66,33 +66,33 @@ test_that("Nimble Append output Seurat object is valid when appending", { expectedFeatures <- c("A", "B", "C", "D", "E", "F", "G", "H") expectedValues <- list(c(1, 1, 1, 1, 1, 1, 1, 1), c(1, 1, 1, 1, 2, 2, 2, 2), c(1, 1, 1, 1, 4, 4, 4, 4), c(1, 1, 1, 1, 0, 0, 0, 0)) seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'RNA', maxLibrarySizeRatio = 1, replaceExistingAssayData = FALSE) - expect_equal(colnames(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')), expectedBarcodes) - expect_equal(rownames(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')), expectedFeatures) + expect_equal(colnames(Seurat::GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')), expectedBarcodes) + expect_equal(rownames(Seurat::GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')), expectedFeatures) - expect_equal(unname(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, expectedBarcodes[1]]), expectedValues[[1]]) - expect_equal(unname(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, expectedBarcodes[2]]), expectedValues[[2]]) - expect_equal(unname(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, expectedBarcodes[3]]), expectedValues[[3]]) - expect_equal(unname(GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, expectedBarcodes[4]]), expectedValues[[4]]) + expect_equal(unname(Seurat::GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, expectedBarcodes[1]]), expectedValues[[1]]) + expect_equal(unname(Seurat::GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, expectedBarcodes[2]]), expectedValues[[2]]) + expect_equal(unname(Seurat::GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, expectedBarcodes[3]]), expectedValues[[3]]) + expect_equal(unname(Seurat::GetAssayData(seuratObj, assay = 'RNA', layer = 'counts')[, expectedBarcodes[4]]), expectedValues[[4]]) }) test_that("Nimble Append deletes blank feature names when creating new assay", { seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'Nimble', maxLibrarySizeRatio = 1) - expect_false('' %in% rownames(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts'))) + expect_false('' %in% rownames(Seurat::GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts'))) }) test_that("Nimble Append deletes all barcodes not in Seurat when creating new assay", { seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) nimbleExclusiveBarcodes <- c("12345_CCAGCGAAGTCCGTAT", "12345_CCAGCGAAGTCCGTAC") seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'Nimble', maxLibrarySizeRatio = 1) - expect_equal(nimbleExclusiveBarcodes %in% colnames(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')), c(FALSE, FALSE)) + expect_equal(nimbleExclusiveBarcodes %in% colnames(Seurat::GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')), c(FALSE, FALSE)) }) test_that("Nimble Append fills all barcodes in Seurat but not in Nimble when creating new assay", { seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) seuratExclusiveBarcode <- "12345_TAAAAAAAAAAAAAAA" seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'Nimble', maxLibrarySizeRatio = 1) - nimbleFeatureCounts <- GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, seuratExclusiveBarcode] + nimbleFeatureCounts <- Seurat::GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, seuratExclusiveBarcode] expect_equal(FALSE %in% (nimbleFeatureCounts == c(0, 0, 0, 0)), FALSE) }) @@ -102,11 +102,11 @@ test_that("Nimble Append output Seurat object is valid when creating new assay", expectedFeatures <- c("E", "F", "G", "H") expectedValues <- list(c(1, 1, 1, 1), c(2, 2, 2, 2), c(4, 4, 4, 4), c(0, 0, 0, 0)) seuratObj <- AppendNimbleCounts(seuratObj, "../testdata/12345_nimbleCounts.tsv", targetAssayName = 'Nimble', maxLibrarySizeRatio = 1) - expect_equal(colnames(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')), expectedBarcodes) - expect_equal(rownames(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')), expectedFeatures) + expect_equal(colnames(Seurat::GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')), expectedBarcodes) + expect_equal(rownames(Seurat::GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')), expectedFeatures) - expect_equal(unname(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, expectedBarcodes[1]]), expectedValues[[1]]) - expect_equal(unname(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, expectedBarcodes[2]]), expectedValues[[2]]) - expect_equal(unname(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, expectedBarcodes[3]]), expectedValues[[3]]) - expect_equal(unname(GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, expectedBarcodes[4]]), expectedValues[[4]]) + expect_equal(unname(Seurat::GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, expectedBarcodes[1]]), expectedValues[[1]]) + expect_equal(unname(Seurat::GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, expectedBarcodes[2]]), expectedValues[[2]]) + expect_equal(unname(Seurat::GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, expectedBarcodes[3]]), expectedValues[[3]]) + expect_equal(unname(Seurat::GetAssayData(seuratObj, assay = 'Nimble', layer = 'counts')[, expectedBarcodes[4]]), expectedValues[[4]]) }) diff --git a/tests/testthat/test-normalization.R b/tests/testthat/test-normalization.R index 080e773..3b6a40b 100644 --- a/tests/testthat/test-normalization.R +++ b/tests/testthat/test-normalization.R @@ -1,18 +1,28 @@ +library(SeuratData) + context("Normalization") +getBaseSeuratData <- function(){ + suppressWarnings(SeuratData::InstallData("pbmc3k")) + suppressWarnings(data("pbmc3k")) + seuratObj <- suppressWarnings(Seurat::UpdateSeuratObject(pbmc3k)) + + return(seuratObj) +} + test_that("LogNormalizeUsingAlternateAssay works as expected", { - seuratObj <- Seurat::UpdateSeuratObject(readRDS("../testdata/nimbleTest.rds")) + seuratObj <- getBaseSeuratData() assayToAdd <- Seurat::GetAssayData(seuratObj, assay = 'RNA', layer = 'counts') - assayToAdd <- floor(assayToAdd[1:10,] / 5) + assayToAdd <- floor(assayToAdd[c('H6PD', 'H6PD', 'JAK1', 'XAF1', 'TMEM52'),]) - rownames(assayToAdd) <- paste0('Feature', LETTERS[1:10]) + rownames(assayToAdd) <- paste0('Feature', LETTERS[1:nrow(assayToAdd)]) seuratObj[['Norm']] <- Seurat::CreateAssayObject(assayToAdd) - seuratObj <- LogNormalizeUsingAlternateAssay(seuratObj, assayToNormalize = 'Norm', assayForLibrarySize = 'RNA') + seuratObj <- LogNormalizeUsingAlternateAssay(seuratObj, assayToNormalize = 'Norm', assayForLibrarySize = 'RNA', maxLibrarySizeRatio = 0.2) nd <- Seurat::GetAssayData(seuratObj, assay = 'Norm', layer = 'data') - expect_equal(max(nd[,4]), 3.442982, tolerance = 0.000001) - expect_equal(max(nd[,101]), 2.823479, tolerance = 0.000001) + expect_equal(max(nd[,2]), 1.111578, tolerance = 0.000001) + expect_equal(max(nd[,200]), 1.726143, tolerance = 0.000001) }) \ No newline at end of file From a8cfd43a9a7d5810417c3cf19b8d1bdfc05cc7a5 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 25 Mar 2025 12:33:52 -0700 Subject: [PATCH 5/5] Bugfix --- DESCRIPTION | 6 +++++- LICENSE | 21 --------------------- R/NimbleAppend.R | 14 ++++++++++---- R/Normalization.R | 6 ++++++ man/AppendNimbleCounts.Rd | 2 +- 5 files changed, 22 insertions(+), 27 deletions(-) delete mode 100644 LICENSE diff --git a/DESCRIPTION b/DESCRIPTION index bbd6f5d..70a9809 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,13 +4,17 @@ Title: Tools to use nimble data with seurat objects Version: 1.0.0 Author: Bimber Lab Maintainer: The package maintainer -Description: A R package to format and append nimble RNA-seq count data to seurat objects +Description: An R package to format and append nimble RNA-seq count data to seurat objects. License: MIT Encoding: UTF-8 Depends: R (>= 4.3.0) Imports: Seurat, + Matrix, + egg, + scales, + methods, tidyr, dplyr, ggplot2 diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 9403aa4..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2025 Bimber Lab - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/R/NimbleAppend.R b/R/NimbleAppend.R index 9b90c12..637ac18 100644 --- a/R/NimbleAppend.R +++ b/R/NimbleAppend.R @@ -1,3 +1,9 @@ +utils::globalVariables( + names = c('V1','V2','V3'), + package = 'nimbleR', + add = TRUE +) + # @include Normalization.R #' @import Seurat @@ -32,7 +38,7 @@ AppendNimbleCounts <- function(seuratObj, nimbleFile, targetAssayName, maxAmbigu # Read file and construct df df <- NULL tryCatch({ - df <- read.table(nimbleFile, sep="\t", header=FALSE) + df <- utils::read.table(nimbleFile, sep="\t", header=FALSE) }, error = function(e){ if (conditionMessage(e) != 'no lines available in input') { stop(e) @@ -61,7 +67,7 @@ AppendNimbleCounts <- function(seuratObj, nimbleFile, targetAssayName, maxAmbigu d <- as.integer(df$V2) if (any(is.na(d))){ - stop(paste0('Non-integer count values found, were: ', paste0(head(df$V2[is.na(d)]), collapse = ','))) + stop(paste0('Non-integer count values found, were: ', paste0(utils::head(df$V2[is.na(d)]), collapse = ','))) } if (is.na(maxAmbiguityAllowed) || is.null(maxAmbiguityAllowed)){ @@ -125,7 +131,7 @@ AppendNimbleCounts <- function(seuratObj, nimbleFile, targetAssayName, maxAmbigu x$Total <- paste0(x$Total, ' (', scales::percent(x$Total / totalUMI, accuracy = 0.001), ')') print('Top ambiguous combinations:') - print(head(x[c('Name', 'Total')], n = 100)) + print(utils::head(x[c('Name', 'Total')], n = 100)) } df <- df[!ambigFeatRows, , drop = F] @@ -155,7 +161,7 @@ AppendNimbleCounts <- function(seuratObj, nimbleFile, targetAssayName, maxAmbigu df <- tidyr::pivot_wider(df, names_from=V3, values_from=V2, values_fill=0) }, error = function(e){ - write.table(df, file = 'debug.nimble.txt.gz', sep = '\t', quote = F, row.names = F) + utils::write.table(df, file = 'debug.nimble.txt.gz', sep = '\t', quote = F, row.names = F) print(paste0('Error pivoting input data for assay:', targetAssayName, ', results saved to: debug.nimble.txt.gz')) print(conditionMessage(e)) diff --git a/R/Normalization.R b/R/Normalization.R index ecc3e32..607e7c1 100644 --- a/R/Normalization.R +++ b/R/Normalization.R @@ -1,3 +1,9 @@ +utils::globalVariables( + names = c('lsr'), + package = 'nimbleR', + add = TRUE +) + #' @title LogNormalizeUsingAlternateAssay #' diff --git a/man/AppendNimbleCounts.Rd b/man/AppendNimbleCounts.Rd index 6347992..0821f43 100644 --- a/man/AppendNimbleCounts.Rd +++ b/man/AppendNimbleCounts.Rd @@ -15,7 +15,7 @@ AppendNimbleCounts( performDietSeurat = (targetAssayName \%in\% names(seuratObj@assays)), assayForLibrarySize = "RNA", maxLibrarySizeRatio = 0.05, - doPlot = TRUE, + doPlot = FALSE, maxFeaturesToPlot = 40, replaceExistingAssayData = TRUE, featureRenameList = NULL