Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions src/methods_expression_correction/split_correction/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Viash component config for the SPLIT expression-correction method.
# Shared fields are merged in from the API file referenced below.
__merge__: /src/api/comp_method_expression_correction.yaml

name: split_correction
label: "SPLIT"
summary: "Correct doublet/mis-segmented cells using SPLIT"
description: "SPLIT (Spatial Purification of Layered Intracellular Transcripts) is a novel method that integrates snRNA-seq with RCTD deconvolution to enhance signal purity. SPLIT effectively resolves mixed transcriptomic signals, improving background correction and cell-type resolution."
links:
  documentation: "https://github.com/bdsc-tds/SPLIT"
  repository: "https://github.com/bdsc-tds/SPLIT"
references:
  doi: "10.1101/2025.04.23.649965"

arguments:
  - name: --keep_all_cells
    required: false
    direction: input
    type: boolean
    default: false
    description: Whether to keep cells with 0 counts (may cause errors if set to TRUE)

resources:
  - type: r_script
    path: script.R

engines:
  - type: docker
    image: openproblems/base_r:1
    setup:
      # - type: docker
      #   run: |
      #     apt-get update && apt-get install -y wget
      - type: r
        bioc: [anndataR, rhdf5, devtools, scater]
      # - type: r
      #   bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment]
      #   bioc_force_install: true

      # Notes on the docker step below:
      #
      # The SingleCellExperiment part can probably be left out again in the
      # future. It currently fixes a bug described in these issues:
      #   https://github.com/drighelli/SpatialExperiment/issues/171
      #   https://github.com/satijalab/seurat/issues/9889
      # The reinstall of SingleCellExperiment triggers the correct re-install
      # of SpatialExperiment.
      #
      # Is there a better way to install an R package from GitHub?
      # The very large timeout (options(timeout = 600000000)) is set before the
      # install_github() calls; it stops the install from breaking.
      - type: docker
        run: |
          Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); options(timeout = 600000000); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE); devtools::install_github('bdsc-tds/SPLIT')"

  - type: native

runners:
  - type: executable
  - type: nextflow
    directives:
      label: [ hightime, highcpu, highmem ]
102 changes: 102 additions & 0 deletions src/methods_expression_correction/split_correction/script.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
library(spacexr)
library(Matrix)
library(SingleCellExperiment)
library(anndataR)
library(SPLIT)
library(Seurat)
library(scuttle)

## VIASH START
# Development defaults for running this script by hand.
# NOTE(review): viash is expected to replace everything between the
# VIASH START / VIASH END markers when it builds the component - confirm.
par <- list(
  "input_spatial_with_cell_types" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_aggregated_counts.h5ad",
  "input_scrnaseq_reference" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad",
  "output" = "task_ist_preprocessing/tmp/split_corrected.h5ad",
  # No trailing comma after the last entry: list() rejects an empty argument.
  "keep_all_cells" = FALSE
)

# Named with `=`; `'cpus': 4` would be evaluated as a sequence expression.
meta <- list(
  "cpus" = 4
)

## VIASH END

# Load the spatial input twice from the same h5ad file: as a
# SingleCellExperiment (used for RCTD and as the output container) and as a
# Seurat object (its RNA counts are passed to SPLIT::purify further down).
sce <- read_h5ad(par$input_spatial_with_cell_types, as = "SingleCellExperiment")
xe <- read_h5ad(par$input_spatial_with_cell_types, as = "Seurat")

# Unless the caller asked to keep them, drop cells whose total count is 0
# from both representations.
if (!par$keep_all_cells) {
  cat("Filtering cells with 0 counts\n")
  has_counts <- colSums(counts(sce)) > 0
  sce <- sce[, has_counts]
  xe <- subset(xe, subset = nCount_RNA > 0)
}

# Assemble the RCTD query: per-cell centroid coordinates (rows named by
# cell_id) together with the raw counts matrix.
spatial_meta <- colData(sce)
coords <- data.frame(
  centroid_x = spatial_meta$centroid_x,
  centroid_y = spatial_meta$centroid_y
)
rownames(coords) <- spatial_meta$cell_id
counts <- assay(sce, "counts")
puck <- SpatialRNA(coords, counts)

# Load the scRNA-seq reference as a SingleCellExperiment.
ref <- read_h5ad(par$input_scrnaseq_reference, as = "SingleCellExperiment")

# Keep only cell types with at least 25 reference cells (minimum for RCTD).
celltype_sizes <- table(colData(ref)$cell_type)
valid_celltypes <- names(celltype_sizes)[celltype_sizes >= 25]
filtered_ref <- ref[, colData(ref)$cell_type %in% valid_celltypes]

ref_counts <- assay(filtered_ref, "counts")

# Re-create the factor so the levels of the removed cell types are dropped,
# then label each entry with its cell (column) name.
colData(filtered_ref)$cell_type <- factor(colData(filtered_ref)$cell_type)
cell_types <- setNames(colData(filtered_ref)$cell_type, colnames(ref_counts))

reference <- Reference(ref_counts, cell_types, min_UMI = 0)

# Use meta$cpus as the core count when it is provided; otherwise fall back to 1.
cores <- if ("cpus" %in% names(meta) && !is.null(meta$cpus)) meta$cpus else 1
cat(sprintf("Number of cores: %s\n", cores))

# Build the RCTD object from the spatial data and the reference, then run it
# in doublet mode.
cat("Running RCTD\n")
myRCTD <- run.RCTD(
  create.RCTD(puck, reference, max_cores = cores),
  doublet_mode = "doublet"
)

# Get the "spot_class" annotation from RCTD
# cat("Saving RCTD spot_class\n")
# results <- myRCTD@results
# rctd_spot_class <- results$results_df$spot_class
# names(rctd_spot_class) <- rownames(results$results_df)
# colData(sce)$RCTD_class <- "not_included"
# colData(sce)[names(rctd_spot_class),"RCTD_class"] <- as.character(rctd_spot_class)

# Convert the raw RCTD result into the post-processed form handed to SPLIT.
RCTD <- SPLIT::run_post_process_RCTD(myRCTD)

# Purify the counts with SPLIT, singlets included.
cat("Running SPLIT\n")
xe_counts <- GetAssayData(xe, assay = 'RNA', layer = 'counts') # or any gene x cells counts matrix
res_split <- SPLIT::purify(
  counts = xe_counts,
  rctd = RCTD,
  DO_purify_singlets = TRUE # optional
)


# Add a "corrected_counts" assay to the original SingleCellExperiment and
# derive a log-normalized "normalized" assay from it.
cat("Normalizing counts\n")

# Start from a copy of the raw counts so every entry that SPLIT did not
# return keeps its original value.
assay(sce, "corrected_counts") <- assay(sce, "counts")

# Overwrite only the genes (rows) and cells (columns) present in SPLIT's
# purified matrix.
purified <- res_split$purified_counts
assay(sce, "corrected_counts")[rownames(purified), colnames(purified)] <- purified

# Library size normalization - see note in resolVI.
# The size factors come from the corrected counts and are passed through
# `size.factors`, the documented argument name of logNormCounts().
size_factors <- librarySizeFactors(assay(sce, "corrected_counts"))
assay(sce, "normalized") <- assay(
  logNormCounts(sce, size.factors = size_factors, assay.type = "corrected_counts"),
  "logcounts"
)

# Make sure the output directory exists (no warning if it already does),
# then save the annotated SingleCellExperiment as h5ad.
cat("Writing to h5ad\n")
output_dir <- dirname(par$output)
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
write_h5ad(sce, par$output, mode = "w")