src/methods_expression_correction/split_correction/config.vsh.yaml

-Original file line number
+Diff line change
@@ -0,0 +1,55 @@
+    # Viash component config for the SPLIT expression-correction method.
+    # Shared interface fields are merged in from the API file referenced below.
+    __merge__: /src/api/comp_method_expression_correction.yaml
+    name: split_correction
+    label: "SPLIT"
+    summary: "Correct doublet/mis-segmented cells using SPLIT"
+    description: "SPLIT (Spatial Purification of Layered Intracellular Transcripts) is a novel method that integrates snRNA-seq with RCTD deconvolution to enhance signal purity. SPLIT effectively resolves mixed transcriptomic signals, improving background correction and cell-type resolution."
+    links:
+      documentation: "https://github.com/bdsc-tds/SPLIT"
+      repository: "https://github.com/bdsc-tds/SPLIT"
+    references:
+      doi: "10.1101/2025.04.23.649965"
+    arguments:
+      - name: --keep_all_cells
+        required: false
+        direction: input
+        type: boolean
+        default: false
+        description: Whether to keep cells with 0 counts (may cause errors if set to TRUE)
+    resources:
+      - type: r_script
+        path: script.R
+    engines:
+      - type: docker
+        image: openproblems/base_r:1
+        setup:
+          #- type: docker
+          #  run: |
+          #    apt-get update && apt-get install -y wget
+          - type: r
+            bioc: [anndataR, rhdf5, devtools, scater]
+          #- type: r
+          #  bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment]
+          #  bioc_force_install: true
+          - type: docker
+            run: |
+              Rscript -e "options(timeout = 600000000); BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE); devtools::install_github('bdsc-tds/SPLIT')"
+          # The download timeout (in seconds) is raised first so that it applies to
+          # every install in the command above; with the default timeout the
+          # installs were observed to break.
+          # The SingleCellExperiment source re-install can probably be dropped in the
+          # future. It currently works around the bug described in these issues:
+          # https://github.com/drighelli/SpatialExperiment/issues/171
+          # https://github.com/satijalab/seurat/issues/9889
+          # Re-installing SingleCellExperiment triggers the correct re-install of
+          # SpatialExperiment.
+          # TODO(review): check whether the `github:` field of a `type: r` setup
+          # entry can replace the devtools::install_github calls.
+      - type: native
+    runners:
+      - type: executable
+      - type: nextflow
+        directives:
+          label: [ hightime, highcpu, highmem ]

src/methods_expression_correction/split_correction/script.R

-Original file line number
+Diff line change
@@ -0,0 +1,102 @@
+    library(spacexr)
+    library(Matrix)
+    library(SingleCellExperiment)
+    library(anndataR)
+    library(SPLIT)
+    library(Seurat)
+    library(scuttle)
+    ## VIASH START
+    # Placeholder parameters for running this script directly during development.
+    # NOTE(review): the block between the VIASH START/END markers is presumably
+    # replaced by Viash at build time - confirm.
+    par <- list(
+      "input_spatial_with_cell_types" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_aggregated_counts.h5ad",
+      "input_scrnaseq_reference" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad",
+      "output" = "task_ist_preprocessing/tmp/split_corrected.h5ad",
+      "keep_all_cells" = FALSE
+    )
+    meta <- list(
+      "cpus" = 4
+    )
+    ## VIASH END
+    # Read the input h5ad file twice: once as a SingleCellExperiment (used for
+    # RCTD and for the output) and once as a Seurat object (counts for SPLIT).
+    sce <- read_h5ad(par$input_spatial_with_cell_types, as = "SingleCellExperiment")
+    xe <- read_h5ad(par$input_spatial_with_cell_types, as = "Seurat")
+    # Drop cells with zero total counts from both objects unless asked to keep them
+    if (!par$keep_all_cells) {
+      cat("Filtering cells with 0 counts\n")
+      sce <- sce[, colSums(counts(sce)) > 0]
+      xe <- subset(xe, subset = nCount_RNA > 0)
+    }
+    # Extract spatial coordinates and counts matrix
+    centroid_x <- colData(sce)$centroid_x
+    centroid_y <- colData(sce)$centroid_y
+    coords <- data.frame(centroid_x, centroid_y)
+    counts <- assay(sce, "counts")
+    # NOTE(review): assumes colData(sce)$cell_id matches colnames(counts) - confirm
+    rownames(coords) <- colData(sce)$cell_id
+    puck <- SpatialRNA(coords, counts)
+    # Read reference scrnaseq
+    ref <- read_h5ad(par$input_scrnaseq_reference, as = "SingleCellExperiment")
+    # Keep only reference cell types with at least 25 cells (minimum for RCTD)
+    celltype_sizes <- table(colData(ref)$cell_type)
+    valid_celltypes <- names(celltype_sizes)[celltype_sizes >= 25]
+    filtered_ref <- ref[, colData(ref)$cell_type %in% valid_celltypes]
+    ref_counts <- assay(filtered_ref, "counts")
+    # Re-create the factor so that the filtered-out cell types are dropped as levels
+    colData(filtered_ref)$cell_type <- factor(colData(filtered_ref)$cell_type)
+    cell_types <- colData(filtered_ref)$cell_type
+    names(cell_types) <- colnames(ref_counts)
+    reference <- Reference(ref_counts, cell_types, min_UMI = 0)
+    # Use the number of CPUs provided in `meta`, falling back to a single core
+    cores <- 1
+    if ("cpus" %in% names(meta) && !is.null(meta$cpus)) cores <- meta$cpus
+    cat(sprintf("Number of cores: %s\n", cores))
+    # Run the algorithm
+    cat("Running RCTD\n")
+    myRCTD <- create.RCTD(puck, reference, max_cores = cores)
+    myRCTD <- run.RCTD(myRCTD, doublet_mode = "doublet")
+    # Get the "spot_class" annotation from RCTD
+    # cat("Saving RCTD spot_class\n")
+    # results <- myRCTD@results
+    # rctd_spot_class <- results$results_df$spot_class
+    # names(rctd_spot_class) <- rownames(results$results_df)
+    # colData(sce)$RCTD_class <- "not_included"
+    # colData(sce)[names(rctd_spot_class),"RCTD_class"] <- as.character(rctd_spot_class)
+    # Post-process RCTD output
+    RCTD <- SPLIT::run_post_process_RCTD(myRCTD)
+    # Run SPLIT purification
+    cat("Running SPLIT\n")
+    res_split <- SPLIT::purify(
+      counts = GetAssayData(xe, assay = "RNA", layer = "counts"), # or any gene x cells counts matrix
+      rctd = RCTD,
+      DO_purify_singlets = TRUE # optional
+    )
+    # Create the corrected counts layer in the original SingleCellExperiment:
+    # start from a copy of the raw counts ...
+    assay(sce, "corrected_counts") <- assay(sce, "counts")
+    # ... then overwrite only the genes/cells present in the purified matrix
+    purified <- res_split$purified_counts
+    assay(sce, "corrected_counts")[rownames(purified), colnames(purified)] <- purified
+    # Library size normalization - see note in resolVI
+    cat("Normalizing counts\n")
+    size_factors <- librarySizeFactors(assay(sce, "corrected_counts"))
+    assay(sce, "normalized") <- assay(
+      logNormCounts(sce, size_factors = size_factors, assay.type = "corrected_counts"),
+      "logcounts"
+    )
+    # Write the final object to h5ad format, creating the output directory if needed
+    cat("Writing to h5ad\n")
+    dir.create(dirname(par$output), showWarnings = FALSE, recursive = TRUE)
+    write_h5ad(sce, par$output, mode = "w")

Add split #125

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

habibrehman2002 wants to merge 2 commits into main from add_split

+157 −0

-Original file line number
+Diff line change
@@ -0,0 +1,55 @@
+    # Viash component config for the SPLIT expression-correction method.
+    # Shared interface fields are merged in from the API file referenced below.
+    __merge__: /src/api/comp_method_expression_correction.yaml
+    name: split_correction
+    label: "SPLIT"
+    summary: "Correct doublet/mis-segmented cells using SPLIT"
+    description: "SPLIT (Spatial Purification of Layered Intracellular Transcripts) is a novel method that integrates snRNA-seq with RCTD deconvolution to enhance signal purity. SPLIT effectively resolves mixed transcriptomic signals, improving background correction and cell-type resolution."
+    links:
+      documentation: "https://github.com/bdsc-tds/SPLIT"
+      repository: "https://github.com/bdsc-tds/SPLIT"
+    references:
+      doi: "10.1101/2025.04.23.649965"
+    arguments:
+      - name: --keep_all_cells
+        required: false
+        direction: input
+        type: boolean
+        default: false
+        description: Whether to keep cells with 0 counts (may cause errors if set to TRUE)
+    resources:
+      - type: r_script
+        path: script.R
+    engines:
+      - type: docker
+        image: openproblems/base_r:1
+        setup:
+          #- type: docker
+          #  run: |
+          #    apt-get update && apt-get install -y wget
+          - type: r
+            bioc: [anndataR, rhdf5, devtools, scater]
+          #- type: r
+          #  bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment]
+          #  bioc_force_install: true
+          - type: docker
+            run: |
+              Rscript -e "options(timeout = 600000000); BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE); devtools::install_github('bdsc-tds/SPLIT')"
+          # The download timeout (in seconds) is raised first so that it applies to
+          # every install in the command above; with the default timeout the
+          # installs were observed to break.
+          # The SingleCellExperiment source re-install can probably be dropped in the
+          # future. It currently works around the bug described in these issues:
+          # https://github.com/drighelli/SpatialExperiment/issues/171
+          # https://github.com/satijalab/seurat/issues/9889
+          # Re-installing SingleCellExperiment triggers the correct re-install of
+          # SpatialExperiment.
+          # TODO(review): check whether the `github:` field of a `type: r` setup
+          # entry can replace the devtools::install_github calls.
+      - type: native
+    runners:
+      - type: executable
+      - type: nextflow
+        directives:
+          label: [ hightime, highcpu, highmem ]

-Original file line number
+Diff line change
@@ -0,0 +1,102 @@
+    library(spacexr)
+    library(Matrix)
+    library(SingleCellExperiment)
+    library(anndataR)
+    library(SPLIT)
+    library(Seurat)
+    library(scuttle)
+    ## VIASH START
+    # Placeholder parameters for running this script directly during development.
+    # NOTE(review): the block between the VIASH START/END markers is presumably
+    # replaced by Viash at build time - confirm.
+    par <- list(
+      "input_spatial_with_cell_types" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_aggregated_counts.h5ad",
+      "input_scrnaseq_reference" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad",
+      "output" = "task_ist_preprocessing/tmp/split_corrected.h5ad",
+      "keep_all_cells" = FALSE
+    )
+    meta <- list(
+      "cpus" = 4
+    )
+    ## VIASH END
+    # Read the input h5ad file twice: once as a SingleCellExperiment (used for
+    # RCTD and for the output) and once as a Seurat object (counts for SPLIT).
+    sce <- read_h5ad(par$input_spatial_with_cell_types, as = "SingleCellExperiment")
+    xe <- read_h5ad(par$input_spatial_with_cell_types, as = "Seurat")
+    # Drop cells with zero total counts from both objects unless asked to keep them
+    if (!par$keep_all_cells) {
+      cat("Filtering cells with 0 counts\n")
+      sce <- sce[, colSums(counts(sce)) > 0]
+      xe <- subset(xe, subset = nCount_RNA > 0)
+    }
+    # Extract spatial coordinates and counts matrix
+    centroid_x <- colData(sce)$centroid_x
+    centroid_y <- colData(sce)$centroid_y
+    coords <- data.frame(centroid_x, centroid_y)
+    counts <- assay(sce, "counts")
+    # NOTE(review): assumes colData(sce)$cell_id matches colnames(counts) - confirm
+    rownames(coords) <- colData(sce)$cell_id
+    puck <- SpatialRNA(coords, counts)
+    # Read reference scrnaseq
+    ref <- read_h5ad(par$input_scrnaseq_reference, as = "SingleCellExperiment")
+    # Keep only reference cell types with at least 25 cells (minimum for RCTD)
+    celltype_sizes <- table(colData(ref)$cell_type)
+    valid_celltypes <- names(celltype_sizes)[celltype_sizes >= 25]
+    filtered_ref <- ref[, colData(ref)$cell_type %in% valid_celltypes]
+    ref_counts <- assay(filtered_ref, "counts")
+    # Re-create the factor so that the filtered-out cell types are dropped as levels
+    colData(filtered_ref)$cell_type <- factor(colData(filtered_ref)$cell_type)
+    cell_types <- colData(filtered_ref)$cell_type
+    names(cell_types) <- colnames(ref_counts)
+    reference <- Reference(ref_counts, cell_types, min_UMI = 0)
+    # Use the number of CPUs provided in `meta`, falling back to a single core
+    cores <- 1
+    if ("cpus" %in% names(meta) && !is.null(meta$cpus)) cores <- meta$cpus
+    cat(sprintf("Number of cores: %s\n", cores))
+    # Run the algorithm
+    cat("Running RCTD\n")
+    myRCTD <- create.RCTD(puck, reference, max_cores = cores)
+    myRCTD <- run.RCTD(myRCTD, doublet_mode = "doublet")
+    # Get the "spot_class" annotation from RCTD
+    # cat("Saving RCTD spot_class\n")
+    # results <- myRCTD@results
+    # rctd_spot_class <- results$results_df$spot_class
+    # names(rctd_spot_class) <- rownames(results$results_df)
+    # colData(sce)$RCTD_class <- "not_included"
+    # colData(sce)[names(rctd_spot_class),"RCTD_class"] <- as.character(rctd_spot_class)
+    # Post-process RCTD output
+    RCTD <- SPLIT::run_post_process_RCTD(myRCTD)
+    # Run SPLIT purification
+    cat("Running SPLIT\n")
+    res_split <- SPLIT::purify(
+      counts = GetAssayData(xe, assay = "RNA", layer = "counts"), # or any gene x cells counts matrix
+      rctd = RCTD,
+      DO_purify_singlets = TRUE # optional
+    )
+    # Create the corrected counts layer in the original SingleCellExperiment:
+    # start from a copy of the raw counts ...
+    assay(sce, "corrected_counts") <- assay(sce, "counts")
+    # ... then overwrite only the genes/cells present in the purified matrix
+    purified <- res_split$purified_counts
+    assay(sce, "corrected_counts")[rownames(purified), colnames(purified)] <- purified
+    # Library size normalization - see note in resolVI
+    cat("Normalizing counts\n")
+    size_factors <- librarySizeFactors(assay(sce, "corrected_counts"))
+    assay(sce, "normalized") <- assay(
+      logNormCounts(sce, size_factors = size_factors, assay.type = "corrected_counts"),
+      "logcounts"
+    )
+    # Write the final object to h5ad format, creating the output directory if needed
+    cat("Writing to h5ad\n")
+    dir.create(dirname(par$output), showWarnings = FALSE, recursive = TRUE)
+    write_h5ad(sce, par$output, mode = "w")

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add split #125

Diff view

Diff view

There are no files selected for viewing

Add split #125

Are you sure you want to change the base?

Add split #125

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing