From a3a9bf0970992ba6342ccedcc8a2a9edb6518a8a Mon Sep 17 00:00:00 2001 From: Katarzyna Wreczycka Date: Sat, 18 Apr 2026 20:44:54 +0200 Subject: [PATCH 1/2] Add configurable stain background threshold with percentile option for Visium --- README.md | 9 +++++++++ src/preprocessing.py | 10 ++++++++-- src/scs.py | 8 +++++--- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e227a8d..f50bbec 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,15 @@ scs.segment_cells(bin_file, image_file, align='rigid') ``` Use `help(scs.segment_cells)` in python to see more instructions on the usages. +#### Background threshold +By default, SCS identifies background spots using an absolute stain intensity cutoff of `stain_bg_threshold=10`, which works well for low-range fluorescence images (e.g. Stereo-seq, Seq-Scope). + +For **Visium** or other platforms that use **H&E staining** (pixel values 0–255), nearly every pixel exceeds 10, so no background samples are collected and the model cannot train properly. In this case, pass `stain_bg_percentile` to compute the threshold adaptively from the data at runtime: +```python +scs.segment_cells(bin_file, image_file, stain_bg_percentile=10) # 10th percentile of the stain layer +``` +The `stain_bg_percentile` parameter overrides `stain_bg_threshold` when set. + The `segment_cells` function will run three steps to segment the provided patch: (*i*) preprocessing, *i.e.*, identifying nuclei and preparing data for the transformer, (*ii*) training the transformer and inference on all the spots in the patch, (*iii*), postprocessing, *i.e.*, gradient flow tracking. The preprocessing time on the demo patch will be about 10 minutes, transformer training will take roughly 1 hour with an Nvidia GeForce 10 series graphics card, and the postprocessing will take about 5 minutes. ### Processing large-scale data diff --git a/src/preprocessing.py b/src/preprocessing.py index a34690c..3d60b63 100644 --- a/src/preprocessing.py +++ b/src/preprocessing.py @@ -10,7 +10,7 @@ from scipy.sparse import lil_matrix, csr_matrix, vstack -def preprocess(bin_file, image_file, prealigned, align, startx, starty, patchsize, bin_size, n_neighbor): +def preprocess(bin_file, image_file, prealigned, align, startx, starty, patchsize, bin_size, n_neighbor, stain_bg_threshold=10, stain_bg_percentile=None): #read data if prealigned: adatasub = st.io.read_bgi_agg(bin_file, image_file, prealigned=True) @@ -28,6 +28,12 @@ def preprocess(bin_file, image_file, prealigned, align, startx, starty, patchsiz patchsizex = adatasub.X.shape[0] patchsizey = adatasub.X.shape[1] + if stain_bg_percentile is not None: + stain_bg_threshold = float(np.percentile(np.asarray(adatasub.layers['stain']), stain_bg_percentile)) + print(f'stain_bg_threshold ({stain_bg_percentile}th percentile):', stain_bg_threshold) + else: + print('stain_bg_threshold:', stain_bg_threshold) + #align staining image with bins before = adatasub.layers['stain'].copy() if align: @@ -230,7 +236,7 @@ def preprocess(bin_file, image_file, prealigned, align, startx, starty, patchsiz if idx >= 0 and idx < all_exp_merged_bins.shape[0] and np.sum(all_exp_merged_bins[idx, :]) > 0: backgroud = True for nucleus in watershed2center: - if (i - watershed2center[nucleus][0]) ** 2 + (j - watershed2center[nucleus][1]) ** 2 <= 900 or adatasub.layers['stain'][i, j] > 10: + if (i - watershed2center[nucleus][0]) ** 2 + (j - watershed2center[nucleus][1]) ** 2 <= 900 or adatasub.layers['stain'][i, j] > stain_bg_threshold: backgroud = False break if backgroud: diff --git a/src/scs.py b/src/scs.py index 094af69..041b7db 100644 --- a/src/scs.py +++ b/src/scs.py @@ -2,7 +2,7 @@ import numpy as np from src import preprocessing, transformer, postprocessing -def segment_cells(bin_file, image_file, prealigned=False, align=None, patch_size=0, bin_size=3, n_neighbor=50, epochs=100, r_estimate=15, val_ratio=0.0625): +def segment_cells(bin_file, image_file, prealigned=False, align=None, patch_size=0, bin_size=3, n_neighbor=50, epochs=100, r_estimate=15, val_ratio=0.0625, stain_bg_threshold=10, stain_bg_percentile=None): """ Parameters: bin_file - string, tsv file for detected RNAs @@ -15,9 +15,11 @@ def segment_cells(bin_file, image_file, prealigned=False, align=None, patch_size epochs - int, the training epochs of the transformer model, default 100 r_estimate - int, the estimated radius (spots) of cells, used to calculate the priors for transformer predictions, default 15 val_ratio - float, the fraction of the patch set aside for validation, default 0.0625 (1/4 height x 1/4 width) + stain_bg_threshold - float, absolute stain intensity threshold; a spot is background only if its stain value is at or below this value, default 10 + stain_bg_percentile - float or None, if set (e.g. 10.0), overrides stain_bg_threshold with the given percentile of the stain layer computed at runtime; useful for Visium H&E data where pixel values are in the 0-255 range, default None """ if patch_size == 0: - preprocessing.preprocess(bin_file, image_file, prealigned, align, 0, 0, patch_size, bin_size, n_neighbor) + preprocessing.preprocess(bin_file, image_file, prealigned, align, 0, 0, patch_size, bin_size, n_neighbor, stain_bg_threshold, stain_bg_percentile) transformer.train(0, 0, patch_size, epochs, val_ratio) postprocessing.postprocess(0, 0, patch_size, bin_size, r_estimate) else: @@ -37,7 +39,7 @@ def segment_cells(bin_file, image_file, prealigned=False, align=None, patch_size for startc in range(0, cmax, patch_size): try: print('Processing the patch ' + str(startr) + ':' + str(startc) + '...') - preprocessing.preprocess(bin_file, image_file, prealigned, align, startr, startc, patch_size, bin_size, n_neighbor) + preprocessing.preprocess(bin_file, image_file, prealigned, align, startr, startc, patch_size, bin_size, n_neighbor, stain_bg_threshold, stain_bg_percentile) transformer.train(startr, startc, patch_size, epochs, val_ratio) postprocessing.postprocess(startr, startc, patch_size, bin_size, r_estimate) except Exception as e: From eccc45049c7d5ca4fb9717abe913af75e3325173 Mon Sep 17 00:00:00 2001 From: Katarzyna Wreczycka Date: Fri, 24 Apr 2026 11:45:23 +0200 Subject: [PATCH 2/2] Clarify README: Visium/Xenium H&E background threshold guidance --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f50bbec..ea32dc7 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ Use `help(scs.segment_cells)` in python to see more instructions on the usages. #### Background threshold By default, SCS identifies background spots using an absolute stain intensity cutoff of `stain_bg_threshold=10`, which works well for low-range fluorescence images (e.g. Stereo-seq, Seq-Scope). -For **Visium** or other platforms that use **H&E staining** (pixel values 0–255), nearly every pixel exceeds 10, so no background samples are collected and the model cannot train properly. In this case, pass `stain_bg_percentile` to compute the threshold adaptively from the data at runtime: +For **Visium**, **Xenium** or other platforms that use **H&E staining** (pixel values 0-255), nearly every pixel exceeds 10, so no background samples are collected and the model cannot train properly. In this case, pass `stain_bg_percentile` to compute the threshold adaptively from the data at runtime: ```python scs.segment_cells(bin_file, image_file, stain_bg_percentile=10) # 10th percentile of the stain layer ```