Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@ scs.segment_cells(bin_file, image_file, align='rigid')
```
Use `help(scs.segment_cells)` in Python to see more detailed usage instructions.

#### Background threshold
By default, SCS identifies background spots using an absolute stain intensity cutoff of `stain_bg_threshold=10`, which works well for low-range fluorescence images (e.g. Stereo-seq, Seq-Scope).

For **Visium**, **Xenium** or other platforms that use **H&E staining** (pixel values 0-255), nearly every pixel exceeds 10, so no background samples are collected and the model cannot train properly. In this case, pass `stain_bg_percentile` to compute the threshold adaptively from the data at runtime:
```python
scs.segment_cells(bin_file, image_file, stain_bg_percentile=10) # 10th percentile of the stain layer
```
The `stain_bg_percentile` parameter overrides `stain_bg_threshold` when set.

The `segment_cells` function will run three steps to segment the provided patch: (*i*) preprocessing, *i.e.*, identifying nuclei and preparing data for the transformer, (*ii*) training the transformer and inference on all the spots in the patch, (*iii*) postprocessing, *i.e.*, gradient flow tracking. The preprocessing time on the demo patch will be about 10 minutes, transformer training will take roughly 1 hour with an Nvidia GeForce 10 series graphics card, and the postprocessing will take about 5 minutes.

### Processing large-scale data
Expand Down
10 changes: 8 additions & 2 deletions src/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from scipy.sparse import lil_matrix, csr_matrix, vstack


def preprocess(bin_file, image_file, prealigned, align, startx, starty, patchsize, bin_size, n_neighbor):
def preprocess(bin_file, image_file, prealigned, align, startx, starty, patchsize, bin_size, n_neighbor, stain_bg_threshold=10, stain_bg_percentile=None):
#read data
if prealigned:
adatasub = st.io.read_bgi_agg(bin_file, image_file, prealigned=True)
Expand All @@ -28,6 +28,12 @@ def preprocess(bin_file, image_file, prealigned, align, startx, starty, patchsiz
patchsizex = adatasub.X.shape[0]
patchsizey = adatasub.X.shape[1]

if stain_bg_percentile is not None:
stain_bg_threshold = float(np.percentile(np.asarray(adatasub.layers['stain']), stain_bg_percentile))
print(f'stain_bg_threshold ({stain_bg_percentile}th percentile):', stain_bg_threshold)
else:
print('stain_bg_threshold:', stain_bg_threshold)

#align staining image with bins
before = adatasub.layers['stain'].copy()
if align:
Expand Down Expand Up @@ -230,7 +236,7 @@ def preprocess(bin_file, image_file, prealigned, align, startx, starty, patchsiz
if idx >= 0 and idx < all_exp_merged_bins.shape[0] and np.sum(all_exp_merged_bins[idx, :]) > 0:
backgroud = True
for nucleus in watershed2center:
if (i - watershed2center[nucleus][0]) ** 2 + (j - watershed2center[nucleus][1]) ** 2 <= 900 or adatasub.layers['stain'][i, j] > 10:
if (i - watershed2center[nucleus][0]) ** 2 + (j - watershed2center[nucleus][1]) ** 2 <= 900 or adatasub.layers['stain'][i, j] > stain_bg_threshold:
backgroud = False
break
if backgroud:
Expand Down
8 changes: 5 additions & 3 deletions src/scs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np
from src import preprocessing, transformer, postprocessing

def segment_cells(bin_file, image_file, prealigned=False, align=None, patch_size=0, bin_size=3, n_neighbor=50, epochs=100, r_estimate=15, val_ratio=0.0625):
def segment_cells(bin_file, image_file, prealigned=False, align=None, patch_size=0, bin_size=3, n_neighbor=50, epochs=100, r_estimate=15, val_ratio=0.0625, stain_bg_threshold=10, stain_bg_percentile=None):
"""
Parameters:
bin_file - string, tsv file for detected RNAs
Expand All @@ -15,9 +15,11 @@ def segment_cells(bin_file, image_file, prealigned=False, align=None, patch_size
epochs - int, the training epochs of the transformer model, default 100
r_estimate - int, the estimated radius (spots) of cells, used to calculate the priors for transformer predictions, default 15
val_ratio - float, the fraction of the patch set aside for validation, default 0.0625 (1/4 height x 1/4 width)
stain_bg_threshold - float, absolute stain intensity threshold; a spot is background only if its stain value is at or below this value, default 10
stain_bg_percentile - float or None, if set (e.g. 10.0), overrides stain_bg_threshold with the given percentile of the stain layer computed at runtime; useful for Visium H&E data where pixel values are in the 0-255 range, default None
"""
if patch_size == 0:
preprocessing.preprocess(bin_file, image_file, prealigned, align, 0, 0, patch_size, bin_size, n_neighbor)
preprocessing.preprocess(bin_file, image_file, prealigned, align, 0, 0, patch_size, bin_size, n_neighbor, stain_bg_threshold, stain_bg_percentile)
transformer.train(0, 0, patch_size, epochs, val_ratio)
postprocessing.postprocess(0, 0, patch_size, bin_size, r_estimate)
else:
Expand All @@ -37,7 +39,7 @@ def segment_cells(bin_file, image_file, prealigned=False, align=None, patch_size
for startc in range(0, cmax, patch_size):
try:
print('Processing the patch ' + str(startr) + ':' + str(startc) + '...')
preprocessing.preprocess(bin_file, image_file, prealigned, align, startr, startc, patch_size, bin_size, n_neighbor)
preprocessing.preprocess(bin_file, image_file, prealigned, align, startr, startc, patch_size, bin_size, n_neighbor, stain_bg_threshold, stain_bg_percentile)
transformer.train(startr, startc, patch_size, epochs, val_ratio)
postprocessing.postprocess(startr, startc, patch_size, bin_size, r_estimate)
except Exception as e:
Expand Down