Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
102 commits
Select commit Hold shift + click to select a range
a8d386b
update foundation models list in pyproject
EzicStar Feb 27, 2025
92f921e
add slide encoding config
EzicStar Feb 28, 2025
26238ec
fix slide config
EzicStar Feb 28, 2025
a9bcdef
add slide encoding
EzicStar Mar 3, 2025
5bfd4dd
add cobra chief and titan
EzicStar Mar 4, 2025
6a4c06c
add slide_encoding in config
EzicStar Mar 5, 2025
9fdc70f
add cobra dependencies
EzicStar Mar 5, 2025
40e953a
use cobra package directly
EzicStar Mar 5, 2025
396d6d2
add cobra
EzicStar Mar 11, 2025
777f570
fix cobra encoder
EzicStar Mar 17, 2025
44b6977
coords pixel conversion
EzicStar Mar 25, 2025
c428764
patch_lvl_0 calculation
EzicStar Mar 28, 2025
797d0ca
make patch size lvl as integer
EzicStar Mar 28, 2025
45dbc5b
encoders now have their own encode function
EzicStar Mar 31, 2025
f9d58a2
split slide and patient encoding commands
EzicStar Mar 31, 2025
f88732e
update changes from main
EzicStar Apr 10, 2025
6d85ec0
implement mpp calculation from CoordsInfo
EzicStar Apr 10, 2025
429ca58
format cobra.py
EzicStar Apr 10, 2025
b2b12a4
make chief encoder a class
EzicStar Apr 10, 2025
1746319
add eagle encoder class
EzicStar Apr 10, 2025
f34f8ff
add eagle patient encoding
EzicStar Apr 11, 2025
2734c46
add shape validation
EzicStar Apr 11, 2025
71ff930
add features validations
EzicStar Apr 15, 2025
e11d088
add eagle slide encoding
EzicStar Apr 15, 2025
151fb4a
adapt abstract functions implementations
EzicStar Apr 15, 2025
d126bb9
eagle integration to config
EzicStar Apr 15, 2025
29dd194
remove chief prints
EzicStar Apr 15, 2025
e8718fc
format tqdm logging and file naming
EzicStar Apr 15, 2025
0ab75b5
add metadata and exceptions
EzicStar Apr 16, 2025
af4e1ce
fix warnings
EzicStar Apr 16, 2025
f79bc41
make coords numpy array instead of tensor
EzicStar Apr 29, 2025
38afcda
add titan patient level encoding
EzicStar Apr 29, 2025
61fdc78
add gigapath dependency
EzicStar Apr 30, 2025
e9ed4e0
add gigapath dependency to pyproject
EzicStar Apr 30, 2025
08b0322
update available encoders
EzicStar Apr 30, 2025
506f60f
add gigapath slide encoder
EzicStar Apr 30, 2025
8413b0f
add chief slide encoding
EzicStar May 5, 2025
a57fb23
fix gigapath dependencies
EzicStar May 6, 2025
62e0f23
add gigapath patient encoding
EzicStar May 6, 2025
646cd29
fix gigapath dimensions
EzicStar May 7, 2025
220f96a
add feature saving function
EzicStar May 7, 2025
a9bc7d1
reuse reading and saving functions
EzicStar May 7, 2025
dcb0d64
add chief patient encoding
EzicStar May 7, 2025
696867b
add prism
EzicStar May 7, 2025
47d8df0
add virchow feature extractor
EzicStar May 8, 2025
a809afd
add prism dependencies
EzicStar May 8, 2025
25c161d
forgot to add pyproject and change encoder name
EzicStar May 8, 2025
6b6e225
add musk feature extractor
EzicStar May 8, 2025
5b029bb
forgot to add dep stuff
EzicStar May 8, 2025
c432237
fix name typo
EzicStar May 12, 2025
8b558ed
format musk
EzicStar May 12, 2025
09b28af
add mstar
EzicStar May 12, 2025
e89a019
add plip
EzicStar May 12, 2025
9578ceb
minor prism fixes
EzicStar May 14, 2025
185c08e
move encoding logic to base class
EzicStar May 15, 2025
4a44a36
add madeleine
EzicStar May 15, 2025
44df4b7
add test definition
EzicStar May 16, 2025
5167e86
Merge remote-tracking branch 'origin/main' into dev/encode-slides
EzicStar May 19, 2025
27ebb86
type extractor and encoder names
EzicStar May 21, 2025
6c66980
add slde encoding test
EzicStar May 21, 2025
b6c4c19
remove unused random function
EzicStar May 21, 2025
26c8401
fix string casting
EzicStar May 21, 2025
9eb6575
add titan test coverage
EzicStar May 25, 2025
04c12dc
remove hardcoded patient and filename labels
EzicStar May 25, 2025
7ca3c0e
fix eagle test
EzicStar May 25, 2025
79c6db3
fix gigapath warnings
EzicStar May 26, 2025
20efbc8
minor coords and extractor fixes
EzicStar May 26, 2025
01e0b83
add hash flag
EzicStar May 26, 2025
90d88d8
add hash check and formatting
EzicStar May 26, 2025
71982a2
fixes and validations
EzicStar May 26, 2025
26e7901
merge main changes
EzicStar May 26, 2025
1f43e0c
format and update docs
EzicStar May 26, 2025
e8027c1
forgot formatting
EzicStar May 26, 2025
c0287c5
Fix dependencies
georg-wolflein May 26, 2025
a6ca197
reduce test patients for faster testing
EzicStar May 28, 2025
48a1be8
Fix dependencies (again) (for macos)
georg-wolflein Jun 2, 2025
5543140
Debug build.yml
EzicStar Jun 3, 2025
8f3b508
skip cuda required encoders
EzicStar Jun 3, 2025
5fa5c61
Debug feature extractor tests
EzicStar Jun 4, 2025
ae23522
Update .github/workflows/build.yml
EzicStar Jun 19, 2025
94b333a
add own DeviceLikeType
EzicStar Jun 20, 2025
7c5cee2
add description and better func names
EzicStar Jun 23, 2025
fa4e02a
reformat encoder base class
EzicStar Jun 25, 2025
3843db1
add mayor formatting
EzicStar Jun 25, 2025
0e37454
use enum for extractor identifier
EzicStar Jun 25, 2025
4541642
rename extractors and encoders
EzicStar Jun 25, 2025
660157e
reformat output feat files
EzicStar Jun 26, 2025
c5020f5
Fix cobra build
FWao Jun 30, 2025
96dd3c5
add metadata
EzicStar Jun 26, 2025
dfd08be
add cobraII
EzicStar Jun 30, 2025
26180bc
debug CI pipeline
EzicStar Jun 30, 2025
98a0edd
check virchow on gpu
EzicStar Jun 30, 2025
62f63ce
check other extractors
EzicStar Jun 30, 2025
76e3dd0
test each extractor by separate
EzicStar Jun 30, 2025
185e540
set timm as a common dependency
EzicStar Jun 30, 2025
97b4b54
update uv.lock
EzicStar Jun 30, 2025
12bc772
Update COBRA hash, pyproject.toml
FWao Jun 30, 2025
ccc0d02
actions test: add verbose option
FWao Jun 30, 2025
4a0dd4e
extractor param and flash-attn
EzicStar Jun 30, 2025
3502526
fix gigapath dependency
EzicStar Jun 30, 2025
4942e4e
format and remove empty
EzicStar Jun 30, 2025
17d9edd
update README, reformat
EzicStar Jun 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 52 additions & 5 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,26 @@ on:
branches: [ main ]

jobs:
test:
test_extractors:
runs-on: ubuntu-latest
# needs: format_and_lint
strategy:
matrix:
python-version: ["3.11", "3.12"]
extractor: [
"ctranspath",
"chief-ctranspath",
"conch",
"conch1_5",
"uni",
"uni2",
"dino-bloom",
"gigapath",
"h-optimus-0",
"h-optimus-1",
"mstar",
"plip",
]

steps:
- uses: actions/checkout@v4
Expand All @@ -30,13 +44,46 @@ jobs:
# python-version: ${{ matrix.python-version }}

- name: Install the project
run: uv sync --all-extras --dev
run: uv sync --extra all --dev

- name: Build
run: uv build

- name: Run tests
run: uv run pytest tests/
- name: Run tests for extractor ${{ matrix.extractor }}
run: uv run pytest -s tests/test_feature_extractors.py -k "${{ matrix.extractor }}" --verbose
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}

test_others:
runs-on: ubuntu-latest
# needs: format_and_lint
strategy:
matrix:
python-version: ["3.11", "3.12"]

steps:
- uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v5
with:
python-version: ${{ matrix.python-version }}
enable-cache: true
cache-dependency-glob: "uv.lock"

# - name: Set up Python ${{ matrix.python-version }}
# uses: actions/setup-python@v5
# with:
# python-version: ${{ matrix.python-version }}

- name: Install the project
run: uv sync --extra all --dev

- name: Build
run: uv build

- name: Run other tests
run: uv run pytest -s tests/ --ignore=tests/test_feature_extractors.py --verbose
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}

Expand All @@ -56,4 +103,4 @@ jobs:
- name: Check code formatting with Ruff
run: ruff format --diff --target-version=py311
# continue-on-error: true


1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ parts/
sdist/
var/
wheels/
weights/
share/python-wheels/
*.egg-info/
.installed.cfg
Expand Down
16 changes: 8 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,11 @@ A Protocol for End-to-End Deep Learning in Computational Pathology".

## Installing stamp

To install stamp, run:
We recommend installing STAMP with [uv](https://docs.astral.sh/uv/):
```bash
# We recommend using a virtual environment to install stamp
python -m venv .venv
. .venv/bin/activate
uv sync --all-extras

pip install "stamp[all] @ git+https://github.com/KatherLab/STAMP"
source .venv/bin/activate
```

> [!IMPORTANT]
Expand All @@ -43,16 +41,18 @@ pip install "stamp[all] @ git+https://github.com/KatherLab/STAMP"
If the installation was successful, running `stamp` in your terminal should yield the following output:
```
$ stamp
usage: stamp [-h] [--config CONFIG_FILE_PATH] {init,setup,preprocess,train,crossval,deploy,statistics,config,heatmaps} ...
usage: stamp [-h] [--config CONFIG_FILE_PATH] {init,preprocess,encode_slides,encode_patients,train,crossval,deploy,statistics,config,heatmaps} ...

STAMP: Solid Tumor Associative Modeling in Pathology

positional arguments:
{init,setup,preprocess,train,crossval,deploy,statistics,config,heatmaps}
{init,preprocess,encode_slides,encode_patients,train,crossval,deploy,statistics,config,heatmaps}
init Create a new STAMP configuration file at the path specified by --config
preprocess Preprocess whole-slide images into feature vectors
encode_slides Encode patch-level features into slide-level embeddings
encode_patients Encode features into patient-level embeddings
train Train a Vision Transformer model
crossval Train a Vision Transformer model with cross validation
crossval Train a Vision Transformer model with cross validation for modeling.n_splits folds
deploy Deploy a trained Vision Transformer model
statistics Generate AUROCs and AUPRCs with 95%CI for a trained Vision Transformer model
config Print the loaded configuration
Expand Down
22 changes: 22 additions & 0 deletions getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,21 @@ which in turn allows us to efficiently train machine learning models with them.

Stamp currently supports the following feature extractors:
- [ctranspath][ctranspath]
- [chief_ctranspath][chief_ctranspath]
- [DinoBloom][dinobloom]
- [CONCH][conch]
- [CONCHv1.5][conch1_5]
- [UNI][uni]
- [UNI2][uni2]
- [Virchow][virchow]
- [Virchow2][virchow2]
- [Gigapath][gigapath]
- [H-optimus-0][h_optimus_0]
- [H-optimus-1][h_optimus_1]
- [mSTAR][mstar]
- [MUSK][musk]
- [PLIP][plip]


As some of the above require you to request access to the model on huggingface,
we will stick with ctranspath for this example.
Expand Down Expand Up @@ -128,8 +139,19 @@ meaning ignored that it was ignored during feature extraction.
[ctranspath]: https://www.sciencedirect.com/science/article/abs/pii/S1361841522002043 "Transformer-based unsupervised contrastive learning for histopathological image classification"
[dinobloom]: https://github.com/marrlab/DinoBloom "DinoBloom: A Foundation Model for Generalizable Cell Embeddings in Hematology"
[uni]: https://www.nature.com/articles/s41591-024-02857-3 "Towards a general-purpose foundation model for computational pathology"
[uni2]: https://huggingface.co/MahmoodLab/UNI2-h
[conch]: https://www.nature.com/articles/s41591-024-02856-4 "A visual-language foundation model for computational pathology"
[conch1_5]: https://huggingface.co/MahmoodLab/conchv1_5
[virchow]: https://huggingface.co/paige-ai/Virchow "A foundation model for clinical-grade computational pathology and rare cancers detection"
[virchow2]: https://huggingface.co/paige-ai/Virchow2
[chief_ctranspath]: https://github.com/hms-dbmi/CHIEF
[gigapath]: https://huggingface.co/prov-gigapath/prov-gigapath
[h_optimus_0]: https://huggingface.co/bioptimus/H-optimus-0
[h_optimus_1]: https://huggingface.co/bioptimus/H-optimus-1
[mstar]: https://huggingface.co/Wangyh/mSTAR
[musk]: https://huggingface.co/xiangjx/musk
[plip]: https://github.com/PathologyFoundation/plip


## Doing Cross-Validation on the Data Set

Expand Down
62 changes: 37 additions & 25 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,59 +42,56 @@ dependencies = [
"torchmetrics>=1.6.0",
"torchvision>=0.20.1",
"tqdm>=4.66.6",
"timm>=0.9.11",
]

[project.optional-dependencies]
dinobloom = [
"torchvision>=0.20.1",
"xformers>=0.0.28.post3",
]
conch = [
"huggingface-hub>=0.26.2",
"conch @ git+https://github.com/Mahmoodlab/CONCH.git@02d6ac59cc20874bff0f581de258c2b257f69a84",
]
conch1_5 = [
"transformers>=4.45.2",
"einops-exts==0.0.4",
"torch>=2.0.0"
]
ctranspath = [
"gdown>=5.2.0",
"torchvision>=0.20.1",
]
chief_ctranspath = [
"gdown>=5.2.0",
"torchvision>=0.20.1",
"torch>=2.0.0"
]
gigapath = [
"timm>=0.9.11",
"torchvision>=0.20.1",
]
h_optimus_0 = [
"timm>=0.9.11",
"torchvision>=0.20.1",
]
h_optimus_1 = [
"timm>=0.9.11",
"torchvision>=0.20.1",
"gigapath @ git+https://github.com/EzicStar/prov-gigapath.git@d4cf55321df37aaf867e24a31c61bcf490a296eb"
]
uni = [
"huggingface-hub>=0.26.2",
#TODO change the git repo back to mahmoodlab's once our pull request has been accepted
"uni @ git+https://github.com/KatherLab/uni.git@f37c299eb0bffa0e585f120974082cfec6ee6d53",
]
uni2 = [
"timm>=0.9.11",
"torch>=2.0.0",
"uni @ git+https://github.com/mahmoodlab/UNI.git",
]
virchow2 = [
"huggingface-hub>=0.27.1",
"timm>=0.9.11",
"torch>=2.0.0",
]
cobra = [
"jinja2>=3.1.4",
"cobra @ git+https://github.com/KatherLab/COBRA.git@f1a576e1133330ffc2d1df6ee110701921c7b7c9",
]
prism = [
"sacremoses==0.1.1",
"environs==11.0.0",
]
madeleine = [
"madeleine @ git+https://github.com/mahmoodlab/MADELEINE.git@de7c85acc2bdad352e6df8eee5694f8b6f288012"
]
musk = [
"musk @ git+https://github.com/lilab-stanford/MUSK.git@e1699c27687f44bbf6d4adfcbb2abe89795d347f",
]
plip = [
"transformers>=4.45.2"
]

# Blanket target
all = ["stamp[dinobloom,conch,ctranspath,uni,virchow2]"]
all = ["stamp[conch,ctranspath,uni,virchow2,chief_ctranspath,conch1_5,prism,madeleine,musk,plip]"]

[project.scripts]
"stamp" = "stamp.__main__:main"
Expand Down Expand Up @@ -127,3 +124,18 @@ markers = [

[tool.ruff]
lint.ignore = ["F722"] # https://docs.kidger.site/jaxtyping/faq/#flake8-or-ruff-are-throwing-an-error

[[tool.uv.dependency-metadata]]
name = "uni"
version = "v0.1.0"
requires-dist = [
"torch>=2.0.1",
"torchvision",
"timm>=0.9.8",
"numpy",
"pandas",
"scikit-learn",
"tqdm",
"transformers",
"xformers; sys_platform != 'darwin'" # xformers is not supported on macOS
]
51 changes: 51 additions & 0 deletions src/stamp/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,49 @@ def _run_cli(args: argparse.Namespace) -> None:
generate_hash=config.preprocessing.generate_hash,
)

case "encode_slides":
from stamp.encoding import init_slide_encoder_

if config.slide_encoding is None:
raise ValueError("no slide encoding configuration supplied")

_add_file_handle_(_logger, output_dir=config.slide_encoding.output_dir)
_logger.info(
"using the following configuration:\n"
f"{yaml.dump(config.slide_encoding.model_dump(mode='json'))}"
)
init_slide_encoder_(
encoder=config.slide_encoding.encoder,
output_dir=config.slide_encoding.output_dir,
feat_dir=config.slide_encoding.feat_dir,
device=config.slide_encoding.device,
agg_feat_dir=config.slide_encoding.agg_feat_dir,
generate_hash=config.slide_encoding.generate_hash,
)

case "encode_patients":
from stamp.encoding import init_patient_encoder_

if config.patient_encoding is None:
raise ValueError("no patient encoding configuration supplied")

_add_file_handle_(_logger, output_dir=config.patient_encoding.output_dir)
_logger.info(
"using the following configuration:\n"
f"{yaml.dump(config.patient_encoding.model_dump(mode='json'))}"
)
init_patient_encoder_(
encoder=config.patient_encoding.encoder,
output_dir=config.patient_encoding.output_dir,
feat_dir=config.patient_encoding.feat_dir,
slide_table_path=config.patient_encoding.slide_table,
patient_label=config.patient_encoding.patient_label,
filename_label=config.patient_encoding.filename_label,
device=config.patient_encoding.device,
agg_feat_dir=config.patient_encoding.agg_feat_dir,
generate_hash=config.patient_encoding.generate_hash,
)

case "train":
from stamp.modeling.train import train_categorical_model_

Expand Down Expand Up @@ -249,6 +292,14 @@ def main() -> None:
commands.add_parser(
"preprocess", help="Preprocess whole-slide images into feature vectors"
)
commands.add_parser(
"encode_slides",
help="Encode patch-level features into slide-level embeddings",
)
commands.add_parser(
"encode_patients",
help="Encode features into patient-level embeddings",
)
commands.add_parser("train", help="Train a Vision Transformer model")
commands.add_parser(
"crossval",
Expand Down
15 changes: 15 additions & 0 deletions src/stamp/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import shutil
import urllib.request
from functools import cache
from pathlib import Path
from typing import Final

Expand Down Expand Up @@ -36,3 +37,17 @@ def download_file(*, url: str, file_name: str, sha256sum: str) -> Path:
def file_digest(file: str | Path) -> str:
with open(file, "rb") as fp:
return hashlib.file_digest(fp, "sha256").hexdigest()


@cache
def get_processing_code_hash(file_path: Path) -> str:
    """The hash of the entire processing codebase.

    Hashes every ``*.py`` file in the directory containing *file_path*,
    in sorted order so the digest is deterministic. It is used to assure
    that features extracted with different versions of this code base
    can be identified as such after the fact.

    Cached because the codebase does not change while the process runs.
    """
    hasher = hashlib.sha256()
    # Use a distinct loop variable: the original shadowed `file_path`,
    # which made the function's parameter unreadable mid-body.
    for source_file in sorted(file_path.parent.glob("*.py")):
        hasher.update(source_file.read_bytes())
    return hasher.hexdigest()
5 changes: 5 additions & 0 deletions src/stamp/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pydantic import BaseModel, ConfigDict

from stamp.encoding.config import PatientEncodingConfig, SlideEncodingConfig
from stamp.heatmaps.config import HeatmapConfig
from stamp.modeling.config import CrossvalConfig, DeploymentConfig, TrainConfig
from stamp.preprocessing.config import PreprocessingConfig
Expand All @@ -18,3 +19,7 @@ class StampConfig(BaseModel):
statistics: StatsConfig | None = None

heatmaps: HeatmapConfig | None = None

slide_encoding: SlideEncodingConfig | None = None

patient_encoding: PatientEncodingConfig | None = None
Loading