From 6af6506ff24b5c38948d78b2260d6a432008e3a4 Mon Sep 17 00:00:00 2001 From: Juan Pablo Date: Tue, 1 Jul 2025 10:42:49 +0100 Subject: [PATCH 1/2] update docs - add tutorial on how to use slide and patient encoding - set chief as default encoder - update readme for new installation steps with uv --- README.md | 4 +- getting-started.md | 117 +++++++++++++++++++++++++++++++++++++++++- src/stamp/config.yaml | 4 +- 3 files changed, 121 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ddba1d33..9383963d 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,9 @@ A Protocol for End-to-End Deep Learning in Computational Pathology". We recommend installing STAMP with [uv](https://docs.astral.sh/uv/): ```bash -uv sync --all extras +git clone https://github.com/KatherLab/STAMP.git + +uv sync --all-extras source .venv/bin/activate ``` diff --git a/getting-started.md b/getting-started.md index 9e84bd07..78eaa9a7 100644 --- a/getting-started.md +++ b/getting-started.md @@ -151,6 +151,11 @@ meaning ignored that it was ignored during feature extraction. [mstar]: https://huggingface.co/Wangyh/mSTAR [musk]: https://huggingface.co/xiangjx/musk [plip]: https://github.com/PathologyFoundation/plip +[TITAN]: https://huggingface.co/MahmoodLab/TITAN +[COBRA2]: https://huggingface.co/KatherLab/COBRA +[EAGLE]: https://github.com/KatherLab/EAGLE +[MADELEINE]: https://huggingface.co/MahmoodLab/madeleine + ## Doing Cross-Validation on the Data Set @@ -252,4 +257,114 @@ Afterwards, the `output_dir` should contain the following files: as well as the 95% confidence interval for the statistical scores for the splits. - `roc-curve_isMSIH=yes.svg` and `pr-curve_isMSIH=yes.svg` - contain the ROC and precision recall curves of the splits. \ No newline at end of file + contain the ROC and precision recall curves of the splits. 
+ +## Slide-Level Encoding +Tile-Level features can be encoded into a single feature per slide; this is useful +when trying to capture global patterns across whole slides. + +STAMP currently supports the following encoders: +- [CHIEF][CHIEF_CTRANSPATH] +- [TITAN] +- [GIGAPATH] +- [COBRA2] +- [EAGLE] +- [MADELEINE] + +Slide encoders take as input the tile-level features already extracted in the +preprocessing step. Each encoder accepts only certain extractors and most +work only on CUDA devices: + +| Encoder | Required Extractor | Compatible Devices | +|--|--|--| +| CHIEF | CTRANSPATH, CHIEF-CTRANSPATH | CUDA only | +| TITAN | CONCH1.5 | CUDA, cpu, mps | +| GIGAPATH | GIGAPATH | CUDA only | +| COBRA2 | CONCH, UNI, VIRCHOW2 or H-OPTIMUS-0 | CUDA only | +| EAGLE | CTRANSPATH, CHIEF-CTRANSPATH | CUDA only | +| MADELEINE | CONCH | CUDA only | + + +As with feature extractors, most of these models require you to request +access. The following example uses CHIEF, which is available if you installed +STAMP with `uv sync --all-extras`. The configuration should look like this: + +```yaml +# stamp-test-experiment/config.yaml + +slide_encoding: + # Encoder to use for slide encoding. Possible options are "cobra", + # "eagle", "titan", "gigapath", "chief", "prism", "madeleine". + encoder: "chief" + + # Directory to save the output files. + output_dir: "/path/to/save/files/to" + + # Directory where the extracted features are stored. + feat_dir: "/path/your/extracted/features/are/stored/in" + + # Device to run slide encoding on ("cpu", "cuda", "cuda:0", etc.) + device: "cuda" + + # Optional settings: + # Directory where the aggregated features are stored. Needed for + # some encoders such as eagle (it requires virchow2 features). + #agg_feat_dir: "/path/your/aggregated/features/are/stored/in" + + # Add a hash of the entire preprocessing codebase in the feature folder name. 
+ #generate_hash: True + ``` + +Make sure `feat_dir` points to a directory containing, in this case, `ctranspath` or +`chief-ctranspath` tile-level features. Once everything is set, you can simply run: + +```sh +stamp --config stamp-test-experiment/config.yaml encode_slides +``` +The output will be one `.h5` file per slide. + +## Patient-Level Encoding +Even though the available encoders are designed for slide-level use, this +option concatenates the slides of a patient along the x-axis, creating a single +"virtual" slide that contains one block of tissue per slide. The configuration is the same +except for `slide_table`, which is required to link slides with patients. +```yaml +# stamp-test-experiment/config.yaml + +patient_encoding: + # Encoder to use for patient encoding. Possible options are "cobra", + # "eagle", "titan", "gigapath", "chief", "prism", "madeleine". + encoder: "eagle" + + # Directory to save the output files. + output_dir: "/path/to/save/files/to" + + # Directory where the extracted features are stored. + feat_dir: "/path/your/extracted/features/are/stored/in" + + # A table (.xlsx or .csv) relating every patient to their slides' feature files. + # The table must contain at least two columns, one titled "PATIENT" (see `patient_label`), + # containing the patient ID, and one called "FILENAME" (see `filename_label`), containing the feature file path relative to `feat_dir`. + slide_table: "/path/of/slide.csv" + + # Device to run patient encoding on ("cpu", "cuda", "cuda:0", etc.) + device: "cuda" + + # Optional settings: + patient_label: "PATIENT" + filename_label: "FILENAME" + + # Directory where the aggregated features are stored. Needed for + # some encoders such as eagle (it requires virchow2 features). + #agg_feat_dir: "/path/your/aggregated/features/are/stored/in" + + # Add a hash of the entire preprocessing codebase in the feature folder name. 
+ #generate_hash: True + ``` + + Then run: + ```sh +stamp --config stamp-test-experiment/config.yaml encode_patients +``` + +Each output `.h5` feature file is named after the patient's ID. \ No newline at end of file diff --git a/src/stamp/config.yaml b/src/stamp/config.yaml index d06a4e8f..3e09c261 100644 --- a/src/stamp/config.yaml +++ b/src/stamp/config.yaml @@ -223,7 +223,7 @@ heatmaps: slide_encoding: # Encoder to use for slide encoding. Possible options are "cobra", # "eagle", "titan", "gigapath", "chief", "prism", "madeleine". - encoder: "eagle" + encoder: "chief" # Directory to save the output files. output_dir: "/path/to/save/files/to" @@ -246,7 +246,7 @@ slide_encoding: patient_encoding: # Encoder to use for patient encoding. Possible options are "cobra", # "eagle", "titan", "gigapath", "chief", "prism", "madeleine". - encoder: "eagle" + encoder: "chief" # Directory to save the output files. output_dir: "/path/to/save/files/to" From ea1d6f8b9012ce3017fb2741f97db7d9b882d084 Mon Sep 17 00:00:00 2001 From: Juan Pablo Ricapito <50255924+EzicStar@users.noreply.github.com> Date: Tue, 1 Jul 2025 13:40:38 +0200 Subject: [PATCH 2/2] Update README.md add cd STAMP in installation steps --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 9383963d..4727619c 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ We recommend installing STAMP with [uv](https://docs.astral.sh/uv/): ```bash git clone https://github.com/KatherLab/STAMP.git +cd STAMP/ + uv sync --all-extras source .venv/bin/activate