From 6af6506ff24b5c38948d78b2260d6a432008e3a4 Mon Sep 17 00:00:00 2001
From: Juan Pablo <juanpabloricapito@gmail.com>
Date: Tue, 1 Jul 2025 10:42:49 +0100
Subject: [PATCH 1/2] update docs

- add tutorial on how to use slide and patient encoding
- set chief as default encoder
- update readme for new installation steps with uv
---
 README.md             |   4 +-
 getting-started.md    | 117 +++++++++++++++++++++++++++++++++++++++++-
 src/stamp/config.yaml |   4 +-
 3 files changed, 121 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index ddba1d33..9383963d 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,9 @@ A Protocol for End-to-End Deep Learning in Computational Pathology".
 
 We recommend installing STAMP with [uv](https://docs.astral.sh/uv/):
 ```bash
-uv sync --all extras
+git clone https://github.com/KatherLab/STAMP.git
+
+uv sync --all-extras
 
 source .venv/bin/activate
 ```
diff --git a/getting-started.md b/getting-started.md
index 9e84bd07..78eaa9a7 100644
--- a/getting-started.md
+++ b/getting-started.md
@@ -151,6 +151,11 @@ meaning ignored that it was ignored during feature extraction.
 [mstar]: https://huggingface.co/Wangyh/mSTAR
 [musk]: https://huggingface.co/xiangjx/musk
 [plip]: https://github.com/PathologyFoundation/plip
+[TITAN]: https://huggingface.co/MahmoodLab/TITAN
+[COBRA2]: https://huggingface.co/KatherLab/COBRA
+[EAGLE]: https://github.com/KatherLab/EAGLE
+[MADELEINE]: https://huggingface.co/MahmoodLab/madeleine
+
 
 
 ## Doing Cross-Validation on the Data Set
@@ -252,4 +257,114 @@ Afterwards, the `output_dir` should contain the following files:
     as well as the 95% confidence interval for the statistical scores
     for the splits.
   - `roc-curve_isMSIH=yes.svg` and `pr-curve_isMSIH=yes.svg`
-    contain the ROC and precision recall curves of the splits.
\ No newline at end of file
+    contain the ROC and precision recall curves of the splits.
+
+## Slide-Level Encoding 
+Tile-level features can be encoded into a single feature vector per slide. This is useful
+when trying to capture global patterns across whole slides.
+
+STAMP currently supports the following encoders:
+- [CHIEF][CHIEF_CTRANSPATH]
+- [TITAN]
+- [GIGAPATH]
+- [COBRA2]
+- [EAGLE]
+- [MADELEINE]
+
+Slide encoders take as input the tile-level features that were already extracted
+in the preprocessing step. Each encoder accepts only certain extractors, and most
+work only on CUDA devices:
+
+| Encoder | Required Extractor | Compatible Devices |
+|--|--|--|
+| CHIEF | CTRANSPATH, CHIEF-CTRANSPATH | CUDA only |
+| TITAN | CONCH1.5 | CUDA, CPU, MPS |
+| GIGAPATH | GIGAPATH | CUDA only |
+| COBRA2 | CONCH, UNI, VIRCHOW2 or H-OPTIMUS-0 | CUDA only |
+| EAGLE | CTRANSPATH, CHIEF-CTRANSPATH | CUDA only |
+| MADELEINE | CONCH | CUDA only |
+
+
+As with feature extractors, most of these models require you to request
+access. The following example uses CHIEF, which is available if you installed 
+STAMP with `uv sync --all-extras`. The configuration should look like this:
+
+```yaml
+# stamp-test-experiment/config.yaml
+
+slide_encoding:
+  # Encoder to use for slide encoding. Possible options are "cobra",
+  # "eagle", "titan", "gigapath", "chief", "prism", "madeleine".
+  encoder: "chief"
+  
+  # Directory to save the output files.
+  output_dir: "/path/to/save/files/to"
+  
+  # Directory where the extracted features are stored.
+  feat_dir: "/path/your/extracted/features/are/stored/in"
+  
+  # Device to run slide encoding on ("cpu", "cuda", "cuda:0", etc.)
+  device: "cuda"
+
+  # Optional settings:
+  # Directory where the aggregated features are stored. Needed for
+  # some encoders such as eagle (it requires virchow2 features).
+  #agg_feat_dir: "/path/your/aggregated/features/are/stored/in"
+
+  # Add a hash of the entire preprocessing codebase in the feature folder name.
+  #generate_hash: True
+```
+
+Make sure `feat_dir` points to a directory containing, in this case, `ctranspath` or
+`chief-ctranspath` tile-level features. Once everything is set, you can simply run:
+
+```sh
+stamp --config stamp-test-experiment/config.yaml encode_slides
+```
+The output will be one `.h5` file per slide. 
+
+## Patient-Level Encoding
+Even though the available encoders are designed for slide-level use, this
+option concatenates the slides of a patient along the x-axis, creating a single
+"virtual" slide (e.g. one containing two blocks of tissue for a patient with two slides).
+The configuration is the same as for slide-level encoding, except for `slide_table`, which is required to link slides with patients.
+```yaml
+# stamp-test-experiment/config.yaml
+
+patient_encoding:
+  # Encoder to use for patient encoding. Possible options are "cobra",
+  # "eagle", "titan", "gigapath", "chief", "prism", "madeleine".
+  encoder: "chief"
+  
+  # Directory to save the output files.
+  output_dir: "/path/to/save/files/to"
+  
+  # Directory where the extracted features are stored.
+  feat_dir: "/path/your/extracted/features/are/stored/in"
+  
+  # A table (.xlsx or .csv) relating every patient to the feature files of their slides.
+  # The table must contain at least two columns, one titled "PATIENT",
+  # containing the patient ID, and one called "FILENAME", containing the feature file path relative to `feat_dir`.
+  slide_table: "/path/of/slide.csv"
+  
+  # Device to run patient encoding on ("cpu", "cuda", "cuda:0", etc.)
+  device: "cuda"
+
+  # Optional settings:
+  patient_label: "PATIENT"
+  filename_label: "FILENAME"
+  
+  # Directory where the aggregated features are stored. Needed for
+  # some encoders such as eagle (it requires virchow2 features).
+  #agg_feat_dir: "/path/your/aggregated/features/are/stored/in"
+
+  # Add a hash of the entire preprocessing codebase in the feature folder name.
+  #generate_hash: True
+```
+
+Then run:
+```sh
+stamp --config stamp-test-experiment/config.yaml encode_patients
+```
+
+Each output `.h5` feature file is named after the patient's ID.
\ No newline at end of file
diff --git a/src/stamp/config.yaml b/src/stamp/config.yaml
index d06a4e8f..3e09c261 100644
--- a/src/stamp/config.yaml
+++ b/src/stamp/config.yaml
@@ -223,7 +223,7 @@ heatmaps:
 slide_encoding:
   # Encoder to use for slide encoding. Possible options are "cobra",
   # "eagle", "titan", "gigapath", "chief", "prism", "madeleine".
-  encoder: "eagle"
+  encoder: "chief"
   
   # Directory to save the output files.
   output_dir: "/path/to/save/files/to"
@@ -246,7 +246,7 @@ slide_encoding:
 patient_encoding:
   # Encoder to use for patient encoding. Possible options are "cobra",
   # "eagle", "titan", "gigapath", "chief", "prism", "madeleine".
-  encoder: "eagle"
+  encoder: "chief"
   
   # Directory to save the output files.
   output_dir: "/path/to/save/files/to"

From ea1d6f8b9012ce3017fb2741f97db7d9b882d084 Mon Sep 17 00:00:00 2001
From: Juan Pablo Ricapito <50255924+EzicStar@users.noreply.github.com>
Date: Tue, 1 Jul 2025 13:40:38 +0200
Subject: [PATCH 2/2] Update README.md

add cd STAMP in installation steps
---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 9383963d..4727619c 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,8 @@ We recommend installing STAMP with [uv](https://docs.astral.sh/uv/):
 ```bash
 git clone https://github.com/KatherLab/STAMP.git
 
+cd STAMP/
+
 uv sync --all-extras
 
 source .venv/bin/activate