From 30117a4a529a300d6d7c226b231c1959849c3925 Mon Sep 17 00:00:00 2001 From: clemsgrs Date: Thu, 1 Jan 2026 16:04:09 +0000 Subject: [PATCH 1/5] sync hs2p --- slide2vec/hs2p | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/slide2vec/hs2p b/slide2vec/hs2p index e63953e..b95f6aa 160000 --- a/slide2vec/hs2p +++ b/slide2vec/hs2p @@ -1 +1 @@ -Subproject commit e63953eb040190cafb3dd36fe2348724af92a24b +Subproject commit b95f6aa18a93dc88c528bf0f1dafb2d7ce0c1307 From 892a1d760e7ee8451b0f4abb17b6b93ec04d160d Mon Sep 17 00:00:00 2001 From: clemsgrs Date: Thu, 1 Jan 2026 16:08:02 +0000 Subject: [PATCH 2/5] separate preprocessing from model config --- slide2vec/configs/__init__.py | 9 ++-- slide2vec/configs/default.yaml | 64 ------------------------ slide2vec/configs/default_embedding.yaml | 34 +++++++++++++ slide2vec/configs/default_tiling.yaml | 1 + slide2vec/utils/config.py | 10 ++-- test/input/config.yaml | 2 +- 6 files changed, 49 insertions(+), 71 deletions(-) delete mode 100644 slide2vec/configs/default.yaml create mode 100644 slide2vec/configs/default_embedding.yaml create mode 120000 slide2vec/configs/default_tiling.yaml diff --git a/slide2vec/configs/__init__.py b/slide2vec/configs/__init__.py index 1530156..e730f02 100644 --- a/slide2vec/configs/__init__.py +++ b/slide2vec/configs/__init__.py @@ -8,10 +8,13 @@ def load_config(config_name: str): return OmegaConf.load(pathlib.Path(__file__).parent.resolve() / config_filename) -default_config = load_config("default") +default_tiling_config = load_config("default_tiling") +default_embedding_config = load_config("default_embedding") def load_and_merge_config(config_name: str): - default_config = OmegaConf.create(default_config) + default_tiling_config = OmegaConf.create(default_tiling_config) + default_embedding_config = OmegaConf.create(default_embedding_config) + default_config = OmegaConf.merge(default_tiling_config, default_embedding_config) loaded_config = load_config(config_name) - return 
OmegaConf.merge(default_config, loaded_config) + return OmegaConf.merge(default_config, loaded_config) \ No newline at end of file diff --git a/slide2vec/configs/default.yaml b/slide2vec/configs/default.yaml deleted file mode 100644 index 82fab7e..0000000 --- a/slide2vec/configs/default.yaml +++ /dev/null @@ -1,64 +0,0 @@ -csv: # path to csv containing slide paths - -output_dir: "output" # output directory -resume: false # resume from a previous run -resume_dirname: # directory name to resume from - -visualize: true # save a visualization of slide tiling in a .jpg - -seed: 0 # seed for reproducibility - -tiling: - read_coordinates_from: # path to a directory containing {wsi.stem}.npy files with tiles coordinates & associated metadata (leave empty to compute the coordinates) - backend: "asap" # backend to use for slide reading - params: - spacing: 0.5 # spacing at which to tile the slide, in microns per pixel - tolerance: 0.05 # tolerance for matching the spacing (float between 0 and 1, deciding how much the spacing can deviate from the one specified in the slide metadata) - tile_size: 256 # size of the tiles to extract, in pixels - overlap: 0.0 # percentage of overlap between two consecutive tiles (float between 0 and 1) - min_tissue_percentage: 0.01 # threshold used to filter out tiles that have less tissue than this value (percentage) - drop_holes: false # whether or not to drop tiles whose center pixel falls withing an identified holes - use_padding: true # whether to pad the border of the slide - seg_params: - downsample: 64 # find the closest downsample in the slide for tissue segmentation - sthresh: 8 # segmentation threshold (positive integer, using a higher threshold leads to less foreground and more background detection) (not used when use_otsu=True) - sthresh_up: 255 # upper threshold value for scaling the binary mask - mthresh: 7 # median filter size (positive, odd integer) - close: 4 # additional morphological closing to apply following initial 
thresholding (positive integer) - use_otsu: false # use otsu's method instead of simple binary thresholding - tissue_pixel_value: 1 # value of tissue pixel in pre-computed segmentation masks - filter_params: - ref_tile_size: ${tiling.params.tile_size} # reference tile size at spacing tiling.spacing - a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.spacing will be kept) - a_h: 2 # area filter threshold for holes (positive integer, the minimum size of detected holes/cavities in foreground contours to avoid, once again relative to the reference tile size ref_tile_size) - max_n_holes: 8 # maximum of holes to consider per detected foreground contours (positive integer, higher values lead to more accurate patching but increase computational cost ; keeps the biggest holes) - visu_params: - downsample: 32 # downsample to use for tiling visualization - -model: - level: "tile" # level at which to extract the features ("tile", "region" or "slide") - name: # foundation model name ["uni", "uni2", "virchow", "virchow2", "prov-gigapath", "h-optimus-0", "h-optimus-1", "titan", "prism"] (leave empty when using a custom model) - mode: "cls" # embedding mode ["cls", "full"] - arch: # architecture of custom model - pretrained_weights: # path to the pretrained weights when using a custom model - batch_size: 256 - tile_size: ${tiling.params.tile_size} - restrict_to_tissue: false # whether to restrict tile content to tissue pixels only when feeding tile through encoder - patch_size: 256 # if level is "region", size used to unroll the region into patches - save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide" - save_latents: false # whether to save the latent 
representations from the model alongside the slide embedding (only supported for 'prism') - -speed: - fp16: false # use mixed precision during model inference - num_workers_tiling: 8 # number of workers for tiling slides - num_workers_embedding: 8 # number of workers for data loading when embedding slides - -wandb: - enable: false - project: "" # wandb project name - username: "" # wandb username - exp_name: "" # wandb experiment name - tags: ["features", "${model.level}", "${tiling.params.tile_size}"] # wandb tags - dir: "/home/user/" - group: - resume_id: "${resume_dirname}" \ No newline at end of file diff --git a/slide2vec/configs/default_embedding.yaml b/slide2vec/configs/default_embedding.yaml new file mode 100644 index 0000000..faea47a --- /dev/null +++ b/slide2vec/configs/default_embedding.yaml @@ -0,0 +1,34 @@ +csv: # path to csv containing slide paths + +output_dir: "output" # output directory +resume: false # resume from a previous run +resume_dirname: # directory name to resume from + +seed: 0 # seed for reproducibility + +model: + level: "tile" # level at which to extract the features ("tile", "region" or "slide") + name: # foundation model name ["uni", "uni2", "virchow", "virchow2", "prov-gigapath", "h-optimus-0", "h-optimus-1", "titan", "prism"] (leave empty when using a custom model) + mode: "cls" # embedding mode ["cls", "full"] + arch: # architecture of custom model + pretrained_weights: # path to the pretrained weights when using a custom model + batch_size: 256 + tile_size: ${tiling.params.tile_size} + restrict_to_tissue: false # whether to restrict tile content to tissue pixels only when feeding tile through encoder + patch_size: 256 # if level is "region", size used to unroll the region into patches + save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide" + save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only 
supported for 'prism') + +speed: + fp16: false # use mixed precision during model inference + num_workers_embedding: 8 # number of workers for data loading when embedding slides + +wandb: + enable: false + project: "" # wandb project name + username: "" # wandb username + exp_name: "" # wandb experiment name + tags: ["features", "${model.level}", "${tiling.params.tile_size}"] # wandb tags + dir: "/home/user/" + group: + resume_id: "${resume_dirname}" \ No newline at end of file diff --git a/slide2vec/configs/default_tiling.yaml b/slide2vec/configs/default_tiling.yaml new file mode 120000 index 0000000..9fd8167 --- /dev/null +++ b/slide2vec/configs/default_tiling.yaml @@ -0,0 +1 @@ +../hs2p/hs2p/configs/default.yaml \ No newline at end of file diff --git a/slide2vec/utils/config.py b/slide2vec/utils/config.py index f396b79..86d77f4 100644 --- a/slide2vec/utils/config.py +++ b/slide2vec/utils/config.py @@ -11,7 +11,7 @@ import slide2vec.distributed as distributed from slide2vec.utils import initialize_wandb, fix_random_seeds, get_sha, setup_logging -from slide2vec.configs import default_config +from slide2vec.configs import default_tiling_config, default_embedding_config logger = logging.getLogger("slide2vec") @@ -25,7 +25,9 @@ def write_config(cfg, output_dir, name="config.yaml"): def get_cfg_from_file(config_file): - default_cfg = OmegaConf.create(default_config) + default_tiling_cfg = OmegaConf.create(default_tiling_config) + default_embedding_cfg = OmegaConf.create(default_embedding_config) + default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg) cfg = OmegaConf.load(config_file) cfg = OmegaConf.merge(default_cfg, cfg) OmegaConf.resolve(cfg) @@ -36,7 +38,9 @@ def get_cfg_from_args(args): if args.output_dir is not None: args.output_dir = os.path.abspath(args.output_dir) args.opts += [f"output_dir={args.output_dir}"] - default_cfg = OmegaConf.create(default_config) + default_tiling_cfg = OmegaConf.create(default_tiling_config) + 
default_embedding_cfg = OmegaConf.create(default_embedding_config) + default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg) cfg = OmegaConf.load(args.config_file) cfg = OmegaConf.merge(default_cfg, cfg, OmegaConf.from_cli(args.opts)) OmegaConf.resolve(cfg) diff --git a/test/input/config.yaml b/test/input/config.yaml index 80783f9..5c07deb 100644 --- a/test/input/config.yaml +++ b/test/input/config.yaml @@ -19,7 +19,7 @@ model: speed: fp16: true - num_workers_tiling: 4 + num_workers: 4 num_workers_embedding: 4 wandb: From b346172beb43f23a83cb550c32c9c39cee1edb9d Mon Sep 17 00:00:00 2001 From: clemsgrs Date: Thu, 1 Jan 2026 16:09:18 +0000 Subject: [PATCH 3/5] rename model config --- slide2vec/configs/__init__.py | 6 +++--- .../configs/{default_embedding.yaml => default_model.yaml} | 0 slide2vec/utils/config.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) rename slide2vec/configs/{default_embedding.yaml => default_model.yaml} (100%) diff --git a/slide2vec/configs/__init__.py b/slide2vec/configs/__init__.py index e730f02..53be251 100644 --- a/slide2vec/configs/__init__.py +++ b/slide2vec/configs/__init__.py @@ -9,12 +9,12 @@ def load_config(config_name: str): default_tiling_config = load_config("default_tiling") -default_embedding_config = load_config("default_embedding") +default_model_config = load_config("default_model") def load_and_merge_config(config_name: str): default_tiling_config = OmegaConf.create(default_tiling_config) - default_embedding_config = OmegaConf.create(default_embedding_config) - default_config = OmegaConf.merge(default_tiling_config, default_embedding_config) + default_model_config = OmegaConf.create(default_model_config) + default_config = OmegaConf.merge(default_tiling_config, default_model_config) loaded_config = load_config(config_name) return OmegaConf.merge(default_config, loaded_config) \ No newline at end of file diff --git a/slide2vec/configs/default_embedding.yaml b/slide2vec/configs/default_model.yaml 
similarity index 100% rename from slide2vec/configs/default_embedding.yaml rename to slide2vec/configs/default_model.yaml diff --git a/slide2vec/utils/config.py b/slide2vec/utils/config.py index 86d77f4..e8ffd2a 100644 --- a/slide2vec/utils/config.py +++ b/slide2vec/utils/config.py @@ -11,7 +11,7 @@ import slide2vec.distributed as distributed from slide2vec.utils import initialize_wandb, fix_random_seeds, get_sha, setup_logging -from slide2vec.configs import default_tiling_config, default_embedding_config +from slide2vec.configs import default_tiling_config, default_model_config logger = logging.getLogger("slide2vec") @@ -26,7 +26,7 @@ def write_config(cfg, output_dir, name="config.yaml"): def get_cfg_from_file(config_file): default_tiling_cfg = OmegaConf.create(default_tiling_config) - default_embedding_cfg = OmegaConf.create(default_embedding_config) + default_embedding_cfg = OmegaConf.create(default_model_config) default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg) cfg = OmegaConf.load(config_file) cfg = OmegaConf.merge(default_cfg, cfg) @@ -39,7 +39,7 @@ def get_cfg_from_args(args): args.output_dir = os.path.abspath(args.output_dir) args.opts += [f"output_dir={args.output_dir}"] default_tiling_cfg = OmegaConf.create(default_tiling_config) - default_embedding_cfg = OmegaConf.create(default_embedding_config) + default_embedding_cfg = OmegaConf.create(default_model_config) default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg) cfg = OmegaConf.load(args.config_file) cfg = OmegaConf.merge(default_cfg, cfg, OmegaConf.from_cli(args.opts)) From ec1a7c7788cbc2a833a99a3e6e5a964de53654f5 Mon Sep 17 00:00:00 2001 From: clemsgrs Date: Thu, 1 Jan 2026 16:09:33 +0000 Subject: [PATCH 4/5] update readme --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4c5ec19..11dca26 100644 --- a/README.md +++ b/README.md @@ -39,9 +39,12 @@ pip install slide2vec 2. 
Create a configuration file - A good starting point is the default configuration file `slide2vec/configs/default.yaml` where parameters are documented.
+ Good starting points are the default configuration files, where parameters are documented:
+ - for preprocessing options: `slide2vec/configs/default_tiling.yaml` + - for model options: `slide2vec/configs/default_model.yaml` + We've also added default configuration files for each of the foundation models currently supported: - - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`, `h0-mini`, `conch`, `musk`, `phikonv2`, `hibou-b`, `hibou-L`, [`kaiko`](https://github.com/kaiko-ai/towards_large_pathology_fms) + - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`, `h0-mini`, `conch`, `musk`, `phikonv2`, `hibou-b`, `hibou-L`, `MidNight12k`, [`kaiko`](https://github.com/kaiko-ai/towards_large_pathology_fms) - slide-level: `prov-gigapath`, `titan`, `prism` From 15451f56d69979182542893332fb1a37649a7995 Mon Sep 17 00:00:00 2001 From: clemsgrs Date: Thu, 1 Jan 2026 16:56:57 +0000 Subject: [PATCH 5/5] add support when masks are not all pre-computed --- slide2vec/data/dataset.py | 1 + slide2vec/embed.py | 9 ++++++--- slide2vec/hs2p | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/slide2vec/data/dataset.py b/slide2vec/data/dataset.py index 6e2b741..e19d0b4 100644 --- a/slide2vec/data/dataset.py +++ b/slide2vec/data/dataset.py @@ -41,6 +41,7 @@ def __init__( path=self.path, mask_path=self.mask_path, backend=self.backend, + segment=self.mask_path is None, segment_params=segment_params, sampling_params=sampling_params, ) diff --git a/slide2vec/embed.py b/slide2vec/embed.py index 4c5d951..50e498a 100644 --- a/slide2vec/embed.py +++ b/slide2vec/embed.py @@ -173,10 +173,13 @@ def main(args): process_list.is_file() ), "Process list CSV not found. Ensure tiling has been run." 
process_df = pd.read_csv(process_list) + cols = ["wsi_name", "wsi_path", "tiling_status", "error", "traceback"] if "feature_status" not in process_df.columns: process_df["feature_status"] = ["tbp"] * len(process_df) - cols = ["wsi_name", "wsi_path", "mask_path", "tiling_status", "feature_status", "error", "traceback"] - process_df = process_df[cols] + if "mask_path" not in process_df.columns: + process_df["mask_path"] = [None] * len(process_df) + cols = ["wsi_name", "wsi_path", "mask_path", "tiling_status", "feature_status", "error", "traceback"] + process_df = process_df[cols] skip_feature_extraction = process_df["feature_status"].str.contains("success").all() @@ -217,7 +220,7 @@ def main(args): total = len(process_stack) wsi_paths_to_process = [Path(x) for x in process_stack.wsi_path.values.tolist()] - mask_paths_to_process = [Path(x) for x in process_stack.mask_path.values.tolist()] + mask_paths_to_process = [Path(x) if x is not None and not pd.isna(x) else None for x in process_stack.mask_path.values.tolist()] combined_paths = zip(wsi_paths_to_process, mask_paths_to_process) features_dir = Path(cfg.output_dir, "features") diff --git a/slide2vec/hs2p b/slide2vec/hs2p index b95f6aa..bfa3bf8 160000 --- a/slide2vec/hs2p +++ b/slide2vec/hs2p @@ -1 +1 @@ -Subproject commit b95f6aa18a93dc88c528bf0f1dafb2d7ce0c1307 +Subproject commit bfa3bf871671548da2824ea06b21c4d9e96b5150