From 30117a4a529a300d6d7c226b231c1959849c3925 Mon Sep 17 00:00:00 2001
From: clemsgrs <clementnet@outlook.com>
Date: Thu, 1 Jan 2026 16:04:09 +0000
Subject: [PATCH 1/5] sync hs2p

---
 slide2vec/hs2p | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/slide2vec/hs2p b/slide2vec/hs2p
index e63953e..b95f6aa 160000
--- a/slide2vec/hs2p
+++ b/slide2vec/hs2p
@@ -1 +1 @@
-Subproject commit e63953eb040190cafb3dd36fe2348724af92a24b
+Subproject commit b95f6aa18a93dc88c528bf0f1dafb2d7ce0c1307

From 892a1d760e7ee8451b0f4abb17b6b93ec04d160d Mon Sep 17 00:00:00 2001
From: clemsgrs <clementnet@outlook.com>
Date: Thu, 1 Jan 2026 16:08:02 +0000
Subject: [PATCH 2/5] separate preprocessing from model config

---
 slide2vec/configs/__init__.py            |  9 ++--
 slide2vec/configs/default.yaml           | 64 ------------------------
 slide2vec/configs/default_embedding.yaml | 34 +++++++++++++
 slide2vec/configs/default_tiling.yaml    |  1 +
 slide2vec/utils/config.py                | 10 ++--
 test/input/config.yaml                   |  2 +-
 6 files changed, 49 insertions(+), 71 deletions(-)
 delete mode 100644 slide2vec/configs/default.yaml
 create mode 100644 slide2vec/configs/default_embedding.yaml
 create mode 120000 slide2vec/configs/default_tiling.yaml

diff --git a/slide2vec/configs/__init__.py b/slide2vec/configs/__init__.py
index 1530156..e730f02 100644
--- a/slide2vec/configs/__init__.py
+++ b/slide2vec/configs/__init__.py
@@ -8,10 +8,13 @@ def load_config(config_name: str):
     return OmegaConf.load(pathlib.Path(__file__).parent.resolve() / config_filename)
 
 
-default_config = load_config("default")
+default_tiling_config = load_config("default_tiling")
+default_embedding_config = load_config("default_embedding")
 
 
 def load_and_merge_config(config_name: str):
-    default_config = OmegaConf.create(default_config)
+    default_tiling_config = OmegaConf.create(default_tiling_config)
+    default_embedding_config = OmegaConf.create(default_embedding_config)
+    default_config = OmegaConf.merge(default_tiling_config, default_embedding_config)
     loaded_config = load_config(config_name)
-    return OmegaConf.merge(default_config, loaded_config)
+    return OmegaConf.merge(default_config, loaded_config)
\ No newline at end of file
diff --git a/slide2vec/configs/default.yaml b/slide2vec/configs/default.yaml
deleted file mode 100644
index 82fab7e..0000000
--- a/slide2vec/configs/default.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-csv: # path to csv containing slide paths
-
-output_dir: "output" # output directory
-resume: false # resume from a previous run
-resume_dirname: # directory name to resume from
-
-visualize: true # save a visualization of slide tiling in a .jpg
-
-seed: 0 # seed for reproducibility
-
-tiling:
-  read_coordinates_from: # path to a directory containing {wsi.stem}.npy files with tiles coordinates & associated metadata (leave empty to compute the coordinates)
-  backend: "asap" # backend to use for slide reading
-  params:
-    spacing: 0.5 # spacing at which to tile the slide, in microns per pixel
-    tolerance: 0.05 # tolerance for matching the spacing (float between 0 and 1, deciding how much the spacing can deviate from the one specified in the slide metadata)
-    tile_size: 256 # size of the tiles to extract, in pixels
-    overlap: 0.0 # percentage of overlap between two consecutive tiles (float between 0 and 1)
-    min_tissue_percentage: 0.01 # threshold used to filter out tiles that have less tissue than this value (percentage)
-    drop_holes: false # whether or not to drop tiles whose center pixel falls withing an identified holes
-    use_padding: true # whether to pad the border of the slide
-  seg_params:
-    downsample: 64 # find the closest downsample in the slide for tissue segmentation
-    sthresh: 8 # segmentation threshold (positive integer, using a higher threshold leads to less foreground and more background detection) (not used when use_otsu=True)
-    sthresh_up: 255 # upper threshold value for scaling the binary mask
-    mthresh: 7 # median filter size (positive, odd integer)
-    close: 4 # additional morphological closing to apply following initial thresholding (positive integer)
-    use_otsu: false # use otsu's method instead of simple binary thresholding
-    tissue_pixel_value: 1 # value of tissue pixel in pre-computed segmentation masks
-  filter_params:
-    ref_tile_size: ${tiling.params.tile_size} # reference tile size at spacing tiling.spacing
-    a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.spacing will be kept)
-    a_h: 2 # area filter threshold for holes (positive integer, the minimum size of detected holes/cavities in foreground contours to avoid, once again relative to the reference tile size ref_tile_size)
-    max_n_holes: 8 # maximum of holes to consider per detected foreground contours (positive integer, higher values lead to more accurate patching but increase computational cost ; keeps the biggest holes)
-  visu_params:
-    downsample: 32 # downsample to use for tiling visualization
-
-model:
-  level: "tile" # level at which to extract the features ("tile", "region" or "slide")
-  name: # foundation model name ["uni", "uni2", "virchow", "virchow2", "prov-gigapath", "h-optimus-0", "h-optimus-1", "titan", "prism"] (leave empty when using a custom model)
-  mode: "cls" # embedding mode ["cls", "full"]
-  arch: # architecture of custom model
-  pretrained_weights: # path to the pretrained weights when using a custom model
-  batch_size: 256
-  tile_size: ${tiling.params.tile_size}
-  restrict_to_tissue: false # whether to restrict tile content to tissue pixels only when feeding tile through encoder
-  patch_size: 256 # if level is "region", size used to unroll the region into patches
-  save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide"
-  save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only supported for 'prism')
-
-speed:
-  fp16: false # use mixed precision during model inference
-  num_workers_tiling: 8 # number of workers for tiling slides
-  num_workers_embedding: 8 # number of workers for data loading when embedding slides
-
-wandb:
-  enable: false
-  project: "" # wandb project name
-  username: "" # wandb username
-  exp_name: "" # wandb experiment name
-  tags: ["features", "${model.level}", "${tiling.params.tile_size}"] # wandb tags
-  dir: "/home/user/"
-  group:
-  resume_id: "${resume_dirname}"
\ No newline at end of file
diff --git a/slide2vec/configs/default_embedding.yaml b/slide2vec/configs/default_embedding.yaml
new file mode 100644
index 0000000..faea47a
--- /dev/null
+++ b/slide2vec/configs/default_embedding.yaml
@@ -0,0 +1,34 @@
+csv: # path to csv containing slide paths
+
+output_dir: "output" # output directory
+resume: false # resume from a previous run
+resume_dirname: # directory name to resume from
+
+seed: 0 # seed for reproducibility
+
+model:
+  level: "tile" # level at which to extract the features ("tile", "region" or "slide")
+  name: # foundation model name ["uni", "uni2", "virchow", "virchow2", "prov-gigapath", "h-optimus-0", "h-optimus-1", "titan", "prism"] (leave empty when using a custom model)
+  mode: "cls" # embedding mode ["cls", "full"]
+  arch: # architecture of custom model
+  pretrained_weights: # path to the pretrained weights when using a custom model
+  batch_size: 256
+  tile_size: ${tiling.params.tile_size}
+  restrict_to_tissue: false # whether to restrict tile content to tissue pixels only when feeding tile through encoder
+  patch_size: 256 # if level is "region", size used to unroll the region into patches
+  save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide"
+  save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only supported for 'prism')
+
+speed:
+  fp16: false # use mixed precision during model inference
+  num_workers_embedding: 8 # number of workers for data loading when embedding slides
+
+wandb:
+  enable: false
+  project: "" # wandb project name
+  username: "" # wandb username
+  exp_name: "" # wandb experiment name
+  tags: ["features", "${model.level}", "${tiling.params.tile_size}"] # wandb tags
+  dir: "/home/user/"
+  group:
+  resume_id: "${resume_dirname}"
\ No newline at end of file
diff --git a/slide2vec/configs/default_tiling.yaml b/slide2vec/configs/default_tiling.yaml
new file mode 120000
index 0000000..9fd8167
--- /dev/null
+++ b/slide2vec/configs/default_tiling.yaml
@@ -0,0 +1 @@
+../hs2p/hs2p/configs/default.yaml
\ No newline at end of file
diff --git a/slide2vec/utils/config.py b/slide2vec/utils/config.py
index f396b79..86d77f4 100644
--- a/slide2vec/utils/config.py
+++ b/slide2vec/utils/config.py
@@ -11,7 +11,7 @@
 
 import slide2vec.distributed as distributed
 from slide2vec.utils import initialize_wandb, fix_random_seeds, get_sha, setup_logging
-from slide2vec.configs import default_config
+from slide2vec.configs import default_tiling_config, default_embedding_config
 
 logger = logging.getLogger("slide2vec")
 
@@ -25,7 +25,9 @@ def write_config(cfg, output_dir, name="config.yaml"):
 
 
 def get_cfg_from_file(config_file):
-    default_cfg = OmegaConf.create(default_config)
+    default_tiling_cfg = OmegaConf.create(default_tiling_config)
+    default_embedding_cfg = OmegaConf.create(default_embedding_config)
+    default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg)
     cfg = OmegaConf.load(config_file)
     cfg = OmegaConf.merge(default_cfg, cfg)
     OmegaConf.resolve(cfg)
@@ -36,7 +38,9 @@ def get_cfg_from_args(args):
     if args.output_dir is not None:
         args.output_dir = os.path.abspath(args.output_dir)
         args.opts += [f"output_dir={args.output_dir}"]
-    default_cfg = OmegaConf.create(default_config)
+    default_tiling_cfg = OmegaConf.create(default_tiling_config)
+    default_embedding_cfg = OmegaConf.create(default_embedding_config)
+    default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg)
     cfg = OmegaConf.load(args.config_file)
     cfg = OmegaConf.merge(default_cfg, cfg, OmegaConf.from_cli(args.opts))
     OmegaConf.resolve(cfg)
diff --git a/test/input/config.yaml b/test/input/config.yaml
index 80783f9..5c07deb 100644
--- a/test/input/config.yaml
+++ b/test/input/config.yaml
@@ -19,7 +19,7 @@ model:
 
 speed:
   fp16: true
-  num_workers_tiling: 4
+  num_workers: 4
   num_workers_embedding: 4
 
 wandb:

From b346172beb43f23a83cb550c32c9c39cee1edb9d Mon Sep 17 00:00:00 2001
From: clemsgrs <clementnet@outlook.com>
Date: Thu, 1 Jan 2026 16:09:18 +0000
Subject: [PATCH 3/5] rename model config

---
 slide2vec/configs/__init__.py                               | 11 +++++++----
 .../configs/{default_embedding.yaml => default_model.yaml}  |  0
 slide2vec/utils/config.py                                   |  6 +++---
 3 files changed, 10 insertions(+), 7 deletions(-)
 rename slide2vec/configs/{default_embedding.yaml => default_model.yaml} (100%)

diff --git a/slide2vec/configs/__init__.py b/slide2vec/configs/__init__.py
index e730f02..53be251 100644
--- a/slide2vec/configs/__init__.py
+++ b/slide2vec/configs/__init__.py
@@ -9,12 +9,15 @@ def load_config(config_name: str):
 
 
 default_tiling_config = load_config("default_tiling")
-default_embedding_config = load_config("default_embedding")
+default_model_config = load_config("default_model")
 
 
 def load_and_merge_config(config_name: str):
-    default_tiling_config = OmegaConf.create(default_tiling_config)
-    default_embedding_config = OmegaConf.create(default_embedding_config)
-    default_config = OmegaConf.merge(default_tiling_config, default_embedding_config)
+    """Load the named config and merge it over the tiling and model defaults."""
+    # Keep local names distinct from the module-level defaults: rebinding those
+    # names here would make them function-local and raise UnboundLocalError.
+    tiling_defaults = OmegaConf.create(default_tiling_config)
+    model_defaults = OmegaConf.create(default_model_config)
+    default_config = OmegaConf.merge(tiling_defaults, model_defaults)
     loaded_config = load_config(config_name)
     return OmegaConf.merge(default_config, loaded_config)
\ No newline at end of file
diff --git a/slide2vec/configs/default_embedding.yaml b/slide2vec/configs/default_model.yaml
similarity index 100%
rename from slide2vec/configs/default_embedding.yaml
rename to slide2vec/configs/default_model.yaml
diff --git a/slide2vec/utils/config.py b/slide2vec/utils/config.py
index 86d77f4..e8ffd2a 100644
--- a/slide2vec/utils/config.py
+++ b/slide2vec/utils/config.py
@@ -11,7 +11,7 @@
 
 import slide2vec.distributed as distributed
 from slide2vec.utils import initialize_wandb, fix_random_seeds, get_sha, setup_logging
-from slide2vec.configs import default_tiling_config, default_embedding_config
+from slide2vec.configs import default_tiling_config, default_model_config
 
 logger = logging.getLogger("slide2vec")
 
@@ -26,7 +26,7 @@ def write_config(cfg, output_dir, name="config.yaml"):
 
 def get_cfg_from_file(config_file):
     default_tiling_cfg = OmegaConf.create(default_tiling_config)
-    default_embedding_cfg = OmegaConf.create(default_embedding_config)
+    default_embedding_cfg = OmegaConf.create(default_model_config)
     default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg)
     cfg = OmegaConf.load(config_file)
     cfg = OmegaConf.merge(default_cfg, cfg)
@@ -39,7 +39,7 @@ def get_cfg_from_args(args):
         args.output_dir = os.path.abspath(args.output_dir)
         args.opts += [f"output_dir={args.output_dir}"]
     default_tiling_cfg = OmegaConf.create(default_tiling_config)
-    default_embedding_cfg = OmegaConf.create(default_embedding_config)
+    default_embedding_cfg = OmegaConf.create(default_model_config)
     default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg)
     cfg = OmegaConf.load(args.config_file)
     cfg = OmegaConf.merge(default_cfg, cfg, OmegaConf.from_cli(args.opts))

From ec1a7c7788cbc2a833a99a3e6e5a964de53654f5 Mon Sep 17 00:00:00 2001
From: clemsgrs <clementnet@outlook.com>
Date: Thu, 1 Jan 2026 16:09:33 +0000
Subject: [PATCH 4/5] update readme

---
 README.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 4c5ec19..11dca26 100644
--- a/README.md
+++ b/README.md
@@ -39,9 +39,12 @@ pip install slide2vec
 
 2. Create a configuration file
 
-   A good starting point is the default configuration file `slide2vec/configs/default.yaml` where parameters are documented.<br>
+   Good starting points are the default configuration files, where parameters are documented:<br>
+   - for preprocessing options: `slide2vec/configs/default_tiling.yaml`
+   - for model options: `slide2vec/configs/default_model.yaml`
+
    We've also added default configuration files for each of the foundation models currently supported:
-   - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`, `h0-mini`, `conch`, `musk`, `phikonv2`, `hibou-b`, `hibou-L`, [`kaiko`](https://github.com/kaiko-ai/towards_large_pathology_fms)
+   - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`, `h0-mini`, `conch`, `musk`, `phikonv2`, `hibou-b`, `hibou-L`, `MidNight12k`, [`kaiko`](https://github.com/kaiko-ai/towards_large_pathology_fms)
    - slide-level: `prov-gigapath`, `titan`, `prism`
 
 

From 15451f56d69979182542893332fb1a37649a7995 Mon Sep 17 00:00:00 2001
From: clemsgrs <clementnet@outlook.com>
Date: Thu, 1 Jan 2026 16:56:57 +0000
Subject: [PATCH 5/5] add support when masks are not all pre-computed

---
 slide2vec/data/dataset.py | 1 +
 slide2vec/embed.py        | 9 ++++++---
 slide2vec/hs2p            | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/slide2vec/data/dataset.py b/slide2vec/data/dataset.py
index 6e2b741..e19d0b4 100644
--- a/slide2vec/data/dataset.py
+++ b/slide2vec/data/dataset.py
@@ -41,6 +41,7 @@ def __init__(
                 path=self.path,
                 mask_path=self.mask_path,
                 backend=self.backend,
+                segment=self.mask_path is None,
                 segment_params=segment_params,
                 sampling_params=sampling_params,
             )
diff --git a/slide2vec/embed.py b/slide2vec/embed.py
index 4c5d951..50e498a 100644
--- a/slide2vec/embed.py
+++ b/slide2vec/embed.py
@@ -173,10 +173,13 @@ def main(args):
         process_list.is_file()
     ), "Process list CSV not found. Ensure tiling has been run."
     process_df = pd.read_csv(process_list)
+    cols = ["wsi_name", "wsi_path", "tiling_status", "error", "traceback"]
     if "feature_status" not in process_df.columns:
         process_df["feature_status"] = ["tbp"] * len(process_df)
-        cols = ["wsi_name", "wsi_path", "mask_path", "tiling_status", "feature_status", "error", "traceback"]
-        process_df = process_df[cols]
+    if "mask_path" not in process_df.columns:
+        process_df["mask_path"] = [None] * len(process_df)
+    cols = ["wsi_name", "wsi_path", "mask_path", "tiling_status", "feature_status", "error", "traceback"]
+    process_df = process_df[cols]
 
     skip_feature_extraction = process_df["feature_status"].str.contains("success").all()
 
@@ -217,7 +220,7 @@ def main(args):
         total = len(process_stack)
 
         wsi_paths_to_process = [Path(x) for x in process_stack.wsi_path.values.tolist()]
-        mask_paths_to_process = [Path(x) for x in process_stack.mask_path.values.tolist()]
+        mask_paths_to_process = [Path(x) if x is not None and not pd.isna(x) else None  for x in process_stack.mask_path.values.tolist()]
         combined_paths = zip(wsi_paths_to_process, mask_paths_to_process)
 
         features_dir = Path(cfg.output_dir, "features")
diff --git a/slide2vec/hs2p b/slide2vec/hs2p
index b95f6aa..bfa3bf8 160000
--- a/slide2vec/hs2p
+++ b/slide2vec/hs2p
@@ -1 +1 @@
-Subproject commit b95f6aa18a93dc88c528bf0f1dafb2d7ce0c1307
+Subproject commit bfa3bf871671548da2824ea06b21c4d9e96b5150