diff --git a/README.md b/README.md
index 4c5ec19..11dca26 100644
--- a/README.md
+++ b/README.md
@@ -39,9 +39,12 @@ pip install slide2vec
2. Create a configuration file
- A good starting point is the default configuration file `slide2vec/configs/default.yaml` where parameters are documented.
+ Good starting points are the default configuration files, where parameters are documented:
+ - for preprocessing options: `slide2vec/configs/default_tiling.yaml`
+ - for model options: `slide2vec/configs/default_model.yaml`
+
We've also added default configuration files for each of the foundation models currently supported:
- - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`, `h0-mini`, `conch`, `musk`, `phikonv2`, `hibou-b`, `hibou-L`, [`kaiko`](https://github.com/kaiko-ai/towards_large_pathology_fms)
+ - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`, `h0-mini`, `conch`, `musk`, `phikonv2`, `hibou-b`, `hibou-L`, `MidNight12k`, [`kaiko`](https://github.com/kaiko-ai/towards_large_pathology_fms)
- slide-level: `prov-gigapath`, `titan`, `prism`
diff --git a/slide2vec/configs/__init__.py b/slide2vec/configs/__init__.py
index 1530156..53be251 100644
--- a/slide2vec/configs/__init__.py
+++ b/slide2vec/configs/__init__.py
@@ -8,10 +8,13 @@ def load_config(config_name: str):
return OmegaConf.load(pathlib.Path(__file__).parent.resolve() / config_filename)
-default_config = load_config("default")
+default_tiling_config = load_config("default_tiling")
+default_model_config = load_config("default_model")
def load_and_merge_config(config_name: str):
- default_config = OmegaConf.create(default_config)
+ tiling_defaults = OmegaConf.create(default_tiling_config)  # distinct local names: assigning to the module-level names inside the function would make them locals and raise UnboundLocalError on read
+ model_defaults = OmegaConf.create(default_model_config)
+ default_config = OmegaConf.merge(tiling_defaults, model_defaults)
loaded_config = load_config(config_name)
- return OmegaConf.merge(default_config, loaded_config)
+ return OmegaConf.merge(default_config, loaded_config)
\ No newline at end of file
diff --git a/slide2vec/configs/default.yaml b/slide2vec/configs/default.yaml
deleted file mode 100644
index 82fab7e..0000000
--- a/slide2vec/configs/default.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-csv: # path to csv containing slide paths
-
-output_dir: "output" # output directory
-resume: false # resume from a previous run
-resume_dirname: # directory name to resume from
-
-visualize: true # save a visualization of slide tiling in a .jpg
-
-seed: 0 # seed for reproducibility
-
-tiling:
- read_coordinates_from: # path to a directory containing {wsi.stem}.npy files with tiles coordinates & associated metadata (leave empty to compute the coordinates)
- backend: "asap" # backend to use for slide reading
- params:
- spacing: 0.5 # spacing at which to tile the slide, in microns per pixel
- tolerance: 0.05 # tolerance for matching the spacing (float between 0 and 1, deciding how much the spacing can deviate from the one specified in the slide metadata)
- tile_size: 256 # size of the tiles to extract, in pixels
- overlap: 0.0 # percentage of overlap between two consecutive tiles (float between 0 and 1)
- min_tissue_percentage: 0.01 # threshold used to filter out tiles that have less tissue than this value (percentage)
- drop_holes: false # whether or not to drop tiles whose center pixel falls withing an identified holes
- use_padding: true # whether to pad the border of the slide
- seg_params:
- downsample: 64 # find the closest downsample in the slide for tissue segmentation
- sthresh: 8 # segmentation threshold (positive integer, using a higher threshold leads to less foreground and more background detection) (not used when use_otsu=True)
- sthresh_up: 255 # upper threshold value for scaling the binary mask
- mthresh: 7 # median filter size (positive, odd integer)
- close: 4 # additional morphological closing to apply following initial thresholding (positive integer)
- use_otsu: false # use otsu's method instead of simple binary thresholding
- tissue_pixel_value: 1 # value of tissue pixel in pre-computed segmentation masks
- filter_params:
- ref_tile_size: ${tiling.params.tile_size} # reference tile size at spacing tiling.spacing
- a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.spacing will be kept)
- a_h: 2 # area filter threshold for holes (positive integer, the minimum size of detected holes/cavities in foreground contours to avoid, once again relative to the reference tile size ref_tile_size)
- max_n_holes: 8 # maximum of holes to consider per detected foreground contours (positive integer, higher values lead to more accurate patching but increase computational cost ; keeps the biggest holes)
- visu_params:
- downsample: 32 # downsample to use for tiling visualization
-
-model:
- level: "tile" # level at which to extract the features ("tile", "region" or "slide")
- name: # foundation model name ["uni", "uni2", "virchow", "virchow2", "prov-gigapath", "h-optimus-0", "h-optimus-1", "titan", "prism"] (leave empty when using a custom model)
- mode: "cls" # embedding mode ["cls", "full"]
- arch: # architecture of custom model
- pretrained_weights: # path to the pretrained weights when using a custom model
- batch_size: 256
- tile_size: ${tiling.params.tile_size}
- restrict_to_tissue: false # whether to restrict tile content to tissue pixels only when feeding tile through encoder
- patch_size: 256 # if level is "region", size used to unroll the region into patches
- save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide"
- save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only supported for 'prism')
-
-speed:
- fp16: false # use mixed precision during model inference
- num_workers_tiling: 8 # number of workers for tiling slides
- num_workers_embedding: 8 # number of workers for data loading when embedding slides
-
-wandb:
- enable: false
- project: "" # wandb project name
- username: "" # wandb username
- exp_name: "" # wandb experiment name
- tags: ["features", "${model.level}", "${tiling.params.tile_size}"] # wandb tags
- dir: "/home/user/"
- group:
- resume_id: "${resume_dirname}"
\ No newline at end of file
diff --git a/slide2vec/configs/default_model.yaml b/slide2vec/configs/default_model.yaml
new file mode 100644
index 0000000..faea47a
--- /dev/null
+++ b/slide2vec/configs/default_model.yaml
@@ -0,0 +1,34 @@
+csv: # path to csv containing slide paths
+
+output_dir: "output" # output directory
+resume: false # resume from a previous run
+resume_dirname: # directory name to resume from
+
+seed: 0 # seed for reproducibility
+
+model:
+ level: "tile" # level at which to extract the features ("tile", "region" or "slide")
+ name: # foundation model name ["uni", "uni2", "virchow", "virchow2", "prov-gigapath", "h-optimus-0", "h-optimus-1", "titan", "prism"] (leave empty when using a custom model)
+ mode: "cls" # embedding mode ["cls", "full"]
+ arch: # architecture of custom model
+ pretrained_weights: # path to the pretrained weights when using a custom model
+ batch_size: 256
+ tile_size: ${tiling.params.tile_size}
+ restrict_to_tissue: false # whether to restrict tile content to tissue pixels only when feeding tile through encoder
+ patch_size: 256 # if level is "region", size used to unroll the region into patches
+ save_tile_embeddings: false # whether to save tile embeddings alongside the pooled slide embedding when level is "slide"
+ save_latents: false # whether to save the latent representations from the model alongside the slide embedding (only supported for 'prism')
+
+speed:
+ fp16: false # use mixed precision during model inference
+ num_workers_embedding: 8 # number of workers for data loading when embedding slides
+
+wandb:
+ enable: false
+ project: "" # wandb project name
+ username: "" # wandb username
+ exp_name: "" # wandb experiment name
+ tags: ["features", "${model.level}", "${tiling.params.tile_size}"] # wandb tags
+ dir: "/home/user/"
+ group:
+ resume_id: "${resume_dirname}"
\ No newline at end of file
diff --git a/slide2vec/configs/default_tiling.yaml b/slide2vec/configs/default_tiling.yaml
new file mode 120000
index 0000000..9fd8167
--- /dev/null
+++ b/slide2vec/configs/default_tiling.yaml
@@ -0,0 +1 @@
+../hs2p/hs2p/configs/default.yaml
\ No newline at end of file
diff --git a/slide2vec/data/dataset.py b/slide2vec/data/dataset.py
index 6e2b741..e19d0b4 100644
--- a/slide2vec/data/dataset.py
+++ b/slide2vec/data/dataset.py
@@ -41,6 +41,7 @@ def __init__(
path=self.path,
mask_path=self.mask_path,
backend=self.backend,
+ segment=self.mask_path is None,
segment_params=segment_params,
sampling_params=sampling_params,
)
diff --git a/slide2vec/embed.py b/slide2vec/embed.py
index 4c5d951..50e498a 100644
--- a/slide2vec/embed.py
+++ b/slide2vec/embed.py
@@ -173,10 +173,13 @@ def main(args):
process_list.is_file()
), "Process list CSV not found. Ensure tiling has been run."
process_df = pd.read_csv(process_list)
+ cols = ["wsi_name", "wsi_path", "tiling_status", "error", "traceback"]
if "feature_status" not in process_df.columns:
process_df["feature_status"] = ["tbp"] * len(process_df)
- cols = ["wsi_name", "wsi_path", "mask_path", "tiling_status", "feature_status", "error", "traceback"]
- process_df = process_df[cols]
+ if "mask_path" not in process_df.columns:
+ process_df["mask_path"] = [None] * len(process_df)
+ cols = ["wsi_name", "wsi_path", "mask_path", "tiling_status", "feature_status", "error", "traceback"]
+ process_df = process_df[cols]
skip_feature_extraction = process_df["feature_status"].str.contains("success").all()
@@ -217,7 +220,7 @@ def main(args):
total = len(process_stack)
wsi_paths_to_process = [Path(x) for x in process_stack.wsi_path.values.tolist()]
- mask_paths_to_process = [Path(x) for x in process_stack.mask_path.values.tolist()]
+ mask_paths_to_process = [Path(x) if x is not None and not pd.isna(x) else None for x in process_stack.mask_path.values.tolist()]
combined_paths = zip(wsi_paths_to_process, mask_paths_to_process)
features_dir = Path(cfg.output_dir, "features")
diff --git a/slide2vec/hs2p b/slide2vec/hs2p
index e63953e..bfa3bf8 160000
--- a/slide2vec/hs2p
+++ b/slide2vec/hs2p
@@ -1 +1 @@
-Subproject commit e63953eb040190cafb3dd36fe2348724af92a24b
+Subproject commit bfa3bf871671548da2824ea06b21c4d9e96b5150
diff --git a/slide2vec/utils/config.py b/slide2vec/utils/config.py
index f396b79..e8ffd2a 100644
--- a/slide2vec/utils/config.py
+++ b/slide2vec/utils/config.py
@@ -11,7 +11,7 @@
import slide2vec.distributed as distributed
from slide2vec.utils import initialize_wandb, fix_random_seeds, get_sha, setup_logging
-from slide2vec.configs import default_config
+from slide2vec.configs import default_tiling_config, default_model_config
logger = logging.getLogger("slide2vec")
@@ -25,7 +25,9 @@ def write_config(cfg, output_dir, name="config.yaml"):
def get_cfg_from_file(config_file):
- default_cfg = OmegaConf.create(default_config)
+ default_tiling_cfg = OmegaConf.create(default_tiling_config)
+ default_embedding_cfg = OmegaConf.create(default_model_config)
+ default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg)
cfg = OmegaConf.load(config_file)
cfg = OmegaConf.merge(default_cfg, cfg)
OmegaConf.resolve(cfg)
@@ -36,7 +38,9 @@ def get_cfg_from_args(args):
if args.output_dir is not None:
args.output_dir = os.path.abspath(args.output_dir)
args.opts += [f"output_dir={args.output_dir}"]
- default_cfg = OmegaConf.create(default_config)
+ default_tiling_cfg = OmegaConf.create(default_tiling_config)
+ default_embedding_cfg = OmegaConf.create(default_model_config)
+ default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg)
cfg = OmegaConf.load(args.config_file)
cfg = OmegaConf.merge(default_cfg, cfg, OmegaConf.from_cli(args.opts))
OmegaConf.resolve(cfg)
diff --git a/test/input/config.yaml b/test/input/config.yaml
index 80783f9..5c07deb 100644
--- a/test/input/config.yaml
+++ b/test/input/config.yaml
@@ -19,7 +19,7 @@ model:
speed:
fp16: true
- num_workers_tiling: 4
+ num_workers: 4
num_workers_embedding: 4
wandb: