From 51a5b5298910829da6d5c77eb7a1e491edcd43c8 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Thu, 30 Jan 2025 19:27:38 +0200 Subject: [PATCH 01/88] init the pydantic configuration --- GANDLF/Configuration/configuration_manager.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 GANDLF/Configuration/configuration_manager.py diff --git a/GANDLF/Configuration/configuration_manager.py b/GANDLF/Configuration/configuration_manager.py new file mode 100644 index 000000000..e69de29bb From 2458e5efe9f641ee521b054132c4e34376f95727 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Thu, 30 Jan 2025 22:30:23 +0200 Subject: [PATCH 02/88] add default_parameters --- GANDLF/Configuration/configuration_manager.py | 0 GANDLF/Configuration/default_parameters.py | 100 ++++++++++++++++++ 2 files changed, 100 insertions(+) delete mode 100644 GANDLF/Configuration/configuration_manager.py create mode 100644 GANDLF/Configuration/default_parameters.py diff --git a/GANDLF/Configuration/configuration_manager.py b/GANDLF/Configuration/configuration_manager.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/GANDLF/Configuration/default_parameters.py b/GANDLF/Configuration/default_parameters.py new file mode 100644 index 000000000..062ab40b5 --- /dev/null +++ b/GANDLF/Configuration/default_parameters.py @@ -0,0 +1,100 @@ +from pydantic import BaseModel, Field +from typing import Optional, Dict + +class DefaultParameters(BaseModel): + weighted_loss: bool = Field( + default=False, + description="Whether weighted loss is to be used or not." + ) + verbose: bool = Field( + default=False, + description="General application verbosity." + ) + q_verbose: bool = Field( + default=False, + description="Queue construction verbosity." + ) + medcam_enabled: bool = Field( + default=False, + description="Enable interpretability via medcam." + ) + save_training: bool = Field( + default=False, + description="Save outputs during training." + ) + save_output: bool = Field( + default=False, + description="Save outputs during validation/testing." + ) + in_memory: bool = Field( + default=False, + description="Pin data to CPU memory." + ) + pin_memory_dataloader: bool = Field( + default=False, + description="Pin data to GPU memory." + ) + scaling_factor: int = Field( + default=1, + description="Scaling factor for regression problems." + ) + q_max_length: int = Field( + default=100, + description="The max length of the queue." + ) + q_samples_per_volume: int = Field( + default=10, + description="Number of samples per volume." + ) + q_num_workers: int = Field( + default=4, + description="Number of worker threads to use." + ) + num_epochs: int = Field( + default=100, + description="Total number of epochs to train." + ) + patience: int = Field( + default=100, + description="Number of epochs to wait for performance improvement." + ) + batch_size: int = Field( + default=1, + description="Default batch size for training." + ) + learning_rate: float = Field( + default=0.001, + description="Default learning rate." + ) + clip_grad: Optional[float] = Field( + default=None, + description="Gradient clipping value." + ) + track_memory_usage: bool = Field( + default=False, + description="Enable memory usage tracking." + ) + memory_save_mode: bool = Field( + default=False, + description="Enable memory-saving mode. If enabled, resize/resample will save files to disk." + ) + print_rgb_label_warning: bool = Field( + default=True, + description="Print a warning for RGB labels." + ) + data_postprocessing: Dict = Field( + default={}, + description="Default data postprocessing configuration." + ) + grid_aggregator_overlap: str = Field( + default="crop", + description="Default grid aggregator overlap strategy." + ) + determinism: bool = Field( + default=False, + description="Enable deterministic computation." + ) + previous_parameters: Optional[Dict] = Field( + default=None, + description="Previous parameters to be used for resuming training and performing sanity checks." + ) \ No newline at end of file From df290e3af28cbbc903c9a1937531a6d82e288c1b Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Thu, 30 Jan 2025 22:31:19 +0200 Subject: [PATCH 03/88] add utils and generate_and_save_markdown function --- .../Configuration/generate_documentation.py | 4 +++ GANDLF/Configuration/utils.py | 34 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 GANDLF/Configuration/generate_documentation.py create mode 100644 GANDLF/Configuration/utils.py diff --git a/GANDLF/Configuration/generate_documentation.py b/GANDLF/Configuration/generate_documentation.py new file mode 100644 index 000000000..f03e864d0 --- /dev/null +++ b/GANDLF/Configuration/generate_documentation.py @@ -0,0 +1,4 @@ +import utils +import default_parameters + +markdown_output = utils.generate_and_save_markdown(default_parameters.DefaultParameters, "doc/user_model.md") \ No newline at end of file diff --git a/GANDLF/Configuration/utils.py b/GANDLF/Configuration/utils.py new file mode 100644 index 000000000..6da42a41b --- /dev/null +++ b/GANDLF/Configuration/utils.py @@ -0,0 +1,34 @@ +from typing import Type +from pydantic import BaseModel + +from typing import Type +from pydantic import BaseModel + +def generate_and_save_markdown(model: Type[BaseModel], file_path: str) -> None: + schema = model.schema() + markdown = [] + + # Add title + markdown.append(f"# {schema['title']}\n") + + # Add description if available + if "description" in schema: + markdown.append(f"{schema['description']}\n") + + # Add fields table + markdown.append("## Parameters\n") + markdown.append("| Field | Type | Description | Default |") + markdown.append("|----------------|----------------|-----------------------|------------------|") + + for field_name, field_info in schema["properties"].items(): + # Extract field details + field_type = field_info.get("type", "N/A") + description = field_info.get("description", "N/A") + default = field_info.get("default", "N/A") + + # Add row to the table + markdown.append(f"| `{field_name}` | `{field_type}` | {description} | `{default}` |") + + # Write to file + with open(file_path, "w", encoding="utf-8") as file: + file.write("\n".join(markdown)) \ No newline at end of file From 66bf03d8e06e333fe25a229a90eebfd6b411f3ae Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Thu, 30 Jan 2025 22:31:44 +0200 Subject: [PATCH 04/88] add user_defined_parameters.py --- GANDLF/Configuration/user_defined_parameters.py | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 GANDLF/Configuration/user_defined_parameters.py diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py new file mode 100644 index 000000000..9232c2773 --- /dev/null +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -0,0 +1,4 @@ +from pydantic import BaseModel, Field +from typing import Optional, Dict + +class UserDefinedParameters(BaseModel): From 9539f06205c6c3f82b7748c6a76827294504e49e Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Thu, 30 Jan 2025 22:34:03 +0200 Subject: [PATCH 05/88] blacked . --- GANDLF/Configuration/default_parameters.py | 157 +++++++----------- .../Configuration/generate_documentation.py | 4 +- GANDLF/Configuration/utils.py | 11 +- 3 files changed, 72 insertions(+), 100 deletions(-) diff --git a/GANDLF/Configuration/default_parameters.py b/GANDLF/Configuration/default_parameters.py index 062ab40b5..5afe342de 100644 --- a/GANDLF/Configuration/default_parameters.py +++ b/GANDLF/Configuration/default_parameters.py @@ -1,100 +1,65 @@ from pydantic import BaseModel, Field from typing import Optional, Dict + class DefaultParameters(BaseModel): - weighted_loss: bool = Field( - default=False, - description="Whether weighted loss is to be used or not." - ) - verbose: bool = Field( - default=False, - description="General application verbosity." - ) - q_verbose: bool = Field( - default=False, - description="Queue construction verbosity." - ) - medcam_enabled: bool = Field( - default=False, - description="Enable interpretability via medcam." - ) - save_training: bool = Field( - default=False, - description="Save outputs during training." - ) - save_output: bool = Field( - default=False, - description="Save outputs during validation/testing." - ) - in_memory: bool = Field( - default=False, - description="Pin data to CPU memory." - ) - pin_memory_dataloader: bool = Field( - default=False, - description="Pin data to GPU memory." - ) - scaling_factor: int = Field( - default=1, - description="Scaling factor for regression problems." - ) - q_max_length: int = Field( - default=100, - description="The max length of the queue." - ) - q_samples_per_volume: int = Field( - default=10, - description="Number of samples per volume." - ) - q_num_workers: int = Field( - default=4, - description="Number of worker threads to use." - ) - num_epochs: int = Field( - default=100, - description="Total number of epochs to train." - ) - patience: int = Field( - default=100, - description="Number of epochs to wait for performance improvement." - ) - batch_size: int = Field( - default=1, - description="Default batch size for training." - ) - learning_rate: float = Field( - default=0.001, - description="Default learning rate." - ) - clip_grad: Optional[float] = Field( - default=None, - description="Gradient clipping value." - ) - track_memory_usage: bool = Field( - default=False, - description="Enable memory usage tracking." - ) - memory_save_mode: bool = Field( - default=False, - description="Enable memory-saving mode. If enabled, resize/resample will save files to disk." - ) - print_rgb_label_warning: bool = Field( - default=True, - description="Print a warning for RGB labels." - ) - data_postprocessing: Dict = Field( - default={}, - description="Default data postprocessing configuration." - ) - grid_aggregator_overlap: str = Field( - default="crop", - description="Default grid aggregator overlap strategy." - ) - determinism: bool = Field( - default=False, - description="Enable deterministic computation." - ) - previous_parameters: Optional[Dict] = Field( - default=None, - description="Previous parameters to be used for resuming training and performing sanity checks." - ) \ No newline at end of file + weighted_loss: bool = Field( + default=False, description="Whether weighted loss is to be used or not." + ) + verbose: bool = Field(default=False, description="General application verbosity.") + q_verbose: bool = Field(default=False, description="Queue construction verbosity.") + medcam_enabled: bool = Field( + default=False, description="Enable interpretability via medcam." + ) + save_training: bool = Field( + default=False, description="Save outputs during training." + ) + save_output: bool = Field( + default=False, description="Save outputs during validation/testing." + ) + in_memory: bool = Field(default=False, description="Pin data to CPU memory.") + pin_memory_dataloader: bool = Field( + default=False, description="Pin data to GPU memory." + ) + scaling_factor: int = Field( + default=1, description="Scaling factor for regression problems." + ) + q_max_length: int = Field(default=100, description="The max length of the queue.") + q_samples_per_volume: int = Field( + default=10, description="Number of samples per volume." + ) + q_num_workers: int = Field( + default=4, description="Number of worker threads to use." + ) + num_epochs: int = Field(default=100, description="Total number of epochs to train.") + patience: int = Field( + default=100, description="Number of epochs to wait for performance improvement." + ) + batch_size: int = Field(default=1, description="Default batch size for training.") + learning_rate: float = Field(default=0.001, description="Default learning rate.") + clip_grad: Optional[float] = Field( + default=None, description="Gradient clipping value." + ) + track_memory_usage: bool = Field( + default=False, description="Enable memory usage tracking." + ) + memory_save_mode: bool = Field( + default=False, + description="Enable memory-saving mode. If enabled, resize/resample will save files to disk.", + ) + print_rgb_label_warning: bool = Field( + default=True, description="Print a warning for RGB labels." + ) + data_postprocessing: Dict = Field( + default={}, description="Default data postprocessing configuration." + ) + grid_aggregator_overlap: str = Field( + default="crop", description="Default grid aggregator overlap strategy." + ) + determinism: bool = Field( + default=False, description="Enable deterministic computation." + ) + previous_parameters: Optional[Dict] = Field( + default=None, + description="Previous parameters to be used for resuming training and performing sanity checks.", + ) diff --git a/GANDLF/Configuration/generate_documentation.py b/GANDLF/Configuration/generate_documentation.py index f03e864d0..ece6207bb 100644 --- a/GANDLF/Configuration/generate_documentation.py +++ b/GANDLF/Configuration/generate_documentation.py @@ -1,4 +1,6 @@ import utils import default_parameters -markdown_output = utils.generate_and_save_markdown(default_parameters.DefaultParameters, "doc/user_model.md") \ No newline at end of file +markdown_output = utils.generate_and_save_markdown( + default_parameters.DefaultParameters, "doc/user_model.md" +) diff --git a/GANDLF/Configuration/utils.py b/GANDLF/Configuration/utils.py index 6da42a41b..439f19fd0 100644 --- a/GANDLF/Configuration/utils.py +++ b/GANDLF/Configuration/utils.py @@ -4,6 +4,7 @@ from typing import Type from pydantic import BaseModel + def generate_and_save_markdown(model: Type[BaseModel], file_path: str) -> None: schema = model.schema() markdown = [] @@ -18,7 +19,9 @@ def generate_and_save_markdown(model: Type[BaseModel], file_path: str) -> None: # Add fields table markdown.append("## Parameters\n") markdown.append("| Field | Type | Description | Default |") - markdown.append("|----------------|----------------|-----------------------|------------------|") + markdown.append( + "|----------------|----------------|-----------------------|------------------|" + ) for field_name, field_info in schema["properties"].items(): # Extract field details @@ -27,8 +30,10 @@ def generate_and_save_markdown(model: Type[BaseModel], file_path: str) -> None: default = field_info.get("default", "N/A") # Add row to the table - markdown.append(f"| `{field_name}` | `{field_type}` | {description} | `{default}` |") + markdown.append( + f"| `{field_name}` | `{field_type}` | {description} | `{default}` |" + ) # Write to file with open(file_path, "w", encoding="utf-8") as file: - file.write("\n".join(markdown)) \ No newline at end of file + file.write("\n".join(markdown)) From cab38d7ec2aadad781c95f1259ac63e514e8dc1b Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Thu, 30 Jan 2025 22:35:11 +0200 Subject: [PATCH 06/88] blacked . --- GANDLF/Configuration/user_defined_parameters.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index 9232c2773..0a51b227e 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -1,4 +1,6 @@ from pydantic import BaseModel, Field from typing import Optional, Dict + class UserDefinedParameters(BaseModel): + pass From 0e299db67a0b82f2ac4076057d8ed0d1e9b5bd0d Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 1 Feb 2025 18:17:53 +0200 Subject: [PATCH 07/88] define user_defined_parameters.py --- .../Configuration/user_defined_parameters.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index 0a51b227e..dbdc58125 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -1,6 +1,20 @@ -from pydantic import BaseModel, Field -from typing import Optional, Dict +from pydantic import BaseModel,field_validator +from GANDLF.config_manager import version_check +from importlib.metadata import version +#TODO: Define the paramaters from the config_manager + +#TODO: Some parameters maybe can define as a seperated Model + +class Version(BaseModel): + minimum: str + maximum: str class UserDefinedParameters(BaseModel): - pass + version: Version + + @classmethod + @field_validator('version', mode='after') + def validate_version(cls, values: Version) -> Version: + if version_check(values.model_dump(), version_to_check=version("GANDLF")): + return values From 576577d6508c488e77ed023010206b13bce4535d Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 1 Feb 2025 18:19:23 +0200 Subject: [PATCH 08/88] create parameters(BaseModel) --- GANDLF/Configuration/parameters.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 GANDLF/Configuration/parameters.py diff --git a/GANDLF/Configuration/parameters.py b/GANDLF/Configuration/parameters.py new file mode 100644 index 000000000..a762371be --- /dev/null +++ b/GANDLF/Configuration/parameters.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel, ConfigDict +from GANDLF.Configuration.default_parameters import DefaultParameters +from GANDLF.Configuration.user_defined_parameters import UserDefinedParameters + +class ParametersConfiguration(BaseModel): + model_config = ConfigDict() + +class Parameters(ParametersConfiguration,DefaultParameters,UserDefinedParameters): + pass \ No newline at end of file From 5b0ae987495080b5b623adbe8a7d54887d60d221 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 1 Feb 2025 18:20:00 +0200 Subject: [PATCH 09/88] made some code changes in generate_documentation.py --- GANDLF/Configuration/generate_documentation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GANDLF/Configuration/generate_documentation.py b/GANDLF/Configuration/generate_documentation.py index ece6207bb..011a85fd6 100644 --- a/GANDLF/Configuration/generate_documentation.py +++ b/GANDLF/Configuration/generate_documentation.py @@ -1,6 +1,5 @@ import utils -import default_parameters markdown_output = utils.generate_and_save_markdown( - default_parameters.DefaultParameters, "doc/user_model.md" + default_parameters.DefaultParameters, "configuration_docs/user_model.md" ) From 10d5f789f106aa1d9e113c2819169c2538edc04a Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 1 Feb 2025 18:20:20 +0200 Subject: [PATCH 10/88] add pydantic in setup.py --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fdb5d7c6f..f19e421e5 100644 --- a/setup.py +++ b/setup.py @@ -4,6 +4,8 @@ import sys, re, os + + from setuptools import setup, find_packages @@ -85,7 +87,8 @@ "openslide-bin", "openslide-python==1.4.1", "lion-pytorch==0.2.2", -] + "pydantic" + ] if __name__ == "__main__": setup( From 207e4c0d7587a2eac471f89c52ef82822446a698 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 1 Feb 2025 18:21:46 +0200 Subject: [PATCH 11/88] comment out config_manager --- GANDLF/config_manager.py | 1236 +++++++++++++++++++------------------- 1 file changed, 624 insertions(+), 612 deletions(-) diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index ed26ec8e1..ded27870e 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -1,16 +1,20 @@ # import logging import traceback from typing import Optional, Union +from pydantic import BaseModel, ValidationError, field_validator import sys, yaml, ast import numpy as np from copy import deepcopy +from torch.fx.experimental.validator import ValidationException + from .utils import version_check from GANDLF.data.post_process import postprocessing_after_reverse_one_hot_encoding from GANDLF.privacy.opacus import parse_opacus_params from GANDLF.metrics import surface_distance_ids from importlib.metadata import version +from GANDLF.Configuration.parameters import Parameters ## dictionary to define defaults for appropriate options, which are evaluated parameter_defaults = { @@ -127,615 +131,616 @@ def _parseConfig( if not isinstance(config_file_path, dict): params = yaml.safe_load(open(config_file_path, "r")) - if version_check_flag: # this is only to be used for testing - assert ( - "version" in params - ), "The 'version' key needs to be defined in config with 'minimum' and 'maximum' fields to determine the compatibility of configuration with code base" - version_check(params["version"], version_to_check=version("GANDLF")) - - if "patch_size" in params: - # duplicate patch size if it is an int or float - if isinstance(params["patch_size"], int) or isinstance( - params["patch_size"], float - ): - params["patch_size"] = [params["patch_size"]] - # in case someone decides to pass a single value list - if len(params["patch_size"]) == 1: - actual_patch_size = [] - for _ in range(params["model"]["dimension"]): - actual_patch_size.append(params["patch_size"][0]) - params["patch_size"] = actual_patch_size - - # parse patch size as needed for computations - if len(params["patch_size"]) == 2: # 2d check - # ensuring same size during torchio processing - params["patch_size"].append(1) - if "dimension" not in params["model"]: - params["model"]["dimension"] = 2 - elif len(params["patch_size"]) == 3: # 2d check - if "dimension" not in params["model"]: - params["model"]["dimension"] = 3 - assert "patch_size" in params, "Patch size needs to be defined in the config file" - - if "resize" in params: - print( - "WARNING: 'resize' should be defined under 'data_processing', this will be skipped", - file=sys.stderr, - ) - - assert "modality" in params, "'modality' needs to be defined in the config file" - params["modality"] = params["modality"].lower() - assert params["modality"] in [ - "rad", - "histo", - "path", - ], "Modality should be either 'rad' or 'path'" - - assert ( - "loss_function" in params - ), "'loss_function' needs to be defined in the config file" - if "loss_function" in params: - # check if user has passed a dict - if isinstance(params["loss_function"], dict): # if this is a dict - if len(params["loss_function"]) > 0: # only proceed if something is defined - for key in params["loss_function"]: # iterate through all keys - if key == "mse": - if (params["loss_function"][key] is None) or not ( - "reduction" in params["loss_function"][key] - ): - params["loss_function"][key] = {} - params["loss_function"][key]["reduction"] = "mean" - else: - # use simple string for other functions - can be extended with parameters, if needed - params["loss_function"] = key - else: - # check if user has passed a single string - if params["loss_function"] == "mse": - params["loss_function"] = {} - params["loss_function"]["mse"] = {} - params["loss_function"]["mse"]["reduction"] = "mean" - elif params["loss_function"] == "focal": - params["loss_function"] = {} - params["loss_function"]["focal"] = {} - params["loss_function"]["focal"]["gamma"] = 2.0 - params["loss_function"]["focal"]["size_average"] = True - - assert "metrics" in params, "'metrics' needs to be defined in the config file" - if "metrics" in params: - if not isinstance(params["metrics"], dict): - temp_dict = {} - else: - temp_dict = params["metrics"] - - # initialize metrics dict - for metric in params["metrics"]: - # assigning a new variable because some metrics can be dicts, and we want to get the first key - comparison_string = metric - if isinstance(metric, dict): - comparison_string = list(metric.keys())[0] - # these metrics always need to be dicts - if comparison_string in [ - "accuracy", - "f1", - "precision", - "recall", - "specificity", - "iou", - ]: - if not isinstance(metric, dict): - temp_dict[metric] = {} - else: - temp_dict[comparison_string] = metric - elif not isinstance(metric, dict): - temp_dict[metric] = None - - # special case for accuracy, precision, recall, and specificity; which could be dicts - ## need to find a better way to do this - if any( - _ in comparison_string - for _ in ["precision", "recall", "specificity", "accuracy", "f1"] - ): - if comparison_string != "classification_accuracy": - temp_dict[comparison_string] = initialize_key( - temp_dict[comparison_string], "average", "weighted" - ) - temp_dict[comparison_string] = initialize_key( - temp_dict[comparison_string], "multi_class", True - ) - temp_dict[comparison_string] = initialize_key( - temp_dict[comparison_string], "mdmc_average", "samplewise" - ) - temp_dict[comparison_string] = initialize_key( - temp_dict[comparison_string], "threshold", 0.5 - ) - if comparison_string == "accuracy": - temp_dict[comparison_string] = initialize_key( - temp_dict[comparison_string], "subset_accuracy", False - ) - elif "iou" in comparison_string: - temp_dict["iou"] = initialize_key( - temp_dict["iou"], "reduction", "elementwise_mean" - ) - temp_dict["iou"] = initialize_key(temp_dict["iou"], "threshold", 0.5) - elif comparison_string in surface_distance_ids: - temp_dict[comparison_string] = initialize_key( - temp_dict[comparison_string], "connectivity", 1 - ) - temp_dict[comparison_string] = initialize_key( - temp_dict[comparison_string], "threshold", None - ) - - params["metrics"] = temp_dict - - # this is NOT a required parameter - a user should be able to train with NO augmentations - params = initialize_key(params, "data_augmentation", {}) - # for all others, ensure probability is present - params["data_augmentation"]["default_probability"] = params[ - "data_augmentation" - ].get("default_probability", 0.5) - - if not (params["data_augmentation"] is None): - if len(params["data_augmentation"]) > 0: # only when augmentations are defined - # special case for random swapping and elastic transformations - which takes a patch size for computation - for key in ["swap", "elastic"]: - if key in params["data_augmentation"]: - params["data_augmentation"][key] = initialize_key( - params["data_augmentation"][key], - "patch_size", - np.round(np.array(params["patch_size"]) / 10) - .astype("int") - .tolist(), - ) - - # special case for swap default initialization - if "swap" in params["data_augmentation"]: - params["data_augmentation"]["swap"] = initialize_key( - params["data_augmentation"]["swap"], "num_iterations", 100 - ) - - # special case for affine default initialization - if "affine" in params["data_augmentation"]: - params["data_augmentation"]["affine"] = initialize_key( - params["data_augmentation"]["affine"], "scales", 0.1 - ) - params["data_augmentation"]["affine"] = initialize_key( - params["data_augmentation"]["affine"], "degrees", 15 - ) - params["data_augmentation"]["affine"] = initialize_key( - params["data_augmentation"]["affine"], "translation", 2 - ) - - if "motion" in params["data_augmentation"]: - params["data_augmentation"]["motion"] = initialize_key( - params["data_augmentation"]["motion"], "num_transforms", 2 - ) - params["data_augmentation"]["motion"] = initialize_key( - params["data_augmentation"]["motion"], "degrees", 15 - ) - params["data_augmentation"]["motion"] = initialize_key( - params["data_augmentation"]["motion"], "translation", 2 - ) - params["data_augmentation"]["motion"] = initialize_key( - params["data_augmentation"]["motion"], "interpolation", "linear" - ) - - # special case for random blur/noise - which takes a std-dev range - for std_aug in ["blur", "noise_var"]: - if std_aug in params["data_augmentation"]: - params["data_augmentation"][std_aug] = initialize_key( - params["data_augmentation"][std_aug], "std", None - ) - for std_aug in ["noise"]: - if std_aug in params["data_augmentation"]: - params["data_augmentation"][std_aug] = initialize_key( - params["data_augmentation"][std_aug], "std", [0, 1] - ) - - # special case for random noise - which takes a mean range - for mean_aug in ["noise", "noise_var"]: - if mean_aug in params["data_augmentation"]: - params["data_augmentation"][mean_aug] = initialize_key( - params["data_augmentation"][mean_aug], "mean", 0 - ) - - # special case for augmentations that need axis defined - for axis_aug in ["flip", "anisotropic", "rotate_90", "rotate_180"]: - if axis_aug in params["data_augmentation"]: - params["data_augmentation"][axis_aug] = initialize_key( - params["data_augmentation"][axis_aug], "axis", [0, 1, 2] - ) - - # special case for colorjitter - if "colorjitter" in params["data_augmentation"]: - params["data_augmentation"] = initialize_key( - params["data_augmentation"], "colorjitter", {} - ) - for key in ["brightness", "contrast", "saturation"]: - params["data_augmentation"]["colorjitter"] = initialize_key( - params["data_augmentation"]["colorjitter"], key, [0, 1] - ) - params["data_augmentation"]["colorjitter"] = initialize_key( - params["data_augmentation"]["colorjitter"], "hue", [-0.5, 0.5] - ) - - # Added HED augmentation in gandlf - hed_augmentation_types = [ - "hed_transform", - # "hed_transform_light", - # "hed_transform_heavy", - ] - for augmentation_type in hed_augmentation_types: - if augmentation_type in params["data_augmentation"]: - params["data_augmentation"] = initialize_key( - params["data_augmentation"], "hed_transform", {} - ) - ranges = [ - "haematoxylin_bias_range", - "eosin_bias_range", - "dab_bias_range", - "haematoxylin_sigma_range", - "eosin_sigma_range", - "dab_sigma_range", - ] - - default_range = ( - [-0.1, 0.1] - if augmentation_type == "hed_transform" - else ( - [-0.03, 0.03] - if augmentation_type == "hed_transform_light" - else [-0.95, 0.95] - ) - ) - - for key in ranges: - params["data_augmentation"]["hed_transform"] = initialize_key( - params["data_augmentation"]["hed_transform"], - key, - default_range, - ) - - params["data_augmentation"]["hed_transform"] = initialize_key( - params["data_augmentation"]["hed_transform"], - "cutoff_range", - [0, 1], - ) - - # special case for anisotropic - if "anisotropic" in params["data_augmentation"]: - if not ("downsampling" in params["data_augmentation"]["anisotropic"]): - default_downsampling = 1.5 - else: - default_downsampling = params["data_augmentation"]["anisotropic"][ - "downsampling" - ] - - initialize_downsampling = False - if isinstance(default_downsampling, list): - if len(default_downsampling) != 2: - initialize_downsampling = True - print( - "WARNING: 'anisotropic' augmentation needs to be either a single number of a list of 2 numbers: https://torchio.readthedocs.io/transforms/augmentation.html?highlight=randomswap#torchio.transforms.RandomAnisotropy.", - file=sys.stderr, - ) - default_downsampling = default_downsampling[0] # only - else: - initialize_downsampling = True - - if initialize_downsampling: - if default_downsampling < 1: - print( - "WARNING: 'anisotropic' augmentation needs the 'downsampling' parameter to be greater than 1, defaulting to 1.5.", - file=sys.stderr, - ) - # default - params["data_augmentation"]["anisotropic"]["downsampling"] = 1.5 - - for key in params["data_augmentation"]: - if key != "default_probability": - params["data_augmentation"][key] = initialize_key( - params["data_augmentation"][key], - "probability", - params["data_augmentation"]["default_probability"], - ) - - # this is NOT a required parameter - a user should be able to train with NO built-in pre-processing - params = initialize_key(params, "data_preprocessing", {}) - if not (params["data_preprocessing"] is None): - # perform this only when pre-processing is defined - if len(params["data_preprocessing"]) > 0: - thresholdOrClip = False - # this can be extended, as required - thresholdOrClipDict = ["threshold", "clip", "clamp"] + # if version_check_flag: # this is only to be used for testing + # assert ( + # "version" in params + # ), "The 'version' key needs to be defined in config with 'minimum' and 'maximum' fields to determine the compatibility of configuration with code base" + # version_check(params["version"], version_to_check=version("GANDLF")) + + # if "patch_size" in params: + # # duplicate patch size if it is an int or float + # if isinstance(params["patch_size"], int) or isinstance( + # params["patch_size"], float + # ): + # params["patch_size"] = [params["patch_size"]] + # # in case someone decides to pass a single value list + # if len(params["patch_size"]) == 1: + # actual_patch_size = [] + # for _ in range(params["model"]["dimension"]): + # actual_patch_size.append(params["patch_size"][0]) + # params["patch_size"] = actual_patch_size + # + # # parse patch size as needed for computations + # if len(params["patch_size"]) == 2: # 2d check + # # ensuring same size during torchio processing + # params["patch_size"].append(1) + # if "dimension" not in params["model"]: + # params["model"]["dimension"] = 2 + # elif len(params["patch_size"]) == 3: # 2d check + # if "dimension" not in params["model"]: + # params["model"]["dimension"] = 3 + # assert "patch_size" in params, "Patch size needs to be defined in the config file" + # + # if "resize" in params: + # print( + # "WARNING: 'resize' should be defined under 'data_processing', this will be skipped", + # file=sys.stderr, + # ) + # + # assert "modality" in params, "'modality' needs to be defined in the config file" + # params["modality"] = params["modality"].lower() + # assert params["modality"] in [ + # "rad", + # "histo", + # "path", + # ], "Modality should be either 'rad' or 'path'" + # + # assert ( + # "loss_function" in params + # ), "'loss_function' needs to be defined in the config file" + # if "loss_function" in params: + # # check if user has passed a dict + # if isinstance(params["loss_function"], dict): # if this is a dict + # if len(params["loss_function"]) > 0: # only proceed if something is defined + # for key in params["loss_function"]: # iterate through all keys + # if key == "mse": + # if (params["loss_function"][key] is None) or not ( + # "reduction" in params["loss_function"][key] + # ): + # params["loss_function"][key] = {} + # params["loss_function"][key]["reduction"] = "mean" + # else: + # # use simple string for other functions - can be extended with parameters, if needed + # params["loss_function"] = key + # else: + # # check if user has passed a single string + # if params["loss_function"] == "mse": + # params["loss_function"] = {} + # params["loss_function"]["mse"] = {} + # params["loss_function"]["mse"]["reduction"] = "mean" + # elif params["loss_function"] == "focal": + # params["loss_function"] = {} + # params["loss_function"]["focal"] = {} + # params["loss_function"]["focal"]["gamma"] = 2.0 + # params["loss_function"]["focal"]["size_average"] = True + # + # assert "metrics" in params, "'metrics' needs to be defined in the config file" + # if "metrics" in params: + # if not isinstance(params["metrics"], dict): + # temp_dict = {} + # else: + # temp_dict = params["metrics"] + # + # # initialize metrics dict + # for metric in params["metrics"]: + # # assigning a new variable because some metrics can be dicts, and we want to get the first key + # comparison_string = metric + # if isinstance(metric, dict): + # comparison_string = list(metric.keys())[0] + # # these metrics always need to be dicts + # if comparison_string in [ + # "accuracy", + # "f1", + # "precision", + # "recall", + # "specificity", + # "iou", + # ]: + # if not isinstance(metric, dict): + # temp_dict[metric] = {} + # else: + # temp_dict[comparison_string] = metric + # elif not isinstance(metric, dict): + # temp_dict[metric] = None + # + # # special case for accuracy, precision, recall, and specificity; which could be dicts + # ## need to find a better way to do this + # if any( + # _ in comparison_string + # for _ in ["precision", "recall", "specificity", "accuracy", "f1"] + # ): + # if comparison_string != "classification_accuracy": + # temp_dict[comparison_string] = initialize_key( + # temp_dict[comparison_string], "average", "weighted" + # ) + # temp_dict[comparison_string] = initialize_key( + # temp_dict[comparison_string], "multi_class", True + # ) + # temp_dict[comparison_string] = initialize_key( + # temp_dict[comparison_string], "mdmc_average", "samplewise" + # ) + # temp_dict[comparison_string] = initialize_key( + # temp_dict[comparison_string], "threshold", 0.5 + # ) + # if comparison_string == "accuracy": + # temp_dict[comparison_string] = initialize_key( + # temp_dict[comparison_string], "subset_accuracy", False + # ) + # elif "iou" in comparison_string: + # temp_dict["iou"] = initialize_key( + # temp_dict["iou"], "reduction", "elementwise_mean" + # ) + # temp_dict["iou"] = initialize_key(temp_dict["iou"], "threshold", 0.5) + # elif comparison_string in surface_distance_ids: + # temp_dict[comparison_string] = initialize_key( + # temp_dict[comparison_string], "connectivity", 1 + # ) + # temp_dict[comparison_string] = initialize_key( + # temp_dict[comparison_string], "threshold", None + # ) + # + # params["metrics"] = temp_dict + # + # # this is NOT a required parameter - a user should be able to train with NO augmentations + # params = initialize_key(params, "data_augmentation", {}) + # # for all others, ensure probability is present + # params["data_augmentation"]["default_probability"] = params[ + # "data_augmentation" + # ].get("default_probability", 0.5) + # + # if not (params["data_augmentation"] is None): + # if len(params["data_augmentation"]) > 0: # only when augmentations are defined + # # special case for random swapping and elastic transformations - which takes a patch size for computation + # for key in ["swap", "elastic"]: + # if key in params["data_augmentation"]: + # params["data_augmentation"][key] = initialize_key( + # params["data_augmentation"][key], + # "patch_size", + # np.round(np.array(params["patch_size"]) / 10) + # .astype("int") + # .tolist(), + # ) + # + # # special case for swap default initialization + # if "swap" in params["data_augmentation"]: + # params["data_augmentation"]["swap"] = initialize_key( + # params["data_augmentation"]["swap"], "num_iterations", 100 + # ) + # + # # special case for affine default initialization + # if "affine" in params["data_augmentation"]: + # params["data_augmentation"]["affine"] = initialize_key( + # params["data_augmentation"]["affine"], "scales", 0.1 + # ) + # params["data_augmentation"]["affine"] = initialize_key( + # params["data_augmentation"]["affine"], "degrees", 15 + # ) + # params["data_augmentation"]["affine"] = initialize_key( + # params["data_augmentation"]["affine"], "translation", 2 + # ) + # + # if "motion" in params["data_augmentation"]: + # params["data_augmentation"]["motion"] = initialize_key( + # params["data_augmentation"]["motion"], "num_transforms", 2 + # ) + # params["data_augmentation"]["motion"] = initialize_key( + # params["data_augmentation"]["motion"], "degrees", 15 + # ) + # params["data_augmentation"]["motion"] = initialize_key( + # params["data_augmentation"]["motion"], "translation", 2 + # ) + # params["data_augmentation"]["motion"] = initialize_key( + # params["data_augmentation"]["motion"], "interpolation", "linear" + # ) + # + # # special case for random blur/noise - which takes a std-dev range + # for std_aug in ["blur", "noise_var"]: + # if std_aug in params["data_augmentation"]: + # params["data_augmentation"][std_aug] = initialize_key( + # params["data_augmentation"][std_aug], "std", None + # ) + # for std_aug in ["noise"]: + # if std_aug in params["data_augmentation"]: + # params["data_augmentation"][std_aug] = initialize_key( + # params["data_augmentation"][std_aug], "std", [0, 1] + # ) + # + # # special case for random noise - which takes a mean range + # for mean_aug in ["noise", "noise_var"]: + # if mean_aug in params["data_augmentation"]: + # params["data_augmentation"][mean_aug] = initialize_key( + # params["data_augmentation"][mean_aug], "mean", 0 + # ) + # + # # special case for augmentations that need axis defined + # for axis_aug in ["flip", "anisotropic", "rotate_90", "rotate_180"]: + # if axis_aug in params["data_augmentation"]: + # params["data_augmentation"][axis_aug] = initialize_key( + # params["data_augmentation"][axis_aug], "axis", [0, 1, 2] + # ) + # + # # special case for colorjitter + # if "colorjitter" in params["data_augmentation"]: + # params["data_augmentation"] = initialize_key( + # params["data_augmentation"], "colorjitter", {} + # ) + # for key in ["brightness", "contrast", "saturation"]: + # params["data_augmentation"]["colorjitter"] = initialize_key( + # params["data_augmentation"]["colorjitter"], key, [0, 1] + # ) + # params["data_augmentation"]["colorjitter"] = initialize_key( + # params["data_augmentation"]["colorjitter"], "hue", [-0.5, 0.5] + # ) + # + # # Added HED augmentation in gandlf + # hed_augmentation_types = [ + # "hed_transform", + # # "hed_transform_light", + # # "hed_transform_heavy", + # ] + # for augmentation_type in hed_augmentation_types: + # if augmentation_type in params["data_augmentation"]: + # params["data_augmentation"] = initialize_key( + # params["data_augmentation"], "hed_transform", {} + # ) + # ranges = [ + # "haematoxylin_bias_range", + # "eosin_bias_range", + # "dab_bias_range", + # "haematoxylin_sigma_range", + # "eosin_sigma_range", + # "dab_sigma_range", + # ] + # + # default_range = ( + # [-0.1, 0.1] + # if augmentation_type == "hed_transform" + # else ( + # [-0.03, 0.03] + # if augmentation_type == "hed_transform_light" + # else [-0.95, 0.95] + # ) + # ) + # + # for key in ranges: + # params["data_augmentation"]["hed_transform"] = initialize_key( + # params["data_augmentation"]["hed_transform"], + # key, + # default_range, + # ) + # + # params["data_augmentation"]["hed_transform"] = initialize_key( + # params["data_augmentation"]["hed_transform"], + # "cutoff_range", + # [0, 1], + # ) + # + # # special case for anisotropic + # if "anisotropic" in params["data_augmentation"]: + # if not ("downsampling" in params["data_augmentation"]["anisotropic"]): + # default_downsampling = 1.5 + # else: + # default_downsampling = params["data_augmentation"]["anisotropic"][ + # "downsampling" + # ] + # + # initialize_downsampling = False + # if isinstance(default_downsampling, list): + # if len(default_downsampling) != 2: + # initialize_downsampling = True + # print( + # "WARNING: 'anisotropic' augmentation needs to be either a single number of a list of 2 numbers: https://torchio.readthedocs.io/transforms/augmentation.html?highlight=randomswap#torchio.transforms.RandomAnisotropy.", + # file=sys.stderr, + # ) + # default_downsampling = default_downsampling[0] # only + # else: + # initialize_downsampling = True + # + # if initialize_downsampling: + # if default_downsampling < 1: + # print( + # "WARNING: 'anisotropic' augmentation needs the 'downsampling' parameter to be greater than 1, defaulting to 1.5.", + # file=sys.stderr, + # ) + # # default + # params["data_augmentation"]["anisotropic"]["downsampling"] = 1.5 + # + # for key in params["data_augmentation"]: + # if key != "default_probability": + # params["data_augmentation"][key] = initialize_key( + # params["data_augmentation"][key], + # "probability", + # params["data_augmentation"]["default_probability"], + # ) + # + # # this is NOT a required parameter - a user should be able to train with NO built-in pre-processing + # params = initialize_key(params, "data_preprocessing", {}) + # if not (params["data_preprocessing"] is None): + # # perform this only when pre-processing is defined + # if len(params["data_preprocessing"]) > 0: + # thresholdOrClip = False + # # this can be extended, as required + # thresholdOrClipDict = ["threshold", "clip", "clamp"] + # + # resize_requested = False + # temp_dict = deepcopy(params["data_preprocessing"]) + # for key in params["data_preprocessing"]: + # if key in ["resize", "resize_image", "resize_images", "resize_patch"]: + # resize_requested = True + # + # if key in ["resample_min", "resample_minimum"]: + # if "resolution" in params["data_preprocessing"][key]: + # resize_requested = True + # resolution_temp = np.array( + # params["data_preprocessing"][key]["resolution"] + # ) + # if resolution_temp.size == 1: + # temp_dict[key]["resolution"] = np.array( + # [resolution_temp, resolution_temp] + # ).tolist() + # else: + # temp_dict.pop(key) + # + # params["data_preprocessing"] = temp_dict + # + # if resize_requested and "resample" in params["data_preprocessing"]: + # for key in ["resize", "resize_image", "resize_images", "resize_patch"]: + # if key in params["data_preprocessing"]: + # params["data_preprocessing"].pop(key) + # + # print( + # "WARNING: Different 'resize' operations are ignored as 'resample' is defined under 'data_processing'", + # file=sys.stderr, + # ) + # + # # iterate through all keys + # for key in params["data_preprocessing"]: # iterate through all keys + # if key in thresholdOrClipDict: + # # we only allow one of threshold or clip to occur and not both + # assert not ( + # thresholdOrClip + # ), "Use only `threshold` or `clip`, not both" + # thresholdOrClip = True + # # initialize if nothing is present + # if not (isinstance(params["data_preprocessing"][key], dict)): + # params["data_preprocessing"][key] = {} + # + # # if one of the required parameters is not present, initialize with lowest/highest possible values + # # this ensures the absence of a field doesn't affect processing + # # for threshold or clip, ensure min and max are defined + # if not "min" in params["data_preprocessing"][key]: + # params["data_preprocessing"][key]["min"] = sys.float_info.min + # if not "max" in params["data_preprocessing"][key]: + # params["data_preprocessing"][key]["max"] = sys.float_info.max + # + # if key == "histogram_matching": + # if params["data_preprocessing"][key] is not False: + # if not (isinstance(params["data_preprocessing"][key], dict)): + # params["data_preprocessing"][key] = {} + # + # if key == "histogram_equalization": + # if params["data_preprocessing"][key] is not False: + # # if histogram equalization is enabled, call histogram_matching + # params["data_preprocessing"]["histogram_matching"] = {} + # + # if key == "adaptive_histogram_equalization": + # if params["data_preprocessing"][key] is not False: + # # if histogram equalization is enabled, call histogram_matching + # params["data_preprocessing"]["histogram_matching"] = { + # "target": "adaptive" + # } + # + # # this is NOT a required parameter - a user should be able to train with NO built-in post-processing + # params = initialize_key(params, "data_postprocessing", {}) + # params = initialize_key( + # params, "data_postprocessing_after_reverse_one_hot_encoding", {} + # ) + # temp_dict = deepcopy(params["data_postprocessing"]) + # for key in temp_dict: + # if key in postprocessing_after_reverse_one_hot_encoding: + # params["data_postprocessing_after_reverse_one_hot_encoding"][key] = params[ + # "data_postprocessing" + # ][key] + # params["data_postprocessing"].pop(key) + # + # if "model" in params: + # assert isinstance( + # params["model"], dict + # ), "The 'model' parameter needs to be populated as a dictionary" + # assert ( + # len(params["model"]) > 0 + # ), "The 'model' parameter needs to be populated as a dictionary and should have all properties present" + # assert ( + # "architecture" in params["model"] + # ), "The 'model' parameter needs 'architecture' to be defined" + # assert ( + # "final_layer" in params["model"] + # ), "The 'model' parameter needs 'final_layer' to be defined" + # assert ( + # "dimension" in params["model"] + # ), "The 'model' parameter needs 'dimension' to be defined" + # + # if "amp" in params["model"]: + # pass + # else: + # print("NOT using Mixed Precision Training") + # params["model"]["amp"] = False + # + # if "norm_type" in params["model"]: + # if ( + # params["model"]["norm_type"] == None + # or params["model"]["norm_type"].lower() == "none" + # ): + # if not ("vgg" in params["model"]["architecture"]): + # raise ValueError( + # "Normalization type cannot be 'None' for non-VGG architectures" + # ) + # else: + # print("WARNING: Initializing 'norm_type' as 'batch'", flush=True) + # params["model"]["norm_type"] = "batch" + # + # if not ("base_filters" in params["model"]): + # base_filters = 32 + # params["model"]["base_filters"] = base_filters + # print("Using default 'base_filters' in 'model': ", base_filters) + # if not ("class_list" in params["model"]): + # params["model"]["class_list"] = [] # ensure that this is initialized + # if not ("ignore_label_validation" in params["model"]): + # params["model"]["ignore_label_validation"] = None + # if "batch_norm" in params["model"]: + # print( + # "WARNING: 'batch_norm' is no longer supported, please use 'norm_type' in 'model' instead", + # flush=True, + # ) + # params["model"]["print_summary"] = params["model"].get("print_summary", True) + # + # channel_keys_to_check = ["n_channels", "channels", "model_channels"] + # for key in channel_keys_to_check: + # if key in params["model"]: + # params["model"]["num_channels"] = params["model"][key] + # break + # + # # initialize model type for processing: if not defined, default to torch + # if not ("type" in params["model"]): + # params["model"]["type"] = "torch" + # + # # initialize openvino model data type for processing: if not defined, default to FP32 + # if not ("data_type" in params["model"]): + # params["model"]["data_type"] = "FP32" + # + # # set default save strategy for model + # if not ("save_at_every_epoch" in params["model"]): + # params["model"]["save_at_every_epoch"] = False + # + # if params["model"]["save_at_every_epoch"]: + # print( + # "WARNING: 'save_at_every_epoch' will result in TREMENDOUS storage usage; use at your own risk." + # ) + # + # if isinstance(params["model"]["class_list"], str): + # if ("||" in params["model"]["class_list"]) or ( + # "&&" in params["model"]["class_list"] + # ): + # # special case for multi-class computation - this needs to be handled during one-hot encoding mask construction + # print( + # "WARNING: This is a special case for multi-class computation, where different labels are processed together, `reverse_one_hot` will need mapping information to work correctly" + # ) + # temp_classList = params["model"]["class_list"] + # # we don't need the brackets + # temp_classList = temp_classList.replace("[", "") + # temp_classList = temp_classList.replace("]", "") + # params["model"]["class_list"] = temp_classList.split(",") + # else: + # try: + # params["model"]["class_list"] = eval(params["model"]["class_list"]) + # except Exception as e: + # ## todo: ensure logging captures assertion errors + # assert ( + # False + # ), f"Could not evaluate the `class_list` in `model`, Exception: {str(e)}, {traceback.format_exc()}" + # # logging.error( + # # f"Could not evaluate the `class_list` in `model`, Exception: {str(e)}, {traceback.format_exc()}" + # # ) + # + # assert ( + # "nested_training" in params + # ), "The parameter 'nested_training' needs to be defined" + # # initialize defaults for nested training + # params["nested_training"]["stratified"] = params["nested_training"].get( + # "stratified", False + # ) + # params["nested_training"]["stratified"] = params["nested_training"].get( + # "proportional", params["nested_training"]["stratified"] + # ) + # params["nested_training"]["testing"] = params["nested_training"].get("testing", -5) + # params["nested_training"]["validation"] = params["nested_training"].get( + # "validation", -5 + # ) + # + # parallel_compute_command = "" + # if "parallel_compute_command" in params: + # parallel_compute_command = params["parallel_compute_command"] + # parallel_compute_command = parallel_compute_command.replace("'", "") + # parallel_compute_command = parallel_compute_command.replace('"', "") + # params["parallel_compute_command"] = parallel_compute_command + # + # if "opt" in params: + # print("DeprecationWarning: 'opt' has been superseded by 'optimizer'") + # params["optimizer"] = params["opt"] + # + # # initialize defaults for patch sampler + # temp_patch_sampler_dict = { + # "type": "uniform", + # "enable_padding": False, + # "padding_mode": "symmetric", + # "biased_sampling": False, + # } + # # check if patch_sampler is defined in the config + # if "patch_sampler" in params: + # # if "patch_sampler" is a string, then it is the type of sampler + # if isinstance(params["patch_sampler"], str): + # print( + # "WARNING: Defining 'patch_sampler' as a string will be deprecated in a future release, please use a dictionary instead" + # ) + # temp_patch_sampler_dict["type"] = params["patch_sampler"].lower() + # elif isinstance(params["patch_sampler"], dict): + # # dict requires special handling + # for key in params["patch_sampler"]: + # temp_patch_sampler_dict[key] = params["patch_sampler"][key] + # + # # now assign the dict back to the params + # params["patch_sampler"] = temp_patch_sampler_dict + # del temp_patch_sampler_dict + # + # # define defaults + # for current_parameter in parameter_defaults: + # params = initialize_parameter( + # params, current_parameter, parameter_defaults[current_parameter], True + # ) + # + # for current_parameter in parameter_defaults_string: + # params = initialize_parameter( + # params, + # current_parameter, + # parameter_defaults_string[current_parameter], + # False, + # ) + # + # # ensure that the scheduler and optimizer are dicts + # if isinstance(params["scheduler"], str): + # temp_dict = {} + # temp_dict["type"] = params["scheduler"] + # params["scheduler"] = temp_dict + # + # if not ("step_size" in params["scheduler"]): + # params["scheduler"]["step_size"] = params["learning_rate"] / 5.0 + # print( + # "WARNING: Setting default step_size to:", params["scheduler"]["step_size"] + # ) + # + # # initialize default optimizer + # params["optimizer"] = params.get("optimizer", {}) + # if isinstance(params["optimizer"], str): + # temp_dict = {} + # temp_dict["type"] = params["optimizer"] + # params["optimizer"] = temp_dict + # + # # initialize defaults for DP + # if params.get("differential_privacy"): + # params = parse_opacus_params(params, initialize_key) + # + # # initialize defaults for inference mechanism + # inference_mechanism = {"grid_aggregator_overlap": "crop", "patch_overlap": 0} + # initialize_inference_mechanism = False + # if not ("inference_mechanism" in params): + # initialize_inference_mechanism = True + # elif not (isinstance(params["inference_mechanism"], dict)): + # initialize_inference_mechanism = True + # else: + # for key in inference_mechanism: + # if not (key in params["inference_mechanism"]): + # params["inference_mechanism"][key] = inference_mechanism[key] + # + # if initialize_inference_mechanism: + # params["inference_mechanism"] = inference_mechanism - resize_requested = False - temp_dict = deepcopy(params["data_preprocessing"]) - for key in params["data_preprocessing"]: - if key in ["resize", "resize_image", "resize_images", "resize_patch"]: - resize_requested = True - - if key in ["resample_min", "resample_minimum"]: - if "resolution" in params["data_preprocessing"][key]: - resize_requested = True - resolution_temp = np.array( - params["data_preprocessing"][key]["resolution"] - ) - if resolution_temp.size == 1: - temp_dict[key]["resolution"] = np.array( - [resolution_temp, resolution_temp] - ).tolist() - else: - temp_dict.pop(key) - - params["data_preprocessing"] = temp_dict - - if resize_requested and "resample" in params["data_preprocessing"]: - for key in ["resize", "resize_image", "resize_images", "resize_patch"]: - if key in params["data_preprocessing"]: - params["data_preprocessing"].pop(key) - - print( - "WARNING: Different 'resize' operations are ignored as 'resample' is defined under 'data_processing'", - file=sys.stderr, - ) - - # iterate through all keys - for key in params["data_preprocessing"]: # iterate through all keys - if key in thresholdOrClipDict: - # we only allow one of threshold or clip to occur and not both - assert not ( - thresholdOrClip - ), "Use only `threshold` or `clip`, not both" - thresholdOrClip = True - # initialize if nothing is present - if not (isinstance(params["data_preprocessing"][key], dict)): - params["data_preprocessing"][key] = {} - - # if one of the required parameters is not present, initialize with lowest/highest possible values - # this ensures the absence of a field doesn't affect processing - # for threshold or clip, ensure min and max are defined - if not "min" in params["data_preprocessing"][key]: - params["data_preprocessing"][key]["min"] = sys.float_info.min - if not "max" in params["data_preprocessing"][key]: - params["data_preprocessing"][key]["max"] = sys.float_info.max - - if key == "histogram_matching": - if params["data_preprocessing"][key] is not False: - if not (isinstance(params["data_preprocessing"][key], dict)): - params["data_preprocessing"][key] = {} - - if key == "histogram_equalization": - if params["data_preprocessing"][key] is not False: - # if histogram equalization is enabled, call histogram_matching - params["data_preprocessing"]["histogram_matching"] = {} - - if key == "adaptive_histogram_equalization": - if params["data_preprocessing"][key] is not False: - # if histogram equalization is enabled, call histogram_matching - params["data_preprocessing"]["histogram_matching"] = { - "target": "adaptive" - } - - # this is NOT a required parameter - a user should be able to train with NO built-in post-processing - params = initialize_key(params, "data_postprocessing", {}) - params = initialize_key( - params, "data_postprocessing_after_reverse_one_hot_encoding", {} - ) - temp_dict = deepcopy(params["data_postprocessing"]) - for key in temp_dict: - if key in postprocessing_after_reverse_one_hot_encoding: - params["data_postprocessing_after_reverse_one_hot_encoding"][key] = params[ - "data_postprocessing" - ][key] - params["data_postprocessing"].pop(key) - - if "model" in params: - assert isinstance( - params["model"], dict - ), "The 'model' parameter needs to be populated as a dictionary" - assert ( - len(params["model"]) > 0 - ), "The 'model' parameter needs to be populated as a dictionary and should have all properties present" - assert ( - "architecture" in params["model"] - ), "The 'model' parameter needs 'architecture' to be defined" - assert ( - "final_layer" in params["model"] - ), "The 'model' parameter needs 'final_layer' to be defined" - assert ( - "dimension" in params["model"] - ), "The 'model' parameter needs 'dimension' to be defined" - - if "amp" in params["model"]: - pass - else: - print("NOT using Mixed Precision Training") - params["model"]["amp"] = False - - if "norm_type" in params["model"]: - if ( - params["model"]["norm_type"] == None - or params["model"]["norm_type"].lower() == "none" - ): - if not ("vgg" in params["model"]["architecture"]): - raise ValueError( - "Normalization type cannot be 'None' for non-VGG architectures" - ) - else: - print("WARNING: Initializing 'norm_type' as 'batch'", flush=True) - params["model"]["norm_type"] = "batch" - - if not ("base_filters" in params["model"]): - base_filters = 32 - params["model"]["base_filters"] = base_filters - print("Using default 'base_filters' in 'model': ", base_filters) - if not ("class_list" in params["model"]): - params["model"]["class_list"] = [] # ensure that this is initialized - if not ("ignore_label_validation" in params["model"]): - params["model"]["ignore_label_validation"] = None - if "batch_norm" in params["model"]: - print( - "WARNING: 'batch_norm' is no longer supported, please use 'norm_type' in 'model' instead", - flush=True, - ) - params["model"]["print_summary"] = params["model"].get("print_summary", True) - - channel_keys_to_check = ["n_channels", "channels", "model_channels"] - for key in channel_keys_to_check: - if key in params["model"]: - params["model"]["num_channels"] = params["model"][key] - break - - # initialize model type for processing: if not defined, default to torch - if not ("type" in params["model"]): - params["model"]["type"] = "torch" - - # initialize openvino model data type for processing: if not defined, default to FP32 - if not ("data_type" in params["model"]): - params["model"]["data_type"] = "FP32" - - # set default save strategy for model - if not ("save_at_every_epoch" in params["model"]): - params["model"]["save_at_every_epoch"] = False - - if params["model"]["save_at_every_epoch"]: - print( - "WARNING: 'save_at_every_epoch' will result in TREMENDOUS storage usage; use at your own risk." - ) - - if isinstance(params["model"]["class_list"], str): - if ("||" in params["model"]["class_list"]) or ( - "&&" in params["model"]["class_list"] - ): - # special case for multi-class computation - this needs to be handled during one-hot encoding mask construction - print( - "WARNING: This is a special case for multi-class computation, where different labels are processed together, `reverse_one_hot` will need mapping information to work correctly" - ) - temp_classList = params["model"]["class_list"] - # we don't need the brackets - temp_classList = temp_classList.replace("[", "") - temp_classList = temp_classList.replace("]", "") - params["model"]["class_list"] = temp_classList.split(",") - else: - try: - params["model"]["class_list"] = eval(params["model"]["class_list"]) - except Exception as e: - ## todo: ensure logging captures assertion errors - assert ( - False - ), f"Could not evaluate the `class_list` in `model`, Exception: {str(e)}, {traceback.format_exc()}" - # logging.error( - # f"Could not evaluate the `class_list` in `model`, Exception: {str(e)}, {traceback.format_exc()}" - # ) - - assert ( - "nested_training" in params - ), "The parameter 'nested_training' needs to be defined" - # initialize defaults for nested training - params["nested_training"]["stratified"] = params["nested_training"].get( - "stratified", False - ) - params["nested_training"]["stratified"] = params["nested_training"].get( - "proportional", params["nested_training"]["stratified"] - ) - params["nested_training"]["testing"] = params["nested_training"].get("testing", -5) - params["nested_training"]["validation"] = params["nested_training"].get( - "validation", -5 - ) - - parallel_compute_command = "" - if "parallel_compute_command" in params: - parallel_compute_command = params["parallel_compute_command"] - parallel_compute_command = parallel_compute_command.replace("'", "") - parallel_compute_command = parallel_compute_command.replace('"', "") - params["parallel_compute_command"] = parallel_compute_command - - if "opt" in params: - print("DeprecationWarning: 'opt' has been superseded by 'optimizer'") - params["optimizer"] = params["opt"] - - # initialize defaults for patch sampler - temp_patch_sampler_dict = { - "type": "uniform", - "enable_padding": False, - "padding_mode": "symmetric", - "biased_sampling": False, - } - # check if patch_sampler is defined in the config - if "patch_sampler" in params: - # if "patch_sampler" is a string, then it is the type of sampler - if isinstance(params["patch_sampler"], str): - print( - "WARNING: Defining 'patch_sampler' as a string will be deprecated in a future release, please use a dictionary instead" - ) - temp_patch_sampler_dict["type"] = params["patch_sampler"].lower() - elif isinstance(params["patch_sampler"], dict): - # dict requires special handling - for key in params["patch_sampler"]: - temp_patch_sampler_dict[key] = params["patch_sampler"][key] - - # now assign the dict back to the params - params["patch_sampler"] = temp_patch_sampler_dict - del temp_patch_sampler_dict - - # define defaults - for current_parameter in parameter_defaults: - params = initialize_parameter( - params, current_parameter, parameter_defaults[current_parameter], True - ) - - for current_parameter in parameter_defaults_string: - params = initialize_parameter( - params, - current_parameter, - parameter_defaults_string[current_parameter], - False, - ) - - # ensure that the scheduler and optimizer are dicts - if isinstance(params["scheduler"], str): - temp_dict = {} - temp_dict["type"] = params["scheduler"] - params["scheduler"] = temp_dict - - if not ("step_size" in params["scheduler"]): - params["scheduler"]["step_size"] = params["learning_rate"] / 5.0 - print( - "WARNING: Setting default step_size to:", params["scheduler"]["step_size"] - ) - - # initialize default optimizer - params["optimizer"] = params.get("optimizer", {}) - if isinstance(params["optimizer"], str): - temp_dict = {} - temp_dict["type"] = params["optimizer"] - params["optimizer"] = temp_dict - - # initialize defaults for DP - if params.get("differential_privacy"): - params = parse_opacus_params(params, initialize_key) - - # initialize defaults for inference mechanism - inference_mechanism = {"grid_aggregator_overlap": "crop", "patch_overlap": 0} - initialize_inference_mechanism = False - if not ("inference_mechanism" in params): - initialize_inference_mechanism = True - elif not (isinstance(params["inference_mechanism"], dict)): - initialize_inference_mechanism = True - else: - for key in inference_mechanism: - if not (key in params["inference_mechanism"]): - params["inference_mechanism"][key] = inference_mechanism[key] - - if initialize_inference_mechanism: - params["inference_mechanism"] = inference_mechanism return params def ConfigManager( config_file_path: Union[str, dict], version_check_flag: bool = True -) -> None: +) -> dict: """ This function parses the configuration file and returns a dictionary of parameters. @@ -747,13 +752,20 @@ def ConfigManager( dict: The parameter dictionary. """ try: - return _parseConfig(config_file_path, version_check_flag) - except Exception as e: - ## todo: ensure logging captures assertion errors - assert ( - False - ), f"Config parsing failed: {config_file_path=}, {version_check_flag=}, Exception: {str(e)}, {traceback.format_exc()}" - # logging.error( - # f"gandlf config parsing failed: {config_file_path=}, {version_check_flag=}, Exception: {str(e)}, {traceback.format_exc()}" - # ) - # raise + parameters = Parameters( + **_parseConfig(config_file_path, version_check_flag) + ).model_dump() + return parameters + # except Exception as e: + # ## todo: ensure logging captures assertion errors + # assert ( + # False + # ), f"Config parsing failed: {config_file_path=}, {version_check_flag=}, Exception: {str(e)}, {traceback.format_exc()}" + # # logging.error( + # # f"gandlf config parsing failed: {config_file_path=}, {version_check_flag=}, Exception: {str(e)}, {traceback.format_exc()}" + # # ) + # # raise + except ValidationError as exc: + print(exc.errors()) + + From 39c5b1bee01f5498c7c851a73eff12ecc6f1cab3 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 1 Feb 2025 18:22:23 +0200 Subject: [PATCH 12/88] add a temp test dir for testing purposes --- GANDLF/Configuration/__init__.py | 0 test_configuration/config_all_options.yaml | 311 +++++++++++++++++++++ test_configuration/test_configuration.py | 7 + 3 files changed, 318 insertions(+) create mode 100644 GANDLF/Configuration/__init__.py create mode 100644 test_configuration/config_all_options.yaml create mode 100644 test_configuration/test_configuration.py diff --git a/GANDLF/Configuration/__init__.py b/GANDLF/Configuration/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test_configuration/config_all_options.yaml b/test_configuration/config_all_options.yaml new file mode 100644 index 000000000..ac97cdee6 --- /dev/null +++ b/test_configuration/config_all_options.yaml @@ -0,0 +1,311 @@ +# affix version +version1: + { + minimum: 0.1.3-dev, + maximum: 0.1.3-dev # this should NOT be made a variable, but should be tested after every tag is created + } +vere2: + { + minimum: 0.1.3-dev, + maximum: 0.1.3-dev # this should NOT be made a variable, but should be tested after every tag is created + } +## Choose the model parameters here +#model: +# { +# dimension: 3, # the dimension of the model and dataset: defines dimensionality of computations +# base_filters: 30, # Set base filters: number of filters present in the initial module of the U-Net convolution; for IncU-Net, keep this divisible by 4 +# architecture: resunet, # options: unet, resunet, deep_resunet, deep_unet, light_resunet, light_unet, fcn, uinc, vgg, densenet +# norm_type: batch, # options: batch, instance, or none (only for VGG); used for all networks +# final_layer: softmax, # can be either sigmoid, softmax or none (none == regression/logits) +# # sigmoid_input_multiplier: 1.0, # this is used during sigmoid, and defaults to 1.0 +# class_list: [0,1,2,4], # Set the list of labels the model should train on and predict +# # class_list: '[*range(0,100,1)]' # a range of values from 0 to 99 with a step of 1 will be created; customize as needed, but ensure this is defined as a string as it will be passed through 'eval' function +# # class_list: '[0,1||2||3,1||4,4]', # combinatorial training - this will construct one-hot encoded mask using logical operands between specified annotations. Note that double '|' or '&' should be passed and not single to avoid python parsing +# ignore_label_validation: 0, # this is the location of the class_list whose performance is ignored during validation metric calculation +# amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False +# # num_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types from the CSV +# # save_at_every_epoch: True, # allows you to save the model at every epoch +# # print_summary: True, # prints the summary of the model before training; defaults to True +# +# ## densenet models have the following optional parameters: +# # growth_rate (int) - how many filters to add each layer (k in paper) +# # num_init_features (int) - the number of filters to learn in the first convolution layer +# # bn_size (int) - multiplicative factor for number of bottle neck layers (i.e. bn_size * k features in the bottleneck layer) +# # drop_rate (float) - dropout rate after each dense layer +# # num_classes (int) - number of classification classes +# +# ## unet_multilayer, unetr, transunet have the following optional parameter: +# # depth (int) - the number of encoder/decoder layers +# +# ## imagenet_unet has the following optional parameter: +# # pretrained (bool) - if True (default), uses the pretrained imagenet weights +# # final_layer - one of ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] +# # encoder_name (str) - the name of the encoder to use, pick from https://github.com/qubvel/segmentation_models.pytorch#encoders +# # decoder_use_batchnorm (str) - whether to use batch norm or not or inplace, this will override 'norm_type', see https://github.com/qubvel/segmentation_models.pytorch/blob/master/segmentation_models_pytorch/decoders/unet/model.py +# # decoder_attention_type (str) - the decoder attention type, see https://github.com/qubvel/segmentation_models.pytorch/blob/master/segmentation_models_pytorch/decoders/unet/model.py +# # encoder_depth (int) - the depth of the encoder, also picked up from 'depth' +# # decoder_channels (list) - a list of numbers of channels for each decoder layer, should be same length as 'encoder_depth' +# # converter_type (str) - either acs (targets ACSConv) or conv3d (targets nn.Conv3d) or soft (targets SoftACSConv with learnable weights, default); see https://doi.org/10.1109/JBHI.2021.3049452 +# # the following parameters can be used to convert the "imagenet_unet" model to a classifier/regression network; they only come into the picture when the "problem_type" is identified as not segmentation. +# # - pooling (str): One of "max", "avg"; default is "avg" +# # - dropout (float): Dropout factor in [0, 1); default is 0.2 +# } +### metrics to evaluate the validation performance +#metrics: +# - dice # segmentation +# # - hausdorff # hausdorff 100 percentile, segmentation +# # - hausdorff95 # hausdorff 95 percentile, segmentation +# # - mse # regression/classification +# # - accuracy # classification ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/accuracy.html +# # - classification_accuracy # classification +# # - balanced_accuracy # classification ## more details https://scikit-learn.org/stable/modules/generated/sklearn.metrics.balanced_accuracy_score.html +# # - per_label_accuracy # used for classification +# # - f1 # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/f1_score.html +# # - precision # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/precision.html +# # - recall # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/recall.html +# # - iou # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/jaccard_index.html +### this customizes the inference, primarily used for segmentation outputs +#inference_mechanism: { +# grid_aggregator_overlap: crop, # this option provides the option to strategize the grid aggregation output; should be either 'crop' or 'average' - https://torchio.readthedocs.io/patches/patch_inference.html#grid-aggregator +# patch_overlap: 0, # amount of overlap of patches during inference, defaults to 0; see https://torchio.readthedocs.io/patches/patch_inference.html#gridsampler +#} +## this is to enable or disable lazy loading - setting to true reads all data once during data loading, resulting in improvements +## in I/O at the expense of memory consumption +#in_memory: False +## if enabled, resize/resample operations in `data_preprocessing` will save files to disk instead of directly getting read into memory as tensors +#memory_save_mode: False +## this will save the generated masks for validation and testing data for qualitative analysis +#save_output: False +## this will save the patches used during training for qualitative analysis +#save_training: False +## Set the Modality : rad for radiology, path for histopathology +#modality: rad +### Patch size during training - 2D patch for breast images since third dimension is not patched +#patch_size: [144,144,64] +## uniform: UniformSampler or label: LabelSampler +#patch_sampler: uniform +## patch_sampler: label +## patch_sampler: +## { +## type: label, +## enable_padding: True, +## padding_mode: symmetric, # for options, see 'mode' in https://numpy.org/doc/stable/reference/generated/numpy.pad.html +## biased_sampling: True, # adds additional sampling probability of labels based on "sampling_weights" key; only gets invoked when using label sampler. If not present, gets calculated using the same mechanism as weighted_loss +## } +## If enabled, this parameter pads images and labels when label sampler is used +#enable_padding: False +## Number of epochs +#num_epochs: 100 +## Set the patience - measured in number of epochs after which, if the performance metric does not improve, exit the training loop - defaults to the number of epochs +#patience: 50 +## Set the batch size +#batch_size: 1 +## gradient clip : norm, value, agc +#clip_mode: norm +## clip_gradient value +#clip_grad: 0.1 +### Set the initial learning rate +#learning_rate: 0.001 +## Learning rate scheduler - options:"triangle", "triangle_modified", "exp", "step", "reduce-on-plateau", "cosineannealing", "triangular", "triangular2", "exp_range" +## triangle/triangle_modified use LambdaLR but triangular/triangular2/exp_range uses CyclicLR +#scheduler: +# { +# type: triangle, +# min_lr: 0.00001, +# max_lr: 1, +# } +## Set which loss function you want to use - options : 'dc' - for dice only, 'dcce' - for sum of dice and CE and you can guess the next (only lower-case please) +## options: dc (dice only), dc_log (-log of dice), ce (), dcce (sum of dice and ce), focal/dc_focal, mcc/mcc_log, mse () ... +## mse is the MSE defined by torch and can define a variable 'reduction'; see https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html#torch.nn.MSELoss +## focal is the focal loss and can define 2 variables: gamma and size_average +## use mse_torch for regression/classification problems and dice for segmentation +#loss_function: dc +## this parameter weights the loss to handle imbalanced losses better +#weighted_loss: True # generates new keys "class_weights" and "penalty_weights" that handle the aggregate weights of the class and penalties per label, respectively +##loss_function: +## { +## 'mse':{ +## 'reduction': 'mean' # see https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html#torch.nn.MSELoss for all options +## } +## } +##loss_function: +## { +## 'focal':{ +## 'gamma': 1.0 +## } +## } +## Which optimizer do you want to use - sgd, asgd, adam, adamw, adamax, sparseadam, rprop, adadelta, adagrad, rmsprop, +## each has their own options and functionalities, which are initialized with defaults, see GANDLF.optimizers.wrap_torch for details +#optimizer: adam +### this parameter controls the nested training process +## performs randomized k-fold cross-validation, see https://en.wikipedia.org/wiki/Cross-validation_(statistics) for details +## split is performed using sklearn's KFold method: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html +## for train on a single fold, use '-' before the fold number to make the number of folds "negative" -- NOT recommended +#nested_training: +# { +# stratified: False, # this will perform stratified k-fold cross-validation but only with offline data splitting, see https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html +# testing: 5, # this controls the number of testing data folds for final model evaluation; [NOT recommended] to disable this, use '1' +# validation: 5 # this controls the number of validation data folds to be used for model *selection* during training (not used for back-propagation) +# } +### pre-processing +## this constructs an order of transformations, which is applied to all images in the data loader +## order: all_methods_as_specified_in_dict --> normalize [normalization methods always applied at the end] +## 'to_canonical': change the image to canonical orientation, see https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.ToCanonical +## 'rgba2rgb': convert images from rgba to rgb +## 'threshold': performs intensity thresholding; i.e., if x[i] < min: x[i] = 0; and if x[i] > max: x[i] = 0 +## 'clip': performs intensity clipping; i.e., if x[i] < min: x[i] = min; and if x[i] > max: x[i] = max +## 'threshold'/'clip': if either min/max is not defined, it is taken as the minimum/maximum of the image, respectively +## 'normalize': performs z-score normalization: https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.ZNormalization +## 'normalize_positive':perform z-score normalize but with mean and std-dev calculated on only pixels > 0 +## 'normalize_nonZero': perform z-score normalize but with mean and std-dev calculated on only non-zero pixels +## 'normalize_nonZero_masked': perform z-score normalize but with mean and std-dev calculated on only non-zero pixels with the stats applied on non-zero pixels +## 'crop_external_zero_planes': crops all non-zero planes from input tensor to reduce image search space +## 'resample: resolution: X,Y,Z': resample the voxel resolution: https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.Resample +## 'resample: resolution: X': resample the voxel resolution in an isotropic manner: https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.Resample +## resize the image(s) and mask (this should be greater than or equal to patch_size); resize is done ONLY when resample is not defined -- WARNING: resizing images on the fly ensures that images get loaded in memory, which dramatically increases RAM usage +## 'resize_image' resizes the image and mask BEFORE applying any another operation +## 'resize_patch' resizes the image and mask AFTER extracting the patch +#data_preprocessing: +# { +# # 'histogram_matching':{ +# # 'target': '/path/to/target/image.nii.gz', # this is the target image to which the histogram of the current image is matched, if this not defined, histogram equalization is performed on the entire image with an equal ramp of [-1,1] +# # 'num_hist_level': 1024, # number of histogram levels +# # 'num_match_points': 16, # number of matching points for histogram matching +# # }, +# # 'histogram_equalization':{ # this performs global histogram equalization using the same logic as 'histogram_matching', just without the target +# # 'num_hist_level': 1024, # number of histogram levels +# # 'num_match_points': 16, # number of matching points for histogram matching +# # }, +# # 'adaptive_histogram_equalization', # this performs Power Law Adaptive Histogram Equalization using https://simpleitk.org/doxygen/latest/html/classitk_1_1simple_1_1AdaptiveHistogramEqualizationImageFilter.html +# 'threshold':{ +# 'min': 10, +# 'max': 75 +# }, +# # 'clip':{ +# # 'min': 10, +# # 'max': 75 +# # }, +# 'normalize', +# # 'normalize_positive', # this performs z-score normalization only on pixels > 0 +# # 'normalize_nonZero', # this performs z-score normalization only on non-zero pixels +# # 'normalize_nonZero_masked', # this performs z-score normalization only on masked region +# 'resample':{ +# 'resolution': [1,2,3] +# }, +# 'resample_min':{ +# 'resolution': 1, # this will be the maximum spacing (translates to minium resolution) across all axes +# }, +# #'resize_image': [128,128], # this is generally not recommended, as it changes image properties in unexpected ways +# #'resize_patch': [128,128], # this is generally not recommended, as it changes image properties in unexpected ways +# 'crop_external_zero_planes', # this will crop all zero-valued planes across all axes +# 'crop': [64,64,64], # this will crop the image by removing specified number of pixels; see https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.Crop +# 'centercrop': [64,64,64], # this will crop the image to the specified size from the center of image; see https://torchio.readthedocs.io/transforms/preprocessing.html#croporpad +# ## histogram matching algorithms +# # 'histogram_matching':{ +# # 'target': '/path/to/template.nii.gz', # if this is absent, global histogram equalization takes place +# # # 'target': 'adaptive', # this will perform adaptive histogram matching using https://simpleitk.org/doxygen/latest/html/classitk_1_1simple_1_1AdaptiveHistogramEqualizationImageFilter.html +# # }, +# ## stain normalization algorithms +# # 'stain_normalization':{ +# # 'target': '/path/to/target.png', # this is required +# # 'extractor': 'vahadane', # can be either vahadane, ruifrok or macenko; defaults to ruifrok +# # } +# ## rescale image +# # 'rescale':{ +# # 'in_min_max': [15,125], # desired output intensity range, defaults to min/max of image +# # 'out_min_max': [0,1], # desired output intensity range, defaults to [0,1] +# # 'percentiles': [5,95], # percentile values of the input image that will be mapped to the output range, defaults to [0,100] +# # } +# } +### various data augmentation techniques +## options: affine, elastic, downsample, motion, kspace, bias, blur, gaussianNoise, swap +## keep/edit as needed +## all transforms: https://torchio.readthedocs.io/transforms/transforms.html +## 'kspace': one of ghosting or spiking is picked (randomly) for augmentation +## 'probability' sub-parameter adds the probability of the particular augmentation getting added during training (this is always 1 for normalize and resampling) +#data_augmentation: +# { +# default_probability: 1.0, # keeping probability 1.0 to ensure that all augmentations are applied +# 'affine':{ # for options, see https://torchio.readthedocs.io/transforms/augmentation.html#randomaffine +# 'scales': [0.5, 1.5], +# 'degrees': 25, +# 'translation': 2, +# }, +# 'elastic': # for options, see https://torchio.readthedocs.io/transforms/augmentation.html#randomelasticdeformation +# { +# 'num_control_points': 7, +# 'max_displacement': 0.1, +# 'locked_borders': 2, +# }, +# 'kspace':{ +# 'probability': 1 +# }, +# 'motion':{ +# 'probability': 1 +# }, +# 'bias', +# blur, # this is a gaussian blur, and can take 'std' as a sub-parameter, however, the default 'std' is [0, 0.015 * std(image)] +# ## example of blur with specific std range +# # 'blur': { +# # 'std': [0, 1] # example std-dev range, for details, see https://torchio.readthedocs.io/transforms/augmentation.html#torchio.transforms.RandomBlur +# # }, +# 'noise': { # for details, see https://torchio.readthedocs.io/transforms/augmentation.html#torchio.transforms.RandomNoise +# 'mean': 0, # default mean +# 'std': [0, 1] # default std-dev range +# }, +# noise_var, # this is a random noise, and can take 'std' and 'mean' as a sub-parameter, however, the default 'std' is [0, 0.015 * std(image)] +# 'gamma', +# 'swap':{ +# 'patch_size': 15, # patch size for swapping; if a single number if provided, the same number is used for all axes +# 'num_iterations': 50, # number of times that two patches will be swapped, defaults to 100 +# }, +# 'flip':{ +# 'axis': [0,1,2] # one or more axes can be put here. if this isn't defined, all axes are considered +# }, +# 'anisotropic':{ +# 'axis': [0,1], +# 'downsampling': [2,2.5] +# }, +# 'rotate_90':{ # explicitly rotate image by 90 +# 'axis': [0,2] # one or more axes can be put here. if this isn't defined, all axes are considered +# }, +# 'rotate_180', # explicitly rotate image by 180; if 'axis' isn't defined, default is [1,2,3] +# 'colorjitter':{ # this is used to apply the ColorJitter transform form torch - only used for rgb images +# 'brightness': [0,1], # optional: needs to be between [0,1] +# 'contrast': [0,0.75], # optional: needs to be between [0,1] +# 'saturation': [0,0.5], # optional: needs to be between [0,1] +# 'hue': [-0.25,0.25], # optional: needs to be between [-0.5,0.5] for range and [0,1] for a single value +# }, +# 'hed_transform':{ +# 'haematoxylin_bias_range': [-0.1, 0.1], +# 'eosin_bias_range': [-0.1, 0.1], +# 'dab_bias_range': [-0.1, 0.1], +# 'haematoxylin_sigma_range': [-0.1, 0.1], +# 'eosin_sigma_range': [-0.1, 0.1], +# 'dab_sigma_range': [-0.1, 0.1], +# 'cutoff_range': [0.01, 0.99], +# } +# } +## ## post-processing steps - only applied before output labels are saved +## data_postprocessing: +## { +## 'fill_holes', # this will fill holes in the image +## 'mapping': {0: 0, 1: 1, 2: 4}, # this will map the labels to a new set of labels, useful to convert labels from combinatorial training (i.e., combined segmentation labels) +## } +### parallel training on HPC - here goes the command to prepend to send to a high performance computing +## cluster for parallel computing during multi-fold training +## not used for single fold training +## this gets passed before the training_loop, so ensure enough memory is provided along with other parameters +## that your HPC would expect +## ${outputDir} will be changed to the outputDir you pass in CLI + '/${fold_number}' +## ensure that the correct location of the virtual environment is getting invoked, otherwise it would pick up the system python, which might not have all dependencies +## parallel_compute_command: 'qsub -b y -l gpu -l h_vmem=32G -cwd -o ${outputDir}/\$JOB_ID.stdout -e ${outputDir}/\$JOB_ID.stderr `pwd`/sge_wrapper _correct_location_of_virtual_environment_/venv/bin/python' +### queue configuration - https://torchio.readthedocs.io/data/patch_training.html?#queue +## this determines the maximum number of patches that can be stored in the queue. Using a large number means that the queue needs to be filled less often, but more CPU memory is needed to store the patches +#q_max_length: 40 +## this determines the number of patches to extract from each volume. A small number of patches ensures a large variability in the queue, but training will be slower +#q_samples_per_volume: 5 +## this determines the number subprocesses to use for data loading; '0' means main process is used +#q_num_workers: 2 # scale this according to available CPU resources +## used for debugging +#q_verbose: False diff --git a/test_configuration/test_configuration.py b/test_configuration/test_configuration.py new file mode 100644 index 000000000..1611e6cd0 --- /dev/null +++ b/test_configuration/test_configuration.py @@ -0,0 +1,7 @@ +from GANDLF.config_manager import ConfigManager +from pathlib import Path +if __name__ == "__main__": + testingDir = Path(__file__).parent.absolute().__str__() + parameters = ConfigManager(testingDir+"/config_all_options.yaml",version_check_flag=False) + print(parameters) + From adc2ebc80511f3b1cc0e58fed431269d12b18295 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 1 Feb 2025 18:24:30 +0200 Subject: [PATCH 13/88] fix spelling errors --- GANDLF/Configuration/user_defined_parameters.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index dbdc58125..d35f7a974 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -2,10 +2,6 @@ from GANDLF.config_manager import version_check from importlib.metadata import version -#TODO: Define the paramaters from the config_manager - -#TODO: Some parameters maybe can define as a seperated Model - class Version(BaseModel): minimum: str maximum: str From 24de26a42911c64941a90d177db63abe8d471d10 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 1 Feb 2025 18:25:29 +0200 Subject: [PATCH 14/88] blacked . --- GANDLF/Configuration/parameters.py | 6 ++++-- GANDLF/Configuration/user_defined_parameters.py | 8 +++++--- GANDLF/config_manager.py | 3 --- setup.py | 4 ++-- test_configuration/test_configuration.py | 6 ++++-- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/GANDLF/Configuration/parameters.py b/GANDLF/Configuration/parameters.py index a762371be..8dbd73b91 100644 --- a/GANDLF/Configuration/parameters.py +++ b/GANDLF/Configuration/parameters.py @@ -2,8 +2,10 @@ from GANDLF.Configuration.default_parameters import DefaultParameters from GANDLF.Configuration.user_defined_parameters import UserDefinedParameters + class ParametersConfiguration(BaseModel): model_config = ConfigDict() -class Parameters(ParametersConfiguration,DefaultParameters,UserDefinedParameters): - pass \ No newline at end of file + +class Parameters(ParametersConfiguration, DefaultParameters, UserDefinedParameters): + pass diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index d35f7a974..25851bb3a 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -1,16 +1,18 @@ -from pydantic import BaseModel,field_validator -from GANDLF.config_manager import version_check +from pydantic import BaseModel, field_validator +from GANDLF.config_manager import version_check from importlib.metadata import version + class Version(BaseModel): minimum: str maximum: str + class UserDefinedParameters(BaseModel): version: Version @classmethod - @field_validator('version', mode='after') + @field_validator("version", mode="after") def validate_version(cls, values: Version) -> Version: if version_check(values.model_dump(), version_to_check=version("GANDLF")): return values diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index ded27870e..8992299fe 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -734,7 +734,6 @@ def _parseConfig( # if initialize_inference_mechanism: # params["inference_mechanism"] = inference_mechanism - return params @@ -767,5 +766,3 @@ def ConfigManager( # # raise except ValidationError as exc: print(exc.errors()) - - diff --git a/setup.py b/setup.py index f19e421e5..d7ccf4876 100644 --- a/setup.py +++ b/setup.py @@ -87,8 +87,8 @@ "openslide-bin", "openslide-python==1.4.1", "lion-pytorch==0.2.2", - "pydantic" - ] + "pydantic", +] if __name__ == "__main__": setup( diff --git a/test_configuration/test_configuration.py b/test_configuration/test_configuration.py index 1611e6cd0..8e0e75a63 100644 --- a/test_configuration/test_configuration.py +++ b/test_configuration/test_configuration.py @@ -1,7 +1,9 @@ from GANDLF.config_manager import ConfigManager from pathlib import Path + if __name__ == "__main__": testingDir = Path(__file__).parent.absolute().__str__() - parameters = ConfigManager(testingDir+"/config_all_options.yaml",version_check_flag=False) + parameters = ConfigManager( + testingDir + "/config_all_options.yaml", version_check_flag=False + ) print(parameters) - From 2ddad48b807021386a367a662a2a3c86349c39c0 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 3 Feb 2025 21:47:17 +0200 Subject: [PATCH 15/88] refactor user_defined_parameters.py --- .../Configuration/user_defined_parameters.py | 31 ++++++++++++++----- test_configuration/config_all_options.yaml | 9 ++---- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index 25851bb3a..a3d5cb528 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -1,18 +1,33 @@ -from pydantic import BaseModel, field_validator +from typing import Union +from pydantic import BaseModel, model_validator, Field from GANDLF.config_manager import version_check from importlib.metadata import version +from typing_extensions import Self class Version(BaseModel): minimum: str maximum: str + @model_validator(mode="after") + def validate_version(self) -> Self: + if version_check(self.model_dump(), version_to_check=version("GANDLF")): + return self -class UserDefinedParameters(BaseModel): - version: Version - @classmethod - @field_validator("version", mode="after") - def validate_version(cls, values: Version) -> Version: - if version_check(values.model_dump(), version_to_check=version("GANDLF")): - return values +class PatchSize(BaseModel): + patch_size: list[Union[int, float]] + + @model_validator(mode="after") + def validate_patch_size(self) -> Self: + if isinstance(self.patch_size, int) or isinstance(self.patch_size, float): + self.patch_size = [self.patch_size] + return self + + +class UserDefinedParameters(BaseModel): + version: Version = Field( + default=Version(minimum=version("GANDLF"), maximum=version("GANDLF")), + description="Whether weighted loss is to be used or not.", + ) + patch_size: PatchSize = Field(..., description="Patch size.") diff --git a/test_configuration/config_all_options.yaml b/test_configuration/config_all_options.yaml index ac97cdee6..dd4a8473e 100644 --- a/test_configuration/config_all_options.yaml +++ b/test_configuration/config_all_options.yaml @@ -1,14 +1,11 @@ # affix version -version1: - { - minimum: 0.1.3-dev, - maximum: 0.1.3-dev # this should NOT be made a variable, but should be tested after every tag is created - } -vere2: +version: { minimum: 0.1.3-dev, maximum: 0.1.3-dev # this should NOT be made a variable, but should be tested after every tag is created } +weighted_loss: True +patch_size: 1 ## Choose the model parameters here #model: # { From 2b2fc034e560e0968223b3a9522da0d6b58e5821 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 4 Feb 2025 00:40:53 +0200 Subject: [PATCH 16/88] add patch_size parameter and validation --- .../Configuration/user_defined_parameters.py | 39 ++++++++++++++----- GANDLF/config_manager.py | 2 +- test_configuration/config_all_options.yaml | 6 ++- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index a3d5cb528..57a5a6004 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -1,5 +1,5 @@ from typing import Union -from pydantic import BaseModel, model_validator, Field +from pydantic import BaseModel, model_validator, Field, field_validator from GANDLF.config_manager import version_check from importlib.metadata import version from typing_extensions import Self @@ -15,14 +15,8 @@ def validate_version(self) -> Self: return self -class PatchSize(BaseModel): - patch_size: list[Union[int, float]] - - @model_validator(mode="after") - def validate_patch_size(self) -> Self: - if isinstance(self.patch_size, int) or isinstance(self.patch_size, float): - self.patch_size = [self.patch_size] - return self +class Model(BaseModel): + dimension: Union[int, None] = Field(description="Dimension.", default=None) class UserDefinedParameters(BaseModel): @@ -30,4 +24,29 @@ class UserDefinedParameters(BaseModel): default=Version(minimum=version("GANDLF"), maximum=version("GANDLF")), description="Whether weighted loss is to be used or not.", ) - patch_size: PatchSize = Field(..., description="Patch size.") + patch_size: Union[list[Union[int, float]], int, float] = Field( + ..., description="Patch size." + ) + model: Model = Field(..., description="Model.") + + # Validators + @model_validator(mode="after") + def validate_patch_size(self) -> Self: + # Validation for patch_size + if isinstance(self.patch_size, int) or isinstance(self.patch_size, float): + self.patch_size = [self.patch_size] + if len(self.patch_size) == 1 and self.model.dimension is not None: + actual_patch_size = [] + for _ in range(self.model.dimension): + actual_patch_size.append(self.patch_size[0]) + self.patch_size = actual_patch_size + if len(self.patch_size) == 2: # 2d check + # ensuring same size during torchio processing + self.patch_size.append(1) + if self.model.dimension is None: + self.model.dimension = 2 + elif len(self.patch_size) == 3: # 2d check + if self.model.dimension is None: + self.model.dimension = 3 + + return self diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index 8992299fe..e54ee6dd4 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -149,7 +149,7 @@ def _parseConfig( # for _ in range(params["model"]["dimension"]): # actual_patch_size.append(params["patch_size"][0]) # params["patch_size"] = actual_patch_size - # + # # # # parse patch size as needed for computations # if len(params["patch_size"]) == 2: # 2d check # # ensuring same size during torchio processing diff --git a/test_configuration/config_all_options.yaml b/test_configuration/config_all_options.yaml index dd4a8473e..c0af2091a 100644 --- a/test_configuration/config_all_options.yaml +++ b/test_configuration/config_all_options.yaml @@ -5,7 +5,11 @@ version: maximum: 0.1.3-dev # this should NOT be made a variable, but should be tested after every tag is created } weighted_loss: True -patch_size: 1 +patch_size: [1,2,3] +model: + { + + } ## Choose the model parameters here #model: # { From 8c8a296fbf97e6d10d8ff9e124a6f77ecfd924ad Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 4 Feb 2025 17:19:42 +0200 Subject: [PATCH 17/88] add parameter --- GANDLF/Configuration/user_defined_parameters.py | 3 ++- test_configuration/config_all_options.yaml | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index 57a5a6004..7df0b5a5a 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -2,7 +2,7 @@ from pydantic import BaseModel, model_validator, Field, field_validator from GANDLF.config_manager import version_check from importlib.metadata import version -from typing_extensions import Self +from typing_extensions import Self, Literal class Version(BaseModel): @@ -28,6 +28,7 @@ class UserDefinedParameters(BaseModel): ..., description="Patch size." ) model: Model = Field(..., description="Model.") + modality: Literal["rad","histo","path"] = Field(..., description="Modality.") # Validators @model_validator(mode="after") diff --git a/test_configuration/config_all_options.yaml b/test_configuration/config_all_options.yaml index c0af2091a..3b596eb18 100644 --- a/test_configuration/config_all_options.yaml +++ b/test_configuration/config_all_options.yaml @@ -6,6 +6,7 @@ version: } weighted_loss: True patch_size: [1,2,3] +modality: "" model: { From 99f1158a75534399c487001563ea73e232fa36fa Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 4 Feb 2025 17:50:51 +0200 Subject: [PATCH 18/88] add parameter loss_function --- .../Configuration/user_defined_parameters.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index 7df0b5a5a..864fe97b9 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -29,6 +29,7 @@ class UserDefinedParameters(BaseModel): ) model: Model = Field(..., description="Model.") modality: Literal["rad","histo","path"] = Field(..., description="Modality.") + loss_function: Union[dict,str] = Field(..., description="Loss function.") # Validators @model_validator(mode="after") @@ -49,5 +50,28 @@ def validate_patch_size(self) -> Self: elif len(self.patch_size) == 3: # 2d check if self.model.dimension is None: self.model.dimension = 3 + #Loss_function + if isinstance(self.loss_function, dict): # if this is a dict + if len(self.loss_function) > 0: # only proceed if something is defined + for key in self.loss_function: # iterate through all keys + if key == "mse": + if (self.loss_function[key] is None) or not ( + "reduction" in self.loss_function[key] + ): + self.loss_function[key] = {} + self.loss_function[key]["reduction"] = "mean" + else: + # use simple string for other functions - can be extended with parameters, if needed + self.loss_function = key + else: + if self.loss_function == "focal": + self.loss_function = {"focal": {}} + self.loss_function["focal"]["gamma"] = 2.0 + self.loss_function["focal"]["size_average"] = True + elif self.loss_function == "mse": + self.loss_function = {"mse": {}} + self.loss_function["mse"]["reduction"] = "mean" + + return self From 5c2731a8ba076009d50788e6de4f83e301f922e8 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 4 Feb 2025 22:26:01 +0200 Subject: [PATCH 19/88] refactor the code --- .../Configuration/user_defined_parameters.py | 73 +- GANDLF/Configuration/utils.py | 30 +- GANDLF/Configuration/validators.py | 118 ++++ GANDLF/config_manager.py | 628 +++++++----------- test_configuration/config_all_options.yaml | 12 +- 5 files changed, 429 insertions(+), 432 deletions(-) create mode 100644 GANDLF/Configuration/validators.py diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index 864fe97b9..5fcc739d4 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -1,8 +1,16 @@ from typing import Union -from pydantic import BaseModel, model_validator, Field, field_validator +from pydantic import ( + BaseModel, + model_validator, + Field, + field_validator, + AfterValidator, + BeforeValidator, +) from GANDLF.config_manager import version_check from importlib.metadata import version -from typing_extensions import Self, Literal +from typing_extensions import Self, Literal, Annotated +from GANDLF.Configuration.validators import * class Version(BaseModel): @@ -25,53 +33,22 @@ class UserDefinedParameters(BaseModel): description="Whether weighted loss is to be used or not.", ) patch_size: Union[list[Union[int, float]], int, float] = Field( - ..., description="Patch size." + description="Patch size." ) - model: Model = Field(..., description="Model.") - modality: Literal["rad","histo","path"] = Field(..., description="Modality.") - loss_function: Union[dict,str] = Field(..., description="Loss function.") + model: Model = Field(description="Model.") + modality: Literal["rad", "histo", "path"] = Field(description="Modality.") + loss_function: Annotated[ + Union[dict, str], + Field(description="Loss function."), + AfterValidator(validate_loss_function), + ] + metrics: Annotated[ + Union[dict, list[str]], + Field(description="Metrics."), + AfterValidator(validate_metrics), + ] # Validators @model_validator(mode="after") - def validate_patch_size(self) -> Self: - # Validation for patch_size - if isinstance(self.patch_size, int) or isinstance(self.patch_size, float): - self.patch_size = [self.patch_size] - if len(self.patch_size) == 1 and self.model.dimension is not None: - actual_patch_size = [] - for _ in range(self.model.dimension): - actual_patch_size.append(self.patch_size[0]) - self.patch_size = actual_patch_size - if len(self.patch_size) == 2: # 2d check - # ensuring same size during torchio processing - self.patch_size.append(1) - if self.model.dimension is None: - self.model.dimension = 2 - elif len(self.patch_size) == 3: # 2d check - if self.model.dimension is None: - self.model.dimension = 3 - #Loss_function - if isinstance(self.loss_function, dict): # if this is a dict - if len(self.loss_function) > 0: # only proceed if something is defined - for key in self.loss_function: # iterate through all keys - if key == "mse": - if (self.loss_function[key] is None) or not ( - "reduction" in self.loss_function[key] - ): - self.loss_function[key] = {} - self.loss_function[key]["reduction"] = "mean" - else: - # use simple string for other functions - can be extended with parameters, if needed - self.loss_function = key - else: - if self.loss_function == "focal": - self.loss_function = {"focal": {}} - self.loss_function["focal"]["gamma"] = 2.0 - self.loss_function["focal"]["size_average"] = True - elif self.loss_function == "mse": - self.loss_function = {"mse": {}} - self.loss_function["mse"]["reduction"] = "mean" - - - - return self + def validate(self) -> Self: + return validate_patch(self) # check if it is the right approach diff --git a/GANDLF/Configuration/utils.py b/GANDLF/Configuration/utils.py index 439f19fd0..784516c86 100644 --- a/GANDLF/Configuration/utils.py +++ b/GANDLF/Configuration/utils.py @@ -1,4 +1,4 @@ -from typing import Type +from typing import Type, Optional, Union from pydantic import BaseModel from typing import Type @@ -37,3 +37,31 @@ def generate_and_save_markdown(model: Type[BaseModel], file_path: str) -> None: # Write to file with open(file_path, "w", encoding="utf-8") as file: file.write("\n".join(markdown)) + + +def initialize_key( + parameters: dict, key: str, value: Optional[Union[str, float, list, dict]] = None +) -> dict: + """ + This function initializes a key in the parameters dictionary to a value if it is absent. + + Args: + parameters (dict): The parameter dictionary. + key (str): The key to initialize. + value (Optional[Union[str, float, list, dict]], optional): The value to initialize. Defaults to None. + + Returns: + dict: The parameter dictionary. + """ + if parameters is None: + parameters = {} + if key in parameters: + if parameters[key] is not None: + if isinstance(parameters[key], dict): + # if key is present but not defined + if len(parameters[key]) == 0: + parameters[key] = value + else: + parameters[key] = value # if key is absent + + return parameters diff --git a/GANDLF/Configuration/validators.py b/GANDLF/Configuration/validators.py new file mode 100644 index 000000000..666fa62f0 --- /dev/null +++ b/GANDLF/Configuration/validators.py @@ -0,0 +1,118 @@ + +from GANDLF.Configuration.utils import initialize_key +from GANDLF.metrics import surface_distance_ids + + +def validate_loss_function(value) -> dict: + if isinstance(value, dict): # if this is a dict + if len(value) > 0: # only proceed if something is defined + for key in value: # iterate through all keys + if key == "mse": + if (value[key] is None) or not ("reduction" in value[key]): + value[key] = {} + value[key]["reduction"] = "mean" + else: + # use simple string for other functions - can be extended with parameters, if needed + value = key + else: + if value == "focal": + value = {"focal": {}} + value["focal"]["gamma"] = 2.0 + value["focal"]["size_average"] = True + elif value == "mse": + value = {"mse": {}} + value["mse"]["reduction"] = "mean" + + return value + + +def validate_metrics(value) -> dict: + if not isinstance(value, dict): + temp_dict = {} + else: + temp_dict = value + + # initialize metrics dict + for metric in value: + # assigning a new variable because some metrics can be dicts, and we want to get the first key + comparison_string = metric + if isinstance(metric, dict): + comparison_string = list(metric.keys())[0] + # these metrics always need to be dicts + if comparison_string in [ + "accuracy", + "f1", + "precision", + "recall", + "specificity", + "iou", + ]: + if not isinstance(metric, dict): + temp_dict[metric] = {} + else: + temp_dict[comparison_string] = metric + elif not isinstance(metric, dict): + temp_dict[metric] = None + + # special case for accuracy, precision, recall, and specificity; which could be dicts + ## need to find a better way to do this + if any( + _ in comparison_string + for _ in ["precision", "recall", "specificity", "accuracy", "f1"] + ): + if comparison_string != "classification_accuracy": + temp_dict[comparison_string] = initialize_key( + temp_dict[comparison_string], "average", "weighted" + ) + temp_dict[comparison_string] = initialize_key( + temp_dict[comparison_string], "multi_class", True + ) + temp_dict[comparison_string] = initialize_key( + temp_dict[comparison_string], "mdmc_average", "samplewise" + ) + temp_dict[comparison_string] = initialize_key( + temp_dict[comparison_string], "threshold", 0.5 + ) + if comparison_string == "accuracy": + temp_dict[comparison_string] = initialize_key( + temp_dict[comparison_string], "subset_accuracy", False + ) + elif "iou" in comparison_string: + temp_dict["iou"] = initialize_key( + temp_dict["iou"], "reduction", "elementwise_mean" + ) + temp_dict["iou"] = initialize_key(temp_dict["iou"], "threshold", 0.5) + elif comparison_string in surface_distance_ids: + temp_dict[comparison_string] = initialize_key( + temp_dict[comparison_string], "connectivity", 1 + ) + temp_dict[comparison_string] = initialize_key( + temp_dict[comparison_string], "threshold", None + ) + + value = temp_dict + return value + + +def validate_an_example(value, patch) -> dict: + return value + + +def validate_patch(self): + if isinstance(self.patch_size, int) or isinstance(self.patch_size, float): + self.patch_size = [self.patch_size] + if len(self.patch_size) == 1 and self.model.dimension is not None: + actual_patch_size = [] + for _ in range(self.model.dimension): + actual_patch_size.append(self.patch_size[0]) + self.patch_size = actual_patch_size + if len(self.patch_size) == 2: # 2d check + # ensuring same size during torchio processing + self.patch_size.append(1) + if self.model.dimension is None: + self.model.dimension = 2 + elif len(self.patch_size) == 3: # 2d check + if self.model.dimension is None: + self.model.dimension = 3 + + return self diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index e54ee6dd4..461c3f1c0 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -131,35 +131,6 @@ def _parseConfig( if not isinstance(config_file_path, dict): params = yaml.safe_load(open(config_file_path, "r")) - # if version_check_flag: # this is only to be used for testing - # assert ( - # "version" in params - # ), "The 'version' key needs to be defined in config with 'minimum' and 'maximum' fields to determine the compatibility of configuration with code base" - # version_check(params["version"], version_to_check=version("GANDLF")) - - # if "patch_size" in params: - # # duplicate patch size if it is an int or float - # if isinstance(params["patch_size"], int) or isinstance( - # params["patch_size"], float - # ): - # params["patch_size"] = [params["patch_size"]] - # # in case someone decides to pass a single value list - # if len(params["patch_size"]) == 1: - # actual_patch_size = [] - # for _ in range(params["model"]["dimension"]): - # actual_patch_size.append(params["patch_size"][0]) - # params["patch_size"] = actual_patch_size - # # - # # parse patch size as needed for computations - # if len(params["patch_size"]) == 2: # 2d check - # # ensuring same size during torchio processing - # params["patch_size"].append(1) - # if "dimension" not in params["model"]: - # params["model"]["dimension"] = 2 - # elif len(params["patch_size"]) == 3: # 2d check - # if "dimension" not in params["model"]: - # params["model"]["dimension"] = 3 - # assert "patch_size" in params, "Patch size needs to be defined in the config file" # # if "resize" in params: # print( @@ -167,282 +138,179 @@ def _parseConfig( # file=sys.stderr, # ) # - # assert "modality" in params, "'modality' needs to be defined in the config file" - # params["modality"] = params["modality"].lower() - # assert params["modality"] in [ - # "rad", - # "histo", - # "path", - # ], "Modality should be either 'rad' or 'path'" - # - # assert ( - # "loss_function" in params - # ), "'loss_function' needs to be defined in the config file" - # if "loss_function" in params: - # # check if user has passed a dict - # if isinstance(params["loss_function"], dict): # if this is a dict - # if len(params["loss_function"]) > 0: # only proceed if something is defined - # for key in params["loss_function"]: # iterate through all keys - # if key == "mse": - # if (params["loss_function"][key] is None) or not ( - # "reduction" in params["loss_function"][key] - # ): - # params["loss_function"][key] = {} - # params["loss_function"][key]["reduction"] = "mean" - # else: - # # use simple string for other functions - can be extended with parameters, if needed - # params["loss_function"] = key - # else: - # # check if user has passed a single string - # if params["loss_function"] == "mse": - # params["loss_function"] = {} - # params["loss_function"]["mse"] = {} - # params["loss_function"]["mse"]["reduction"] = "mean" - # elif params["loss_function"] == "focal": - # params["loss_function"] = {} - # params["loss_function"]["focal"] = {} - # params["loss_function"]["focal"]["gamma"] = 2.0 - # params["loss_function"]["focal"]["size_average"] = True - # - # assert "metrics" in params, "'metrics' needs to be defined in the config file" - # if "metrics" in params: - # if not isinstance(params["metrics"], dict): - # temp_dict = {} - # else: - # temp_dict = params["metrics"] - # - # # initialize metrics dict - # for metric in params["metrics"]: - # # assigning a new variable because some metrics can be dicts, and we want to get the first key - # comparison_string = metric - # if isinstance(metric, dict): - # comparison_string = list(metric.keys())[0] - # # these metrics always need to be dicts - # if comparison_string in [ - # "accuracy", - # "f1", - # "precision", - # "recall", - # "specificity", - # "iou", - # ]: - # if not isinstance(metric, dict): - # temp_dict[metric] = {} - # else: - # temp_dict[comparison_string] = metric - # elif not isinstance(metric, dict): - # temp_dict[metric] = None - # - # # special case for accuracy, precision, recall, and specificity; which could be dicts - # ## need to find a better way to do this - # if any( - # _ in comparison_string - # for _ in ["precision", "recall", "specificity", "accuracy", "f1"] - # ): - # if comparison_string != "classification_accuracy": - # temp_dict[comparison_string] = initialize_key( - # temp_dict[comparison_string], "average", "weighted" - # ) - # temp_dict[comparison_string] = initialize_key( - # temp_dict[comparison_string], "multi_class", True - # ) - # temp_dict[comparison_string] = initialize_key( - # temp_dict[comparison_string], "mdmc_average", "samplewise" - # ) - # temp_dict[comparison_string] = initialize_key( - # temp_dict[comparison_string], "threshold", 0.5 - # ) - # if comparison_string == "accuracy": - # temp_dict[comparison_string] = initialize_key( - # temp_dict[comparison_string], "subset_accuracy", False - # ) - # elif "iou" in comparison_string: - # temp_dict["iou"] = initialize_key( - # temp_dict["iou"], "reduction", "elementwise_mean" - # ) - # temp_dict["iou"] = initialize_key(temp_dict["iou"], "threshold", 0.5) - # elif comparison_string in surface_distance_ids: - # temp_dict[comparison_string] = initialize_key( - # temp_dict[comparison_string], "connectivity", 1 - # ) - # temp_dict[comparison_string] = initialize_key( - # temp_dict[comparison_string], "threshold", None - # ) - # - # params["metrics"] = temp_dict - # - # # this is NOT a required parameter - a user should be able to train with NO augmentations - # params = initialize_key(params, "data_augmentation", {}) - # # for all others, ensure probability is present - # params["data_augmentation"]["default_probability"] = params[ - # "data_augmentation" - # ].get("default_probability", 0.5) - # - # if not (params["data_augmentation"] is None): - # if len(params["data_augmentation"]) > 0: # only when augmentations are defined - # # special case for random swapping and elastic transformations - which takes a patch size for computation - # for key in ["swap", "elastic"]: - # if key in params["data_augmentation"]: - # params["data_augmentation"][key] = initialize_key( - # params["data_augmentation"][key], - # "patch_size", - # np.round(np.array(params["patch_size"]) / 10) - # .astype("int") - # .tolist(), - # ) - # - # # special case for swap default initialization - # if "swap" in params["data_augmentation"]: - # params["data_augmentation"]["swap"] = initialize_key( - # params["data_augmentation"]["swap"], "num_iterations", 100 - # ) - # - # # special case for affine default initialization - # if "affine" in params["data_augmentation"]: - # params["data_augmentation"]["affine"] = initialize_key( - # params["data_augmentation"]["affine"], "scales", 0.1 - # ) - # params["data_augmentation"]["affine"] = initialize_key( - # params["data_augmentation"]["affine"], "degrees", 15 - # ) - # params["data_augmentation"]["affine"] = initialize_key( - # params["data_augmentation"]["affine"], "translation", 2 - # ) - # - # if "motion" in params["data_augmentation"]: - # params["data_augmentation"]["motion"] = initialize_key( - # params["data_augmentation"]["motion"], "num_transforms", 2 - # ) - # params["data_augmentation"]["motion"] = initialize_key( - # params["data_augmentation"]["motion"], "degrees", 15 - # ) - # params["data_augmentation"]["motion"] = initialize_key( - # params["data_augmentation"]["motion"], "translation", 2 - # ) - # params["data_augmentation"]["motion"] = initialize_key( - # params["data_augmentation"]["motion"], "interpolation", "linear" - # ) - # - # # special case for random blur/noise - which takes a std-dev range - # for std_aug in ["blur", "noise_var"]: - # if std_aug in params["data_augmentation"]: - # params["data_augmentation"][std_aug] = initialize_key( - # params["data_augmentation"][std_aug], "std", None - # ) - # for std_aug in ["noise"]: - # if std_aug in params["data_augmentation"]: - # params["data_augmentation"][std_aug] = initialize_key( - # params["data_augmentation"][std_aug], "std", [0, 1] - # ) - # - # # special case for random noise - which takes a mean range - # for mean_aug in ["noise", "noise_var"]: - # if mean_aug in params["data_augmentation"]: - # params["data_augmentation"][mean_aug] = initialize_key( - # params["data_augmentation"][mean_aug], "mean", 0 - # ) - # - # # special case for augmentations that need axis defined - # for axis_aug in ["flip", "anisotropic", "rotate_90", "rotate_180"]: - # if axis_aug in params["data_augmentation"]: - # params["data_augmentation"][axis_aug] = initialize_key( - # params["data_augmentation"][axis_aug], "axis", [0, 1, 2] - # ) - # - # # special case for colorjitter - # if "colorjitter" in params["data_augmentation"]: - # params["data_augmentation"] = initialize_key( - # params["data_augmentation"], "colorjitter", {} - # ) - # for key in ["brightness", "contrast", "saturation"]: - # params["data_augmentation"]["colorjitter"] = initialize_key( - # params["data_augmentation"]["colorjitter"], key, [0, 1] - # ) - # params["data_augmentation"]["colorjitter"] = initialize_key( - # params["data_augmentation"]["colorjitter"], "hue", [-0.5, 0.5] - # ) - # - # # Added HED augmentation in gandlf - # hed_augmentation_types = [ - # "hed_transform", - # # "hed_transform_light", - # # "hed_transform_heavy", - # ] - # for augmentation_type in hed_augmentation_types: - # if augmentation_type in params["data_augmentation"]: - # params["data_augmentation"] = initialize_key( - # params["data_augmentation"], "hed_transform", {} - # ) - # ranges = [ - # "haematoxylin_bias_range", - # "eosin_bias_range", - # "dab_bias_range", - # "haematoxylin_sigma_range", - # "eosin_sigma_range", - # "dab_sigma_range", - # ] - # - # default_range = ( - # [-0.1, 0.1] - # if augmentation_type == "hed_transform" - # else ( - # [-0.03, 0.03] - # if augmentation_type == "hed_transform_light" - # else [-0.95, 0.95] - # ) - # ) - # - # for key in ranges: - # params["data_augmentation"]["hed_transform"] = initialize_key( - # params["data_augmentation"]["hed_transform"], - # key, - # default_range, - # ) - # - # params["data_augmentation"]["hed_transform"] = initialize_key( - # params["data_augmentation"]["hed_transform"], - # "cutoff_range", - # [0, 1], - # ) - # - # # special case for anisotropic - # if "anisotropic" in params["data_augmentation"]: - # if not ("downsampling" in params["data_augmentation"]["anisotropic"]): - # default_downsampling = 1.5 - # else: - # default_downsampling = params["data_augmentation"]["anisotropic"][ - # "downsampling" - # ] - # - # initialize_downsampling = False - # if isinstance(default_downsampling, list): - # if len(default_downsampling) != 2: - # initialize_downsampling = True - # print( - # "WARNING: 'anisotropic' augmentation needs to be either a single number of a list of 2 numbers: https://torchio.readthedocs.io/transforms/augmentation.html?highlight=randomswap#torchio.transforms.RandomAnisotropy.", - # file=sys.stderr, - # ) - # default_downsampling = default_downsampling[0] # only - # else: - # initialize_downsampling = True - # - # if initialize_downsampling: - # if default_downsampling < 1: - # print( - # "WARNING: 'anisotropic' augmentation needs the 'downsampling' parameter to be greater than 1, defaulting to 1.5.", - # file=sys.stderr, - # ) - # # default - # params["data_augmentation"]["anisotropic"]["downsampling"] = 1.5 - # - # for key in params["data_augmentation"]: - # if key != "default_probability": - # params["data_augmentation"][key] = initialize_key( - # params["data_augmentation"][key], - # "probability", - # params["data_augmentation"]["default_probability"], - # ) - # + + # this is NOT a required parameter - a user should be able to train with NO augmentations + params = initialize_key(params, "data_augmentation", {}) + # for all others, ensure probability is present + params["data_augmentation"]["default_probability"] = params[ + "data_augmentation" + ].get("default_probability", 0.5) + + if not (params["data_augmentation"] is None): + if len(params["data_augmentation"]) > 0: # only when augmentations are defined + # special case for random swapping and elastic transformations - which takes a patch size for computation + for key in ["swap", "elastic"]: + if key in params["data_augmentation"]: + params["data_augmentation"][key] = initialize_key( + params["data_augmentation"][key], + "patch_size", + np.round(np.array(params["patch_size"]) / 10) + .astype("int") + .tolist(), + ) + + # special case for swap default initialization + if "swap" in params["data_augmentation"]: + params["data_augmentation"]["swap"] = initialize_key( + params["data_augmentation"]["swap"], "num_iterations", 100 + ) + + # special case for affine default initialization + if "affine" in params["data_augmentation"]: + params["data_augmentation"]["affine"] = initialize_key( + params["data_augmentation"]["affine"], "scales", 0.1 + ) + params["data_augmentation"]["affine"] = initialize_key( + params["data_augmentation"]["affine"], "degrees", 15 + ) + params["data_augmentation"]["affine"] = initialize_key( + params["data_augmentation"]["affine"], "translation", 2 + ) + + if "motion" in params["data_augmentation"]: + params["data_augmentation"]["motion"] = initialize_key( + params["data_augmentation"]["motion"], "num_transforms", 2 + ) + params["data_augmentation"]["motion"] = initialize_key( + params["data_augmentation"]["motion"], "degrees", 15 + ) + params["data_augmentation"]["motion"] = initialize_key( + params["data_augmentation"]["motion"], "translation", 2 + ) + params["data_augmentation"]["motion"] = initialize_key( + params["data_augmentation"]["motion"], "interpolation", "linear" + ) + + # special case for random blur/noise - which takes a std-dev range + for std_aug in ["blur", "noise_var"]: + if std_aug in params["data_augmentation"]: + params["data_augmentation"][std_aug] = initialize_key( + params["data_augmentation"][std_aug], "std", None + ) + for std_aug in ["noise"]: + if std_aug in params["data_augmentation"]: + params["data_augmentation"][std_aug] = initialize_key( + params["data_augmentation"][std_aug], "std", [0, 1] + ) + + # special case for random noise - which takes a mean range + for mean_aug in ["noise", "noise_var"]: + if mean_aug in params["data_augmentation"]: + params["data_augmentation"][mean_aug] = initialize_key( + params["data_augmentation"][mean_aug], "mean", 0 + ) + + # special case for augmentations that need axis defined + for axis_aug in ["flip", "anisotropic", "rotate_90", "rotate_180"]: + if axis_aug in params["data_augmentation"]: + params["data_augmentation"][axis_aug] = initialize_key( + params["data_augmentation"][axis_aug], "axis", [0, 1, 2] + ) + + # special case for colorjitter + if "colorjitter" in params["data_augmentation"]: + params["data_augmentation"] = initialize_key( + params["data_augmentation"], "colorjitter", {} + ) + for key in ["brightness", "contrast", "saturation"]: + params["data_augmentation"]["colorjitter"] = initialize_key( + params["data_augmentation"]["colorjitter"], key, [0, 1] + ) + params["data_augmentation"]["colorjitter"] = initialize_key( + params["data_augmentation"]["colorjitter"], "hue", [-0.5, 0.5] + ) + + # Added HED augmentation in gandlf + hed_augmentation_types = [ + "hed_transform", + # "hed_transform_light", + # "hed_transform_heavy", + ] + for augmentation_type in hed_augmentation_types: + if augmentation_type in params["data_augmentation"]: + params["data_augmentation"] = initialize_key( + params["data_augmentation"], "hed_transform", {} + ) + ranges = [ + "haematoxylin_bias_range", + "eosin_bias_range", + "dab_bias_range", + "haematoxylin_sigma_range", + "eosin_sigma_range", + "dab_sigma_range", + ] + + default_range = ( + [-0.1, 0.1] + if augmentation_type == "hed_transform" + else ( + [-0.03, 0.03] + if augmentation_type == "hed_transform_light" + else [-0.95, 0.95] + ) + ) + + for key in ranges: + params["data_augmentation"]["hed_transform"] = initialize_key( + params["data_augmentation"]["hed_transform"], + key, + default_range, + ) + + params["data_augmentation"]["hed_transform"] = initialize_key( + params["data_augmentation"]["hed_transform"], + "cutoff_range", + [0, 1], + ) + + # special case for anisotropic + if "anisotropic" in params["data_augmentation"]: + if not ("downsampling" in params["data_augmentation"]["anisotropic"]): + default_downsampling = 1.5 + else: + default_downsampling = params["data_augmentation"]["anisotropic"][ + "downsampling" + ] + + initialize_downsampling = False + if isinstance(default_downsampling, list): + if len(default_downsampling) != 2: + initialize_downsampling = True + print( + "WARNING: 'anisotropic' augmentation needs to be either a single number of a list of 2 numbers: https://torchio.readthedocs.io/transforms/augmentation.html?highlight=randomswap#torchio.transforms.RandomAnisotropy.", + file=sys.stderr, + ) + default_downsampling = default_downsampling[0] # only + else: + initialize_downsampling = True + + if initialize_downsampling: + if default_downsampling < 1: + print( + "WARNING: 'anisotropic' augmentation needs the 'downsampling' parameter to be greater than 1, defaulting to 1.5.", + file=sys.stderr, + ) + # default + params["data_augmentation"]["anisotropic"]["downsampling"] = 1.5 + + for key in params["data_augmentation"]: + if key != "default_probability": + params["data_augmentation"][key] = initialize_key( + params["data_augmentation"][key], + "probability", + params["data_augmentation"]["default_probability"], + ) + # # this is NOT a required parameter - a user should be able to train with NO built-in pre-processing # params = initialize_key(params, "data_preprocessing", {}) # if not (params["data_preprocessing"] is None): @@ -532,81 +400,81 @@ def _parseConfig( # "data_postprocessing" # ][key] # params["data_postprocessing"].pop(key) - # - # if "model" in params: - # assert isinstance( - # params["model"], dict - # ), "The 'model' parameter needs to be populated as a dictionary" - # assert ( - # len(params["model"]) > 0 - # ), "The 'model' parameter needs to be populated as a dictionary and should have all properties present" - # assert ( - # "architecture" in params["model"] - # ), "The 'model' parameter needs 'architecture' to be defined" - # assert ( - # "final_layer" in params["model"] - # ), "The 'model' parameter needs 'final_layer' to be defined" - # assert ( - # "dimension" in params["model"] - # ), "The 'model' parameter needs 'dimension' to be defined" - # - # if "amp" in params["model"]: - # pass - # else: - # print("NOT using Mixed Precision Training") - # params["model"]["amp"] = False - # - # if "norm_type" in params["model"]: - # if ( - # params["model"]["norm_type"] == None - # or params["model"]["norm_type"].lower() == "none" - # ): - # if not ("vgg" in params["model"]["architecture"]): - # raise ValueError( - # "Normalization type cannot be 'None' for non-VGG architectures" - # ) - # else: - # print("WARNING: Initializing 'norm_type' as 'batch'", flush=True) - # params["model"]["norm_type"] = "batch" - # - # if not ("base_filters" in params["model"]): - # base_filters = 32 - # params["model"]["base_filters"] = base_filters - # print("Using default 'base_filters' in 'model': ", base_filters) - # if not ("class_list" in params["model"]): - # params["model"]["class_list"] = [] # ensure that this is initialized - # if not ("ignore_label_validation" in params["model"]): - # params["model"]["ignore_label_validation"] = None - # if "batch_norm" in params["model"]: - # print( - # "WARNING: 'batch_norm' is no longer supported, please use 'norm_type' in 'model' instead", - # flush=True, - # ) - # params["model"]["print_summary"] = params["model"].get("print_summary", True) - # - # channel_keys_to_check = ["n_channels", "channels", "model_channels"] - # for key in channel_keys_to_check: - # if key in params["model"]: - # params["model"]["num_channels"] = params["model"][key] - # break - # - # # initialize model type for processing: if not defined, default to torch - # if not ("type" in params["model"]): - # params["model"]["type"] = "torch" - # - # # initialize openvino model data type for processing: if not defined, default to FP32 - # if not ("data_type" in params["model"]): - # params["model"]["data_type"] = "FP32" - # - # # set default save strategy for model - # if not ("save_at_every_epoch" in params["model"]): - # params["model"]["save_at_every_epoch"] = False - # - # if params["model"]["save_at_every_epoch"]: - # print( - # "WARNING: 'save_at_every_epoch' will result in TREMENDOUS storage usage; use at your own risk." - # ) - # + # if "model" in params: + assert isinstance( + params["model"], dict + ), "The 'model' parameter needs to be populated as a dictionary" + assert ( + len(params["model"]) > 0 + ), "The 'model' parameter needs to be populated as a dictionary and should have all properties present" + assert ( + "architecture" in params["model"] + ), "The 'model' parameter needs 'architecture' to be defined" + assert ( + "final_layer" in params["model"] + ), "The 'model' parameter needs 'final_layer' to be defined" + assert ( + "dimension" in params["model"] + ), "The 'model' parameter needs 'dimension' to be defined" + + if "amp" in params["model"]: + pass + else: + print("NOT using Mixed Precision Training") + params["model"]["amp"] = False + + if "norm_type" in params["model"]: + if ( + params["model"]["norm_type"] == None + or params["model"]["norm_type"].lower() == "none" + ): + if not ("vgg" in params["model"]["architecture"]): + raise ValueError( + "Normalization type cannot be 'None' for non-VGG architectures" + ) + else: + print("WARNING: Initializing 'norm_type' as 'batch'", flush=True) + params["model"]["norm_type"] = "batch" + + if not ("base_filters" in params["model"]): + base_filters = 32 + params["model"]["base_filters"] = base_filters + print("Using default 'base_filters' in 'model': ", base_filters) + if not ("class_list" in params["model"]): + params["model"]["class_list"] = [] # ensure that this is initialized + if not ("ignore_label_validation" in params["model"]): + params["model"]["ignore_label_validation"] = None + if "batch_norm" in params["model"]: + print( + "WARNING: 'batch_norm' is no longer supported, please use 'norm_type' in 'model' instead", + flush=True, + ) + params["model"]["print_summary"] = params["model"].get("print_summary", True) + + channel_keys_to_check = ["n_channels", "channels", "model_channels"] + for key in channel_keys_to_check: + if key in params["model"]: + params["model"]["num_channels"] = params["model"][key] + break + + # initialize model type for processing: if not defined, default to torch + if not ("type" in params["model"]): + params["model"]["type"] = "torch" + + # initialize openvino model data type for processing: if not defined, default to FP32 + if not ("data_type" in params["model"]): + params["model"]["data_type"] = "FP32" + + # set default save strategy for model + if not ("save_at_every_epoch" in params["model"]): + params["model"]["save_at_every_epoch"] = False + + if params["model"]["save_at_every_epoch"]: + print( + "WARNING: 'save_at_every_epoch' will result in TREMENDOUS storage usage; use at your own risk." + ) + + # if isinstance(params["model"]["class_list"], str): # if ("||" in params["model"]["class_list"]) or ( # "&&" in params["model"]["class_list"] diff --git a/test_configuration/config_all_options.yaml b/test_configuration/config_all_options.yaml index 3b596eb18..d669438a8 100644 --- a/test_configuration/config_all_options.yaml +++ b/test_configuration/config_all_options.yaml @@ -5,12 +5,18 @@ version: maximum: 0.1.3-dev # this should NOT be made a variable, but should be tested after every tag is created } weighted_loss: True -patch_size: [1,2,3] -modality: "" +patch_size: [1] +modality: "histo" +loss_function: "mse" model: { - + dimension: 3 } +metrics: + - "dice" + - "classification" + - "accuracy" + - "iou" ## Choose the model parameters here #model: # { From f208db3760f360ad0f400fb76749d1b99683446c Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 4 Feb 2025 23:18:43 +0200 Subject: [PATCH 20/88] add model parameters and refactor the code --- GANDLF/Configuration/model_parameters.py | 33 +++++++++++++++++++ .../Configuration/user_defined_parameters.py | 8 ++--- GANDLF/Configuration/validators.py | 2 +- test_configuration/config_all_options.yaml | 13 ++++++-- 4 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 GANDLF/Configuration/model_parameters.py diff --git a/GANDLF/Configuration/model_parameters.py b/GANDLF/Configuration/model_parameters.py new file mode 100644 index 000000000..cddf42b9e --- /dev/null +++ b/GANDLF/Configuration/model_parameters.py @@ -0,0 +1,33 @@ +from pydantic import ( + BaseModel, + model_validator, + Field, + field_validator, + AfterValidator, + BeforeValidator, +) + +from typing_extensions import Self, Literal, Annotated, Optional +from typing import Union + +class Model(BaseModel): + dimension: Union[int, None] = Field(description="Dimension.", default=None) + architecture: Union[str,dict] = Field(description="Architecture.") + final_layer: str = Field(description="Final layer.") + norm_type: Optional[str] = Field(description="Normalization type.") + base_filters: Optional[int] = Field(description="Base filters.") + class_list: Optional[list] = Field(default=[],description="Class list.") + num_channels: Optional[int] = Field(description="Number of channels.") + type: Optional[str] = Field(description="Type of model.") + data_type: Optional[str] = Field(description="Data type.") + save_at_every_epoch: bool = Field(default=False,description="Save at every epoch.") + amp: bool = Field(default = False,description="Amplifier.") + ignore_label_validation: None = Field(default=None ,description="Ignore label validation.") # To check it + print_summary:bool = Field(default=True ,description="Print summary.") + + @model_validator(mode="after") + def validate_model(self): + if self.amp is False: + print("NOT using Mixed Precision Training") + + diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index 5fcc739d4..22af8ae30 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -9,8 +9,9 @@ ) from GANDLF.config_manager import version_check from importlib.metadata import version -from typing_extensions import Self, Literal, Annotated +from typing_extensions import Self, Literal, Annotated, Optional from GANDLF.Configuration.validators import * +from GANDLF.Configuration.model_parameters import Model class Version(BaseModel): @@ -23,9 +24,6 @@ def validate_version(self) -> Self: return self -class Model(BaseModel): - dimension: Union[int, None] = Field(description="Dimension.", default=None) - class UserDefinedParameters(BaseModel): version: Version = Field( @@ -35,7 +33,7 @@ class UserDefinedParameters(BaseModel): patch_size: Union[list[Union[int, float]], int, float] = Field( description="Patch size." ) - model: Model = Field(description="Model.") + model: Annotated[Model,Field(description="Model.")] modality: Literal["rad", "histo", "path"] = Field(description="Modality.") loss_function: Annotated[ Union[dict, str], diff --git a/GANDLF/Configuration/validators.py b/GANDLF/Configuration/validators.py index 666fa62f0..b4fe2cea4 100644 --- a/GANDLF/Configuration/validators.py +++ b/GANDLF/Configuration/validators.py @@ -1,4 +1,3 @@ - from GANDLF.Configuration.utils import initialize_key from GANDLF.metrics import surface_distance_ids @@ -116,3 +115,4 @@ def validate_patch(self): self.model.dimension = 3 return self + diff --git a/test_configuration/config_all_options.yaml b/test_configuration/config_all_options.yaml index d669438a8..d1841267a 100644 --- a/test_configuration/config_all_options.yaml +++ b/test_configuration/config_all_options.yaml @@ -10,8 +10,17 @@ modality: "histo" loss_function: "mse" model: { - dimension: 3 - } + dimension: 3, # the dimension of the model and dataset: defines dimensionality of computations + base_filters: 30, # Set base filters: number of filters present in the initial module of the U-Net convolution; for IncU-Net, keep this divisible by 4 + architecture: resunet, # options: unet, resunet, deep_resunet, deep_unet, light_resunet, light_unet, fcn, uinc, vgg, densenet + norm_type: batch, # options: batch, instance, or none (only for VGG); used for all networks + final_layer: softmax, # can be either sigmoid, softmax or none (none == regression/logits) + # sigmoid_input_multiplier: 1.0, # this is used during sigmoid, and defaults to 1.0 + class_list: [0,1,2,4], # Set the list of labels the model should train on and predict + # class_list: '[*range(0,100,1)]' # a range of values from 0 to 99 with a step of 1 will be created; customize as needed, but ensure this is defined as a string as it will be passed through 'eval' function + # class_list: '[0,1||2||3,1||4,4]', # combinatorial training - this will construct one-hot encoded mask using logical operands between specified annotations. Note that double '|' or '&' should be passed and not single to avoid python parsing + ignore_label_validation: 0, # this is the location of the class_list whose performance is ignored during validation metric calculation +} metrics: - "dice" - "classification" From 7dc0f3f846f5010f2510208d20c903ff942e2edc Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Thu, 6 Feb 2025 23:57:50 +0200 Subject: [PATCH 21/88] update model_parameters.py --- GANDLF/Configuration/model_parameters.py | 34 ++++++++++------- GANDLF/Configuration/parameters.py | 2 +- .../Configuration/user_defined_parameters.py | 5 ++- GANDLF/Configuration/validators.py | 37 ++++++++++++++++--- 4 files changed, 56 insertions(+), 22 deletions(-) diff --git a/GANDLF/Configuration/model_parameters.py b/GANDLF/Configuration/model_parameters.py index cddf42b9e..900b6ac33 100644 --- a/GANDLF/Configuration/model_parameters.py +++ b/GANDLF/Configuration/model_parameters.py @@ -1,33 +1,41 @@ from pydantic import ( BaseModel, model_validator, - Field, - field_validator, - AfterValidator, - BeforeValidator, + Field, AliasChoices, field_validator, AfterValidator, ConfigDict ) from typing_extensions import Self, Literal, Annotated, Optional from typing import Union +from GANDLF.Configuration.validators import validate_class_list class Model(BaseModel): - dimension: Union[int, None] = Field(description="Dimension.", default=None) + model_config = ConfigDict(extra='allow') + dimension: Optional[int] = Field(description="Dimension.") architecture: Union[str,dict] = Field(description="Architecture.") final_layer: str = Field(description="Final layer.") - norm_type: Optional[str] = Field(description="Normalization type.") - base_filters: Optional[int] = Field(description="Base filters.") - class_list: Optional[list] = Field(default=[],description="Class list.") - num_channels: Optional[int] = Field(description="Number of channels.") - type: Optional[str] = Field(description="Type of model.") - data_type: Optional[str] = Field(description="Data type.") + norm_type: str = Field(description="Normalization type.",default= None) # TODO: check it + base_filters: Optional[int] = Field(description="Base filters.", default= None, validate_default= True) # default is 32 + class_list: Union[list, str] = Field(default=[],description="Class list." ) # TODO: check it for class_list: '[0,1||2||3,1||4,4]' + num_channels: Optional[int] = Field(description="Number of channels.", validation_alias=AliasChoices('num_channels', "n_channels","channels","model_channels" )) # TODO: check it + type: Optional[str] = Field(description="Type of model.",default= "torch") + data_type: str = Field(description="Data type.",default= "FP32") save_at_every_epoch: bool = Field(default=False,description="Save at every epoch.") amp: bool = Field(default = False,description="Amplifier.") - ignore_label_validation: None = Field(default=None ,description="Ignore label validation.") # To check it + ignore_label_validation: int = Field(default=None ,description="Ignore label validation.") #TODO: To check it print_summary:bool = Field(default=True ,description="Print summary.") + batch_norm: int = Field(default=None,deprecated="batch_norm is no longer supported, please use 'norm_type' in 'model' instead.") + @model_validator(mode="after") - def validate_model(self): + def model_validate(self) -> Self: + self.class_list = validate_class_list(self.class_list) if self.amp is False: print("NOT using Mixed Precision Training") + if self.save_at_every_epoch: + print("WARNING: 'save_at_every_epoch' will result in TREMENDOUS storage usage; use at your own risk.") # TODO: It is better to use logging.warning + if self.base_filters is None: + self.base_filters = 32 + print("Using default 'base_filters' in 'model': ", self.base_filters) + return self \ No newline at end of file diff --git a/GANDLF/Configuration/parameters.py b/GANDLF/Configuration/parameters.py index 8dbd73b91..36dbc7014 100644 --- a/GANDLF/Configuration/parameters.py +++ b/GANDLF/Configuration/parameters.py @@ -4,7 +4,7 @@ class ParametersConfiguration(BaseModel): - model_config = ConfigDict() + model_config = ConfigDict(extra='allow') class Parameters(ParametersConfiguration, DefaultParameters, UserDefinedParameters): diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index 22af8ae30..e58dd2dde 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -33,7 +33,7 @@ class UserDefinedParameters(BaseModel): patch_size: Union[list[Union[int, float]], int, float] = Field( description="Patch size." ) - model: Annotated[Model,Field(description="Model.")] + model: Model = Field(...,description="The model to use. ") modality: Literal["rad", "histo", "path"] = Field(description="Modality.") loss_function: Annotated[ Union[dict, str], @@ -49,4 +49,5 @@ class UserDefinedParameters(BaseModel): # Validators @model_validator(mode="after") def validate(self) -> Self: - return validate_patch(self) # check if it is the right approach + validate_patch_size(self) + return self diff --git a/GANDLF/Configuration/validators.py b/GANDLF/Configuration/validators.py index b4fe2cea4..02ef68424 100644 --- a/GANDLF/Configuration/validators.py +++ b/GANDLF/Configuration/validators.py @@ -1,3 +1,7 @@ +import traceback + +from typing_extensions import Self + from GANDLF.Configuration.utils import initialize_key from GANDLF.metrics import surface_distance_ids @@ -93,11 +97,35 @@ def validate_metrics(value) -> dict: return value -def validate_an_example(value, patch) -> dict: +def validate_class_list(value): + if isinstance(value, str): + if ("||" in value) or ( + "&&" in value + ): + # special case for multi-class computation - this needs to be handled during one-hot encoding mask construction + print( + "WARNING: This is a special case for multi-class computation, where different labels are processed together, `reverse_one_hot` will need mapping information to work correctly" + ) + temp_class_list =value + # we don't need the brackets + temp_class_list = temp_class_list.replace("[", "") + temp_class_list = temp_class_list.replace("]", "") + value = temp_class_list.split(",") + else: + try: + value = eval(value) + return value + except Exception as e: + ## todo: ensure logging captures assertion errors + assert ( + False + ), f"Could not evaluate the `class_list` in `model`, Exception: {str(e)}, {traceback.format_exc()}" + # logging.error( + # f"Could not evaluate the `class_list` in `model`, Exception: {str(e)}, {traceback.format_exc()}" + # ) return value - -def validate_patch(self): +def validate_patch_size(self): if isinstance(self.patch_size, int) or isinstance(self.patch_size, float): self.patch_size = [self.patch_size] if len(self.patch_size) == 1 and self.model.dimension is not None: @@ -113,6 +141,3 @@ def validate_patch(self): elif len(self.patch_size) == 3: # 2d check if self.model.dimension is None: self.model.dimension = 3 - - return self - From d3b33f81b6845d001e132b742c9c914c53a90655 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 8 Feb 2025 12:11:26 +0200 Subject: [PATCH 22/88] update model_parameters.py --- GANDLF/Configuration/model_parameters.py | 88 +++++++++++++++++------- GANDLF/Configuration/parameters.py | 2 +- 2 files changed, 65 insertions(+), 25 deletions(-) diff --git a/GANDLF/Configuration/model_parameters.py b/GANDLF/Configuration/model_parameters.py index 900b6ac33..cb685538e 100644 --- a/GANDLF/Configuration/model_parameters.py +++ b/GANDLF/Configuration/model_parameters.py @@ -1,41 +1,81 @@ -from pydantic import ( - BaseModel, - model_validator, - Field, AliasChoices, field_validator, AfterValidator, ConfigDict -) +from pydantic import BaseModel, model_validator, Field, AliasChoices, ConfigDict -from typing_extensions import Self, Literal, Annotated, Optional +from typing_extensions import Self, Literal, Optional from typing import Union -from GANDLF.Configuration.validators import validate_class_list +from GANDLF.Configuration.validators import validate_class_list, validate_norm_type +# Define model architecture options +ARCHITECTURE_OPTIONS = Literal[ + "unet", + "resunet", + "deep_resunet", + "deep_unet", + "light_resunet", + "light_unet", + "fcn", + "uinc", + "vgg", + "densenet", +] +NORM_TYPE_OPTIONS = Literal["batch", "instance", "None"] + + +# You can define new parameters for model here. Please read the pydantic documentation. +# It allows extra fields in model dict. class Model(BaseModel): - model_config = ConfigDict(extra='allow') + model_config = ConfigDict( + extra="allow" + ) # it allows extra fields in the model dict dimension: Optional[int] = Field(description="Dimension.") - architecture: Union[str,dict] = Field(description="Architecture.") + architecture: Union[ARCHITECTURE_OPTIONS, dict] = Field(description="Architecture.") final_layer: str = Field(description="Final layer.") - norm_type: str = Field(description="Normalization type.",default= None) # TODO: check it - base_filters: Optional[int] = Field(description="Base filters.", default= None, validate_default= True) # default is 32 - class_list: Union[list, str] = Field(default=[],description="Class list." ) # TODO: check it for class_list: '[0,1||2||3,1||4,4]' - num_channels: Optional[int] = Field(description="Number of channels.", validation_alias=AliasChoices('num_channels', "n_channels","channels","model_channels" )) # TODO: check it - type: Optional[str] = Field(description="Type of model.",default= "torch") - data_type: str = Field(description="Data type.",default= "FP32") - save_at_every_epoch: bool = Field(default=False,description="Save at every epoch.") - amp: bool = Field(default = False,description="Amplifier.") - ignore_label_validation: int = Field(default=None ,description="Ignore label validation.") #TODO: To check it - print_summary:bool = Field(default=True ,description="Print summary.") - batch_norm: int = Field(default=None,deprecated="batch_norm is no longer supported, please use 'norm_type' in 'model' instead.") - + norm_type: Optional[NORM_TYPE_OPTIONS] = Field( + description="Normalization type.", default="batch" + ) # TODO: check it again + base_filters: Optional[int] = Field( + description="Base filters.", default=None, validate_default=True + ) # default is 32 + class_list: Union[list, str] = Field(default=[], description="Class list.") + num_channels: Optional[int] = Field( + description="Number of channels.", + validation_alias=AliasChoices( + "num_channels", "n_channels", "channels", "model_channels" + ), + ) # TODO: check it + type: Optional[str] = Field(description="Type of model.", default="torch") + data_type: str = Field(description="Data type.", default="FP32") + save_at_every_epoch: bool = Field(default=False, description="Save at every epoch.") + amp: bool = Field(default=False, description="Amplifier.") + ignore_label_validation: int = Field( + default=None, description="Ignore label validation." + ) # TODO: To check it + print_summary: bool = Field(default=True, description="Print summary.") + batch_norm: str = Field(default=None) # TODO: Check it for deprecated option @model_validator(mode="after") def model_validate(self) -> Self: - self.class_list = validate_class_list(self.class_list) + # TODO: Change the print to logging.warnings + self.class_list = validate_class_list( + self.class_list + ) # init and validate the class_list parameter + self.norm_type = validate_norm_type( + self.norm_type, self.architecture + ) # init and validate the norm type if self.amp is False: print("NOT using Mixed Precision Training") if self.save_at_every_epoch: - print("WARNING: 'save_at_every_epoch' will result in TREMENDOUS storage usage; use at your own risk.") # TODO: It is better to use logging.warning + print( + "WARNING: 'save_at_every_epoch' will result in TREMENDOUS storage usage; use at your own risk." + ) # TODO: It is better to use logging.warning if self.base_filters is None: self.base_filters = 32 print("Using default 'base_filters' in 'model': ", self.base_filters) - return self \ No newline at end of file + + if self.batch_norm is not None: + print( + "WARNING: 'batch_norm' is no longer supported, please use 'norm_type' in 'model' instead" + ) + + return self diff --git a/GANDLF/Configuration/parameters.py b/GANDLF/Configuration/parameters.py index 36dbc7014..26e4e25cd 100644 --- a/GANDLF/Configuration/parameters.py +++ b/GANDLF/Configuration/parameters.py @@ -4,7 +4,7 @@ class ParametersConfiguration(BaseModel): - model_config = ConfigDict(extra='allow') + model_config = ConfigDict(extra="allow") class Parameters(ParametersConfiguration, DefaultParameters, UserDefinedParameters): From 61803aa46f48ef13933400eb9acc73e793b0a0eb Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 8 Feb 2025 12:12:16 +0200 Subject: [PATCH 23/88] add nestedTraining --- .../Configuration/user_defined_parameters.py | 37 ++++++++++++++----- GANDLF/Configuration/utils.py | 4 +- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index e58dd2dde..debe2db72 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -1,12 +1,5 @@ from typing import Union -from pydantic import ( - BaseModel, - model_validator, - Field, - field_validator, - AfterValidator, - BeforeValidator, -) +from pydantic import BaseModel, model_validator, Field, AfterValidator from GANDLF.config_manager import version_check from importlib.metadata import version from typing_extensions import Self, Literal, Annotated, Optional @@ -24,6 +17,27 @@ def validate_version(self) -> Self: return self +class NestedTraining(BaseModel): + stratified: bool = Field( + default=False, + description="this will perform stratified k-fold cross-validation but only with offline data splitting", + ) + testing: int = Field( + default=-5, + description="this controls the number of testing data folds for final model evaluation; [NOT recommended] to disable this, use '1'", + ) + validation: int = Field( + default=-5, + description="this controls the number of validation data folds to be used for model *selection* during training (not used for back-propagation)", + ) + proportional: bool = Field(default=None) + + @model_validator(mode="after") + def validate_nested_training(self) -> Self: + if self.proportional is not None: + self.stratified = self.proportional + return self + class UserDefinedParameters(BaseModel): version: Version = Field( @@ -33,7 +47,7 @@ class UserDefinedParameters(BaseModel): patch_size: Union[list[Union[int, float]], int, float] = Field( description="Patch size." ) - model: Model = Field(...,description="The model to use. ") + model: Model = Field(..., description="The model to use. ") modality: Literal["rad", "histo", "path"] = Field(description="Modality.") loss_function: Annotated[ Union[dict, str], @@ -45,9 +59,12 @@ class UserDefinedParameters(BaseModel): Field(description="Metrics."), AfterValidator(validate_metrics), ] + nested_training: NestedTraining = Field(description="Nested training.") # Validators @model_validator(mode="after") def validate(self) -> Self: - validate_patch_size(self) + self.patch_size, self.model.dimension = validate_patch_size( + self.patch_size, self.model.dimension + ) return self diff --git a/GANDLF/Configuration/utils.py b/GANDLF/Configuration/utils.py index 784516c86..729e16ab7 100644 --- a/GANDLF/Configuration/utils.py +++ b/GANDLF/Configuration/utils.py @@ -1,5 +1,5 @@ -from typing import Type, Optional, Union -from pydantic import BaseModel +from typing import Optional, Union + from typing import Type from pydantic import BaseModel From 5ce1f28ccb4e3eca23e8acb8842903917a1f8576 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 8 Feb 2025 12:12:45 +0200 Subject: [PATCH 24/88] update the validators.py --- GANDLF/Configuration/validators.py | 50 ++++++++++++++++++------------ 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/GANDLF/Configuration/validators.py b/GANDLF/Configuration/validators.py index 02ef68424..e78fe9317 100644 --- a/GANDLF/Configuration/validators.py +++ b/GANDLF/Configuration/validators.py @@ -1,7 +1,5 @@ import traceback -from typing_extensions import Self - from GANDLF.Configuration.utils import initialize_key from GANDLF.metrics import surface_distance_ids @@ -99,14 +97,12 @@ def validate_metrics(value) -> dict: def validate_class_list(value): if isinstance(value, str): - if ("||" in value) or ( - "&&" in value - ): + if ("||" in value) or ("&&" in value): # special case for multi-class computation - this needs to be handled during one-hot encoding mask construction print( "WARNING: This is a special case for multi-class computation, where different labels are processed together, `reverse_one_hot` will need mapping information to work correctly" ) - temp_class_list =value + temp_class_list = value # we don't need the brackets temp_class_list = temp_class_list.replace("[", "") temp_class_list = temp_class_list.replace("]", "") @@ -125,19 +121,33 @@ def validate_class_list(value): # ) return value -def validate_patch_size(self): - if isinstance(self.patch_size, int) or isinstance(self.patch_size, float): - self.patch_size = [self.patch_size] - if len(self.patch_size) == 1 and self.model.dimension is not None: + +def validate_patch_size(patch_size, dimension) -> list: + if isinstance(patch_size, int) or isinstance(patch_size, float): + patch_size = [patch_size] + if len(patch_size) == 1 and dimension is not None: actual_patch_size = [] - for _ in range(self.model.dimension): - actual_patch_size.append(self.patch_size[0]) - self.patch_size = actual_patch_size - if len(self.patch_size) == 2: # 2d check + for _ in range(dimension): + actual_patch_size.append(patch_size[0]) + patch_size = actual_patch_size + if len(patch_size) == 2: # 2d check # ensuring same size during torchio processing - self.patch_size.append(1) - if self.model.dimension is None: - self.model.dimension = 2 - elif len(self.patch_size) == 3: # 2d check - if self.model.dimension is None: - self.model.dimension = 3 + patch_size.append(1) + if dimension is None: + dimension = 2 + elif len(patch_size) == 3: # 2d check + if dimension is None: + dimension = 3 + return [patch_size, dimension] + + +def validate_norm_type(norm_type, architecture): + if norm_type is None or norm_type.lower() == "none": + if "vgg" in architecture: + raise ValueError( + "Normalization type cannot be 'None' for non-VGG architectures" + ) + else: + print("WARNING: Initializing 'norm_type' as 'batch'", flush=True) + norm_type = "batch" + return norm_type From 6df3cf16bd6c5a38971c32ac7809ab8180998152 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 8 Feb 2025 13:38:36 +0200 Subject: [PATCH 25/88] create scheduler_parameters.py --- GANDLF/Configuration/scheduler_parameters.py | 22 +++++++++++++++ .../Configuration/user_defined_parameters.py | 27 ++++++++++++++++--- GANDLF/Configuration/validators.py | 18 +++++++++++++ 3 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 GANDLF/Configuration/scheduler_parameters.py diff --git a/GANDLF/Configuration/scheduler_parameters.py b/GANDLF/Configuration/scheduler_parameters.py new file mode 100644 index 000000000..abdee1bee --- /dev/null +++ b/GANDLF/Configuration/scheduler_parameters.py @@ -0,0 +1,22 @@ +from pydantic import BaseModel, ConfigDict, Field +from typing_extensions import Literal + + +class Scheduler(BaseModel): + model_config = ConfigDict( + extra="allow" + ) + type: Literal[ + "triangle", + "triangle_modified", + "exp", + "step", + "reduce-on-plateau", + "cosineannealing", + "triangular", + "triangular2", + "exp_range", + ] = Field(description="triangle/triangle_modified use LambdaLR but triangular/triangular2/exp_range uses CyclicLR",) + # min_lr: 0.00001, #TODO: this should be defined ?? + # max_lr: 1, #TODO: this should be defined ?? + step_size: float = Field(description="step_size",default=None) diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index debe2db72..5e34c0d36 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -1,5 +1,8 @@ from typing import Union -from pydantic import BaseModel, model_validator, Field, AfterValidator +from pydantic import BaseModel, model_validator, Field, AfterValidator, ConfigDict + +from GANDLF.Configuration.default_parameters import DefaultParameters +from GANDLF.Configuration.scheduler_parameters import Scheduler from GANDLF.config_manager import version_check from importlib.metadata import version from typing_extensions import Self, Literal, Annotated, Optional @@ -7,7 +10,7 @@ from GANDLF.Configuration.model_parameters import Model -class Version(BaseModel): +class Version(BaseModel): # TODO: Maybe should be to another folder minimum: str maximum: str @@ -17,7 +20,7 @@ def validate_version(self) -> Self: return self -class NestedTraining(BaseModel): +class NestedTraining(BaseModel): # TODO: Maybe should be in another folder stratified: bool = Field( default=False, description="this will perform stratified k-fold cross-validation but only with offline data splitting", @@ -39,7 +42,10 @@ def validate_nested_training(self) -> Self: return self -class UserDefinedParameters(BaseModel): + + + +class UserDefinedParameters(DefaultParameters): version: Version = Field( default=Version(minimum=version("GANDLF"), maximum=version("GANDLF")), description="Whether weighted loss is to be used or not.", @@ -60,11 +66,24 @@ class UserDefinedParameters(BaseModel): AfterValidator(validate_metrics), ] nested_training: NestedTraining = Field(description="Nested training.") + parallel_compute_command: str = Field( + default="", description="Parallel compute command." + ) + scheduler: Union[str, Scheduler] = Field(description="Scheduler.") # Validators @model_validator(mode="after") def validate(self) -> Self: + # valiadate the patch_size self.patch_size, self.model.dimension = validate_patch_size( self.patch_size, self.model.dimension ) + # validate the parallel_compute_command + self.parallel_compute_command = validate_parallel_compute_command( + self.parallel_compute_command + ) + #validate scheduler + self.scheduler = validate_schedular(self.scheduler, self.learning_rate) + + return self diff --git a/GANDLF/Configuration/validators.py b/GANDLF/Configuration/validators.py index e78fe9317..f5e7aacff 100644 --- a/GANDLF/Configuration/validators.py +++ b/GANDLF/Configuration/validators.py @@ -1,5 +1,6 @@ import traceback +from GANDLF.Configuration.scheduler_parameters import Scheduler from GANDLF.Configuration.utils import initialize_key from GANDLF.metrics import surface_distance_ids @@ -151,3 +152,20 @@ def validate_norm_type(norm_type, architecture): print("WARNING: Initializing 'norm_type' as 'batch'", flush=True) norm_type = "batch" return norm_type + + +def validate_parallel_compute_command(value): + parallel_compute_command = value + parallel_compute_command = parallel_compute_command.replace( + "'", "" + ) # TODO: Check it again,should change from ' to ` + parallel_compute_command = parallel_compute_command.replace('"', "") + value = parallel_compute_command + return value + +def validate_schedular(value, learning_rate): + if isinstance(value, str): + value = Scheduler(type=value) + if value.step_size is None: + value.step_size = learning_rate / 5.0 + return value \ No newline at end of file From 3bde09694c783027d05a24d3df3153a07e8d98f4 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 8 Feb 2025 13:43:42 +0200 Subject: [PATCH 26/88] create nested_training_parameters.py --- .../nested_training_parameters.py | 25 +++++++++++++++++ GANDLF/Configuration/parameters.py | 2 +- .../Configuration/user_defined_parameters.py | 27 +------------------ 3 files changed, 27 insertions(+), 27 deletions(-) create mode 100644 GANDLF/Configuration/nested_training_parameters.py diff --git a/GANDLF/Configuration/nested_training_parameters.py b/GANDLF/Configuration/nested_training_parameters.py new file mode 100644 index 000000000..3c976e287 --- /dev/null +++ b/GANDLF/Configuration/nested_training_parameters.py @@ -0,0 +1,25 @@ +from pydantic import BaseModel, Field, model_validator +from typing_extensions import Self + + +class NestedTraining(BaseModel): # TODO: Maybe should be in another folder + stratified: bool = Field( + default=False, + description="this will perform stratified k-fold cross-validation but only with offline data splitting", + ) + testing: int = Field( + default=-5, + description="this controls the number of testing data folds for final model evaluation; [NOT recommended] to disable this, use '1'", + ) + validation: int = Field( + default=-5, + description="this controls the number of validation data folds to be used for model *selection* during training (not used for back-propagation)", + ) + proportional: bool = Field(default=None) + + @model_validator(mode="after") + def validate_nested_training(self) -> Self: + if self.proportional is not None: + self.stratified = self.proportional + return self + diff --git a/GANDLF/Configuration/parameters.py b/GANDLF/Configuration/parameters.py index 26e4e25cd..04a2d1d14 100644 --- a/GANDLF/Configuration/parameters.py +++ b/GANDLF/Configuration/parameters.py @@ -7,5 +7,5 @@ class ParametersConfiguration(BaseModel): model_config = ConfigDict(extra="allow") -class Parameters(ParametersConfiguration, DefaultParameters, UserDefinedParameters): +class Parameters(ParametersConfiguration,UserDefinedParameters): pass diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/user_defined_parameters.py index 5e34c0d36..c0087f683 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/user_defined_parameters.py @@ -2,6 +2,7 @@ from pydantic import BaseModel, model_validator, Field, AfterValidator, ConfigDict from GANDLF.Configuration.default_parameters import DefaultParameters +from GANDLF.Configuration.nested_training_parameters import NestedTraining from GANDLF.Configuration.scheduler_parameters import Scheduler from GANDLF.config_manager import version_check from importlib.metadata import version @@ -19,32 +20,6 @@ def validate_version(self) -> Self: if version_check(self.model_dump(), version_to_check=version("GANDLF")): return self - -class NestedTraining(BaseModel): # TODO: Maybe should be in another folder - stratified: bool = Field( - default=False, - description="this will perform stratified k-fold cross-validation but only with offline data splitting", - ) - testing: int = Field( - default=-5, - description="this controls the number of testing data folds for final model evaluation; [NOT recommended] to disable this, use '1'", - ) - validation: int = Field( - default=-5, - description="this controls the number of validation data folds to be used for model *selection* during training (not used for back-propagation)", - ) - proportional: bool = Field(default=None) - - @model_validator(mode="after") - def validate_nested_training(self) -> Self: - if self.proportional is not None: - self.stratified = self.proportional - return self - - - - - class UserDefinedParameters(DefaultParameters): version: Version = Field( default=Version(minimum=version("GANDLF"), maximum=version("GANDLF")), From ed5f7f51c6d1318e212cf51fe21bb2da3b332c31 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 8 Feb 2025 13:44:13 +0200 Subject: [PATCH 27/88] update the test_configuration.py --- test_configuration/test_configuration.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test_configuration/test_configuration.py b/test_configuration/test_configuration.py index 8e0e75a63..2b3ae89a9 100644 --- a/test_configuration/test_configuration.py +++ b/test_configuration/test_configuration.py @@ -1,3 +1,5 @@ +import json + from GANDLF.config_manager import ConfigManager from pathlib import Path @@ -6,4 +8,4 @@ parameters = ConfigManager( testingDir + "/config_all_options.yaml", version_check_flag=False ) - print(parameters) + print(json.dumps(parameters,indent=4)) From 09c66944f63c79eee2b6142e5d1e9b3662fe97d2 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 8 Feb 2025 13:49:39 +0200 Subject: [PATCH 28/88] fix scheduler step_size --- GANDLF/Configuration/validators.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GANDLF/Configuration/validators.py b/GANDLF/Configuration/validators.py index f5e7aacff..d7af309e2 100644 --- a/GANDLF/Configuration/validators.py +++ b/GANDLF/Configuration/validators.py @@ -163,9 +163,10 @@ def validate_parallel_compute_command(value): value = parallel_compute_command return value + def validate_schedular(value, learning_rate): if isinstance(value, str): value = Scheduler(type=value) - if value.step_size is None: + if value.step_size is None: value.step_size = learning_rate / 5.0 - return value \ No newline at end of file + return value From e0d6d0b1148c3e7681371d3703c716193ec3f45e Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 8 Feb 2025 14:00:12 +0200 Subject: [PATCH 29/88] change the configuration structure --- GANDLF/Configuration/Parameters/__init__.py | 0 .../{ => Parameters}/default_parameters.py | 0 .../{ => Parameters}/model_parameters.py | 0 .../nested_training_parameters.py | 3 +-- .../Parameters/scheduler_parameters.py | 25 +++++++++++++++++++ .../user_defined_parameters.py | 15 ++++++----- GANDLF/Configuration/parameters.py | 5 ++-- GANDLF/Configuration/scheduler_parameters.py | 22 ---------------- GANDLF/Configuration/validators.py | 2 +- 9 files changed, 36 insertions(+), 36 deletions(-) create mode 100644 GANDLF/Configuration/Parameters/__init__.py rename GANDLF/Configuration/{ => Parameters}/default_parameters.py (100%) rename GANDLF/Configuration/{ => Parameters}/model_parameters.py (100%) rename GANDLF/Configuration/{ => Parameters}/nested_training_parameters.py (92%) create mode 100644 GANDLF/Configuration/Parameters/scheduler_parameters.py rename GANDLF/Configuration/{ => Parameters}/user_defined_parameters.py (84%) delete mode 100644 GANDLF/Configuration/scheduler_parameters.py diff --git a/GANDLF/Configuration/Parameters/__init__.py b/GANDLF/Configuration/Parameters/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/GANDLF/Configuration/default_parameters.py b/GANDLF/Configuration/Parameters/default_parameters.py similarity index 100% rename from GANDLF/Configuration/default_parameters.py rename to GANDLF/Configuration/Parameters/default_parameters.py diff --git a/GANDLF/Configuration/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py similarity index 100% rename from GANDLF/Configuration/model_parameters.py rename to GANDLF/Configuration/Parameters/model_parameters.py diff --git a/GANDLF/Configuration/nested_training_parameters.py b/GANDLF/Configuration/Parameters/nested_training_parameters.py similarity index 92% rename from GANDLF/Configuration/nested_training_parameters.py rename to GANDLF/Configuration/Parameters/nested_training_parameters.py index 3c976e287..fd02a3e0a 100644 --- a/GANDLF/Configuration/nested_training_parameters.py +++ b/GANDLF/Configuration/Parameters/nested_training_parameters.py @@ -2,7 +2,7 @@ from typing_extensions import Self -class NestedTraining(BaseModel): # TODO: Maybe should be in another folder +class NestedTraining(BaseModel): stratified: bool = Field( default=False, description="this will perform stratified k-fold cross-validation but only with offline data splitting", @@ -22,4 +22,3 @@ def validate_nested_training(self) -> Self: if self.proportional is not None: self.stratified = self.proportional return self - diff --git a/GANDLF/Configuration/Parameters/scheduler_parameters.py b/GANDLF/Configuration/Parameters/scheduler_parameters.py new file mode 100644 index 000000000..9a765c564 --- /dev/null +++ b/GANDLF/Configuration/Parameters/scheduler_parameters.py @@ -0,0 +1,25 @@ +from pydantic import BaseModel, ConfigDict, Field +from typing_extensions import Literal + + +TYPE_OPTIONS = Literal[ + "triangle", + "triangle_modified", + "exp", + "step", + "reduce-on-plateau", + "cosineannealing", + "triangular", + "triangular2", + "exp_range", +] + +# It allows extra parameters +class Scheduler(BaseModel): + model_config = ConfigDict(extra= "allow") + type: TYPE_OPTIONS = Field( + description="triangle/triangle_modified use LambdaLR but triangular/triangular2/exp_range uses CyclicLR" + ) + # min_lr: 0.00001, #TODO: this should be defined ?? + # max_lr: 1, #TODO: this should be defined ?? + step_size: float = Field(description="step_size", default=None) diff --git a/GANDLF/Configuration/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py similarity index 84% rename from GANDLF/Configuration/user_defined_parameters.py rename to GANDLF/Configuration/Parameters/user_defined_parameters.py index c0087f683..ffe84398f 100644 --- a/GANDLF/Configuration/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -1,14 +1,13 @@ from typing import Union -from pydantic import BaseModel, model_validator, Field, AfterValidator, ConfigDict +from pydantic import BaseModel, model_validator, Field, AfterValidator -from GANDLF.Configuration.default_parameters import DefaultParameters -from GANDLF.Configuration.nested_training_parameters import NestedTraining -from GANDLF.Configuration.scheduler_parameters import Scheduler +from GANDLF.Configuration.Parameters.default_parameters import DefaultParameters +from GANDLF.Configuration.Parameters.nested_training_parameters import NestedTraining from GANDLF.config_manager import version_check from importlib.metadata import version -from typing_extensions import Self, Literal, Annotated, Optional +from typing_extensions import Self, Literal, Annotated from GANDLF.Configuration.validators import * -from GANDLF.Configuration.model_parameters import Model +from GANDLF.Configuration.Parameters.model_parameters import Model class Version(BaseModel): # TODO: Maybe should be to another folder @@ -20,6 +19,7 @@ def validate_version(self) -> Self: if version_check(self.model_dump(), version_to_check=version("GANDLF")): return self + class UserDefinedParameters(DefaultParameters): version: Version = Field( default=Version(minimum=version("GANDLF"), maximum=version("GANDLF")), @@ -57,8 +57,7 @@ def validate(self) -> Self: self.parallel_compute_command = validate_parallel_compute_command( self.parallel_compute_command ) - #validate scheduler + # validate scheduler self.scheduler = validate_schedular(self.scheduler, self.learning_rate) - return self diff --git a/GANDLF/Configuration/parameters.py b/GANDLF/Configuration/parameters.py index 04a2d1d14..9a0d0d6df 100644 --- a/GANDLF/Configuration/parameters.py +++ b/GANDLF/Configuration/parameters.py @@ -1,11 +1,10 @@ from pydantic import BaseModel, ConfigDict -from GANDLF.Configuration.default_parameters import DefaultParameters -from GANDLF.Configuration.user_defined_parameters import UserDefinedParameters +from GANDLF.Configuration.Parameters.user_defined_parameters import UserDefinedParameters class ParametersConfiguration(BaseModel): model_config = ConfigDict(extra="allow") -class Parameters(ParametersConfiguration,UserDefinedParameters): +class Parameters(ParametersConfiguration, UserDefinedParameters): pass diff --git a/GANDLF/Configuration/scheduler_parameters.py b/GANDLF/Configuration/scheduler_parameters.py deleted file mode 100644 index abdee1bee..000000000 --- a/GANDLF/Configuration/scheduler_parameters.py +++ /dev/null @@ -1,22 +0,0 @@ -from pydantic import BaseModel, ConfigDict, Field -from typing_extensions import Literal - - -class Scheduler(BaseModel): - model_config = ConfigDict( - extra="allow" - ) - type: Literal[ - "triangle", - "triangle_modified", - "exp", - "step", - "reduce-on-plateau", - "cosineannealing", - "triangular", - "triangular2", - "exp_range", - ] = Field(description="triangle/triangle_modified use LambdaLR but triangular/triangular2/exp_range uses CyclicLR",) - # min_lr: 0.00001, #TODO: this should be defined ?? - # max_lr: 1, #TODO: this should be defined ?? - step_size: float = Field(description="step_size",default=None) diff --git a/GANDLF/Configuration/validators.py b/GANDLF/Configuration/validators.py index d7af309e2..60af99d3e 100644 --- a/GANDLF/Configuration/validators.py +++ b/GANDLF/Configuration/validators.py @@ -1,6 +1,6 @@ import traceback -from GANDLF.Configuration.scheduler_parameters import Scheduler +from GANDLF.Configuration.Parameters.scheduler_parameters import Scheduler from GANDLF.Configuration.utils import initialize_key from GANDLF.metrics import surface_distance_ids From 6ef1701984c2f3fb03fab5b02004064075b8bbd1 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 8 Feb 2025 14:02:58 +0200 Subject: [PATCH 30/88] change the location of validators file --- GANDLF/Configuration/Parameters/model_parameters.py | 2 +- GANDLF/Configuration/Parameters/user_defined_parameters.py | 2 +- GANDLF/Configuration/{ => Parameters}/validators.py | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename GANDLF/Configuration/{ => Parameters}/validators.py (100%) diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index cb685538e..c2d0513fd 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -2,7 +2,7 @@ from typing_extensions import Self, Literal, Optional from typing import Union -from GANDLF.Configuration.validators import validate_class_list, validate_norm_type +from GANDLF.Configuration.Parameters.validators import validate_class_list, validate_norm_type # Define model architecture options ARCHITECTURE_OPTIONS = Literal[ diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index ffe84398f..4c080fff1 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -6,7 +6,7 @@ from GANDLF.config_manager import version_check from importlib.metadata import version from typing_extensions import Self, Literal, Annotated -from GANDLF.Configuration.validators import * +from GANDLF.Configuration.Parameters.validators import * from GANDLF.Configuration.Parameters.model_parameters import Model diff --git a/GANDLF/Configuration/validators.py b/GANDLF/Configuration/Parameters/validators.py similarity index 100% rename from GANDLF/Configuration/validators.py rename to GANDLF/Configuration/Parameters/validators.py From 7e2462b7b1547b020e0ef62d43942c5d9b3119b3 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 8 Feb 2025 14:03:51 +0200 Subject: [PATCH 31/88] update the test_configuration.py --- test_configuration/config_all_options.yaml | 40 +++++++++++++++------- test_configuration/test_configuration.py | 2 +- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/test_configuration/config_all_options.yaml b/test_configuration/config_all_options.yaml index d1841267a..51926412c 100644 --- a/test_configuration/config_all_options.yaml +++ b/test_configuration/config_all_options.yaml @@ -5,27 +5,43 @@ version: maximum: 0.1.3-dev # this should NOT be made a variable, but should be tested after every tag is created } weighted_loss: True -patch_size: [1] +patch_size: 2 modality: "histo" loss_function: "mse" model: { - dimension: 3, # the dimension of the model and dataset: defines dimensionality of computations - base_filters: 30, # Set base filters: number of filters present in the initial module of the U-Net convolution; for IncU-Net, keep this divisible by 4 - architecture: resunet, # options: unet, resunet, deep_resunet, deep_unet, light_resunet, light_unet, fcn, uinc, vgg, densenet - norm_type: batch, # options: batch, instance, or none (only for VGG); used for all networks + dimension: 2, # the dimension of the model and dataset: defines dimensionality of computations + # Set base filters: number of filters present in the initial module of the U-Net convolution; for IncU-Net, keep this divisible by 4 + architecture: "vgg", # options: unet, resunet, deep_resunet, deep_unet, light_resunet, light_unet, fcn, uinc, vgg, densenet + norm_type: instance, # options: batch, instance, or none (only for VGG); used for all networks final_layer: softmax, # can be either sigmoid, softmax or none (none == regression/logits) # sigmoid_input_multiplier: 1.0, # this is used during sigmoid, and defaults to 1.0 - class_list: [0,1,2,4], # Set the list of labels the model should train on and predict - # class_list: '[*range(0,100,1)]' # a range of values from 0 to 99 with a step of 1 will be created; customize as needed, but ensure this is defined as a string as it will be passed through 'eval' function - # class_list: '[0,1||2||3,1||4,4]', # combinatorial training - this will construct one-hot encoded mask using logical operands between specified annotations. Note that double '|' or '&' should be passed and not single to avoid python parsing +# class_list: [0,1,2,4], # Set the list of labels the model should train on and predict + class_list: [0,1,2], # a range of values from 0 to 99 with a step of 1 will be created; customize as needed, but ensure this is defined as a string as it will be passed through 'eval' function +# class_list: '[0,1||2||3,1||4,4]', # combinatorial training - this will construct one-hot encoded mask using logical operands between specified annotations. Note that double '|' or '&' should be passed and not single to avoid python parsing ignore_label_validation: 0, # this is the location of the class_list whose performance is ignored during validation metric calculation + channels : 3, + save_at_every_epoch: True, + asdasd: asdad, + batch_norm: "ben" } +nested_training: + { + stratified: False, # this will perform stratified k-fold cross-validation but only with offline data splitting, see https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html + testing: 5, # this controls the number of testing data folds for final model evaluation; [NOT recommended] to disable this, use '1' + validation: 5, # this controls the number of validation data folds to be used for model *selection* during training (not used for back-propagation) + } +parallel_compute_command: 'qsub - b y -l gpu -l h_vmem=32G -cwd -o ${outputDir}/\$JOB_ID.stdout -e ${outputDir}/\$JOB_ID.stderr `pwd`/sge_wrapper _correct_location_of_virtual_environment_/venv/bin/python' metrics: - "dice" - "classification" - "accuracy" - "iou" +scheduler: { + type: triangle, + min_lr: 0.00001, + max_lr: 1, +} ## Choose the model parameters here #model: # { @@ -82,10 +98,10 @@ metrics: # # - recall # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/recall.html # # - iou # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/jaccard_index.html ### this customizes the inference, primarily used for segmentation outputs -#inference_mechanism: { -# grid_aggregator_overlap: crop, # this option provides the option to strategize the grid aggregation output; should be either 'crop' or 'average' - https://torchio.readthedocs.io/patches/patch_inference.html#grid-aggregator -# patch_overlap: 0, # amount of overlap of patches during inference, defaults to 0; see https://torchio.readthedocs.io/patches/patch_inference.html#gridsampler -#} +inference_mechanism: { + grid_aggregator_overlap: crop, # this option provides the option to strategize the grid aggregation output; should be either 'crop' or 'average' - https://torchio.readthedocs.io/patches/patch_inference.html#grid-aggregator + patch_overlap: 0, # amount of overlap of patches during inference, defaults to 0; see https://torchio.readthedocs.io/patches/patch_inference.html#gridsampler +} ## this is to enable or disable lazy loading - setting to true reads all data once during data loading, resulting in improvements ## in I/O at the expense of memory consumption #in_memory: False diff --git a/test_configuration/test_configuration.py b/test_configuration/test_configuration.py index 2b3ae89a9..00f3072e9 100644 --- a/test_configuration/test_configuration.py +++ b/test_configuration/test_configuration.py @@ -8,4 +8,4 @@ parameters = ConfigManager( testingDir + "/config_all_options.yaml", version_check_flag=False ) - print(json.dumps(parameters,indent=4)) + print(json.dumps(parameters, indent=4)) From aedc67e3416c4cf80965294f57e70bbfc302b506 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sat, 8 Feb 2025 15:43:27 +0200 Subject: [PATCH 32/88] update the configuration with patch_sampler.py --- .../Parameters/model_parameters.py | 5 +- .../Parameters/optimizer_parameters.py | 19 ++++ .../Configuration/Parameters/patch_sampler.py | 8 ++ .../Parameters/scheduler_parameters.py | 3 +- .../Parameters/user_defined_parameters.py | 17 +++- GANDLF/Configuration/Parameters/validators.py | 99 ++++++++++++++++++- GANDLF/Configuration/parameters.py | 4 +- 7 files changed, 148 insertions(+), 7 deletions(-) create mode 100644 GANDLF/Configuration/Parameters/optimizer_parameters.py create mode 100644 GANDLF/Configuration/Parameters/patch_sampler.py diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index c2d0513fd..0eed09765 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -2,7 +2,10 @@ from typing_extensions import Self, Literal, Optional from typing import Union -from GANDLF.Configuration.Parameters.validators import validate_class_list, validate_norm_type +from GANDLF.Configuration.Parameters.validators import ( + validate_class_list, + validate_norm_type, +) # Define model architecture options ARCHITECTURE_OPTIONS = Literal[ diff --git a/GANDLF/Configuration/Parameters/optimizer_parameters.py b/GANDLF/Configuration/Parameters/optimizer_parameters.py new file mode 100644 index 000000000..435bb062a --- /dev/null +++ b/GANDLF/Configuration/Parameters/optimizer_parameters.py @@ -0,0 +1,19 @@ +from pydantic import BaseModel, Field +from typing_extensions import Literal + +OPTIMIZER_OPTIONS = Literal[ + "sgd", + "asgd", + "adam", + "adamw", + "adamax", + "sparseadam", + "rprop", + "adadelta", + "adagrad", + "rmsprop", +] + + +class Optimizer(BaseModel): + type: OPTIMIZER_OPTIONS = Field(description="Type of optimizer to use") diff --git a/GANDLF/Configuration/Parameters/patch_sampler.py b/GANDLF/Configuration/Parameters/patch_sampler.py new file mode 100644 index 000000000..60d6c8a4b --- /dev/null +++ b/GANDLF/Configuration/Parameters/patch_sampler.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel, Field + + +class PatchSampler(BaseModel): + type: str = Field(default="uniform") + enable_padding: bool = Field(default=False) + padding_mode: str = Field(default= "symmetric") + biased_sampling: bool = Field(default=False) \ No newline at end of file diff --git a/GANDLF/Configuration/Parameters/scheduler_parameters.py b/GANDLF/Configuration/Parameters/scheduler_parameters.py index 9a765c564..0c7a80bec 100644 --- a/GANDLF/Configuration/Parameters/scheduler_parameters.py +++ b/GANDLF/Configuration/Parameters/scheduler_parameters.py @@ -14,9 +14,10 @@ "exp_range", ] + # It allows extra parameters class Scheduler(BaseModel): - model_config = ConfigDict(extra= "allow") + model_config = ConfigDict(extra="allow") type: TYPE_OPTIONS = Field( description="triangle/triangle_modified use LambdaLR but triangular/triangular2/exp_range uses CyclicLR" ) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index 4c080fff1..90d81336e 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -1,8 +1,8 @@ from typing import Union from pydantic import BaseModel, model_validator, Field, AfterValidator - from GANDLF.Configuration.Parameters.default_parameters import DefaultParameters from GANDLF.Configuration.Parameters.nested_training_parameters import NestedTraining +from GANDLF.Configuration.Parameters.patch_sampler import PatchSampler from GANDLF.config_manager import version_check from importlib.metadata import version from typing_extensions import Self, Literal, Annotated @@ -44,12 +44,19 @@ class UserDefinedParameters(DefaultParameters): parallel_compute_command: str = Field( default="", description="Parallel compute command." ) - scheduler: Union[str, Scheduler] = Field(description="Scheduler.") + scheduler: Union[str, Scheduler] = Field(description="Scheduler.", default=Scheduler(type="triangle_modified")) + optimizer: Union[str, Optimizer] = Field(description="Optimizer.",default=Optimizer(type="adam")) + patch_sampler: Union[str, PatchSampler] = Field(description="Patch sampler.", default=PatchSampler()) + + + + #TODO: It should be defined with a better way (using a BaseMedel class) + data_preprocessing: Annotated[Union[dict], Field(description="Data preprocessing."), AfterValidator(validate_data_preprocessing)] = {} # Validators @model_validator(mode="after") def validate(self) -> Self: - # valiadate the patch_size + # validate the patch_size self.patch_size, self.model.dimension = validate_patch_size( self.patch_size, self.model.dimension ) @@ -59,5 +66,9 @@ def validate(self) -> Self: ) # validate scheduler self.scheduler = validate_schedular(self.scheduler, self.learning_rate) + # validate optimizer + self.optimizer = validate_optimizer(self.optimizer) + #validate patch_sampler + self.patch_sampler = validate_patch_sampler(self.patch_sampler) return self diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index 60af99d3e..39fd8ca17 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -1,5 +1,11 @@ import traceback +from copy import deepcopy +import numpy as np +import sys + +from GANDLF.Configuration.Parameters.optimizer_parameters import Optimizer +from GANDLF.Configuration.Parameters.patch_sampler import PatchSampler from GANDLF.Configuration.Parameters.scheduler_parameters import Scheduler from GANDLF.Configuration.utils import initialize_key from GANDLF.metrics import surface_distance_ids @@ -168,5 +174,96 @@ def validate_schedular(value, learning_rate): if isinstance(value, str): value = Scheduler(type=value) if value.step_size is None: - value.step_size = learning_rate / 5.0 + value.step_size = learning_rate / 5.0 + return value + + +def validate_optimizer(value): + if isinstance(value, str): + value = Optimizer(type=value) + return value + + +def validate_data_preprocessing(value) -> dict: + if not (value is None): + # perform this only when pre-processing is defined + if len(value) > 0: + thresholdOrClip = False + # this can be extended, as required + thresholdOrClipDict = ["threshold", "clip", "clamp"] + + resize_requested = False + temp_dict = deepcopy(value) + for key in value: + if key in ["resize", "resize_image", "resize_images", "resize_patch"]: + resize_requested = True + + if key in ["resample_min", "resample_minimum"]: + if "resolution" in value[key]: + resize_requested = True + resolution_temp = np.array( + value[key]["resolution"] + ) + if resolution_temp.size == 1: + temp_dict[key]["resolution"] = np.array( + [resolution_temp, resolution_temp] + ).tolist() + else: + temp_dict.pop(key) + + value= temp_dict + + if resize_requested and "resample" in value: + for key in ["resize", "resize_image", "resize_images", "resize_patch"]: + if key in value: + value.pop(key) + + print( + "WARNING: Different 'resize' operations are ignored as 'resample' is defined under 'data_processing'", + file=sys.stderr, + ) + + # iterate through all keys + for key in value: # iterate through all keys + if key in thresholdOrClipDict: + # we only allow one of threshold or clip to occur and not both + assert not ( + thresholdOrClip + ), "Use only `threshold` or `clip`, not both" + thresholdOrClip = True + # initialize if nothing is present + if not (isinstance(value[key], dict)): + value[key] = {} + + # if one of the required parameters is not present, initialize with lowest/highest possible values + # this ensures the absence of a field doesn't affect processing + # for threshold or clip, ensure min and max are defined + if not "min" in value[key]: + value[key]["min"] = sys.float_info.min + if not "max" in value[key]: + value[key]["max"] = sys.float_info.max + + if key == "histogram_matching": + if value[key] is not False: + if not (isinstance(value[key], dict)): + value[key] = {} + + if key == "histogram_equalization": + if value[key] is not False: + # if histogram equalization is enabled, call histogram_matching + value["histogram_matching"] = {} + + if key == "adaptive_histogram_equalization": + if value[key] is not False: + # if histogram equalization is enabled, call histogram_matching + value["histogram_matching"] = { + "target": "adaptive" + } + return value + + +def validate_patch_sampler(value): + if isinstance(value, str): + value = PatchSampler(type=value.lower()) return value + diff --git a/GANDLF/Configuration/parameters.py b/GANDLF/Configuration/parameters.py index 9a0d0d6df..985e9f200 100644 --- a/GANDLF/Configuration/parameters.py +++ b/GANDLF/Configuration/parameters.py @@ -1,5 +1,7 @@ from pydantic import BaseModel, ConfigDict -from GANDLF.Configuration.Parameters.user_defined_parameters import UserDefinedParameters +from GANDLF.Configuration.Parameters.user_defined_parameters import ( + UserDefinedParameters, +) class ParametersConfiguration(BaseModel): From 79fc32db0a5750e0c3764d6c015a1bdd97accf45 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 12:01:58 +0200 Subject: [PATCH 33/88] update the configuration --- .../Parameters/user_defined_parameters.py | 16 +- GANDLF/Configuration/Parameters/validators.py | 177 +++++++ GANDLF/config_manager.py | 452 ++++++------------ 3 files changed, 344 insertions(+), 301 deletions(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index 90d81336e..73c3bec0e 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -19,6 +19,9 @@ def validate_version(self) -> Self: if version_check(self.model_dump(), version_to_check=version("GANDLF")): return self +class InferenceMechanism(BaseModel): + grid_aggregator_overlap: Literal["crop", "average"] = Field(default="crop") + patch_overlap:int = Field(default=0) class UserDefinedParameters(DefaultParameters): version: Version = Field( @@ -45,14 +48,19 @@ class UserDefinedParameters(DefaultParameters): default="", description="Parallel compute command." ) scheduler: Union[str, Scheduler] = Field(description="Scheduler.", default=Scheduler(type="triangle_modified")) - optimizer: Union[str, Optimizer] = Field(description="Optimizer.",default=Optimizer(type="adam")) + optimizer: Union[str, Optimizer] = Field(description="Optimizer.",default=Optimizer(type="adam"), alias="opt") #TODO: Check it again patch_sampler: Union[str, PatchSampler] = Field(description="Patch sampler.", default=PatchSampler()) + inference_mechanism: InferenceMechanism = Field(description="Inference mechanism.",default=InferenceMechanism()) - #TODO: It should be defined with a better way (using a BaseMedel class) - data_preprocessing: Annotated[Union[dict], Field(description="Data preprocessing."), AfterValidator(validate_data_preprocessing)] = {} + #TODO: It should be defined with a better way (using a BaseModel class) + data_preprocessing: Annotated[dict, Field(description="Data preprocessing."), AfterValidator(validate_data_preprocessing)] = {} + #TODO: It should be defined with a better way (using a BaseModel class) + data_postprocessing: Annotated[dict, Field(description="Data augmentation."), AfterValidator(validate_data_postprocessing)]={} + #TODO: It should be defined with a better way (using a BaseModel class) + data_augmentation: Annotated[dict, Field(description="Data augmentation.")] = {} # Validators @model_validator(mode="after") def validate(self) -> Self: @@ -70,5 +78,7 @@ def validate(self) -> Self: self.optimizer = validate_optimizer(self.optimizer) #validate patch_sampler self.patch_sampler = validate_patch_sampler(self.patch_sampler) + # validate_data_augmentation + self.data_preprocessing = validate_data_augmentation(self.data_preprocessing,self.patch_size) return self diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index 39fd8ca17..5f9f54274 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -1,5 +1,6 @@ import traceback from copy import deepcopy +from GANDLF.data.post_process import postprocessing_after_reverse_one_hot_encoding import numpy as np import sys @@ -261,9 +262,185 @@ def validate_data_preprocessing(value) -> dict: } return value +def validate_data_postprocessing(value) -> dict: + value = initialize_key( + value, "data_postprocessing_after_reverse_one_hot_encoding", {} + ) + temp_dict = deepcopy(value) + for key in temp_dict: + if key in postprocessing_after_reverse_one_hot_encoding: + value["data_postprocessing_after_reverse_one_hot_encoding"][key] = value[key] + value.pop(key) + return value def validate_patch_sampler(value): if isinstance(value, str): value = PatchSampler(type=value.lower()) return value +def validate_data_augmentation(value,patch_size)-> dict: + value["default_probability"] = value.get("default_probability", 0.5) + if not (value is None): + if len(value) > 0: # only when augmentations are defined + # special case for random swapping and elastic transformations - which takes a patch size for computation + for key in ["swap", "elastic"]: + if key in value: + value[key] = initialize_key( + value[key], + "patch_size", + np.round(np.array(patch_size) / 10) + .astype("int") + .tolist(), + ) + + # special case for swap default initialization + if "swap" in value: + value["swap"] = initialize_key( + value["swap"], "num_iterations", 100 + ) + + # special case for affine default initialization + if "affine" in value: + value["affine"] = initialize_key( + value["affine"], "scales", 0.1 + ) + value["affine"] = initialize_key( + value["affine"], "degrees", 15 + ) + value["affine"] = initialize_key( + value["affine"], "translation", 2 + ) + + if "motion" in value: + value["motion"] = initialize_key( + value["motion"], "num_transforms", 2 + ) + value["motion"] = initialize_key( + value["motion"], "degrees", 15 + ) + value["motion"] = initialize_key( + value["motion"], "translation", 2 + ) + value["motion"] = initialize_key( + value["motion"], "interpolation", "linear" + ) + + # special case for random blur/noise - which takes a std-dev range + for std_aug in ["blur", "noise_var"]: + if std_aug in value: + value[std_aug] = initialize_key( + value[std_aug], "std", None + ) + for std_aug in ["noise"]: + if std_aug in value: + value[std_aug] = initialize_key( + value[std_aug], "std", [0, 1] + ) + + # special case for random noise - which takes a mean range + for mean_aug in ["noise", "noise_var"]: + if mean_aug in value: + value[mean_aug] = initialize_key( + value[mean_aug], "mean", 0 + ) + + # special case for augmentations that need axis defined + for axis_aug in ["flip", "anisotropic", "rotate_90", "rotate_180"]: + if axis_aug in value: + value[axis_aug] = initialize_key( + value[axis_aug], "axis", [0, 1, 2] + ) + + # special case for colorjitter + if "colorjitter" in value: + value = initialize_key( + value, "colorjitter", {} + ) + for key in ["brightness", "contrast", "saturation"]: + value["colorjitter"] = initialize_key( + value["colorjitter"], key, [0, 1] + ) + value["colorjitter"] = initialize_key( + value["colorjitter"], "hue", [-0.5, 0.5] + ) + + # Added HED augmentation in gandlf + hed_augmentation_types = [ + "hed_transform", + # "hed_transform_light", + # "hed_transform_heavy", + ] + for augmentation_type in hed_augmentation_types: + if augmentation_type in value: + value = initialize_key( + value, "hed_transform", {} + ) + ranges = [ + "haematoxylin_bias_range", + "eosin_bias_range", + "dab_bias_range", + "haematoxylin_sigma_range", + "eosin_sigma_range", + "dab_sigma_range", + ] + + default_range = ( + [-0.1, 0.1] + if augmentation_type == "hed_transform" + else ( + [-0.03, 0.03] + if augmentation_type == "hed_transform_light" + else [-0.95, 0.95] + ) + ) + + for key in ranges: + value["hed_transform"] = initialize_key( + value["hed_transform"], + key, + default_range, + ) + + value["hed_transform"] = initialize_key( + value["hed_transform"], + "cutoff_range", + [0, 1], + ) + + # special case for anisotropic + if "anisotropic" in value: + if not ("downsampling" in value["anisotropic"]): + default_downsampling = 1.5 + else: + default_downsampling = value["anisotropic"][ + "downsampling" + ] + + initialize_downsampling = False + if isinstance(default_downsampling, list): + if len(default_downsampling) != 2: + initialize_downsampling = True + print( + "WARNING: 'anisotropic' augmentation needs to be either a single number of a list of 2 numbers: https://torchio.readthedocs.io/transforms/augmentation.html?highlight=randomswap#torchio.transforms.RandomAnisotropy.", + file=sys.stderr, + ) + default_downsampling = default_downsampling[0] # only + else: + initialize_downsampling = True + + if initialize_downsampling: + if default_downsampling < 1: + print( + "WARNING: 'anisotropic' augmentation needs the 'downsampling' parameter to be greater than 1, defaulting to 1.5.", + file=sys.stderr, + ) + # default + value["anisotropic"]["downsampling"] = 1.5 + + for key in value: + if key != "default_probability": + value[key] = initialize_key( + value[key], + "probability", + value["default_probability"], + ) \ No newline at end of file diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index 461c3f1c0..ee7ec7e1e 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -131,13 +131,11 @@ def _parseConfig( if not isinstance(config_file_path, dict): params = yaml.safe_load(open(config_file_path, "r")) - # - # if "resize" in params: - # print( - # "WARNING: 'resize' should be defined under 'data_processing', this will be skipped", - # file=sys.stderr, - # ) - # + if "resize" in params: + print( + "WARNING: 'resize' should be defined under 'data_processing', this will be skipped", + file=sys.stderr, + ) # this is NOT a required parameter - a user should be able to train with NO augmentations params = initialize_key(params, "data_augmentation", {}) @@ -311,296 +309,154 @@ def _parseConfig( params["data_augmentation"]["default_probability"], ) - # # this is NOT a required parameter - a user should be able to train with NO built-in pre-processing - # params = initialize_key(params, "data_preprocessing", {}) - # if not (params["data_preprocessing"] is None): - # # perform this only when pre-processing is defined - # if len(params["data_preprocessing"]) > 0: - # thresholdOrClip = False - # # this can be extended, as required - # thresholdOrClipDict = ["threshold", "clip", "clamp"] - # - # resize_requested = False - # temp_dict = deepcopy(params["data_preprocessing"]) - # for key in params["data_preprocessing"]: - # if key in ["resize", "resize_image", "resize_images", "resize_patch"]: - # resize_requested = True - # - # if key in ["resample_min", "resample_minimum"]: - # if "resolution" in params["data_preprocessing"][key]: - # resize_requested = True - # resolution_temp = np.array( - # params["data_preprocessing"][key]["resolution"] - # ) - # if resolution_temp.size == 1: - # temp_dict[key]["resolution"] = np.array( - # [resolution_temp, resolution_temp] - # ).tolist() - # else: - # temp_dict.pop(key) - # - # params["data_preprocessing"] = temp_dict - # - # if resize_requested and "resample" in params["data_preprocessing"]: - # for key in ["resize", "resize_image", "resize_images", "resize_patch"]: - # if key in params["data_preprocessing"]: - # params["data_preprocessing"].pop(key) - # - # print( - # "WARNING: Different 'resize' operations are ignored as 'resample' is defined under 'data_processing'", - # file=sys.stderr, - # ) - # - # # iterate through all keys - # for key in params["data_preprocessing"]: # iterate through all keys - # if key in thresholdOrClipDict: - # # we only allow one of threshold or clip to occur and not both - # assert not ( - # thresholdOrClip - # ), "Use only `threshold` or `clip`, not both" - # thresholdOrClip = True - # # initialize if nothing is present - # if not (isinstance(params["data_preprocessing"][key], dict)): - # params["data_preprocessing"][key] = {} - # - # # if one of the required parameters is not present, initialize with lowest/highest possible values - # # this ensures the absence of a field doesn't affect processing - # # for threshold or clip, ensure min and max are defined - # if not "min" in params["data_preprocessing"][key]: - # params["data_preprocessing"][key]["min"] = sys.float_info.min - # if not "max" in params["data_preprocessing"][key]: - # params["data_preprocessing"][key]["max"] = sys.float_info.max - # - # if key == "histogram_matching": - # if params["data_preprocessing"][key] is not False: - # if not (isinstance(params["data_preprocessing"][key], dict)): - # params["data_preprocessing"][key] = {} - # - # if key == "histogram_equalization": - # if params["data_preprocessing"][key] is not False: - # # if histogram equalization is enabled, call histogram_matching - # params["data_preprocessing"]["histogram_matching"] = {} - # - # if key == "adaptive_histogram_equalization": - # if params["data_preprocessing"][key] is not False: - # # if histogram equalization is enabled, call histogram_matching - # params["data_preprocessing"]["histogram_matching"] = { - # "target": "adaptive" - # } - # - # # this is NOT a required parameter - a user should be able to train with NO built-in post-processing - # params = initialize_key(params, "data_postprocessing", {}) - # params = initialize_key( - # params, "data_postprocessing_after_reverse_one_hot_encoding", {} - # ) - # temp_dict = deepcopy(params["data_postprocessing"]) - # for key in temp_dict: - # if key in postprocessing_after_reverse_one_hot_encoding: - # params["data_postprocessing_after_reverse_one_hot_encoding"][key] = params[ - # "data_postprocessing" - # ][key] - # params["data_postprocessing"].pop(key) - # if "model" in params: - assert isinstance( - params["model"], dict - ), "The 'model' parameter needs to be populated as a dictionary" - assert ( - len(params["model"]) > 0 - ), "The 'model' parameter needs to be populated as a dictionary and should have all properties present" - assert ( - "architecture" in params["model"] - ), "The 'model' parameter needs 'architecture' to be defined" - assert ( - "final_layer" in params["model"] - ), "The 'model' parameter needs 'final_layer' to be defined" - assert ( - "dimension" in params["model"] - ), "The 'model' parameter needs 'dimension' to be defined" - - if "amp" in params["model"]: - pass - else: - print("NOT using Mixed Precision Training") - params["model"]["amp"] = False - - if "norm_type" in params["model"]: - if ( - params["model"]["norm_type"] == None - or params["model"]["norm_type"].lower() == "none" - ): - if not ("vgg" in params["model"]["architecture"]): - raise ValueError( - "Normalization type cannot be 'None' for non-VGG architectures" - ) - else: - print("WARNING: Initializing 'norm_type' as 'batch'", flush=True) - params["model"]["norm_type"] = "batch" - - if not ("base_filters" in params["model"]): - base_filters = 32 - params["model"]["base_filters"] = base_filters - print("Using default 'base_filters' in 'model': ", base_filters) - if not ("class_list" in params["model"]): - params["model"]["class_list"] = [] # ensure that this is initialized - if not ("ignore_label_validation" in params["model"]): - params["model"]["ignore_label_validation"] = None - if "batch_norm" in params["model"]: - print( - "WARNING: 'batch_norm' is no longer supported, please use 'norm_type' in 'model' instead", - flush=True, - ) - params["model"]["print_summary"] = params["model"].get("print_summary", True) - - channel_keys_to_check = ["n_channels", "channels", "model_channels"] - for key in channel_keys_to_check: - if key in params["model"]: - params["model"]["num_channels"] = params["model"][key] - break - - # initialize model type for processing: if not defined, default to torch - if not ("type" in params["model"]): - params["model"]["type"] = "torch" - - # initialize openvino model data type for processing: if not defined, default to FP32 - if not ("data_type" in params["model"]): - params["model"]["data_type"] = "FP32" - - # set default save strategy for model - if not ("save_at_every_epoch" in params["model"]): - params["model"]["save_at_every_epoch"] = False - - if params["model"]["save_at_every_epoch"]: - print( - "WARNING: 'save_at_every_epoch' will result in TREMENDOUS storage usage; use at your own risk." - ) - - - # if isinstance(params["model"]["class_list"], str): - # if ("||" in params["model"]["class_list"]) or ( - # "&&" in params["model"]["class_list"] - # ): - # # special case for multi-class computation - this needs to be handled during one-hot encoding mask construction - # print( - # "WARNING: This is a special case for multi-class computation, where different labels are processed together, `reverse_one_hot` will need mapping information to work correctly" - # ) - # temp_classList = params["model"]["class_list"] - # # we don't need the brackets - # temp_classList = temp_classList.replace("[", "") - # temp_classList = temp_classList.replace("]", "") - # params["model"]["class_list"] = temp_classList.split(",") - # else: - # try: - # params["model"]["class_list"] = eval(params["model"]["class_list"]) - # except Exception as e: - # ## todo: ensure logging captures assertion errors - # assert ( - # False - # ), f"Could not evaluate the `class_list` in `model`, Exception: {str(e)}, {traceback.format_exc()}" - # # logging.error( - # # f"Could not evaluate the `class_list` in `model`, Exception: {str(e)}, {traceback.format_exc()}" - # # ) - # - # assert ( - # "nested_training" in params - # ), "The parameter 'nested_training' needs to be defined" - # # initialize defaults for nested training - # params["nested_training"]["stratified"] = params["nested_training"].get( - # "stratified", False - # ) - # params["nested_training"]["stratified"] = params["nested_training"].get( - # "proportional", params["nested_training"]["stratified"] - # ) - # params["nested_training"]["testing"] = params["nested_training"].get("testing", -5) - # params["nested_training"]["validation"] = params["nested_training"].get( - # "validation", -5 - # ) - # - # parallel_compute_command = "" - # if "parallel_compute_command" in params: - # parallel_compute_command = params["parallel_compute_command"] - # parallel_compute_command = parallel_compute_command.replace("'", "") - # parallel_compute_command = parallel_compute_command.replace('"', "") - # params["parallel_compute_command"] = parallel_compute_command - # - # if "opt" in params: - # print("DeprecationWarning: 'opt' has been superseded by 'optimizer'") - # params["optimizer"] = params["opt"] - # - # # initialize defaults for patch sampler - # temp_patch_sampler_dict = { - # "type": "uniform", - # "enable_padding": False, - # "padding_mode": "symmetric", - # "biased_sampling": False, - # } - # # check if patch_sampler is defined in the config - # if "patch_sampler" in params: - # # if "patch_sampler" is a string, then it is the type of sampler - # if isinstance(params["patch_sampler"], str): - # print( - # "WARNING: Defining 'patch_sampler' as a string will be deprecated in a future release, please use a dictionary instead" - # ) - # temp_patch_sampler_dict["type"] = params["patch_sampler"].lower() - # elif isinstance(params["patch_sampler"], dict): - # # dict requires special handling - # for key in params["patch_sampler"]: - # temp_patch_sampler_dict[key] = params["patch_sampler"][key] - # - # # now assign the dict back to the params - # params["patch_sampler"] = temp_patch_sampler_dict - # del temp_patch_sampler_dict - # - # # define defaults - # for current_parameter in parameter_defaults: - # params = initialize_parameter( - # params, current_parameter, parameter_defaults[current_parameter], True - # ) - # - # for current_parameter in parameter_defaults_string: - # params = initialize_parameter( - # params, - # current_parameter, - # parameter_defaults_string[current_parameter], - # False, - # ) - # - # # ensure that the scheduler and optimizer are dicts - # if isinstance(params["scheduler"], str): - # temp_dict = {} - # temp_dict["type"] = params["scheduler"] - # params["scheduler"] = temp_dict - # - # if not ("step_size" in params["scheduler"]): - # params["scheduler"]["step_size"] = params["learning_rate"] / 5.0 - # print( - # "WARNING: Setting default step_size to:", params["scheduler"]["step_size"] - # ) - # - # # initialize default optimizer - # params["optimizer"] = params.get("optimizer", {}) - # if isinstance(params["optimizer"], str): - # temp_dict = {} - # temp_dict["type"] = params["optimizer"] - # params["optimizer"] = temp_dict - # - # # initialize defaults for DP - # if params.get("differential_privacy"): - # params = parse_opacus_params(params, initialize_key) - # - # # initialize defaults for inference mechanism - # inference_mechanism = {"grid_aggregator_overlap": "crop", "patch_overlap": 0} - # initialize_inference_mechanism = False - # if not ("inference_mechanism" in params): - # initialize_inference_mechanism = True - # elif not (isinstance(params["inference_mechanism"], dict)): - # initialize_inference_mechanism = True - # else: - # for key in inference_mechanism: - # if not (key in params["inference_mechanism"]): - # params["inference_mechanism"][key] = inference_mechanism[key] - # - # if initialize_inference_mechanism: - # params["inference_mechanism"] = inference_mechanism + # this is NOT a required parameter - a user should be able to train with NO built-in pre-processing + params = initialize_key(params, "data_preprocessing", {}) + if not (params["data_preprocessing"] is None): + # perform this only when pre-processing is defined + if len(params["data_preprocessing"]) > 0: + thresholdOrClip = False + # this can be extended, as required + thresholdOrClipDict = ["threshold", "clip", "clamp"] + + resize_requested = False + temp_dict = deepcopy(params["data_preprocessing"]) + for key in params["data_preprocessing"]: + if key in ["resize", "resize_image", "resize_images", "resize_patch"]: + resize_requested = True + + if key in ["resample_min", "resample_minimum"]: + if "resolution" in params["data_preprocessing"][key]: + resize_requested = True + resolution_temp = np.array( + params["data_preprocessing"][key]["resolution"] + ) + if resolution_temp.size == 1: + temp_dict[key]["resolution"] = np.array( + [resolution_temp, resolution_temp] + ).tolist() + else: + temp_dict.pop(key) + + params["data_preprocessing"] = temp_dict + + if resize_requested and "resample" in params["data_preprocessing"]: + for key in ["resize", "resize_image", "resize_images", "resize_patch"]: + if key in params["data_preprocessing"]: + params["data_preprocessing"].pop(key) + + print( + "WARNING: Different 'resize' operations are ignored as 'resample' is defined under 'data_processing'", + file=sys.stderr, + ) + + # iterate through all keys + for key in params["data_preprocessing"]: # iterate through all keys + if key in thresholdOrClipDict: + # we only allow one of threshold or clip to occur and not both + assert not ( + thresholdOrClip + ), "Use only `threshold` or `clip`, not both" + thresholdOrClip = True + # initialize if nothing is present + if not (isinstance(params["data_preprocessing"][key], dict)): + params["data_preprocessing"][key] = {} + + # if one of the required parameters is not present, initialize with lowest/highest possible values + # this ensures the absence of a field doesn't affect processing + # for threshold or clip, ensure min and max are defined + if not "min" in params["data_preprocessing"][key]: + params["data_preprocessing"][key]["min"] = sys.float_info.min + if not "max" in params["data_preprocessing"][key]: + params["data_preprocessing"][key]["max"] = sys.float_info.max + + if key == "histogram_matching": + if params["data_preprocessing"][key] is not False: + if not (isinstance(params["data_preprocessing"][key], dict)): + params["data_preprocessing"][key] = {} + + if key == "histogram_equalization": + if params["data_preprocessing"][key] is not False: + # if histogram equalization is enabled, call histogram_matching + params["data_preprocessing"]["histogram_matching"] = {} + + if key == "adaptive_histogram_equalization": + if params["data_preprocessing"][key] is not False: + # if histogram equalization is enabled, call histogram_matching + params["data_preprocessing"]["histogram_matching"] = { + "target": "adaptive" + } + + # this is NOT a required parameter - a user should be able to train with NO built-in post-processing + params = initialize_key(params, "data_postprocessing", {}) + params = initialize_key( + params, "data_postprocessing_after_reverse_one_hot_encoding", {} + ) + temp_dict = deepcopy(params["data_postprocessing"]) + for key in temp_dict: + if key in postprocessing_after_reverse_one_hot_encoding: + params["data_postprocessing_after_reverse_one_hot_encoding"][key] = params[ + "data_postprocessing" + ][key] + params["data_postprocessing"].pop(key) + + if "opt" in params: + print("DeprecationWarning: 'opt' has been superseded by 'optimizer'") + params["optimizer"] = params["opt"] + + + # define defaults + for current_parameter in parameter_defaults: + params = initialize_parameter( + params, current_parameter, parameter_defaults[current_parameter], True + ) + + for current_parameter in parameter_defaults_string: + params = initialize_parameter( + params, + current_parameter, + parameter_defaults_string[current_parameter], + False, + ) + + + # initialize defaults for DP + if params.get("differential_privacy"): + params = parse_opacus_params(params, initialize_key) + + # initialize defaults for inference mechanism + inference_mechanism = {"grid_aggregator_overlap": "crop", "patch_overlap": 0} + initialize_inference_mechanism = False + if not ("inference_mechanism" in params): + initialize_inference_mechanism = True + elif not (isinstance(params["inference_mechanism"], dict)): + initialize_inference_mechanism = True + else: + for key in inference_mechanism: + if not (key in params["inference_mechanism"]): + params["inference_mechanism"][key] = inference_mechanism[key] + + if initialize_inference_mechanism: + params["inference_mechanism"] = inference_mechanism + + return params + + +def _parseConfig_temp( + config_file_path: Union[str, dict], version_check_flag: bool = True +) -> None: + """ + This function parses the configuration file and returns a dictionary of parameters. + + Args: + config_file_path (Union[str, dict]): The filename of the configuration file. + version_check_flag (bool, optional): Whether to check the version in configuration file. Defaults to True. + + Returns: + dict: The parameter dictionary. + """ + params = config_file_path + if not isinstance(config_file_path, dict): + params = yaml.safe_load(open(config_file_path, "r")) return params @@ -620,7 +476,7 @@ def ConfigManager( """ try: parameters = Parameters( - **_parseConfig(config_file_path, version_check_flag) + **_parseConfig_temp(config_file_path, version_check_flag) ).model_dump() return parameters # except Exception as e: From a35f3995ac4a975e4f4c85c3955875e62959c68e Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 12:11:45 +0200 Subject: [PATCH 34/88] blacked . --- .../Configuration/Parameters/patch_sampler.py | 4 +- .../Parameters/user_defined_parameters.py | 48 +++++++--- GANDLF/Configuration/Parameters/validators.py | 95 ++++++------------- GANDLF/config_manager.py | 2 - 4 files changed, 65 insertions(+), 84 deletions(-) diff --git a/GANDLF/Configuration/Parameters/patch_sampler.py b/GANDLF/Configuration/Parameters/patch_sampler.py index 60d6c8a4b..08bc31f25 100644 --- a/GANDLF/Configuration/Parameters/patch_sampler.py +++ b/GANDLF/Configuration/Parameters/patch_sampler.py @@ -4,5 +4,5 @@ class PatchSampler(BaseModel): type: str = Field(default="uniform") enable_padding: bool = Field(default=False) - padding_mode: str = Field(default= "symmetric") - biased_sampling: bool = Field(default=False) \ No newline at end of file + padding_mode: str = Field(default="symmetric") + biased_sampling: bool = Field(default=False) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index 73c3bec0e..dab7d53b5 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -19,9 +19,11 @@ def validate_version(self) -> Self: if version_check(self.model_dump(), version_to_check=version("GANDLF")): return self + class InferenceMechanism(BaseModel): grid_aggregator_overlap: Literal["crop", "average"] = Field(default="crop") - patch_overlap:int = Field(default=0) + patch_overlap: int = Field(default=0) + class UserDefinedParameters(DefaultParameters): version: Version = Field( @@ -47,20 +49,34 @@ class UserDefinedParameters(DefaultParameters): parallel_compute_command: str = Field( default="", description="Parallel compute command." ) - scheduler: Union[str, Scheduler] = Field(description="Scheduler.", default=Scheduler(type="triangle_modified")) - optimizer: Union[str, Optimizer] = Field(description="Optimizer.",default=Optimizer(type="adam"), alias="opt") #TODO: Check it again - patch_sampler: Union[str, PatchSampler] = Field(description="Patch sampler.", default=PatchSampler()) - inference_mechanism: InferenceMechanism = Field(description="Inference mechanism.",default=InferenceMechanism()) - - - + scheduler: Union[str, Scheduler] = Field( + description="Scheduler.", default=Scheduler(type="triangle_modified") + ) + optimizer: Union[str, Optimizer] = Field( + description="Optimizer.", default=Optimizer(type="adam"), alias="opt" + ) # TODO: Check it again + patch_sampler: Union[str, PatchSampler] = Field( + description="Patch sampler.", default=PatchSampler() + ) + inference_mechanism: InferenceMechanism = Field( + description="Inference mechanism.", default=InferenceMechanism() + ) - #TODO: It should be defined with a better way (using a BaseModel class) - data_preprocessing: Annotated[dict, Field(description="Data preprocessing."), AfterValidator(validate_data_preprocessing)] = {} - #TODO: It should be defined with a better way (using a BaseModel class) - data_postprocessing: Annotated[dict, Field(description="Data augmentation."), AfterValidator(validate_data_postprocessing)]={} - #TODO: It should be defined with a better way (using a BaseModel class) + # TODO: It should be defined with a better way (using a BaseModel class) + data_preprocessing: Annotated[ + dict, + Field(description="Data preprocessing."), + AfterValidator(validate_data_preprocessing), + ] = {} + # TODO: It should be defined with a better way (using a BaseModel class) + data_postprocessing: Annotated[ + dict, + Field(description="Data augmentation."), + AfterValidator(validate_data_postprocessing), + ] = {} + # TODO: It should be defined with a better way (using a BaseModel class) data_augmentation: Annotated[dict, Field(description="Data augmentation.")] = {} + # Validators @model_validator(mode="after") def validate(self) -> Self: @@ -76,9 +92,11 @@ def validate(self) -> Self: self.scheduler = validate_schedular(self.scheduler, self.learning_rate) # validate optimizer self.optimizer = validate_optimizer(self.optimizer) - #validate patch_sampler + # validate patch_sampler self.patch_sampler = validate_patch_sampler(self.patch_sampler) # validate_data_augmentation - self.data_preprocessing = validate_data_augmentation(self.data_preprocessing,self.patch_size) + self.data_preprocessing = validate_data_augmentation( + self.data_preprocessing, self.patch_size + ) return self diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index 5f9f54274..f5a63abf8 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -202,9 +202,7 @@ def validate_data_preprocessing(value) -> dict: if key in ["resample_min", "resample_minimum"]: if "resolution" in value[key]: resize_requested = True - resolution_temp = np.array( - value[key]["resolution"] - ) + resolution_temp = np.array(value[key]["resolution"]) if resolution_temp.size == 1: temp_dict[key]["resolution"] = np.array( [resolution_temp, resolution_temp] @@ -212,12 +210,12 @@ def validate_data_preprocessing(value) -> dict: else: temp_dict.pop(key) - value= temp_dict + value = temp_dict if resize_requested and "resample" in value: for key in ["resize", "resize_image", "resize_images", "resize_patch"]: if key in value: - value.pop(key) + value.pop(key) print( "WARNING: Different 'resize' operations are ignored as 'resample' is defined under 'data_processing'", @@ -257,11 +255,10 @@ def validate_data_preprocessing(value) -> dict: if key == "adaptive_histogram_equalization": if value[key] is not False: # if histogram equalization is enabled, call histogram_matching - value["histogram_matching"] = { - "target": "adaptive" - } + value["histogram_matching"] = {"target": "adaptive"} return value + def validate_data_postprocessing(value) -> dict: value = initialize_key( value, "data_postprocessing_after_reverse_one_hot_encoding", {} @@ -269,16 +266,20 @@ def validate_data_postprocessing(value) -> dict: temp_dict = deepcopy(value) for key in temp_dict: if key in postprocessing_after_reverse_one_hot_encoding: - value["data_postprocessing_after_reverse_one_hot_encoding"][key] = value[key] + value["data_postprocessing_after_reverse_one_hot_encoding"][key] = value[ + key + ] value.pop(key) return value + def validate_patch_sampler(value): if isinstance(value, str): value = PatchSampler(type=value.lower()) return value -def validate_data_augmentation(value,patch_size)-> dict: + +def validate_data_augmentation(value, patch_size) -> dict: value["default_probability"] = value.get("default_probability", 0.5) if not (value is None): if len(value) > 0: # only when augmentations are defined @@ -288,39 +289,23 @@ def validate_data_augmentation(value,patch_size)-> dict: value[key] = initialize_key( value[key], "patch_size", - np.round(np.array(patch_size) / 10) - .astype("int") - .tolist(), + np.round(np.array(patch_size) / 10).astype("int").tolist(), ) # special case for swap default initialization if "swap" in value: - value["swap"] = initialize_key( - value["swap"], "num_iterations", 100 - ) + value["swap"] = initialize_key(value["swap"], "num_iterations", 100) # special case for affine default initialization if "affine" in value: - value["affine"] = initialize_key( - value["affine"], "scales", 0.1 - ) - value["affine"] = initialize_key( - value["affine"], "degrees", 15 - ) - value["affine"] = initialize_key( - value["affine"], "translation", 2 - ) + value["affine"] = initialize_key(value["affine"], "scales", 0.1) + value["affine"] = initialize_key(value["affine"], "degrees", 15) + value["affine"] = initialize_key(value["affine"], "translation", 2) if "motion" in value: - value["motion"] = initialize_key( - value["motion"], "num_transforms", 2 - ) - value["motion"] = initialize_key( - value["motion"], "degrees", 15 - ) - value["motion"] = initialize_key( - value["motion"], "translation", 2 - ) + value["motion"] = initialize_key(value["motion"], "num_transforms", 2) + value["motion"] = initialize_key(value["motion"], "degrees", 15) + value["motion"] = initialize_key(value["motion"], "translation", 2) value["motion"] = initialize_key( value["motion"], "interpolation", "linear" ) @@ -328,34 +313,24 @@ def validate_data_augmentation(value,patch_size)-> dict: # special case for random blur/noise - which takes a std-dev range for std_aug in ["blur", "noise_var"]: if std_aug in value: - value[std_aug] = initialize_key( - value[std_aug], "std", None - ) + value[std_aug] = initialize_key(value[std_aug], "std", None) for std_aug in ["noise"]: if std_aug in value: - value[std_aug] = initialize_key( - value[std_aug], "std", [0, 1] - ) + value[std_aug] = initialize_key(value[std_aug], "std", [0, 1]) # special case for random noise - which takes a mean range for mean_aug in ["noise", "noise_var"]: if mean_aug in value: - value[mean_aug] = initialize_key( - value[mean_aug], "mean", 0 - ) + value[mean_aug] = initialize_key(value[mean_aug], "mean", 0) # special case for augmentations that need axis defined for axis_aug in ["flip", "anisotropic", "rotate_90", "rotate_180"]: if axis_aug in value: - value[axis_aug] = initialize_key( - value[axis_aug], "axis", [0, 1, 2] - ) + value[axis_aug] = initialize_key(value[axis_aug], "axis", [0, 1, 2]) # special case for colorjitter if "colorjitter" in value: - value = initialize_key( - value, "colorjitter", {} - ) + value = initialize_key(value, "colorjitter", {}) for key in ["brightness", "contrast", "saturation"]: value["colorjitter"] = initialize_key( value["colorjitter"], key, [0, 1] @@ -372,9 +347,7 @@ def validate_data_augmentation(value,patch_size)-> dict: ] for augmentation_type in hed_augmentation_types: if augmentation_type in value: - value = initialize_key( - value, "hed_transform", {} - ) + value = initialize_key(value, "hed_transform", {}) ranges = [ "haematoxylin_bias_range", "eosin_bias_range", @@ -396,15 +369,11 @@ def validate_data_augmentation(value,patch_size)-> dict: for key in ranges: value["hed_transform"] = initialize_key( - value["hed_transform"], - key, - default_range, + value["hed_transform"], key, default_range ) value["hed_transform"] = initialize_key( - value["hed_transform"], - "cutoff_range", - [0, 1], + value["hed_transform"], "cutoff_range", [0, 1] ) # special case for anisotropic @@ -412,9 +381,7 @@ def validate_data_augmentation(value,patch_size)-> dict: if not ("downsampling" in value["anisotropic"]): default_downsampling = 1.5 else: - default_downsampling = value["anisotropic"][ - "downsampling" - ] + default_downsampling = value["anisotropic"]["downsampling"] initialize_downsampling = False if isinstance(default_downsampling, list): @@ -440,7 +407,5 @@ def validate_data_augmentation(value,patch_size)-> dict: for key in value: if key != "default_probability": value[key] = initialize_key( - value[key], - "probability", - value["default_probability"], - ) \ No newline at end of file + value[key], "probability", value["default_probability"] + ) diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index ee7ec7e1e..1349b276d 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -403,7 +403,6 @@ def _parseConfig( print("DeprecationWarning: 'opt' has been superseded by 'optimizer'") params["optimizer"] = params["opt"] - # define defaults for current_parameter in parameter_defaults: params = initialize_parameter( @@ -418,7 +417,6 @@ def _parseConfig( False, ) - # initialize defaults for DP if params.get("differential_privacy"): params = parse_opacus_params(params, initialize_key) From 8fc772475774715c09d92f978e46e4a9ad990c47 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 12:15:06 +0200 Subject: [PATCH 35/88] see the full test --- .github/workflows/python-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 68e09865d..43439f735 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -23,7 +23,7 @@ jobs: - name: Run generic unit tests if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change run: | - pytest --cov=. --cov-report=xml -k "generic" + pytest -s --cov=. --cov-report=xml -k "generic" - name: Run classification unit tests with histology if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change run: | From 0f113563fa91bbdefe8e7ba6ef23b54e184ab351 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 12:25:36 +0200 Subject: [PATCH 36/88] delete test_configuration.py --- test_configuration/test_configuration.py | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 test_configuration/test_configuration.py diff --git a/test_configuration/test_configuration.py b/test_configuration/test_configuration.py deleted file mode 100644 index 00f3072e9..000000000 --- a/test_configuration/test_configuration.py +++ /dev/null @@ -1,11 +0,0 @@ -import json - -from GANDLF.config_manager import ConfigManager -from pathlib import Path - -if __name__ == "__main__": - testingDir = Path(__file__).parent.absolute().__str__() - parameters = ConfigManager( - testingDir + "/config_all_options.yaml", version_check_flag=False - ) - print(json.dumps(parameters, indent=4)) From a9162f757fcbbf917f990acbb3bd33ef9e86ada0 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 12:45:32 +0200 Subject: [PATCH 37/88] clean the configuration --- .../{ => Parameters}/parameters.py | 0 .../Configuration/generate_documentation.py | 5 - GANDLF/config_manager.py | 440 +----------------- 3 files changed, 4 insertions(+), 441 deletions(-) rename GANDLF/Configuration/{ => Parameters}/parameters.py (100%) delete mode 100644 GANDLF/Configuration/generate_documentation.py diff --git a/GANDLF/Configuration/parameters.py b/GANDLF/Configuration/Parameters/parameters.py similarity index 100% rename from GANDLF/Configuration/parameters.py rename to GANDLF/Configuration/Parameters/parameters.py diff --git a/GANDLF/Configuration/generate_documentation.py b/GANDLF/Configuration/generate_documentation.py deleted file mode 100644 index 011a85fd6..000000000 --- a/GANDLF/Configuration/generate_documentation.py +++ /dev/null @@ -1,5 +0,0 @@ -import utils - -markdown_output = utils.generate_and_save_markdown( - default_parameters.DefaultParameters, "configuration_docs/user_model.md" -) diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index 1349b276d..0541420ba 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -1,117 +1,10 @@ # import logging -import traceback from typing import Optional, Union -from pydantic import BaseModel, ValidationError, field_validator -import sys, yaml, ast -import numpy as np -from copy import deepcopy +from pydantic import ValidationError +import yaml -from torch.fx.experimental.validator import ValidationException -from .utils import version_check -from GANDLF.data.post_process import postprocessing_after_reverse_one_hot_encoding -from GANDLF.privacy.opacus import parse_opacus_params - -from GANDLF.metrics import surface_distance_ids -from importlib.metadata import version -from GANDLF.Configuration.parameters import Parameters - -## dictionary to define defaults for appropriate options, which are evaluated -parameter_defaults = { - "weighted_loss": False, # whether weighted loss is to be used or not - "verbose": False, # general application verbosity - "q_verbose": False, # queue construction verbosity - "medcam_enabled": False, # interpretability via medcam - "save_training": False, # save outputs during training - "save_output": False, # save outputs during validation/testing - "in_memory": False, # pin data to cpu memory - "pin_memory_dataloader": False, # pin data to gpu memory - "scaling_factor": 1, # scaling factor for regression problems - "q_max_length": 100, # the max length of queue - "q_samples_per_volume": 10, # number of samples per volume - "q_num_workers": 4, # number of worker threads to use - "num_epochs": 100, # total number of epochs to train - "patience": 100, # number of epochs to wait for performance improvement - "batch_size": 1, # default batch size of training - "learning_rate": 0.001, # default learning rate - "clip_grad": None, # clip_gradient value - "track_memory_usage": False, # default memory tracking - "memory_save_mode": False, # default memory saving, if enabled, resize/resample will save files to disk - "print_rgb_label_warning": True, # print rgb label warning - "data_postprocessing": {}, # default data postprocessing - "grid_aggregator_overlap": "crop", # default grid aggregator overlap strategy - "determinism": False, # using deterministic version of computation - "previous_parameters": None, # previous parameters to be used for resuming training and perform sanity checking -} - -## dictionary to define string defaults for appropriate options -parameter_defaults_string = { - "optimizer": "adam", # the optimizer - "scheduler": "triangle_modified", # the default scheduler - "clip_mode": None, # default clip mode -} - - -def initialize_parameter( - params: dict, - parameter_to_initialize: str, - value: Optional[Union[str, list, int, dict]] = None, - evaluate: Optional[bool] = True, -) -> dict: - """ - This function will initialize the parameter in the parameters dict to the value if it is absent. - - Args: - params (dict): The parameter dictionary. - parameter_to_initialize (str): The parameter to initialize. - value (Optional[Union[str, list, int, dict]], optional): The value to initialize. Defaults to None. - evaluate (Optional[bool], optional): Whether to evaluate the value. Defaults to True. - - Returns: - dict: The parameter dictionary. - """ - if parameter_to_initialize in params: - if evaluate: - if isinstance(params[parameter_to_initialize], str): - if params[parameter_to_initialize].lower() == "none": - params[parameter_to_initialize] = ast.literal_eval( - params[parameter_to_initialize] - ) - else: - print( - "WARNING: Initializing '" + parameter_to_initialize + "' as " + str(value) - ) - params[parameter_to_initialize] = value - - return params - - -def initialize_key( - parameters: dict, key: str, value: Optional[Union[str, float, list, dict]] = None -) -> dict: - """ - This function initializes a key in the parameters dictionary to a value if it is absent. - - Args: - parameters (dict): The parameter dictionary. - key (str): The key to initialize. - value (Optional[Union[str, float, list, dict]], optional): The value to initialize. Defaults to None. - - Returns: - dict: The parameter dictionary. - """ - if parameters is None: - parameters = {} - if key in parameters: - if parameters[key] is not None: - if isinstance(parameters[key], dict): - # if key is present but not defined - if len(parameters[key]) == 0: - parameters[key] = value - else: - parameters[key] = value # if key is absent - - return parameters +from GANDLF.Configuration.Parameters.parameters import Parameters def _parseConfig( @@ -120,331 +13,6 @@ def _parseConfig( """ This function parses the configuration file and returns a dictionary of parameters. - Args: - config_file_path (Union[str, dict]): The filename of the configuration file. - version_check_flag (bool, optional): Whether to check the version in configuration file. Defaults to True. - - Returns: - dict: The parameter dictionary. - """ - params = config_file_path - if not isinstance(config_file_path, dict): - params = yaml.safe_load(open(config_file_path, "r")) - - if "resize" in params: - print( - "WARNING: 'resize' should be defined under 'data_processing', this will be skipped", - file=sys.stderr, - ) - - # this is NOT a required parameter - a user should be able to train with NO augmentations - params = initialize_key(params, "data_augmentation", {}) - # for all others, ensure probability is present - params["data_augmentation"]["default_probability"] = params[ - "data_augmentation" - ].get("default_probability", 0.5) - - if not (params["data_augmentation"] is None): - if len(params["data_augmentation"]) > 0: # only when augmentations are defined - # special case for random swapping and elastic transformations - which takes a patch size for computation - for key in ["swap", "elastic"]: - if key in params["data_augmentation"]: - params["data_augmentation"][key] = initialize_key( - params["data_augmentation"][key], - "patch_size", - np.round(np.array(params["patch_size"]) / 10) - .astype("int") - .tolist(), - ) - - # special case for swap default initialization - if "swap" in params["data_augmentation"]: - params["data_augmentation"]["swap"] = initialize_key( - params["data_augmentation"]["swap"], "num_iterations", 100 - ) - - # special case for affine default initialization - if "affine" in params["data_augmentation"]: - params["data_augmentation"]["affine"] = initialize_key( - params["data_augmentation"]["affine"], "scales", 0.1 - ) - params["data_augmentation"]["affine"] = initialize_key( - params["data_augmentation"]["affine"], "degrees", 15 - ) - params["data_augmentation"]["affine"] = initialize_key( - params["data_augmentation"]["affine"], "translation", 2 - ) - - if "motion" in params["data_augmentation"]: - params["data_augmentation"]["motion"] = initialize_key( - params["data_augmentation"]["motion"], "num_transforms", 2 - ) - params["data_augmentation"]["motion"] = initialize_key( - params["data_augmentation"]["motion"], "degrees", 15 - ) - params["data_augmentation"]["motion"] = initialize_key( - params["data_augmentation"]["motion"], "translation", 2 - ) - params["data_augmentation"]["motion"] = initialize_key( - params["data_augmentation"]["motion"], "interpolation", "linear" - ) - - # special case for random blur/noise - which takes a std-dev range - for std_aug in ["blur", "noise_var"]: - if std_aug in params["data_augmentation"]: - params["data_augmentation"][std_aug] = initialize_key( - params["data_augmentation"][std_aug], "std", None - ) - for std_aug in ["noise"]: - if std_aug in params["data_augmentation"]: - params["data_augmentation"][std_aug] = initialize_key( - params["data_augmentation"][std_aug], "std", [0, 1] - ) - - # special case for random noise - which takes a mean range - for mean_aug in ["noise", "noise_var"]: - if mean_aug in params["data_augmentation"]: - params["data_augmentation"][mean_aug] = initialize_key( - params["data_augmentation"][mean_aug], "mean", 0 - ) - - # special case for augmentations that need axis defined - for axis_aug in ["flip", "anisotropic", "rotate_90", "rotate_180"]: - if axis_aug in params["data_augmentation"]: - params["data_augmentation"][axis_aug] = initialize_key( - params["data_augmentation"][axis_aug], "axis", [0, 1, 2] - ) - - # special case for colorjitter - if "colorjitter" in params["data_augmentation"]: - params["data_augmentation"] = initialize_key( - params["data_augmentation"], "colorjitter", {} - ) - for key in ["brightness", "contrast", "saturation"]: - params["data_augmentation"]["colorjitter"] = initialize_key( - params["data_augmentation"]["colorjitter"], key, [0, 1] - ) - params["data_augmentation"]["colorjitter"] = initialize_key( - params["data_augmentation"]["colorjitter"], "hue", [-0.5, 0.5] - ) - - # Added HED augmentation in gandlf - hed_augmentation_types = [ - "hed_transform", - # "hed_transform_light", - # "hed_transform_heavy", - ] - for augmentation_type in hed_augmentation_types: - if augmentation_type in params["data_augmentation"]: - params["data_augmentation"] = initialize_key( - params["data_augmentation"], "hed_transform", {} - ) - ranges = [ - "haematoxylin_bias_range", - "eosin_bias_range", - "dab_bias_range", - "haematoxylin_sigma_range", - "eosin_sigma_range", - "dab_sigma_range", - ] - - default_range = ( - [-0.1, 0.1] - if augmentation_type == "hed_transform" - else ( - [-0.03, 0.03] - if augmentation_type == "hed_transform_light" - else [-0.95, 0.95] - ) - ) - - for key in ranges: - params["data_augmentation"]["hed_transform"] = initialize_key( - params["data_augmentation"]["hed_transform"], - key, - default_range, - ) - - params["data_augmentation"]["hed_transform"] = initialize_key( - params["data_augmentation"]["hed_transform"], - "cutoff_range", - [0, 1], - ) - - # special case for anisotropic - if "anisotropic" in params["data_augmentation"]: - if not ("downsampling" in params["data_augmentation"]["anisotropic"]): - default_downsampling = 1.5 - else: - default_downsampling = params["data_augmentation"]["anisotropic"][ - "downsampling" - ] - - initialize_downsampling = False - if isinstance(default_downsampling, list): - if len(default_downsampling) != 2: - initialize_downsampling = True - print( - "WARNING: 'anisotropic' augmentation needs to be either a single number of a list of 2 numbers: https://torchio.readthedocs.io/transforms/augmentation.html?highlight=randomswap#torchio.transforms.RandomAnisotropy.", - file=sys.stderr, - ) - default_downsampling = default_downsampling[0] # only - else: - initialize_downsampling = True - - if initialize_downsampling: - if default_downsampling < 1: - print( - "WARNING: 'anisotropic' augmentation needs the 'downsampling' parameter to be greater than 1, defaulting to 1.5.", - file=sys.stderr, - ) - # default - params["data_augmentation"]["anisotropic"]["downsampling"] = 1.5 - - for key in params["data_augmentation"]: - if key != "default_probability": - params["data_augmentation"][key] = initialize_key( - params["data_augmentation"][key], - "probability", - params["data_augmentation"]["default_probability"], - ) - - # this is NOT a required parameter - a user should be able to train with NO built-in pre-processing - params = initialize_key(params, "data_preprocessing", {}) - if not (params["data_preprocessing"] is None): - # perform this only when pre-processing is defined - if len(params["data_preprocessing"]) > 0: - thresholdOrClip = False - # this can be extended, as required - thresholdOrClipDict = ["threshold", "clip", "clamp"] - - resize_requested = False - temp_dict = deepcopy(params["data_preprocessing"]) - for key in params["data_preprocessing"]: - if key in ["resize", "resize_image", "resize_images", "resize_patch"]: - resize_requested = True - - if key in ["resample_min", "resample_minimum"]: - if "resolution" in params["data_preprocessing"][key]: - resize_requested = True - resolution_temp = np.array( - params["data_preprocessing"][key]["resolution"] - ) - if resolution_temp.size == 1: - temp_dict[key]["resolution"] = np.array( - [resolution_temp, resolution_temp] - ).tolist() - else: - temp_dict.pop(key) - - params["data_preprocessing"] = temp_dict - - if resize_requested and "resample" in params["data_preprocessing"]: - for key in ["resize", "resize_image", "resize_images", "resize_patch"]: - if key in params["data_preprocessing"]: - params["data_preprocessing"].pop(key) - - print( - "WARNING: Different 'resize' operations are ignored as 'resample' is defined under 'data_processing'", - file=sys.stderr, - ) - - # iterate through all keys - for key in params["data_preprocessing"]: # iterate through all keys - if key in thresholdOrClipDict: - # we only allow one of threshold or clip to occur and not both - assert not ( - thresholdOrClip - ), "Use only `threshold` or `clip`, not both" - thresholdOrClip = True - # initialize if nothing is present - if not (isinstance(params["data_preprocessing"][key], dict)): - params["data_preprocessing"][key] = {} - - # if one of the required parameters is not present, initialize with lowest/highest possible values - # this ensures the absence of a field doesn't affect processing - # for threshold or clip, ensure min and max are defined - if not "min" in params["data_preprocessing"][key]: - params["data_preprocessing"][key]["min"] = sys.float_info.min - if not "max" in params["data_preprocessing"][key]: - params["data_preprocessing"][key]["max"] = sys.float_info.max - - if key == "histogram_matching": - if params["data_preprocessing"][key] is not False: - if not (isinstance(params["data_preprocessing"][key], dict)): - params["data_preprocessing"][key] = {} - - if key == "histogram_equalization": - if params["data_preprocessing"][key] is not False: - # if histogram equalization is enabled, call histogram_matching - params["data_preprocessing"]["histogram_matching"] = {} - - if key == "adaptive_histogram_equalization": - if params["data_preprocessing"][key] is not False: - # if histogram equalization is enabled, call histogram_matching - params["data_preprocessing"]["histogram_matching"] = { - "target": "adaptive" - } - - # this is NOT a required parameter - a user should be able to train with NO built-in post-processing - params = initialize_key(params, "data_postprocessing", {}) - params = initialize_key( - params, "data_postprocessing_after_reverse_one_hot_encoding", {} - ) - temp_dict = deepcopy(params["data_postprocessing"]) - for key in temp_dict: - if key in postprocessing_after_reverse_one_hot_encoding: - params["data_postprocessing_after_reverse_one_hot_encoding"][key] = params[ - "data_postprocessing" - ][key] - params["data_postprocessing"].pop(key) - - if "opt" in params: - print("DeprecationWarning: 'opt' has been superseded by 'optimizer'") - params["optimizer"] = params["opt"] - - # define defaults - for current_parameter in parameter_defaults: - params = initialize_parameter( - params, current_parameter, parameter_defaults[current_parameter], True - ) - - for current_parameter in parameter_defaults_string: - params = initialize_parameter( - params, - current_parameter, - parameter_defaults_string[current_parameter], - False, - ) - - # initialize defaults for DP - if params.get("differential_privacy"): - params = parse_opacus_params(params, initialize_key) - - # initialize defaults for inference mechanism - inference_mechanism = {"grid_aggregator_overlap": "crop", "patch_overlap": 0} - initialize_inference_mechanism = False - if not ("inference_mechanism" in params): - initialize_inference_mechanism = True - elif not (isinstance(params["inference_mechanism"], dict)): - initialize_inference_mechanism = True - else: - for key in inference_mechanism: - if not (key in params["inference_mechanism"]): - params["inference_mechanism"][key] = inference_mechanism[key] - - if initialize_inference_mechanism: - params["inference_mechanism"] = inference_mechanism - - return params - - -def _parseConfig_temp( - config_file_path: Union[str, dict], version_check_flag: bool = True -) -> None: - """ - This function parses the configuration file and returns a dictionary of parameters. - Args: config_file_path (Union[str, dict]): The filename of the configuration file. version_check_flag (bool, optional): Whether to check the version in configuration file. Defaults to True. @@ -474,7 +42,7 @@ def ConfigManager( """ try: parameters = Parameters( - **_parseConfig_temp(config_file_path, version_check_flag) + **_parseConfig(config_file_path, version_check_flag) ).model_dump() return parameters # except Exception as e: From 2cf78f31f778460594e7523ae3c186837337119f Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 12:46:45 +0200 Subject: [PATCH 38/88] black --- GANDLF/config_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index 0541420ba..49a99daf6 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -1,7 +1,7 @@ # import logging from typing import Optional, Union from pydantic import ValidationError -import yaml +import yaml from GANDLF.Configuration.Parameters.parameters import Parameters From 9713f2e069c13c2595ecba2d40a15d49324e5393 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 12:49:02 +0200 Subject: [PATCH 39/88] delete test config_all_options.yaml --- test_configuration/config_all_options.yaml | 344 --------------------- 1 file changed, 344 deletions(-) delete mode 100644 test_configuration/config_all_options.yaml diff --git a/test_configuration/config_all_options.yaml b/test_configuration/config_all_options.yaml deleted file mode 100644 index 51926412c..000000000 --- a/test_configuration/config_all_options.yaml +++ /dev/null @@ -1,344 +0,0 @@ -# affix version -version: - { - minimum: 0.1.3-dev, - maximum: 0.1.3-dev # this should NOT be made a variable, but should be tested after every tag is created - } -weighted_loss: True -patch_size: 2 -modality: "histo" -loss_function: "mse" -model: - { - dimension: 2, # the dimension of the model and dataset: defines dimensionality of computations - # Set base filters: number of filters present in the initial module of the U-Net convolution; for IncU-Net, keep this divisible by 4 - architecture: "vgg", # options: unet, resunet, deep_resunet, deep_unet, light_resunet, light_unet, fcn, uinc, vgg, densenet - norm_type: instance, # options: batch, instance, or none (only for VGG); used for all networks - final_layer: softmax, # can be either sigmoid, softmax or none (none == regression/logits) - # sigmoid_input_multiplier: 1.0, # this is used during sigmoid, and defaults to 1.0 -# class_list: [0,1,2,4], # Set the list of labels the model should train on and predict - class_list: [0,1,2], # a range of values from 0 to 99 with a step of 1 will be created; customize as needed, but ensure this is defined as a string as it will be passed through 'eval' function -# class_list: '[0,1||2||3,1||4,4]', # combinatorial training - this will construct one-hot encoded mask using logical operands between specified annotations. Note that double '|' or '&' should be passed and not single to avoid python parsing - ignore_label_validation: 0, # this is the location of the class_list whose performance is ignored during validation metric calculation - channels : 3, - save_at_every_epoch: True, - asdasd: asdad, - batch_norm: "ben" -} -nested_training: - { - stratified: False, # this will perform stratified k-fold cross-validation but only with offline data splitting, see https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html - testing: 5, # this controls the number of testing data folds for final model evaluation; [NOT recommended] to disable this, use '1' - validation: 5, # this controls the number of validation data folds to be used for model *selection* during training (not used for back-propagation) - } -parallel_compute_command: 'qsub - b y -l gpu -l h_vmem=32G -cwd -o ${outputDir}/\$JOB_ID.stdout -e ${outputDir}/\$JOB_ID.stderr `pwd`/sge_wrapper _correct_location_of_virtual_environment_/venv/bin/python' -metrics: - - "dice" - - "classification" - - "accuracy" - - "iou" -scheduler: { - type: triangle, - min_lr: 0.00001, - max_lr: 1, -} -## Choose the model parameters here -#model: -# { -# dimension: 3, # the dimension of the model and dataset: defines dimensionality of computations -# base_filters: 30, # Set base filters: number of filters present in the initial module of the U-Net convolution; for IncU-Net, keep this divisible by 4 -# architecture: resunet, # options: unet, resunet, deep_resunet, deep_unet, light_resunet, light_unet, fcn, uinc, vgg, densenet -# norm_type: batch, # options: batch, instance, or none (only for VGG); used for all networks -# final_layer: softmax, # can be either sigmoid, softmax or none (none == regression/logits) -# # sigmoid_input_multiplier: 1.0, # this is used during sigmoid, and defaults to 1.0 -# class_list: [0,1,2,4], # Set the list of labels the model should train on and predict -# # class_list: '[*range(0,100,1)]' # a range of values from 0 to 99 with a step of 1 will be created; customize as needed, but ensure this is defined as a string as it will be passed through 'eval' function -# # class_list: '[0,1||2||3,1||4,4]', # combinatorial training - this will construct one-hot encoded mask using logical operands between specified annotations. Note that double '|' or '&' should be passed and not single to avoid python parsing -# ignore_label_validation: 0, # this is the location of the class_list whose performance is ignored during validation metric calculation -# amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False -# # num_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types from the CSV -# # save_at_every_epoch: True, # allows you to save the model at every epoch -# # print_summary: True, # prints the summary of the model before training; defaults to True -# -# ## densenet models have the following optional parameters: -# # growth_rate (int) - how many filters to add each layer (k in paper) -# # num_init_features (int) - the number of filters to learn in the first convolution layer -# # bn_size (int) - multiplicative factor for number of bottle neck layers (i.e. bn_size * k features in the bottleneck layer) -# # drop_rate (float) - dropout rate after each dense layer -# # num_classes (int) - number of classification classes -# -# ## unet_multilayer, unetr, transunet have the following optional parameter: -# # depth (int) - the number of encoder/decoder layers -# -# ## imagenet_unet has the following optional parameter: -# # pretrained (bool) - if True (default), uses the pretrained imagenet weights -# # final_layer - one of ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] -# # encoder_name (str) - the name of the encoder to use, pick from https://github.com/qubvel/segmentation_models.pytorch#encoders -# # decoder_use_batchnorm (str) - whether to use batch norm or not or inplace, this will override 'norm_type', see https://github.com/qubvel/segmentation_models.pytorch/blob/master/segmentation_models_pytorch/decoders/unet/model.py -# # decoder_attention_type (str) - the decoder attention type, see https://github.com/qubvel/segmentation_models.pytorch/blob/master/segmentation_models_pytorch/decoders/unet/model.py -# # encoder_depth (int) - the depth of the encoder, also picked up from 'depth' -# # decoder_channels (list) - a list of numbers of channels for each decoder layer, should be same length as 'encoder_depth' -# # converter_type (str) - either acs (targets ACSConv) or conv3d (targets nn.Conv3d) or soft (targets SoftACSConv with learnable weights, default); see https://doi.org/10.1109/JBHI.2021.3049452 -# # the following parameters can be used to convert the "imagenet_unet" model to a classifier/regression network; they only come into the picture when the "problem_type" is identified as not segmentation. -# # - pooling (str): One of "max", "avg"; default is "avg" -# # - dropout (float): Dropout factor in [0, 1); default is 0.2 -# } -### metrics to evaluate the validation performance -#metrics: -# - dice # segmentation -# # - hausdorff # hausdorff 100 percentile, segmentation -# # - hausdorff95 # hausdorff 95 percentile, segmentation -# # - mse # regression/classification -# # - accuracy # classification ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/accuracy.html -# # - classification_accuracy # classification -# # - balanced_accuracy # classification ## more details https://scikit-learn.org/stable/modules/generated/sklearn.metrics.balanced_accuracy_score.html -# # - per_label_accuracy # used for classification -# # - f1 # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/f1_score.html -# # - precision # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/precision.html -# # - recall # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/recall.html -# # - iou # classification/segmentation ## more details https://lightning.ai/docs/torchmetrics/v1.1.2/classification/jaccard_index.html -### this customizes the inference, primarily used for segmentation outputs -inference_mechanism: { - grid_aggregator_overlap: crop, # this option provides the option to strategize the grid aggregation output; should be either 'crop' or 'average' - https://torchio.readthedocs.io/patches/patch_inference.html#grid-aggregator - patch_overlap: 0, # amount of overlap of patches during inference, defaults to 0; see https://torchio.readthedocs.io/patches/patch_inference.html#gridsampler -} -## this is to enable or disable lazy loading - setting to true reads all data once during data loading, resulting in improvements -## in I/O at the expense of memory consumption -#in_memory: False -## if enabled, resize/resample operations in `data_preprocessing` will save files to disk instead of directly getting read into memory as tensors -#memory_save_mode: False -## this will save the generated masks for validation and testing data for qualitative analysis -#save_output: False -## this will save the patches used during training for qualitative analysis -#save_training: False -## Set the Modality : rad for radiology, path for histopathology -#modality: rad -### Patch size during training - 2D patch for breast images since third dimension is not patched -#patch_size: [144,144,64] -## uniform: UniformSampler or label: LabelSampler -#patch_sampler: uniform -## patch_sampler: label -## patch_sampler: -## { -## type: label, -## enable_padding: True, -## padding_mode: symmetric, # for options, see 'mode' in https://numpy.org/doc/stable/reference/generated/numpy.pad.html -## biased_sampling: True, # adds additional sampling probability of labels based on "sampling_weights" key; only gets invoked when using label sampler. If not present, gets calculated using the same mechanism as weighted_loss -## } -## If enabled, this parameter pads images and labels when label sampler is used -#enable_padding: False -## Number of epochs -#num_epochs: 100 -## Set the patience - measured in number of epochs after which, if the performance metric does not improve, exit the training loop - defaults to the number of epochs -#patience: 50 -## Set the batch size -#batch_size: 1 -## gradient clip : norm, value, agc -#clip_mode: norm -## clip_gradient value -#clip_grad: 0.1 -### Set the initial learning rate -#learning_rate: 0.001 -## Learning rate scheduler - options:"triangle", "triangle_modified", "exp", "step", "reduce-on-plateau", "cosineannealing", "triangular", "triangular2", "exp_range" -## triangle/triangle_modified use LambdaLR but triangular/triangular2/exp_range uses CyclicLR -#scheduler: -# { -# type: triangle, -# min_lr: 0.00001, -# max_lr: 1, -# } -## Set which loss function you want to use - options : 'dc' - for dice only, 'dcce' - for sum of dice and CE and you can guess the next (only lower-case please) -## options: dc (dice only), dc_log (-log of dice), ce (), dcce (sum of dice and ce), focal/dc_focal, mcc/mcc_log, mse () ... -## mse is the MSE defined by torch and can define a variable 'reduction'; see https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html#torch.nn.MSELoss -## focal is the focal loss and can define 2 variables: gamma and size_average -## use mse_torch for regression/classification problems and dice for segmentation -#loss_function: dc -## this parameter weights the loss to handle imbalanced losses better -#weighted_loss: True # generates new keys "class_weights" and "penalty_weights" that handle the aggregate weights of the class and penalties per label, respectively -##loss_function: -## { -## 'mse':{ -## 'reduction': 'mean' # see https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html#torch.nn.MSELoss for all options -## } -## } -##loss_function: -## { -## 'focal':{ -## 'gamma': 1.0 -## } -## } -## Which optimizer do you want to use - sgd, asgd, adam, adamw, adamax, sparseadam, rprop, adadelta, adagrad, rmsprop, -## each has their own options and functionalities, which are initialized with defaults, see GANDLF.optimizers.wrap_torch for details -#optimizer: adam -### this parameter controls the nested training process -## performs randomized k-fold cross-validation, see https://en.wikipedia.org/wiki/Cross-validation_(statistics) for details -## split is performed using sklearn's KFold method: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html -## for train on a single fold, use '-' before the fold number to make the number of folds "negative" -- NOT recommended -#nested_training: -# { -# stratified: False, # this will perform stratified k-fold cross-validation but only with offline data splitting, see https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html -# testing: 5, # this controls the number of testing data folds for final model evaluation; [NOT recommended] to disable this, use '1' -# validation: 5 # this controls the number of validation data folds to be used for model *selection* during training (not used for back-propagation) -# } -### pre-processing -## this constructs an order of transformations, which is applied to all images in the data loader -## order: all_methods_as_specified_in_dict --> normalize [normalization methods always applied at the end] -## 'to_canonical': change the image to canonical orientation, see https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.ToCanonical -## 'rgba2rgb': convert images from rgba to rgb -## 'threshold': performs intensity thresholding; i.e., if x[i] < min: x[i] = 0; and if x[i] > max: x[i] = 0 -## 'clip': performs intensity clipping; i.e., if x[i] < min: x[i] = min; and if x[i] > max: x[i] = max -## 'threshold'/'clip': if either min/max is not defined, it is taken as the minimum/maximum of the image, respectively -## 'normalize': performs z-score normalization: https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.ZNormalization -## 'normalize_positive':perform z-score normalize but with mean and std-dev calculated on only pixels > 0 -## 'normalize_nonZero': perform z-score normalize but with mean and std-dev calculated on only non-zero pixels -## 'normalize_nonZero_masked': perform z-score normalize but with mean and std-dev calculated on only non-zero pixels with the stats applied on non-zero pixels -## 'crop_external_zero_planes': crops all non-zero planes from input tensor to reduce image search space -## 'resample: resolution: X,Y,Z': resample the voxel resolution: https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.Resample -## 'resample: resolution: X': resample the voxel resolution in an isotropic manner: https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.Resample -## resize the image(s) and mask (this should be greater than or equal to patch_size); resize is done ONLY when resample is not defined -- WARNING: resizing images on the fly ensures that images get loaded in memory, which dramatically increases RAM usage -## 'resize_image' resizes the image and mask BEFORE applying any another operation -## 'resize_patch' resizes the image and mask AFTER extracting the patch -#data_preprocessing: -# { -# # 'histogram_matching':{ -# # 'target': '/path/to/target/image.nii.gz', # this is the target image to which the histogram of the current image is matched, if this not defined, histogram equalization is performed on the entire image with an equal ramp of [-1,1] -# # 'num_hist_level': 1024, # number of histogram levels -# # 'num_match_points': 16, # number of matching points for histogram matching -# # }, -# # 'histogram_equalization':{ # this performs global histogram equalization using the same logic as 'histogram_matching', just without the target -# # 'num_hist_level': 1024, # number of histogram levels -# # 'num_match_points': 16, # number of matching points for histogram matching -# # }, -# # 'adaptive_histogram_equalization', # this performs Power Law Adaptive Histogram Equalization using https://simpleitk.org/doxygen/latest/html/classitk_1_1simple_1_1AdaptiveHistogramEqualizationImageFilter.html -# 'threshold':{ -# 'min': 10, -# 'max': 75 -# }, -# # 'clip':{ -# # 'min': 10, -# # 'max': 75 -# # }, -# 'normalize', -# # 'normalize_positive', # this performs z-score normalization only on pixels > 0 -# # 'normalize_nonZero', # this performs z-score normalization only on non-zero pixels -# # 'normalize_nonZero_masked', # this performs z-score normalization only on masked region -# 'resample':{ -# 'resolution': [1,2,3] -# }, -# 'resample_min':{ -# 'resolution': 1, # this will be the maximum spacing (translates to minium resolution) across all axes -# }, -# #'resize_image': [128,128], # this is generally not recommended, as it changes image properties in unexpected ways -# #'resize_patch': [128,128], # this is generally not recommended, as it changes image properties in unexpected ways -# 'crop_external_zero_planes', # this will crop all zero-valued planes across all axes -# 'crop': [64,64,64], # this will crop the image by removing specified number of pixels; see https://torchio.readthedocs.io/transforms/preprocessing.html#torchio.transforms.Crop -# 'centercrop': [64,64,64], # this will crop the image to the specified size from the center of image; see https://torchio.readthedocs.io/transforms/preprocessing.html#croporpad -# ## histogram matching algorithms -# # 'histogram_matching':{ -# # 'target': '/path/to/template.nii.gz', # if this is absent, global histogram equalization takes place -# # # 'target': 'adaptive', # this will perform adaptive histogram matching using https://simpleitk.org/doxygen/latest/html/classitk_1_1simple_1_1AdaptiveHistogramEqualizationImageFilter.html -# # }, -# ## stain normalization algorithms -# # 'stain_normalization':{ -# # 'target': '/path/to/target.png', # this is required -# # 'extractor': 'vahadane', # can be either vahadane, ruifrok or macenko; defaults to ruifrok -# # } -# ## rescale image -# # 'rescale':{ -# # 'in_min_max': [15,125], # desired output intensity range, defaults to min/max of image -# # 'out_min_max': [0,1], # desired output intensity range, defaults to [0,1] -# # 'percentiles': [5,95], # percentile values of the input image that will be mapped to the output range, defaults to [0,100] -# # } -# } -### various data augmentation techniques -## options: affine, elastic, downsample, motion, kspace, bias, blur, gaussianNoise, swap -## keep/edit as needed -## all transforms: https://torchio.readthedocs.io/transforms/transforms.html -## 'kspace': one of ghosting or spiking is picked (randomly) for augmentation -## 'probability' sub-parameter adds the probability of the particular augmentation getting added during training (this is always 1 for normalize and resampling) -#data_augmentation: -# { -# default_probability: 1.0, # keeping probability 1.0 to ensure that all augmentations are applied -# 'affine':{ # for options, see https://torchio.readthedocs.io/transforms/augmentation.html#randomaffine -# 'scales': [0.5, 1.5], -# 'degrees': 25, -# 'translation': 2, -# }, -# 'elastic': # for options, see https://torchio.readthedocs.io/transforms/augmentation.html#randomelasticdeformation -# { -# 'num_control_points': 7, -# 'max_displacement': 0.1, -# 'locked_borders': 2, -# }, -# 'kspace':{ -# 'probability': 1 -# }, -# 'motion':{ -# 'probability': 1 -# }, -# 'bias', -# blur, # this is a gaussian blur, and can take 'std' as a sub-parameter, however, the default 'std' is [0, 0.015 * std(image)] -# ## example of blur with specific std range -# # 'blur': { -# # 'std': [0, 1] # example std-dev range, for details, see https://torchio.readthedocs.io/transforms/augmentation.html#torchio.transforms.RandomBlur -# # }, -# 'noise': { # for details, see https://torchio.readthedocs.io/transforms/augmentation.html#torchio.transforms.RandomNoise -# 'mean': 0, # default mean -# 'std': [0, 1] # default std-dev range -# }, -# noise_var, # this is a random noise, and can take 'std' and 'mean' as a sub-parameter, however, the default 'std' is [0, 0.015 * std(image)] -# 'gamma', -# 'swap':{ -# 'patch_size': 15, # patch size for swapping; if a single number if provided, the same number is used for all axes -# 'num_iterations': 50, # number of times that two patches will be swapped, defaults to 100 -# }, -# 'flip':{ -# 'axis': [0,1,2] # one or more axes can be put here. if this isn't defined, all axes are considered -# }, -# 'anisotropic':{ -# 'axis': [0,1], -# 'downsampling': [2,2.5] -# }, -# 'rotate_90':{ # explicitly rotate image by 90 -# 'axis': [0,2] # one or more axes can be put here. if this isn't defined, all axes are considered -# }, -# 'rotate_180', # explicitly rotate image by 180; if 'axis' isn't defined, default is [1,2,3] -# 'colorjitter':{ # this is used to apply the ColorJitter transform form torch - only used for rgb images -# 'brightness': [0,1], # optional: needs to be between [0,1] -# 'contrast': [0,0.75], # optional: needs to be between [0,1] -# 'saturation': [0,0.5], # optional: needs to be between [0,1] -# 'hue': [-0.25,0.25], # optional: needs to be between [-0.5,0.5] for range and [0,1] for a single value -# }, -# 'hed_transform':{ -# 'haematoxylin_bias_range': [-0.1, 0.1], -# 'eosin_bias_range': [-0.1, 0.1], -# 'dab_bias_range': [-0.1, 0.1], -# 'haematoxylin_sigma_range': [-0.1, 0.1], -# 'eosin_sigma_range': [-0.1, 0.1], -# 'dab_sigma_range': [-0.1, 0.1], -# 'cutoff_range': [0.01, 0.99], -# } -# } -## ## post-processing steps - only applied before output labels are saved -## data_postprocessing: -## { -## 'fill_holes', # this will fill holes in the image -## 'mapping': {0: 0, 1: 1, 2: 4}, # this will map the labels to a new set of labels, useful to convert labels from combinatorial training (i.e., combined segmentation labels) -## } -### parallel training on HPC - here goes the command to prepend to send to a high performance computing -## cluster for parallel computing during multi-fold training -## not used for single fold training -## this gets passed before the training_loop, so ensure enough memory is provided along with other parameters -## that your HPC would expect -## ${outputDir} will be changed to the outputDir you pass in CLI + '/${fold_number}' -## ensure that the correct location of the virtual environment is getting invoked, otherwise it would pick up the system python, which might not have all dependencies -## parallel_compute_command: 'qsub -b y -l gpu -l h_vmem=32G -cwd -o ${outputDir}/\$JOB_ID.stdout -e ${outputDir}/\$JOB_ID.stderr `pwd`/sge_wrapper _correct_location_of_virtual_environment_/venv/bin/python' -### queue configuration - https://torchio.readthedocs.io/data/patch_training.html?#queue -## this determines the maximum number of patches that can be stored in the queue. Using a large number means that the queue needs to be filled less often, but more CPU memory is needed to store the patches -#q_max_length: 40 -## this determines the number of patches to extract from each volume. A small number of patches ensures a large variability in the queue, but training will be slower -#q_samples_per_volume: 5 -## this determines the number subprocesses to use for data loading; '0' means main process is used -#q_num_workers: 2 # scale this according to available CPU resources -## used for debugging -#q_verbose: False From abaa1a80f00b96d39c93fba237ac14c7270aaf45 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 13:54:08 +0200 Subject: [PATCH 40/88] fix version bug --- GANDLF/Configuration/Parameters/user_defined_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index dab7d53b5..72599278f 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -3,7 +3,7 @@ from GANDLF.Configuration.Parameters.default_parameters import DefaultParameters from GANDLF.Configuration.Parameters.nested_training_parameters import NestedTraining from GANDLF.Configuration.Parameters.patch_sampler import PatchSampler -from GANDLF.config_manager import version_check +from GANDLF.utils import version_check from importlib.metadata import version from typing_extensions import Self, Literal, Annotated from GANDLF.Configuration.Parameters.validators import * From dfac0183993f376d28abc815b0fc85e36776e472 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 14:01:38 +0200 Subject: [PATCH 41/88] fix the num_channels bug in the tests --- GANDLF/Configuration/Parameters/model_parameters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index 0eed09765..501af1cf3 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -44,6 +44,7 @@ class Model(BaseModel): validation_alias=AliasChoices( "num_channels", "n_channels", "channels", "model_channels" ), + default=3, ) # TODO: check it type: Optional[str] = Field(description="Type of model.", default="torch") data_type: str = Field(description="Data type.", default="FP32") From a0622b6290a50c8417ce28babbc812626f364735 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 17:46:50 +0200 Subject: [PATCH 42/88] change the metrics type --- GANDLF/Configuration/Parameters/user_defined_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index 72599278f..33abe07f1 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -41,7 +41,7 @@ class UserDefinedParameters(DefaultParameters): AfterValidator(validate_loss_function), ] metrics: Annotated[ - Union[dict, list[str]], + list[Union[str,dict]], Field(description="Metrics."), AfterValidator(validate_metrics), ] From af5e1e1f52ab7df5cdb78e53cb0c0330ba1332cd Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 18:30:41 +0200 Subject: [PATCH 43/88] update validators and user_defined_parameters.py --- .../Configuration/Parameters/user_defined_parameters.py | 8 ++++---- GANDLF/Configuration/Parameters/validators.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index 33abe07f1..413fffc53 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -53,8 +53,8 @@ class UserDefinedParameters(DefaultParameters): description="Scheduler.", default=Scheduler(type="triangle_modified") ) optimizer: Union[str, Optimizer] = Field( - description="Optimizer.", default=Optimizer(type="adam"), alias="opt" - ) # TODO: Check it again + description="Optimizer.", default=Optimizer(type="adam"), + ) # TODO: Check it again for (opt) patch_sampler: Union[str, PatchSampler] = Field( description="Patch sampler.", default=PatchSampler() ) @@ -95,8 +95,8 @@ def validate(self) -> Self: # validate patch_sampler self.patch_sampler = validate_patch_sampler(self.patch_sampler) # validate_data_augmentation - self.data_preprocessing = validate_data_augmentation( - self.data_preprocessing, self.patch_size + self.data_augmentation = validate_data_augmentation( + self.data_augmentation, self.patch_size ) return self diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index f5a63abf8..20ef8f445 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -409,3 +409,4 @@ def validate_data_augmentation(value, patch_size) -> dict: value[key] = initialize_key( value[key], "probability", value["default_probability"] ) + return value \ No newline at end of file From 7db6dc59c61612e86aad8af61e93585db4d3ee9d Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 18:43:45 +0200 Subject: [PATCH 44/88] blacked . --- GANDLF/Configuration/Parameters/user_defined_parameters.py | 4 ++-- GANDLF/Configuration/Parameters/validators.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index 413fffc53..e4902c684 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -41,7 +41,7 @@ class UserDefinedParameters(DefaultParameters): AfterValidator(validate_loss_function), ] metrics: Annotated[ - list[Union[str,dict]], + list[Union[str, dict]], Field(description="Metrics."), AfterValidator(validate_metrics), ] @@ -53,7 +53,7 @@ class UserDefinedParameters(DefaultParameters): description="Scheduler.", default=Scheduler(type="triangle_modified") ) optimizer: Union[str, Optimizer] = Field( - description="Optimizer.", default=Optimizer(type="adam"), + description="Optimizer.", default=Optimizer(type="adam") ) # TODO: Check it again for (opt) patch_sampler: Union[str, PatchSampler] = Field( description="Patch sampler.", default=PatchSampler() diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index 20ef8f445..cd535fd94 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -409,4 +409,4 @@ def validate_data_augmentation(value, patch_size) -> dict: value[key] = initialize_key( value[key], "probability", value["default_probability"] ) - return value \ No newline at end of file + return value From aa91f058206965ea7a178c276d95544a2c897def Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 18:47:11 +0200 Subject: [PATCH 45/88] updated model architecture with "vgg16" --- GANDLF/Configuration/Parameters/model_parameters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index 501af1cf3..4fc5c58ea 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -19,6 +19,7 @@ "uinc", "vgg", "densenet", + "vgg16" ] NORM_TYPE_OPTIONS = Literal["batch", "instance", "None"] From 1c996061ec816bb6f265eae818b082fd0ff91bf6 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Sun, 9 Feb 2025 18:48:44 +0200 Subject: [PATCH 46/88] blacked . --- GANDLF/Configuration/Parameters/model_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index 4fc5c58ea..93952d795 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -19,7 +19,7 @@ "uinc", "vgg", "densenet", - "vgg16" + "vgg16", ] NORM_TYPE_OPTIONS = Literal["batch", "instance", "None"] From 1ec5477abf16bce354912a0880ece58228ee9cf0 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 18:34:16 +0200 Subject: [PATCH 47/88] update the model and the nested_training --- GANDLF/Configuration/Parameters/model_parameters.py | 3 ++- GANDLF/Configuration/Parameters/nested_training_parameters.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index 93952d795..6dcd4e85e 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -20,6 +20,7 @@ "vgg", "densenet", "vgg16", + "sdnet" ] NORM_TYPE_OPTIONS = Literal["batch", "instance", "None"] @@ -51,7 +52,7 @@ class Model(BaseModel): data_type: str = Field(description="Data type.", default="FP32") save_at_every_epoch: bool = Field(default=False, description="Save at every epoch.") amp: bool = Field(default=False, description="Amplifier.") - ignore_label_validation: int = Field( + ignore_label_validation: Optional[int] = Field( default=None, description="Ignore label validation." ) # TODO: To check it print_summary: bool = Field(default=True, description="Print summary.") diff --git a/GANDLF/Configuration/Parameters/nested_training_parameters.py b/GANDLF/Configuration/Parameters/nested_training_parameters.py index fd02a3e0a..a7d7a049e 100644 --- a/GANDLF/Configuration/Parameters/nested_training_parameters.py +++ b/GANDLF/Configuration/Parameters/nested_training_parameters.py @@ -1,5 +1,5 @@ from pydantic import BaseModel, Field, model_validator -from typing_extensions import Self +from typing_extensions import Self, Optional class NestedTraining(BaseModel): @@ -15,7 +15,7 @@ class NestedTraining(BaseModel): default=-5, description="this controls the number of validation data folds to be used for model *selection* during training (not used for back-propagation)", ) - proportional: bool = Field(default=None) + proportional: Optional[bool] = Field(default=None) @model_validator(mode="after") def validate_nested_training(self) -> Self: From a30b986abe35d799db04d8dad0535d356b37a45d Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 18:46:13 +0200 Subject: [PATCH 48/88] update the model and the user_defined_parameters --- GANDLF/Configuration/Parameters/model_parameters.py | 6 ++++-- GANDLF/Configuration/Parameters/user_defined_parameters.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index 6dcd4e85e..1ebb5aee1 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -20,7 +20,7 @@ "vgg", "densenet", "vgg16", - "sdnet" + "sdnet", ] NORM_TYPE_OPTIONS = Literal["batch", "instance", "None"] @@ -56,7 +56,9 @@ class Model(BaseModel): default=None, description="Ignore label validation." ) # TODO: To check it print_summary: bool = Field(default=True, description="Print summary.") - batch_norm: str = Field(default=None) # TODO: Check it for deprecated option + batch_norm: Optional[str] = Field( + default=None + ) # TODO: Check it for deprecated option @model_validator(mode="after") def model_validate(self) -> Self: diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index e4902c684..c3bda9948 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -41,7 +41,7 @@ class UserDefinedParameters(DefaultParameters): AfterValidator(validate_loss_function), ] metrics: Annotated[ - list[Union[str, dict]], + [Union[dict, list[Union[str, dict]]]], Field(description="Metrics."), AfterValidator(validate_metrics), ] From e6714f111dbc734991dde199cdd3a407ff33d2c4 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 18:52:12 +0200 Subject: [PATCH 49/88] minor changes --- GANDLF/Configuration/Parameters/user_defined_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index c3bda9948..ed648eede 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -41,7 +41,7 @@ class UserDefinedParameters(DefaultParameters): AfterValidator(validate_loss_function), ] metrics: Annotated[ - [Union[dict, list[Union[str, dict]]]], + Union[dict, list[Union[str, dict]]], Field(description="Metrics."), AfterValidator(validate_metrics), ] From f3ccfea85495232324b3e3c6a6da88d0db16854e Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 19:06:42 +0200 Subject: [PATCH 50/88] update model_parameters --- .../Configuration/Parameters/model_parameters.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index 1ebb5aee1..1b330960a 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -21,6 +21,21 @@ "densenet", "vgg16", "sdnet", + "densenet121", + "imagenet_vgg11", + "imagenet_vgg11_bn", + "imagenet_vgg13", + "imagenet_vgg13_bn", + "imagenet_vgg16", + "imagenet_vgg16_bn", + "imagenet_vgg19", + "imagenet_vgg19_bn", + "resnet18", + "densenet121", + "resnet18", + "resnet50", + "efficientnetb0", + "imagenet_unet", ] NORM_TYPE_OPTIONS = Literal["batch", "instance", "None"] From d19bf5a815fe2474293fa6ff6ba0151f03eca955 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 19:42:42 +0200 Subject: [PATCH 51/88] update validators and user_defined_parameters with data_postprocessing_after_reverse_one_hot_encoding --- .../Parameters/user_defined_parameters.py | 11 +++++++++++ GANDLF/Configuration/Parameters/validators.py | 15 ++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index ed648eede..3c4a710a9 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -61,6 +61,9 @@ class UserDefinedParameters(DefaultParameters): inference_mechanism: InferenceMechanism = Field( description="Inference mechanism.", default=InferenceMechanism() ) + data_postprocessing_after_reverse_one_hot_encoding: dict = Field( + description="data_postprocessing_after_reverse_one_hot_encoding." + ) # TODO: It should be defined with a better way (using a BaseModel class) data_preprocessing: Annotated[ @@ -98,5 +101,13 @@ def validate(self) -> Self: self.data_augmentation = validate_data_augmentation( self.data_augmentation, self.patch_size ) + # validate data_postprocessing_after_reverse_one_hot_encoding + ( + self.data_postprocessing_after_reverse_one_hot_encoding, + self.data_postprocessing, + ) = validate_data_postprocessing_after_reverse_one_hot_encoding( + self.data_postprocessing_after_reverse_one_hot_encoding, + self.data_postprocessing, + ) return self diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index cd535fd94..83ac9e661 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -259,18 +259,15 @@ def validate_data_preprocessing(value) -> dict: return value -def validate_data_postprocessing(value) -> dict: - value = initialize_key( - value, "data_postprocessing_after_reverse_one_hot_encoding", {} - ) +def validate_data_postprocessing_after_reverse_one_hot_encoding( + value, data_postproccesing +) -> list: temp_dict = deepcopy(value) for key in temp_dict: if key in postprocessing_after_reverse_one_hot_encoding: - value["data_postprocessing_after_reverse_one_hot_encoding"][key] = value[ - key - ] - value.pop(key) - return value + value[key] = data_postproccesing[key] + data_postproccesing.pop(key) + return [value, data_postproccesing] def validate_patch_sampler(value): From b054a5fc4f1312d79b0e3112c9603f15dd96fb86 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 19:47:10 +0200 Subject: [PATCH 52/88] update user_defined_parameters.py --- GANDLF/Configuration/Parameters/user_defined_parameters.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index 3c4a710a9..a3ef67825 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -75,7 +75,6 @@ class UserDefinedParameters(DefaultParameters): data_postprocessing: Annotated[ dict, Field(description="Data augmentation."), - AfterValidator(validate_data_postprocessing), ] = {} # TODO: It should be defined with a better way (using a BaseModel class) data_augmentation: Annotated[dict, Field(description="Data augmentation.")] = {} From 27542d8ce14069c5916fe7d87a5366f19aeaa9eb Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 19:48:14 +0200 Subject: [PATCH 53/88] update user_defined_parameters.py --- GANDLF/Configuration/Parameters/user_defined_parameters.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index a3ef67825..b9ee7ac1b 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -72,10 +72,7 @@ class UserDefinedParameters(DefaultParameters): AfterValidator(validate_data_preprocessing), ] = {} # TODO: It should be defined with a better way (using a BaseModel class) - data_postprocessing: Annotated[ - dict, - Field(description="Data augmentation."), - ] = {} + data_postprocessing: Annotated[dict, Field(description="Data augmentation.")] = {} # TODO: It should be defined with a better way (using a BaseModel class) data_augmentation: Annotated[dict, Field(description="Data augmentation.")] = {} From 0afd0e54e1897db373e687c9ea4f6e4e0d9db759 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 19:53:37 +0200 Subject: [PATCH 54/88] update user_defined_parameters.py --- GANDLF/Configuration/Parameters/user_defined_parameters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index b9ee7ac1b..b698e85f9 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -62,7 +62,8 @@ class UserDefinedParameters(DefaultParameters): description="Inference mechanism.", default=InferenceMechanism() ) data_postprocessing_after_reverse_one_hot_encoding: dict = Field( - description="data_postprocessing_after_reverse_one_hot_encoding." + description="data_postprocessing_after_reverse_one_hot_encoding.", + default = {} ) # TODO: It should be defined with a better way (using a BaseModel class) From afe7fd63059e7aec96dd9059e0107a8a8176ba02 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 19:56:23 +0200 Subject: [PATCH 55/88] update user_defined_parameters.py --- GANDLF/Configuration/Parameters/user_defined_parameters.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index b698e85f9..be6a55d14 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -62,8 +62,7 @@ class UserDefinedParameters(DefaultParameters): description="Inference mechanism.", default=InferenceMechanism() ) data_postprocessing_after_reverse_one_hot_encoding: dict = Field( - description="data_postprocessing_after_reverse_one_hot_encoding.", - default = {} + description="data_postprocessing_after_reverse_one_hot_encoding.", default={} ) # TODO: It should be defined with a better way (using a BaseModel class) From ef55332dd7c4e3dad17b3ab5f5d2b8ae994ef3b3 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 20:16:34 +0200 Subject: [PATCH 56/88] fix spelling error --- GANDLF/Configuration/Parameters/validators.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index 83ac9e661..6325d12e0 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -260,14 +260,14 @@ def validate_data_preprocessing(value) -> dict: def validate_data_postprocessing_after_reverse_one_hot_encoding( - value, data_postproccesing + value, data_postprocessing ) -> list: temp_dict = deepcopy(value) for key in temp_dict: if key in postprocessing_after_reverse_one_hot_encoding: - value[key] = data_postproccesing[key] - data_postproccesing.pop(key) - return [value, data_postproccesing] + value[key] = data_postprocessing[key] + data_postprocessing.pop(key) + return [value, data_postprocessing] def validate_patch_sampler(value): From 02dc31018433d9eaf889cd9110f7d50d614be432 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 21:58:22 +0200 Subject: [PATCH 57/88] update the scheduler --- GANDLF/Configuration/Parameters/scheduler_parameters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GANDLF/Configuration/Parameters/scheduler_parameters.py b/GANDLF/Configuration/Parameters/scheduler_parameters.py index 0c7a80bec..ec3567c62 100644 --- a/GANDLF/Configuration/Parameters/scheduler_parameters.py +++ b/GANDLF/Configuration/Parameters/scheduler_parameters.py @@ -12,6 +12,7 @@ "triangular", "triangular2", "exp_range", + "exponential" ] From 55b32c8133a42029ca6477319f1af9d8d2be15e4 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 21:59:57 +0200 Subject: [PATCH 58/88] update the scheduler --- GANDLF/Configuration/Parameters/scheduler_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/Configuration/Parameters/scheduler_parameters.py b/GANDLF/Configuration/Parameters/scheduler_parameters.py index ec3567c62..b13ec13d3 100644 --- a/GANDLF/Configuration/Parameters/scheduler_parameters.py +++ b/GANDLF/Configuration/Parameters/scheduler_parameters.py @@ -12,7 +12,7 @@ "triangular", "triangular2", "exp_range", - "exponential" + "exponential", ] From cb700348d6a62ba4f26b2929785a648aaf86cd59 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 22:02:32 +0200 Subject: [PATCH 59/88] update user_defined_parameters --- GANDLF/Configuration/Parameters/user_defined_parameters.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index be6a55d14..4140df37c 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -72,7 +72,6 @@ class UserDefinedParameters(DefaultParameters): AfterValidator(validate_data_preprocessing), ] = {} # TODO: It should be defined with a better way (using a BaseModel class) - data_postprocessing: Annotated[dict, Field(description="Data augmentation.")] = {} # TODO: It should be defined with a better way (using a BaseModel class) data_augmentation: Annotated[dict, Field(description="Data augmentation.")] = {} From c7f7f4642bf8a3c8448d986503ca090a35a17b97 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 22:30:47 +0200 Subject: [PATCH 60/88] change scheduler_classification_rad_2d --- testing/test_full.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/test_full.py b/testing/test_full.py index eccf0b3c8..3584301e9 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -988,8 +988,8 @@ def test_train_scheduler_classification_rad_2d(device): parameters = populate_header_in_parameters(parameters, parameters["headers"]) parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters["scheduler"] = {} - parameters["scheduler"]["type"] = scheduler + parameters["scheduler"] = scheduler + # parameters["scheduler"]["type"] = scheduler parameters["nested_training"]["testing"] = -5 parameters["nested_training"]["validation"] = -5 sanitize_outputDir() From c02dbe1113cc206dce17716286ea80c967d2a388 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 22:50:36 +0200 Subject: [PATCH 61/88] update scheduler_parameters.py --- GANDLF/Configuration/Parameters/scheduler_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/Configuration/Parameters/scheduler_parameters.py b/GANDLF/Configuration/Parameters/scheduler_parameters.py index b13ec13d3..a472aaf24 100644 --- a/GANDLF/Configuration/Parameters/scheduler_parameters.py +++ b/GANDLF/Configuration/Parameters/scheduler_parameters.py @@ -7,7 +7,7 @@ "triangle_modified", "exp", "step", - "reduce-on-plateau", + "reduce_on_plateau", "cosineannealing", "triangular", "triangular2", From afca6569e10bd2798762a379c8192fc5a0fe89e0 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 23:27:12 +0200 Subject: [PATCH 62/88] update scheduler_parameters.py --- GANDLF/Configuration/Parameters/scheduler_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/Configuration/Parameters/scheduler_parameters.py b/GANDLF/Configuration/Parameters/scheduler_parameters.py index a472aaf24..b13ec13d3 100644 --- a/GANDLF/Configuration/Parameters/scheduler_parameters.py +++ b/GANDLF/Configuration/Parameters/scheduler_parameters.py @@ -7,7 +7,7 @@ "triangle_modified", "exp", "step", - "reduce_on_plateau", + "reduce-on-plateau", "cosineannealing", "triangular", "triangular2", From 2fd32ba98eef7fa14534dcd857a116f616d3ab1d Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 23:58:38 +0200 Subject: [PATCH 63/88] added differential_privacy in parameters --- .../Parameters/user_defined_parameters.py | 6 +-- GANDLF/Configuration/Parameters/validators.py | 44 +++++++++++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index 4140df37c..945f0886f 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -64,7 +64,7 @@ class UserDefinedParameters(DefaultParameters): data_postprocessing_after_reverse_one_hot_encoding: dict = Field( description="data_postprocessing_after_reverse_one_hot_encoding.", default={} ) - + differential_privacy: dict = Field(description="Differential privacy.", default={}) # TODO: It should be defined with a better way (using a BaseModel class) data_preprocessing: Annotated[ dict, @@ -72,7 +72,6 @@ class UserDefinedParameters(DefaultParameters): AfterValidator(validate_data_preprocessing), ] = {} # TODO: It should be defined with a better way (using a BaseModel class) - # TODO: It should be defined with a better way (using a BaseModel class) data_augmentation: Annotated[dict, Field(description="Data augmentation.")] = {} # Validators @@ -104,5 +103,6 @@ def validate(self) -> Self: self.data_postprocessing_after_reverse_one_hot_encoding, self.data_postprocessing, ) - + #validate differential_privacy + self.differential_privacy = validate_differential_privacy(self.differential_privacy,self.batch_size) return self diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index 6325d12e0..a1fd2dc79 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -407,3 +407,47 @@ def validate_data_augmentation(value, patch_size) -> dict: value[key], "probability", value["default_probability"] ) return value + +def validate_differential_privacy(value, batch_size): + # if not isinstance(value, dict): + # print( + # "WARNING: Non dictionary value for the key: 'differential_privacy' was used, replacing with default valued dictionary." + # ) + # value = {} + # these are some defaults + value = initialize_key( + value, "noise_multiplier", 10.0 + ) + value = initialize_key( + value, "max_grad_norm", 1.0 + ) + value = initialize_key( + value, "accountant", "rdp" + ) + value = initialize_key( + value, "secure_mode", False + ) + value = initialize_key( + value, "allow_opacus_model_fix", True + ) + value = initialize_key( + value, "delta", 1e-5 + ) + value = initialize_key( + value, "physical_batch_size", batch_size + ) + + if value["physical_batch_size"] > batch_size: + print( + f"WARNING: The physical batch size {value['physical_batch_size']} is greater" + f"than the batch size {batch_size}, setting the physical batch size to the batch size." + ) + value["physical_batch_size"] = batch_size + + # these keys need to be parsed as floats, not strings + for key in ["noise_multiplier", "max_grad_norm", "delta", "epsilon"]: + if key in value: + value[key] = float( + value[key] + ) + return value \ No newline at end of file From 600315224f28a545034d494c60c4b19eab026c61 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Mon, 10 Feb 2025 23:59:23 +0200 Subject: [PATCH 64/88] added differential_privacy in parameters --- .../Parameters/user_defined_parameters.py | 6 ++- GANDLF/Configuration/Parameters/validators.py | 37 ++++++------------- 2 files changed, 15 insertions(+), 28 deletions(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index 945f0886f..449712952 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -103,6 +103,8 @@ def validate(self) -> Self: self.data_postprocessing_after_reverse_one_hot_encoding, self.data_postprocessing, ) - #validate differential_privacy - self.differential_privacy = validate_differential_privacy(self.differential_privacy,self.batch_size) + # validate differential_privacy + self.differential_privacy = validate_differential_privacy( + self.differential_privacy, self.batch_size + ) return self diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index a1fd2dc79..ad58f3c9a 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -408,34 +408,21 @@ def validate_data_augmentation(value, patch_size) -> dict: ) return value + def validate_differential_privacy(value, batch_size): # if not isinstance(value, dict): # print( # "WARNING: Non dictionary value for the key: 'differential_privacy' was used, replacing with default valued dictionary." # ) # value = {} - # these are some defaults - value = initialize_key( - value, "noise_multiplier", 10.0 - ) - value = initialize_key( - value, "max_grad_norm", 1.0 - ) - value = initialize_key( - value, "accountant", "rdp" - ) - value = initialize_key( - value, "secure_mode", False - ) - value = initialize_key( - value, "allow_opacus_model_fix", True - ) - value = initialize_key( - value, "delta", 1e-5 - ) - value = initialize_key( - value, "physical_batch_size", batch_size - ) + # these are some defaults + value = initialize_key(value, "noise_multiplier", 10.0) + value = initialize_key(value, "max_grad_norm", 1.0) + value = initialize_key(value, "accountant", "rdp") + value = initialize_key(value, "secure_mode", False) + value = initialize_key(value, "allow_opacus_model_fix", True) + value = initialize_key(value, "delta", 1e-5) + value = initialize_key(value, "physical_batch_size", batch_size) if value["physical_batch_size"] > batch_size: print( @@ -447,7 +434,5 @@ def validate_differential_privacy(value, batch_size): # these keys need to be parsed as floats, not strings for key in ["noise_multiplier", "max_grad_norm", "delta", "epsilon"]: if key in value: - value[key] = float( - value[key] - ) - return value \ No newline at end of file + value[key] = float(value[key]) + return value From cf7b351ccb9bbd9a114c2bba6a558100044523b8 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 00:19:34 +0200 Subject: [PATCH 65/88] remove batch_norm --- GANDLF/Configuration/Parameters/model_parameters.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index 1b330960a..ba29c4465 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -71,9 +71,6 @@ class Model(BaseModel): default=None, description="Ignore label validation." ) # TODO: To check it print_summary: bool = Field(default=True, description="Print summary.") - batch_norm: Optional[str] = Field( - default=None - ) # TODO: Check it for deprecated option @model_validator(mode="after") def model_validate(self) -> Self: @@ -96,9 +93,6 @@ def model_validate(self) -> Self: self.base_filters = 32 print("Using default 'base_filters' in 'model': ", self.base_filters) - if self.batch_norm is not None: - print( - "WARNING: 'batch_norm' is no longer supported, please use 'norm_type' in 'model' instead" - ) + return self From 007a9672c0c7d33c5a5252c27ae966627303e4b2 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 00:21:22 +0200 Subject: [PATCH 66/88] remove batch_norm --- GANDLF/Configuration/Parameters/model_parameters.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index ba29c4465..3b41abfc4 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -93,6 +93,4 @@ def model_validate(self) -> Self: self.base_filters = 32 print("Using default 'base_filters' in 'model': ", self.base_filters) - - return self From 61acfdcc902356504a632eaed467376776e9fdb6 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 01:18:34 +0200 Subject: [PATCH 67/88] update configuration --- .../Parameters/user_defined_parameters.py | 4 +++- GANDLF/Configuration/Parameters/validators.py | 12 +++++++----- GANDLF/config_manager.py | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index 449712952..e98c01300 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -64,7 +64,9 @@ class UserDefinedParameters(DefaultParameters): data_postprocessing_after_reverse_one_hot_encoding: dict = Field( description="data_postprocessing_after_reverse_one_hot_encoding.", default={} ) - differential_privacy: dict = Field(description="Differential privacy.", default={}) + differential_privacy: dict = Field( + description="Differential privacy.", default=None + ) # TODO: It should be defined with a better way (using a BaseModel class) data_preprocessing: Annotated[ dict, diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index ad58f3c9a..ce9eb1242 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -410,11 +410,13 @@ def validate_data_augmentation(value, patch_size) -> dict: def validate_differential_privacy(value, batch_size): - # if not isinstance(value, dict): - # print( - # "WARNING: Non dictionary value for the key: 'differential_privacy' was used, replacing with default valued dictionary." - # ) - # value = {} + if value is None: + return value + if not isinstance(value, dict): + print( + "WARNING: Non dictionary value for the key: 'differential_privacy' was used, replacing with default valued dictionary." + ) + value = {} # these are some defaults value = initialize_key(value, "noise_multiplier", 10.0) value = initialize_key(value, "max_grad_norm", 1.0) diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index 49a99daf6..9145f605d 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -43,7 +43,7 @@ def ConfigManager( try: parameters = Parameters( **_parseConfig(config_file_path, version_check_flag) - ).model_dump() + ).model_dump(exclude_none=True) return parameters # except Exception as e: # ## todo: ensure logging captures assertion errors From 60aa3d5e7725a42dd6ff2a1bb593b83943e259a1 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 01:47:19 +0200 Subject: [PATCH 68/88] add exclude parameters --- GANDLF/Configuration/Parameters/exclude_parameters.py | 1 + GANDLF/Configuration/Parameters/model_parameters.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 GANDLF/Configuration/Parameters/exclude_parameters.py diff --git a/GANDLF/Configuration/Parameters/exclude_parameters.py b/GANDLF/Configuration/Parameters/exclude_parameters.py new file mode 100644 index 000000000..e6f079985 --- /dev/null +++ b/GANDLF/Configuration/Parameters/exclude_parameters.py @@ -0,0 +1 @@ +exclude_parameters = {"differential_privacy"} diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index 3b41abfc4..91ed31f7d 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -67,7 +67,7 @@ class Model(BaseModel): data_type: str = Field(description="Data type.", default="FP32") save_at_every_epoch: bool = Field(default=False, description="Save at every epoch.") amp: bool = Field(default=False, description="Amplifier.") - ignore_label_validation: Optional[int] = Field( + ignore_label_validation: Union[int, None] = Field( default=None, description="Ignore label validation." ) # TODO: To check it print_summary: bool = Field(default=True, description="Print summary.") From 6443161de925263e1f777c02fd47f3afcfd0bfee Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 01:56:43 +0200 Subject: [PATCH 69/88] add exclude parameters --- GANDLF/config_manager.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index 9145f605d..fe740dc4a 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -5,6 +5,7 @@ from GANDLF.Configuration.Parameters.parameters import Parameters +from GANDLF.Configuration.Parameters.exclude_parameters import exclude_parameters def _parseConfig( @@ -41,9 +42,16 @@ def ConfigManager( dict: The parameter dictionary. """ try: - parameters = Parameters( + parameters_config = Parameters( **_parseConfig(config_file_path, version_check_flag) - ).model_dump(exclude_none=True) + ) + parameters = parameters_config.model_dump( + exclude={ + field + for field in exclude_parameters + if getattr(parameters_config, field) is None + } + ) return parameters # except Exception as e: # ## todo: ensure logging captures assertion errors From 9953aacaeb990fdc55a73a1d54e182a689069731 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 02:16:36 +0200 Subject: [PATCH 70/88] update scheduler_parameters.py --- GANDLF/Configuration/Parameters/scheduler_parameters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GANDLF/Configuration/Parameters/scheduler_parameters.py b/GANDLF/Configuration/Parameters/scheduler_parameters.py index b13ec13d3..77f0bb49f 100644 --- a/GANDLF/Configuration/Parameters/scheduler_parameters.py +++ b/GANDLF/Configuration/Parameters/scheduler_parameters.py @@ -8,6 +8,7 @@ "exp", "step", "reduce-on-plateau", + "reduce_on_plateau", "cosineannealing", "triangular", "triangular2", From c4e05792e7edefc545bece1284230576becc191e Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 20:33:23 +0200 Subject: [PATCH 71/88] update literals --- .../Parameters/model_parameters.py | 32 ++----------------- .../Parameters/optimizer_parameters.py | 15 ++------- .../Parameters/scheduler_parameters.py | 15 ++------- 3 files changed, 7 insertions(+), 55 deletions(-) diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index 91ed31f7d..565faefbf 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -6,37 +6,9 @@ validate_class_list, validate_norm_type, ) - +from GANDLF.models import global_models_dict # Define model architecture options -ARCHITECTURE_OPTIONS = Literal[ - "unet", - "resunet", - "deep_resunet", - "deep_unet", - "light_resunet", - "light_unet", - "fcn", - "uinc", - "vgg", - "densenet", - "vgg16", - "sdnet", - "densenet121", - "imagenet_vgg11", - "imagenet_vgg11_bn", - "imagenet_vgg13", - "imagenet_vgg13_bn", - "imagenet_vgg16", - "imagenet_vgg16_bn", - "imagenet_vgg19", - "imagenet_vgg19_bn", - "resnet18", - "densenet121", - "resnet18", - "resnet50", - "efficientnetb0", - "imagenet_unet", -] +ARCHITECTURE_OPTIONS = Literal[tuple(global_models_dict.keys())] NORM_TYPE_OPTIONS = Literal["batch", "instance", "None"] diff --git a/GANDLF/Configuration/Parameters/optimizer_parameters.py b/GANDLF/Configuration/Parameters/optimizer_parameters.py index 435bb062a..47dec7a0b 100644 --- a/GANDLF/Configuration/Parameters/optimizer_parameters.py +++ b/GANDLF/Configuration/Parameters/optimizer_parameters.py @@ -1,18 +1,9 @@ from pydantic import BaseModel, Field from typing_extensions import Literal +from GANDLF.optimizers import global_optimizer_dict -OPTIMIZER_OPTIONS = Literal[ - "sgd", - "asgd", - "adam", - "adamw", - "adamax", - "sparseadam", - "rprop", - "adadelta", - "adagrad", - "rmsprop", -] +#takes the keys from global optimizer +OPTIMIZER_OPTIONS = Literal[tuple(global_optimizer_dict.keys())] class Optimizer(BaseModel): diff --git a/GANDLF/Configuration/Parameters/scheduler_parameters.py b/GANDLF/Configuration/Parameters/scheduler_parameters.py index 77f0bb49f..96b110a48 100644 --- a/GANDLF/Configuration/Parameters/scheduler_parameters.py +++ b/GANDLF/Configuration/Parameters/scheduler_parameters.py @@ -1,20 +1,9 @@ from pydantic import BaseModel, ConfigDict, Field from typing_extensions import Literal +from GANDLF.schedulers import global_schedulers_dict -TYPE_OPTIONS = Literal[ - "triangle", - "triangle_modified", - "exp", - "step", - "reduce-on-plateau", - "reduce_on_plateau", - "cosineannealing", - "triangular", - "triangular2", - "exp_range", - "exponential", -] +TYPE_OPTIONS = Literal[tuple(global_schedulers_dict.keys())] # It allows extra parameters From af437b94052a3c2db28bfb7fe68eadad86e50cf6 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 20:33:59 +0200 Subject: [PATCH 72/88] update literals --- GANDLF/Configuration/Parameters/model_parameters.py | 1 + GANDLF/Configuration/Parameters/optimizer_parameters.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index 565faefbf..a9b0c9050 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -7,6 +7,7 @@ validate_norm_type, ) from GANDLF.models import global_models_dict + # Define model architecture options ARCHITECTURE_OPTIONS = Literal[tuple(global_models_dict.keys())] NORM_TYPE_OPTIONS = Literal["batch", "instance", "None"] diff --git a/GANDLF/Configuration/Parameters/optimizer_parameters.py b/GANDLF/Configuration/Parameters/optimizer_parameters.py index 47dec7a0b..eec57fb4f 100644 --- a/GANDLF/Configuration/Parameters/optimizer_parameters.py +++ b/GANDLF/Configuration/Parameters/optimizer_parameters.py @@ -2,7 +2,7 @@ from typing_extensions import Literal from GANDLF.optimizers import global_optimizer_dict -#takes the keys from global optimizer +# takes the keys from global optimizer OPTIMIZER_OPTIONS = Literal[tuple(global_optimizer_dict.keys())] From caa4dca8303093c4e273dc089a48aae81265c451 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 20:40:34 +0200 Subject: [PATCH 73/88] update the differential_privacy --- GANDLF/Configuration/Parameters/user_defined_parameters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index e98c01300..5140cdc70 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -5,7 +5,7 @@ from GANDLF.Configuration.Parameters.patch_sampler import PatchSampler from GANDLF.utils import version_check from importlib.metadata import version -from typing_extensions import Self, Literal, Annotated +from typing_extensions import Self, Literal, Annotated, Any from GANDLF.Configuration.Parameters.validators import * from GANDLF.Configuration.Parameters.model_parameters import Model @@ -64,7 +64,7 @@ class UserDefinedParameters(DefaultParameters): data_postprocessing_after_reverse_one_hot_encoding: dict = Field( description="data_postprocessing_after_reverse_one_hot_encoding.", default={} ) - differential_privacy: dict = Field( + differential_privacy: Any = Field( description="Differential privacy.", default=None ) # TODO: It should be defined with a better way (using a BaseModel class) From a0aeb43716e1ed7f7c6e66583cdb113ed30c8713 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 20:42:16 +0200 Subject: [PATCH 74/88] update the differential_privacy --- GANDLF/Configuration/Parameters/user_defined_parameters.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index 5140cdc70..abc0028f4 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -64,9 +64,7 @@ class UserDefinedParameters(DefaultParameters): data_postprocessing_after_reverse_one_hot_encoding: dict = Field( description="data_postprocessing_after_reverse_one_hot_encoding.", default={} ) - differential_privacy: Any = Field( - description="Differential privacy.", default=None - ) + differential_privacy: Any = Field(description="Differential privacy.", default=None) # TODO: It should be defined with a better way (using a BaseModel class) data_preprocessing: Annotated[ dict, From 3d63edcf0265a85d13c61d6d88d1f5d9b1e75805 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 21:07:03 +0200 Subject: [PATCH 75/88] update validators --- GANDLF/Configuration/Parameters/validators.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index ce9eb1242..744aee99c 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -410,6 +410,8 @@ def validate_data_augmentation(value, patch_size) -> dict: def validate_differential_privacy(value, batch_size): + if isinstance(value, dict): + return value if value is None: return value if not isinstance(value, dict): From 612d2e0eb1a40da01ad4e4fdc35b1a496b21d265 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 21:19:49 +0200 Subject: [PATCH 76/88] change the workflow --- .github/workflows/python-test.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 43439f735..54757645c 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -20,14 +20,14 @@ jobs: id: dependencies uses: ./.github/workflows/dependencies - - name: Run generic unit tests - if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change - run: | - pytest -s --cov=. --cov-report=xml -k "generic" - - name: Run classification unit tests with histology - if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change - run: | - pytest --cov=. --cov-report=xml --cov-append -k "classification and histology" +# - name: Run generic unit tests +# if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change +# run: | +# pytest -s --cov=. --cov-report=xml -k "generic" +# - name: Run classification unit tests with histology +# if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change +# run: | +# pytest --cov=. --cov-report=xml --cov-append -k "classification and histology" - name: Run classification unit tests if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change run: | From d7bef6308b595d6a65eb4fcea180499e7d458359 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 22:32:33 +0200 Subject: [PATCH 77/88] update the validators.py --- GANDLF/Configuration/Parameters/validators.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index 744aee99c..ce9eb1242 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -410,8 +410,6 @@ def validate_data_augmentation(value, patch_size) -> dict: def validate_differential_privacy(value, batch_size): - if isinstance(value, dict): - return value if value is None: return value if not isinstance(value, dict): From dcea6abf866853bf2a7b00dc38bed41499af3413 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 22:47:39 +0200 Subject: [PATCH 78/88] change the test_full --- testing/test_full.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/testing/test_full.py b/testing/test_full.py index 3584301e9..353030d28 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd import logging - +import json from pydicom.data import get_testdata_file import cv2 @@ -3362,6 +3362,8 @@ def test_differential_privacy_epsilon_classification_rad_2d(device): yaml.dump(parameters, file) parameters = parseConfig(file_config_temp, version_check_flag=True) + print(json.dumps(parameters)) + TrainingManager( dataframe=training_data, outputDir=outputDir, From d08d8ee3daa0e485c11655c529151e6dbab345c2 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 22:52:35 +0200 Subject: [PATCH 79/88] change workflow --- .github/workflows/python-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 54757645c..ae1a87753 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -31,7 +31,7 @@ jobs: - name: Run classification unit tests if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change run: | - pytest --cov=. --cov-report=xml --cov-append -k "classification and not histology" + pytest -s --cov=. --cov-report=xml --cov-append -k "classification and not histology" - name: Run segmentation unit tests if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change run: | From 3e902c284af6d122a8e2c381161e5c012be8bf9b Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Tue, 11 Feb 2025 23:16:39 +0200 Subject: [PATCH 80/88] update validators.py --- GANDLF/Configuration/Parameters/validators.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index ce9eb1242..f9983fd33 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -155,9 +155,6 @@ def validate_norm_type(norm_type, architecture): raise ValueError( "Normalization type cannot be 'None' for non-VGG architectures" ) - else: - print("WARNING: Initializing 'norm_type' as 'batch'", flush=True) - norm_type = "batch" return norm_type From fc2ed69469054ab0327ac8cd6368b28a42a80981 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Wed, 12 Feb 2025 00:22:27 +0200 Subject: [PATCH 81/88] update pydantic configuration --- GANDLF/Configuration/Parameters/default_parameters.py | 6 ++++-- GANDLF/Configuration/Parameters/model_parameters.py | 2 +- GANDLF/Configuration/Parameters/user_defined_parameters.py | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/GANDLF/Configuration/Parameters/default_parameters.py b/GANDLF/Configuration/Parameters/default_parameters.py index 5afe342de..a55da612e 100644 --- a/GANDLF/Configuration/Parameters/default_parameters.py +++ b/GANDLF/Configuration/Parameters/default_parameters.py @@ -1,5 +1,7 @@ from pydantic import BaseModel, Field -from typing import Optional, Dict +from typing import Optional, Dict, Set + +from typing_extensions import Union class DefaultParameters(BaseModel): @@ -50,7 +52,7 @@ class DefaultParameters(BaseModel): print_rgb_label_warning: bool = Field( default=True, description="Print a warning for RGB labels." ) - data_postprocessing: Dict = Field( + data_postprocessing: Union[dict, set] = Field( default={}, description="Default data postprocessing configuration." ) grid_aggregator_overlap: str = Field( diff --git a/GANDLF/Configuration/Parameters/model_parameters.py b/GANDLF/Configuration/Parameters/model_parameters.py index a9b0c9050..3f82c2720 100644 --- a/GANDLF/Configuration/Parameters/model_parameters.py +++ b/GANDLF/Configuration/Parameters/model_parameters.py @@ -10,7 +10,7 @@ # Define model architecture options ARCHITECTURE_OPTIONS = Literal[tuple(global_models_dict.keys())] -NORM_TYPE_OPTIONS = Literal["batch", "instance", "None"] +NORM_TYPE_OPTIONS = Literal["batch", "instance", "none"] # You can define new parameters for model here. Please read the pydantic documentation. diff --git a/GANDLF/Configuration/Parameters/user_defined_parameters.py b/GANDLF/Configuration/Parameters/user_defined_parameters.py index abc0028f4..4e54ed52a 100644 --- a/GANDLF/Configuration/Parameters/user_defined_parameters.py +++ b/GANDLF/Configuration/Parameters/user_defined_parameters.py @@ -41,7 +41,7 @@ class UserDefinedParameters(DefaultParameters): AfterValidator(validate_loss_function), ] metrics: Annotated[ - Union[dict, list[Union[str, dict]]], + Union[dict, list[Union[str, dict, set]]], Field(description="Metrics."), AfterValidator(validate_metrics), ] @@ -107,4 +107,5 @@ def validate(self) -> Self: self.differential_privacy = validate_differential_privacy( self.differential_privacy, self.batch_size ) + return self From 3ca341a3c0e28d8ae5e43e15bde69444fa0a092d Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Wed, 12 Feb 2025 00:23:34 +0200 Subject: [PATCH 82/88] update pydantic configuration --- .github/workflows/python-test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index ae1a87753..d440686e7 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -28,10 +28,10 @@ jobs: # if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change # run: | # pytest --cov=. --cov-report=xml --cov-append -k "classification and histology" - - name: Run classification unit tests - if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change - run: | - pytest -s --cov=. --cov-report=xml --cov-append -k "classification and not histology" +# - name: Run classification unit tests +# if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change +# run: | +# pytest -s --cov=. --cov-report=xml --cov-append -k "classification and not histology" - name: Run segmentation unit tests if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change run: | From 6606de4ed61d00e6387fa07d2299b68d7a57c338 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Wed, 12 Feb 2025 00:56:38 +0200 Subject: [PATCH 83/88] fix norm_type error --- GANDLF/Configuration/Parameters/validators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/Configuration/Parameters/validators.py b/GANDLF/Configuration/Parameters/validators.py index f9983fd33..5308e12c9 100644 --- a/GANDLF/Configuration/Parameters/validators.py +++ b/GANDLF/Configuration/Parameters/validators.py @@ -151,7 +151,7 @@ def validate_patch_size(patch_size, dimension) -> list: def validate_norm_type(norm_type, architecture): if norm_type is None or norm_type.lower() == "none": - if "vgg" in architecture: + if not ("vgg" in architecture): raise ValueError( "Normalization type cannot be 'None' for non-VGG architectures" ) From f624516665baa14b3e805fd398850fdbaab171d8 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Wed, 12 Feb 2025 01:22:39 +0200 Subject: [PATCH 84/88] update test_full --- testing/test_full.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/testing/test_full.py b/testing/test_full.py index 353030d28..aa6645f05 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -5,6 +5,8 @@ import pandas as pd import logging import json + +from pydantic import ValidationError from pydicom.data import get_testdata_file import cv2 @@ -1118,7 +1120,7 @@ def test_train_normtype_segmentation_rad_3d(device): for norm_type in ["none", None]: parameters["model"]["norm_type"] = norm_type file_config_temp = write_temp_config_path(parameters) - with pytest.raises(Exception) as exc_info: + with pytest.raises(ValidationError ) as exc_info: parameters = ConfigManager(file_config_temp, version_check_flag=False) print("Exception raised:", exc_info.value) From afd63debca95f4deed9dc1ab352f41218fb621d4 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Wed, 12 Feb 2025 01:24:52 +0200 Subject: [PATCH 85/88] update test_full --- testing/test_full.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/test_full.py b/testing/test_full.py index aa6645f05..0424cc123 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -1120,7 +1120,7 @@ def test_train_normtype_segmentation_rad_3d(device): for norm_type in ["none", None]: parameters["model"]["norm_type"] = norm_type file_config_temp = write_temp_config_path(parameters) - with pytest.raises(ValidationError ) as exc_info: + with pytest.raises(ValidationError) as exc_info: parameters = ConfigManager(file_config_temp, version_check_flag=False) print("Exception raised:", exc_info.value) From dae6c1e19ac9b2b4990bc71f113487f5d4ab1e11 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Wed, 12 Feb 2025 01:49:01 +0200 Subject: [PATCH 86/88] update config_manager.py --- GANDLF/config_manager.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index fe740dc4a..129dfa297 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -1,4 +1,5 @@ # import logging +import traceback from typing import Optional, Union from pydantic import ValidationError import yaml @@ -53,14 +54,13 @@ def ConfigManager( } ) return parameters - # except Exception as e: - # ## todo: ensure logging captures assertion errors - # assert ( - # False - # ), f"Config parsing failed: {config_file_path=}, {version_check_flag=}, Exception: {str(e)}, {traceback.format_exc()}" - # # logging.error( - # # f"gandlf config parsing failed: {config_file_path=}, {version_check_flag=}, Exception: {str(e)}, {traceback.format_exc()}" - # # ) - # # raise - except ValidationError as exc: - print(exc.errors()) + except Exception as e: + ## todo: ensure logging captures assertion errors + assert ( + False + ), f"Config parsing failed: {config_file_path=}, {version_check_flag=}, Exception: {str(e)}, {traceback.format_exc()}" + # logging.error( + # f"gandlf config parsing failed: {config_file_path=}, {version_check_flag=}, Exception: {str(e)}, {traceback.format_exc()}" + # ) + # raise + From 17c1d6d1c99a77fa7520a9f54c3b513709f421b2 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Wed, 12 Feb 2025 02:06:26 +0200 Subject: [PATCH 87/88] update config_manager.py and test_full.py --- GANDLF/config_manager.py | 1 - testing/test_full.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/GANDLF/config_manager.py b/GANDLF/config_manager.py index 129dfa297..461b8b064 100644 --- a/GANDLF/config_manager.py +++ b/GANDLF/config_manager.py @@ -63,4 +63,3 @@ def ConfigManager( # f"gandlf config parsing failed: {config_file_path=}, {version_check_flag=}, Exception: {str(e)}, {traceback.format_exc()}" # ) # raise - diff --git a/testing/test_full.py b/testing/test_full.py index 0424cc123..d25268df1 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -6,7 +6,6 @@ import logging import json -from pydantic import ValidationError from pydicom.data import get_testdata_file import cv2 @@ -1120,7 +1119,7 @@ def test_train_normtype_segmentation_rad_3d(device): for norm_type in ["none", None]: parameters["model"]["norm_type"] = norm_type file_config_temp = write_temp_config_path(parameters) - with pytest.raises(ValidationError) as exc_info: + with pytest.raises(Exception) as exc_info: parameters = ConfigManager(file_config_temp, version_check_flag=False) print("Exception raised:", exc_info.value) From c7c696f2d8c7d29700fddc8fa2dd17b88dd81d49 Mon Sep 17 00:00:00 2001 From: vmalefioudakis Date: Wed, 12 Feb 2025 10:53:07 +0200 Subject: [PATCH 88/88] revert python_tests in GitHub workflows --- .github/workflows/python-test.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index d440686e7..68e09865d 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -20,18 +20,18 @@ jobs: id: dependencies uses: ./.github/workflows/dependencies -# - name: Run generic unit tests -# if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change -# run: | -# pytest -s --cov=. --cov-report=xml -k "generic" -# - name: Run classification unit tests with histology -# if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change -# run: | -# pytest --cov=. --cov-report=xml --cov-append -k "classification and histology" -# - name: Run classification unit tests -# if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change -# run: | -# pytest -s --cov=. --cov-report=xml --cov-append -k "classification and not histology" + - name: Run generic unit tests + if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change + run: | + pytest --cov=. --cov-report=xml -k "generic" + - name: Run classification unit tests with histology + if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change + run: | + pytest --cov=. --cov-report=xml --cov-append -k "classification and histology" + - name: Run classification unit tests + if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change + run: | + pytest --cov=. --cov-report=xml --cov-append -k "classification and not histology" - name: Run segmentation unit tests if: ${{steps.dependencies.outputs.other_modified_files_count > 0}} # Run on any non-docs change run: |