From 20d63752059031ebe74d5edcb53083f869abc014 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 6 Jan 2023 15:52:49 +0100 Subject: [PATCH 001/163] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 111feaac..7b1be7d7 100644 --- a/.gitignore +++ b/.gitignore @@ -116,3 +116,4 @@ MH_DEBUG # Cached data **/cache/ .DS_Store +.vscode/launch.json From 8bbfff47c6090db055979c07b05bdb38d1be016c Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 6 Jan 2023 16:46:52 +0100 Subject: [PATCH 002/163] introduce target split --- icu_benchmarks/data/preprocess.py | 67 ++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 9 deletions(-) diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py index 57016231..d4c713ea 100644 --- a/icu_benchmarks/data/preprocess.py +++ b/icu_benchmarks/data/preprocess.py @@ -9,7 +9,7 @@ import pickle from sklearn.impute import MissingIndicator, SimpleImputer -from sklearn.model_selection import KFold +from sklearn.model_selection import KFold, LeavePOut, StratifiedKFold from sklearn.preprocessing import LabelEncoder from recipys.recipe import Recipe @@ -63,6 +63,55 @@ def make_single_split( return data_split +def make_target_split( + data: dict[pd.DataFrame], + vars: dict[str], + num_folds: int, + fold_index: int, + seed: int = 42, + debug: bool = False, + target_size = 100, +) -> dict[dict[pd.DataFrame]]: + """Randomly split the data into training, validation, and test set. + + Args: + data: dictionary containing data divided int OUTCOME, STATIC, and DYNAMIC. + vars: Contains the names of columns in the data. + num_folds: Number of folds for cross validation. + seed: Random seed. + debug: Load less data if true. + + Returns: + Input data divided into 'train', 'val', and 'test'. + """ + id = vars["GROUP"] + fraction_to_load = 1 if not debug else 0.01 + stays = data["STATIC"][[id]].sample(frac=fraction_to_load, random_state=seed) + + train_and_val = stays.sample(target_size, random_state=seed) + test = stays.drop(train_and_val.index).index + train_and_val_labels = data["OUTCOME"].label.loc[train_and_val.index] + + target_folds = StratifiedKFold(num_folds, shuffle=True, random_state=seed) + train, val = list(target_folds.split(train_and_val, train_and_val_labels))[fold_index] + + split = { + "train": stays.iloc[train], + "val": stays.iloc[val], + "test": stays.iloc[test], + } + data_split = {} + + for fold in split.keys(): # Loop through train / val / test + # Loop through DYNAMIC / STATIC / OUTCOME + # set sort to true to make sure that IDs are reordered after scrambling earlier + data_split[fold] = { + data_type: data[data_type].merge(split[fold], on=id, how="right", sort=True) for data_type in data.keys() + } + + return data_split + + def apply_recipe_to_splits(recipe: Recipe, data: dict[dict[pd.DataFrame]], type: str) -> dict[dict[pd.DataFrame]]: """Fits and transforms the training data, then transforms the validation and test data with the recipe. @@ -115,18 +164,18 @@ def preprocess_data( config_string = f"{dumped_file_names}{dumped_vars}{use_features}{seed}{fold_index}{debug}".encode("utf-8") cache_file = cache_dir / hashlib.md5(config_string).hexdigest() - if use_cache: - if cache_file.exists(): - with open(cache_file, "rb") as f: - logging.info(f"Loading cached data from {cache_file}.") - return pickle.load(f) - else: - logging.info(f"No cached data found in {cache_file}, loading raw data.") + # if use_cache: + # if cache_file.exists(): + # with open(cache_file, "rb") as f: + # logging.info(f"Loading cached data from {cache_file}.") + # return pickle.load(f) + # else: + # logging.info(f"No cached data found in {cache_file}, loading raw data.") data = {f: pq.read_table(data_dir / file_names[f]).to_pandas() for f in ["STATIC", "DYNAMIC", "OUTCOME"]} logging.info("Generating splits.") - data = make_single_split(data, vars, num_folds, fold_index, seed=seed, debug=debug) + data = make_target_split(data, vars, num_folds, fold_index, seed=seed, debug=debug) logging.info("Preprocessing static data.") sta_rec = Recipe(data["train"]["STATIC"], [], vars["STATIC"]) From d62d52470fd084811f1a921c9c57d53d9315c138 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 7 Jan 2023 01:24:28 +0100 Subject: [PATCH 003/163] refactor fold size --- icu_benchmarks/data/preprocess.py | 100 +++++++++++------------------- 1 file changed, 35 insertions(+), 65 deletions(-) diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py index 1b78bfe1..2ed8d9ff 100644 --- a/icu_benchmarks/data/preprocess.py +++ b/icu_benchmarks/data/preprocess.py @@ -24,6 +24,7 @@ def make_single_split( fold_index: int, seed: int = 42, debug: bool = False, + fold_size: int = None, ) -> dict[dict[pd.DataFrame]]: """Randomly split the data into training, validation, and test set. @@ -41,72 +42,38 @@ def make_single_split( fraction_to_load = 1 if not debug else 0.01 stays = data["STATIC"][[id]].sample(frac=fraction_to_load, random_state=seed) - outer = KFold(num_folds, shuffle=True, random_state=seed) + if fold_size: + train_and_val = stays.sample(fold_size, random_state=seed) + test = stays.drop(train_and_val.index) + train_and_val_labels = data["OUTCOME"].label.loc[train_and_val.index] - train, test_and_val = list(outer.split(stays))[fold_index] - val, test = np.array_split(test_and_val, 2) + target_folds = StratifiedKFold(num_folds, shuffle=True, random_state=seed) + train, val = list(target_folds.split(train_and_val, train_and_val_labels))[fold_index] - split = { - "train": stays.iloc[train], - "val": stays.iloc[val], - "test": stays.iloc[test], - } - data_split = {} - - for fold in split.keys(): # Loop through train / val / test - # Loop through DYNAMIC / STATIC / OUTCOME - # set sort to true to make sure that IDs are reordered after scrambling earlier - data_split[fold] = { - data_type: data[data_type].merge(split[fold], on=id, how="right", sort=True) for data_type in data.keys() + split = { + "train": train_and_val.iloc[train], + "val": train_and_val.iloc[val], + "test": test, } + else: + outer = KFold(num_folds, shuffle=True, random_state=seed) - return data_split - - -def make_target_split( - data: dict[pd.DataFrame], - vars: dict[str], - num_folds: int, - fold_index: int, - seed: int = 42, - debug: bool = False, - target_size = 100, -) -> dict[dict[pd.DataFrame]]: - """Randomly split the data into training, validation, and test set. - - Args: - data: dictionary containing data divided int OUTCOME, STATIC, and DYNAMIC. - vars: Contains the names of columns in the data. - num_folds: Number of folds for cross validation. - seed: Random seed. - debug: Load less data if true. - - Returns: - Input data divided into 'train', 'val', and 'test'. - """ - id = vars["GROUP"] - fraction_to_load = 1 if not debug else 0.01 - stays = data["STATIC"][[id]].sample(frac=fraction_to_load, random_state=seed) - - train_and_val = stays.sample(target_size, random_state=seed) - test = stays.drop(train_and_val.index).index - train_and_val_labels = data["OUTCOME"].label.loc[train_and_val.index] + train, test_and_val = list(outer.split(stays))[fold_index] + val, test = np.array_split(test_and_val, 2) - target_folds = StratifiedKFold(num_folds, shuffle=True, random_state=seed) - train, val = list(target_folds.split(train_and_val, train_and_val_labels))[fold_index] + split = { + "train": stays.iloc[train], + "val": stays.iloc[val], + "test": stays.iloc[test], + } + - split = { - "train": stays.iloc[train], - "val": stays.iloc[val], - "test": stays.iloc[test], - } data_split = {} - - for fold in split.keys(): # Loop through train / val / test + for fold_name, fold in split.items(): # Loop through train / val / test # Loop through DYNAMIC / STATIC / OUTCOME # set sort to true to make sure that IDs are reordered after scrambling earlier - data_split[fold] = { - data_type: data[data_type].merge(split[fold], on=id, how="right", sort=True) for data_type in data.keys() + data_split[fold_name] = { + data_type: data[data_type].merge(fold, on=id, how="right", sort=True) for data_type in data.keys() } return data_split @@ -139,6 +106,7 @@ def preprocess_data( debug: bool = False, use_cache: bool = False, num_folds: int = 5, + fold_size: int = None, fold_index: int = 0, ) -> dict[dict[pd.DataFrame]]: """Perform loading, splitting, imputing and normalising of task data. @@ -159,23 +127,25 @@ def preprocess_data( nested within split (train/val/test). """ cache_dir = data_dir / "cache" + if fold_size: + cache_dir = cache_dir / f"T{fold_size}" dumped_file_names = json.dumps(file_names, sort_keys=True) dumped_vars = json.dumps(vars, sort_keys=True) config_string = f"{dumped_file_names}{dumped_vars}{use_features}{seed}{fold_index}{debug}".encode("utf-8") cache_file = cache_dir / hashlib.md5(config_string).hexdigest() - # if use_cache: - # if cache_file.exists(): - # with open(cache_file, "rb") as f: - # logging.info(f"Loading cached data from {cache_file}.") - # return pickle.load(f) - # else: - # logging.info(f"No cached data found in {cache_file}, loading raw data.") + if use_cache: + if cache_file.exists(): + with open(cache_file, "rb") as f: + logging.info(f"Loading cached data from {cache_file}.") + return pickle.load(f) + else: + logging.info(f"No cached data found in {cache_file}, loading raw data.") data = {f: pq.read_table(data_dir / file_names[f]).to_pandas() for f in ["STATIC", "DYNAMIC", "OUTCOME"]} logging.info("Generating splits.") - data = make_target_split(data, vars, num_folds, fold_index, seed=seed, debug=debug) + data = make_single_split(data, vars, num_folds, fold_index, seed=seed, debug=debug, fold_size=fold_size) logging.info("Preprocessing static data.") sta_rec = Recipe(data["train"]["STATIC"], [], vars["STATIC"]) From be991ff374204c0bd44213719d177da4c9e4a860 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 7 Jan 2023 01:24:34 +0100 Subject: [PATCH 004/163] Update train.py --- icu_benchmarks/models/train.py | 1 - 1 file changed, 1 deletion(-) diff --git a/icu_benchmarks/models/train.py b/icu_benchmarks/models/train.py index d6d54024..1489ce4e 100644 --- a/icu_benchmarks/models/train.py +++ b/icu_benchmarks/models/train.py @@ -63,7 +63,6 @@ def train_common( model.load_weights(source_dir / "model.joblib") else: raise Exception("No weights to load at path : {}".format(source_dir / "model.*")) - else: try: model.train(dataset, val_dataset, weight, seed) From dcf1a6d45d319cbe793d7267676945288e1c55e0 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 7 Jan 2023 01:25:03 +0100 Subject: [PATCH 005/163] make cpu and gin flag general, rename gin flag --- icu_benchmarks/run.py | 2 +- icu_benchmarks/run_utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py index 653174b0..ccd10fe5 100644 --- a/icu_benchmarks/run.py +++ b/icu_benchmarks/run.py @@ -48,7 +48,7 @@ def main(my_args=tuple(sys.argv[1:])): if args.experiment else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")] ) - gin.parse_config_files_and_bindings(gin_config_files, args.hyperparams, finalize_config=False) + gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False) run_dir = create_run_dir(log_dir) choose_and_bind_hyperparameters( args.tune, args.data_dir, run_dir, args.seeds[0], checkpoint=checkpoint, debug=args.debug diff --git a/icu_benchmarks/run_utils.py b/icu_benchmarks/run_utils.py index 0dca0522..a615b226 100644 --- a/icu_benchmarks/run_utils.py +++ b/icu_benchmarks/run_utils.py @@ -36,15 +36,15 @@ def build_parser() -> ArgumentParser: general_args.add_argument( "-s", "--seeds", default=[1111], nargs="+", type=int, help="Random seed for processing, tuning and training." ) + general_args.add_argument("--cpu", default=False, action=BooleanOptionalAction, help="Set to train and test on CPU.") general_args.add_argument("-db", "--debug", default=False, action=BooleanOptionalAction, help="Set to load less data.") general_args.add_argument("-c", "--cache", action=BooleanOptionalAction, help="Set to cache and use preprocessed data.") general_args.add_argument("-pl", "--plot", action=BooleanOptionalAction, help="Generate common plots.") + general_args.add_argument("-gb", "--gin-bindings", nargs="+", help="Overwrite or add gin bindings.") # MODEL TRAINING ARGUMENTS prep_and_train = subparsers.add_parser("train", help="Preprocess data and train model.", parents=[parent_parser]) prep_and_train.add_argument("--reproducible", default=True, action=BooleanOptionalAction, help="Make torch reproducible.") - prep_and_train.add_argument("--cpu", default=False, action=BooleanOptionalAction, help="Set to train on CPU.") - prep_and_train.add_argument("-hp", "--hyperparams", nargs="+", help="Hyperparameters for model.") prep_and_train.add_argument("--tune", default=False, action=BooleanOptionalAction, help="Find best hyperparameters.") prep_and_train.add_argument("--checkpoint", type=Path, help="Use previous checkpoint.") From ef4e68768b8d83c669af3208bd3436072a8a26bf Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 7 Jan 2023 01:25:18 +0100 Subject: [PATCH 006/163] Update preprocess.py --- icu_benchmarks/data/preprocess.py | 1 - 1 file changed, 1 deletion(-) diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py index 2ed8d9ff..4996e7f3 100644 --- a/icu_benchmarks/data/preprocess.py +++ b/icu_benchmarks/data/preprocess.py @@ -66,7 +66,6 @@ def make_single_split( "val": stays.iloc[val], "test": stays.iloc[test], } - data_split = {} for fold_name, fold in split.items(): # Loop through train / val / test From 63c06e2a827d5df3e375ca8aaadfee2ca1a52787 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 7 Jan 2023 15:22:48 +0100 Subject: [PATCH 007/163] add basis for domain adaptation --- icu_benchmarks/models/domain_adaptation.py | 134 +++++++++++++++++++++ icu_benchmarks/models/wrappers.py | 20 +++ 2 files changed, 154 insertions(+) create mode 100644 icu_benchmarks/models/domain_adaptation.py diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py new file mode 100644 index 00000000..7f527848 --- /dev/null +++ b/icu_benchmarks/models/domain_adaptation.py @@ -0,0 +1,134 @@ +import os +import random +import gin +import torch +import logging +import numpy as np +import pandas as pd +from pathlib import Path +from skopt import gp_minimize + +from icu_benchmarks.data.loader import RICUDataset +from icu_benchmarks.models.wrappers import MLWrapper +from icu_benchmarks.models.utils import save_config_file + + +def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, model_dir: Path, log_dir: Path): + """Get predictions for a single model. + + Args: + model: Model to get predictions for. + dataset: Dataset to get predictions for. + model_dir: Path to directory where model weights are stored. + log_dir: Path to directory where model output should be saved. + + Returns: + Tuple of predictions and labels. + """ + model.set_log_dir(log_dir) + if (model_dir / "model.torch").is_file(): + model.load_weights(model_dir / "model.torch") + elif (model_dir / "model.txt").is_file(): + model.load_weights(model_dir / "model.txt") + elif (model_dir / "model.joblib").is_file(): + model.load_weights(model_dir / "model.joblib") + else: + raise Exception("No weights to load at path : {}".format(model_dir / "model.*")) + return model.predict(dataset) + + +@gin.configurable("domain_adaptation") +def evaluate_model_combination( + data: dict[str, pd.DataFrame], + log_dir: Path, + source_dir: Path = None, + seed: int = 1234, + reproducible: bool = True, + model: object = MLWrapper, + weight: str = None, + test_on: str = "Test", + +): + """Common wrapper to train all benchmarked models. + + Args: + data: Dict containing data to be trained on. + log_dir: Path to directory where model output should be saved. + source_dir: If set to load weights, path to directory containing trained weights. + seed: Common seed used for any random operation. + reproducible: If set to true, set torch to run reproducibly. + """ + + # Setting the seed before gin parsing + os.environ["PYTHONHASHSEED"] = str(seed) + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + if reproducible: + os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" + torch.use_deterministic_algorithms(True) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + dataset = RICUDataset(data, split="train") + test_dataset = RICUDataset(data, split=test_on) + weight = dataset.get_balance() + + predictions = [] + for source_dataset in source_dir.iterdir(): + model_dir = source_dir / source_dataset + predictions.append(get_predictions_for_single_model(model, dataset, model_dir, log_dir)) + + test_pred = np.average(predictions, axis=0, weights=dataset_weights) + + # save config file again to capture missing gin parameters + return log_loss(test_label, test_pred) + + +@gin.configurable("tune_hyperparameters") +def choose_and_bind_hyperparameters( + data_dir: Path, + log_dir: Path, + seed: int, + n_initial_points: int = 3, + n_calls: int = 20, + folds_to_tune_on: int = gin.REQUIRED, + debug: bool = False, +): + """Choose hyperparameters to tune and bind them to gin. + + Args: + data_dir: Path to the data directory. + log_dir: Path to the log directory. + seed: Random seed. + n_initial_points: Number of initial points to explore. + n_calls: Number of iterations to optimize the hyperparameters. + folds_to_tune_on: Number of folds to tune on. + debug: Whether to load less data and enable more logging. + + Raises: + ValueError: If checkpoint is not None and the checkpoint does not exist. + """ + + def convex_model_combination(hyperparams): + return preprocess_and_train_for_folds( + data_dir, + Path(temp_dir), + seed, + num_folds_to_train=folds_to_tune_on, + use_cache=True, + test_on="val", + debug=debug, + ) + + res = gp_minimize( + bind_params_and_train, + hyperparams_bounds, + n_calls=n_calls, + n_initial_points=n_initial_points, + random_state=seed, + noise=1e-10, # the models are deterministic, but noise is needed for the gp to work + ) + + print(res) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index 5e820f14..bf06083b 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -289,6 +289,22 @@ def save_weights(self, epoch, save_path): def load_weights(self, load_path): load_model_state(load_path, self.encoder, optimizer=self.optimizer) + def predict(self, dataset, weight, seed): + self.set_metrics() + test_loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=self.n_worker, pin_memory=self.pin_memory) + if isinstance(weight, list): + weight = torch.FloatTensor(weight).to(self.device) + test_loss, test_metrics = self.evaluate(test_loader, self.metrics, weight) + + self.encoder.eval() + all_preds = [] + with torch.no_grad(): + for elem in test_loader: + _, preds, _ = self.step_fn(elem, weight) + all_preds += preds + + return all_preds + @gin.configurable("MLWrapper") class MLWrapper(object): @@ -421,3 +437,7 @@ def load_weights(self, load_path): else: with open(load_path, "rb") as f: self.model = joblib.load(f) + + def predict(self, dataset, weight, seed): + test_rep, _ = dataset.get_data_and_labels() + return self.model.predict_proba(test_rep) \ No newline at end of file From 83782b5fb4d79760e21dd6c38c0c6acbc513bfb2 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Tue, 10 Jan 2023 23:52:07 +0100 Subject: [PATCH 008/163] refactor folds for targets --- icu_benchmarks/data/preprocess.py | 43 +++++++++++++------------------ 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py index 0a30e711..92428dda 100644 --- a/icu_benchmarks/data/preprocess.py +++ b/icu_benchmarks/data/preprocess.py @@ -2,13 +2,14 @@ import gin import json import hashlib +import numpy as np import pandas as pd import pyarrow.parquet as pq from pathlib import Path import pickle from sklearn.impute import MissingIndicator, SimpleImputer -from sklearn.model_selection import LeavePOut, StratifiedKFold +from sklearn.model_selection import StratifiedKFold from sklearn.preprocessing import LabelEncoder from recipys.recipe import Recipe @@ -47,33 +48,25 @@ def make_single_split( stays = data["STATIC"][id].sample(frac=fraction_to_load, random_state=seed) labels = data["OUTCOME"][vars["LABEL"]] + + outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed) + dev, test = list(outer_CV.split(stays, labels))[repetition_index] + if fold_size: - train_and_val = stays.sample(fold_size, random_state=seed) - test = stays.drop(train_and_val.index) - train_and_val_labels = data["OUTCOME"].label.loc[train_and_val.index] + test = np.append(test, dev[fold_size:]) + dev = dev[:fold_size] - target_folds = StratifiedKFold(cv_folds, shuffle=True, random_state=seed) - train, val = list(target_folds.split(train_and_val, train_and_val_labels))[fold_index] + dev_stays = stays.iloc[dev] + dev_labels = labels.iloc[dev] - split = { - "train": train_and_val.iloc[train], - "val": train_and_val.iloc[val], - "test": test, - } - else: - outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed) - inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed) - - dev, test = list(outer_CV.split(stays, labels))[repetition_index] - dev_stays = stays.iloc[dev] - dev_labels = labels.iloc[dev] - train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index] - - split = { - "train": dev_stays.iloc[train], - "val": dev_stays.iloc[val], - "test": stays.iloc[test], - } + inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed) + train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index] + + split = { + "train": dev_stays.iloc[train], + "val": dev_stays.iloc[val], + "test": stays.iloc[test], + } data_split = {} for fold_name, fold in split.items(): # Loop through train / val / test From 43ebd5fae16d19ca80b7e10a38d07329f6285cb6 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Wed, 11 Jan 2023 19:17:10 +0100 Subject: [PATCH 009/163] add evaluation function to test whole dataset also redo fold for DA if it contains to little positive samples --- icu_benchmarks/cross_validation.py | 49 ++++++++++++++++ icu_benchmarks/data/preprocess.py | 89 ++++++++++++++++++++++-------- icu_benchmarks/run.py | 12 +++- 3 files changed, 125 insertions(+), 25 deletions(-) diff --git a/icu_benchmarks/cross_validation.py b/icu_benchmarks/cross_validation.py index ec79d3d8..db95ee28 100644 --- a/icu_benchmarks/cross_validation.py +++ b/icu_benchmarks/cross_validation.py @@ -75,3 +75,52 @@ def execute_repeated_cv( log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) return agg_loss / (cv_repetitions_to_train * cv_folds_to_train) + + +def evaluate( + data_dir: Path, + log_dir: Path, + seed: int, + source_dir: Path = None, + reproducible: bool = True, + debug: bool = False, + use_cache: bool = False, +) -> float: + """Preprocesses data and trains a model for each fold. + + Args: + data_dir: Path to the data directory. + log_dir: Path to the log directory. + seed: Random seed. + load_weights: Whether to load weights from source_dir. + source_dir: Path to the source directory. + cv_folds: Number of folds for cross validation. + cv_folds_to_train: Number of folds to use during training. If None, all folds are trained on. + reproducible: Whether to make torch reproducible. + debug: Whether to load less data and enable more logging. + use_cache: Whether to cache and use cached data. + test_on: Dataset to test on. Can be "test" or "val" (e.g. for hyperparameter tuning). + + Returns: + The average loss of all folds. + """ + + data = preprocess_data( + data_dir, + seed=seed, + debug=debug, + use_cache=use_cache, + test_all=True, + ) + + run_dir_seed = log_dir / f"seed_{seed}" + run_dir_seed.mkdir(parents=True, exist_ok=True) + + return train_common( + data, + log_dir=run_dir_seed, + load_weights=True, + source_dir=source_dir, + seed=seed, + reproducible=reproducible, + ) diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py index 03afc2f3..04563e9f 100644 --- a/icu_benchmarks/data/preprocess.py +++ b/icu_benchmarks/data/preprocess.py @@ -27,6 +27,7 @@ def make_single_split( seed: int = 42, debug: bool = False, fold_size: int = None, + test_all: bool = False, ) -> dict[dict[pd.DataFrame]]: """Randomly split the data into training, validation, and test set. @@ -39,6 +40,7 @@ def make_single_split( fold_index: Index of the fold to return. seed: Random seed. debug: Load less data if true. + test_all: If true, the test set will be the entire dataset. Returns: Input data divided into 'train', 'val', and 'test'. @@ -50,25 +52,42 @@ def make_single_split( stays = stays.sample(frac=0.01, random_state=seed) labels = data["OUTCOME"][vars["LABEL"]].loc[stays.index] - - outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed) - dev, test = list(outer_CV.split(stays, labels))[repetition_index] - - if fold_size: - test = np.append(test, dev[fold_size:]) - dev = dev[:fold_size] - - dev_stays = stays.iloc[dev] - dev_labels = labels.iloc[dev] - - inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed) - train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index] - - split = { - "train": dev_stays.iloc[train], - "val": dev_stays.iloc[val], - "test": stays.iloc[test], - } + if test_all: + split = { + "train": stays.iloc[0:0], + "val": stays.iloc[0:0], + "test": stays, + } + else: + outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed) + dev, test = list(outer_CV.split(stays, labels))[repetition_index] + + if fold_size: + start_index = 0 + end_index = fold_size + pre_dev = dev[start_index:end_index] + leave_for_test = dev[end_index:] + pre_dev_labels = labels.iloc[pre_dev] + while pre_dev_labels.sum() < cv_folds: + start_index += fold_size + end_index += fold_size + pre_dev = dev[start_index:end_index] + pre_dev_labels = labels.iloc[pre_dev] + leave_for_test = np.append(dev[0:start_index], dev[end_index:]) + dev = pre_dev + test = np.append(test, leave_for_test) + + dev_stays = stays.iloc[dev] + dev_labels = labels.iloc[dev] + + inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed) + train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index] + + split = { + "train": dev_stays.iloc[train], + "val": dev_stays.iloc[val], + "test": stays.iloc[test], + } data_split = {} for fold_name, fold in split.items(): # Loop through train / val / test @@ -81,17 +100,24 @@ def make_single_split( return data_split -def apply_recipe_to_splits(recipe: Recipe, data: dict[dict[pd.DataFrame]], type: str) -> dict[dict[pd.DataFrame]]: +def apply_recipe_to_splits( + recipe: Recipe, data: dict[dict[pd.DataFrame]], type: str, test_all: bool = False +) -> dict[dict[pd.DataFrame]]: """Fits and transforms the training data, then transforms the validation and test data with the recipe. Args: recipe: Object containing info about the data and steps. data: Dict containing 'train', 'val', and 'test' and types of data per split. type: Whether to apply recipe to dynamic data, static data or outcomes. + test_all: If true, the test set will be the entire dataset. Returns: Transformed data divided into 'train', 'val', and 'test'. """ + if test_all: + data["test"][type] = recipe.prep(data["test"][type]) + return data + data["train"][type] = recipe.prep() data["val"][type] = recipe.bake(data["val"][type]) data["test"][type] = recipe.bake(data["test"][type]) @@ -112,6 +138,7 @@ def preprocess_data( cv_folds: int = 5, fold_size: int = None, fold_index: int = 0, + test_all: bool = False, ) -> dict[dict[pd.DataFrame]]: """Perform loading, splitting, imputing and normalising of task data. @@ -127,6 +154,7 @@ def preprocess_data( repetition_index: Index of the repetition to return. cv_folds: Number of folds to use for cross validation. fold_index: Index of the fold to return. + test_all: If true, the test set will be the entire dataset. Returns: Preprocessed data as DataFrame in a hierarchical dict with data type (STATIC/DYNAMIC/OUTCOME) @@ -135,6 +163,8 @@ def preprocess_data( cache_dir = data_dir / "cache" if fold_size: cache_dir = cache_dir / f"T{fold_size}" + if test_all: + cache_dir = cache_dir / "test_complete" dumped_file_names = json.dumps(file_names, sort_keys=True) dumped_vars = json.dumps(vars, sort_keys=True) config_string = f"{dumped_file_names}{dumped_vars}{use_features}{seed}{repetition_index}{fold_index}{debug}".encode( @@ -153,7 +183,18 @@ def preprocess_data( data = {f: pq.read_table(data_dir / file_names[f]).to_pandas() for f in ["STATIC", "DYNAMIC", "OUTCOME"]} logging.info("Generating splits.") - data = make_single_split(data, vars, cv_repetitions, repetition_index, cv_folds, fold_index, seed=seed, debug=debug, fold_size=fold_size) + data = make_single_split( + data, + vars, + cv_repetitions, + repetition_index, + cv_folds, + fold_index, + seed=seed, + debug=debug, + fold_size=fold_size, + test_all=test_all, + ) logging.info("Preprocessing static data.") sta_rec = Recipe(data["train"]["STATIC"], [], vars["STATIC"]) @@ -162,7 +203,7 @@ def preprocess_data( sta_rec.add_step(StepSklearn(SimpleImputer(missing_values=None, strategy="most_frequent"), sel=has_type("object"))) sta_rec.add_step(StepSklearn(LabelEncoder(), sel=has_type("object"), columnwise=True)) - data = apply_recipe_to_splits(sta_rec, data, "STATIC") + data = apply_recipe_to_splits(sta_rec, data, "STATIC", test_all=test_all) logging.info("Preprocessing dynamic data.") dyn_rec = Recipe(data["train"]["DYNAMIC"], [], vars["DYNAMIC"], vars["GROUP"], vars["SEQUENCE"]) @@ -176,11 +217,11 @@ def preprocess_data( dyn_rec.add_step(StepImputeFill(method="ffill")) dyn_rec.add_step(StepImputeFill(value=0)) - data = apply_recipe_to_splits(dyn_rec, data, "DYNAMIC") + data = apply_recipe_to_splits(dyn_rec, data, "DYNAMIC", test_all=test_all) if use_cache and not cache_file.exists(): if not cache_dir.exists(): - cache_dir.mkdir() + cache_dir.mkdir(parents=True) cache_file.touch() with open(cache_file, "wb") as f: pickle.dump(data, f, pickle.HIGHEST_PROTOCOL) diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py index e786ca9e..02b07d03 100644 --- a/icu_benchmarks/run.py +++ b/icu_benchmarks/run.py @@ -6,7 +6,7 @@ from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters from utils.plotting.utils import plot_agg_results -from icu_benchmarks.cross_validation import execute_repeated_cv +from icu_benchmarks.cross_validation import execute_repeated_cv, evaluate from icu_benchmarks.run_utils import ( build_parser, create_run_dir, @@ -40,6 +40,16 @@ def main(my_args=tuple(sys.argv[1:])): run_dir = create_run_dir(log_dir) source_dir = args.source_dir gin.parse_config_file(source_dir / "train_config.gin") + evaluate( + args.data_dir, + run_dir, + args.seed, + source_dir=source_dir, + reproducible=reproducible, + debug=args.debug, + use_cache=args.cache, + ) + return else: reproducible = args.reproducible checkpoint = log_dir / args.checkpoint if args.checkpoint else None From 405129ad01aec849aa5ddc75efe98b42b35e1a6e Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Wed, 11 Jan 2023 19:17:35 +0100 Subject: [PATCH 010/163] update predict function for booster --- icu_benchmarks/models/wrappers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index 02f23174..5b59a9a2 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -439,4 +439,7 @@ def load_weights(self, load_path): def predict(self, dataset, weight, seed): test_rep, _ = dataset.get_data_and_labels() - return self.model.predict_proba(test_rep) \ No newline at end of file + if isinstance(self.model, lightgbm.basic.Booster): # If we reload a LGBM classifier + return self.model.predict(test_rep) + else: + return self.model.predict_proba(test_rep) From 7a6b01c37dcac939ab18ef5bb4b01b8ec7de71e5 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Wed, 11 Jan 2023 19:20:09 +0100 Subject: [PATCH 011/163] update domain adaptation script --- icu_benchmarks/models/domain_adaptation.py | 177 ++++++++++++++++----- icu_benchmarks/models/train.py | 3 + icu_benchmarks/run.py | 11 ++ icu_benchmarks/run_utils.py | 3 + 4 files changed, 151 insertions(+), 43 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 7f527848..6959437d 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -7,10 +7,14 @@ import pandas as pd from pathlib import Path from skopt import gp_minimize +from sklearn.metrics import log_loss from icu_benchmarks.data.loader import RICUDataset +from icu_benchmarks.data.preprocess import preprocess_data +from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters +from icu_benchmarks.models.train import train_common from icu_benchmarks.models.wrappers import MLWrapper -from icu_benchmarks.models.utils import save_config_file +from icu_benchmarks.run_utils import log_full_line def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, model_dir: Path, log_dir: Path): @@ -25,6 +29,7 @@ def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, mod Returns: Tuple of predictions and labels. """ + model = MLWrapper() model.set_log_dir(log_dir) if (model_dir / "model.torch").is_file(): model.load_weights(model_dir / "model.torch") @@ -34,11 +39,10 @@ def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, mod model.load_weights(model_dir / "model.joblib") else: raise Exception("No weights to load at path : {}".format(model_dir / "model.*")) - return model.predict(dataset) + return model.predict(dataset, None, None) -@gin.configurable("domain_adaptation") -def evaluate_model_combination( +def get_predictions_for_all_models( data: dict[str, pd.DataFrame], log_dir: Path, source_dir: Path = None, @@ -46,8 +50,8 @@ def evaluate_model_combination( reproducible: bool = True, model: object = MLWrapper, weight: str = None, - test_on: str = "Test", - + test_on: str = "test", + target_model: object = None, ): """Common wrapper to train all benchmarked models. @@ -58,6 +62,7 @@ def evaluate_model_combination( seed: Common seed used for any random operation. reproducible: If set to true, set torch to run reproducibly. """ + model = MLWrapper() # Setting the seed before gin parsing os.environ["PYTHONHASHSEED"] = str(seed) @@ -71,36 +76,56 @@ def evaluate_model_combination( torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False - dataset = RICUDataset(data, split="train") test_dataset = RICUDataset(data, split=test_on) - weight = dataset.get_balance() + val_dataset = RICUDataset(data, split="val") + # weight = test_dataset.get_balance() + _, val_labels = val_dataset.get_data_and_labels() + _, test_labels = test_dataset.get_data_and_labels() - predictions = [] - for source_dataset in source_dir.iterdir(): - model_dir = source_dir / source_dataset - predictions.append(get_predictions_for_single_model(model, dataset, model_dir, log_dir)) + val_predictions = {} + test_predictions = {} + for model_dir in source_dir.iterdir(): + if model_dir.is_dir(): + val_predictions[model_dir.name] = get_predictions_for_single_model(model, val_dataset, model_dir, log_dir) + test_predictions[model_dir.name] = get_predictions_for_single_model(model, test_dataset, model_dir, log_dir) + val_predictions["target"] = target_model.output_transform(target_model.predict(val_dataset, None, None)) + test_predictions["target"] = target_model.output_transform(target_model.predict(test_dataset, None, None)) - test_pred = np.average(predictions, axis=0, weights=dataset_weights) + return val_predictions, val_labels, test_predictions, test_labels - # save config file again to capture missing gin parameters - return log_loss(test_label, test_pred) +def get_model_metrics(test_predictions: np.ndarray, test_labels: np.ndarray): + """Evaluate a combination of models. -@gin.configurable("tune_hyperparameters") -def choose_and_bind_hyperparameters( + Args: + test_predictions: Predictions for test set. + test_labels: Labels for test set. + """ + model = MLWrapper() + model.set_metrics(test_labels) + test_metric_results = {} + for name, metric in model.metrics.items(): + value = metric(model.label_transform(test_labels), test_predictions) + test_metric_results[name] = value + # Only log float values + if isinstance(value, np.float): + logging.info("test {}: {}".format(name, value)) + return test_metric_results + + +def domain_adaptation( data_dir: Path, - log_dir: Path, + run_dir: Path, seed: int, - n_initial_points: int = 3, - n_calls: int = 20, - folds_to_tune_on: int = gin.REQUIRED, + n_initial_points: int = 10, + n_calls: int = 50, debug: bool = False, ): """Choose hyperparameters to tune and bind them to gin. Args: data_dir: Path to the data directory. - log_dir: Path to the log directory. + run_dir: Path to the log directory. seed: Random seed. n_initial_points: Number of initial points to explore. n_calls: Number of iterations to optimize the hyperparameters. @@ -111,24 +136,90 @@ def choose_and_bind_hyperparameters( ValueError: If checkpoint is not None and the checkpoint does not exist. """ - def convex_model_combination(hyperparams): - return preprocess_and_train_for_folds( - data_dir, - Path(temp_dir), - seed, - num_folds_to_train=folds_to_tune_on, - use_cache=True, - test_on="val", - debug=debug, - ) - - res = gp_minimize( - bind_params_and_train, - hyperparams_bounds, - n_calls=n_calls, - n_initial_points=n_initial_points, - random_state=seed, - noise=1e-10, # the models are deterministic, but noise is needed for the gp to work - ) - - print(res) + # train target baselines + + + agg_loss = 0 + cv_repetitions = 5 + cv_repetitions_to_train = 5 + cv_folds = 5 + cv_folds_to_train = 5 + datasets = ["hirid", "eicu", "aumc", "miiv"] + weight_bounds = ((0.0001, 1.0) for _ in range(len(datasets))) + task_dir = Path("../data/mortality24/") + + # evaluate models on same test split + for dataset in datasets: + log_full_line(f"STARTING {dataset}", char="#", num_newlines=2) + choose_and_bind_hyperparameters(True, task_dir / dataset, run_dir, seed, debug=debug) + for repetition in range(cv_repetitions_to_train): + for fold_index in range(cv_folds_to_train): + data = preprocess_data( + data_dir, + seed=seed, + debug=debug, + use_cache=True, + cv_repetitions=cv_repetitions, + repetition_index=repetition, + cv_folds=cv_folds, + fold_index=fold_index, + ) + + run_dir_seed = run_dir / f"seed_{seed}" / f"fold_{fold_index}" + run_dir_seed.mkdir(parents=True, exist_ok=True) + + # evaluate target baselines + curr_loss, target_model = train_common( + data, + log_dir=run_dir_seed, + seed=seed, + reproducible=True, + test_on="test", + return_model=True, + ) + agg_loss += curr_loss + + val_predictions, val_labels, test_predictions, test_labels = get_predictions_for_all_models( + data, + run_dir, + source_dir=Path("../models/best_models/Mortality24/LGBMClassifier"), + seed=seed, + target_model=target_model, + ) + + # evaluate source baselines and oracle + for source in datasets: + if source == dataset: + continue + logging.info("Evaluating model: {}".format(source)) + get_model_metrics(test_predictions[source], test_labels) + + # evaluate convex combination of models + val_predictions_wo_oracle = [pred for source, pred in val_predictions.items() if source != dataset] + test_predictions_wo_oracle = [pred for source, pred in test_predictions.items() if source != dataset] + def convex_model_combination(model_weights): + val_pred = np.average(val_predictions_wo_oracle, axis=0, weights=model_weights) + return log_loss(val_labels, val_pred) + + logging.disable(logging.INFO) + res = gp_minimize( + convex_model_combination, + weight_bounds, + n_calls=n_calls, + n_initial_points=n_initial_points, + random_state=seed, + noise=1e-10, # the models are deterministic, but noise is needed for the gp to work + ) + logging.disable(logging.NOTSET) + best_model_weights = res.x + logging.info(best_model_weights) + test_pred = np.average(test_predictions_wo_oracle, axis=0, weights=best_model_weights) + get_model_metrics(test_pred, test_labels) + + + log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) + log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) + log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=10) + + return agg_loss / (cv_repetitions_to_train * cv_folds_to_train) + \ No newline at end of file diff --git a/icu_benchmarks/models/train.py b/icu_benchmarks/models/train.py index dc38f9bc..478dded0 100644 --- a/icu_benchmarks/models/train.py +++ b/icu_benchmarks/models/train.py @@ -24,6 +24,7 @@ def train_common( model: object = MLWrapper, weight: str = None, test_on: str = "test", + return_model: bool = False, ): """Common wrapper to train all benchmarked models. @@ -75,4 +76,6 @@ def train_common( # save config file again to capture missing gin parameters save_config_file(log_dir) + if return_model: + return model.test(test_dataset, weight, seed), model return model.test(test_dataset, weight, seed) diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py index 02b07d03..266bef93 100644 --- a/icu_benchmarks/run.py +++ b/icu_benchmarks/run.py @@ -5,6 +5,7 @@ from pathlib import Path from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters +from icu_benchmarks.models.domain_adaptation import domain_adaptation from utils.plotting.utils import plot_agg_results from icu_benchmarks.cross_validation import execute_repeated_cv, evaluate from icu_benchmarks.run_utils import ( @@ -50,6 +51,16 @@ def main(my_args=tuple(sys.argv[1:])): use_cache=args.cache, ) return + if args.command == "da": + run_dir = create_run_dir(log_dir) + gin_config_files = ( + [Path(f"configs/experiments/{args.experiment}.gin")] + if args.experiment + else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")] + ) + gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False) + domain_adaptation(args.data_dir, run_dir, args.seed) + return else: reproducible = args.reproducible checkpoint = log_dir / args.checkpoint if args.checkpoint else None diff --git a/icu_benchmarks/run_utils.py b/icu_benchmarks/run_utils.py index 0901df59..29dc5378 100644 --- a/icu_benchmarks/run_utils.py +++ b/icu_benchmarks/run_utils.py @@ -48,6 +48,9 @@ def build_parser() -> ArgumentParser: evaluate.add_argument("-sn", "--source-name", required=True, type=Path, help="Name of the source dataset.") evaluate.add_argument("--source-dir", required=True, type=Path, help="Directory containing gin and model weights.") + # DOMAIN ADAPTATION ARGUMENTS + prep_and_train = subparsers.add_parser("da", help="Run DA experiment.", parents=[parent_parser]) + return parser From 7515597498319dfd5a787dc16ddcf860691cc7ad Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Wed, 11 Jan 2023 23:28:50 +0100 Subject: [PATCH 012/163] remove weight tuning, adapt for LR --- icu_benchmarks/models/domain_adaptation.py | 103 ++++++++------------- icu_benchmarks/run.py | 2 +- 2 files changed, 42 insertions(+), 63 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 6959437d..83494b36 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -17,11 +17,11 @@ from icu_benchmarks.run_utils import log_full_line -def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, model_dir: Path, log_dir: Path): +def get_predictions_for_single_model(target_model: object, dataset: RICUDataset, model_dir: Path, log_dir: Path): """Get predictions for a single model. Args: - model: Model to get predictions for. + target_model: Model to get predictions for. dataset: Dataset to get predictions for. model_dir: Path to directory where model weights are stored. log_dir: Path to directory where model output should be saved. @@ -29,7 +29,7 @@ def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, mod Returns: Tuple of predictions and labels. """ - model = MLWrapper() + model = MLWrapper(target_model.model) model.set_log_dir(log_dir) if (model_dir / "model.torch").is_file(): model.load_weights(model_dir / "model.torch") @@ -43,15 +43,14 @@ def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, mod def get_predictions_for_all_models( + target_model: object, data: dict[str, pd.DataFrame], log_dir: Path, source_dir: Path = None, seed: int = 1234, reproducible: bool = True, - model: object = MLWrapper, - weight: str = None, test_on: str = "test", - target_model: object = None, + source_datasets: list = None, ): """Common wrapper to train all benchmarked models. @@ -62,8 +61,6 @@ def get_predictions_for_all_models( seed: Common seed used for any random operation. reproducible: If set to true, set torch to run reproducibly. """ - model = MLWrapper() - # Setting the seed before gin parsing os.environ["PYTHONHASHSEED"] = str(seed) random.seed(seed) @@ -77,32 +74,28 @@ def get_predictions_for_all_models( torch.backends.cudnn.benchmark = False test_dataset = RICUDataset(data, split=test_on) - val_dataset = RICUDataset(data, split="val") - # weight = test_dataset.get_balance() - _, val_labels = val_dataset.get_data_and_labels() _, test_labels = test_dataset.get_data_and_labels() - val_predictions = {} test_predictions = {} - for model_dir in source_dir.iterdir(): - if model_dir.is_dir(): - val_predictions[model_dir.name] = get_predictions_for_single_model(model, val_dataset, model_dir, log_dir) - test_predictions[model_dir.name] = get_predictions_for_single_model(model, test_dataset, model_dir, log_dir) - val_predictions["target"] = target_model.output_transform(target_model.predict(val_dataset, None, None)) + for source in source_datasets: + model_dir = source_dir / source + test_predictions[model_dir.name] = get_predictions_for_single_model(target_model, test_dataset, model_dir, log_dir) test_predictions["target"] = target_model.output_transform(target_model.predict(test_dataset, None, None)) - return val_predictions, val_labels, test_predictions, test_labels + for name, prediction in test_predictions.items(): + if prediction.ndim == 2: + test_predictions[name] = prediction[:, 1] + + return test_predictions, test_labels -def get_model_metrics(test_predictions: np.ndarray, test_labels: np.ndarray): +def get_model_metrics(model: object, test_predictions: np.ndarray, test_labels: np.ndarray): """Evaluate a combination of models. Args: test_predictions: Predictions for test set. test_labels: Labels for test set. """ - model = MLWrapper() - model.set_metrics(test_labels) test_metric_results = {} for name, metric in model.metrics.items(): value = metric(model.label_transform(test_labels), test_predictions) @@ -114,11 +107,10 @@ def get_model_metrics(test_predictions: np.ndarray, test_labels: np.ndarray): def domain_adaptation( - data_dir: Path, run_dir: Path, seed: int, - n_initial_points: int = 10, - n_calls: int = 50, + task: str = None, + model: str = None, debug: bool = False, ): """Choose hyperparameters to tune and bind them to gin. @@ -135,23 +127,22 @@ def domain_adaptation( Raises: ValueError: If checkpoint is not None and the checkpoint does not exist. """ - - # train target baselines - - agg_loss = 0 cv_repetitions = 5 - cv_repetitions_to_train = 5 + cv_repetitions_to_train = 3 cv_folds = 5 cv_folds_to_train = 5 - datasets = ["hirid", "eicu", "aumc", "miiv"] - weight_bounds = ((0.0001, 1.0) for _ in range(len(datasets))) - task_dir = Path("../data/mortality24/") + datasets = ["hirid", "aumc", "miiv"] + weights = [1] * (len(datasets) - 1) + [1] + task_dir = Path("../data/") / task + model_path = Path("../models/best_models/") # evaluate models on same test split for dataset in datasets: + data_dir = task_dir / dataset + source_datasets = [d for d in datasets if d != dataset] log_full_line(f"STARTING {dataset}", char="#", num_newlines=2) - choose_and_bind_hyperparameters(True, task_dir / dataset, run_dir, seed, debug=debug) + choose_and_bind_hyperparameters(True, data_dir, run_dir, seed, debug=debug) for repetition in range(cv_repetitions_to_train): for fold_index in range(cv_folds_to_train): data = preprocess_data( @@ -179,43 +170,31 @@ def domain_adaptation( ) agg_loss += curr_loss - val_predictions, val_labels, test_predictions, test_labels = get_predictions_for_all_models( + test_predictions, test_labels = get_predictions_for_all_models( + target_model, data, run_dir, - source_dir=Path("../models/best_models/Mortality24/LGBMClassifier"), + source_dir=model_path / task / model, seed=seed, - target_model=target_model, + source_datasets=source_datasets, ) - # evaluate source baselines and oracle - for source in datasets: - if source == dataset: - continue + # evaluate source baselines + for source in source_datasets: logging.info("Evaluating model: {}".format(source)) - get_model_metrics(test_predictions[source], test_labels) + get_model_metrics(target_model, test_predictions[source], test_labels) # evaluate convex combination of models - val_predictions_wo_oracle = [pred for source, pred in val_predictions.items() if source != dataset] - test_predictions_wo_oracle = [pred for source, pred in test_predictions.items() if source != dataset] - def convex_model_combination(model_weights): - val_pred = np.average(val_predictions_wo_oracle, axis=0, weights=model_weights) - return log_loss(val_labels, val_pred) - - logging.disable(logging.INFO) - res = gp_minimize( - convex_model_combination, - weight_bounds, - n_calls=n_calls, - n_initial_points=n_initial_points, - random_state=seed, - noise=1e-10, # the models are deterministic, but noise is needed for the gp to work - ) - logging.disable(logging.NOTSET) - best_model_weights = res.x - logging.info(best_model_weights) - test_pred = np.average(test_predictions_wo_oracle, axis=0, weights=best_model_weights) - get_model_metrics(test_pred, test_labels) - + test_predictions_list = list(test_predictions.values()) + + logging.info("Evaluating convex combination of models.") + target_weights = [0.1, 0.2, 0.5, 1, 2, 5] + weights = [1] * (len(datasets) - 1) + for t in target_weights: + w = weights + [t * sum(weights)] + logging.info(f"Evaluating target weight: {t}") + test_pred = np.average(test_predictions_list, axis=0, weights=w) + get_model_metrics(target_model, test_pred, test_labels) log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py index 266bef93..806586cc 100644 --- a/icu_benchmarks/run.py +++ b/icu_benchmarks/run.py @@ -59,7 +59,7 @@ def main(my_args=tuple(sys.argv[1:])): else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")] ) gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False) - domain_adaptation(args.data_dir, run_dir, args.seed) + domain_adaptation(run_dir, args.seed, args.task_name, model) return else: reproducible = args.reproducible From 0789bcfbe67147356cc4fa810dbe8216095f6b82 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Wed, 11 Jan 2023 23:45:51 +0100 Subject: [PATCH 013/163] iterate over target sizes --- icu_benchmarks/models/domain_adaptation.py | 110 ++++++++++----------- icu_benchmarks/models/train.py | 2 +- 2 files changed, 54 insertions(+), 58 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 83494b36..5b3e75da 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -127,11 +127,11 @@ def domain_adaptation( Raises: ValueError: If checkpoint is not None and the checkpoint does not exist. """ - agg_loss = 0 cv_repetitions = 5 cv_repetitions_to_train = 3 cv_folds = 5 cv_folds_to_train = 5 + target_sizes = [500, 1000, 2000] datasets = ["hirid", "aumc", "miiv"] weights = [1] * (len(datasets) - 1) + [1] task_dir = Path("../data/") / task @@ -142,62 +142,58 @@ def domain_adaptation( data_dir = task_dir / dataset source_datasets = [d for d in datasets if d != dataset] log_full_line(f"STARTING {dataset}", char="#", num_newlines=2) - choose_and_bind_hyperparameters(True, data_dir, run_dir, seed, debug=debug) - for repetition in range(cv_repetitions_to_train): - for fold_index in range(cv_folds_to_train): - data = preprocess_data( - data_dir, - seed=seed, - debug=debug, - use_cache=True, - cv_repetitions=cv_repetitions, - repetition_index=repetition, - cv_folds=cv_folds, - fold_index=fold_index, - ) - - run_dir_seed = run_dir / f"seed_{seed}" / f"fold_{fold_index}" - run_dir_seed.mkdir(parents=True, exist_ok=True) - - # evaluate target baselines - curr_loss, target_model = train_common( - data, - log_dir=run_dir_seed, - seed=seed, - reproducible=True, - test_on="test", - return_model=True, - ) - agg_loss += curr_loss - - test_predictions, test_labels = get_predictions_for_all_models( - target_model, - data, - run_dir, - source_dir=model_path / task / model, - seed=seed, - source_datasets=source_datasets, - ) - - # evaluate source baselines - for source in source_datasets: - logging.info("Evaluating model: {}".format(source)) - get_model_metrics(target_model, test_predictions[source], test_labels) - - # evaluate convex combination of models - test_predictions_list = list(test_predictions.values()) - - logging.info("Evaluating convex combination of models.") - target_weights = [0.1, 0.2, 0.5, 1, 2, 5] - weights = [1] * (len(datasets) - 1) - for t in target_weights: - w = weights + [t * sum(weights)] - logging.info(f"Evaluating target weight: {t}") - test_pred = np.average(test_predictions_list, axis=0, weights=w) - get_model_metrics(target_model, test_pred, test_labels) - - log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) - log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) + for target_size in target_sizes: + log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1) + gin.bind_parameter("preprocess.fold_size", target_size) + choose_and_bind_hyperparameters(True, data_dir, run_dir, seed, debug=debug) + for repetition in range(cv_repetitions_to_train): + for fold_index in range(cv_folds_to_train): + data = preprocess_data( + data_dir, + seed=seed, + debug=debug, + use_cache=True, + cv_repetitions=cv_repetitions, + repetition_index=repetition, + cv_folds=cv_folds, + fold_index=fold_index, + ) + + run_dir_seed = run_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}" + run_dir_seed.mkdir(parents=True, exist_ok=True) + + # evaluate target baselines + target_model = train_common(data, log_dir=run_dir_seed, seed=seed, return_model=True) + + test_predictions, test_labels = get_predictions_for_all_models( + target_model, + data, + run_dir, + source_dir=model_path / task / model, + seed=seed, + source_datasets=source_datasets, + ) + + # evaluate source baselines + for baseline, predictions in test_predictions.items(): + logging.info("Evaluating model: {}".format(baseline)) + get_model_metrics(target_model, predictions, test_labels) + + # evaluate convex combination of models + test_predictions_list = list(test_predictions.values()) + + logging.info("Evaluating convex combination of models.") + target_weights = [0.1, 0.2, 0.5, 1, 2, 5] + weights = [1] * (len(datasets) - 1) + for t in target_weights: + w = weights + [t * sum(weights)] + logging.info(f"Evaluating target weight: {t}") + test_pred = np.average(test_predictions_list, axis=0, weights=w) + get_model_metrics(target_model, test_pred, test_labels) + + log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) + log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) + log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5) log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=10) return agg_loss / (cv_repetitions_to_train * cv_folds_to_train) diff --git a/icu_benchmarks/models/train.py b/icu_benchmarks/models/train.py index 478dded0..c81189d1 100644 --- a/icu_benchmarks/models/train.py +++ b/icu_benchmarks/models/train.py @@ -77,5 +77,5 @@ def train_common( # save config file again to capture missing gin parameters save_config_file(log_dir) if return_model: - return model.test(test_dataset, weight, seed), model + return model return model.test(test_dataset, weight, seed) From aa3345a0ae685a789e9e485d40586a5d4f83f945 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 00:44:41 +0100 Subject: [PATCH 014/163] aggregate and average DA metrics --- icu_benchmarks/models/domain_adaptation.py | 58 +++++++++++++++++----- icu_benchmarks/run.py | 2 +- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 5b3e75da..ab66b314 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -1,3 +1,4 @@ +import json import os import random import gin @@ -6,14 +7,14 @@ import numpy as np import pandas as pd from pathlib import Path -from skopt import gp_minimize -from sklearn.metrics import log_loss +import scipy.stats as stats from icu_benchmarks.data.loader import RICUDataset from icu_benchmarks.data.preprocess import preprocess_data from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters from icu_benchmarks.models.train import train_common from icu_benchmarks.models.wrappers import MLWrapper +from icu_benchmarks.models.utils import JsonNumpyEncoder from icu_benchmarks.run_utils import log_full_line @@ -128,9 +129,9 @@ def domain_adaptation( ValueError: If checkpoint is not None and the checkpoint does not exist. """ cv_repetitions = 5 - cv_repetitions_to_train = 3 + cv_repetitions_to_train = 2 cv_folds = 5 - cv_folds_to_train = 5 + cv_folds_to_train = 2 target_sizes = [500, 1000, 2000] datasets = ["hirid", "aumc", "miiv"] weights = [1] * (len(datasets) - 1) + [1] @@ -145,9 +146,15 @@ def domain_adaptation( for target_size in target_sizes: log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1) gin.bind_parameter("preprocess.fold_size", target_size) - choose_and_bind_hyperparameters(True, data_dir, run_dir, seed, debug=debug) + log_dir = run_dir / task / dataset / f"target_{target_size}" + log_dir.mkdir(parents=True, exist_ok=True) + choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug) + results = {} for repetition in range(cv_repetitions_to_train): for fold_index in range(cv_folds_to_train): + results[f"{repetition}_{fold_index}"] = {} + fold_results = results[f"{repetition}_{fold_index}"] + data = preprocess_data( data_dir, seed=seed, @@ -159,16 +166,16 @@ def domain_adaptation( fold_index=fold_index, ) - run_dir_seed = run_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}" - run_dir_seed.mkdir(parents=True, exist_ok=True) + log_dir_fold = log_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}" + log_dir_fold.mkdir(parents=True, exist_ok=True) # evaluate target baselines - target_model = train_common(data, log_dir=run_dir_seed, seed=seed, return_model=True) + target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) test_predictions, test_labels = get_predictions_for_all_models( target_model, data, - run_dir, + log_dir_fold, source_dir=model_path / task / model, seed=seed, source_datasets=source_datasets, @@ -177,7 +184,8 @@ def domain_adaptation( # evaluate source baselines for baseline, predictions in test_predictions.items(): logging.info("Evaluating model: {}".format(baseline)) - get_model_metrics(target_model, predictions, test_labels) + fold_results[baseline] = get_model_metrics(target_model, predictions, test_labels) + # evaluate convex combination of models test_predictions_list = list(test_predictions.values()) @@ -189,12 +197,36 @@ def domain_adaptation( w = weights + [t * sum(weights)] logging.info(f"Evaluating target weight: {t}") test_pred = np.average(test_predictions_list, axis=0, weights=w) - get_model_metrics(target_model, test_pred, test_labels) + fold_results[f"convex_combination_{t}"] = get_model_metrics(target_model, test_pred, test_labels) log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5) - log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=10) - return agg_loss / (cv_repetitions_to_train * cv_folds_to_train) + source_metrics = {} + for result in results.values(): + for source, source_stats in result.items(): + for metric, score in source_stats.items(): + if isinstance(score, (float, int)): + source_metrics.setdefault(source, {}).setdefault(metric, []).append(score) + + # Compute statistical metric over aggregated results + averaged_metrics = {} + for source, source_stats in source_metrics.items(): + for metric, scores in source_stats.items(): + averaged_metrics.setdefault(source, {}).setdefault(metric, []).append({ + "avg": np.mean(scores), + "std": np.std(scores), + "CI_0.95": stats.t.interval(0.95, len(scores) - 1, loc=np.mean(scores), scale=stats.sem(scores)), + }) + + with open(log_dir / "aggregated_source_metrics.json", "w") as f: + json.dump(results, f, cls=JsonNumpyEncoder) + + with open(log_dir / "averaged_source_metrics.json", "w") as f: + json.dump(averaged_metrics, f, cls=JsonNumpyEncoder) + + logging.info(f"Averaged results: {averaged_metrics}") + + log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=10) \ No newline at end of file diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py index 806586cc..e1ec52c2 100644 --- a/icu_benchmarks/run.py +++ b/icu_benchmarks/run.py @@ -52,7 +52,7 @@ def main(my_args=tuple(sys.argv[1:])): ) return if args.command == "da": - run_dir = create_run_dir(log_dir) + run_dir = create_run_dir(args.log_dir) gin_config_files = ( [Path(f"configs/experiments/{args.experiment}.gin")] if args.experiment From 3da7f6b7ef46b6f10b5c3b9718fb1665ee6ea96d Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 00:45:35 +0100 Subject: [PATCH 015/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index ab66b314..ff6437d6 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -201,7 +201,6 @@ def domain_adaptation( log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) - log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5) source_metrics = {} for result in results.values(): @@ -227,6 +226,7 @@ def domain_adaptation( json.dump(averaged_metrics, f, cls=JsonNumpyEncoder) logging.info(f"Averaged results: {averaged_metrics}") + log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5) log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=10) \ No newline at end of file From 6f6a82eb597237ad5749eef8e50010c14d9ed4a4 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 00:57:54 +0100 Subject: [PATCH 016/163] use data_dir for da --- icu_benchmarks/models/domain_adaptation.py | 3 ++- icu_benchmarks/run.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index ff6437d6..8b3006ed 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -108,6 +108,7 @@ def get_model_metrics(model: object, test_predictions: np.ndarray, test_labels: def domain_adaptation( + data_dir: Path, run_dir: Path, seed: int, task: str = None, @@ -135,7 +136,7 @@ def domain_adaptation( target_sizes = [500, 1000, 2000] datasets = ["hirid", "aumc", "miiv"] weights = [1] * (len(datasets) - 1) + [1] - task_dir = Path("../data/") / task + task_dir = data_dir / task model_path = Path("../models/best_models/") # evaluate models on same test split diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py index e1ec52c2..af803f24 100644 --- a/icu_benchmarks/run.py +++ b/icu_benchmarks/run.py @@ -59,7 +59,7 @@ def main(my_args=tuple(sys.argv[1:])): else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")] ) gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False) - domain_adaptation(run_dir, args.seed, args.task_name, model) + domain_adaptation(args.data_dir, run_dir, args.seed, args.task_name, model) return else: reproducible = args.reproducible From ccb8bbe0c598d9049858fa3a148b91f0112b9921 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 01:27:25 +0100 Subject: [PATCH 017/163] disable confusion matrix --- icu_benchmarks/models/metric_constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/metric_constants.py b/icu_benchmarks/models/metric_constants.py index 97d64a08..cebd62b4 100644 --- a/icu_benchmarks/models/metric_constants.py +++ b/icu_benchmarks/models/metric_constants.py @@ -51,7 +51,7 @@ class DLMetrics: BINARY_CLASSIFICATION = { "AUC": ROC_AUC(), "Calibration_Curve": CalibrationCurve(), - "Confusion_Matrix": ConfusionMatrix(num_classes=2), + # "Confusion_Matrix": ConfusionMatrix(num_classes=2), "PR": AveragePrecision(), "PRC": PrecisionRecallCurve(), "ROC": RocCurve(), From 1b6319f7f8f40a9fa4a4740ca04c62f1519bbc05 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 01:30:50 +0100 Subject: [PATCH 018/163] Update Transformer.gin --- configs/models/Transformer.gin | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin index 7e89cf2a..42faa756 100644 --- a/configs/models/Transformer.gin +++ b/configs/models/Transformer.gin @@ -35,6 +35,6 @@ model/hyperparameter.dropout = (0.0, 0.4) model/hyperparameter.dropout_att = (0.0, 0.4) tune_hyperparameters.scopes = ["model", "optimizer"] -tune_hyperparameters.n_initial_points = 5 -tune_hyperparameters.n_calls = 30 +tune_hyperparameters.n_initial_points = 2 +tune_hyperparameters.n_calls = 4 tune_hyperparameters.folds_to_tune_on = 2 From bb297d809ec43be73e853dcec7da63d47d14a23c Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 01:39:41 +0100 Subject: [PATCH 019/163] rename encoder to model in wrapper --- icu_benchmarks/models/wrappers.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index 5b59a9a2..5ef4a0e6 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -55,10 +55,10 @@ def __init__(self, encoder=LSTMNet, loss=torch.nn.functional.cross_entropy, opti self.pin_memory = pin_memory self.n_worker = n_worker - self.encoder = encoder - self.encoder.to(device) + self.model = encoder + self.model.to(device) self.loss = loss - self.optimizer = optimizer_fn(self.encoder.parameters()) + self.optimizer = optimizer_fn(self.model.parameters()) self.scaler = None def set_log_dir(self, log_dir: Path): @@ -82,12 +82,12 @@ def softmax_multi_output_transform(output): # Binary classification # output transform is not applied for contrib metrics so we do our own. - if self.encoder.logit.out_features == 2: + if self.model.logit.out_features == 2: self.output_transform = softmax_binary_output_transform self.metrics = DLMetrics.BINARY_CLASSIFICATION # Regression - elif self.encoder.logit.out_features == 1: + elif self.model.logit.out_features == 1: self.output_transform = lambda x: x if self.scaler is not None: self.metrics = {"MAE": MAE(invert_transform=self.scaler.inverse_transform)} @@ -119,7 +119,7 @@ def step_fn(self, element, loss_weight=None): data = data.float().to(self.device) else: raise Exception("Loader should return either (data, label) or (data, label, mask)") - out = self.encoder(data) + out = self.model(data) if len(out) == 2 and isinstance(out, tuple): out, aux_loss = out else: @@ -135,7 +135,7 @@ def step_fn(self, element, loss_weight=None): def _do_training(self, train_loader, weight, metrics): # Training epoch - self.encoder.train() + self.model.train() agg_train_loss = 0 for elem in tqdm(train_loader, leave=False): loss, preds, target = self.step_fn(elem, weight) @@ -265,7 +265,7 @@ def test(self, dataset, weight, seed): return test_loss def evaluate(self, eval_loader, metrics, weight): - self.encoder.eval() + self.model.eval() agg_eval_loss = 0 with torch.no_grad(): @@ -283,10 +283,10 @@ def evaluate(self, eval_loader, metrics, weight): return eval_loss, eval_metric_results def save_weights(self, epoch, save_path): - save_model(self.encoder, self.optimizer, epoch, save_path) + save_model(self.model, self.optimizer, epoch, save_path) def load_weights(self, load_path): - load_model_state(load_path, self.encoder, optimizer=self.optimizer) + load_model_state(load_path, self.model, optimizer=self.optimizer) def predict(self, dataset, weight, seed): self.set_metrics() @@ -295,7 +295,7 @@ def predict(self, dataset, weight, seed): weight = torch.FloatTensor(weight).to(self.device) test_loss, test_metrics = self.evaluate(test_loader, self.metrics, weight) - self.encoder.eval() + self.model.eval() all_preds = [] with torch.no_grad(): for elem in test_loader: From 119ad353b0d927b764f5c466e1de8eaec1e79ce7 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 01:46:38 +0100 Subject: [PATCH 020/163] fix model path --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 8b3006ed..f43b966c 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -137,7 +137,7 @@ def domain_adaptation( datasets = ["hirid", "aumc", "miiv"] weights = [1] * (len(datasets) - 1) + [1] task_dir = data_dir / task - model_path = Path("../models/best_models/") + model_path = Path("../yaib_models/best_models/") # evaluate models on same test split for dataset in datasets: From 7ffbb4bfdc0b649e1a4983ed41428850c293833d Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 01:46:42 +0100 Subject: [PATCH 021/163] Update Transformer.gin --- configs/models/Transformer.gin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin index 42faa756..7085e672 100644 --- a/configs/models/Transformer.gin +++ b/configs/models/Transformer.gin @@ -14,7 +14,7 @@ DLWrapper.optimizer_fn = @Adam DLWrapper.train.epochs = 1000 DLWrapper.train.batch_size = 64 -DLWrapper.train.patience = 10 +DLWrapper.train.patience = 5 DLWrapper.train.min_delta = 1e-4 # Optimizer params From 12036b5b9e87cd9a2efb41af5ae7b8eedd8e7b00 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 01:59:09 +0100 Subject: [PATCH 022/163] load correct wrapper --- icu_benchmarks/models/domain_adaptation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index f43b966c..ff1b9122 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -13,7 +13,7 @@ from icu_benchmarks.data.preprocess import preprocess_data from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters from icu_benchmarks.models.train import train_common -from icu_benchmarks.models.wrappers import MLWrapper +from icu_benchmarks.models.wrappers import DLWrapper, MLWrapper from icu_benchmarks.models.utils import JsonNumpyEncoder from icu_benchmarks.run_utils import log_full_line @@ -30,7 +30,10 @@ def get_predictions_for_single_model(target_model: object, dataset: RICUDataset, Returns: Tuple of predictions and labels. """ - model = MLWrapper(target_model.model) + if isinstance(target_model, DLWrapper): + model = DLWrapper(target_model.model) + else: + model = MLWrapper(target_model.model) model.set_log_dir(log_dir) if (model_dir / "model.torch").is_file(): model.load_weights(model_dir / "model.torch") From 40642ea7479136300897f2d9bd02e8f4b495edf9 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 02:06:05 +0100 Subject: [PATCH 023/163] initialize wrapper without model --- icu_benchmarks/models/domain_adaptation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index ff1b9122..4af228ad 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -31,9 +31,9 @@ def get_predictions_for_single_model(target_model: object, dataset: RICUDataset, Tuple of predictions and labels. """ if isinstance(target_model, DLWrapper): - model = DLWrapper(target_model.model) + model = DLWrapper() else: - model = MLWrapper(target_model.model) + model = MLWrapper() model.set_log_dir(log_dir) if (model_dir / "model.torch").is_file(): model.load_weights(model_dir / "model.torch") From c1e322240efc5177a02049c829355fa4a9a30786 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 02:14:02 +0100 Subject: [PATCH 024/163] instantiate encoder in wrapper --- configs/models/Transformer.gin | 2 +- icu_benchmarks/models/wrappers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin index 7085e672..1fe0a78d 100644 --- a/configs/models/Transformer.gin +++ b/configs/models/Transformer.gin @@ -9,7 +9,7 @@ preprocess.use_features = False # Train params train_common.model = @DLWrapper() -DLWrapper.encoder = @Transformer() +DLWrapper.encoder = @Transformer DLWrapper.optimizer_fn = @Adam DLWrapper.train.epochs = 1000 diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index 5ef4a0e6..fd36303e 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -55,7 +55,7 @@ def __init__(self, encoder=LSTMNet, loss=torch.nn.functional.cross_entropy, opti self.pin_memory = pin_memory self.n_worker = n_worker - self.model = encoder + self.model = encoder() self.model.to(device) self.loss = loss self.optimizer = optimizer_fn(self.model.parameters()) From 98662158f29a3701b04b4ae1e1cd463c6c0f24e0 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 02:22:00 +0100 Subject: [PATCH 025/163] revert instantiation --- configs/models/Transformer.gin | 2 +- icu_benchmarks/models/wrappers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin index 1fe0a78d..7085e672 100644 --- a/configs/models/Transformer.gin +++ b/configs/models/Transformer.gin @@ -9,7 +9,7 @@ preprocess.use_features = False # Train params train_common.model = @DLWrapper() -DLWrapper.encoder = @Transformer +DLWrapper.encoder = @Transformer() DLWrapper.optimizer_fn = @Adam DLWrapper.train.epochs = 1000 diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index fd36303e..5ef4a0e6 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -55,7 +55,7 @@ def __init__(self, encoder=LSTMNet, loss=torch.nn.functional.cross_entropy, opti self.pin_memory = pin_memory self.n_worker = n_worker - self.model = encoder() + self.model = encoder self.model.to(device) self.loss = loss self.optimizer = optimizer_fn(self.model.parameters()) From 4cc5fca7920ca2039663cd557b3554795f7798fe Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 02:33:53 +0100 Subject: [PATCH 026/163] load model configs --- icu_benchmarks/models/domain_adaptation.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 4af228ad..de2f0e48 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -30,6 +30,7 @@ def get_predictions_for_single_model(target_model: object, dataset: RICUDataset, Returns: Tuple of predictions and labels. """ + gin.parse_config_file(model_dir / "train_config.gin") if isinstance(target_model, DLWrapper): model = DLWrapper() else: @@ -81,10 +82,10 @@ def get_predictions_for_all_models( _, test_labels = test_dataset.get_data_and_labels() test_predictions = {} + test_predictions["target"] = target_model.output_transform(target_model.predict(test_dataset, None, None)) for source in source_datasets: model_dir = source_dir / source test_predictions[model_dir.name] = get_predictions_for_single_model(target_model, test_dataset, model_dir, log_dir) - test_predictions["target"] = target_model.output_transform(target_model.predict(test_dataset, None, None)) for name, prediction in test_predictions.items(): if prediction.ndim == 2: @@ -138,7 +139,8 @@ def domain_adaptation( cv_folds_to_train = 2 target_sizes = [500, 1000, 2000] datasets = ["hirid", "aumc", "miiv"] - weights = [1] * (len(datasets) - 1) + [1] + target_weights = [0.1, 0.2, 0.5, 1, 2, 5] + weights = [1] * (len(datasets) - 1) task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") @@ -153,9 +155,11 @@ def domain_adaptation( log_dir = run_dir / task / dataset / f"target_{target_size}" log_dir.mkdir(parents=True, exist_ok=True) choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug) + gin_config_with_target_hyperparameters = gin.config_str() results = {} for repetition in range(cv_repetitions_to_train): for fold_index in range(cv_folds_to_train): + gin.parse_config(gin_config_with_target_hyperparameters) results[f"{repetition}_{fold_index}"] = {} fold_results = results[f"{repetition}_{fold_index}"] @@ -190,15 +194,12 @@ def domain_adaptation( logging.info("Evaluating model: {}".format(baseline)) fold_results[baseline] = get_model_metrics(target_model, predictions, test_labels) - # evaluate convex combination of models test_predictions_list = list(test_predictions.values()) logging.info("Evaluating convex combination of models.") - target_weights = [0.1, 0.2, 0.5, 1, 2, 5] - weights = [1] * (len(datasets) - 1) for t in target_weights: - w = weights + [t * sum(weights)] + w = [t * sum(weights)] + weights logging.info(f"Evaluating target weight: {t}") test_pred = np.average(test_predictions_list, axis=0, weights=w) fold_results[f"convex_combination_{t}"] = get_model_metrics(target_model, test_pred, test_labels) From aefb1655926510bda254ac7bfe448674d63c0843 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 02:45:51 +0100 Subject: [PATCH 027/163] update lgbm config --- configs/models/LGBMClassifier.gin | 3 +-- configs/models/LogisticRegression.gin | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/configs/models/LGBMClassifier.gin b/configs/models/LGBMClassifier.gin index 76fe778e..59d07162 100644 --- a/configs/models/LGBMClassifier.gin +++ b/configs/models/LGBMClassifier.gin @@ -9,12 +9,11 @@ preprocess.use_features = True train_common.model = @MLWrapper() MLWrapper.model = @LGBMClassifier() -MLWrapper.train.patience = 10 model/hyperparameter.class_to_tune = @LGBMClassifier model/hyperparameter.colsample_bytree = (0.33, 1.0) model/hyperparameter.max_depth = (3, 7) -model/hyperparameter.min_child_samples = 1000 +model/hyperparameter.min_child_samples = (2, 1024, "log-uniform", 2) model/hyperparameter.n_estimators = 100000 model/hyperparameter.num_leaves = (8, 128, "log-uniform", 2) model/hyperparameter.subsample = (0.33, 1.0) diff --git a/configs/models/LogisticRegression.gin b/configs/models/LogisticRegression.gin index 8bef8066..cf8884b3 100644 --- a/configs/models/LogisticRegression.gin +++ b/configs/models/LogisticRegression.gin @@ -9,7 +9,6 @@ preprocess.use_features = True train_common.model = @MLWrapper() MLWrapper.model = @LogisticRegression() -MLWrapper.train.patience = 10 model/hyperparameter.class_to_tune = @LogisticRegression model/hyperparameter.solver = "saga" From 85543e2a4f282511d7a457223fbd24ba41b5417b Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 02:46:29 +0100 Subject: [PATCH 028/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index de2f0e48..0a674b2e 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -134,9 +134,9 @@ def domain_adaptation( ValueError: If checkpoint is not None and the checkpoint does not exist. """ cv_repetitions = 5 - cv_repetitions_to_train = 2 + cv_repetitions_to_train = 5 cv_folds = 5 - cv_folds_to_train = 2 + cv_folds_to_train = 5 target_sizes = [500, 1000, 2000] datasets = ["hirid", "aumc", "miiv"] target_weights = [0.1, 0.2, 0.5, 1, 2, 5] From 0030a6964462d2f819da1edb2924701215609ac7 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 02:47:58 +0100 Subject: [PATCH 029/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 0a674b2e..6ba5a074 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -138,7 +138,7 @@ def domain_adaptation( cv_folds = 5 cv_folds_to_train = 5 target_sizes = [500, 1000, 2000] - datasets = ["hirid", "aumc", "miiv"] + datasets = ["hirid", "aumc", "eicu", "miiv"] target_weights = [0.1, 0.2, 0.5, 1, 2, 5] weights = [1] * (len(datasets) - 1) task_dir = data_dir / task From 2afc544f959d59eb1a91b5425a789d72b54deb0c Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 14:17:27 +0100 Subject: [PATCH 030/163] include model in log_dir --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 6ba5a074..624be2b7 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -152,7 +152,7 @@ def domain_adaptation( for target_size in target_sizes: log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1) gin.bind_parameter("preprocess.fold_size", target_size) - log_dir = run_dir / task / dataset / f"target_{target_size}" + log_dir = run_dir / task / model / dataset / f"target_{target_size}" log_dir.mkdir(parents=True, exist_ok=True) choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug) gin_config_with_target_hyperparameters = gin.config_str() From 19549541abd01fc175b887393a34f30a22683d1d Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 14:18:05 +0100 Subject: [PATCH 031/163] reduce hyperparameter training for shallow models --- configs/models/LGBMClassifier.gin | 2 +- configs/models/LogisticRegression.gin | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/models/LGBMClassifier.gin b/configs/models/LGBMClassifier.gin index 59d07162..0b725442 100644 --- a/configs/models/LGBMClassifier.gin +++ b/configs/models/LGBMClassifier.gin @@ -21,5 +21,5 @@ model/hyperparameter.subsample_freq = 1 tune_hyperparameters.scopes = ["model"] tune_hyperparameters.n_initial_points = 10 -tune_hyperparameters.n_calls = 250 +tune_hyperparameters.n_calls = 150 tune_hyperparameters.folds_to_tune_on = 3 diff --git a/configs/models/LogisticRegression.gin b/configs/models/LogisticRegression.gin index cf8884b3..9a18f798 100644 --- a/configs/models/LogisticRegression.gin +++ b/configs/models/LogisticRegression.gin @@ -19,5 +19,5 @@ model/hyperparameter.l1_ratio = (0.0, 1.0) tune_hyperparameters.scopes = ["model"] tune_hyperparameters.n_initial_points = 10 -tune_hyperparameters.n_calls = 150 +tune_hyperparameters.n_calls = 100 tune_hyperparameters.folds_to_tune_on = 3 From 761e354053b5131175fac522eb5c17a7257002b1 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 14:42:26 +0100 Subject: [PATCH 032/163] reset gin config for repeated HP tuning --- icu_benchmarks/models/domain_adaptation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 624be2b7..c43aa7b8 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -143,6 +143,7 @@ def domain_adaptation( weights = [1] * (len(datasets) - 1) task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") + gin_config_before_tuning = gin.config_str() # evaluate models on same test split for dataset in datasets: @@ -150,6 +151,8 @@ def domain_adaptation( source_datasets = [d for d in datasets if d != dataset] log_full_line(f"STARTING {dataset}", char="#", num_newlines=2) for target_size in target_sizes: + gin.clear_config() + gin.parse_config(gin_config_before_tuning) log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1) gin.bind_parameter("preprocess.fold_size", target_size) log_dir = run_dir / task / model / dataset / f"target_{target_size}" From f7c01fed0a0fa18c16b0a31f2e0be95a40560a1f Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 15:05:32 +0100 Subject: [PATCH 033/163] remove duplicate output transform --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index c43aa7b8..196c1e1b 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -82,7 +82,7 @@ def get_predictions_for_all_models( _, test_labels = test_dataset.get_data_and_labels() test_predictions = {} - test_predictions["target"] = target_model.output_transform(target_model.predict(test_dataset, None, None)) + test_predictions["target"] = target_model.predict(test_dataset, None, None) for source in source_datasets: model_dir = source_dir / source test_predictions[model_dir.name] = get_predictions_for_single_model(target_model, test_dataset, model_dir, log_dir) From 685ee3eff5956ae1a7f5e9711327427d1f3fa506 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 15:17:48 +0100 Subject: [PATCH 034/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 196c1e1b..7e85c203 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -44,6 +44,7 @@ def get_predictions_for_single_model(target_model: object, dataset: RICUDataset, model.load_weights(model_dir / "model.joblib") else: raise Exception("No weights to load at path : {}".format(model_dir / "model.*")) + logging.info(f"Generating predictions for model : {model_dir}") return model.predict(dataset, None, None) @@ -82,13 +83,14 @@ def get_predictions_for_all_models( _, test_labels = test_dataset.get_data_and_labels() test_predictions = {} + logging.info("Generating predictions for target") test_predictions["target"] = target_model.predict(test_dataset, None, None) for source in source_datasets: model_dir = source_dir / source test_predictions[model_dir.name] = get_predictions_for_single_model(target_model, test_dataset, model_dir, log_dir) for name, prediction in test_predictions.items(): - if prediction.ndim == 2: + if not isinstance(prediction, list) and prediction.ndim == 2: test_predictions[name] = prediction[:, 1] return test_predictions, test_labels From 3f6875416f068c7ecad3f1563de51fed56f16d42 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 15:17:58 +0100 Subject: [PATCH 035/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 7e85c203..dd747cb4 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -159,7 +159,7 @@ def domain_adaptation( gin.bind_parameter("preprocess.fold_size", target_size) log_dir = run_dir / task / model / dataset / f"target_{target_size}" log_dir.mkdir(parents=True, exist_ok=True) - choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug) + choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug) gin_config_with_target_hyperparameters = gin.config_str() results = {} for repetition in range(cv_repetitions_to_train): From efe115fa97028785296ad06ef0908f2337b10f27 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 16:52:11 +0100 Subject: [PATCH 036/163] move metrics calculation to wrapper --- icu_benchmarks/models/domain_adaptation.py | 17 -------------- icu_benchmarks/models/wrappers.py | 26 +++++++++++++++++++--- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index dd747cb4..1a953925 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -96,23 +96,6 @@ def get_predictions_for_all_models( return test_predictions, test_labels -def get_model_metrics(model: object, test_predictions: np.ndarray, test_labels: np.ndarray): - """Evaluate a combination of models. - - Args: - test_predictions: Predictions for test set. - test_labels: Labels for test set. - """ - test_metric_results = {} - for name, metric in model.metrics.items(): - value = metric(model.label_transform(test_labels), test_predictions) - test_metric_results[name] = value - # Only log float values - if isinstance(value, np.float): - logging.info("test {}: {}".format(name, value)) - return test_metric_results - - def domain_adaptation( data_dir: Path, run_dir: Path, diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index 5ef4a0e6..add20fb8 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -290,20 +290,30 @@ def load_weights(self, load_path): def predict(self, dataset, weight, seed): self.set_metrics() - test_loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=self.n_worker, pin_memory=self.pin_memory) + loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=self.n_worker, pin_memory=self.pin_memory) if isinstance(weight, list): weight = torch.FloatTensor(weight).to(self.device) - test_loss, test_metrics = self.evaluate(test_loader, self.metrics, weight) self.model.eval() all_preds = [] with torch.no_grad(): - for elem in test_loader: + for elem in loader: _, preds, _ = self.step_fn(elem, weight) all_preds += preds return all_preds + def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray): + metric_results = {} + for name, metric in self.metrics.items(): + metric.update(self.output_transform((predictions, labels))) + value = metric.compute() + metric_results[name] = value + # Only log float values + if isinstance(value, np.float): + logging.info("Test {}: {}".format(name, value)) + return metric_results + @gin.configurable("MLWrapper") class MLWrapper(object): @@ -443,3 +453,13 @@ def predict(self, dataset, weight, seed): return self.model.predict(test_rep) else: return self.model.predict_proba(test_rep) + + def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray): + metric_results = {} + for name, metric in self.metrics.items(): + value = metric(self.label_transform(labels), predictions) + metric_results[name] = value + # Only log float values + if isinstance(value, np.float): + logging.info("Test {}: {}".format(name, value)) + return metric_results From c3299fb04c762d0f9e5b71c496a244d9bc886e94 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 16:55:14 +0100 Subject: [PATCH 037/163] fix function call --- icu_benchmarks/models/domain_adaptation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 1a953925..3c9f0986 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -180,7 +180,7 @@ def domain_adaptation( # evaluate source baselines for baseline, predictions in test_predictions.items(): logging.info("Evaluating model: {}".format(baseline)) - fold_results[baseline] = get_model_metrics(target_model, predictions, test_labels) + fold_results[baseline] = target_model.calculate_metrics(predictions, test_labels) # evaluate convex combination of models test_predictions_list = list(test_predictions.values()) @@ -190,7 +190,7 @@ def domain_adaptation( w = [t * sum(weights)] + weights logging.info(f"Evaluating target weight: {t}") test_pred = np.average(test_predictions_list, axis=0, weights=w) - fold_results[f"convex_combination_{t}"] = get_model_metrics(target_model, test_pred, test_labels) + fold_results[f"convex_combination_{t}"] = target_model.calculate_metrics(test_pred, test_labels) log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) From 294ef742f02c0a6c957ac0ad686ab64d5900710e Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 16:55:31 +0100 Subject: [PATCH 038/163] increase batch_size for test --- icu_benchmarks/models/wrappers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index add20fb8..c6f7c2a6 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -290,7 +290,7 @@ def load_weights(self, load_path): def predict(self, dataset, weight, seed): self.set_metrics() - loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=self.n_worker, pin_memory=self.pin_memory) + loader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=self.n_worker, pin_memory=self.pin_memory) if isinstance(weight, list): weight = torch.FloatTensor(weight).to(self.device) From 5f36045d0697628174cac59eecdc25cb119c01d1 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:11:12 +0100 Subject: [PATCH 039/163] Update wrappers.py --- icu_benchmarks/models/wrappers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index c6f7c2a6..cc511754 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -305,6 +305,7 @@ def predict(self, dataset, weight, seed): def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray): metric_results = {} + predictions = torch.from_numpy(predictions) for name, metric in self.metrics.items(): metric.update(self.output_transform((predictions, labels))) value = metric.compute() From d57b375f0ff07d44d642ba89e06cd98903a0160d Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:12:23 +0100 Subject: [PATCH 040/163] Update Transformer.gin --- configs/models/Transformer.gin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin index 7085e672..c5c0e5f8 100644 --- a/configs/models/Transformer.gin +++ b/configs/models/Transformer.gin @@ -14,7 +14,7 @@ DLWrapper.optimizer_fn = @Adam DLWrapper.train.epochs = 1000 DLWrapper.train.batch_size = 64 -DLWrapper.train.patience = 5 +DLWrapper.train.patience = 1 DLWrapper.train.min_delta = 1e-4 # Optimizer params From 442a3c4c0865e4030e187fa8648a89a1fd40df95 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:13:22 +0100 Subject: [PATCH 041/163] Update wrappers.py --- icu_benchmarks/models/wrappers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index cc511754..60ad0a43 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -303,9 +303,9 @@ def predict(self, dataset, weight, seed): return all_preds - def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray): + def calculate_metrics(self: object, predictions: list, labels: np.ndarray): metric_results = {} - predictions = torch.from_numpy(predictions) + predictions = torch.FloatTensor(predictions) for name, metric in self.metrics.items(): metric.update(self.output_transform((predictions, labels))) value = metric.compute() From 0967ff5e2b1abd93d457b8dd530359922a9873b4 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:14:55 +0100 Subject: [PATCH 042/163] Update wrappers.py --- icu_benchmarks/models/wrappers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index 60ad0a43..dd674f3d 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -305,6 +305,7 @@ def predict(self, dataset, weight, seed): def calculate_metrics(self: object, predictions: list, labels: np.ndarray): metric_results = {} + print(predictions) predictions = torch.FloatTensor(predictions) for name, metric in self.metrics.items(): metric.update(self.output_transform((predictions, labels))) From de888771a8d9fffcb6a170052a45ca93f16c86ce Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:16:49 +0100 Subject: [PATCH 043/163] Update wrappers.py --- icu_benchmarks/models/wrappers.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index dd674f3d..7e377617 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -295,18 +295,17 @@ def predict(self, dataset, weight, seed): weight = torch.FloatTensor(weight).to(self.device) self.model.eval() - all_preds = [] + all_preds = torch.FloatTensor() with torch.no_grad(): for elem in loader: _, preds, _ = self.step_fn(elem, weight) - all_preds += preds + all_preds = all_preds.cat(preds) return all_preds - def calculate_metrics(self: object, predictions: list, labels: np.ndarray): + def calculate_metrics(self: object, predictions: torch.tensor, labels: np.ndarray): metric_results = {} print(predictions) - predictions = torch.FloatTensor(predictions) for name, metric in self.metrics.items(): metric.update(self.output_transform((predictions, labels))) value = metric.compute() From 2ecda4046af0dcb2a1885e30a6a11ee278f7f73c Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:18:06 +0100 Subject: [PATCH 044/163] Update wrappers.py --- icu_benchmarks/models/wrappers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index 7e377617..e06333d4 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -299,7 +299,7 @@ def predict(self, dataset, weight, seed): with torch.no_grad(): for elem in loader: _, preds, _ = self.step_fn(elem, weight) - all_preds = all_preds.cat(preds) + all_preds = torch.cat((all_preds, preds)) return all_preds From 6c4cd96553c7515c1db74d3257b34736febe82b1 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:18:46 +0100 Subject: [PATCH 045/163] Update wrappers.py --- icu_benchmarks/models/wrappers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index e06333d4..e889e231 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -295,7 +295,7 @@ def predict(self, dataset, weight, seed): weight = torch.FloatTensor(weight).to(self.device) self.model.eval() - all_preds = torch.FloatTensor() + all_preds = torch.FloatTensor().to(self.device) with torch.no_grad(): for elem in loader: _, preds, _ = self.step_fn(elem, weight) From 9fae10e1990922181543de24ddbdceb6e3578b0b Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:21:47 +0100 Subject: [PATCH 046/163] Update wrappers.py --- icu_benchmarks/models/wrappers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index e889e231..e270e23f 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -295,17 +295,19 @@ def predict(self, dataset, weight, seed): weight = torch.FloatTensor(weight).to(self.device) self.model.eval() - all_preds = torch.FloatTensor().to(self.device) + all_preds = np.ndarray() with torch.no_grad(): for elem in loader: _, preds, _ = self.step_fn(elem, weight) - all_preds = torch.cat((all_preds, preds)) + all_preds = np.append(all_preds, preds.cpu().numpy().tolist()) + print(all_preds) return all_preds - def calculate_metrics(self: object, predictions: torch.tensor, labels: np.ndarray): + def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray): metric_results = {} print(predictions) + predictions = torch.from_numpy(predictions) for name, metric in self.metrics.items(): metric.update(self.output_transform((predictions, labels))) value = metric.compute() From 5d6cb2e9d2e3055df297c3778aeb7ec218ea40f6 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:24:16 +0100 Subject: [PATCH 047/163] Update wrappers.py --- icu_benchmarks/models/wrappers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index e270e23f..65f0ecd5 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -295,11 +295,12 @@ def predict(self, dataset, weight, seed): weight = torch.FloatTensor(weight).to(self.device) self.model.eval() - all_preds = np.ndarray() + all_preds = [] with torch.no_grad(): for elem in loader: _, preds, _ = self.step_fn(elem, weight) - all_preds = np.append(all_preds, preds.cpu().numpy().tolist()) + all_preds += preds.cpu().numpy().tolist() + all_preds = np.array(all_preds) print(all_preds) return all_preds From 57db01b0b5a70c1f05730f1f5e511301b352cb52 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:28:02 +0100 Subject: [PATCH 048/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 3c9f0986..851b98e8 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -90,7 +90,7 @@ def get_predictions_for_all_models( test_predictions[model_dir.name] = get_predictions_for_single_model(target_model, test_dataset, model_dir, log_dir) for name, prediction in test_predictions.items(): - if not isinstance(prediction, list) and prediction.ndim == 2: + if isinstance(target_model, MLWrapper) and prediction.ndim == 2: test_predictions[name] = prediction[:, 1] return test_predictions, test_labels From b93b402b36028560eb806b89408688f92438b25d Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:32:02 +0100 Subject: [PATCH 049/163] Update wrappers.py --- icu_benchmarks/models/wrappers.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index 65f0ecd5..4bb2086f 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -298,8 +298,9 @@ def predict(self, dataset, weight, seed): all_preds = [] with torch.no_grad(): for elem in loader: - _, preds, _ = self.step_fn(elem, weight) - all_preds += preds.cpu().numpy().tolist() + _, preds, target = self.step_fn(elem, weight) + preds, target = self.output_transform((preds, target)) + all_preds += preds all_preds = np.array(all_preds) print(all_preds) @@ -310,7 +311,7 @@ def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray) print(predictions) predictions = torch.from_numpy(predictions) for name, metric in self.metrics.items(): - metric.update(self.output_transform((predictions, labels))) + metric.update((predictions, labels)) value = metric.compute() metric_results[name] = value # Only log float values From 0cd0b13c58358211f1f32276344bdca1cb30578f Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:34:04 +0100 Subject: [PATCH 050/163] Update wrappers.py --- icu_benchmarks/models/wrappers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index 4bb2086f..a2d99451 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -309,7 +309,6 @@ def predict(self, dataset, weight, seed): def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray): metric_results = {} print(predictions) - predictions = torch.from_numpy(predictions) for name, metric in self.metrics.items(): metric.update((predictions, labels)) value = metric.compute() From 623a80da1c6a6f9d4b4bdb45c914225f385f15af Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:34:51 +0100 Subject: [PATCH 051/163] Update wrappers.py --- icu_benchmarks/models/wrappers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index a2d99451..980b4a38 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -300,7 +300,7 @@ def predict(self, dataset, weight, seed): for elem in loader: _, preds, target = self.step_fn(elem, weight) preds, target = self.output_transform((preds, target)) - all_preds += preds + all_preds += preds.cpu().numpy().tolist() all_preds = np.array(all_preds) print(all_preds) From 702c91742095ea975b1ad08d6bb68dfaea2a9603 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:43:13 +0100 Subject: [PATCH 052/163] change metric calculation --- icu_benchmarks/models/domain_adaptation.py | 16 ++++++++++++++-- icu_benchmarks/models/wrappers.py | 22 ---------------------- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 851b98e8..86bd9463 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -12,6 +12,7 @@ from icu_benchmarks.data.loader import RICUDataset from icu_benchmarks.data.preprocess import preprocess_data from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters +from icu_benchmarks.models.metric_constants import MLMetrics from icu_benchmarks.models.train import train_common from icu_benchmarks.models.wrappers import DLWrapper, MLWrapper from icu_benchmarks.models.utils import JsonNumpyEncoder @@ -48,6 +49,17 @@ def get_predictions_for_single_model(target_model: object, dataset: RICUDataset, return model.predict(dataset, None, None) +def calculate_metrics(predictions: np.ndarray, labels: np.ndarray): + metric_results = {} + for name, metric in MLMetrics.BINARY_CLASSIFICATION.items(): + value = metric(labels, predictions) + metric_results[name] = value + # Only log float values + if isinstance(value, np.float): + logging.info("Test {}: {}".format(name, value)) + return metric_results + + def get_predictions_for_all_models( target_model: object, data: dict[str, pd.DataFrame], @@ -180,7 +192,7 @@ def domain_adaptation( # evaluate source baselines for baseline, predictions in test_predictions.items(): logging.info("Evaluating model: {}".format(baseline)) - fold_results[baseline] = target_model.calculate_metrics(predictions, test_labels) + fold_results[baseline] = calculate_metrics(predictions, test_labels) # evaluate convex combination of models test_predictions_list = list(test_predictions.values()) @@ -190,7 +202,7 @@ def domain_adaptation( w = [t * sum(weights)] + weights logging.info(f"Evaluating target weight: {t}") test_pred = np.average(test_predictions_list, axis=0, weights=w) - fold_results[f"convex_combination_{t}"] = target_model.calculate_metrics(test_pred, test_labels) + fold_results[f"convex_combination_{t}"] = calculate_metrics(test_pred, test_labels) log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index 980b4a38..d1119476 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -306,18 +306,6 @@ def predict(self, dataset, weight, seed): return all_preds - def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray): - metric_results = {} - print(predictions) - for name, metric in self.metrics.items(): - metric.update((predictions, labels)) - value = metric.compute() - metric_results[name] = value - # Only log float values - if isinstance(value, np.float): - logging.info("Test {}: {}".format(name, value)) - return metric_results - @gin.configurable("MLWrapper") class MLWrapper(object): @@ -457,13 +445,3 @@ def predict(self, dataset, weight, seed): return self.model.predict(test_rep) else: return self.model.predict_proba(test_rep) - - def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray): - metric_results = {} - for name, metric in self.metrics.items(): - value = metric(self.label_transform(labels), predictions) - metric_results[name] = value - # Only log float values - if isinstance(value, np.float): - logging.info("Test {}: {}".format(name, value)) - return metric_results From 570f6ab6a7d051753cc31e7acd2c8f2774aaae1b Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:46:16 +0100 Subject: [PATCH 053/163] Update Transformer.gin --- configs/models/Transformer.gin | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin index c5c0e5f8..e6a1a2ac 100644 --- a/configs/models/Transformer.gin +++ b/configs/models/Transformer.gin @@ -14,7 +14,7 @@ DLWrapper.optimizer_fn = @Adam DLWrapper.train.epochs = 1000 DLWrapper.train.batch_size = 64 -DLWrapper.train.patience = 1 +DLWrapper.train.patience = 5 DLWrapper.train.min_delta = 1e-4 # Optimizer params @@ -35,6 +35,6 @@ model/hyperparameter.dropout = (0.0, 0.4) model/hyperparameter.dropout_att = (0.0, 0.4) tune_hyperparameters.scopes = ["model", "optimizer"] -tune_hyperparameters.n_initial_points = 2 -tune_hyperparameters.n_calls = 4 +tune_hyperparameters.n_initial_points = 5 +tune_hyperparameters.n_calls = 10 tune_hyperparameters.folds_to_tune_on = 2 From 162adada45c2afbc62de1c967b6615d9a93a605e Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:46:53 +0100 Subject: [PATCH 054/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 86bd9463..3ea57dc2 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -154,7 +154,7 @@ def domain_adaptation( gin.bind_parameter("preprocess.fold_size", target_size) log_dir = run_dir / task / model / dataset / f"target_{target_size}" log_dir.mkdir(parents=True, exist_ok=True) - choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug) + choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug) gin_config_with_target_hyperparameters = gin.config_str() results = {} for repetition in range(cv_repetitions_to_train): From c44a1b0c5e80d2c3df8555fcdb448a1420b0c00b Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 17:51:58 +0100 Subject: [PATCH 055/163] Update wrappers.py --- icu_benchmarks/models/wrappers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py index d1119476..8bacdee1 100644 --- a/icu_benchmarks/models/wrappers.py +++ b/icu_benchmarks/models/wrappers.py @@ -302,7 +302,6 @@ def predict(self, dataset, weight, seed): preds, target = self.output_transform((preds, target)) all_preds += preds.cpu().numpy().tolist() all_preds = np.array(all_preds) - print(all_preds) return all_preds From ea23e1ba86c79177880323acc65f29e00a68c501 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 19:23:21 +0100 Subject: [PATCH 056/163] add dg baseline --- icu_benchmarks/models/domain_adaptation.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 3ea57dc2..78bb3f38 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -196,6 +196,11 @@ def domain_adaptation( # evaluate convex combination of models test_predictions_list = list(test_predictions.values()) + test_predictions_list_without_target = test_predictions_list[1:] + + logging.info("Evaluating convex combination of models without target.") + test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=weights) + fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) logging.info("Evaluating convex combination of models.") for t in target_weights: From d56d98b08ae66044e972be56969ca7a1b5ff33ce Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 23:26:49 +0100 Subject: [PATCH 057/163] fix json encoder --- icu_benchmarks/models/domain_adaptation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 78bb3f38..6d53bd83 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -15,7 +15,7 @@ from icu_benchmarks.models.metric_constants import MLMetrics from icu_benchmarks.models.train import train_common from icu_benchmarks.models.wrappers import DLWrapper, MLWrapper -from icu_benchmarks.models.utils import JsonNumpyEncoder +from icu_benchmarks.models.utils import JsonResultLoggingEncoder from icu_benchmarks.run_utils import log_full_line @@ -230,10 +230,10 @@ def domain_adaptation( }) with open(log_dir / "aggregated_source_metrics.json", "w") as f: - json.dump(results, f, cls=JsonNumpyEncoder) + json.dump(results, f, cls=JsonResultLoggingEncoder) with open(log_dir / "averaged_source_metrics.json", "w") as f: - json.dump(averaged_metrics, f, cls=JsonNumpyEncoder) + json.dump(averaged_metrics, f, cls=JsonResultLoggingEncoder) logging.info(f"Averaged results: {averaged_metrics}") log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5) From e9b5c79b140ce3bd06383388606e6c1a6fc0e114 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 23:27:50 +0100 Subject: [PATCH 058/163] Update Transformer.gin --- configs/models/Transformer.gin | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin index e6a1a2ac..7e89cf2a 100644 --- a/configs/models/Transformer.gin +++ b/configs/models/Transformer.gin @@ -14,7 +14,7 @@ DLWrapper.optimizer_fn = @Adam DLWrapper.train.epochs = 1000 DLWrapper.train.batch_size = 64 -DLWrapper.train.patience = 5 +DLWrapper.train.patience = 10 DLWrapper.train.min_delta = 1e-4 # Optimizer params @@ -36,5 +36,5 @@ model/hyperparameter.dropout_att = (0.0, 0.4) tune_hyperparameters.scopes = ["model", "optimizer"] tune_hyperparameters.n_initial_points = 5 -tune_hyperparameters.n_calls = 10 +tune_hyperparameters.n_calls = 30 tune_hyperparameters.folds_to_tune_on = 2 From 2ca631ea73dcd4cb6af105afe3407dbc0ac3767c Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 12 Jan 2023 23:56:03 +0100 Subject: [PATCH 059/163] only execute da for one dataset at a time --- icu_benchmarks/models/domain_adaptation.py | 193 ++++++++++----------- icu_benchmarks/run.py | 2 +- 2 files changed, 97 insertions(+), 98 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 6d53bd83..5fe7461c 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -109,6 +109,7 @@ def get_predictions_for_all_models( def domain_adaptation( + dataset: str, data_dir: Path, run_dir: Path, seed: int, @@ -143,100 +144,98 @@ def domain_adaptation( gin_config_before_tuning = gin.config_str() # evaluate models on same test split - for dataset in datasets: - data_dir = task_dir / dataset - source_datasets = [d for d in datasets if d != dataset] - log_full_line(f"STARTING {dataset}", char="#", num_newlines=2) - for target_size in target_sizes: - gin.clear_config() - gin.parse_config(gin_config_before_tuning) - log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1) - gin.bind_parameter("preprocess.fold_size", target_size) - log_dir = run_dir / task / model / dataset / f"target_{target_size}" - log_dir.mkdir(parents=True, exist_ok=True) - choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug) - gin_config_with_target_hyperparameters = gin.config_str() - results = {} - for repetition in range(cv_repetitions_to_train): - for fold_index in range(cv_folds_to_train): - gin.parse_config(gin_config_with_target_hyperparameters) - results[f"{repetition}_{fold_index}"] = {} - fold_results = results[f"{repetition}_{fold_index}"] - - data = preprocess_data( - data_dir, - seed=seed, - debug=debug, - use_cache=True, - cv_repetitions=cv_repetitions, - repetition_index=repetition, - cv_folds=cv_folds, - fold_index=fold_index, - ) - - log_dir_fold = log_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}" - log_dir_fold.mkdir(parents=True, exist_ok=True) - - # evaluate target baselines - target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) - - test_predictions, test_labels = get_predictions_for_all_models( - target_model, - data, - log_dir_fold, - source_dir=model_path / task / model, - seed=seed, - source_datasets=source_datasets, - ) - - # evaluate source baselines - for baseline, predictions in test_predictions.items(): - logging.info("Evaluating model: {}".format(baseline)) - fold_results[baseline] = calculate_metrics(predictions, test_labels) - - # evaluate convex combination of models - test_predictions_list = list(test_predictions.values()) - test_predictions_list_without_target = test_predictions_list[1:] - - logging.info("Evaluating convex combination of models without target.") - test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=weights) - fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) - - logging.info("Evaluating convex combination of models.") - for t in target_weights: - w = [t * sum(weights)] + weights - logging.info(f"Evaluating target weight: {t}") - test_pred = np.average(test_predictions_list, axis=0, weights=w) - fold_results[f"convex_combination_{t}"] = calculate_metrics(test_pred, test_labels) - - log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) - log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) - - source_metrics = {} - for result in results.values(): - for source, source_stats in result.items(): - for metric, score in source_stats.items(): - if isinstance(score, (float, int)): - source_metrics.setdefault(source, {}).setdefault(metric, []).append(score) - - # Compute statistical metric over aggregated results - averaged_metrics = {} - for source, source_stats in source_metrics.items(): - for metric, scores in source_stats.items(): - averaged_metrics.setdefault(source, {}).setdefault(metric, []).append({ - "avg": np.mean(scores), - "std": np.std(scores), - "CI_0.95": stats.t.interval(0.95, len(scores) - 1, loc=np.mean(scores), scale=stats.sem(scores)), - }) - - with open(log_dir / "aggregated_source_metrics.json", "w") as f: - json.dump(results, f, cls=JsonResultLoggingEncoder) - - with open(log_dir / "averaged_source_metrics.json", "w") as f: - json.dump(averaged_metrics, f, cls=JsonResultLoggingEncoder) - - logging.info(f"Averaged results: {averaged_metrics}") - log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5) - - log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=10) - \ No newline at end of file + data_dir = task_dir / dataset + source_datasets = [d for d in datasets if d != dataset] + log_full_line(f"STARTING {dataset}", char="#", num_newlines=2) + for target_size in target_sizes: + gin.clear_config() + gin.parse_config(gin_config_before_tuning) + log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1) + gin.bind_parameter("preprocess.fold_size", target_size) + log_dir = run_dir / task / model / dataset / f"target_{target_size}" + log_dir.mkdir(parents=True, exist_ok=True) + choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug) + gin_config_with_target_hyperparameters = gin.config_str() + results = {} + for repetition in range(cv_repetitions_to_train): + for fold_index in range(cv_folds_to_train): + gin.parse_config(gin_config_with_target_hyperparameters) + results[f"{repetition}_{fold_index}"] = {} + fold_results = results[f"{repetition}_{fold_index}"] + + data = preprocess_data( + data_dir, + seed=seed, + debug=debug, + use_cache=True, + cv_repetitions=cv_repetitions, + repetition_index=repetition, + cv_folds=cv_folds, + fold_index=fold_index, + ) + + log_dir_fold = log_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}" + log_dir_fold.mkdir(parents=True, exist_ok=True) + + # evaluate target baselines + target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) + + test_predictions, test_labels = get_predictions_for_all_models( + target_model, + data, + log_dir_fold, + source_dir=model_path / task / model, + seed=seed, + source_datasets=source_datasets, + ) + + # evaluate source baselines + for baseline, predictions in test_predictions.items(): + logging.info("Evaluating model: {}".format(baseline)) + fold_results[baseline] = calculate_metrics(predictions, test_labels) + + # evaluate convex combination of models + test_predictions_list = list(test_predictions.values()) + test_predictions_list_without_target = test_predictions_list[1:] + + logging.info("Evaluating convex combination of models without target.") + test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=weights) + fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) + + logging.info("Evaluating convex combination of models.") + for t in target_weights: + w = [t * sum(weights)] + weights + logging.info(f"Evaluating target weight: {t}") + test_pred = np.average(test_predictions_list, axis=0, weights=w) + fold_results[f"convex_combination_{t}"] = calculate_metrics(test_pred, test_labels) + + log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) + log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) + + source_metrics = {} + for result in results.values(): + for source, source_stats in result.items(): + for metric, score in source_stats.items(): + if isinstance(score, (float, int)): + source_metrics.setdefault(source, {}).setdefault(metric, []).append(score) + + # Compute statistical metric over aggregated results + averaged_metrics = {} + for source, source_stats in source_metrics.items(): + for metric, scores in source_stats.items(): + averaged_metrics.setdefault(source, {}).setdefault(metric, []).append({ + "avg": np.mean(scores), + "std": np.std(scores), + "CI_0.95": stats.t.interval(0.95, len(scores) - 1, loc=np.mean(scores), scale=stats.sem(scores)), + }) + + with open(log_dir / "aggregated_source_metrics.json", "w") as f: + json.dump(results, f, cls=JsonResultLoggingEncoder) + + with open(log_dir / "averaged_source_metrics.json", "w") as f: + json.dump(averaged_metrics, f, cls=JsonResultLoggingEncoder) + + logging.info(f"Averaged results: {averaged_metrics}") + log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5) + + log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=5) diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py index 4db87271..bb76a49b 100644 --- a/icu_benchmarks/run.py +++ b/icu_benchmarks/run.py @@ -63,7 +63,7 @@ def main(my_args=tuple(sys.argv[1:])): else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")] ) gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False) - domain_adaptation(args.data_dir, run_dir, args.seed, args.task_name, model) + domain_adaptation(name, args.data_dir, run_dir, args.seed, args.task_name, model) return else: reproducible = args.reproducible From 45852579fe8949e84f6bd7b9eb8cec47ac5e02ae Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 00:14:09 +0100 Subject: [PATCH 060/163] remove run dir --- icu_benchmarks/run.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py index bb76a49b..54ef17a9 100644 --- a/icu_benchmarks/run.py +++ b/icu_benchmarks/run.py @@ -56,14 +56,13 @@ def main(my_args=tuple(sys.argv[1:])): ) return if args.command == "da": - run_dir = create_run_dir(args.log_dir) gin_config_files = ( [Path(f"configs/experiments/{args.experiment}.gin")] if args.experiment else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")] ) gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False) - domain_adaptation(name, args.data_dir, run_dir, args.seed, args.task_name, model) + domain_adaptation(name, args.data_dir, args.log_dir, args.seed, args.task_name, model) return else: reproducible = args.reproducible From fc0d41e1c9173247ada672a99b8a0d62deec2fd4 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 14:04:50 +0100 Subject: [PATCH 061/163] Create da_to_csv.py --- scripts/results/da_to_csv.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 scripts/results/da_to_csv.py diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py new file mode 100644 index 00000000..35ac1973 --- /dev/null +++ b/scripts/results/da_to_csv.py @@ -0,0 +1,15 @@ +import json +from pathlib import Path +import csv + +models_dir = Path("../DA_logs") +for metric in ["AUC", "PR"]: + for endpoint in models_dir.iterdir(): + with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file: + writer = csv.writer(csv_file) + for model in endpoint.iterdir(): + for target in model.iterdir(): + for target_size in target.iterdir(): + with open(target_size / 'averaged_source_metrics.json', 'r') as f: + results = json.load(f) + writer.writerow([model.name, target, target_size] + [source[metric] for source in results]) From 440aa15b8b4b4e857156b46fa91f0bd1abc84562 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 14:08:25 +0100 Subject: [PATCH 062/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 35ac1973..4ed32fb0 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -8,6 +8,8 @@ with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file: writer = csv.writer(csv_file) for model in endpoint.iterdir(): + if model.name == "LSTM": + continue for target in model.iterdir(): for target_size in target.iterdir(): with open(target_size / 'averaged_source_metrics.json', 'r') as f: From 7a2583b56a42d10ee5ae3491fbfe259124b4e3ed Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 14:10:31 +0100 Subject: [PATCH 063/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 4ed32fb0..d303aefe 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -14,4 +14,4 @@ for target_size in target.iterdir(): with open(target_size / 'averaged_source_metrics.json', 'r') as f: results = json.load(f) - writer.writerow([model.name, target, target_size] + [source[metric] for source in results]) + writer.writerow([model.name, target, target_size] + [source_metrics[metric] for source_name, source_metrics in results.items()]) From 11d80e2e7c123f39ffd65e1877ffe2e5c3261f44 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 14:28:01 +0100 Subject: [PATCH 064/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index d303aefe..c91bd2e2 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -5,13 +5,14 @@ models_dir = Path("../DA_logs") for metric in ["AUC", "PR"]: for endpoint in models_dir.iterdir(): - with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file: - writer = csv.writer(csv_file) - for model in endpoint.iterdir(): - if model.name == "LSTM": - continue - for target in model.iterdir(): - for target_size in target.iterdir(): - with open(target_size / 'averaged_source_metrics.json', 'r') as f: - results = json.load(f) - writer.writerow([model.name, target, target_size] + [source_metrics[metric] for source_name, source_metrics in results.items()]) + if endpoint.is_dir(): + with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file: + writer = csv.writer(csv_file) + for model in endpoint.iterdir(): + if model.name == "LSTM": + continue + for target in model.iterdir(): + for target_size in target.iterdir(): + with open(target_size / 'averaged_source_metrics.json', 'r') as f: + results = json.load(f) + writer.writerow([model.name, target, target_size] + [source_metrics[metric] for source_name, source_metrics in results.items()]) From 18fe642610348760ec89f7f44478407c92f4a889 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 14:30:27 +0100 Subject: [PATCH 065/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index c91bd2e2..eff1dba5 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -15,4 +15,4 @@ for target_size in target.iterdir(): with open(target_size / 'averaged_source_metrics.json', 'r') as f: results = json.load(f) - writer.writerow([model.name, target, target_size] + [source_metrics[metric] for source_name, source_metrics in results.items()]) + writer.writerow([model.name, target.name, target_size.name] + [source_metrics[metric] for source_name, source_metrics in results.items()]) From 3df1fcd7c285a3654e9d651a727ab623b48cbd6c Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 14:37:06 +0100 Subject: [PATCH 066/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index eff1dba5..ea09ff7a 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -15,4 +15,6 @@ for target_size in target.iterdir(): with open(target_size / 'averaged_source_metrics.json', 'r') as f: results = json.load(f) - writer.writerow([model.name, target.name, target_size.name] + [source_metrics[metric] for source_name, source_metrics in results.items()]) + source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()] + source_metrics = [metr for _, metr in source_metrics.items()] + writer.writerow([model.name, target.name, target_size.name] + source_metrics) From 1ce0f17a227505f59eb7504caf7c9e379d598020 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 14:40:47 +0100 Subject: [PATCH 067/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index ea09ff7a..575e649a 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -16,5 +16,5 @@ with open(target_size / 'averaged_source_metrics.json', 'r') as f: results = json.load(f) source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()] - source_metrics = [metr for _, metr in source_metrics.items()] + source_metrics = [(metr["avg"], metr["std"], metr["CI_0.95"]) for metr in source_metrics] writer.writerow([model.name, target.name, target_size.name] + source_metrics) From 2e7ac54b5da4eddd184120a1ab679aac7b376267 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 14:41:43 +0100 Subject: [PATCH 068/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 575e649a..594e8770 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -16,5 +16,5 @@ with open(target_size / 'averaged_source_metrics.json', 'r') as f: results = json.load(f) source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()] - source_metrics = [(metr["avg"], metr["std"], metr["CI_0.95"]) for metr in source_metrics] + source_metrics = [(metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]) for metr in source_metrics] writer.writerow([model.name, target.name, target_size.name] + source_metrics) From fd8082226dbed42a2fc2a3d8c63720de832b534d Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 14:44:12 +0100 Subject: [PATCH 069/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 594e8770..bce95ad2 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -16,5 +16,6 @@ with open(target_size / 'averaged_source_metrics.json', 'r') as f: results = json.load(f) source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()] - source_metrics = [(metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]) for metr in source_metrics] + source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics] + source_metrics_flat = [item for sublist in source_metrics for item in sublist] writer.writerow([model.name, target.name, target_size.name] + source_metrics) From 5b9ba0367fed38a6b8bc190bbb5e29ad7891cd9c Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 14:45:00 +0100 Subject: [PATCH 070/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index bce95ad2..283f2b12 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -18,4 +18,4 @@ source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()] source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics] source_metrics_flat = [item for sublist in source_metrics for item in sublist] - writer.writerow([model.name, target.name, target_size.name] + source_metrics) + writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat) From 22b7f61930df1a132e1dc9baf35cad239b111450 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 15:08:44 +0100 Subject: [PATCH 071/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 283f2b12..87f08da9 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -8,6 +8,13 @@ if endpoint.is_dir(): with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file: writer = csv.writer(csv_file) + source_names = ['target', 'aumc', 'eicu', 'hirid', 'miiv', 'convex_combination_without_target', 'convex_combination_0.1', 'convex_combination_0.2', 'convex_combination_0.5', 'convex_combination_1', 'convex_combination_2', 'convex_combination_5'] + stats = ['avg', 'std', 'CI_0.95'] + # combine fieldnames and stats + full_fields = [f'{source}_{stat}' for source in source_names for stat in stats] + writer = csv.DictWriter(csv_file, fieldnames=full_fields) + + writer.writeheader() for model in endpoint.iterdir(): if model.name == "LSTM": continue @@ -15,7 +22,8 @@ for target_size in target.iterdir(): with open(target_size / 'averaged_source_metrics.json', 'r') as f: results = json.load(f) - source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()] - source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics] - source_metrics_flat = [item for sublist in source_metrics for item in sublist] - writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat) + # source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()] + # source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics] + # source_metrics_flat = [item for sublist in source_metrics for item in sublist] + # writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat) + writer.writerow({f'{source}_{stat}': source_metrics[metric][stat] for source, source_metrics in results.items() for stat in stats}) From 095c79bedacad19dc499d55eefd24971ebe111bd Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 15:09:31 +0100 Subject: [PATCH 072/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 87f08da9..0cc3faff 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -26,4 +26,4 @@ # source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics] # source_metrics_flat = [item for sublist in source_metrics for item in sublist] # writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat) - writer.writerow({f'{source}_{stat}': source_metrics[metric][stat] for source, source_metrics in results.items() for stat in stats}) + writer.writerow({f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats}) From 88a79e0900ffd42f96ad65e6637e0bd5961ef09b Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 15:12:26 +0100 Subject: [PATCH 073/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 0cc3faff..67a35e74 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -8,11 +8,12 @@ if endpoint.is_dir(): with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file: writer = csv.writer(csv_file) + info = ["model", "target", "target_size"] source_names = ['target', 'aumc', 'eicu', 'hirid', 'miiv', 'convex_combination_without_target', 'convex_combination_0.1', 'convex_combination_0.2', 'convex_combination_0.5', 'convex_combination_1', 'convex_combination_2', 'convex_combination_5'] stats = ['avg', 'std', 'CI_0.95'] # combine fieldnames and stats full_fields = [f'{source}_{stat}' for source in source_names for stat in stats] - writer = csv.DictWriter(csv_file, fieldnames=full_fields) + writer = csv.DictWriter(csv_file, fieldnames=info+full_fields) writer.writeheader() for model in endpoint.iterdir(): @@ -26,4 +27,7 @@ # source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics] # source_metrics_flat = [item for sublist in source_metrics for item in sublist] # writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat) - writer.writerow({f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats}) + + info = [model.name, target.name, target_size.name] + metrics_row = {f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats} + writer.writerow(info + metrics_row) From c8396b0427c1b37d4ccb653ad657c68325da0d02 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 15:13:39 +0100 Subject: [PATCH 074/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 67a35e74..5b88fba2 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -28,6 +28,10 @@ # source_metrics_flat = [item for sublist in source_metrics for item in sublist] # writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat) - info = [model.name, target.name, target_size.name] + info = { + 'model': model.name, + 'target': target.name, + 'target_size': target_size.name + } metrics_row = {f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats} writer.writerow(info + metrics_row) From 6e505bfe0ec68486c6e6976297cd8dc13bf0333b Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 15:14:53 +0100 Subject: [PATCH 075/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 5b88fba2..268da395 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -28,10 +28,11 @@ # source_metrics_flat = [item for sublist in source_metrics for item in sublist] # writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat) - info = { + row_data = { 'model': model.name, 'target': target.name, 'target_size': target_size.name } - metrics_row = {f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats} - writer.writerow(info + metrics_row) + metrics_data = {f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats} + row_data.update(metrics_data) + writer.writerow(row_data) From ea062c91b427c73b4f82b4f268630d865d3a7160 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 15:39:10 +0100 Subject: [PATCH 076/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 268da395..87ee4126 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -20,13 +20,10 @@ if model.name == "LSTM": continue for target in model.iterdir(): - for target_size in target.iterdir(): - with open(target_size / 'averaged_source_metrics.json', 'r') as f: + target_sizes = ['target_500', 'target_1000', 'target_2000'] + for target_size in target_sizes: + with open(target / target_size / 'averaged_source_metrics.json', 'r') as f: results = json.load(f) - # source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()] - # source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics] - # source_metrics_flat = [item for sublist in source_metrics for item in sublist] - # writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat) row_data = { 'model': model.name, From f66c78785c5705ab2f61026228ed63ee388025c1 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 15:39:33 +0100 Subject: [PATCH 077/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 87ee4126..ddcb71cc 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -28,7 +28,7 @@ row_data = { 'model': model.name, 'target': target.name, - 'target_size': target_size.name + 'target_size': target_size } metrics_data = {f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats} row_data.update(metrics_data) From 6aeec62c1a4d771fa21a3a8248226c82d9e8401d Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 15:45:24 +0100 Subject: [PATCH 078/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index ddcb71cc..2322ca4a 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -30,6 +30,11 @@ 'target': target.name, 'target_size': target_size } - metrics_data = {f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats} - row_data.update(metrics_data) + for stat in stats: + for source, source_metrics in results.items(): + if stat == 'CI_0.95': + row_data[f'{source}_{stat}_min'] = source_metrics[metric][0][stat][0] * 100 + row_data[f'{source}_{stat}_max'] = source_metrics[metric][0][stat][1] * 100 + else: + row_data[f'{source}_{stat}'] = source_metrics[metric][0][stat] * 100 writer.writerow(row_data) From a3e68f6542a6f7297f74319d0951b926a36d3c07 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 15:45:52 +0100 Subject: [PATCH 079/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 2322ca4a..f8d686be 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -10,7 +10,7 @@ writer = csv.writer(csv_file) info = ["model", "target", "target_size"] source_names = ['target', 'aumc', 'eicu', 'hirid', 'miiv', 'convex_combination_without_target', 'convex_combination_0.1', 'convex_combination_0.2', 'convex_combination_0.5', 'convex_combination_1', 'convex_combination_2', 'convex_combination_5'] - stats = ['avg', 'std', 'CI_0.95'] + stats = ['avg', 'std', 'CI_0.95_min', 'CI_0.95_max'] # combine fieldnames and stats full_fields = [f'{source}_{stat}' for source in source_names for stat in stats] writer = csv.DictWriter(csv_file, fieldnames=info+full_fields) From 87b7fbe307e4c1b100b89b8127c5f0dc17174669 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 13 Jan 2023 15:46:50 +0100 Subject: [PATCH 080/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index f8d686be..11e6d40b 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -10,6 +10,7 @@ writer = csv.writer(csv_file) info = ["model", "target", "target_size"] source_names = ['target', 'aumc', 'eicu', 'hirid', 'miiv', 'convex_combination_without_target', 'convex_combination_0.1', 'convex_combination_0.2', 'convex_combination_0.5', 'convex_combination_1', 'convex_combination_2', 'convex_combination_5'] + stats_basis = ['avg', 'std', 'CI_0.95'] stats = ['avg', 'std', 'CI_0.95_min', 'CI_0.95_max'] # combine fieldnames and stats full_fields = [f'{source}_{stat}' for source in source_names for stat in stats] @@ -30,7 +31,7 @@ 'target': target.name, 'target_size': target_size } - for stat in stats: + for stat in stats_basis: for source, source_metrics in results.items(): if stat == 'CI_0.95': row_data[f'{source}_{stat}_min'] = source_metrics[metric][0][stat][0] * 100 From 2277198ece9e3932d8bc5a25f5d70d9656f33308 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 14 Jan 2023 15:50:32 +0100 Subject: [PATCH 081/163] remove evaluate and test_all --- icu_benchmarks/cross_validation.py | 49 ------------------ icu_benchmarks/data/preprocess.py | 80 +++++++++++------------------- icu_benchmarks/run.py | 12 +---- 3 files changed, 31 insertions(+), 110 deletions(-) diff --git a/icu_benchmarks/cross_validation.py b/icu_benchmarks/cross_validation.py index 347c33a8..adbac6b0 100644 --- a/icu_benchmarks/cross_validation.py +++ b/icu_benchmarks/cross_validation.py @@ -90,52 +90,3 @@ def execute_repeated_cv( log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) return agg_loss / (cv_repetitions_to_train * cv_folds_to_train) - - -def evaluate( - data_dir: Path, - log_dir: Path, - seed: int, - source_dir: Path = None, - reproducible: bool = True, - debug: bool = False, - use_cache: bool = False, -) -> float: - """Preprocesses data and trains a model for each fold. - - Args: - data_dir: Path to the data directory. - log_dir: Path to the log directory. - seed: Random seed. - load_weights: Whether to load weights from source_dir. - source_dir: Path to the source directory. - cv_folds: Number of folds for cross validation. - cv_folds_to_train: Number of folds to use during training. If None, all folds are trained on. - reproducible: Whether to make torch reproducible. - debug: Whether to load less data and enable more logging. - use_cache: Whether to cache and use cached data. - test_on: Dataset to test on. Can be "test" or "val" (e.g. for hyperparameter tuning). - - Returns: - The average loss of all folds. - """ - - data = preprocess_data( - data_dir, - seed=seed, - debug=debug, - use_cache=use_cache, - test_all=True, - ) - - run_dir_seed = log_dir / f"seed_{seed}" - run_dir_seed.mkdir(parents=True, exist_ok=True) - - return train_common( - data, - log_dir=run_dir_seed, - load_weights=True, - source_dir=source_dir, - seed=seed, - reproducible=reproducible, - ) diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py index 04563e9f..6f573d1d 100644 --- a/icu_benchmarks/data/preprocess.py +++ b/icu_benchmarks/data/preprocess.py @@ -27,7 +27,6 @@ def make_single_split( seed: int = 42, debug: bool = False, fold_size: int = None, - test_all: bool = False, ) -> dict[dict[pd.DataFrame]]: """Randomly split the data into training, validation, and test set. @@ -40,7 +39,6 @@ def make_single_split( fold_index: Index of the fold to return. seed: Random seed. debug: Load less data if true. - test_all: If true, the test set will be the entire dataset. Returns: Input data divided into 'train', 'val', and 'test'. @@ -52,42 +50,36 @@ def make_single_split( stays = stays.sample(frac=0.01, random_state=seed) labels = data["OUTCOME"][vars["LABEL"]].loc[stays.index] - if test_all: - split = { - "train": stays.iloc[0:0], - "val": stays.iloc[0:0], - "test": stays, - } - else: - outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed) - dev, test = list(outer_CV.split(stays, labels))[repetition_index] + + outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed) + dev, test = list(outer_CV.split(stays, labels))[repetition_index] - if fold_size: - start_index = 0 - end_index = fold_size + if fold_size: + start_index = 0 + end_index = fold_size + pre_dev = dev[start_index:end_index] + leave_for_test = dev[end_index:] + pre_dev_labels = labels.iloc[pre_dev] + while pre_dev_labels.sum() < cv_folds: + start_index += fold_size + end_index += fold_size pre_dev = dev[start_index:end_index] - leave_for_test = dev[end_index:] pre_dev_labels = labels.iloc[pre_dev] - while pre_dev_labels.sum() < cv_folds: - start_index += fold_size - end_index += fold_size - pre_dev = dev[start_index:end_index] - pre_dev_labels = labels.iloc[pre_dev] - leave_for_test = np.append(dev[0:start_index], dev[end_index:]) - dev = pre_dev - test = np.append(test, leave_for_test) - - dev_stays = stays.iloc[dev] - dev_labels = labels.iloc[dev] - - inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed) - train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index] - - split = { - "train": dev_stays.iloc[train], - "val": dev_stays.iloc[val], - "test": stays.iloc[test], - } + leave_for_test = np.append(dev[0:start_index], dev[end_index:]) + dev = pre_dev + test = np.append(test, leave_for_test) + + dev_stays = stays.iloc[dev] + dev_labels = labels.iloc[dev] + + inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed) + train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index] + + split = { + "train": dev_stays.iloc[train], + "val": dev_stays.iloc[val], + "test": stays.iloc[test], + } data_split = {} for fold_name, fold in split.items(): # Loop through train / val / test @@ -100,24 +92,17 @@ def make_single_split( return data_split -def apply_recipe_to_splits( - recipe: Recipe, data: dict[dict[pd.DataFrame]], type: str, test_all: bool = False -) -> dict[dict[pd.DataFrame]]: +def apply_recipe_to_splits(recipe: Recipe, data: dict[dict[pd.DataFrame]], type: str) -> dict[dict[pd.DataFrame]]: """Fits and transforms the training data, then transforms the validation and test data with the recipe. Args: recipe: Object containing info about the data and steps. data: Dict containing 'train', 'val', and 'test' and types of data per split. type: Whether to apply recipe to dynamic data, static data or outcomes. - test_all: If true, the test set will be the entire dataset. Returns: Transformed data divided into 'train', 'val', and 'test'. """ - if test_all: - data["test"][type] = recipe.prep(data["test"][type]) - return data - data["train"][type] = recipe.prep() data["val"][type] = recipe.bake(data["val"][type]) data["test"][type] = recipe.bake(data["test"][type]) @@ -138,7 +123,6 @@ def preprocess_data( cv_folds: int = 5, fold_size: int = None, fold_index: int = 0, - test_all: bool = False, ) -> dict[dict[pd.DataFrame]]: """Perform loading, splitting, imputing and normalising of task data. @@ -154,7 +138,6 @@ def preprocess_data( repetition_index: Index of the repetition to return. cv_folds: Number of folds to use for cross validation. fold_index: Index of the fold to return. - test_all: If true, the test set will be the entire dataset. Returns: Preprocessed data as DataFrame in a hierarchical dict with data type (STATIC/DYNAMIC/OUTCOME) @@ -163,8 +146,6 @@ def preprocess_data( cache_dir = data_dir / "cache" if fold_size: cache_dir = cache_dir / f"T{fold_size}" - if test_all: - cache_dir = cache_dir / "test_complete" dumped_file_names = json.dumps(file_names, sort_keys=True) dumped_vars = json.dumps(vars, sort_keys=True) config_string = f"{dumped_file_names}{dumped_vars}{use_features}{seed}{repetition_index}{fold_index}{debug}".encode( @@ -193,7 +174,6 @@ def preprocess_data( seed=seed, debug=debug, fold_size=fold_size, - test_all=test_all, ) logging.info("Preprocessing static data.") @@ -203,7 +183,7 @@ def preprocess_data( sta_rec.add_step(StepSklearn(SimpleImputer(missing_values=None, strategy="most_frequent"), sel=has_type("object"))) sta_rec.add_step(StepSklearn(LabelEncoder(), sel=has_type("object"), columnwise=True)) - data = apply_recipe_to_splits(sta_rec, data, "STATIC", test_all=test_all) + data = apply_recipe_to_splits(sta_rec, data, "STATIC") logging.info("Preprocessing dynamic data.") dyn_rec = Recipe(data["train"]["DYNAMIC"], [], vars["DYNAMIC"], vars["GROUP"], vars["SEQUENCE"]) @@ -217,7 +197,7 @@ def preprocess_data( dyn_rec.add_step(StepImputeFill(method="ffill")) dyn_rec.add_step(StepImputeFill(value=0)) - data = apply_recipe_to_splits(dyn_rec, data, "DYNAMIC", test_all=test_all) + data = apply_recipe_to_splits(dyn_rec, data, "DYNAMIC") if use_cache and not cache_file.exists(): if not cache_dir.exists(): diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py index 54ef17a9..9e43c78b 100644 --- a/icu_benchmarks/run.py +++ b/icu_benchmarks/run.py @@ -9,7 +9,7 @@ from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters from icu_benchmarks.models.domain_adaptation import domain_adaptation from scripts.plotting.utils import plot_aggregated_results -from icu_benchmarks.cross_validation import execute_repeated_cv, evaluate +from icu_benchmarks.cross_validation import execute_repeated_cv from icu_benchmarks.run_utils import ( build_parser, create_run_dir, @@ -45,16 +45,6 @@ def main(my_args=tuple(sys.argv[1:])): run_dir = create_run_dir(log_dir) source_dir = args.source_dir gin.parse_config_file(source_dir / "train_config.gin") - evaluate( - args.data_dir, - run_dir, - args.seed, - source_dir=source_dir, - reproducible=reproducible, - debug=args.debug, - use_cache=args.cache, - ) - return if args.command == "da": gin_config_files = ( [Path(f"configs/experiments/{args.experiment}.gin")] From ccc0d34c2b7dad5cf608dc451859636820e3fe66 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 14 Jan 2023 15:51:47 +0100 Subject: [PATCH 082/163] Update LogisticRegression.gin --- configs/models/LogisticRegression.gin | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/models/LogisticRegression.gin b/configs/models/LogisticRegression.gin index 9a18f798..0d18b8f7 100644 --- a/configs/models/LogisticRegression.gin +++ b/configs/models/LogisticRegression.gin @@ -18,6 +18,6 @@ model/hyperparameter.penalty = ["l1", "l2", "elasticnet"] model/hyperparameter.l1_ratio = (0.0, 1.0) tune_hyperparameters.scopes = ["model"] -tune_hyperparameters.n_initial_points = 10 -tune_hyperparameters.n_calls = 100 -tune_hyperparameters.folds_to_tune_on = 3 +tune_hyperparameters.n_initial_points = 5 +tune_hyperparameters.n_calls = 30 +tune_hyperparameters.folds_to_tune_on = 2 From 395841f0625fdcd714955193a91e16006e500818 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 14 Jan 2023 15:52:25 +0100 Subject: [PATCH 083/163] Update preprocess.py --- icu_benchmarks/data/preprocess.py | 1 - 1 file changed, 1 deletion(-) diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py index 6f573d1d..96a6abfe 100644 --- a/icu_benchmarks/data/preprocess.py +++ b/icu_benchmarks/data/preprocess.py @@ -50,7 +50,6 @@ def make_single_split( stays = stays.sample(frac=0.01, random_state=seed) labels = data["OUTCOME"][vars["LABEL"]].loc[stays.index] - outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed) dev, test = list(outer_CV.split(stays, labels))[repetition_index] From e1b3fe12cc3e3941ec7b35f849f0296ddd0dc214 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 14 Jan 2023 15:53:23 +0100 Subject: [PATCH 084/163] Update preprocess.py --- icu_benchmarks/data/preprocess.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py index 96a6abfe..27427bdc 100644 --- a/icu_benchmarks/data/preprocess.py +++ b/icu_benchmarks/data/preprocess.py @@ -51,6 +51,8 @@ def make_single_split( labels = data["OUTCOME"][vars["LABEL"]].loc[stays.index] outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed) + inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed) + dev, test = list(outer_CV.split(stays, labels))[repetition_index] if fold_size: @@ -71,7 +73,6 @@ def make_single_split( dev_stays = stays.iloc[dev] dev_labels = labels.iloc[dev] - inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed) train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index] split = { From 94969b713f72be8d10d31174a9ca176b660ce48f Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 14 Jan 2023 16:33:15 +0100 Subject: [PATCH 085/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 11e6d40b..fd0678f3 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -18,8 +18,6 @@ writer.writeheader() for model in endpoint.iterdir(): - if model.name == "LSTM": - continue for target in model.iterdir(): target_sizes = ['target_500', 'target_1000', 'target_2000'] for target_size in target_sizes: From c647be980def6de8a45072d1ca4ad6c09cc21f9c Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 14 Jan 2023 21:04:52 +0100 Subject: [PATCH 086/163] fix comments --- icu_benchmarks/models/domain_adaptation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 5fe7461c..168ec07c 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -177,7 +177,7 @@ def domain_adaptation( log_dir_fold = log_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}" log_dir_fold.mkdir(parents=True, exist_ok=True) - # evaluate target baselines + # train target model target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) test_predictions, test_labels = get_predictions_for_all_models( @@ -189,10 +189,10 @@ def domain_adaptation( source_datasets=source_datasets, ) - # evaluate source baselines for baseline, predictions in test_predictions.items(): logging.info("Evaluating model: {}".format(baseline)) fold_results[baseline] = calculate_metrics(predictions, test_labels) + # evaluate baselines # evaluate convex combination of models test_predictions_list = list(test_predictions.values()) From 7d54d7ea6354126742dc7b5b9e43bbee43964dc6 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 14 Jan 2023 21:05:39 +0100 Subject: [PATCH 087/163] test different weights --- icu_benchmarks/models/domain_adaptation.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 168ec07c..a73775c3 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -136,9 +136,19 @@ def domain_adaptation( cv_folds = 5 cv_folds_to_train = 5 target_sizes = [500, 1000, 2000] - datasets = ["hirid", "aumc", "eicu", "miiv"] + datasets = ["miiv", "aumc", "eicu", "miiv"] target_weights = [0.1, 0.2, 0.5, 1, 2, 5] - weights = [1] * (len(datasets) - 1) + target_weights = [0.1, 0.2, 0.5, 1, 2, 5] + # weights = [1] * (len(datasets) - 1) + weights = [ + [0, 1, 2, 1], + [0, 1, 5, 1], + [0, 1, 10, 1], + [1, 1, 1, 1], + [1, 1, 2, 1], + [1, 1, 5, 1], + [1, 1, 10, 1], + ] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") gin_config_before_tuning = gin.config_str() @@ -203,10 +213,11 @@ def domain_adaptation( fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) logging.info("Evaluating convex combination of models.") - for t in target_weights: - w = [t * sum(weights)] + weights + for w in weights: + # w = weights + [t * sum(weights)] logging.info(f"Evaluating target weight: {t}") - test_pred = np.average(test_predictions_list, axis=0, weights=w) + logging.info(f"Evaluating weights: {w}") + test_pred = np.average(source_predictions_with_target, axis=0, weights=w) fold_results[f"convex_combination_{t}"] = calculate_metrics(test_pred, test_labels) log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) From c6a54cec2ec430ccd734777b239a63fbf15a452c Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 14 Jan 2023 21:11:16 +0100 Subject: [PATCH 088/163] only plot avg --- scripts/results/da_to_csv.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index fd0678f3..75281df1 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -11,7 +11,9 @@ info = ["model", "target", "target_size"] source_names = ['target', 'aumc', 'eicu', 'hirid', 'miiv', 'convex_combination_without_target', 'convex_combination_0.1', 'convex_combination_0.2', 'convex_combination_0.5', 'convex_combination_1', 'convex_combination_2', 'convex_combination_5'] stats_basis = ['avg', 'std', 'CI_0.95'] + stats_basis = ['avg'] stats = ['avg', 'std', 'CI_0.95_min', 'CI_0.95_max'] + stats = ['avg'] # combine fieldnames and stats full_fields = [f'{source}_{stat}' for source in source_names for stat in stats] writer = csv.DictWriter(csv_file, fieldnames=info+full_fields) From ac1a7bb3f873f4a3f51c1fb23afb219351bf3f5d Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 14 Jan 2023 21:15:28 +0100 Subject: [PATCH 089/163] test other weights --- icu_benchmarks/models/domain_adaptation.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index a73775c3..36ff8ad5 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -136,8 +136,7 @@ def domain_adaptation( cv_folds = 5 cv_folds_to_train = 5 target_sizes = [500, 1000, 2000] - datasets = ["miiv", "aumc", "eicu", "miiv"] - target_weights = [0.1, 0.2, 0.5, 1, 2, 5] + datasets = ["aumc", "eicu", "hirid", "miiv"] target_weights = [0.1, 0.2, 0.5, 1, 2, 5] # weights = [1] * (len(datasets) - 1) weights = [ @@ -209,16 +208,16 @@ def domain_adaptation( test_predictions_list_without_target = test_predictions_list[1:] logging.info("Evaluating convex combination of models without target.") - test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=weights) + test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1]) fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) logging.info("Evaluating convex combination of models.") for w in weights: # w = weights + [t * sum(weights)] - logging.info(f"Evaluating target weight: {t}") + # logging.info(f"Evaluating target weight: {t}") logging.info(f"Evaluating weights: {w}") - test_pred = np.average(source_predictions_with_target, axis=0, weights=w) - fold_results[f"convex_combination_{t}"] = calculate_metrics(test_pred, test_labels) + test_pred = np.average(test_predictions_list, axis=0, weights=w) + fold_results[f"convex_combination_{w}"] = calculate_metrics(test_pred, test_labels) log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) From da3fce1d361bd54602ae13d927cdac6e0bf12d34 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 14 Jan 2023 21:19:33 +0100 Subject: [PATCH 090/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 36ff8ad5..8d247c0d 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -163,7 +163,7 @@ def domain_adaptation( gin.bind_parameter("preprocess.fold_size", target_size) log_dir = run_dir / task / model / dataset / f"target_{target_size}" log_dir.mkdir(parents=True, exist_ok=True) - choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug) + choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug) gin_config_with_target_hyperparameters = gin.config_str() results = {} for repetition in range(cv_repetitions_to_train): From c0f8c392c267b5eddb61398f1eb64a371c24122e Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 14 Jan 2023 22:07:53 +0100 Subject: [PATCH 091/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 75281df1..346b3126 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -20,10 +20,10 @@ writer.writeheader() for model in endpoint.iterdir(): - for target in model.iterdir(): + for target in ["aumc", "eicu", "hirid", "miiv"]: target_sizes = ['target_500', 'target_1000', 'target_2000'] for target_size in target_sizes: - with open(target / target_size / 'averaged_source_metrics.json', 'r') as f: + with open(model / target / target_size / 'averaged_source_metrics.json', 'r') as f: results = json.load(f) row_data = { From 7d91bac44c99c478aecaf4f49d05dcf3f8c6000b Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 14 Jan 2023 22:10:27 +0100 Subject: [PATCH 092/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 346b3126..8d625cc2 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -28,7 +28,7 @@ row_data = { 'model': model.name, - 'target': target.name, + 'target': target, 'target_size': target_size } for stat in stats_basis: From 4c038854ff57e5651194c70e68cd7170ce439ceb Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 00:49:02 +0100 Subject: [PATCH 093/163] auc and loss based weigth functions --- icu_benchmarks/models/domain_adaptation.py | 175 ++++++++++++++++----- 1 file changed, 134 insertions(+), 41 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 8d247c0d..5ec3b687 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -1,3 +1,4 @@ +import inspect import json import os import random @@ -8,6 +9,7 @@ import pandas as pd from pathlib import Path import scipy.stats as stats +from sklearn.metrics import log_loss, roc_auc_score from icu_benchmarks.data.loader import RICUDataset from icu_benchmarks.data.preprocess import preprocess_data @@ -19,22 +21,13 @@ from icu_benchmarks.run_utils import log_full_line -def get_predictions_for_single_model(target_model: object, dataset: RICUDataset, model_dir: Path, log_dir: Path): - """Get predictions for a single model. - - Args: - target_model: Model to get predictions for. - dataset: Dataset to get predictions for. - model_dir: Path to directory where model weights are stored. - log_dir: Path to directory where model output should be saved. - - Returns: - Tuple of predictions and labels. - """ +def load_model(model_dir: Path, log_dir: Path): + """Load model from gin config.""" gin.parse_config_file(model_dir / "train_config.gin") - if isinstance(target_model, DLWrapper): + model_type = gin.query_parameter("train_common.model") + if str(model_type) == "@DLWrapper()": model = DLWrapper() - else: + elif str(model_type) == "@MLWrapper()": model = MLWrapper() model.set_log_dir(log_dir) if (model_dir / "model.torch").is_file(): @@ -45,7 +38,22 @@ def get_predictions_for_single_model(target_model: object, dataset: RICUDataset, model.load_weights(model_dir / "model.joblib") else: raise Exception("No weights to load at path : {}".format(model_dir / "model.*")) - logging.info(f"Generating predictions for model : {model_dir}") + return model + + +def get_predictions_for_single_model(dataset: RICUDataset, model_dir: Path, log_dir: Path): + """Get predictions for a single model. + + Args: + target_model: Model to get predictions for. + dataset: Dataset to get predictions for. + model_dir: Path to directory where model weights are stored. + log_dir: Path to directory where model output should be saved. + + Returns: + Tuple of predictions and labels. + """ + model = load_model(model_dir, log_dir) return model.predict(dataset, None, None) @@ -55,8 +63,8 @@ def calculate_metrics(predictions: np.ndarray, labels: np.ndarray): value = metric(labels, predictions) metric_results[name] = value # Only log float values - if isinstance(value, np.float): - logging.info("Test {}: {}".format(name, value)) + # if isinstance(value, np.float): + # logging.info("Test {}: {}".format(name, value)) return metric_results @@ -95,11 +103,10 @@ def get_predictions_for_all_models( _, test_labels = test_dataset.get_data_and_labels() test_predictions = {} - logging.info("Generating predictions for target") test_predictions["target"] = target_model.predict(test_dataset, None, None) for source in source_datasets: model_dir = source_dir / source - test_predictions[model_dir.name] = get_predictions_for_single_model(target_model, test_dataset, model_dir, log_dir) + test_predictions[model_dir.name] = get_predictions_for_single_model(test_dataset, model_dir, log_dir) for name, prediction in test_predictions.items(): if isinstance(target_model, MLWrapper) and prediction.ndim == 2: @@ -139,14 +146,23 @@ def domain_adaptation( datasets = ["aumc", "eicu", "hirid", "miiv"] target_weights = [0.1, 0.2, 0.5, 1, 2, 5] # weights = [1] * (len(datasets) - 1) - weights = [ - [0, 1, 2, 1], - [0, 1, 5, 1], - [0, 1, 10, 1], - [1, 1, 1, 1], - [1, 1, 2, 1], - [1, 1, 5, 1], - [1, 1, 10, 1], + auc_functions = [ + lambda x: (x-0.5) ** 1, + lambda x: (x-0.5) ** 2, + lambda x: (x-0.5) ** 3, + lambda x: (x-0.5) ** 4, + lambda x: (x-0.5) ** 5, + lambda x: ((2 ** (10*(x-0.5))) - 1), + lambda x: ((3 ** (10*(x-0.5))) - 1), + ] + loss_functions = [ + lambda x: (1-x) ** 1, + lambda x: (1-x) ** 2, + lambda x: (1-x) ** 3, + lambda x: (1-x) ** 4, + lambda x: (1-x) ** 5, + lambda x: ((2 ** (10*(1-x))) - 1), + lambda x: ((3 ** (10*(1-x))) - 1), ] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") @@ -163,12 +179,12 @@ def domain_adaptation( gin.bind_parameter("preprocess.fold_size", target_size) log_dir = run_dir / task / model / dataset / f"target_{target_size}" log_dir.mkdir(parents=True, exist_ok=True) - choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug) - gin_config_with_target_hyperparameters = gin.config_str() + # choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug) + # gin_config_with_target_hyperparameters = gin.config_str() results = {} for repetition in range(cv_repetitions_to_train): for fold_index in range(cv_folds_to_train): - gin.parse_config(gin_config_with_target_hyperparameters) + # gin.parse_config(gin_config_with_target_hyperparameters) results[f"{repetition}_{fold_index}"] = {} fold_results = results[f"{repetition}_{fold_index}"] @@ -187,8 +203,26 @@ def domain_adaptation( log_dir_fold.mkdir(parents=True, exist_ok=True) # train target model - target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) + # target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) + target_model = load_model(Path("../yaib_logs/DA") / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold) + val_predictions, val_labels = get_predictions_for_all_models( + target_model, + data, + log_dir_fold, + source_dir=model_path / task / model, + seed=seed, + source_datasets=source_datasets, + test_on="val", + ) + val_losses = {} + val_aucs = {} + val_losses["target"] = log_loss(val_labels, val_predictions["target"]) + val_aucs["target"] = roc_auc_score(val_labels, val_predictions["target"]) + for baseline, predictions in val_predictions.items(): + val_losses[baseline] = log_loss(val_labels, predictions) + val_aucs[baseline] = roc_auc_score(val_labels, predictions) + test_predictions, test_labels = get_predictions_for_all_models( target_model, data, @@ -199,7 +233,7 @@ def domain_adaptation( ) for baseline, predictions in test_predictions.items(): - logging.info("Evaluating model: {}".format(baseline)) + # logging.info("Evaluating model: {}".format(baseline)) fold_results[baseline] = calculate_metrics(predictions, test_labels) # evaluate baselines @@ -207,19 +241,78 @@ def domain_adaptation( test_predictions_list = list(test_predictions.values()) test_predictions_list_without_target = test_predictions_list[1:] - logging.info("Evaluating convex combination of models without target.") + # logging.info("Evaluating convex combination of models without target.") test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1]) fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) - logging.info("Evaluating convex combination of models.") - for w in weights: - # w = weights + [t * sum(weights)] - # logging.info(f"Evaluating target weight: {t}") - logging.info(f"Evaluating weights: {w}") - test_pred = np.average(test_predictions_list, axis=0, weights=w) - fold_results[f"convex_combination_{w}"] = calculate_metrics(test_pred, test_labels) + # logging.info("Evaluating convex combination of models.") + # for w in weights: + # # w = weights + [t * sum(weights)] + # # logging.info(f"Evaluating target weight: {t}") + # logging.info(f"Evaluating weights: {w}") + # test_pred = np.average(test_predictions_list, axis=0, weights=w) + # fold_results[f"convex_combination_{w}"] = calculate_metrics(test_pred, test_labels) + + # find top three auc functions + rated_auc_functions = [] + for f in auc_functions: + f_str = inspect.getsource(f).replace(" ", "")[:-2] + # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.") + weights = [f(x) for x in val_aucs.values()] + # logging.info(f"weights: {weights}") + test_pred = np.average(test_predictions_list, axis=0, weights=weights) + fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels) + rated_auc_functions.append((f_str, fold_results[f"AUC_{f_str}"]["AUC"])) + rated_auc_functions.sort(key=lambda x: x[1], reverse=True) + - log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) + # find top three loss functions + rated_loss_functions = [] + for f in loss_functions: + # strip whitespace + f_str = inspect.getsource(f).replace(" ", "")[:-2] + # logging.info(f"Evaluating convex combination of models with loss function {f_str}.") + weights = [f(x) for x in val_losses.values()] + # logging.info(f"losses: {val_losses.values()}") + # logging.info(f"weights: {weights}") + test_pred = np.average(test_predictions_list, axis=0, weights=weights) + fold_results[f"loss_{f_str}"] = calculate_metrics(test_pred, test_labels) + rated_loss_functions.append((f_str, fold_results[f"loss_{f_str}"]["AUC"])) + rated_loss_functions.sort(key=lambda x: x[1], reverse=True) + + # logging.info(f"Top three AUC functions: {rated_auc_functions[:3]}") + # logging.info(f"Top three loss functions: {rated_loss_functions[:3]}") + + log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) + # average results over folds + agg_aucs = {} + for fold_results in results.values(): + for source, metrics in fold_results.items(): + agg_aucs.setdefault(source, []).append(metrics["AUC"]) + + avg_aucs = {} + for source, aucs in agg_aucs.items(): + avg_aucs[source] = np.mean(aucs) + + # print baselines first, then top three AUC, then top three loss + for source, auc in avg_aucs.items(): + if source in ["target", "convex_combination_without_target"] + datasets: + logging.info(f"{source}: {auc}") + avg_aucs_list = sorted(avg_aucs.items(), key=lambda x: x[1], reverse=True) + i = 0 + for source, auc in avg_aucs_list: + if "AUC" in source: + i += 1 + logging.info(f"{source}: {auc}") + if i == 3: + break + i = 0 + for source, auc in avg_aucs_list: + if "loss" in source: + i += 1 + logging.info(f"{source}: {auc}") + if i == 3: + break log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) source_metrics = {} From 464ad05ddab6173d6f180d3aa42dd3c2535e6ec6 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 00:49:36 +0100 Subject: [PATCH 094/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 5ec3b687..16e155d6 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -204,7 +204,7 @@ def domain_adaptation( # train target model # target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) - target_model = load_model(Path("../yaib_logs/DA") / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold) + target_model = load_model(Path("../DA_logs/") / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold) val_predictions, val_labels = get_predictions_for_all_models( target_model, From 45932cfd344dc8e01e5dad2ad1a8951f1786c8a4 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 01:05:39 +0100 Subject: [PATCH 095/163] cache predictions --- icu_benchmarks/models/domain_adaptation.py | 54 ++++++++++++++-------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 16e155d6..156df6f8 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -166,6 +166,7 @@ def domain_adaptation( ] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") + old_run_dir = Path("../DA_logs/") gin_config_before_tuning = gin.config_str() # evaluate models on same test split @@ -204,17 +205,25 @@ def domain_adaptation( # train target model # target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) - target_model = load_model(Path("../DA_logs/") / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold) + target_model = load_model(old_run_dir / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold) - val_predictions, val_labels = get_predictions_for_all_models( - target_model, - data, - log_dir_fold, - source_dir=model_path / task / model, - seed=seed, - source_datasets=source_datasets, - test_on="val", - ) + # generate predictions and write to file if not already done + if not (log_dir_fold / "val_predictions.json").exists(): + val_predictions, val_labels = get_predictions_for_all_models( + target_model, + data, + log_dir_fold, + source_dir=model_path / task / model, + seed=seed, + source_datasets=source_datasets, + test_on="val", + ) + with open(log_dir_fold / "val_predictions.json", "w") as f: + json.dump(val_predictions, f, cls=JsonResultLoggingEncoder) + else: + with open(log_dir_fold / "val_predictions.json", "r") as f: + val_predictions = json.load(f) + _, val_labels = RICUDataset(data, split="val").get_data_and_labels() val_losses = {} val_aucs = {} val_losses["target"] = log_loss(val_labels, val_predictions["target"]) @@ -223,14 +232,23 @@ def domain_adaptation( val_losses[baseline] = log_loss(val_labels, predictions) val_aucs[baseline] = roc_auc_score(val_labels, predictions) - test_predictions, test_labels = get_predictions_for_all_models( - target_model, - data, - log_dir_fold, - source_dir=model_path / task / model, - seed=seed, - source_datasets=source_datasets, - ) + # generate predictions and write to file if not already done + if not (log_dir_fold / "test_predictions.json").exists(): + test_predictions, test_labels = get_predictions_for_all_models( + target_model, + data, + log_dir_fold, + source_dir=model_path / task / model, + seed=seed, + source_datasets=source_datasets, + ) + with open(log_dir_fold / "test_predictions.json", "w") as f: + json.dump(test_predictions, f, cls=JsonResultLoggingEncoder) + else: + with open(log_dir_fold / "test_predictions.json", "r") as f: + test_predictions = json.load(f) + _, test_labels = RICUDataset(data, split="test").get_data_and_labels() + for baseline, predictions in test_predictions.items(): # logging.info("Evaluating model: {}".format(baseline)) From be5ad300cb0202d9b075c25a6d4dc65100222f44 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 01:38:24 +0100 Subject: [PATCH 096/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 156df6f8..527fb9b0 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -231,6 +231,7 @@ def domain_adaptation( for baseline, predictions in val_predictions.items(): val_losses[baseline] = log_loss(val_labels, predictions) val_aucs[baseline] = roc_auc_score(val_labels, predictions) + logging.info("Validation losses: %s", val_losses) # generate predictions and write to file if not already done if not (log_dir_fold / "test_predictions.json").exists(): From 5c8265a2c334f755ebb05a01d0c139b7af358cab Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 02:47:15 +0100 Subject: [PATCH 097/163] Update preprocess.py --- icu_benchmarks/data/preprocess.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py index 27427bdc..11aec03e 100644 --- a/icu_benchmarks/data/preprocess.py +++ b/icu_benchmarks/data/preprocess.py @@ -123,6 +123,7 @@ def preprocess_data( cv_folds: int = 5, fold_size: int = None, fold_index: int = 0, + test_all: bool = False, ) -> dict[dict[pd.DataFrame]]: """Perform loading, splitting, imputing and normalising of task data. From a834354284589831f26a1caf8f962f262ae0004e Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 02:53:09 +0100 Subject: [PATCH 098/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 527fb9b0..8f0ca8ce 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -166,7 +166,7 @@ def domain_adaptation( ] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") - old_run_dir = Path("../DA_logs/") + old_run_dir = Path("../yaib_logs/DA") gin_config_before_tuning = gin.config_str() # evaluate models on same test split @@ -280,6 +280,9 @@ def domain_adaptation( weights = [f(x) for x in val_aucs.values()] # logging.info(f"weights: {weights}") test_pred = np.average(test_predictions_list, axis=0, weights=weights) + print(f_str) + print(test_pred.min()) + print(test_pred.max()) fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels) rated_auc_functions.append((f_str, fold_results[f"AUC_{f_str}"]["AUC"])) rated_auc_functions.sort(key=lambda x: x[1], reverse=True) From 5d6e273bc3411b0179ab5734eccf14bf58f51fb8 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 02:53:43 +0100 Subject: [PATCH 099/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 8f0ca8ce..d360783f 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -166,7 +166,7 @@ def domain_adaptation( ] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") - old_run_dir = Path("../yaib_logs/DA") + old_run_dir = Path("../DA_logs") gin_config_before_tuning = gin.config_str() # evaluate models on same test split From 5ba94281476e43d8be5c6f50eb935475d26abb98 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 02:55:14 +0100 Subject: [PATCH 100/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index d360783f..ac7871e0 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -254,6 +254,8 @@ def domain_adaptation( for baseline, predictions in test_predictions.items(): # logging.info("Evaluating model: {}".format(baseline)) fold_results[baseline] = calculate_metrics(predictions, test_labels) + print(test_predictions.min()) + print(test_predictions.max()) # evaluate baselines # evaluate convex combination of models From a8b202129500a5073dbf7aee48bf377c2e13bf54 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 02:55:39 +0100 Subject: [PATCH 101/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index ac7871e0..b56c183d 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -254,8 +254,8 @@ def domain_adaptation( for baseline, predictions in test_predictions.items(): # logging.info("Evaluating model: {}".format(baseline)) fold_results[baseline] = calculate_metrics(predictions, test_labels) - print(test_predictions.min()) - print(test_predictions.max()) + print(predictions.min()) + print(predictions.max()) # evaluate baselines # evaluate convex combination of models From 0c97842330b913f9ea1b66a03439682f1c1399bf Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 02:56:29 +0100 Subject: [PATCH 102/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index b56c183d..fcbb3b0e 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -254,8 +254,8 @@ def domain_adaptation( for baseline, predictions in test_predictions.items(): # logging.info("Evaluating model: {}".format(baseline)) fold_results[baseline] = calculate_metrics(predictions, test_labels) - print(predictions.min()) - print(predictions.max()) + print(min(predictions)) + print(max(predictions)) # evaluate baselines # evaluate convex combination of models From 95addb712cb40a06da56d0ebb145d52528e1d125 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 02:57:52 +0100 Subject: [PATCH 103/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index fcbb3b0e..b5ecb056 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -285,6 +285,8 @@ def domain_adaptation( print(f_str) print(test_pred.min()) print(test_pred.max()) + print(weights) + fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels) rated_auc_functions.append((f_str, fold_results[f"AUC_{f_str}"]["AUC"])) rated_auc_functions.sort(key=lambda x: x[1], reverse=True) From bbb693939494cb671717535018b1f085ad85826e Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 03:02:13 +0100 Subject: [PATCH 104/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index b5ecb056..a8a63348 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -231,6 +231,7 @@ def domain_adaptation( for baseline, predictions in val_predictions.items(): val_losses[baseline] = log_loss(val_labels, predictions) val_aucs[baseline] = roc_auc_score(val_labels, predictions) + logging.info("Validation AUCS: %s", val_aucs) logging.info("Validation losses: %s", val_losses) # generate predictions and write to file if not already done @@ -282,6 +283,7 @@ def domain_adaptation( weights = [f(x) for x in val_aucs.values()] # logging.info(f"weights: {weights}") test_pred = np.average(test_predictions_list, axis=0, weights=weights) + test_pred = (test_pred-np.min(test_pred))/(np.max(test_pred)-np.min(test_pred)) print(f_str) print(test_pred.min()) print(test_pred.max()) From da34511f884470dd8b79830cdd9f1a56b0bb2104 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 03:13:13 +0100 Subject: [PATCH 105/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index a8a63348..6039dbac 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -281,12 +281,9 @@ def domain_adaptation( f_str = inspect.getsource(f).replace(" ", "")[:-2] # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.") weights = [f(x) for x in val_aucs.values()] + weights.clip(min=0) # logging.info(f"weights: {weights}") - test_pred = np.average(test_predictions_list, axis=0, weights=weights) - test_pred = (test_pred-np.min(test_pred))/(np.max(test_pred)-np.min(test_pred)) print(f_str) - print(test_pred.min()) - print(test_pred.max()) print(weights) fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels) From bf6c6b9b392d3978de31489d2b1abd6c038bdbf7 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 03:13:49 +0100 Subject: [PATCH 106/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 6039dbac..93e793a0 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -280,7 +280,7 @@ def domain_adaptation( for f in auc_functions: f_str = inspect.getsource(f).replace(" ", "")[:-2] # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.") - weights = [f(x) for x in val_aucs.values()] + weights = np.array([f(x) for x in val_aucs.values()]) weights.clip(min=0) # logging.info(f"weights: {weights}") print(f_str) From 06e8e1ac5816fac7c60efdd3f89e020916c5c93a Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 03:14:51 +0100 Subject: [PATCH 107/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 93e793a0..3188c320 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -281,7 +281,8 @@ def domain_adaptation( f_str = inspect.getsource(f).replace(" ", "")[:-2] # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.") weights = np.array([f(x) for x in val_aucs.values()]) - weights.clip(min=0) + weights = weights.clip(min=0) + test_pred = np.average(test_predictions_list, axis=0, weights=weights) # logging.info(f"weights: {weights}") print(f_str) print(weights) From 0df060ccc62186232c656a0e75b110a8d1b53d9e Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 03:15:23 +0100 Subject: [PATCH 108/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 3188c320..0616cdcd 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -283,7 +283,7 @@ def domain_adaptation( weights = np.array([f(x) for x in val_aucs.values()]) weights = weights.clip(min=0) test_pred = np.average(test_predictions_list, axis=0, weights=weights) - # logging.info(f"weights: {weights}") + logging.info(f"weights: {weights}") print(f_str) print(weights) From 52f6090756aa0fac341076d9a5c2a1eab85b885b Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 03:48:17 +0100 Subject: [PATCH 109/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 114 ++++++++++++--------- 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 0616cdcd..0855655a 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -166,6 +166,7 @@ def domain_adaptation( ] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") + # old_run_dir = Path("../yaib_logs/DA") old_run_dir = Path("../DA_logs") gin_config_before_tuning = gin.config_str() @@ -184,6 +185,8 @@ def domain_adaptation( # gin_config_with_target_hyperparameters = gin.config_str() results = {} for repetition in range(cv_repetitions_to_train): + agg_val_losses = [] + agg_val_aucs = [] for fold_index in range(cv_folds_to_train): # gin.parse_config(gin_config_with_target_hyperparameters) results[f"{repetition}_{fold_index}"] = {} @@ -255,8 +258,6 @@ def domain_adaptation( for baseline, predictions in test_predictions.items(): # logging.info("Evaluating model: {}".format(baseline)) fold_results[baseline] = calculate_metrics(predictions, test_labels) - print(min(predictions)) - print(max(predictions)) # evaluate baselines # evaluate convex combination of models @@ -267,6 +268,9 @@ def domain_adaptation( test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1]) fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) + agg_val_losses.append(val_losses) + agg_val_aucs.append(val_aucs) + # logging.info("Evaluating convex combination of models.") # for w in weights: # # w = weights + [t * sum(weights)] @@ -275,41 +279,53 @@ def domain_adaptation( # test_pred = np.average(test_predictions_list, axis=0, weights=w) # fold_results[f"convex_combination_{w}"] = calculate_metrics(test_pred, test_labels) - # find top three auc functions - rated_auc_functions = [] - for f in auc_functions: - f_str = inspect.getsource(f).replace(" ", "")[:-2] - # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.") - weights = np.array([f(x) for x in val_aucs.values()]) - weights = weights.clip(min=0) - test_pred = np.average(test_predictions_list, axis=0, weights=weights) - logging.info(f"weights: {weights}") - print(f_str) - print(weights) - - fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels) - rated_auc_functions.append((f_str, fold_results[f"AUC_{f_str}"]["AUC"])) - rated_auc_functions.sort(key=lambda x: x[1], reverse=True) - - - # find top three loss functions - rated_loss_functions = [] - for f in loss_functions: - # strip whitespace - f_str = inspect.getsource(f).replace(" ", "")[:-2] - # logging.info(f"Evaluating convex combination of models with loss function {f_str}.") - weights = [f(x) for x in val_losses.values()] - # logging.info(f"losses: {val_losses.values()}") - # logging.info(f"weights: {weights}") - test_pred = np.average(test_predictions_list, axis=0, weights=weights) - fold_results[f"loss_{f_str}"] = calculate_metrics(test_pred, test_labels) - rated_loss_functions.append((f_str, fold_results[f"loss_{f_str}"]["AUC"])) - rated_loss_functions.sort(key=lambda x: x[1], reverse=True) - # logging.info(f"Top three AUC functions: {rated_auc_functions[:3]}") # logging.info(f"Top three loss functions: {rated_loss_functions[:3]}") - log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) + log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) + + avg_val_losses = np.array([np.mean([x[source] for x in agg_val_losses]) for source in val_losses.keys()]) + avg_val_aucs = {source: np.mean([x[source] for x in agg_val_aucs]) for source in val_aucs.keys()} + logging.info("Average validation losses: %s", dict(zip(val_losses.keys(), avg_val_losses))) + logging.info("Average validation AUCs: %s", dict(zip(val_aucs.keys(), avg_val_aucs))) + + scaled_losses = 0.9 * avg_val_losses / np.max(avg_val_losses) + logging.info(f"scaled_losses: {scaled_losses}") + + # find top three auc functions + rated_auc_functions = [] + for f in auc_functions: + f_str = inspect.getsource(f).replace(" ", "")[:-2] + # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.") + weights = np.array([f(x) for x in avg_val_aucs.values()]) + weights = weights.clip(min=0) + test_pred = np.average(test_predictions_list, axis=0, weights=weights) + # logging.info(f"weights: {weights}") + + fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels) + rated_auc_functions.append((f_str, fold_results[f"AUC_{f_str}"]["AUC"])) + rated_auc_functions.sort(key=lambda x: x[1], reverse=True) + # print top three auc functions + for f_str, auc in rated_auc_functions[:3]: + logging.info(f"{f_str}: {auc}") + + + # find top three loss functions + rated_loss_functions = [] + for f in loss_functions: + # strip whitespace + f_str = inspect.getsource(f).replace(" ", "")[:-2] + # logging.info(f"Evaluating convex combination of models with loss function {f_str}.") + weights = [f(x) for x in scaled_losses] + logging.info(f"weights: {weights}") + test_pred = np.average(test_predictions_list, axis=0, weights=weights) + fold_results[f"loss_{f_str}"] = calculate_metrics(test_pred, test_labels) + rated_loss_functions.append((f_str, fold_results[f"loss_{f_str}"]["AUC"])) + rated_loss_functions.sort(key=lambda x: x[1], reverse=True) + for f_str, auc in rated_auc_functions[:3]: + logging.info(f"{f_str}: {auc}") + + # average results over folds agg_aucs = {} for fold_results in results.values(): @@ -324,21 +340,21 @@ def domain_adaptation( for source, auc in avg_aucs.items(): if source in ["target", "convex_combination_without_target"] + datasets: logging.info(f"{source}: {auc}") - avg_aucs_list = sorted(avg_aucs.items(), key=lambda x: x[1], reverse=True) - i = 0 - for source, auc in avg_aucs_list: - if "AUC" in source: - i += 1 - logging.info(f"{source}: {auc}") - if i == 3: - break - i = 0 - for source, auc in avg_aucs_list: - if "loss" in source: - i += 1 - logging.info(f"{source}: {auc}") - if i == 3: - break + # avg_aucs_list = sorted(avg_aucs.items(), key=lambda x: x[1], reverse=True) + # i = 0 + # for source, auc in avg_aucs_list: + # if "AUC" in source: + # i += 1 + # logging.info(f"{source}: {auc}") + # if i == 3: + # break + # i = 0 + # for source, auc in avg_aucs_list: + # if "loss" in source: + # i += 1 + # logging.info(f"{source}: {auc}") + # if i == 3: + # break log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) source_metrics = {} From a9c83c1e192f324e6ff4c88c4aaa7ef2e17aabd1 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 03:52:09 +0100 Subject: [PATCH 110/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 0855655a..bc77d7c8 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -322,7 +322,7 @@ def domain_adaptation( fold_results[f"loss_{f_str}"] = calculate_metrics(test_pred, test_labels) rated_loss_functions.append((f_str, fold_results[f"loss_{f_str}"]["AUC"])) rated_loss_functions.sort(key=lambda x: x[1], reverse=True) - for f_str, auc in rated_auc_functions[:3]: + for f_str, auc in rated_loss_functions[:3]: logging.info(f"{f_str}: {auc}") From 3db23a42b7e33150440696beb5b9f64f5e9bc9b6 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 04:18:19 +0100 Subject: [PATCH 111/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index bc77d7c8..18c0980a 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -289,7 +289,7 @@ def domain_adaptation( logging.info("Average validation losses: %s", dict(zip(val_losses.keys(), avg_val_losses))) logging.info("Average validation AUCs: %s", dict(zip(val_aucs.keys(), avg_val_aucs))) - scaled_losses = 0.9 * avg_val_losses / np.max(avg_val_losses) + scaled_losses = np.array(0.9 * avg_val_losses / np.max(avg_val_losses)) logging.info(f"scaled_losses: {scaled_losses}") # find top three auc functions @@ -325,6 +325,12 @@ def domain_adaptation( for f_str, auc in rated_loss_functions[:3]: logging.info(f"{f_str}: {auc}") + # evaluate source only mixture + logging.info("Evaluating loss weighted source only mixture.") + loss_based_weights = 1 - scaled_losses[1:] + test_pred = np.average(test_predictions_list_without_target, axis=0, weights=loss_based_weights) + fold_results[f"loss_based_source_only_mixture"] = calculate_metrics(test_pred, test_labels) + logging.info(f"auc: {fold_results[f'loss_based_source_only_mixture']['AUC']}") # average results over folds agg_aucs = {} From 88d5ce59a394f4de7225dc4df4c6dcbc99725988 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 04:29:03 +0100 Subject: [PATCH 112/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 18c0980a..0edf6108 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -317,7 +317,7 @@ def domain_adaptation( f_str = inspect.getsource(f).replace(" ", "")[:-2] # logging.info(f"Evaluating convex combination of models with loss function {f_str}.") weights = [f(x) for x in scaled_losses] - logging.info(f"weights: {weights}") + # logging.info(f"weights: {weights}") test_pred = np.average(test_predictions_list, axis=0, weights=weights) fold_results[f"loss_{f_str}"] = calculate_metrics(test_pred, test_labels) rated_loss_functions.append((f_str, fold_results[f"loss_{f_str}"]["AUC"])) @@ -331,6 +331,12 @@ def domain_adaptation( test_pred = np.average(test_predictions_list_without_target, axis=0, weights=loss_based_weights) fold_results[f"loss_based_source_only_mixture"] = calculate_metrics(test_pred, test_labels) logging.info(f"auc: {fold_results[f'loss_based_source_only_mixture']['AUC']}") + + logging.info("Evaluating auc weighted source only mixture.") + auc_based_weights = [avg_val_aucs.values() - 0.5][1:] ** 2 + test_pred = np.average(test_predictions_list_without_target, axis=0, weights=auc_based_weights) + fold_results[f"auc_based_source_only_mixture"] = calculate_metrics(test_pred, test_labels) + logging.info(f"auc: {fold_results[f'auc_based_source_only_mixture']['AUC']}") # average results over folds agg_aucs = {} From ac2eac1b7b2ea7a14f4bab07b2a1b23108ce9a79 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 04:31:45 +0100 Subject: [PATCH 113/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 0edf6108..7c935ed4 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -333,7 +333,7 @@ def domain_adaptation( logging.info(f"auc: {fold_results[f'loss_based_source_only_mixture']['AUC']}") logging.info("Evaluating auc weighted source only mixture.") - auc_based_weights = [avg_val_aucs.values() - 0.5][1:] ** 2 + auc_based_weights = (np.array(list(avg_val_aucs.values())) - 0.5)[1:] ** 2 test_pred = np.average(test_predictions_list_without_target, axis=0, weights=auc_based_weights) fold_results[f"auc_based_source_only_mixture"] = calculate_metrics(test_pred, test_labels) logging.info(f"auc: {fold_results[f'auc_based_source_only_mixture']['AUC']}") From fcc811f12714cb90e7cd61ee60f694d104b34f1d Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 14:27:34 +0100 Subject: [PATCH 114/163] test target with predictions --- icu_benchmarks/models/domain_adaptation.py | 42 +++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 7c935ed4..8fda4ae9 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -209,6 +209,17 @@ def domain_adaptation( # train target model # target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) target_model = load_model(old_run_dir / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold) + + # get predictions for train set + train_predictions, train_labels = get_predictions_for_all_models( + target_model, + data, + log_dir_fold, + source_dir=model_path / task / model, + seed=seed, + source_datasets=source_datasets, + test_on="train", + ) # generate predictions and write to file if not already done if not (log_dir_fold / "val_predictions.json").exists(): @@ -255,6 +266,35 @@ def domain_adaptation( _, test_labels = RICUDataset(data, split="test").get_data_and_labels() + + # join predictions with static data and train new model + gin.clear_config() + gin.parse_config(gin_config_before_tuning) + gin.bind_parameter("preprocess.fold_size", target_size) + data_with_predictions = preprocess_data( + data_dir, + seed=seed, + debug=debug, + use_cache=True, + cv_repetitions=cv_repetitions, + repetition_index=repetition, + cv_folds=cv_folds, + fold_index=fold_index, + ) + data_with_predictions["train"]["STATIC"] = data_with_predictions["train"]["STATIC"].join(pd.DataFrame(list(train_predictions.values())[1:]).T) + data_with_predictions["val"]["STATIC"] = data_with_predictions["val"]["STATIC"].join(pd.DataFrame(list(val_predictions.values())[1:]).T) + data_with_predictions["test"]["STATIC"] = data_with_predictions["test"]["STATIC"].join(pd.DataFrame(list(test_predictions.values())[1:]).T) + target_model_with_predictions = MLWrapper() + target_model_with_predictions.set_log_dir(log_dir_fold) + target_model_with_predictions.train(RICUDataset(data_with_predictions, split="train"), RICUDataset(data_with_predictions, split="val"), "balanced", seed) + dataset_with_predictions = RICUDataset(data_with_predictions, split="test") + preds_w_preds = target_model_with_predictions.predict(dataset_with_predictions, None, None) + preds_w_preds = preds_w_preds[:, 1] + fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels) + logging.info(f"auc with preds: {fold_results[f'target_with_predictions']['AUC']}") + + + for baseline, predictions in test_predictions.items(): # logging.info("Evaluating model: {}".format(baseline)) fold_results[baseline] = calculate_metrics(predictions, test_labels) @@ -350,7 +390,7 @@ def domain_adaptation( # print baselines first, then top three AUC, then top three loss for source, auc in avg_aucs.items(): - if source in ["target", "convex_combination_without_target"] + datasets: + if source in ["target", "convex_combination_without_target", "target_with_predictions"] + datasets: logging.info(f"{source}: {auc}") # avg_aucs_list = sorted(avg_aucs.items(), key=lambda x: x[1], reverse=True) # i = 0 From f20715c8016f0c94716b64ef28bb63cab4af43d7 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 14:29:37 +0100 Subject: [PATCH 115/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 8fda4ae9..2da69bd7 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -284,7 +284,11 @@ def domain_adaptation( data_with_predictions["train"]["STATIC"] = data_with_predictions["train"]["STATIC"].join(pd.DataFrame(list(train_predictions.values())[1:]).T) data_with_predictions["val"]["STATIC"] = data_with_predictions["val"]["STATIC"].join(pd.DataFrame(list(val_predictions.values())[1:]).T) data_with_predictions["test"]["STATIC"] = data_with_predictions["test"]["STATIC"].join(pd.DataFrame(list(test_predictions.values())[1:]).T) - target_model_with_predictions = MLWrapper() + model_type = gin.query_parameter("train_common.model") + if str(model_type) == "@DLWrapper()": + target_model_with_predictions = DLWrapper() + elif str(model_type) == "@MLWrapper()": + target_model_with_predictions = MLWrapper() target_model_with_predictions.set_log_dir(log_dir_fold) target_model_with_predictions.train(RICUDataset(data_with_predictions, split="train"), RICUDataset(data_with_predictions, split="val"), "balanced", seed) dataset_with_predictions = RICUDataset(data_with_predictions, split="test") From f2b2ac5fc46653cb966f711c095d315a93a8833f Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 14:51:40 +0100 Subject: [PATCH 116/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 2da69bd7..814444f8 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -208,7 +208,8 @@ def domain_adaptation( # train target model # target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) - target_model = load_model(old_run_dir / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold) + target_model_dir = old_run_dir / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}" + target_model = load_model(target_model_dir, log_dir_fold) # get predictions for train set train_predictions, train_labels = get_predictions_for_all_models( @@ -266,10 +267,10 @@ def domain_adaptation( _, test_labels = RICUDataset(data, split="test").get_data_and_labels() - # join predictions with static data and train new model gin.clear_config() gin.parse_config(gin_config_before_tuning) + gin.parse_config_file(target_model_dir / "train_config.gin") gin.bind_parameter("preprocess.fold_size", target_size) data_with_predictions = preprocess_data( data_dir, From fdfca07cb779046727667a7c39831ba817a35e96 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 14:54:00 +0100 Subject: [PATCH 117/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 814444f8..f097d457 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -269,7 +269,6 @@ def domain_adaptation( # join predictions with static data and train new model gin.clear_config() - gin.parse_config(gin_config_before_tuning) gin.parse_config_file(target_model_dir / "train_config.gin") gin.bind_parameter("preprocess.fold_size", target_size) data_with_predictions = preprocess_data( From 89a5a6c303afc5a60ad219bd1ee2d3578dd310f0 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 14:56:39 +0100 Subject: [PATCH 118/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index f097d457..14f6474e 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -270,6 +270,7 @@ def domain_adaptation( # join predictions with static data and train new model gin.clear_config() gin.parse_config_file(target_model_dir / "train_config.gin") + gin.bind_parameter("Transformer.emb", 103) gin.bind_parameter("preprocess.fold_size", target_size) data_with_predictions = preprocess_data( data_dir, From 8cf0207b75ded5b6b2985a3d8658aa647f7c4258 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 14:58:58 +0100 Subject: [PATCH 119/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 14f6474e..f12a18f8 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -294,7 +294,8 @@ def domain_adaptation( target_model_with_predictions.train(RICUDataset(data_with_predictions, split="train"), RICUDataset(data_with_predictions, split="val"), "balanced", seed) dataset_with_predictions = RICUDataset(data_with_predictions, split="test") preds_w_preds = target_model_with_predictions.predict(dataset_with_predictions, None, None) - preds_w_preds = preds_w_preds[:, 1] + if preds_w_preds.shape[1] == 2: + preds_w_preds = preds_w_preds[:, 1] fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels) logging.info(f"auc with preds: {fold_results[f'target_with_predictions']['AUC']}") From d89edbf9776572bd8a4fd72f8907dfd13633f00f Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 15:02:53 +0100 Subject: [PATCH 120/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index f12a18f8..92782396 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -294,7 +294,7 @@ def domain_adaptation( target_model_with_predictions.train(RICUDataset(data_with_predictions, split="train"), RICUDataset(data_with_predictions, split="val"), "balanced", seed) dataset_with_predictions = RICUDataset(data_with_predictions, split="test") preds_w_preds = target_model_with_predictions.predict(dataset_with_predictions, None, None) - if preds_w_preds.shape[1] == 2: + if isinstance(target_model_with_predictions, MLWrapper): preds_w_preds = preds_w_preds[:, 1] fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels) logging.info(f"auc with preds: {fold_results[f'target_with_predictions']['AUC']}") From 918890ab1b2d873a594bb3acb39113bae2299ac5 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 15:29:18 +0100 Subject: [PATCH 121/163] test cc with preds --- icu_benchmarks/models/domain_adaptation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 92782396..383fa690 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -298,8 +298,8 @@ def domain_adaptation( preds_w_preds = preds_w_preds[:, 1] fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels) logging.info(f"auc with preds: {fold_results[f'target_with_predictions']['AUC']}") - + for baseline, predictions in test_predictions.items(): # logging.info("Evaluating model: {}".format(baseline)) @@ -314,6 +314,9 @@ def domain_adaptation( test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1]) fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) + test_pred_with_preds = np.average([preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[.5,1,1,1]) + fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels) + agg_val_losses.append(val_losses) agg_val_aucs.append(val_aucs) @@ -396,7 +399,7 @@ def domain_adaptation( # print baselines first, then top three AUC, then top three loss for source, auc in avg_aucs.items(): - if source in ["target", "convex_combination_without_target", "target_with_predictions"] + datasets: + if source in ["target", "convex_combination_without_target", "target_with_predictions", "cc_with_preds"] + datasets: logging.info(f"{source}: {auc}") # avg_aucs_list = sorted(avg_aucs.items(), key=lambda x: x[1], reverse=True) # i = 0 From 871d5622a07101511e418ff5fab23d6713bf054d Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 16:29:54 +0100 Subject: [PATCH 122/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 383fa690..8bfb714c 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -271,6 +271,7 @@ def domain_adaptation( gin.clear_config() gin.parse_config_file(target_model_dir / "train_config.gin") gin.bind_parameter("Transformer.emb", 103) + gin.bind_parameter("LSTM.emb", 103) gin.bind_parameter("preprocess.fold_size", target_size) data_with_predictions = preprocess_data( data_dir, From cc837594d8f5beb9d4f0593f7e9539b52b326e8e Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 16:52:06 +0100 Subject: [PATCH 123/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 8bfb714c..effe108a 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -271,7 +271,7 @@ def domain_adaptation( gin.clear_config() gin.parse_config_file(target_model_dir / "train_config.gin") gin.bind_parameter("Transformer.emb", 103) - gin.bind_parameter("LSTM.emb", 103) + gin.bind_parameter("LSTMNet.emb", 103) gin.bind_parameter("preprocess.fold_size", target_size) data_with_predictions = preprocess_data( data_dir, From 89d12a31be52258885a6adcb0356a1cb3c464cb2 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 15 Jan 2023 16:53:32 +0100 Subject: [PATCH 124/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index effe108a..f5028b45 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -271,7 +271,7 @@ def domain_adaptation( gin.clear_config() gin.parse_config_file(target_model_dir / "train_config.gin") gin.bind_parameter("Transformer.emb", 103) - gin.bind_parameter("LSTMNet.emb", 103) + gin.bind_parameter("LSTMNet.input_dim", 103) gin.bind_parameter("preprocess.fold_size", target_size) data_with_predictions = preprocess_data( data_dir, From 6014ea49927ade698c29903d51ba75de8bdd2921 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Mon, 16 Jan 2023 12:42:52 +0100 Subject: [PATCH 125/163] boil down to relevant appraoches --- icu_benchmarks/models/domain_adaptation.py | 254 ++++++--------------- 1 file changed, 75 insertions(+), 179 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index f5028b45..f9dc667b 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -144,26 +144,6 @@ def domain_adaptation( cv_folds_to_train = 5 target_sizes = [500, 1000, 2000] datasets = ["aumc", "eicu", "hirid", "miiv"] - target_weights = [0.1, 0.2, 0.5, 1, 2, 5] - # weights = [1] * (len(datasets) - 1) - auc_functions = [ - lambda x: (x-0.5) ** 1, - lambda x: (x-0.5) ** 2, - lambda x: (x-0.5) ** 3, - lambda x: (x-0.5) ** 4, - lambda x: (x-0.5) ** 5, - lambda x: ((2 ** (10*(x-0.5))) - 1), - lambda x: ((3 ** (10*(x-0.5))) - 1), - ] - loss_functions = [ - lambda x: (1-x) ** 1, - lambda x: (1-x) ** 2, - lambda x: (1-x) ** 3, - lambda x: (1-x) ** 4, - lambda x: (1-x) ** 5, - lambda x: ((2 ** (10*(1-x))) - 1), - lambda x: ((3 ** (10*(1-x))) - 1), - ] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") # old_run_dir = Path("../yaib_logs/DA") @@ -184,9 +164,9 @@ def domain_adaptation( # choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug) # gin_config_with_target_hyperparameters = gin.config_str() results = {} + loss_weighted_results = {} for repetition in range(cv_repetitions_to_train): agg_val_losses = [] - agg_val_aucs = [] for fold_index in range(cv_folds_to_train): # gin.parse_config(gin_config_with_target_hyperparameters) results[f"{repetition}_{fold_index}"] = {} @@ -206,66 +186,67 @@ def domain_adaptation( log_dir_fold = log_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}" log_dir_fold.mkdir(parents=True, exist_ok=True) - # train target model - # target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) + # load or train target model target_model_dir = old_run_dir / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}" - target_model = load_model(target_model_dir, log_dir_fold) - - # get predictions for train set - train_predictions, train_labels = get_predictions_for_all_models( - target_model, - data, - log_dir_fold, - source_dir=model_path / task / model, - seed=seed, - source_datasets=source_datasets, - test_on="train", - ) - - # generate predictions and write to file if not already done - if not (log_dir_fold / "val_predictions.json").exists(): - val_predictions, val_labels = get_predictions_for_all_models( - target_model, - data, - log_dir_fold, - source_dir=model_path / task / model, - seed=seed, - source_datasets=source_datasets, - test_on="val", - ) - with open(log_dir_fold / "val_predictions.json", "w") as f: - json.dump(val_predictions, f, cls=JsonResultLoggingEncoder) + if target_model_dir.exists(): + target_model = load_model(target_model_dir, log_dir_fold) else: - with open(log_dir_fold / "val_predictions.json", "r") as f: - val_predictions = json.load(f) - _, val_labels = RICUDataset(data, split="val").get_data_and_labels() - val_losses = {} - val_aucs = {} + target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) + + def get_preds(split): + if not (log_dir_fold / f"{split}_predictions.json").exists(): + predictions, labels = get_predictions_for_all_models( + target_model, + data, + log_dir_fold, + source_dir=model_path / task / model, + seed=seed, + source_datasets=source_datasets, + test_on=split, + ) + with open(log_dir_fold / f"{split}_predictions.json", "w") as f: + json.dump(predictions, f, cls=JsonResultLoggingEncoder) + else: + with open(log_dir_fold / f"{split}_predictions.json", "r") as f: + predictions = json.load(f) + _, labels = RICUDataset(data, split=split).get_data_and_labels() + return predictions, labels + + # get predictions for train set + train_predictions, train_labels = get_preds("train") + test_predictions, test_labels = get_preds("test") + val_predictions, val_labels = get_preds("val") + val_losses = {baseline: log_loss(val_labels, predictions) for baseline, predictions in val_predictions.items()} val_losses["target"] = log_loss(val_labels, val_predictions["target"]) - val_aucs["target"] = roc_auc_score(val_labels, val_predictions["target"]) - for baseline, predictions in val_predictions.items(): - val_losses[baseline] = log_loss(val_labels, predictions) - val_aucs[baseline] = roc_auc_score(val_labels, predictions) - logging.info("Validation AUCS: %s", val_aucs) - logging.info("Validation losses: %s", val_losses) - - # generate predictions and write to file if not already done - if not (log_dir_fold / "test_predictions.json").exists(): - test_predictions, test_labels = get_predictions_for_all_models( - target_model, - data, - log_dir_fold, - source_dir=model_path / task / model, - seed=seed, - source_datasets=source_datasets, - ) - with open(log_dir_fold / "test_predictions.json", "w") as f: - json.dump(test_predictions, f, cls=JsonResultLoggingEncoder) - else: - with open(log_dir_fold / "test_predictions.json", "r") as f: - test_predictions = json.load(f) - _, test_labels = RICUDataset(data, split="test").get_data_and_labels() + # logging.info("Validation AUCS: %s", val_aucs) + # logging.info("Validation losses: %s", val_losses) + agg_val_losses.append(val_losses) + # evaluate baselines + for baseline, predictions in test_predictions.items(): + # logging.info("Evaluating model: {}".format(baseline)) + fold_results[baseline] = calculate_metrics(predictions, test_labels) + + # evaluate convex combination of models without target + test_predictions_list = list(test_predictions.values()) + test_predictions_list_without_target = test_predictions_list[1:] + test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1]) + fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) + + # evaluate convex combination of models with target + weights = { + "aumc": 10535, + "eicu": 113382, + "hirid": 12859, + "mimic": 52045, + } + weights_without_target = [v for k, v in weights.items() if k != dataset] + target_weights = [0.5, 1, 2] + for t in target_weights: + w = [t * sum(weights_without_target)] + weights_without_target + # logging.info(f"Evaluating target weight: {t}") + test_pred = np.average(test_predictions_list, axis=0, weights=w) + fold_results[f"target_weight_{t}"] = calculate_metrics(test_pred, test_labels) # join predictions with static data and train new model gin.clear_config() @@ -298,95 +279,10 @@ def domain_adaptation( if isinstance(target_model_with_predictions, MLWrapper): preds_w_preds = preds_w_preds[:, 1] fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels) - logging.info(f"auc with preds: {fold_results[f'target_with_predictions']['AUC']}") - - - - for baseline, predictions in test_predictions.items(): - # logging.info("Evaluating model: {}".format(baseline)) - fold_results[baseline] = calculate_metrics(predictions, test_labels) - # evaluate baselines - - # evaluate convex combination of models - test_predictions_list = list(test_predictions.values()) - test_predictions_list_without_target = test_predictions_list[1:] - - # logging.info("Evaluating convex combination of models without target.") - test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1]) - fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) - test_pred_with_preds = np.average([preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[.5,1,1,1]) fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels) - agg_val_losses.append(val_losses) - agg_val_aucs.append(val_aucs) - - # logging.info("Evaluating convex combination of models.") - # for w in weights: - # # w = weights + [t * sum(weights)] - # # logging.info(f"Evaluating target weight: {t}") - # logging.info(f"Evaluating weights: {w}") - # test_pred = np.average(test_predictions_list, axis=0, weights=w) - # fold_results[f"convex_combination_{w}"] = calculate_metrics(test_pred, test_labels) - - # logging.info(f"Top three AUC functions: {rated_auc_functions[:3]}") - # logging.info(f"Top three loss functions: {rated_loss_functions[:3]}") - log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) - - avg_val_losses = np.array([np.mean([x[source] for x in agg_val_losses]) for source in val_losses.keys()]) - avg_val_aucs = {source: np.mean([x[source] for x in agg_val_aucs]) for source in val_aucs.keys()} - logging.info("Average validation losses: %s", dict(zip(val_losses.keys(), avg_val_losses))) - logging.info("Average validation AUCs: %s", dict(zip(val_aucs.keys(), avg_val_aucs))) - - scaled_losses = np.array(0.9 * avg_val_losses / np.max(avg_val_losses)) - logging.info(f"scaled_losses: {scaled_losses}") - - # find top three auc functions - rated_auc_functions = [] - for f in auc_functions: - f_str = inspect.getsource(f).replace(" ", "")[:-2] - # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.") - weights = np.array([f(x) for x in avg_val_aucs.values()]) - weights = weights.clip(min=0) - test_pred = np.average(test_predictions_list, axis=0, weights=weights) - # logging.info(f"weights: {weights}") - - fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels) - rated_auc_functions.append((f_str, fold_results[f"AUC_{f_str}"]["AUC"])) - rated_auc_functions.sort(key=lambda x: x[1], reverse=True) - # print top three auc functions - for f_str, auc in rated_auc_functions[:3]: - logging.info(f"{f_str}: {auc}") - - - # find top three loss functions - rated_loss_functions = [] - for f in loss_functions: - # strip whitespace - f_str = inspect.getsource(f).replace(" ", "")[:-2] - # logging.info(f"Evaluating convex combination of models with loss function {f_str}.") - weights = [f(x) for x in scaled_losses] - # logging.info(f"weights: {weights}") - test_pred = np.average(test_predictions_list, axis=0, weights=weights) - fold_results[f"loss_{f_str}"] = calculate_metrics(test_pred, test_labels) - rated_loss_functions.append((f_str, fold_results[f"loss_{f_str}"]["AUC"])) - rated_loss_functions.sort(key=lambda x: x[1], reverse=True) - for f_str, auc in rated_loss_functions[:3]: - logging.info(f"{f_str}: {auc}") - - # evaluate source only mixture - logging.info("Evaluating loss weighted source only mixture.") - loss_based_weights = 1 - scaled_losses[1:] - test_pred = np.average(test_predictions_list_without_target, axis=0, weights=loss_based_weights) - fold_results[f"loss_based_source_only_mixture"] = calculate_metrics(test_pred, test_labels) - logging.info(f"auc: {fold_results[f'loss_based_source_only_mixture']['AUC']}") - - logging.info("Evaluating auc weighted source only mixture.") - auc_based_weights = (np.array(list(avg_val_aucs.values())) - 0.5)[1:] ** 2 - test_pred = np.average(test_predictions_list_without_target, axis=0, weights=auc_based_weights) - fold_results[f"auc_based_source_only_mixture"] = calculate_metrics(test_pred, test_labels) - logging.info(f"auc: {fold_results[f'auc_based_source_only_mixture']['AUC']}") # average results over folds agg_aucs = {} @@ -398,25 +294,21 @@ def domain_adaptation( for source, aucs in agg_aucs.items(): avg_aucs[source] = np.mean(aucs) + avg_val_losses = np.array([np.mean([x[source] for x in agg_val_losses]) for source in val_losses.keys()]) + logging.info("Average validation losses: %s", dict(zip(val_losses.keys(), avg_val_losses))) + scaled_losses = np.array(0.9 * avg_val_losses / np.max(avg_val_losses)) + logging.info(f"scaled_losses: {scaled_losses}") + + weights = [(1-x) for x in scaled_losses] + # logging.info(f"weights: {weights}") + test_pred = np.average(test_predictions_list, axis=0, weights=weights) + loss_weighted_results[repetition] = calculate_metrics(test_pred, test_labels) + avg_aucs["loss_weighted"] = calculate_metrics(test_pred, test_labels)["AUC"] + # print baselines first, then top three AUC, then top three loss for source, auc in avg_aucs.items(): - if source in ["target", "convex_combination_without_target", "target_with_predictions", "cc_with_preds"] + datasets: - logging.info(f"{source}: {auc}") - # avg_aucs_list = sorted(avg_aucs.items(), key=lambda x: x[1], reverse=True) - # i = 0 - # for source, auc in avg_aucs_list: - # if "AUC" in source: - # i += 1 - # logging.info(f"{source}: {auc}") - # if i == 3: - # break - # i = 0 - # for source, auc in avg_aucs_list: - # if "loss" in source: - # i += 1 - # logging.info(f"{source}: {auc}") - # if i == 3: - # break + logging.info(f"{source}: {auc}") + log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3) source_metrics = {} @@ -425,6 +317,10 @@ def domain_adaptation( for metric, score in source_stats.items(): if isinstance(score, (float, int)): source_metrics.setdefault(source, {}).setdefault(metric, []).append(score) + for loss_weighted_result in loss_weighted_results.values(): + for metric, score in loss_weighted_result.items(): + if isinstance(score, (float, int)): + source_metrics.setdefault("loss_weighted", {}).setdefault(metric, []).append(score) # Compute statistical metric over aggregated results averaged_metrics = {} From a6b31d4cae0b52250e8488a4ba217caf7653b989 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Mon, 16 Jan 2023 12:44:15 +0100 Subject: [PATCH 126/163] format --- icu_benchmarks/models/domain_adaptation.py | 57 +++++++++++++++------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index f9dc667b..7062ef5f 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -187,7 +187,15 @@ def domain_adaptation( log_dir_fold.mkdir(parents=True, exist_ok=True) # load or train target model - target_model_dir = old_run_dir / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}" + target_model_dir = ( + old_run_dir + / task + / model + / dataset + / f"target_{target_size}" + / f"cv_rep_{repetition}" + / f"fold_{fold_index}" + ) if target_model_dir.exists(): target_model = load_model(target_model_dir, log_dir_fold) else: @@ -211,7 +219,7 @@ def get_preds(split): predictions = json.load(f) _, labels = RICUDataset(data, split=split).get_data_and_labels() return predictions, labels - + # get predictions for train set train_predictions, train_labels = get_preds("train") test_predictions, test_labels = get_preds("test") @@ -226,11 +234,11 @@ def get_preds(split): for baseline, predictions in test_predictions.items(): # logging.info("Evaluating model: {}".format(baseline)) fold_results[baseline] = calculate_metrics(predictions, test_labels) - + # evaluate convex combination of models without target test_predictions_list = list(test_predictions.values()) test_predictions_list_without_target = test_predictions_list[1:] - test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1]) + test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1]) fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) # evaluate convex combination of models with target @@ -243,7 +251,7 @@ def get_preds(split): weights_without_target = [v for k, v in weights.items() if k != dataset] target_weights = [0.5, 1, 2] for t in target_weights: - w = [t * sum(weights_without_target)] + weights_without_target + w = [t * sum(weights_without_target)] + weights_without_target # logging.info(f"Evaluating target weight: {t}") test_pred = np.average(test_predictions_list, axis=0, weights=w) fold_results[f"target_weight_{t}"] = calculate_metrics(test_pred, test_labels) @@ -264,26 +272,39 @@ def get_preds(split): cv_folds=cv_folds, fold_index=fold_index, ) - data_with_predictions["train"]["STATIC"] = data_with_predictions["train"]["STATIC"].join(pd.DataFrame(list(train_predictions.values())[1:]).T) - data_with_predictions["val"]["STATIC"] = data_with_predictions["val"]["STATIC"].join(pd.DataFrame(list(val_predictions.values())[1:]).T) - data_with_predictions["test"]["STATIC"] = data_with_predictions["test"]["STATIC"].join(pd.DataFrame(list(test_predictions.values())[1:]).T) + data_with_predictions["train"]["STATIC"] = data_with_predictions["train"]["STATIC"].join( + pd.DataFrame(list(train_predictions.values())[1:]).T + ) + data_with_predictions["val"]["STATIC"] = data_with_predictions["val"]["STATIC"].join( + pd.DataFrame(list(val_predictions.values())[1:]).T + ) + data_with_predictions["test"]["STATIC"] = data_with_predictions["test"]["STATIC"].join( + pd.DataFrame(list(test_predictions.values())[1:]).T + ) model_type = gin.query_parameter("train_common.model") if str(model_type) == "@DLWrapper()": target_model_with_predictions = DLWrapper() elif str(model_type) == "@MLWrapper()": target_model_with_predictions = MLWrapper() target_model_with_predictions.set_log_dir(log_dir_fold) - target_model_with_predictions.train(RICUDataset(data_with_predictions, split="train"), RICUDataset(data_with_predictions, split="val"), "balanced", seed) + target_model_with_predictions.train( + RICUDataset(data_with_predictions, split="train"), + RICUDataset(data_with_predictions, split="val"), + "balanced", + seed, + ) dataset_with_predictions = RICUDataset(data_with_predictions, split="test") preds_w_preds = target_model_with_predictions.predict(dataset_with_predictions, None, None) if isinstance(target_model_with_predictions, MLWrapper): preds_w_preds = preds_w_preds[:, 1] fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels) - test_pred_with_preds = np.average([preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[.5,1,1,1]) + test_pred_with_preds = np.average( + [preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[0.5, 1, 1, 1] + ) fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels) log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) - + # average results over folds agg_aucs = {} for fold_results in results.values(): @@ -299,7 +320,7 @@ def get_preds(split): scaled_losses = np.array(0.9 * avg_val_losses / np.max(avg_val_losses)) logging.info(f"scaled_losses: {scaled_losses}") - weights = [(1-x) for x in scaled_losses] + weights = [(1 - x) for x in scaled_losses] # logging.info(f"weights: {weights}") test_pred = np.average(test_predictions_list, axis=0, weights=weights) loss_weighted_results[repetition] = calculate_metrics(test_pred, test_labels) @@ -326,11 +347,13 @@ def get_preds(split): averaged_metrics = {} for source, source_stats in source_metrics.items(): for metric, scores in source_stats.items(): - averaged_metrics.setdefault(source, {}).setdefault(metric, []).append({ - "avg": np.mean(scores), - "std": np.std(scores), - "CI_0.95": stats.t.interval(0.95, len(scores) - 1, loc=np.mean(scores), scale=stats.sem(scores)), - }) + averaged_metrics.setdefault(source, {}).setdefault(metric, []).append( + { + "avg": np.mean(scores), + "std": np.std(scores), + "CI_0.95": stats.t.interval(0.95, len(scores) - 1, loc=np.mean(scores), scale=stats.sem(scores)), + } + ) with open(log_dir / "aggregated_source_metrics.json", "w") as f: json.dump(results, f, cls=JsonResultLoggingEncoder) From 272747c04a8f41b9c25a231a7e6dde036084989c Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Mon, 16 Jan 2023 12:45:52 +0100 Subject: [PATCH 127/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 47 +++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 8d625cc2..efb0ae4f 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -6,36 +6,45 @@ for metric in ["AUC", "PR"]: for endpoint in models_dir.iterdir(): if endpoint.is_dir(): - with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file: + with open(models_dir / f"{endpoint.name}_{metric}_results.csv", "w") as csv_file: writer = csv.writer(csv_file) info = ["model", "target", "target_size"] - source_names = ['target', 'aumc', 'eicu', 'hirid', 'miiv', 'convex_combination_without_target', 'convex_combination_0.1', 'convex_combination_0.2', 'convex_combination_0.5', 'convex_combination_1', 'convex_combination_2', 'convex_combination_5'] - stats_basis = ['avg', 'std', 'CI_0.95'] - stats_basis = ['avg'] - stats = ['avg', 'std', 'CI_0.95_min', 'CI_0.95_max'] - stats = ['avg'] + source_names = [ + "target", + "aumc", + "eicu", + "hirid", + "miiv", + "convex_combination_without_target", + "target_weight_0.5", + "target_weight_1", + "target_weight_2", + "loss_weighted", + "target_with_predictions", + "cc_with_preds", + ] + stats_basis = ["avg", "std", "CI_0.95"] + stats_basis = ["avg"] + stats = ["avg", "std", "CI_0.95_min", "CI_0.95_max"] + stats = ["avg"] # combine fieldnames and stats - full_fields = [f'{source}_{stat}' for source in source_names for stat in stats] - writer = csv.DictWriter(csv_file, fieldnames=info+full_fields) + full_fields = [f"{source}_{stat}" for source in source_names for stat in stats] + writer = csv.DictWriter(csv_file, fieldnames=info + full_fields) writer.writeheader() for model in endpoint.iterdir(): for target in ["aumc", "eicu", "hirid", "miiv"]: - target_sizes = ['target_500', 'target_1000', 'target_2000'] + target_sizes = ["target_500", "target_1000", "target_2000"] for target_size in target_sizes: - with open(model / target / target_size / 'averaged_source_metrics.json', 'r') as f: + with open(model / target / target_size / "averaged_source_metrics.json", "r") as f: results = json.load(f) - row_data = { - 'model': model.name, - 'target': target, - 'target_size': target_size - } + row_data = {"model": model.name, "target": target, "target_size": target_size} for stat in stats_basis: for source, source_metrics in results.items(): - if stat == 'CI_0.95': - row_data[f'{source}_{stat}_min'] = source_metrics[metric][0][stat][0] * 100 - row_data[f'{source}_{stat}_max'] = source_metrics[metric][0][stat][1] * 100 + if stat == "CI_0.95": + row_data[f"{source}_{stat}_min"] = source_metrics[metric][0][stat][0] * 100 + row_data[f"{source}_{stat}_max"] = source_metrics[metric][0][stat][1] * 100 else: - row_data[f'{source}_{stat}'] = source_metrics[metric][0][stat] * 100 + row_data[f"{source}_{stat}"] = source_metrics[metric][0][stat] * 100 writer.writerow(row_data) From 5d2d20ff37b1d66bc1e4363fc82c865579b99032 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Mon, 16 Jan 2023 13:32:42 +0100 Subject: [PATCH 128/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 27 ++++++++++------------ 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 7062ef5f..96fc511a 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -161,14 +161,18 @@ def domain_adaptation( gin.bind_parameter("preprocess.fold_size", target_size) log_dir = run_dir / task / model / dataset / f"target_{target_size}" log_dir.mkdir(parents=True, exist_ok=True) - # choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug) - # gin_config_with_target_hyperparameters = gin.config_str() + target_model_dir = (old_run_dir / task / model / dataset / f"target_{target_size}") + if not (target_model_dir / "cv_rep_0" / "fold_0").exists(): + choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug) + else: + gin.parse_config_file(target_model_dir / "cv_rep_0" / "fold_0" / "train_config.gin") + gin_config_with_target_hyperparameters = gin.config_str() results = {} loss_weighted_results = {} for repetition in range(cv_repetitions_to_train): agg_val_losses = [] for fold_index in range(cv_folds_to_train): - # gin.parse_config(gin_config_with_target_hyperparameters) + gin.parse_config(gin_config_with_target_hyperparameters) results[f"{repetition}_{fold_index}"] = {} fold_results = results[f"{repetition}_{fold_index}"] @@ -187,18 +191,11 @@ def domain_adaptation( log_dir_fold.mkdir(parents=True, exist_ok=True) # load or train target model - target_model_dir = ( - old_run_dir - / task - / model - / dataset - / f"target_{target_size}" - / f"cv_rep_{repetition}" - / f"fold_{fold_index}" - ) - if target_model_dir.exists(): - target_model = load_model(target_model_dir, log_dir_fold) + target_model_dir_fold = target_model_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}" + if target_model_dir_fold.exists(): + target_model = load_model(target_model_dir_fold, log_dir_fold) else: + logging.info("Model not found, training new model.") target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) def get_preds(split): @@ -258,7 +255,7 @@ def get_preds(split): # join predictions with static data and train new model gin.clear_config() - gin.parse_config_file(target_model_dir / "train_config.gin") + gin.parse_config(gin_config_with_target_hyperparameters) gin.bind_parameter("Transformer.emb", 103) gin.bind_parameter("LSTMNet.input_dim", 103) gin.bind_parameter("preprocess.fold_size", target_size) From d896d560ce74b01347e277ab4ff024b32dca31be Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Mon, 16 Jan 2023 15:11:21 +0100 Subject: [PATCH 129/163] fix da for miiv --- icu_benchmarks/models/domain_adaptation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 96fc511a..cf19c8a6 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -138,6 +138,9 @@ def domain_adaptation( Raises: ValueError: If checkpoint is not None and the checkpoint does not exist. """ + if dataset != "miiv": + return + cv_repetitions = 5 cv_repetitions_to_train = 5 cv_folds = 5 @@ -243,7 +246,7 @@ def get_preds(split): "aumc": 10535, "eicu": 113382, "hirid": 12859, - "mimic": 52045, + "miiv": 52045, } weights_without_target = [v for k, v in weights.items() if k != dataset] target_weights = [0.5, 1, 2] From 101f2620018128cb9927e4e3e2bad188ddb608f0 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Mon, 16 Jan 2023 23:41:38 +0100 Subject: [PATCH 130/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index efb0ae4f..cb960605 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -2,7 +2,7 @@ from pathlib import Path import csv -models_dir = Path("../DA_logs") +models_dir = Path("../DA_new") for metric in ["AUC", "PR"]: for endpoint in models_dir.iterdir(): if endpoint.is_dir(): From ac098ce1aca13793a9703a53cf5518897138af70 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Tue, 17 Jan 2023 13:25:18 +0100 Subject: [PATCH 131/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index cf19c8a6..21bfe9d2 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -145,7 +145,8 @@ def domain_adaptation( cv_repetitions_to_train = 5 cv_folds = 5 cv_folds_to_train = 5 - target_sizes = [500, 1000, 2000] + # target_sizes = [500, 1000, 2000] + target_sizes = [500] datasets = ["aumc", "eicu", "hirid", "miiv"] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") From db0393f711406bda96e80ab7efdf0eaf6fd706dd Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Tue, 17 Jan 2023 16:37:13 +0100 Subject: [PATCH 132/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index cb960605..3b4fa36f 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -36,15 +36,16 @@ for target in ["aumc", "eicu", "hirid", "miiv"]: target_sizes = ["target_500", "target_1000", "target_2000"] for target_size in target_sizes: - with open(model / target / target_size / "averaged_source_metrics.json", "r") as f: - results = json.load(f) + if (model / target / target_size).exists(): + with open(model / target / target_size / "averaged_source_metrics.json", "r") as f: + results = json.load(f) - row_data = {"model": model.name, "target": target, "target_size": target_size} - for stat in stats_basis: - for source, source_metrics in results.items(): - if stat == "CI_0.95": - row_data[f"{source}_{stat}_min"] = source_metrics[metric][0][stat][0] * 100 - row_data[f"{source}_{stat}_max"] = source_metrics[metric][0][stat][1] * 100 - else: - row_data[f"{source}_{stat}"] = source_metrics[metric][0][stat] * 100 - writer.writerow(row_data) + row_data = {"model": model.name, "target": target, "target_size": target_size} + for stat in stats_basis: + for source, source_metrics in results.items(): + if stat == "CI_0.95": + row_data[f"{source}_{stat}_min"] = source_metrics[metric][0][stat][0] * 100 + row_data[f"{source}_{stat}_max"] = source_metrics[metric][0][stat][1] * 100 + else: + row_data[f"{source}_{stat}"] = source_metrics[metric][0][stat] * 100 + writer.writerow(row_data) From 871810ecfd89206d2365e2dfa9f6b438b0184855 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Tue, 17 Jan 2023 17:08:51 +0100 Subject: [PATCH 133/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 21bfe9d2..8df6ad84 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -138,15 +138,11 @@ def domain_adaptation( Raises: ValueError: If checkpoint is not None and the checkpoint does not exist. """ - if dataset != "miiv": - return - cv_repetitions = 5 cv_repetitions_to_train = 5 cv_folds = 5 cv_folds_to_train = 5 - # target_sizes = [500, 1000, 2000] - target_sizes = [500] + target_sizes = [500, 1000, 2000] datasets = ["aumc", "eicu", "hirid", "miiv"] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") @@ -318,15 +314,19 @@ def get_preds(split): avg_val_losses = np.array([np.mean([x[source] for x in agg_val_losses]) for source in val_losses.keys()]) logging.info("Average validation losses: %s", dict(zip(val_losses.keys(), avg_val_losses))) - scaled_losses = np.array(0.9 * avg_val_losses / np.max(avg_val_losses)) - logging.info(f"scaled_losses: {scaled_losses}") - weights = [(1 - x) for x in scaled_losses] - # logging.info(f"weights: {weights}") + weights = 1 / avg_val_losses + logging.info(f"weights: {weights}") test_pred = np.average(test_predictions_list, axis=0, weights=weights) loss_weighted_results[repetition] = calculate_metrics(test_pred, test_labels) avg_aucs["loss_weighted"] = calculate_metrics(test_pred, test_labels)["AUC"] + weights = (1 / avg_val_losses) ** 2 + logging.info(f"weights: {weights}") + test_pred = np.average(test_predictions_list, axis=0, weights=weights) + loss_weighted_results[repetition] = calculate_metrics(test_pred, test_labels) + avg_aucs["squared_loss_weighted"] = calculate_metrics(test_pred, test_labels)["AUC"] + # print baselines first, then top three AUC, then top three loss for source, auc in avg_aucs.items(): logging.info(f"{source}: {auc}") From f10e0bb40ca641a59d81563a4d1424baddcd63b6 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Tue, 17 Jan 2023 18:14:53 +0100 Subject: [PATCH 134/163] fix weight for combined --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 8df6ad84..50b30cc3 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -296,7 +296,7 @@ def get_preds(split): preds_w_preds = preds_w_preds[:, 1] fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels) test_pred_with_preds = np.average( - [preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[0.5, 1, 1, 1] + [preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[0.5*sum(weights_without_target)] + weights_without_target ) fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels) From 3c83511e876ca66e2fa34d1e8dc6e7c501fcd68f Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Tue, 17 Jan 2023 19:16:53 +0100 Subject: [PATCH 135/163] include max prediction --- icu_benchmarks/models/domain_adaptation.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 50b30cc3..cc47f15b 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -238,6 +238,10 @@ def get_preds(split): test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1]) fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) + # evaluate max probability + max_pred = np.max(test_predictions_list, axis=0) + fold_results[f"max_prediction"] = calculate_metrics(max_pred, test_labels) + # evaluate convex combination of models with target weights = { "aumc": 10535, From a6a4551c529e162e5d70f8250aa646f6516ba029 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Tue, 17 Jan 2023 22:24:42 +0100 Subject: [PATCH 136/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index cc47f15b..9641d22c 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -9,7 +9,8 @@ import pandas as pd from pathlib import Path import scipy.stats as stats -from sklearn.metrics import log_loss, roc_auc_score +from sklearn.metrics import log_loss +from skopt import gp_minimize from icu_benchmarks.data.loader import RICUDataset from icu_benchmarks.data.preprocess import preprocess_data @@ -304,6 +305,25 @@ def get_preds(split): ) fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels) + def convex_model_combination(model_weights): + val_pred = np.average(list(val_predictions.values()), axis=0, weights=model_weights) + return log_loss(val_labels, val_pred) + + logging.disable(logging.INFO) + res = gp_minimize( + convex_model_combination, + [(0.01, 1)] * len(datasets), + n_calls=50, + n_initial_points=10, + random_state=seed, + noise=1e-10, # the models are deterministic, but noise is needed for the gp to work + ) + logging.disable(logging.NOTSET) + best_model_weights = res.x + logging.info(best_model_weights) + test_pred = np.average(test_predictions_list, axis=0, weights=best_model_weights) + fold_results["bayes_opt"] = calculate_metrics(test_pred, test_labels) + log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO) # average results over folds From 2c590783cfc181ae10e746759d0d590cd91972bd Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Wed, 18 Jan 2023 01:28:57 +0100 Subject: [PATCH 137/163] format --- icu_benchmarks/models/domain_adaptation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 9641d22c..c384530e 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -162,7 +162,7 @@ def domain_adaptation( gin.bind_parameter("preprocess.fold_size", target_size) log_dir = run_dir / task / model / dataset / f"target_{target_size}" log_dir.mkdir(parents=True, exist_ok=True) - target_model_dir = (old_run_dir / task / model / dataset / f"target_{target_size}") + target_model_dir = old_run_dir / task / model / dataset / f"target_{target_size}" if not (target_model_dir / "cv_rep_0" / "fold_0").exists(): choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug) else: @@ -301,7 +301,9 @@ def get_preds(split): preds_w_preds = preds_w_preds[:, 1] fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels) test_pred_with_preds = np.average( - [preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[0.5*sum(weights_without_target)] + weights_without_target + [preds_w_preds] + test_predictions_list_without_target, + axis=0, + weights=[0.5 * sum(weights_without_target)] + weights_without_target, ) fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels) From 479246a597ec1586ee0d0f1ebe5069be45efcfd9 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Wed, 18 Jan 2023 10:42:37 +0100 Subject: [PATCH 138/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index c384530e..2c415f54 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -143,7 +143,8 @@ def domain_adaptation( cv_repetitions_to_train = 5 cv_folds = 5 cv_folds_to_train = 5 - target_sizes = [500, 1000, 2000] + # target_sizes = [500, 1000, 2000] + target_sizes = [1000, 2000] datasets = ["aumc", "eicu", "hirid", "miiv"] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") From 0c2af6daa5ca17c7987a0b5e9040a03b02519ef8 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Wed, 18 Jan 2023 20:24:40 +0100 Subject: [PATCH 139/163] changes for sepsis --- icu_benchmarks/models/domain_adaptation.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 2c415f54..c75851df 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -143,9 +143,9 @@ def domain_adaptation( cv_repetitions_to_train = 5 cv_folds = 5 cv_folds_to_train = 5 - # target_sizes = [500, 1000, 2000] - target_sizes = [1000, 2000] - datasets = ["aumc", "eicu", "hirid", "miiv"] + target_sizes = [500, 1000, 2000] + # datasets = ["aumc", "eicu", "hirid", "miiv"] + datasets = ["aumc", "hirid"] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") # old_run_dir = Path("../yaib_logs/DA") @@ -164,7 +164,7 @@ def domain_adaptation( log_dir = run_dir / task / model / dataset / f"target_{target_size}" log_dir.mkdir(parents=True, exist_ok=True) target_model_dir = old_run_dir / task / model / dataset / f"target_{target_size}" - if not (target_model_dir / "cv_rep_0" / "fold_0").exists(): + if not (target_model_dir / "cv_rep_0" / "fold_0" / "train_config.gin").exists(): choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug) else: gin.parse_config_file(target_model_dir / "cv_rep_0" / "fold_0" / "train_config.gin") @@ -194,9 +194,9 @@ def domain_adaptation( # load or train target model target_model_dir_fold = target_model_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}" - if target_model_dir_fold.exists(): + try: target_model = load_model(target_model_dir_fold, log_dir_fold) - else: + except: logging.info("Model not found, training new model.") target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True) @@ -236,9 +236,9 @@ def get_preds(split): # evaluate convex combination of models without target test_predictions_list = list(test_predictions.values()) - test_predictions_list_without_target = test_predictions_list[1:] - test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1]) - fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) + # test_predictions_list_without_target = test_predictions_list[1:] + # test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1]) + # fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) # evaluate max probability max_pred = np.max(test_predictions_list, axis=0) @@ -247,9 +247,9 @@ def get_preds(split): # evaluate convex combination of models with target weights = { "aumc": 10535, - "eicu": 113382, + # "eicu": 113382, "hirid": 12859, - "miiv": 52045, + # "miiv": 52045, } weights_without_target = [v for k, v in weights.items() if k != dataset] target_weights = [0.5, 1, 2] From 4cff9eb4219ec7c9fd5bfe3faf13febf7a68ab00 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 19 Jan 2023 00:28:53 +0100 Subject: [PATCH 140/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index c75851df..c4e838cf 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -148,8 +148,8 @@ def domain_adaptation( datasets = ["aumc", "hirid"] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") - # old_run_dir = Path("../yaib_logs/DA") - old_run_dir = Path("../DA_logs") + # old_run_dir = Path("../yaib_logs/DA_sep") + old_run_dir = Path("../DA_seps") gin_config_before_tuning = gin.config_str() # evaluate models on same test split @@ -236,9 +236,9 @@ def get_preds(split): # evaluate convex combination of models without target test_predictions_list = list(test_predictions.values()) - # test_predictions_list_without_target = test_predictions_list[1:] - # test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1]) - # fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) + test_predictions_list_without_target = test_predictions_list[1:] + test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1]) + fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) # evaluate max probability max_pred = np.max(test_predictions_list, axis=0) From 89eead011739d9967232a6a7cb51383f687509c7 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 19 Jan 2023 00:31:56 +0100 Subject: [PATCH 141/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 3b4fa36f..dfffef82 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -16,10 +16,12 @@ "hirid", "miiv", "convex_combination_without_target", + "max_prediction_avg", "target_weight_0.5", "target_weight_1", "target_weight_2", "loss_weighted", + "bayes_opt_avg", "target_with_predictions", "cc_with_preds", ] From 00986de71f23cc641117633a1cf82dc434fa5626 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 19 Jan 2023 00:40:06 +0100 Subject: [PATCH 142/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index dfffef82..2529f3d4 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -16,12 +16,12 @@ "hirid", "miiv", "convex_combination_without_target", - "max_prediction_avg", + "max_prediction", "target_weight_0.5", "target_weight_1", "target_weight_2", "loss_weighted", - "bayes_opt_avg", + "bayes_opt", "target_with_predictions", "cc_with_preds", ] From 6aa022957096123d7acd14ba2d22d49fdbdaa995 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 19 Jan 2023 00:46:15 +0100 Subject: [PATCH 143/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index c4e838cf..9ba97369 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -143,13 +143,13 @@ def domain_adaptation( cv_repetitions_to_train = 5 cv_folds = 5 cv_folds_to_train = 5 - target_sizes = [500, 1000, 2000] - # datasets = ["aumc", "eicu", "hirid", "miiv"] - datasets = ["aumc", "hirid"] + target_sizes = [500] + datasets = ["aumc", "eicu", "hirid", "miiv"] + # datasets = ["aumc", "hirid"] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") # old_run_dir = Path("../yaib_logs/DA_sep") - old_run_dir = Path("../DA_seps") + old_run_dir = Path("../DA_new") gin_config_before_tuning = gin.config_str() # evaluate models on same test split @@ -247,9 +247,9 @@ def get_preds(split): # evaluate convex combination of models with target weights = { "aumc": 10535, - # "eicu": 113382, + "eicu": 113382, "hirid": 12859, - # "miiv": 52045, + "miiv": 52045, } weights_without_target = [v for k, v in weights.items() if k != dataset] target_weights = [0.5, 1, 2] From 4e7174c39dddf7a6453ed43db8507c15037e5b9e Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 19 Jan 2023 00:48:53 +0100 Subject: [PATCH 144/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 9ba97369..f332268f 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -143,13 +143,13 @@ def domain_adaptation( cv_repetitions_to_train = 5 cv_folds = 5 cv_folds_to_train = 5 - target_sizes = [500] + target_sizes = [500, 1000, 2000] datasets = ["aumc", "eicu", "hirid", "miiv"] # datasets = ["aumc", "hirid"] task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") # old_run_dir = Path("../yaib_logs/DA_sep") - old_run_dir = Path("../DA_new") + old_run_dir = Path("../DA_seps") gin_config_before_tuning = gin.config_str() # evaluate models on same test split From 7f84b2a101381e372476700195e4ffdaf50eae74 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 19 Jan 2023 11:46:03 +0100 Subject: [PATCH 145/163] correct logging for loss weighted --- icu_benchmarks/models/domain_adaptation.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index f332268f..1f02395b 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -170,8 +170,9 @@ def domain_adaptation( gin.parse_config_file(target_model_dir / "cv_rep_0" / "fold_0" / "train_config.gin") gin_config_with_target_hyperparameters = gin.config_str() results = {} - loss_weighted_results = {} + loss_weighted_results = [] for repetition in range(cv_repetitions_to_train): + loss_weighted_results.append({}) agg_val_losses = [] for fold_index in range(cv_folds_to_train): gin.parse_config(gin_config_with_target_hyperparameters) @@ -345,14 +346,14 @@ def convex_model_combination(model_weights): weights = 1 / avg_val_losses logging.info(f"weights: {weights}") test_pred = np.average(test_predictions_list, axis=0, weights=weights) - loss_weighted_results[repetition] = calculate_metrics(test_pred, test_labels) - avg_aucs["loss_weighted"] = calculate_metrics(test_pred, test_labels)["AUC"] + loss_weighted_results[repetition]["loss_weighted"] = calculate_metrics(test_pred, test_labels) + avg_aucs["loss_weighted"] = loss_weighted_results[repetition]["loss_weighted"]["AUC"] weights = (1 / avg_val_losses) ** 2 logging.info(f"weights: {weights}") test_pred = np.average(test_predictions_list, axis=0, weights=weights) - loss_weighted_results[repetition] = calculate_metrics(test_pred, test_labels) - avg_aucs["squared_loss_weighted"] = calculate_metrics(test_pred, test_labels)["AUC"] + loss_weighted_results[repetition]["squared_loss_weighted"] = calculate_metrics(test_pred, test_labels) + avg_aucs["squared_loss_weighted"] = loss_weighted_results[repetition]["squared_loss_weighted"]["AUC"] # print baselines first, then top three AUC, then top three loss for source, auc in avg_aucs.items(): From a30a56f1a5bb22ba5b6ef2d22a16f08c90912643 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 19 Jan 2023 11:46:17 +0100 Subject: [PATCH 146/163] only use source weights --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 1f02395b..e29f2f7e 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -252,7 +252,7 @@ def get_preds(split): "hirid": 12859, "miiv": 52045, } - weights_without_target = [v for k, v in weights.items() if k != dataset] + weights_without_target = [v for k, v in weights.items() if k in source_datasets] target_weights = [0.5, 1, 2] for t in target_weights: w = [t * sum(weights_without_target)] + weights_without_target From 16fba8af266b59933b59fe52ddddae05c5bcadab Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 19 Jan 2023 11:50:30 +0100 Subject: [PATCH 147/163] use debug to set source datasets --- icu_benchmarks/models/domain_adaptation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index e29f2f7e..e45a83e0 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -144,8 +144,8 @@ def domain_adaptation( cv_folds = 5 cv_folds_to_train = 5 target_sizes = [500, 1000, 2000] - datasets = ["aumc", "eicu", "hirid", "miiv"] - # datasets = ["aumc", "hirid"] + datasets = ["aumc", "hirid"] if debug else ["aumc", "eicu", "hirid", "miiv"] + debug = False task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") # old_run_dir = Path("../yaib_logs/DA_sep") From 4d40c7b3dfbd5376195c4efc66c17bc6bb7cdda3 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 19 Jan 2023 11:51:14 +0100 Subject: [PATCH 148/163] Update run.py --- icu_benchmarks/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py index 9e43c78b..0617b01e 100644 --- a/icu_benchmarks/run.py +++ b/icu_benchmarks/run.py @@ -52,7 +52,7 @@ def main(my_args=tuple(sys.argv[1:])): else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")] ) gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False) - domain_adaptation(name, args.data_dir, args.log_dir, args.seed, args.task_name, model) + domain_adaptation(name, args.data_dir, args.log_dir, args.seed, args.task_name, model, debug=args.debug) return else: reproducible = args.reproducible From b351a7056f4f721911592c04facf76a04056e582 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 19 Jan 2023 11:52:32 +0100 Subject: [PATCH 149/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index e45a83e0..74e75e41 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -145,11 +145,12 @@ def domain_adaptation( cv_folds_to_train = 5 target_sizes = [500, 1000, 2000] datasets = ["aumc", "hirid"] if debug else ["aumc", "eicu", "hirid", "miiv"] + # old_run_dir = Path("../yaib_logs/DA_sep") + old_run_dir = Path("../DA_seps") if debug else Path("../DA_new") debug = False task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") - # old_run_dir = Path("../yaib_logs/DA_sep") - old_run_dir = Path("../DA_seps") + gin_config_before_tuning = gin.config_str() # evaluate models on same test split From c881d876b6fb786ec5d80af0f4d6c3a04bb791ba Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Thu, 19 Jan 2023 13:45:38 +0100 Subject: [PATCH 150/163] fix loss_weighted --- icu_benchmarks/models/domain_adaptation.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 74e75e41..f5dc7d99 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -368,10 +368,11 @@ def convex_model_combination(model_weights): for metric, score in source_stats.items(): if isinstance(score, (float, int)): source_metrics.setdefault(source, {}).setdefault(metric, []).append(score) - for loss_weighted_result in loss_weighted_results.values(): - for metric, score in loss_weighted_result.items(): - if isinstance(score, (float, int)): - source_metrics.setdefault("loss_weighted", {}).setdefault(metric, []).append(score) + for loss_weighted_result in loss_weighted_results: + for source, source_stats in loss_weighted_result.items(): + for metric, score in source_stats.items(): + if isinstance(score, (float, int)): + source_metrics.setdefault(source, {}).setdefault(metric, []).append(score) # Compute statistical metric over aggregated results averaged_metrics = {} From 467bd74c7f707c29708afd3a4349bab65dc7c7df Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 20 Jan 2023 00:12:05 +0100 Subject: [PATCH 151/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index f5dc7d99..ba7e5d69 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -239,8 +239,9 @@ def get_preds(split): # evaluate convex combination of models without target test_predictions_list = list(test_predictions.values()) test_predictions_list_without_target = test_predictions_list[1:] - test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1]) - fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) + if not debug: + test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1]) + fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) # evaluate max probability max_pred = np.max(test_predictions_list, axis=0) From 9f6f4a62717bb958c9b6766dc45aa61cb9d140f3 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 20 Jan 2023 11:34:09 +0100 Subject: [PATCH 152/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index ba7e5d69..62081f19 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -139,15 +139,15 @@ def domain_adaptation( Raises: ValueError: If checkpoint is not None and the checkpoint does not exist. """ + is_sepsis = task == "sepsis" cv_repetitions = 5 cv_repetitions_to_train = 5 cv_folds = 5 cv_folds_to_train = 5 target_sizes = [500, 1000, 2000] - datasets = ["aumc", "hirid"] if debug else ["aumc", "eicu", "hirid", "miiv"] + datasets = ["aumc", "hirid"] if is_sepsis else ["aumc", "eicu", "hirid", "miiv"] # old_run_dir = Path("../yaib_logs/DA_sep") - old_run_dir = Path("../DA_seps") if debug else Path("../DA_new") - debug = False + old_run_dir = Path("../DA_seps") if is_sepsis else Path("../DA_new") task_dir = data_dir / task model_path = Path("../yaib_models/best_models/") @@ -239,7 +239,7 @@ def get_preds(split): # evaluate convex combination of models without target test_predictions_list = list(test_predictions.values()) test_predictions_list_without_target = test_predictions_list[1:] - if not debug: + if not is_sepsis: test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1]) fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) From b543be642a4a135921b6adbdd64d6f378d71a1ba Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 20 Jan 2023 12:08:00 +0100 Subject: [PATCH 153/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 2529f3d4..1dc2dd26 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -21,6 +21,7 @@ "target_weight_1", "target_weight_2", "loss_weighted", + "squared_loss_weighted_avg", "bayes_opt", "target_with_predictions", "cc_with_preds", From 8bc39b866ce0020c053ec846b5697441dd889743 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Fri, 20 Jan 2023 12:09:21 +0100 Subject: [PATCH 154/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 1dc2dd26..7251694d 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -21,7 +21,7 @@ "target_weight_1", "target_weight_2", "loss_weighted", - "squared_loss_weighted_avg", + "squared_loss_weighted", "bayes_opt", "target_with_predictions", "cc_with_preds", From 45454a61b3f5525f6b72e89d0027793f481b01b5 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 21 Jan 2023 15:40:53 +0100 Subject: [PATCH 155/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 62081f19..54085ab0 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -145,7 +145,7 @@ def domain_adaptation( cv_folds = 5 cv_folds_to_train = 5 target_sizes = [500, 1000, 2000] - datasets = ["aumc", "hirid"] if is_sepsis else ["aumc", "eicu", "hirid", "miiv"] + datasets = ["aumc", "hirid", "miiv"] if is_sepsis else ["aumc", "eicu", "hirid", "miiv"] # old_run_dir = Path("../yaib_logs/DA_sep") old_run_dir = Path("../DA_seps") if is_sepsis else Path("../DA_new") task_dir = data_dir / task From e4d739f1af6f486385a08792420a618c5f22b39e Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sat, 21 Jan 2023 20:53:40 +0100 Subject: [PATCH 156/163] Update domain_adaptation.py --- icu_benchmarks/models/domain_adaptation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 54085ab0..2566f651 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -239,9 +239,9 @@ def get_preds(split): # evaluate convex combination of models without target test_predictions_list = list(test_predictions.values()) test_predictions_list_without_target = test_predictions_list[1:] - if not is_sepsis: - test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1]) - fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) + weights_without_target = [1, 1] if is_sepsis else [1, 1, 1] + test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=weights_without_target) + fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels) # evaluate max probability max_pred = np.max(test_predictions_list, axis=0) From 870214a83e50d714d5f6fb0695516c29f4cbea8d Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Sun, 22 Jan 2023 22:43:13 +0100 Subject: [PATCH 157/163] Update da_to_csv.py --- scripts/results/da_to_csv.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py index 7251694d..e380e578 100644 --- a/scripts/results/da_to_csv.py +++ b/scripts/results/da_to_csv.py @@ -26,10 +26,8 @@ "target_with_predictions", "cc_with_preds", ] - stats_basis = ["avg", "std", "CI_0.95"] - stats_basis = ["avg"] - stats = ["avg", "std", "CI_0.95_min", "CI_0.95_max"] - stats = ["avg"] + stats_basis = ["avg", "std"] + stats = ["avg", "std"] # combine fieldnames and stats full_fields = [f"{source}_{stat}" for source in source_names for stat in stats] writer = csv.DictWriter(csv_file, fieldnames=info + full_fields) @@ -37,10 +35,11 @@ writer.writeheader() for model in endpoint.iterdir(): for target in ["aumc", "eicu", "hirid", "miiv"]: - target_sizes = ["target_500", "target_1000", "target_2000"] + target_sizes = [500, 1000, 2000] for target_size in target_sizes: - if (model / target / target_size).exists(): - with open(model / target / target_size / "averaged_source_metrics.json", "r") as f: + target_str = f"target_{target_size}" + if (model / target / target_str).exists(): + with open(model / target / target_str / "averaged_source_metrics.json", "r") as f: results = json.load(f) row_data = {"model": model.name, "target": target, "target_size": target_size} From e069f6b1129df81cfdf5b70c326467d6c69fea2c Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Mon, 23 Jan 2023 15:07:55 +0100 Subject: [PATCH 158/163] rename script --- scripts/results/{da_to_csv.py => mortality_to_csv.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/results/{da_to_csv.py => mortality_to_csv.py} (100%) diff --git a/scripts/results/da_to_csv.py b/scripts/results/mortality_to_csv.py similarity index 100% rename from scripts/results/da_to_csv.py rename to scripts/results/mortality_to_csv.py From c0c733b9d14991e1e194b3b8dc087b0233147833 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Mon, 23 Jan 2023 15:08:41 +0100 Subject: [PATCH 159/163] Create sepsis_to_csv.py --- scripts/results/sepsis_to_csv.py | 53 ++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 scripts/results/sepsis_to_csv.py diff --git a/scripts/results/sepsis_to_csv.py b/scripts/results/sepsis_to_csv.py new file mode 100644 index 00000000..65ad0a56 --- /dev/null +++ b/scripts/results/sepsis_to_csv.py @@ -0,0 +1,53 @@ +import json +from pathlib import Path +import csv + +models_dir = Path("../DA_sep_new") +for metric in ["AUC", "PR"]: + for endpoint in models_dir.iterdir(): + if endpoint.is_dir(): + with open(models_dir / f"{endpoint.name}_{metric}_results.csv", "w") as csv_file: + writer = csv.writer(csv_file) + info = ["model", "target", "target_size"] + source_names = [ + "target", + "aumc", + "eicu", + "hirid", + "miiv", + "convex_combination_without_target", + "max_prediction", + "target_weight_0.5", + "target_weight_1", + "target_weight_2", + "loss_weighted", + "squared_loss_weighted", + "bayes_opt", + "target_with_predictions", + "cc_with_preds", + ] + stats_basis = ["avg", "std"] + stats = ["avg", "std"] + # combine fieldnames and stats + full_fields = [f"{source}_{stat}" for source in source_names for stat in stats] + writer = csv.DictWriter(csv_file, fieldnames=info + full_fields) + + writer.writeheader() + for model in endpoint.iterdir(): + for target in ["aumc", "hirid", "miiv"]: + target_sizes = [500, 1000, 2000] + for target_size in target_sizes: + target_str = f"target_{target_size}" + if (model / target / target_str).exists(): + with open(model / target / target_str / "averaged_source_metrics.json", "r") as f: + results = json.load(f) + + row_data = {"model": model.name, "target": target, "target_size": target_size} + for stat in stats_basis: + for source, source_metrics in results.items(): + if stat == "CI_0.95": + row_data[f"{source}_{stat}_min"] = source_metrics[metric][0][stat][0] * 100 + row_data[f"{source}_{stat}_max"] = source_metrics[metric][0][stat][1] * 100 + else: + row_data[f"{source}_{stat}"] = source_metrics[metric][0][stat] * 100 + writer.writerow(row_data) From c1e555af99b16680d49bec0a58f150a82cce71a8 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Mon, 23 Jan 2023 15:18:22 +0100 Subject: [PATCH 160/163] Update sepsis_to_csv.py --- scripts/results/sepsis_to_csv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/results/sepsis_to_csv.py b/scripts/results/sepsis_to_csv.py index 65ad0a56..813c3aeb 100644 --- a/scripts/results/sepsis_to_csv.py +++ b/scripts/results/sepsis_to_csv.py @@ -12,7 +12,6 @@ source_names = [ "target", "aumc", - "eicu", "hirid", "miiv", "convex_combination_without_target", From 6a7300d905a40dc9d34d23d7c6e44b901262b9f1 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Wed, 25 Jan 2023 16:58:25 +0100 Subject: [PATCH 161/163] tables to latex --- scripts/results/da_results_to_latex.py | 69 ++++++++++++++++++++++ scripts/results/da_results_to_latex_sep.py | 69 ++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 scripts/results/da_results_to_latex.py create mode 100644 scripts/results/da_results_to_latex_sep.py diff --git a/scripts/results/da_results_to_latex.py b/scripts/results/da_results_to_latex.py new file mode 100644 index 00000000..a2e8cbd6 --- /dev/null +++ b/scripts/results/da_results_to_latex.py @@ -0,0 +1,69 @@ +import csv + +rawNamesMap = { + "target": "Target", + "aumc": "AUMCdb", + "eicu": "eICU", + "hirid": "HiRID", + "miiv": "MIMIC-IV", + "convex_combination_without_target": "Convex UDA", + "max_prediction": "Max Pooling", + "target_weight_0.5": "Weighted $\\alpha=1/3$", + "target_weight_2": "Weighted $\\alpha=2/3$", + "loss_weighted": "Weighted Loss", + "bayes_opt": "Weighted Bayes", + "target_with_predictions": "Prediction-Feature", + "cc_with_preds": "Combined", +} + +def csv_to_dict(file_name): + with open(file_name, 'r') as file: + reader = csv.DictReader(file) + data = [row for row in reader] + tables = {} + for row in data: + row_without_target = {key: value for key, value in row.items() if key != 'target' and key != 'target_size' and key != 'model'} + tables.setdefault((row['target'], row['target_size']), {})[row['model']] = row_without_target + return tables + + +def dict_to_latex(combination, data, metric): + table = '\\begin{table}[h]\n' + table += '\\centering\n' + table += '\\footnotesize' + table += '\\caption{{Sepsis prediction on {0} with target size {1}, {2} with standard deviation.}}\n'.format(rawNamesMap[combination[0]], combination[1], "AUROC" if metric == "auc" else "AUPRC") + headers = ['Model'] + for model, scores in data.items(): + headers += [model] + + table += '\\begin{tabular}{l|' + ''.join(['c'] * (len(headers) - 1)) + '}\n' + table += '\\textbf{' + '} & \\textbf{'.join(headers) + '}\\\\\n' + table += '\\hline\n' + + for score_name, score in data[model].items(): + if "_avg" in score_name: + raw_name = score_name.split("_avg")[0] + if raw_name == combination[0] or not raw_name in rawNamesMap: + continue + clean_name = rawNamesMap[raw_name] + values = [clean_name] + for model in headers[1:]: + scores = data[model] + avg = "{:.2f}".format(float(scores[score_name])) + std = "{:.2f}".format(float(scores[f"{raw_name}_std"])) + values.append(f"${avg} \pm {std}$") + table += ' & '.join(values) + '\\\\\n' + + table += '\\end{tabular}\n' + table += '\\end{table}\n' + return table + +if __name__ == '__main__': + for metric in ["auc", "pr"]: + file_name = f'../yaib_logs/sep_{metric}.csv' + data = csv_to_dict(file_name) + for key, row in data.items(): + table = dict_to_latex(key, row, metric) + print(table) + print('\n' * 5) + diff --git a/scripts/results/da_results_to_latex_sep.py b/scripts/results/da_results_to_latex_sep.py new file mode 100644 index 00000000..c0c0aafe --- /dev/null +++ b/scripts/results/da_results_to_latex_sep.py @@ -0,0 +1,69 @@ +import csv + +rawNamesMap = { + "target": "Target", + "aumc": "AUMCdb", + "eicu": "eICU", + "hirid": "HiRID", + "miiv": "MIMIC-IV", + "convex_combination_without_target": "Convex UDA", + "max_prediction": "Max Pooling", + "target_weight_0.5": "Weighted $\\alpha=1/3$", + "target_weight_2": "Weighted $\\alpha=2/3$", + "loss_weighted": "Weighted Loss", + "bayes_opt": "Weighted Bayes", + "target_with_predictions": "Prediction-Feature", + "cc_with_preds": "Combined", +} + +def csv_to_dict(file_name): + with open(file_name, 'r') as file: + reader = csv.DictReader(file) + data = [row for row in reader] + tables = {} + for row in data: + row_without_target = {key: value for key, value in row.items() if key != 'target' and key != 'target_size' and key != 'model'} + tables.setdefault((row['target']), {})[row['target_size']] = row_without_target + return tables + + +def dict_to_latex(combination, data, metric): + table = '\\begin{table}[h]\n' + table += '\\centering\n' + table += '\\footnotesize' + table += '\\caption{{Sepsis prediction on {0} with LGBM, {1} with standard deviation.}}\n'.format(rawNamesMap[combination], "AUROC" if metric == "auc" else "AUPRC") + headers = ['Target Size'] + for target_size, scores in data.items(): + headers += [target_size] + + table += '\\begin{tabular}{l|' + ''.join(['c'] * (len(headers) - 1)) + '}\n' + table += '\\textbf{' + '} & \\textbf{'.join(headers) + '}\\\\\n' + table += '\\hline\n' + + for score_name, score in data[target_size].items(): + if "_avg" in score_name: + raw_name = score_name.split("_avg")[0] + if raw_name == combination[0] or not raw_name in rawNamesMap: + continue + clean_name = rawNamesMap[raw_name] + values = [clean_name] + for target_size in headers[1:]: + scores = data[target_size] + avg = "{:.2f}".format(float(scores[score_name])) + std = "{:.2f}".format(float(scores[f"{raw_name}_std"])) + values.append(f"${avg} \pm {std}$") + table += ' & '.join(values) + '\\\\\n' + + table += '\\end{tabular}\n' + table += '\\end{table}\n' + return table + +if __name__ == '__main__': + for metric in ["auc", "pr"]: + file_name = f'../yaib_logs/sep_{metric}.csv' + data = csv_to_dict(file_name) + for key, row in data.items(): + table = dict_to_latex(key, row, metric) + print(table) + print('\n' * 5) + From 60ae45fb9e628fe6d437fd1d22260f8ad1e7aeb5 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Tue, 8 Aug 2023 16:49:30 +0200 Subject: [PATCH 162/163] import domain adaptation --- icu_benchmarks/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py index 21fce158..0a47cf7b 100644 --- a/icu_benchmarks/run.py +++ b/icu_benchmarks/run.py @@ -22,6 +22,7 @@ setup_logging, ) from icu_benchmarks.contants import RunMode +from icu_benchmarks.models import domain_adaptation @gin.configurable("Run") From b0583c719282462a56b78d516022887e71c44d48 Mon Sep 17 00:00:00 2001 From: Hendrik Schmidt Date: Tue, 8 Aug 2023 17:08:18 +0200 Subject: [PATCH 163/163] make train run --- icu_benchmarks/models/domain_adaptation.py | 33 +++++++++------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py index 2566f651..8df55050 100644 --- a/icu_benchmarks/models/domain_adaptation.py +++ b/icu_benchmarks/models/domain_adaptation.py @@ -12,14 +12,14 @@ from sklearn.metrics import log_loss from skopt import gp_minimize -from icu_benchmarks.data.loader import RICUDataset -from icu_benchmarks.data.preprocess import preprocess_data -from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters -from icu_benchmarks.models.metric_constants import MLMetrics +from icu_benchmarks.data.loader import PredictionDataset +from icu_benchmarks.data.preprocessor import Preprocessor, DefaultClassificationPreprocessor +from icu_benchmarks.tuning.hyperparameters import choose_and_bind_hyperparameters from icu_benchmarks.models.train import train_common from icu_benchmarks.models.wrappers import DLWrapper, MLWrapper from icu_benchmarks.models.utils import JsonResultLoggingEncoder from icu_benchmarks.run_utils import log_full_line +from .constants import MLMetrics def load_model(model_dir: Path, log_dir: Path): @@ -42,7 +42,7 @@ def load_model(model_dir: Path, log_dir: Path): return model -def get_predictions_for_single_model(dataset: RICUDataset, model_dir: Path, log_dir: Path): +def get_predictions_for_single_model(dataset: PredictionDataset, model_dir: Path, log_dir: Path): """Get predictions for a single model. Args: @@ -100,7 +100,7 @@ def get_predictions_for_all_models( torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False - test_dataset = RICUDataset(data, split=test_on) + test_dataset = PredictionDataset(data, split=test_on) _, test_labels = test_dataset.get_data_and_labels() test_predictions = {} @@ -153,6 +153,8 @@ def domain_adaptation( gin_config_before_tuning = gin.config_str() + preprocessor = preprocessor(use_static_features=True) + # evaluate models on same test split data_dir = task_dir / dataset source_datasets = [d for d in datasets if d != dataset] @@ -180,16 +182,7 @@ def domain_adaptation( results[f"{repetition}_{fold_index}"] = {} fold_results = results[f"{repetition}_{fold_index}"] - data = preprocess_data( - data_dir, - seed=seed, - debug=debug, - use_cache=True, - cv_repetitions=cv_repetitions, - repetition_index=repetition, - cv_folds=cv_folds, - fold_index=fold_index, - ) + data = preprocessor.apply(data, vars) log_dir_fold = log_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}" log_dir_fold.mkdir(parents=True, exist_ok=True) @@ -218,7 +211,7 @@ def get_preds(split): else: with open(log_dir_fold / f"{split}_predictions.json", "r") as f: predictions = json.load(f) - _, labels = RICUDataset(data, split=split).get_data_and_labels() + _, labels = PredictionDataset(data, split=split).get_data_and_labels() return predictions, labels # get predictions for train set @@ -294,12 +287,12 @@ def get_preds(split): target_model_with_predictions = MLWrapper() target_model_with_predictions.set_log_dir(log_dir_fold) target_model_with_predictions.train( - RICUDataset(data_with_predictions, split="train"), - RICUDataset(data_with_predictions, split="val"), + PredictionDataset(data_with_predictions, split="train"), + PredictionDataset(data_with_predictions, split="val"), "balanced", seed, ) - dataset_with_predictions = RICUDataset(data_with_predictions, split="test") + dataset_with_predictions = PredictionDataset(data_with_predictions, split="test") preds_w_preds = target_model_with_predictions.predict(dataset_with_predictions, None, None) if isinstance(target_model_with_predictions, MLWrapper): preds_w_preds = preds_w_preds[:, 1]