From 20d63752059031ebe74d5edcb53083f869abc014 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 6 Jan 2023 15:52:49 +0100
Subject: [PATCH 001/163] Update .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 111feaac..7b1be7d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -116,3 +116,4 @@ MH_DEBUG
 # Cached data
 **/cache/
 .DS_Store
+.vscode/launch.json

From 8bbfff47c6090db055979c07b05bdb38d1be016c Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 6 Jan 2023 16:46:52 +0100
Subject: [PATCH 002/163] introduce target split

---
 icu_benchmarks/data/preprocess.py | 67 ++++++++++++++++++++++++++-----
 1 file changed, 58 insertions(+), 9 deletions(-)

diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py
index 57016231..d4c713ea 100644
--- a/icu_benchmarks/data/preprocess.py
+++ b/icu_benchmarks/data/preprocess.py
@@ -9,7 +9,7 @@
 import pickle
 
 from sklearn.impute import MissingIndicator, SimpleImputer
-from sklearn.model_selection import KFold
+from sklearn.model_selection import KFold, LeavePOut, StratifiedKFold
 from sklearn.preprocessing import LabelEncoder
 
 from recipys.recipe import Recipe
@@ -63,6 +63,55 @@ def make_single_split(
     return data_split
 
 
+def make_target_split(
+    data: dict[pd.DataFrame],
+    vars: dict[str],
+    num_folds: int,
+    fold_index: int,
+    seed: int = 42,
+    debug: bool = False,
+    target_size = 100,
+) -> dict[dict[pd.DataFrame]]:
+    """Randomly split the data into training, validation, and test set.
+
+    Args:
+        data: dictionary containing data divided int OUTCOME, STATIC, and DYNAMIC.
+        vars: Contains the names of columns in the data.
+        num_folds: Number of folds for cross validation.
+        seed: Random seed.
+        debug: Load less data if true.
+
+    Returns:
+        Input data divided into 'train', 'val', and 'test'.
+    """
+    id = vars["GROUP"]
+    fraction_to_load = 1 if not debug else 0.01
+    stays = data["STATIC"][[id]].sample(frac=fraction_to_load, random_state=seed)
+
+    train_and_val = stays.sample(target_size, random_state=seed)
+    test = stays.drop(train_and_val.index).index
+    train_and_val_labels = data["OUTCOME"].label.loc[train_and_val.index]
+
+    target_folds = StratifiedKFold(num_folds, shuffle=True, random_state=seed)
+    train, val = list(target_folds.split(train_and_val, train_and_val_labels))[fold_index]
+
+    split = {
+        "train": stays.iloc[train],
+        "val": stays.iloc[val],
+        "test": stays.iloc[test],
+    }
+    data_split = {}
+
+    for fold in split.keys():  # Loop through train / val / test
+        # Loop through DYNAMIC / STATIC / OUTCOME
+        # set sort to true to make sure that IDs are reordered after scrambling earlier
+        data_split[fold] = {
+            data_type: data[data_type].merge(split[fold], on=id, how="right", sort=True) for data_type in data.keys()
+        }
+
+    return data_split
+
+
 def apply_recipe_to_splits(recipe: Recipe, data: dict[dict[pd.DataFrame]], type: str) -> dict[dict[pd.DataFrame]]:
     """Fits and transforms the training data, then transforms the validation and test data with the recipe.
 
@@ -115,18 +164,18 @@ def preprocess_data(
     config_string = f"{dumped_file_names}{dumped_vars}{use_features}{seed}{fold_index}{debug}".encode("utf-8")
     cache_file = cache_dir / hashlib.md5(config_string).hexdigest()
 
-    if use_cache:
-        if cache_file.exists():
-            with open(cache_file, "rb") as f:
-                logging.info(f"Loading cached data from {cache_file}.")
-                return pickle.load(f)
-        else:
-            logging.info(f"No cached data found in {cache_file}, loading raw data.")
+    # if use_cache:
+    #     if cache_file.exists():
+    #         with open(cache_file, "rb") as f:
+    #             logging.info(f"Loading cached data from {cache_file}.")
+    #             return pickle.load(f)
+    #     else:
+    #         logging.info(f"No cached data found in {cache_file}, loading raw data.")
 
     data = {f: pq.read_table(data_dir / file_names[f]).to_pandas() for f in ["STATIC", "DYNAMIC", "OUTCOME"]}
 
     logging.info("Generating splits.")
-    data = make_single_split(data, vars, num_folds, fold_index, seed=seed, debug=debug)
+    data = make_target_split(data, vars, num_folds, fold_index, seed=seed, debug=debug)
 
     logging.info("Preprocessing static data.")
     sta_rec = Recipe(data["train"]["STATIC"], [], vars["STATIC"])

From d62d52470fd084811f1a921c9c57d53d9315c138 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 7 Jan 2023 01:24:28 +0100
Subject: [PATCH 003/163] refactor fold size

---
 icu_benchmarks/data/preprocess.py | 100 +++++++++++-------------------
 1 file changed, 35 insertions(+), 65 deletions(-)

diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py
index 1b78bfe1..2ed8d9ff 100644
--- a/icu_benchmarks/data/preprocess.py
+++ b/icu_benchmarks/data/preprocess.py
@@ -24,6 +24,7 @@ def make_single_split(
     fold_index: int,
     seed: int = 42,
     debug: bool = False,
+    fold_size: int = None,
 ) -> dict[dict[pd.DataFrame]]:
     """Randomly split the data into training, validation, and test set.
 
@@ -41,72 +42,38 @@ def make_single_split(
     fraction_to_load = 1 if not debug else 0.01
     stays = data["STATIC"][[id]].sample(frac=fraction_to_load, random_state=seed)
 
-    outer = KFold(num_folds, shuffle=True, random_state=seed)
+    if fold_size:
+        train_and_val = stays.sample(fold_size, random_state=seed)
+        test = stays.drop(train_and_val.index)
+        train_and_val_labels = data["OUTCOME"].label.loc[train_and_val.index]
 
-    train, test_and_val = list(outer.split(stays))[fold_index]
-    val, test = np.array_split(test_and_val, 2)
+        target_folds = StratifiedKFold(num_folds, shuffle=True, random_state=seed)
+        train, val = list(target_folds.split(train_and_val, train_and_val_labels))[fold_index]
 
-    split = {
-        "train": stays.iloc[train],
-        "val": stays.iloc[val],
-        "test": stays.iloc[test],
-    }
-    data_split = {}
-
-    for fold in split.keys():  # Loop through train / val / test
-        # Loop through DYNAMIC / STATIC / OUTCOME
-        # set sort to true to make sure that IDs are reordered after scrambling earlier
-        data_split[fold] = {
-            data_type: data[data_type].merge(split[fold], on=id, how="right", sort=True) for data_type in data.keys()
+        split = {
+            "train": train_and_val.iloc[train],
+            "val": train_and_val.iloc[val],
+            "test": test,
         }
+    else:
+        outer = KFold(num_folds, shuffle=True, random_state=seed)
 
-    return data_split
-
-
-def make_target_split(
-    data: dict[pd.DataFrame],
-    vars: dict[str],
-    num_folds: int,
-    fold_index: int,
-    seed: int = 42,
-    debug: bool = False,
-    target_size = 100,
-) -> dict[dict[pd.DataFrame]]:
-    """Randomly split the data into training, validation, and test set.
-
-    Args:
-        data: dictionary containing data divided int OUTCOME, STATIC, and DYNAMIC.
-        vars: Contains the names of columns in the data.
-        num_folds: Number of folds for cross validation.
-        seed: Random seed.
-        debug: Load less data if true.
-
-    Returns:
-        Input data divided into 'train', 'val', and 'test'.
-    """
-    id = vars["GROUP"]
-    fraction_to_load = 1 if not debug else 0.01
-    stays = data["STATIC"][[id]].sample(frac=fraction_to_load, random_state=seed)
-
-    train_and_val = stays.sample(target_size, random_state=seed)
-    test = stays.drop(train_and_val.index).index
-    train_and_val_labels = data["OUTCOME"].label.loc[train_and_val.index]
+        train, test_and_val = list(outer.split(stays))[fold_index]
+        val, test = np.array_split(test_and_val, 2)
 
-    target_folds = StratifiedKFold(num_folds, shuffle=True, random_state=seed)
-    train, val = list(target_folds.split(train_and_val, train_and_val_labels))[fold_index]
+        split = {
+            "train": stays.iloc[train],
+            "val": stays.iloc[val],
+            "test": stays.iloc[test],
+        }
+    
 
-    split = {
-        "train": stays.iloc[train],
-        "val": stays.iloc[val],
-        "test": stays.iloc[test],
-    }
     data_split = {}
-
-    for fold in split.keys():  # Loop through train / val / test
+    for fold_name, fold in split.items():  # Loop through train / val / test
         # Loop through DYNAMIC / STATIC / OUTCOME
         # set sort to true to make sure that IDs are reordered after scrambling earlier
-        data_split[fold] = {
-            data_type: data[data_type].merge(split[fold], on=id, how="right", sort=True) for data_type in data.keys()
+        data_split[fold_name] = {
+            data_type: data[data_type].merge(fold, on=id, how="right", sort=True) for data_type in data.keys()
         }
 
     return data_split
@@ -139,6 +106,7 @@ def preprocess_data(
     debug: bool = False,
     use_cache: bool = False,
     num_folds: int = 5,
+    fold_size: int = None,
     fold_index: int = 0,
 ) -> dict[dict[pd.DataFrame]]:
     """Perform loading, splitting, imputing and normalising of task data.
@@ -159,23 +127,25 @@ def preprocess_data(
             nested within split (train/val/test).
     """
     cache_dir = data_dir / "cache"
+    if fold_size:
+        cache_dir = cache_dir / f"T{fold_size}"
     dumped_file_names = json.dumps(file_names, sort_keys=True)
     dumped_vars = json.dumps(vars, sort_keys=True)
     config_string = f"{dumped_file_names}{dumped_vars}{use_features}{seed}{fold_index}{debug}".encode("utf-8")
     cache_file = cache_dir / hashlib.md5(config_string).hexdigest()
 
-    # if use_cache:
-    #     if cache_file.exists():
-    #         with open(cache_file, "rb") as f:
-    #             logging.info(f"Loading cached data from {cache_file}.")
-    #             return pickle.load(f)
-    #     else:
-    #         logging.info(f"No cached data found in {cache_file}, loading raw data.")
+    if use_cache:
+        if cache_file.exists():
+            with open(cache_file, "rb") as f:
+                logging.info(f"Loading cached data from {cache_file}.")
+                return pickle.load(f)
+        else:
+            logging.info(f"No cached data found in {cache_file}, loading raw data.")
 
     data = {f: pq.read_table(data_dir / file_names[f]).to_pandas() for f in ["STATIC", "DYNAMIC", "OUTCOME"]}
 
     logging.info("Generating splits.")
-    data = make_target_split(data, vars, num_folds, fold_index, seed=seed, debug=debug)
+    data = make_single_split(data, vars, num_folds, fold_index, seed=seed, debug=debug, fold_size=fold_size)
 
     logging.info("Preprocessing static data.")
     sta_rec = Recipe(data["train"]["STATIC"], [], vars["STATIC"])

From be991ff374204c0bd44213719d177da4c9e4a860 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 7 Jan 2023 01:24:34 +0100
Subject: [PATCH 004/163] Update train.py

---
 icu_benchmarks/models/train.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/icu_benchmarks/models/train.py b/icu_benchmarks/models/train.py
index d6d54024..1489ce4e 100644
--- a/icu_benchmarks/models/train.py
+++ b/icu_benchmarks/models/train.py
@@ -63,7 +63,6 @@ def train_common(
             model.load_weights(source_dir / "model.joblib")
         else:
             raise Exception("No weights to load at path : {}".format(source_dir / "model.*"))
-
     else:
         try:
             model.train(dataset, val_dataset, weight, seed)

From dcf1a6d45d319cbe793d7267676945288e1c55e0 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 7 Jan 2023 01:25:03 +0100
Subject: [PATCH 005/163] make cpu and gin flag general, rename gin flag

---
 icu_benchmarks/run.py       | 2 +-
 icu_benchmarks/run_utils.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py
index 653174b0..ccd10fe5 100644
--- a/icu_benchmarks/run.py
+++ b/icu_benchmarks/run.py
@@ -48,7 +48,7 @@ def main(my_args=tuple(sys.argv[1:])):
             if args.experiment
             else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")]
         )
-        gin.parse_config_files_and_bindings(gin_config_files, args.hyperparams, finalize_config=False)
+        gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False)
         run_dir = create_run_dir(log_dir)
         choose_and_bind_hyperparameters(
             args.tune, args.data_dir, run_dir, args.seeds[0], checkpoint=checkpoint, debug=args.debug
diff --git a/icu_benchmarks/run_utils.py b/icu_benchmarks/run_utils.py
index 0dca0522..a615b226 100644
--- a/icu_benchmarks/run_utils.py
+++ b/icu_benchmarks/run_utils.py
@@ -36,15 +36,15 @@ def build_parser() -> ArgumentParser:
     general_args.add_argument(
         "-s", "--seeds", default=[1111], nargs="+", type=int, help="Random seed for processing, tuning and training."
     )
+    general_args.add_argument("--cpu", default=False, action=BooleanOptionalAction, help="Set to train and test on CPU.")
     general_args.add_argument("-db", "--debug", default=False, action=BooleanOptionalAction, help="Set to load less data.")
     general_args.add_argument("-c", "--cache", action=BooleanOptionalAction, help="Set to cache and use preprocessed data.")
     general_args.add_argument("-pl", "--plot", action=BooleanOptionalAction, help="Generate common plots.")
+    general_args.add_argument("-gb", "--gin-bindings", nargs="+", help="Overwrite or add gin bindings.")
 
     # MODEL TRAINING ARGUMENTS
     prep_and_train = subparsers.add_parser("train", help="Preprocess data and train model.", parents=[parent_parser])
     prep_and_train.add_argument("--reproducible", default=True, action=BooleanOptionalAction, help="Make torch reproducible.")
-    prep_and_train.add_argument("--cpu", default=False, action=BooleanOptionalAction, help="Set to train on CPU.")
-    prep_and_train.add_argument("-hp", "--hyperparams", nargs="+", help="Hyperparameters for model.")
     prep_and_train.add_argument("--tune", default=False, action=BooleanOptionalAction, help="Find best hyperparameters.")
     prep_and_train.add_argument("--checkpoint", type=Path, help="Use previous checkpoint.")
 

From ef4e68768b8d83c669af3208bd3436072a8a26bf Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 7 Jan 2023 01:25:18 +0100
Subject: [PATCH 006/163] Update preprocess.py

---
 icu_benchmarks/data/preprocess.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py
index 2ed8d9ff..4996e7f3 100644
--- a/icu_benchmarks/data/preprocess.py
+++ b/icu_benchmarks/data/preprocess.py
@@ -66,7 +66,6 @@ def make_single_split(
             "val": stays.iloc[val],
             "test": stays.iloc[test],
         }
-    
 
     data_split = {}
     for fold_name, fold in split.items():  # Loop through train / val / test

From 63c06e2a827d5df3e375ca8aaadfee2ca1a52787 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 7 Jan 2023 15:22:48 +0100
Subject: [PATCH 007/163] add basis for domain adaptation

---
 icu_benchmarks/models/domain_adaptation.py | 134 +++++++++++++++++++++
 icu_benchmarks/models/wrappers.py          |  20 +++
 2 files changed, 154 insertions(+)
 create mode 100644 icu_benchmarks/models/domain_adaptation.py

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
new file mode 100644
index 00000000..7f527848
--- /dev/null
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -0,0 +1,134 @@
+import os
+import random
+import gin
+import torch
+import logging
+import numpy as np
+import pandas as pd
+from pathlib import Path
+from skopt import gp_minimize
+
+from icu_benchmarks.data.loader import RICUDataset
+from icu_benchmarks.models.wrappers import MLWrapper
+from icu_benchmarks.models.utils import save_config_file
+
+
+def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, model_dir: Path, log_dir: Path):
+    """Get predictions for a single model.
+
+    Args:
+        model: Model to get predictions for.
+        dataset: Dataset to get predictions for.
+        model_dir: Path to directory where model weights are stored.
+        log_dir: Path to directory where model output should be saved.
+
+    Returns:
+        Tuple of predictions and labels.
+    """
+    model.set_log_dir(log_dir)
+    if (model_dir / "model.torch").is_file():
+        model.load_weights(model_dir / "model.torch")
+    elif (model_dir / "model.txt").is_file():
+        model.load_weights(model_dir / "model.txt")
+    elif (model_dir / "model.joblib").is_file():
+        model.load_weights(model_dir / "model.joblib")
+    else:
+        raise Exception("No weights to load at path : {}".format(model_dir / "model.*"))
+    return model.predict(dataset)
+
+
+@gin.configurable("domain_adaptation")
+def evaluate_model_combination(
+    data: dict[str, pd.DataFrame],
+    log_dir: Path,
+    source_dir: Path = None,
+    seed: int = 1234,
+    reproducible: bool = True,
+    model: object = MLWrapper,
+    weight: str = None,
+    test_on: str = "Test",
+
+):
+    """Common wrapper to train all benchmarked models.
+
+    Args:
+        data: Dict containing data to be trained on.
+        log_dir: Path to directory where model output should be saved.
+        source_dir: If set to load weights, path to directory containing trained weights.
+        seed: Common seed used for any random operation.
+        reproducible: If set to true, set torch to run reproducibly.
+    """
+
+    # Setting the seed before gin parsing
+    os.environ["PYTHONHASHSEED"] = str(seed)
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+
+    if reproducible:
+        os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
+        torch.use_deterministic_algorithms(True)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+
+    dataset = RICUDataset(data, split="train")
+    test_dataset = RICUDataset(data, split=test_on)
+    weight = dataset.get_balance()
+
+    predictions = []
+    for source_dataset in source_dir.iterdir():
+        model_dir = source_dir / source_dataset
+        predictions.append(get_predictions_for_single_model(model, dataset, model_dir, log_dir))
+
+    test_pred = np.average(predictions, axis=0, weights=dataset_weights)
+
+    # save config file again to capture missing gin parameters
+    return log_loss(test_label, test_pred)
+
+
+@gin.configurable("tune_hyperparameters")
+def choose_and_bind_hyperparameters(
+    data_dir: Path,
+    log_dir: Path,
+    seed: int,
+    n_initial_points: int = 3,
+    n_calls: int = 20,
+    folds_to_tune_on: int = gin.REQUIRED,
+    debug: bool = False,
+):
+    """Choose hyperparameters to tune and bind them to gin.
+
+    Args:
+        data_dir: Path to the data directory.
+        log_dir: Path to the log directory.
+        seed: Random seed.
+        n_initial_points: Number of initial points to explore.
+        n_calls: Number of iterations to optimize the hyperparameters.
+        folds_to_tune_on: Number of folds to tune on.
+        debug: Whether to load less data and enable more logging.
+
+    Raises:
+        ValueError: If checkpoint is not None and the checkpoint does not exist.
+    """
+
+    def convex_model_combination(hyperparams):
+        return preprocess_and_train_for_folds(
+            data_dir,
+            Path(temp_dir),
+            seed,
+            num_folds_to_train=folds_to_tune_on,
+            use_cache=True,
+            test_on="val",
+            debug=debug,
+        )
+
+    res = gp_minimize(
+        bind_params_and_train,
+        hyperparams_bounds,
+        n_calls=n_calls,
+        n_initial_points=n_initial_points,
+        random_state=seed,
+        noise=1e-10,  # the models are deterministic, but noise is needed for the gp to work
+    )
+
+    print(res)
diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index 5e820f14..bf06083b 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -289,6 +289,22 @@ def save_weights(self, epoch, save_path):
     def load_weights(self, load_path):
         load_model_state(load_path, self.encoder, optimizer=self.optimizer)
 
+    def predict(self, dataset, weight, seed):
+        self.set_metrics()
+        test_loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=self.n_worker, pin_memory=self.pin_memory)
+        if isinstance(weight, list):
+            weight = torch.FloatTensor(weight).to(self.device)
+        test_loss, test_metrics = self.evaluate(test_loader, self.metrics, weight)
+
+        self.encoder.eval()
+        all_preds = []
+        with torch.no_grad():
+            for elem in test_loader:
+                _, preds, _ = self.step_fn(elem, weight)
+                all_preds += preds
+
+        return all_preds
+
 
 @gin.configurable("MLWrapper")
 class MLWrapper(object):
@@ -421,3 +437,7 @@ def load_weights(self, load_path):
         else:
             with open(load_path, "rb") as f:
                 self.model = joblib.load(f)
+
+    def predict(self, dataset, weight, seed):
+        test_rep, _ = dataset.get_data_and_labels()
+        return self.model.predict_proba(test_rep)
\ No newline at end of file

From 83782b5fb4d79760e21dd6c38c0c6acbc513bfb2 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Tue, 10 Jan 2023 23:52:07 +0100
Subject: [PATCH 008/163] refactor folds for targets

---
 icu_benchmarks/data/preprocess.py | 43 +++++++++++++------------------
 1 file changed, 18 insertions(+), 25 deletions(-)

diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py
index 0a30e711..92428dda 100644
--- a/icu_benchmarks/data/preprocess.py
+++ b/icu_benchmarks/data/preprocess.py
@@ -2,13 +2,14 @@
 import gin
 import json
 import hashlib
+import numpy as np
 import pandas as pd
 import pyarrow.parquet as pq
 from pathlib import Path
 import pickle
 
 from sklearn.impute import MissingIndicator, SimpleImputer
-from sklearn.model_selection import LeavePOut, StratifiedKFold
+from sklearn.model_selection import StratifiedKFold
 from sklearn.preprocessing import LabelEncoder
 
 from recipys.recipe import Recipe
@@ -47,33 +48,25 @@ def make_single_split(
     stays = data["STATIC"][id].sample(frac=fraction_to_load, random_state=seed)
     labels = data["OUTCOME"][vars["LABEL"]]
 
+
+    outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed)
+    dev, test = list(outer_CV.split(stays, labels))[repetition_index]
+
     if fold_size:
-        train_and_val = stays.sample(fold_size, random_state=seed)
-        test = stays.drop(train_and_val.index)
-        train_and_val_labels = data["OUTCOME"].label.loc[train_and_val.index]
+        test = np.append(test, dev[fold_size:])
+        dev = dev[:fold_size]
 
-        target_folds = StratifiedKFold(cv_folds, shuffle=True, random_state=seed)
-        train, val = list(target_folds.split(train_and_val, train_and_val_labels))[fold_index]
+    dev_stays = stays.iloc[dev]
+    dev_labels = labels.iloc[dev]
 
-        split = {
-            "train": train_and_val.iloc[train],
-            "val": train_and_val.iloc[val],
-            "test": test,
-        }
-    else:
-        outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed)
-        inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed)
-
-        dev, test = list(outer_CV.split(stays, labels))[repetition_index]
-        dev_stays = stays.iloc[dev]
-        dev_labels = labels.iloc[dev]
-        train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index]
-
-        split = {
-            "train": dev_stays.iloc[train],
-            "val": dev_stays.iloc[val],
-            "test": stays.iloc[test],
-        }
+    inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed)
+    train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index]
+
+    split = {
+        "train": dev_stays.iloc[train],
+        "val": dev_stays.iloc[val],
+        "test": stays.iloc[test],
+    }
 
     data_split = {}
     for fold_name, fold in split.items():  # Loop through train / val / test

From 43ebd5fae16d19ca80b7e10a38d07329f6285cb6 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Wed, 11 Jan 2023 19:17:10 +0100
Subject: [PATCH 009/163] add evaluation function to test whole dataset

also redo fold for DA if it contains too few positive samples
---
 icu_benchmarks/cross_validation.py | 49 ++++++++++++++++
 icu_benchmarks/data/preprocess.py  | 89 ++++++++++++++++++++++--------
 icu_benchmarks/run.py              | 12 +++-
 3 files changed, 125 insertions(+), 25 deletions(-)

diff --git a/icu_benchmarks/cross_validation.py b/icu_benchmarks/cross_validation.py
index ec79d3d8..db95ee28 100644
--- a/icu_benchmarks/cross_validation.py
+++ b/icu_benchmarks/cross_validation.py
@@ -75,3 +75,52 @@ def execute_repeated_cv(
         log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
 
     return agg_loss / (cv_repetitions_to_train * cv_folds_to_train)
+
+
+def evaluate(
+    data_dir: Path,
+    log_dir: Path,
+    seed: int,
+    source_dir: Path = None,
+    reproducible: bool = True,
+    debug: bool = False,
+    use_cache: bool = False,
+) -> float:
+    """Preprocesses data and trains a model for each fold.
+
+    Args:
+        data_dir: Path to the data directory.
+        log_dir: Path to the log directory.
+        seed: Random seed.
+        load_weights: Whether to load weights from source_dir.
+        source_dir: Path to the source directory.
+        cv_folds: Number of folds for cross validation.
+        cv_folds_to_train: Number of folds to use during training. If None, all folds are trained on.
+        reproducible: Whether to make torch reproducible.
+        debug: Whether to load less data and enable more logging.
+        use_cache: Whether to cache and use cached data.
+        test_on: Dataset to test on. Can be "test" or "val" (e.g. for hyperparameter tuning).
+
+    Returns:
+        The average loss of all folds.
+    """
+
+    data = preprocess_data(
+        data_dir,
+        seed=seed,
+        debug=debug,
+        use_cache=use_cache,
+        test_all=True,
+    )
+
+    run_dir_seed = log_dir / f"seed_{seed}"
+    run_dir_seed.mkdir(parents=True, exist_ok=True)
+
+    return train_common(
+        data,
+        log_dir=run_dir_seed,
+        load_weights=True,
+        source_dir=source_dir,
+        seed=seed,
+        reproducible=reproducible,
+    )
diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py
index 03afc2f3..04563e9f 100644
--- a/icu_benchmarks/data/preprocess.py
+++ b/icu_benchmarks/data/preprocess.py
@@ -27,6 +27,7 @@ def make_single_split(
     seed: int = 42,
     debug: bool = False,
     fold_size: int = None,
+    test_all: bool = False,
 ) -> dict[dict[pd.DataFrame]]:
     """Randomly split the data into training, validation, and test set.
 
@@ -39,6 +40,7 @@ def make_single_split(
         fold_index: Index of the fold to return.
         seed: Random seed.
         debug: Load less data if true.
+        test_all: If true, the test set will be the entire dataset.
 
     Returns:
         Input data divided into 'train', 'val', and 'test'.
@@ -50,25 +52,42 @@ def make_single_split(
         stays = stays.sample(frac=0.01, random_state=seed)
     labels = data["OUTCOME"][vars["LABEL"]].loc[stays.index]
 
-
-    outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed)
-    dev, test = list(outer_CV.split(stays, labels))[repetition_index]
-
-    if fold_size:
-        test = np.append(test, dev[fold_size:])
-        dev = dev[:fold_size]
-
-    dev_stays = stays.iloc[dev]
-    dev_labels = labels.iloc[dev]
-
-    inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed)
-    train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index]
-
-    split = {
-        "train": dev_stays.iloc[train],
-        "val": dev_stays.iloc[val],
-        "test": stays.iloc[test],
-    }
+    if test_all:
+        split = {
+            "train": stays.iloc[0:0],
+            "val": stays.iloc[0:0],
+            "test": stays,
+        }
+    else:
+        outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed)
+        dev, test = list(outer_CV.split(stays, labels))[repetition_index]
+
+        if fold_size:
+            start_index = 0
+            end_index = fold_size
+            pre_dev = dev[start_index:end_index]
+            leave_for_test = dev[end_index:]
+            pre_dev_labels = labels.iloc[pre_dev]
+            while pre_dev_labels.sum() < cv_folds:
+                start_index += fold_size
+                end_index += fold_size
+                pre_dev = dev[start_index:end_index]
+                pre_dev_labels = labels.iloc[pre_dev]
+                leave_for_test = np.append(dev[0:start_index], dev[end_index:])
+            dev = pre_dev
+            test = np.append(test, leave_for_test)
+
+        dev_stays = stays.iloc[dev]
+        dev_labels = labels.iloc[dev]
+
+        inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed)
+        train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index]
+
+        split = {
+            "train": dev_stays.iloc[train],
+            "val": dev_stays.iloc[val],
+            "test": stays.iloc[test],
+        }
 
     data_split = {}
     for fold_name, fold in split.items():  # Loop through train / val / test
@@ -81,17 +100,24 @@ def make_single_split(
     return data_split
 
 
-def apply_recipe_to_splits(recipe: Recipe, data: dict[dict[pd.DataFrame]], type: str) -> dict[dict[pd.DataFrame]]:
+def apply_recipe_to_splits(
+    recipe: Recipe, data: dict[dict[pd.DataFrame]], type: str, test_all: bool = False
+) -> dict[dict[pd.DataFrame]]:
     """Fits and transforms the training data, then transforms the validation and test data with the recipe.
 
     Args:
         recipe: Object containing info about the data and steps.
         data: Dict containing 'train', 'val', and 'test' and types of data per split.
         type: Whether to apply recipe to dynamic data, static data or outcomes.
+        test_all: If true, the test set will be the entire dataset.
 
     Returns:
         Transformed data divided into 'train', 'val', and 'test'.
     """
+    if test_all:
+        data["test"][type] = recipe.prep(data["test"][type])
+        return data
+
     data["train"][type] = recipe.prep()
     data["val"][type] = recipe.bake(data["val"][type])
     data["test"][type] = recipe.bake(data["test"][type])
@@ -112,6 +138,7 @@ def preprocess_data(
     cv_folds: int = 5,
     fold_size: int = None,
     fold_index: int = 0,
+    test_all: bool = False,
 ) -> dict[dict[pd.DataFrame]]:
     """Perform loading, splitting, imputing and normalising of task data.
 
@@ -127,6 +154,7 @@ def preprocess_data(
         repetition_index: Index of the repetition to return.
         cv_folds: Number of folds to use for cross validation.
         fold_index: Index of the fold to return.
+        test_all: If true, the test set will be the entire dataset.
 
     Returns:
         Preprocessed data as DataFrame in a hierarchical dict with data type (STATIC/DYNAMIC/OUTCOME)
@@ -135,6 +163,8 @@ def preprocess_data(
     cache_dir = data_dir / "cache"
     if fold_size:
         cache_dir = cache_dir / f"T{fold_size}"
+    if test_all:
+        cache_dir = cache_dir / "test_complete"
     dumped_file_names = json.dumps(file_names, sort_keys=True)
     dumped_vars = json.dumps(vars, sort_keys=True)
     config_string = f"{dumped_file_names}{dumped_vars}{use_features}{seed}{repetition_index}{fold_index}{debug}".encode(
@@ -153,7 +183,18 @@ def preprocess_data(
     data = {f: pq.read_table(data_dir / file_names[f]).to_pandas() for f in ["STATIC", "DYNAMIC", "OUTCOME"]}
 
     logging.info("Generating splits.")
-    data = make_single_split(data, vars, cv_repetitions, repetition_index, cv_folds, fold_index, seed=seed, debug=debug, fold_size=fold_size)
+    data = make_single_split(
+        data,
+        vars,
+        cv_repetitions,
+        repetition_index,
+        cv_folds,
+        fold_index,
+        seed=seed,
+        debug=debug,
+        fold_size=fold_size,
+        test_all=test_all,
+    )
 
     logging.info("Preprocessing static data.")
     sta_rec = Recipe(data["train"]["STATIC"], [], vars["STATIC"])
@@ -162,7 +203,7 @@ def preprocess_data(
     sta_rec.add_step(StepSklearn(SimpleImputer(missing_values=None, strategy="most_frequent"), sel=has_type("object")))
     sta_rec.add_step(StepSklearn(LabelEncoder(), sel=has_type("object"), columnwise=True))
 
-    data = apply_recipe_to_splits(sta_rec, data, "STATIC")
+    data = apply_recipe_to_splits(sta_rec, data, "STATIC", test_all=test_all)
 
     logging.info("Preprocessing dynamic data.")
     dyn_rec = Recipe(data["train"]["DYNAMIC"], [], vars["DYNAMIC"], vars["GROUP"], vars["SEQUENCE"])
@@ -176,11 +217,11 @@ def preprocess_data(
     dyn_rec.add_step(StepImputeFill(method="ffill"))
     dyn_rec.add_step(StepImputeFill(value=0))
 
-    data = apply_recipe_to_splits(dyn_rec, data, "DYNAMIC")
+    data = apply_recipe_to_splits(dyn_rec, data, "DYNAMIC", test_all=test_all)
 
     if use_cache and not cache_file.exists():
         if not cache_dir.exists():
-            cache_dir.mkdir()
+            cache_dir.mkdir(parents=True)
         cache_file.touch()
         with open(cache_file, "wb") as f:
             pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py
index e786ca9e..02b07d03 100644
--- a/icu_benchmarks/run.py
+++ b/icu_benchmarks/run.py
@@ -6,7 +6,7 @@
 
 from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters
 from utils.plotting.utils import plot_agg_results
-from icu_benchmarks.cross_validation import execute_repeated_cv
+from icu_benchmarks.cross_validation import execute_repeated_cv, evaluate
 from icu_benchmarks.run_utils import (
     build_parser,
     create_run_dir,
@@ -40,6 +40,16 @@ def main(my_args=tuple(sys.argv[1:])):
         run_dir = create_run_dir(log_dir)
         source_dir = args.source_dir
         gin.parse_config_file(source_dir / "train_config.gin")
+        evaluate(
+            args.data_dir,
+            run_dir,
+            args.seed,
+            source_dir=source_dir,
+            reproducible=reproducible,
+            debug=args.debug,
+            use_cache=args.cache,
+        )
+        return
     else:
         reproducible = args.reproducible
         checkpoint = log_dir / args.checkpoint if args.checkpoint else None

From 405129ad01aec849aa5ddc75efe98b42b35e1a6e Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Wed, 11 Jan 2023 19:17:35 +0100
Subject: [PATCH 010/163] update predict function for booster

---
 icu_benchmarks/models/wrappers.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index 02f23174..5b59a9a2 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -439,4 +439,7 @@ def load_weights(self, load_path):
 
     def predict(self, dataset, weight, seed):
         test_rep, _ = dataset.get_data_and_labels()
-        return self.model.predict_proba(test_rep)
\ No newline at end of file
+        if isinstance(self.model, lightgbm.basic.Booster):  # If we reload a LGBM classifier
+            return self.model.predict(test_rep)
+        else:
+            return self.model.predict_proba(test_rep)

From 7a6b01c37dcac939ab18ef5bb4b01b8ec7de71e5 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Wed, 11 Jan 2023 19:20:09 +0100
Subject: [PATCH 011/163] update domain adaptation script

---
 icu_benchmarks/models/domain_adaptation.py | 177 ++++++++++++++++-----
 icu_benchmarks/models/train.py             |   3 +
 icu_benchmarks/run.py                      |  11 ++
 icu_benchmarks/run_utils.py                |   3 +
 4 files changed, 151 insertions(+), 43 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 7f527848..6959437d 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -7,10 +7,14 @@
 import pandas as pd
 from pathlib import Path
 from skopt import gp_minimize
+from sklearn.metrics import log_loss
 
 from icu_benchmarks.data.loader import RICUDataset
+from icu_benchmarks.data.preprocess import preprocess_data
+from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters
+from icu_benchmarks.models.train import train_common
 from icu_benchmarks.models.wrappers import MLWrapper
-from icu_benchmarks.models.utils import save_config_file
+from icu_benchmarks.run_utils import log_full_line
 
 
 def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, model_dir: Path, log_dir: Path):
@@ -25,6 +29,7 @@ def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, mod
     Returns:
         Tuple of predictions and labels.
     """
+    model = MLWrapper()
     model.set_log_dir(log_dir)
     if (model_dir / "model.torch").is_file():
         model.load_weights(model_dir / "model.torch")
@@ -34,11 +39,10 @@ def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, mod
         model.load_weights(model_dir / "model.joblib")
     else:
         raise Exception("No weights to load at path : {}".format(model_dir / "model.*"))
-    return model.predict(dataset)
+    return model.predict(dataset, None, None)
 
 
-@gin.configurable("domain_adaptation")
-def evaluate_model_combination(
+def get_predictions_for_all_models(
     data: dict[str, pd.DataFrame],
     log_dir: Path,
     source_dir: Path = None,
@@ -46,8 +50,8 @@ def evaluate_model_combination(
     reproducible: bool = True,
     model: object = MLWrapper,
     weight: str = None,
-    test_on: str = "Test",
-
+    test_on: str = "test",
+    target_model: object = None,
 ):
     """Common wrapper to train all benchmarked models.
 
@@ -58,6 +62,7 @@ def evaluate_model_combination(
         seed: Common seed used for any random operation.
         reproducible: If set to true, set torch to run reproducibly.
     """
+    model = MLWrapper()
 
     # Setting the seed before gin parsing
     os.environ["PYTHONHASHSEED"] = str(seed)
@@ -71,36 +76,56 @@ def evaluate_model_combination(
         torch.backends.cudnn.deterministic = True
         torch.backends.cudnn.benchmark = False
 
-    dataset = RICUDataset(data, split="train")
     test_dataset = RICUDataset(data, split=test_on)
-    weight = dataset.get_balance()
+    val_dataset = RICUDataset(data, split="val")
+    # weight = test_dataset.get_balance()
+    _, val_labels = val_dataset.get_data_and_labels()
+    _, test_labels = test_dataset.get_data_and_labels()
 
-    predictions = []
-    for source_dataset in source_dir.iterdir():
-        model_dir = source_dir / source_dataset
-        predictions.append(get_predictions_for_single_model(model, dataset, model_dir, log_dir))
+    val_predictions = {}
+    test_predictions = {}
+    for model_dir in source_dir.iterdir():
+        if model_dir.is_dir():
+            val_predictions[model_dir.name] = get_predictions_for_single_model(model, val_dataset, model_dir, log_dir)
+            test_predictions[model_dir.name] = get_predictions_for_single_model(model, test_dataset, model_dir, log_dir)
+    val_predictions["target"] = target_model.output_transform(target_model.predict(val_dataset, None, None))
+    test_predictions["target"] = target_model.output_transform(target_model.predict(test_dataset, None, None))
 
-    test_pred = np.average(predictions, axis=0, weights=dataset_weights)
+    return val_predictions, val_labels, test_predictions, test_labels
 
-    # save config file again to capture missing gin parameters
-    return log_loss(test_label, test_pred)
 
+def get_model_metrics(test_predictions: np.ndarray, test_labels: np.ndarray):
+    """Evaluate a combination of models.
 
-@gin.configurable("tune_hyperparameters")
-def choose_and_bind_hyperparameters(
+    Args:
+        test_predictions: Predictions for test set.
+        test_labels: Labels for test set.
+    """
+    model = MLWrapper()
+    model.set_metrics(test_labels)
+    test_metric_results = {}
+    for name, metric in model.metrics.items():
+        value = metric(model.label_transform(test_labels), test_predictions)
+        test_metric_results[name] = value
+        # Only log float values
+        if isinstance(value, np.float):
+            logging.info("test {}: {}".format(name, value))
+    return test_metric_results
+
+
+def domain_adaptation(
     data_dir: Path,
-    log_dir: Path,
+    run_dir: Path,
     seed: int,
-    n_initial_points: int = 3,
-    n_calls: int = 20,
-    folds_to_tune_on: int = gin.REQUIRED,
+    n_initial_points: int = 10,
+    n_calls: int = 50,
     debug: bool = False,
 ):
     """Choose hyperparameters to tune and bind them to gin.
 
     Args:
         data_dir: Path to the data directory.
-        log_dir: Path to the log directory.
+        run_dir: Path to the log directory.
         seed: Random seed.
         n_initial_points: Number of initial points to explore.
         n_calls: Number of iterations to optimize the hyperparameters.
@@ -111,24 +136,90 @@ def choose_and_bind_hyperparameters(
         ValueError: If checkpoint is not None and the checkpoint does not exist.
     """
 
-    def convex_model_combination(hyperparams):
-        return preprocess_and_train_for_folds(
-            data_dir,
-            Path(temp_dir),
-            seed,
-            num_folds_to_train=folds_to_tune_on,
-            use_cache=True,
-            test_on="val",
-            debug=debug,
-        )
-
-    res = gp_minimize(
-        bind_params_and_train,
-        hyperparams_bounds,
-        n_calls=n_calls,
-        n_initial_points=n_initial_points,
-        random_state=seed,
-        noise=1e-10,  # the models are deterministic, but noise is needed for the gp to work
-    )
-
-    print(res)
+    # train target baselines
+    
+    
+    agg_loss = 0
+    cv_repetitions = 5
+    cv_repetitions_to_train = 5
+    cv_folds = 5
+    cv_folds_to_train = 5
+    datasets = ["hirid", "eicu", "aumc", "miiv"]
+    weight_bounds = ((0.0001, 1.0) for _ in range(len(datasets)))
+    task_dir = Path("../data/mortality24/")
+
+    # evaluate models on same test split
+    for dataset in datasets:
+        log_full_line(f"STARTING {dataset}", char="#", num_newlines=2)
+        choose_and_bind_hyperparameters(True, task_dir / dataset, run_dir, seed, debug=debug)
+        for repetition in range(cv_repetitions_to_train):
+            for fold_index in range(cv_folds_to_train):
+                data = preprocess_data(
+                    data_dir,
+                    seed=seed,
+                    debug=debug,
+                    use_cache=True,
+                    cv_repetitions=cv_repetitions,
+                    repetition_index=repetition,
+                    cv_folds=cv_folds,
+                    fold_index=fold_index,
+                )
+
+                run_dir_seed = run_dir / f"seed_{seed}" / f"fold_{fold_index}"
+                run_dir_seed.mkdir(parents=True, exist_ok=True)
+
+                # evaluate target baselines
+                curr_loss, target_model = train_common(
+                    data,
+                    log_dir=run_dir_seed,
+                    seed=seed,
+                    reproducible=True,
+                    test_on="test",
+                    return_model=True,
+                )
+                agg_loss += curr_loss
+
+                val_predictions, val_labels, test_predictions, test_labels = get_predictions_for_all_models(
+                    data,
+                    run_dir,
+                    source_dir=Path("../models/best_models/Mortality24/LGBMClassifier"),
+                    seed=seed,
+                    target_model=target_model,
+                )
+
+                # evaluate source baselines and oracle
+                for source in datasets:
+                    if source == dataset:
+                        continue
+                    logging.info("Evaluating model: {}".format(source))
+                    get_model_metrics(test_predictions[source], test_labels)
+
+                # evaluate convex combination of models
+                val_predictions_wo_oracle = [pred for source, pred in val_predictions.items() if source != dataset]
+                test_predictions_wo_oracle = [pred for source, pred in test_predictions.items() if source != dataset]
+                def convex_model_combination(model_weights):
+                    val_pred = np.average(val_predictions_wo_oracle, axis=0, weights=model_weights)
+                    return log_loss(val_labels, val_pred)
+
+                logging.disable(logging.INFO)
+                res = gp_minimize(
+                    convex_model_combination,
+                    weight_bounds,
+                    n_calls=n_calls,
+                    n_initial_points=n_initial_points,
+                    random_state=seed,
+                    noise=1e-10,  # the models are deterministic, but noise is needed for the gp to work
+                )
+                logging.disable(logging.NOTSET)
+                best_model_weights = res.x
+                logging.info(best_model_weights)
+                test_pred = np.average(test_predictions_wo_oracle, axis=0, weights=best_model_weights)
+                get_model_metrics(test_pred, test_labels)
+
+
+                log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
+            log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
+        log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=10)
+
+    return agg_loss / (cv_repetitions_to_train * cv_folds_to_train)
+ 
\ No newline at end of file
diff --git a/icu_benchmarks/models/train.py b/icu_benchmarks/models/train.py
index dc38f9bc..478dded0 100644
--- a/icu_benchmarks/models/train.py
+++ b/icu_benchmarks/models/train.py
@@ -24,6 +24,7 @@ def train_common(
     model: object = MLWrapper,
     weight: str = None,
     test_on: str = "test",
+    return_model: bool = False,
 ):
     """Common wrapper to train all benchmarked models.
 
@@ -75,4 +76,6 @@ def train_common(
 
     # save config file again to capture missing gin parameters
     save_config_file(log_dir)
+    if return_model:
+        return model.test(test_dataset, weight, seed), model
     return model.test(test_dataset, weight, seed)
diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py
index 02b07d03..266bef93 100644
--- a/icu_benchmarks/run.py
+++ b/icu_benchmarks/run.py
@@ -5,6 +5,7 @@
 from pathlib import Path
 
 from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters
+from icu_benchmarks.models.domain_adaptation import domain_adaptation
 from utils.plotting.utils import plot_agg_results
 from icu_benchmarks.cross_validation import execute_repeated_cv, evaluate
 from icu_benchmarks.run_utils import (
@@ -50,6 +51,16 @@ def main(my_args=tuple(sys.argv[1:])):
             use_cache=args.cache,
         )
         return
+    if args.command == "da":
+        run_dir = create_run_dir(log_dir)
+        gin_config_files = (
+            [Path(f"configs/experiments/{args.experiment}.gin")]
+            if args.experiment
+            else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")]
+        )
+        gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False)
+        domain_adaptation(args.data_dir, run_dir, args.seed)
+        return
     else:
         reproducible = args.reproducible
         checkpoint = log_dir / args.checkpoint if args.checkpoint else None
diff --git a/icu_benchmarks/run_utils.py b/icu_benchmarks/run_utils.py
index 0901df59..29dc5378 100644
--- a/icu_benchmarks/run_utils.py
+++ b/icu_benchmarks/run_utils.py
@@ -48,6 +48,9 @@ def build_parser() -> ArgumentParser:
     evaluate.add_argument("-sn", "--source-name", required=True, type=Path, help="Name of the source dataset.")
     evaluate.add_argument("--source-dir", required=True, type=Path, help="Directory containing gin and model weights.")
 
+    # DOMAIN ADAPTATION ARGUMENTS
+    prep_and_train = subparsers.add_parser("da", help="Run DA experiment.", parents=[parent_parser])
+
     return parser
 
 

From 7515597498319dfd5a787dc16ddcf860691cc7ad Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Wed, 11 Jan 2023 23:28:50 +0100
Subject: [PATCH 012/163] remove weight tuning, adapt for LR

---
 icu_benchmarks/models/domain_adaptation.py | 103 ++++++++-------------
 icu_benchmarks/run.py                      |   2 +-
 2 files changed, 42 insertions(+), 63 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 6959437d..83494b36 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -17,11 +17,11 @@
 from icu_benchmarks.run_utils import log_full_line
 
 
-def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, model_dir: Path, log_dir: Path):
+def get_predictions_for_single_model(target_model: object, dataset: RICUDataset, model_dir: Path, log_dir: Path):
     """Get predictions for a single model.
 
     Args:
-        model: Model to get predictions for.
+        target_model: Wrapper whose underlying model is used to build the MLWrapper that loads the source weights.
         dataset: Dataset to get predictions for.
         model_dir: Path to directory where model weights are stored.
         log_dir: Path to directory where model output should be saved.
@@ -29,7 +29,7 @@ def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, mod
     Returns:
         Tuple of predictions and labels.
     """
-    model = MLWrapper()
+    model = MLWrapper(target_model.model)
     model.set_log_dir(log_dir)
     if (model_dir / "model.torch").is_file():
         model.load_weights(model_dir / "model.torch")
@@ -43,15 +43,14 @@ def get_predictions_for_single_model(model: MLWrapper, dataset: RICUDataset, mod
 
 
 def get_predictions_for_all_models(
+    target_model: object,
     data: dict[str, pd.DataFrame],
     log_dir: Path,
     source_dir: Path = None,
     seed: int = 1234,
     reproducible: bool = True,
-    model: object = MLWrapper,
-    weight: str = None,
     test_on: str = "test",
-    target_model: object = None,
+    source_datasets: list = None,
 ):
     """Common wrapper to train all benchmarked models.
 
@@ -62,8 +61,6 @@ def get_predictions_for_all_models(
         seed: Common seed used for any random operation.
         reproducible: If set to true, set torch to run reproducibly.
     """
-    model = MLWrapper()
-
     # Setting the seed before gin parsing
     os.environ["PYTHONHASHSEED"] = str(seed)
     random.seed(seed)
@@ -77,32 +74,28 @@ def get_predictions_for_all_models(
         torch.backends.cudnn.benchmark = False
 
     test_dataset = RICUDataset(data, split=test_on)
-    val_dataset = RICUDataset(data, split="val")
-    # weight = test_dataset.get_balance()
-    _, val_labels = val_dataset.get_data_and_labels()
     _, test_labels = test_dataset.get_data_and_labels()
 
-    val_predictions = {}
     test_predictions = {}
-    for model_dir in source_dir.iterdir():
-        if model_dir.is_dir():
-            val_predictions[model_dir.name] = get_predictions_for_single_model(model, val_dataset, model_dir, log_dir)
-            test_predictions[model_dir.name] = get_predictions_for_single_model(model, test_dataset, model_dir, log_dir)
-    val_predictions["target"] = target_model.output_transform(target_model.predict(val_dataset, None, None))
+    for source in source_datasets:
+        model_dir = source_dir / source
+        test_predictions[model_dir.name] = get_predictions_for_single_model(target_model, test_dataset, model_dir, log_dir)
     test_predictions["target"] = target_model.output_transform(target_model.predict(test_dataset, None, None))
 
-    return val_predictions, val_labels, test_predictions, test_labels
+    for name, prediction in test_predictions.items():
+        if prediction.ndim == 2:
+            test_predictions[name] = prediction[:, 1]
+
+    return test_predictions, test_labels
 
 
-def get_model_metrics(test_predictions: np.ndarray, test_labels: np.ndarray):
+def get_model_metrics(model: object, test_predictions: np.ndarray, test_labels: np.ndarray):
     """Evaluate a combination of models.
 
     Args:
         test_predictions: Predictions for test set.
         test_labels: Labels for test set.
     """
-    model = MLWrapper()
-    model.set_metrics(test_labels)
     test_metric_results = {}
     for name, metric in model.metrics.items():
         value = metric(model.label_transform(test_labels), test_predictions)
@@ -114,11 +107,10 @@ def get_model_metrics(test_predictions: np.ndarray, test_labels: np.ndarray):
 
 
 def domain_adaptation(
-    data_dir: Path,
     run_dir: Path,
     seed: int,
-    n_initial_points: int = 10,
-    n_calls: int = 50,
+    task: str = None,
+    model: str = None,
     debug: bool = False,
 ):
     """Choose hyperparameters to tune and bind them to gin.
@@ -135,23 +127,22 @@ def domain_adaptation(
     Raises:
         ValueError: If checkpoint is not None and the checkpoint does not exist.
     """
-
-    # train target baselines
-    
-    
     agg_loss = 0
     cv_repetitions = 5
-    cv_repetitions_to_train = 5
+    cv_repetitions_to_train = 3
     cv_folds = 5
     cv_folds_to_train = 5
-    datasets = ["hirid", "eicu", "aumc", "miiv"]
-    weight_bounds = ((0.0001, 1.0) for _ in range(len(datasets)))
-    task_dir = Path("../data/mortality24/")
+    datasets = ["hirid", "aumc", "miiv"]
+    weights = [1] * (len(datasets) - 1) + [1]
+    task_dir = Path("../data/") / task
+    model_path = Path("../models/best_models/")
 
     # evaluate models on same test split
     for dataset in datasets:
+        data_dir = task_dir / dataset
+        source_datasets = [d for d in datasets if d != dataset]
         log_full_line(f"STARTING {dataset}", char="#", num_newlines=2)
-        choose_and_bind_hyperparameters(True, task_dir / dataset, run_dir, seed, debug=debug)
+        choose_and_bind_hyperparameters(True, data_dir, run_dir, seed, debug=debug)
         for repetition in range(cv_repetitions_to_train):
             for fold_index in range(cv_folds_to_train):
                 data = preprocess_data(
@@ -179,43 +170,31 @@ def domain_adaptation(
                 )
                 agg_loss += curr_loss
 
-                val_predictions, val_labels, test_predictions, test_labels = get_predictions_for_all_models(
+                test_predictions, test_labels = get_predictions_for_all_models(
+                    target_model,
                     data,
                     run_dir,
-                    source_dir=Path("../models/best_models/Mortality24/LGBMClassifier"),
+                    source_dir=model_path / task / model,
                     seed=seed,
-                    target_model=target_model,
+                    source_datasets=source_datasets,
                 )
 
-                # evaluate source baselines and oracle
-                for source in datasets:
-                    if source == dataset:
-                        continue
+                # evaluate source baselines
+                for source in source_datasets:
                     logging.info("Evaluating model: {}".format(source))
-                    get_model_metrics(test_predictions[source], test_labels)
+                    get_model_metrics(target_model, test_predictions[source], test_labels)
 
                 # evaluate convex combination of models
-                val_predictions_wo_oracle = [pred for source, pred in val_predictions.items() if source != dataset]
-                test_predictions_wo_oracle = [pred for source, pred in test_predictions.items() if source != dataset]
-                def convex_model_combination(model_weights):
-                    val_pred = np.average(val_predictions_wo_oracle, axis=0, weights=model_weights)
-                    return log_loss(val_labels, val_pred)
-
-                logging.disable(logging.INFO)
-                res = gp_minimize(
-                    convex_model_combination,
-                    weight_bounds,
-                    n_calls=n_calls,
-                    n_initial_points=n_initial_points,
-                    random_state=seed,
-                    noise=1e-10,  # the models are deterministic, but noise is needed for the gp to work
-                )
-                logging.disable(logging.NOTSET)
-                best_model_weights = res.x
-                logging.info(best_model_weights)
-                test_pred = np.average(test_predictions_wo_oracle, axis=0, weights=best_model_weights)
-                get_model_metrics(test_pred, test_labels)
-
+                test_predictions_list = list(test_predictions.values())
+
+                logging.info("Evaluating convex combination of models.")
+                target_weights = [0.1, 0.2, 0.5, 1, 2, 5]
+                weights = [1] * (len(datasets) - 1)
+                for t in target_weights:
+                    w = weights + [t * sum(weights)]
+                    logging.info(f"Evaluating target weight: {t}")
+                    test_pred = np.average(test_predictions_list, axis=0, weights=w)
+                    get_model_metrics(target_model, test_pred, test_labels)
 
                 log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
             log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py
index 266bef93..806586cc 100644
--- a/icu_benchmarks/run.py
+++ b/icu_benchmarks/run.py
@@ -59,7 +59,7 @@ def main(my_args=tuple(sys.argv[1:])):
             else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")]
         )
         gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False)
-        domain_adaptation(args.data_dir, run_dir, args.seed)
+        domain_adaptation(run_dir, args.seed, args.task_name, model)
         return
     else:
         reproducible = args.reproducible

From 0789bcfbe67147356cc4fa810dbe8216095f6b82 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Wed, 11 Jan 2023 23:45:51 +0100
Subject: [PATCH 013/163] iterate over target sizes

---
 icu_benchmarks/models/domain_adaptation.py | 110 ++++++++++-----------
 icu_benchmarks/models/train.py             |   2 +-
 2 files changed, 54 insertions(+), 58 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 83494b36..5b3e75da 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -127,11 +127,11 @@ def domain_adaptation(
     Raises:
         ValueError: If checkpoint is not None and the checkpoint does not exist.
     """
-    agg_loss = 0
     cv_repetitions = 5
     cv_repetitions_to_train = 3
     cv_folds = 5
     cv_folds_to_train = 5
+    target_sizes = [500, 1000, 2000]
     datasets = ["hirid", "aumc", "miiv"]
     weights = [1] * (len(datasets) - 1) + [1]
     task_dir = Path("../data/") / task
@@ -142,62 +142,58 @@ def domain_adaptation(
         data_dir = task_dir / dataset
         source_datasets = [d for d in datasets if d != dataset]
         log_full_line(f"STARTING {dataset}", char="#", num_newlines=2)
-        choose_and_bind_hyperparameters(True, data_dir, run_dir, seed, debug=debug)
-        for repetition in range(cv_repetitions_to_train):
-            for fold_index in range(cv_folds_to_train):
-                data = preprocess_data(
-                    data_dir,
-                    seed=seed,
-                    debug=debug,
-                    use_cache=True,
-                    cv_repetitions=cv_repetitions,
-                    repetition_index=repetition,
-                    cv_folds=cv_folds,
-                    fold_index=fold_index,
-                )
-
-                run_dir_seed = run_dir / f"seed_{seed}" / f"fold_{fold_index}"
-                run_dir_seed.mkdir(parents=True, exist_ok=True)
-
-                # evaluate target baselines
-                curr_loss, target_model = train_common(
-                    data,
-                    log_dir=run_dir_seed,
-                    seed=seed,
-                    reproducible=True,
-                    test_on="test",
-                    return_model=True,
-                )
-                agg_loss += curr_loss
-
-                test_predictions, test_labels = get_predictions_for_all_models(
-                    target_model,
-                    data,
-                    run_dir,
-                    source_dir=model_path / task / model,
-                    seed=seed,
-                    source_datasets=source_datasets,
-                )
-
-                # evaluate source baselines
-                for source in source_datasets:
-                    logging.info("Evaluating model: {}".format(source))
-                    get_model_metrics(target_model, test_predictions[source], test_labels)
-
-                # evaluate convex combination of models
-                test_predictions_list = list(test_predictions.values())
-
-                logging.info("Evaluating convex combination of models.")
-                target_weights = [0.1, 0.2, 0.5, 1, 2, 5]
-                weights = [1] * (len(datasets) - 1)
-                for t in target_weights:
-                    w = weights + [t * sum(weights)]
-                    logging.info(f"Evaluating target weight: {t}")
-                    test_pred = np.average(test_predictions_list, axis=0, weights=w)
-                    get_model_metrics(target_model, test_pred, test_labels)
-
-                log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
-            log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
+        for target_size in target_sizes:
+            log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1)
+            gin.bind_parameter("preprocess.fold_size", target_size)
+            choose_and_bind_hyperparameters(True, data_dir, run_dir, seed, debug=debug)
+            for repetition in range(cv_repetitions_to_train):
+                for fold_index in range(cv_folds_to_train):
+                    data = preprocess_data(
+                        data_dir,
+                        seed=seed,
+                        debug=debug,
+                        use_cache=True,
+                        cv_repetitions=cv_repetitions,
+                        repetition_index=repetition,
+                        cv_folds=cv_folds,
+                        fold_index=fold_index,
+                    )
+
+                    run_dir_seed = run_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}"
+                    run_dir_seed.mkdir(parents=True, exist_ok=True)
+
+                    # evaluate target baselines
+                    target_model = train_common(data, log_dir=run_dir_seed, seed=seed, return_model=True)
+                    
+                    test_predictions, test_labels = get_predictions_for_all_models(
+                        target_model,
+                        data,
+                        run_dir,
+                        source_dir=model_path / task / model,
+                        seed=seed,
+                        source_datasets=source_datasets,
+                    )
+
+                    # evaluate source baselines and the target baseline
+                    for baseline, predictions in test_predictions.items():
+                        logging.info("Evaluating model: {}".format(baseline))
+                        get_model_metrics(target_model, predictions, test_labels)
+
+                    # evaluate convex combination of models
+                    test_predictions_list = list(test_predictions.values())
+
+                    logging.info("Evaluating convex combination of models.")
+                    target_weights = [0.1, 0.2, 0.5, 1, 2, 5]
+                    weights = [1] * (len(datasets) - 1)
+                    for t in target_weights:
+                        w = weights + [t * sum(weights)]
+                        logging.info(f"Evaluating target weight: {t}")
+                        test_pred = np.average(test_predictions_list, axis=0, weights=w)
+                        get_model_metrics(target_model, test_pred, test_labels)
+
+                    log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
+                log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
+            log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5)
         log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=10)
 
     return agg_loss / (cv_repetitions_to_train * cv_folds_to_train)
diff --git a/icu_benchmarks/models/train.py b/icu_benchmarks/models/train.py
index 478dded0..c81189d1 100644
--- a/icu_benchmarks/models/train.py
+++ b/icu_benchmarks/models/train.py
@@ -77,5 +77,5 @@ def train_common(
     # save config file again to capture missing gin parameters
     save_config_file(log_dir)
     if return_model:
-        return model.test(test_dataset, weight, seed), model
+        return model
     return model.test(test_dataset, weight, seed)

From aa3345a0ae685a789e9e485d40586a5d4f83f945 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 00:44:41 +0100
Subject: [PATCH 014/163] aggregate and average DA metrics

---
 icu_benchmarks/models/domain_adaptation.py | 58 +++++++++++++++++-----
 icu_benchmarks/run.py                      |  2 +-
 2 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 5b3e75da..ab66b314 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -1,3 +1,4 @@
+import json
 import os
 import random
 import gin
@@ -6,14 +7,14 @@
 import numpy as np
 import pandas as pd
 from pathlib import Path
-from skopt import gp_minimize
-from sklearn.metrics import log_loss
+import scipy.stats as stats
 
 from icu_benchmarks.data.loader import RICUDataset
 from icu_benchmarks.data.preprocess import preprocess_data
 from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters
 from icu_benchmarks.models.train import train_common
 from icu_benchmarks.models.wrappers import MLWrapper
+from icu_benchmarks.models.utils import JsonNumpyEncoder
 from icu_benchmarks.run_utils import log_full_line
 
 
@@ -128,9 +129,9 @@ def domain_adaptation(
         ValueError: If checkpoint is not None and the checkpoint does not exist.
     """
     cv_repetitions = 5
-    cv_repetitions_to_train = 3
+    cv_repetitions_to_train = 2
     cv_folds = 5
-    cv_folds_to_train = 5
+    cv_folds_to_train = 2
     target_sizes = [500, 1000, 2000]
     datasets = ["hirid", "aumc", "miiv"]
     weights = [1] * (len(datasets) - 1) + [1]
@@ -145,9 +146,15 @@ def domain_adaptation(
         for target_size in target_sizes:
             log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1)
             gin.bind_parameter("preprocess.fold_size", target_size)
-            choose_and_bind_hyperparameters(True, data_dir, run_dir, seed, debug=debug)
+            log_dir = run_dir / task / dataset / f"target_{target_size}"
+            log_dir.mkdir(parents=True, exist_ok=True)
+            choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug)
+            results = {}
             for repetition in range(cv_repetitions_to_train):
                 for fold_index in range(cv_folds_to_train):
+                    results[f"{repetition}_{fold_index}"] = {}
+                    fold_results = results[f"{repetition}_{fold_index}"]
+
                     data = preprocess_data(
                         data_dir,
                         seed=seed,
@@ -159,16 +166,16 @@ def domain_adaptation(
                         fold_index=fold_index,
                     )
 
-                    run_dir_seed = run_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}"
-                    run_dir_seed.mkdir(parents=True, exist_ok=True)
+                    log_dir_fold = log_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}"
+                    log_dir_fold.mkdir(parents=True, exist_ok=True)
 
                     # evaluate target baselines
-                    target_model = train_common(data, log_dir=run_dir_seed, seed=seed, return_model=True)
+                    target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
                     
                     test_predictions, test_labels = get_predictions_for_all_models(
                         target_model,
                         data,
-                        run_dir,
+                        log_dir_fold,
                         source_dir=model_path / task / model,
                         seed=seed,
                         source_datasets=source_datasets,
@@ -177,7 +184,8 @@ def domain_adaptation(
                     # evaluate source baselines
                     for baseline, predictions in test_predictions.items():
                         logging.info("Evaluating model: {}".format(baseline))
-                        get_model_metrics(target_model, predictions, test_labels)
+                        fold_results[baseline] = get_model_metrics(target_model, predictions, test_labels)
+
 
                     # evaluate convex combination of models
                     test_predictions_list = list(test_predictions.values())
@@ -189,12 +197,36 @@ def domain_adaptation(
                         w = weights + [t * sum(weights)]
                         logging.info(f"Evaluating target weight: {t}")
                         test_pred = np.average(test_predictions_list, axis=0, weights=w)
-                        get_model_metrics(target_model, test_pred, test_labels)
+                        fold_results[f"convex_combination_{t}"] = get_model_metrics(target_model, test_pred, test_labels)
 
                     log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
                 log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
             log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5)
-        log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=10)
 
-    return agg_loss / (cv_repetitions_to_train * cv_folds_to_train)
+            source_metrics = {}
+            for result in results.values():
+                for source, source_stats in result.items():
+                    for metric, score in source_stats.items():
+                        if isinstance(score, (float, int)):
+                            source_metrics.setdefault(source, {}).setdefault(metric, []).append(score)
+
+            # Compute statistical metric over aggregated results
+            averaged_metrics = {}
+            for source, source_stats in source_metrics.items():
+                for metric, scores in source_stats.items():
+                    averaged_metrics.setdefault(source, {}).setdefault(metric, []).append({
+                        "avg": np.mean(scores),
+                        "std": np.std(scores),
+                        "CI_0.95": stats.t.interval(0.95, len(scores) - 1, loc=np.mean(scores), scale=stats.sem(scores)),
+                    })
+
+            with open(log_dir / "aggregated_source_metrics.json", "w") as f:
+                json.dump(results, f, cls=JsonNumpyEncoder)
+
+            with open(log_dir / "averaged_source_metrics.json", "w") as f:
+                json.dump(averaged_metrics, f, cls=JsonNumpyEncoder)
+
+            logging.info(f"Averaged results: {averaged_metrics}")
+
+        log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=10)
  
\ No newline at end of file
diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py
index 806586cc..e1ec52c2 100644
--- a/icu_benchmarks/run.py
+++ b/icu_benchmarks/run.py
@@ -52,7 +52,7 @@ def main(my_args=tuple(sys.argv[1:])):
         )
         return
     if args.command == "da":
-        run_dir = create_run_dir(log_dir)
+        run_dir = create_run_dir(args.log_dir)
         gin_config_files = (
             [Path(f"configs/experiments/{args.experiment}.gin")]
             if args.experiment

From 3da7f6b7ef46b6f10b5c3b9718fb1665ee6ea96d Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 00:45:35 +0100
Subject: [PATCH 015/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index ab66b314..ff6437d6 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -201,7 +201,6 @@ def domain_adaptation(
 
                     log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
                 log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
-            log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5)
 
             source_metrics = {}
             for result in results.values():
@@ -227,6 +226,7 @@ def domain_adaptation(
                 json.dump(averaged_metrics, f, cls=JsonNumpyEncoder)
 
             logging.info(f"Averaged results: {averaged_metrics}")
+            log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5)
 
         log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=10)
  
\ No newline at end of file

From 6f6a82eb597237ad5749eef8e50010c14d9ed4a4 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 00:57:54 +0100
Subject: [PATCH 016/163] use data_dir for da

---
 icu_benchmarks/models/domain_adaptation.py | 3 ++-
 icu_benchmarks/run.py                      | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index ff6437d6..8b3006ed 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -108,6 +108,7 @@ def get_model_metrics(model: object, test_predictions: np.ndarray, test_labels:
 
 
 def domain_adaptation(
+    data_dir: Path,
     run_dir: Path,
     seed: int,
     task: str = None,
@@ -135,7 +136,7 @@ def domain_adaptation(
     target_sizes = [500, 1000, 2000]
     datasets = ["hirid", "aumc", "miiv"]
     weights = [1] * (len(datasets) - 1) + [1]
-    task_dir = Path("../data/") / task
+    task_dir = data_dir / task
     model_path = Path("../models/best_models/")
 
     # evaluate models on same test split
diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py
index e1ec52c2..af803f24 100644
--- a/icu_benchmarks/run.py
+++ b/icu_benchmarks/run.py
@@ -59,7 +59,7 @@ def main(my_args=tuple(sys.argv[1:])):
             else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")]
         )
         gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False)
-        domain_adaptation(run_dir, args.seed, args.task_name, model)
+        domain_adaptation(args.data_dir, run_dir, args.seed, args.task_name, model)
         return
     else:
         reproducible = args.reproducible

From ccb8bbe0c598d9049858fa3a148b91f0112b9921 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 01:27:25 +0100
Subject: [PATCH 017/163] disable confusion matrix

---
 icu_benchmarks/models/metric_constants.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/metric_constants.py b/icu_benchmarks/models/metric_constants.py
index 97d64a08..cebd62b4 100644
--- a/icu_benchmarks/models/metric_constants.py
+++ b/icu_benchmarks/models/metric_constants.py
@@ -51,7 +51,7 @@ class DLMetrics:
     BINARY_CLASSIFICATION = {
         "AUC": ROC_AUC(),
         "Calibration_Curve": CalibrationCurve(),
-        "Confusion_Matrix": ConfusionMatrix(num_classes=2),
+        # "Confusion_Matrix": ConfusionMatrix(num_classes=2),
         "PR": AveragePrecision(),
         "PRC": PrecisionRecallCurve(),
         "ROC": RocCurve(),

From 1b6319f7f8f40a9fa4a4740ca04c62f1519bbc05 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 01:30:50 +0100
Subject: [PATCH 018/163] Update Transformer.gin

---
 configs/models/Transformer.gin | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin
index 7e89cf2a..42faa756 100644
--- a/configs/models/Transformer.gin
+++ b/configs/models/Transformer.gin
@@ -35,6 +35,6 @@ model/hyperparameter.dropout = (0.0, 0.4)
 model/hyperparameter.dropout_att = (0.0, 0.4)
 
 tune_hyperparameters.scopes = ["model", "optimizer"]
-tune_hyperparameters.n_initial_points = 5
-tune_hyperparameters.n_calls = 30
+tune_hyperparameters.n_initial_points = 2
+tune_hyperparameters.n_calls = 4
 tune_hyperparameters.folds_to_tune_on = 2

From bb297d809ec43be73e853dcec7da63d47d14a23c Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 01:39:41 +0100
Subject: [PATCH 019/163] rename encoder to model in wrapper

---
 icu_benchmarks/models/wrappers.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index 5b59a9a2..5ef4a0e6 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -55,10 +55,10 @@ def __init__(self, encoder=LSTMNet, loss=torch.nn.functional.cross_entropy, opti
         self.pin_memory = pin_memory
         self.n_worker = n_worker
 
-        self.encoder = encoder
-        self.encoder.to(device)
+        self.model = encoder
+        self.model.to(device)
         self.loss = loss
-        self.optimizer = optimizer_fn(self.encoder.parameters())
+        self.optimizer = optimizer_fn(self.model.parameters())
         self.scaler = None
 
     def set_log_dir(self, log_dir: Path):
@@ -82,12 +82,12 @@ def softmax_multi_output_transform(output):
 
         # Binary classification
         # output transform is not applied for contrib metrics so we do our own.
-        if self.encoder.logit.out_features == 2:
+        if self.model.logit.out_features == 2:
             self.output_transform = softmax_binary_output_transform
             self.metrics = DLMetrics.BINARY_CLASSIFICATION
 
         # Regression
-        elif self.encoder.logit.out_features == 1:
+        elif self.model.logit.out_features == 1:
             self.output_transform = lambda x: x
             if self.scaler is not None:
                 self.metrics = {"MAE": MAE(invert_transform=self.scaler.inverse_transform)}
@@ -119,7 +119,7 @@ def step_fn(self, element, loss_weight=None):
                 data = data.float().to(self.device)
         else:
             raise Exception("Loader should return either (data, label) or (data, label, mask)")
-        out = self.encoder(data)
+        out = self.model(data)
         if len(out) == 2 and isinstance(out, tuple):
             out, aux_loss = out
         else:
@@ -135,7 +135,7 @@ def step_fn(self, element, loss_weight=None):
 
     def _do_training(self, train_loader, weight, metrics):
         # Training epoch
-        self.encoder.train()
+        self.model.train()
         agg_train_loss = 0
         for elem in tqdm(train_loader, leave=False):
             loss, preds, target = self.step_fn(elem, weight)
@@ -265,7 +265,7 @@ def test(self, dataset, weight, seed):
         return test_loss
 
     def evaluate(self, eval_loader, metrics, weight):
-        self.encoder.eval()
+        self.model.eval()
         agg_eval_loss = 0
 
         with torch.no_grad():
@@ -283,10 +283,10 @@ def evaluate(self, eval_loader, metrics, weight):
         return eval_loss, eval_metric_results
 
     def save_weights(self, epoch, save_path):
-        save_model(self.encoder, self.optimizer, epoch, save_path)
+        save_model(self.model, self.optimizer, epoch, save_path)
 
     def load_weights(self, load_path):
-        load_model_state(load_path, self.encoder, optimizer=self.optimizer)
+        load_model_state(load_path, self.model, optimizer=self.optimizer)
 
     def predict(self, dataset, weight, seed):
         self.set_metrics()
@@ -295,7 +295,7 @@ def predict(self, dataset, weight, seed):
             weight = torch.FloatTensor(weight).to(self.device)
         test_loss, test_metrics = self.evaluate(test_loader, self.metrics, weight)
 
-        self.encoder.eval()
+        self.model.eval()
         all_preds = []
         with torch.no_grad():
             for elem in test_loader:

From 119ad353b0d927b764f5c466e1de8eaec1e79ce7 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 01:46:38 +0100
Subject: [PATCH 020/163] fix model path

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 8b3006ed..f43b966c 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -137,7 +137,7 @@ def domain_adaptation(
     datasets = ["hirid", "aumc", "miiv"]
     weights = [1] * (len(datasets) - 1) + [1]
     task_dir = data_dir / task
-    model_path = Path("../models/best_models/")
+    model_path = Path("../yaib_models/best_models/")
 
     # evaluate models on same test split
     for dataset in datasets:

From 7ffbb4bfdc0b649e1a4983ed41428850c293833d Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 01:46:42 +0100
Subject: [PATCH 021/163] Update Transformer.gin

---
 configs/models/Transformer.gin | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin
index 42faa756..7085e672 100644
--- a/configs/models/Transformer.gin
+++ b/configs/models/Transformer.gin
@@ -14,7 +14,7 @@ DLWrapper.optimizer_fn = @Adam
 
 DLWrapper.train.epochs = 1000
 DLWrapper.train.batch_size = 64
-DLWrapper.train.patience = 10
+DLWrapper.train.patience = 5
 DLWrapper.train.min_delta = 1e-4
 
 # Optimizer params

From 12036b5b9e87cd9a2efb41af5ae7b8eedd8e7b00 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 01:59:09 +0100
Subject: [PATCH 022/163] load correct wrapper

---
 icu_benchmarks/models/domain_adaptation.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index f43b966c..ff1b9122 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -13,7 +13,7 @@
 from icu_benchmarks.data.preprocess import preprocess_data
 from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters
 from icu_benchmarks.models.train import train_common
-from icu_benchmarks.models.wrappers import MLWrapper
+from icu_benchmarks.models.wrappers import DLWrapper, MLWrapper
 from icu_benchmarks.models.utils import JsonNumpyEncoder
 from icu_benchmarks.run_utils import log_full_line
 
@@ -30,7 +30,10 @@ def get_predictions_for_single_model(target_model: object, dataset: RICUDataset,
     Returns:
         Tuple of predictions and labels.
     """
-    model = MLWrapper(target_model.model)
+    if isinstance(target_model, DLWrapper):
+        model = DLWrapper(target_model.model)
+    else:
+        model = MLWrapper(target_model.model)
     model.set_log_dir(log_dir)
     if (model_dir / "model.torch").is_file():
         model.load_weights(model_dir / "model.torch")

From 40642ea7479136300897f2d9bd02e8f4b495edf9 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 02:06:05 +0100
Subject: [PATCH 023/163] initialize wrapper without model

---
 icu_benchmarks/models/domain_adaptation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index ff1b9122..4af228ad 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -31,9 +31,9 @@ def get_predictions_for_single_model(target_model: object, dataset: RICUDataset,
         Tuple of predictions and labels.
     """
     if isinstance(target_model, DLWrapper):
-        model = DLWrapper(target_model.model)
+        model = DLWrapper()
     else:
-        model = MLWrapper(target_model.model)
+        model = MLWrapper()
     model.set_log_dir(log_dir)
     if (model_dir / "model.torch").is_file():
         model.load_weights(model_dir / "model.torch")

From c1e322240efc5177a02049c829355fa4a9a30786 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 02:14:02 +0100
Subject: [PATCH 024/163] instantiate encoder in wrapper

---
 configs/models/Transformer.gin    | 2 +-
 icu_benchmarks/models/wrappers.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin
index 7085e672..1fe0a78d 100644
--- a/configs/models/Transformer.gin
+++ b/configs/models/Transformer.gin
@@ -9,7 +9,7 @@ preprocess.use_features = False
 # Train params
 train_common.model = @DLWrapper()
 
-DLWrapper.encoder = @Transformer()
+DLWrapper.encoder = @Transformer
 DLWrapper.optimizer_fn = @Adam
 
 DLWrapper.train.epochs = 1000
diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index 5ef4a0e6..fd36303e 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -55,7 +55,7 @@ def __init__(self, encoder=LSTMNet, loss=torch.nn.functional.cross_entropy, opti
         self.pin_memory = pin_memory
         self.n_worker = n_worker
 
-        self.model = encoder
+        self.model = encoder()
         self.model.to(device)
         self.loss = loss
         self.optimizer = optimizer_fn(self.model.parameters())

From 98662158f29a3701b04b4ae1e1cd463c6c0f24e0 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 02:22:00 +0100
Subject: [PATCH 025/163] revert instantiation

---
 configs/models/Transformer.gin    | 2 +-
 icu_benchmarks/models/wrappers.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin
index 1fe0a78d..7085e672 100644
--- a/configs/models/Transformer.gin
+++ b/configs/models/Transformer.gin
@@ -9,7 +9,7 @@ preprocess.use_features = False
 # Train params
 train_common.model = @DLWrapper()
 
-DLWrapper.encoder = @Transformer
+DLWrapper.encoder = @Transformer()
 DLWrapper.optimizer_fn = @Adam
 
 DLWrapper.train.epochs = 1000
diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index fd36303e..5ef4a0e6 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -55,7 +55,7 @@ def __init__(self, encoder=LSTMNet, loss=torch.nn.functional.cross_entropy, opti
         self.pin_memory = pin_memory
         self.n_worker = n_worker
 
-        self.model = encoder()
+        self.model = encoder
         self.model.to(device)
         self.loss = loss
         self.optimizer = optimizer_fn(self.model.parameters())

From 4cc5fca7920ca2039663cd557b3554795f7798fe Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 02:33:53 +0100
Subject: [PATCH 026/163] load model configs

---
 icu_benchmarks/models/domain_adaptation.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 4af228ad..de2f0e48 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -30,6 +30,7 @@ def get_predictions_for_single_model(target_model: object, dataset: RICUDataset,
     Returns:
         Tuple of predictions and labels.
     """
+    gin.parse_config_file(model_dir / "train_config.gin")
     if isinstance(target_model, DLWrapper):
         model = DLWrapper()
     else:
@@ -81,10 +82,10 @@ def get_predictions_for_all_models(
     _, test_labels = test_dataset.get_data_and_labels()
 
     test_predictions = {}
+    test_predictions["target"] = target_model.output_transform(target_model.predict(test_dataset, None, None))
     for source in source_datasets:
         model_dir = source_dir / source
         test_predictions[model_dir.name] = get_predictions_for_single_model(target_model, test_dataset, model_dir, log_dir)
-    test_predictions["target"] = target_model.output_transform(target_model.predict(test_dataset, None, None))
 
     for name, prediction in test_predictions.items():
         if prediction.ndim == 2:
@@ -138,7 +139,8 @@ def domain_adaptation(
     cv_folds_to_train = 2
     target_sizes = [500, 1000, 2000]
     datasets = ["hirid", "aumc", "miiv"]
-    weights = [1] * (len(datasets) - 1) + [1]
+    target_weights = [0.1, 0.2, 0.5, 1, 2, 5]
+    weights = [1] * (len(datasets) - 1)
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
 
@@ -153,9 +155,11 @@ def domain_adaptation(
             log_dir = run_dir / task / dataset / f"target_{target_size}"
             log_dir.mkdir(parents=True, exist_ok=True)
             choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug)
+            gin_config_with_target_hyperparameters = gin.config_str()
             results = {}
             for repetition in range(cv_repetitions_to_train):
                 for fold_index in range(cv_folds_to_train):
+                    gin.parse_config(gin_config_with_target_hyperparameters)
                     results[f"{repetition}_{fold_index}"] = {}
                     fold_results = results[f"{repetition}_{fold_index}"]
 
@@ -190,15 +194,12 @@ def domain_adaptation(
                         logging.info("Evaluating model: {}".format(baseline))
                         fold_results[baseline] = get_model_metrics(target_model, predictions, test_labels)
 
-
                     # evaluate convex combination of models
                     test_predictions_list = list(test_predictions.values())
 
                     logging.info("Evaluating convex combination of models.")
-                    target_weights = [0.1, 0.2, 0.5, 1, 2, 5]
-                    weights = [1] * (len(datasets) - 1)
                     for t in target_weights:
-                        w = weights + [t * sum(weights)]
+                        w = [t * sum(weights)] + weights
                         logging.info(f"Evaluating target weight: {t}")
                         test_pred = np.average(test_predictions_list, axis=0, weights=w)
                         fold_results[f"convex_combination_{t}"] = get_model_metrics(target_model, test_pred, test_labels)

From aefb1655926510bda254ac7bfe448674d63c0843 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 02:45:51 +0100
Subject: [PATCH 027/163] update lgbm config

---
 configs/models/LGBMClassifier.gin     | 3 +--
 configs/models/LogisticRegression.gin | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/configs/models/LGBMClassifier.gin b/configs/models/LGBMClassifier.gin
index 76fe778e..59d07162 100644
--- a/configs/models/LGBMClassifier.gin
+++ b/configs/models/LGBMClassifier.gin
@@ -9,12 +9,11 @@ preprocess.use_features = True
 train_common.model = @MLWrapper()
 
 MLWrapper.model = @LGBMClassifier()
-MLWrapper.train.patience = 10
 
 model/hyperparameter.class_to_tune = @LGBMClassifier
 model/hyperparameter.colsample_bytree = (0.33, 1.0)
 model/hyperparameter.max_depth = (3, 7)
-model/hyperparameter.min_child_samples = 1000
+model/hyperparameter.min_child_samples = (2, 1024, "log-uniform", 2)
 model/hyperparameter.n_estimators = 100000
 model/hyperparameter.num_leaves = (8, 128, "log-uniform", 2)
 model/hyperparameter.subsample = (0.33, 1.0)
diff --git a/configs/models/LogisticRegression.gin b/configs/models/LogisticRegression.gin
index 8bef8066..cf8884b3 100644
--- a/configs/models/LogisticRegression.gin
+++ b/configs/models/LogisticRegression.gin
@@ -9,7 +9,6 @@ preprocess.use_features = True
 train_common.model = @MLWrapper()
 
 MLWrapper.model = @LogisticRegression()
-MLWrapper.train.patience = 10
 
 model/hyperparameter.class_to_tune = @LogisticRegression
 model/hyperparameter.solver = "saga"

From 85543e2a4f282511d7a457223fbd24ba41b5417b Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 02:46:29 +0100
Subject: [PATCH 028/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index de2f0e48..0a674b2e 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -134,9 +134,9 @@ def domain_adaptation(
         ValueError: If checkpoint is not None and the checkpoint does not exist.
     """
     cv_repetitions = 5
-    cv_repetitions_to_train = 2
+    cv_repetitions_to_train = 5
     cv_folds = 5
-    cv_folds_to_train = 2
+    cv_folds_to_train = 5
     target_sizes = [500, 1000, 2000]
     datasets = ["hirid", "aumc", "miiv"]
     target_weights = [0.1, 0.2, 0.5, 1, 2, 5]

From 0030a6964462d2f819da1edb2924701215609ac7 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 02:47:58 +0100
Subject: [PATCH 029/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 0a674b2e..6ba5a074 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -138,7 +138,7 @@ def domain_adaptation(
     cv_folds = 5
     cv_folds_to_train = 5
     target_sizes = [500, 1000, 2000]
-    datasets = ["hirid", "aumc", "miiv"]
+    datasets = ["hirid", "aumc", "eicu", "miiv"]
     target_weights = [0.1, 0.2, 0.5, 1, 2, 5]
     weights = [1] * (len(datasets) - 1)
     task_dir = data_dir / task

From 2afc544f959d59eb1a91b5425a789d72b54deb0c Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 14:17:27 +0100
Subject: [PATCH 030/163] include model in log_dir

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 6ba5a074..624be2b7 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -152,7 +152,7 @@ def domain_adaptation(
         for target_size in target_sizes:
             log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1)
             gin.bind_parameter("preprocess.fold_size", target_size)
-            log_dir = run_dir / task / dataset / f"target_{target_size}"
+            log_dir = run_dir / task / model / dataset / f"target_{target_size}"
             log_dir.mkdir(parents=True, exist_ok=True)
             choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug)
             gin_config_with_target_hyperparameters = gin.config_str()

From 19549541abd01fc175b887393a34f30a22683d1d Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 14:18:05 +0100
Subject: [PATCH 031/163] reduce hyperparameter training for shallow models

---
 configs/models/LGBMClassifier.gin     | 2 +-
 configs/models/LogisticRegression.gin | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configs/models/LGBMClassifier.gin b/configs/models/LGBMClassifier.gin
index 59d07162..0b725442 100644
--- a/configs/models/LGBMClassifier.gin
+++ b/configs/models/LGBMClassifier.gin
@@ -21,5 +21,5 @@ model/hyperparameter.subsample_freq = 1
 
 tune_hyperparameters.scopes = ["model"]
 tune_hyperparameters.n_initial_points = 10
-tune_hyperparameters.n_calls = 250
+tune_hyperparameters.n_calls = 150
 tune_hyperparameters.folds_to_tune_on = 3
diff --git a/configs/models/LogisticRegression.gin b/configs/models/LogisticRegression.gin
index cf8884b3..9a18f798 100644
--- a/configs/models/LogisticRegression.gin
+++ b/configs/models/LogisticRegression.gin
@@ -19,5 +19,5 @@ model/hyperparameter.l1_ratio = (0.0, 1.0)
 
 tune_hyperparameters.scopes = ["model"]
 tune_hyperparameters.n_initial_points = 10
-tune_hyperparameters.n_calls = 150
+tune_hyperparameters.n_calls = 100
 tune_hyperparameters.folds_to_tune_on = 3

From 761e354053b5131175fac522eb5c17a7257002b1 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 14:42:26 +0100
Subject: [PATCH 032/163] reset gin config for repeated HP tuning

---
 icu_benchmarks/models/domain_adaptation.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 624be2b7..c43aa7b8 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -143,6 +143,7 @@ def domain_adaptation(
     weights = [1] * (len(datasets) - 1)
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
+    gin_config_before_tuning = gin.config_str()
 
     # evaluate models on same test split
     for dataset in datasets:
@@ -150,6 +151,8 @@ def domain_adaptation(
         source_datasets = [d for d in datasets if d != dataset]
         log_full_line(f"STARTING {dataset}", char="#", num_newlines=2)
         for target_size in target_sizes:
+            gin.clear_config()
+            gin.parse_config(gin_config_before_tuning)
             log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1)
             gin.bind_parameter("preprocess.fold_size", target_size)
             log_dir = run_dir / task / model / dataset / f"target_{target_size}"

From f7c01fed0a0fa18c16b0a31f2e0be95a40560a1f Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 15:05:32 +0100
Subject: [PATCH 033/163] remove duplicate output transform

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index c43aa7b8..196c1e1b 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -82,7 +82,7 @@ def get_predictions_for_all_models(
     _, test_labels = test_dataset.get_data_and_labels()
 
     test_predictions = {}
-    test_predictions["target"] = target_model.output_transform(target_model.predict(test_dataset, None, None))
+    test_predictions["target"] = target_model.predict(test_dataset, None, None)
     for source in source_datasets:
         model_dir = source_dir / source
         test_predictions[model_dir.name] = get_predictions_for_single_model(target_model, test_dataset, model_dir, log_dir)

From 685ee3eff5956ae1a7f5e9711327427d1f3fa506 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 15:17:48 +0100
Subject: [PATCH 034/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 196c1e1b..7e85c203 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -44,6 +44,7 @@ def get_predictions_for_single_model(target_model: object, dataset: RICUDataset,
         model.load_weights(model_dir / "model.joblib")
     else:
         raise Exception("No weights to load at path : {}".format(model_dir / "model.*"))
+    logging.info(f"Generating predictions for model : {model_dir}")
     return model.predict(dataset, None, None)
 
 
@@ -82,13 +83,14 @@ def get_predictions_for_all_models(
     _, test_labels = test_dataset.get_data_and_labels()
 
     test_predictions = {}
+    logging.info("Generating predictions for target")
     test_predictions["target"] = target_model.predict(test_dataset, None, None)
     for source in source_datasets:
         model_dir = source_dir / source
         test_predictions[model_dir.name] = get_predictions_for_single_model(target_model, test_dataset, model_dir, log_dir)
 
     for name, prediction in test_predictions.items():
-        if prediction.ndim == 2:
+        if not isinstance(prediction, list) and prediction.ndim == 2:
             test_predictions[name] = prediction[:, 1]
 
     return test_predictions, test_labels

From 3f6875416f068c7ecad3f1563de51fed56f16d42 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 15:17:58 +0100
Subject: [PATCH 035/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 7e85c203..dd747cb4 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -159,7 +159,7 @@ def domain_adaptation(
             gin.bind_parameter("preprocess.fold_size", target_size)
             log_dir = run_dir / task / model / dataset / f"target_{target_size}"
             log_dir.mkdir(parents=True, exist_ok=True)
-            choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug)
+            choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug)
             gin_config_with_target_hyperparameters = gin.config_str()
             results = {}
             for repetition in range(cv_repetitions_to_train):

From efe115fa97028785296ad06ef0908f2337b10f27 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 16:52:11 +0100
Subject: [PATCH 036/163] move metrics calculation to wrapper

---
 icu_benchmarks/models/domain_adaptation.py | 17 --------------
 icu_benchmarks/models/wrappers.py          | 26 +++++++++++++++++++---
 2 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index dd747cb4..1a953925 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -96,23 +96,6 @@ def get_predictions_for_all_models(
     return test_predictions, test_labels
 
 
-def get_model_metrics(model: object, test_predictions: np.ndarray, test_labels: np.ndarray):
-    """Evaluate a combination of models.
-
-    Args:
-        test_predictions: Predictions for test set.
-        test_labels: Labels for test set.
-    """
-    test_metric_results = {}
-    for name, metric in model.metrics.items():
-        value = metric(model.label_transform(test_labels), test_predictions)
-        test_metric_results[name] = value
-        # Only log float values
-        if isinstance(value, np.float):
-            logging.info("test {}: {}".format(name, value))
-    return test_metric_results
-
-
 def domain_adaptation(
     data_dir: Path,
     run_dir: Path,
diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index 5ef4a0e6..add20fb8 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -290,20 +290,30 @@ def load_weights(self, load_path):
 
     def predict(self, dataset, weight, seed):
         self.set_metrics()
-        test_loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=self.n_worker, pin_memory=self.pin_memory)
+        loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=self.n_worker, pin_memory=self.pin_memory)
         if isinstance(weight, list):
             weight = torch.FloatTensor(weight).to(self.device)
-        test_loss, test_metrics = self.evaluate(test_loader, self.metrics, weight)
 
         self.model.eval()
         all_preds = []
         with torch.no_grad():
-            for elem in test_loader:
+            for elem in loader:
                 _, preds, _ = self.step_fn(elem, weight)
                 all_preds += preds
 
         return all_preds
 
+    def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray):
+        metric_results = {}
+        for name, metric in self.metrics.items():
+            metric.update(self.output_transform((predictions, labels)))
+            value = metric.compute()
+            metric_results[name] = value
+            # Only log float values
+            if isinstance(value, np.float):
+                logging.info("Test {}: {}".format(name, value))
+        return metric_results
+
 
 @gin.configurable("MLWrapper")
 class MLWrapper(object):
@@ -443,3 +453,13 @@ def predict(self, dataset, weight, seed):
             return self.model.predict(test_rep)
         else:
             return self.model.predict_proba(test_rep)
+
+    def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray):
+        metric_results = {}
+        for name, metric in self.metrics.items():
+            value = metric(self.label_transform(labels), predictions)
+            metric_results[name] = value
+            # Only log float values
+            if isinstance(value, np.float):
+                logging.info("Test {}: {}".format(name, value))
+        return metric_results

From c3299fb04c762d0f9e5b71c496a244d9bc886e94 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 16:55:14 +0100
Subject: [PATCH 037/163] fix function call

---
 icu_benchmarks/models/domain_adaptation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 1a953925..3c9f0986 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -180,7 +180,7 @@ def domain_adaptation(
                     # evaluate source baselines
                     for baseline, predictions in test_predictions.items():
                         logging.info("Evaluating model: {}".format(baseline))
-                        fold_results[baseline] = get_model_metrics(target_model, predictions, test_labels)
+                        fold_results[baseline] = target_model.calculate_metrics(predictions, test_labels)
 
                     # evaluate convex combination of models
                     test_predictions_list = list(test_predictions.values())
@@ -190,7 +190,7 @@ def domain_adaptation(
                         w = [t * sum(weights)] + weights
                         logging.info(f"Evaluating target weight: {t}")
                         test_pred = np.average(test_predictions_list, axis=0, weights=w)
-                        fold_results[f"convex_combination_{t}"] = get_model_metrics(target_model, test_pred, test_labels)
+                        fold_results[f"convex_combination_{t}"] = target_model.calculate_metrics(test_pred, test_labels)
 
                     log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
                 log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)

From 294ef742f02c0a6c957ac0ad686ab64d5900710e Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 16:55:31 +0100
Subject: [PATCH 038/163] increase batch_size for test

---
 icu_benchmarks/models/wrappers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index add20fb8..c6f7c2a6 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -290,7 +290,7 @@ def load_weights(self, load_path):
 
     def predict(self, dataset, weight, seed):
         self.set_metrics()
-        loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=self.n_worker, pin_memory=self.pin_memory)
+        loader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=self.n_worker, pin_memory=self.pin_memory)
         if isinstance(weight, list):
             weight = torch.FloatTensor(weight).to(self.device)
 

From 5f36045d0697628174cac59eecdc25cb119c01d1 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:11:12 +0100
Subject: [PATCH 039/163] Update wrappers.py

---
 icu_benchmarks/models/wrappers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index c6f7c2a6..cc511754 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -305,6 +305,7 @@ def predict(self, dataset, weight, seed):
 
     def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray):
         metric_results = {}
+        predictions = torch.from_numpy(predictions)
         for name, metric in self.metrics.items():
             metric.update(self.output_transform((predictions, labels)))
             value = metric.compute()

From d57b375f0ff07d44d642ba89e06cd98903a0160d Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:12:23 +0100
Subject: [PATCH 040/163] Update Transformer.gin

---
 configs/models/Transformer.gin | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin
index 7085e672..c5c0e5f8 100644
--- a/configs/models/Transformer.gin
+++ b/configs/models/Transformer.gin
@@ -14,7 +14,7 @@ DLWrapper.optimizer_fn = @Adam
 
 DLWrapper.train.epochs = 1000
 DLWrapper.train.batch_size = 64
-DLWrapper.train.patience = 5
+DLWrapper.train.patience = 1
 DLWrapper.train.min_delta = 1e-4
 
 # Optimizer params

From 442a3c4c0865e4030e187fa8648a89a1fd40df95 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:13:22 +0100
Subject: [PATCH 041/163] Update wrappers.py

---
 icu_benchmarks/models/wrappers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index cc511754..60ad0a43 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -303,9 +303,9 @@ def predict(self, dataset, weight, seed):
 
         return all_preds
 
-    def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray):
+    def calculate_metrics(self: object, predictions: list, labels: np.ndarray):
         metric_results = {}
-        predictions = torch.from_numpy(predictions)
+        predictions = torch.FloatTensor(predictions)
         for name, metric in self.metrics.items():
             metric.update(self.output_transform((predictions, labels)))
             value = metric.compute()

From 0967ff5e2b1abd93d457b8dd530359922a9873b4 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:14:55 +0100
Subject: [PATCH 042/163] Update wrappers.py

---
 icu_benchmarks/models/wrappers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index 60ad0a43..dd674f3d 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -305,6 +305,7 @@ def predict(self, dataset, weight, seed):
 
     def calculate_metrics(self: object, predictions: list, labels: np.ndarray):
         metric_results = {}
+        print(predictions)
         predictions = torch.FloatTensor(predictions)
         for name, metric in self.metrics.items():
             metric.update(self.output_transform((predictions, labels)))

From de888771a8d9fffcb6a170052a45ca93f16c86ce Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:16:49 +0100
Subject: [PATCH 043/163] Update wrappers.py

---
 icu_benchmarks/models/wrappers.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index dd674f3d..7e377617 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -295,18 +295,17 @@ def predict(self, dataset, weight, seed):
             weight = torch.FloatTensor(weight).to(self.device)
 
         self.model.eval()
-        all_preds = []
+        all_preds = torch.FloatTensor()
         with torch.no_grad():
             for elem in loader:
                 _, preds, _ = self.step_fn(elem, weight)
-                all_preds += preds
+                all_preds = all_preds.cat(preds)
 
         return all_preds
 
-    def calculate_metrics(self: object, predictions: list, labels: np.ndarray):
+    def calculate_metrics(self: object, predictions: torch.tensor, labels: np.ndarray):
         metric_results = {}
         print(predictions)
-        predictions = torch.FloatTensor(predictions)
         for name, metric in self.metrics.items():
             metric.update(self.output_transform((predictions, labels)))
             value = metric.compute()

From 2ecda4046af0dcb2a1885e30a6a11ee278f7f73c Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:18:06 +0100
Subject: [PATCH 044/163] Update wrappers.py

---
 icu_benchmarks/models/wrappers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index 7e377617..e06333d4 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -299,7 +299,7 @@ def predict(self, dataset, weight, seed):
         with torch.no_grad():
             for elem in loader:
                 _, preds, _ = self.step_fn(elem, weight)
-                all_preds = all_preds.cat(preds)
+                all_preds = torch.cat((all_preds, preds))
 
         return all_preds
 

From 6c4cd96553c7515c1db74d3257b34736febe82b1 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:18:46 +0100
Subject: [PATCH 045/163] Update wrappers.py

---
 icu_benchmarks/models/wrappers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index e06333d4..e889e231 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -295,7 +295,7 @@ def predict(self, dataset, weight, seed):
             weight = torch.FloatTensor(weight).to(self.device)
 
         self.model.eval()
-        all_preds = torch.FloatTensor()
+        all_preds = torch.FloatTensor().to(self.device)
         with torch.no_grad():
             for elem in loader:
                 _, preds, _ = self.step_fn(elem, weight)

From 9fae10e1990922181543de24ddbdceb6e3578b0b Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:21:47 +0100
Subject: [PATCH 046/163] Update wrappers.py

---
 icu_benchmarks/models/wrappers.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index e889e231..e270e23f 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -295,17 +295,19 @@ def predict(self, dataset, weight, seed):
             weight = torch.FloatTensor(weight).to(self.device)
 
         self.model.eval()
-        all_preds = torch.FloatTensor().to(self.device)
+        all_preds = np.ndarray()
         with torch.no_grad():
             for elem in loader:
                 _, preds, _ = self.step_fn(elem, weight)
-                all_preds = torch.cat((all_preds, preds))
+                all_preds = np.append(all_preds, preds.cpu().numpy().tolist())
+        print(all_preds)
 
         return all_preds
 
-    def calculate_metrics(self: object, predictions: torch.tensor, labels: np.ndarray):
+    def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray):
         metric_results = {}
         print(predictions)
+        predictions = torch.from_numpy(predictions)
         for name, metric in self.metrics.items():
             metric.update(self.output_transform((predictions, labels)))
             value = metric.compute()

From 5d6cb2e9d2e3055df297c3778aeb7ec218ea40f6 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:24:16 +0100
Subject: [PATCH 047/163] Update wrappers.py

---
 icu_benchmarks/models/wrappers.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index e270e23f..65f0ecd5 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -295,11 +295,12 @@ def predict(self, dataset, weight, seed):
             weight = torch.FloatTensor(weight).to(self.device)
 
         self.model.eval()
-        all_preds = np.ndarray()
+        all_preds = []
         with torch.no_grad():
             for elem in loader:
                 _, preds, _ = self.step_fn(elem, weight)
-                all_preds = np.append(all_preds, preds.cpu().numpy().tolist())
+                all_preds += preds.cpu().numpy().tolist()
+        all_preds = np.array(all_preds)
         print(all_preds)
 
         return all_preds

From 57db01b0b5a70c1f05730f1f5e511301b352cb52 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:28:02 +0100
Subject: [PATCH 048/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 3c9f0986..851b98e8 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -90,7 +90,7 @@ def get_predictions_for_all_models(
         test_predictions[model_dir.name] = get_predictions_for_single_model(target_model, test_dataset, model_dir, log_dir)
 
     for name, prediction in test_predictions.items():
-        if not isinstance(prediction, list) and prediction.ndim == 2:
+        if isinstance(target_model, MLWrapper) and prediction.ndim == 2:
             test_predictions[name] = prediction[:, 1]
 
     return test_predictions, test_labels

From b93b402b36028560eb806b89408688f92438b25d Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:32:02 +0100
Subject: [PATCH 049/163] Update wrappers.py

---
 icu_benchmarks/models/wrappers.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index 65f0ecd5..4bb2086f 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -298,8 +298,9 @@ def predict(self, dataset, weight, seed):
         all_preds = []
         with torch.no_grad():
             for elem in loader:
-                _, preds, _ = self.step_fn(elem, weight)
-                all_preds += preds.cpu().numpy().tolist()
+                _, preds, target = self.step_fn(elem, weight)
+                preds, target = self.output_transform((preds, target))
+                all_preds += preds
         all_preds = np.array(all_preds)
         print(all_preds)
 
@@ -310,7 +311,7 @@ def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray)
         print(predictions)
         predictions = torch.from_numpy(predictions)
         for name, metric in self.metrics.items():
-            metric.update(self.output_transform((predictions, labels)))
+            metric.update((predictions, labels))
             value = metric.compute()
             metric_results[name] = value
             # Only log float values

From 0cd0b13c58358211f1f32276344bdca1cb30578f Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:34:04 +0100
Subject: [PATCH 050/163] Update wrappers.py

---
 icu_benchmarks/models/wrappers.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index 4bb2086f..a2d99451 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -309,7 +309,6 @@ def predict(self, dataset, weight, seed):
     def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray):
         metric_results = {}
         print(predictions)
-        predictions = torch.from_numpy(predictions)
         for name, metric in self.metrics.items():
             metric.update((predictions, labels))
             value = metric.compute()

From 623a80da1c6a6f9d4b4bdb45c914225f385f15af Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:34:51 +0100
Subject: [PATCH 051/163] Update wrappers.py

---
 icu_benchmarks/models/wrappers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index a2d99451..980b4a38 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -300,7 +300,7 @@ def predict(self, dataset, weight, seed):
             for elem in loader:
                 _, preds, target = self.step_fn(elem, weight)
                 preds, target = self.output_transform((preds, target))
-                all_preds += preds
+                all_preds += preds.cpu().numpy().tolist()
         all_preds = np.array(all_preds)
         print(all_preds)
 

From 702c91742095ea975b1ad08d6bb68dfaea2a9603 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:43:13 +0100
Subject: [PATCH 052/163] change metric calculation

---
 icu_benchmarks/models/domain_adaptation.py | 16 ++++++++++++++--
 icu_benchmarks/models/wrappers.py          | 22 ----------------------
 2 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 851b98e8..86bd9463 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -12,6 +12,7 @@
 from icu_benchmarks.data.loader import RICUDataset
 from icu_benchmarks.data.preprocess import preprocess_data
 from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters
+from icu_benchmarks.models.metric_constants import MLMetrics
 from icu_benchmarks.models.train import train_common
 from icu_benchmarks.models.wrappers import DLWrapper, MLWrapper
 from icu_benchmarks.models.utils import JsonNumpyEncoder
@@ -48,6 +49,17 @@ def get_predictions_for_single_model(target_model: object, dataset: RICUDataset,
     return model.predict(dataset, None, None)
 
 
+def calculate_metrics(predictions: np.ndarray, labels: np.ndarray):
+    metric_results = {}
+    for name, metric in MLMetrics.BINARY_CLASSIFICATION.items():
+        value = metric(labels, predictions)
+        metric_results[name] = value
+        # Only log float values
+        if isinstance(value, np.float):
+            logging.info("Test {}: {}".format(name, value))
+    return metric_results
+
+
 def get_predictions_for_all_models(
     target_model: object,
     data: dict[str, pd.DataFrame],
@@ -180,7 +192,7 @@ def domain_adaptation(
                     # evaluate source baselines
                     for baseline, predictions in test_predictions.items():
                         logging.info("Evaluating model: {}".format(baseline))
-                        fold_results[baseline] = target_model.calculate_metrics(predictions, test_labels)
+                        fold_results[baseline] = calculate_metrics(predictions, test_labels)
 
                     # evaluate convex combination of models
                     test_predictions_list = list(test_predictions.values())
@@ -190,7 +202,7 @@ def domain_adaptation(
                         w = [t * sum(weights)] + weights
                         logging.info(f"Evaluating target weight: {t}")
                         test_pred = np.average(test_predictions_list, axis=0, weights=w)
-                        fold_results[f"convex_combination_{t}"] = target_model.calculate_metrics(test_pred, test_labels)
+                        fold_results[f"convex_combination_{t}"] = calculate_metrics(test_pred, test_labels)
 
                     log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
                 log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index 980b4a38..d1119476 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -306,18 +306,6 @@ def predict(self, dataset, weight, seed):
 
         return all_preds
 
-    def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray):
-        metric_results = {}
-        print(predictions)
-        for name, metric in self.metrics.items():
-            metric.update((predictions, labels))
-            value = metric.compute()
-            metric_results[name] = value
-            # Only log float values
-            if isinstance(value, np.float):
-                logging.info("Test {}: {}".format(name, value))
-        return metric_results
-
 
 @gin.configurable("MLWrapper")
 class MLWrapper(object):
@@ -457,13 +445,3 @@ def predict(self, dataset, weight, seed):
             return self.model.predict(test_rep)
         else:
             return self.model.predict_proba(test_rep)
-
-    def calculate_metrics(self: object, predictions: np.ndarray, labels: np.ndarray):
-        metric_results = {}
-        for name, metric in self.metrics.items():
-            value = metric(self.label_transform(labels), predictions)
-            metric_results[name] = value
-            # Only log float values
-            if isinstance(value, np.float):
-                logging.info("Test {}: {}".format(name, value))
-        return metric_results

From 570f6ab6a7d051753cc31e7acd2c8f2774aaae1b Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:46:16 +0100
Subject: [PATCH 053/163] Update Transformer.gin

---
 configs/models/Transformer.gin | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin
index c5c0e5f8..e6a1a2ac 100644
--- a/configs/models/Transformer.gin
+++ b/configs/models/Transformer.gin
@@ -14,7 +14,7 @@ DLWrapper.optimizer_fn = @Adam
 
 DLWrapper.train.epochs = 1000
 DLWrapper.train.batch_size = 64
-DLWrapper.train.patience = 1
+DLWrapper.train.patience = 5
 DLWrapper.train.min_delta = 1e-4
 
 # Optimizer params
@@ -35,6 +35,6 @@ model/hyperparameter.dropout = (0.0, 0.4)
 model/hyperparameter.dropout_att = (0.0, 0.4)
 
 tune_hyperparameters.scopes = ["model", "optimizer"]
-tune_hyperparameters.n_initial_points = 2
-tune_hyperparameters.n_calls = 4
+tune_hyperparameters.n_initial_points = 5
+tune_hyperparameters.n_calls = 10
 tune_hyperparameters.folds_to_tune_on = 2

From 162adada45c2afbc62de1c967b6615d9a93a605e Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:46:53 +0100
Subject: [PATCH 054/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 86bd9463..3ea57dc2 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -154,7 +154,7 @@ def domain_adaptation(
             gin.bind_parameter("preprocess.fold_size", target_size)
             log_dir = run_dir / task / model / dataset / f"target_{target_size}"
             log_dir.mkdir(parents=True, exist_ok=True)
-            choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug)
+            choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug)
             gin_config_with_target_hyperparameters = gin.config_str()
             results = {}
             for repetition in range(cv_repetitions_to_train):

From c44a1b0c5e80d2c3df8555fcdb448a1420b0c00b Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 17:51:58 +0100
Subject: [PATCH 055/163] Update wrappers.py

---
 icu_benchmarks/models/wrappers.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/icu_benchmarks/models/wrappers.py b/icu_benchmarks/models/wrappers.py
index d1119476..8bacdee1 100644
--- a/icu_benchmarks/models/wrappers.py
+++ b/icu_benchmarks/models/wrappers.py
@@ -302,7 +302,6 @@ def predict(self, dataset, weight, seed):
                 preds, target = self.output_transform((preds, target))
                 all_preds += preds.cpu().numpy().tolist()
         all_preds = np.array(all_preds)
-        print(all_preds)
 
         return all_preds
 

From ea23e1ba86c79177880323acc65f29e00a68c501 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 19:23:21 +0100
Subject: [PATCH 056/163] add dg baseline

---
 icu_benchmarks/models/domain_adaptation.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 3ea57dc2..78bb3f38 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -196,6 +196,11 @@ def domain_adaptation(
 
                     # evaluate convex combination of models
                     test_predictions_list = list(test_predictions.values())
+                    test_predictions_list_without_target = test_predictions_list[1:]
+
+                    logging.info("Evaluating convex combination of models without target.")
+                    test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=weights)
+                    fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 
                     logging.info("Evaluating convex combination of models.")
                     for t in target_weights:

From d56d98b08ae66044e972be56969ca7a1b5ff33ce Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 23:26:49 +0100
Subject: [PATCH 057/163] fix json encoder

---
 icu_benchmarks/models/domain_adaptation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 78bb3f38..6d53bd83 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -15,7 +15,7 @@
 from icu_benchmarks.models.metric_constants import MLMetrics
 from icu_benchmarks.models.train import train_common
 from icu_benchmarks.models.wrappers import DLWrapper, MLWrapper
-from icu_benchmarks.models.utils import JsonNumpyEncoder
+from icu_benchmarks.models.utils import JsonResultLoggingEncoder
 from icu_benchmarks.run_utils import log_full_line
 
 
@@ -230,10 +230,10 @@ def domain_adaptation(
                     })
 
             with open(log_dir / "aggregated_source_metrics.json", "w") as f:
-                json.dump(results, f, cls=JsonNumpyEncoder)
+                json.dump(results, f, cls=JsonResultLoggingEncoder)
 
             with open(log_dir / "averaged_source_metrics.json", "w") as f:
-                json.dump(averaged_metrics, f, cls=JsonNumpyEncoder)
+                json.dump(averaged_metrics, f, cls=JsonResultLoggingEncoder)
 
             logging.info(f"Averaged results: {averaged_metrics}")
             log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5)

From e9b5c79b140ce3bd06383388606e6c1a6fc0e114 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 23:27:50 +0100
Subject: [PATCH 058/163] Update Transformer.gin

---
 configs/models/Transformer.gin | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/configs/models/Transformer.gin b/configs/models/Transformer.gin
index e6a1a2ac..7e89cf2a 100644
--- a/configs/models/Transformer.gin
+++ b/configs/models/Transformer.gin
@@ -14,7 +14,7 @@ DLWrapper.optimizer_fn = @Adam
 
 DLWrapper.train.epochs = 1000
 DLWrapper.train.batch_size = 64
-DLWrapper.train.patience = 5
+DLWrapper.train.patience = 10
 DLWrapper.train.min_delta = 1e-4
 
 # Optimizer params
@@ -36,5 +36,5 @@ model/hyperparameter.dropout_att = (0.0, 0.4)
 
 tune_hyperparameters.scopes = ["model", "optimizer"]
 tune_hyperparameters.n_initial_points = 5
-tune_hyperparameters.n_calls = 10
+tune_hyperparameters.n_calls = 30
 tune_hyperparameters.folds_to_tune_on = 2

From 2ca631ea73dcd4cb6af105afe3407dbc0ac3767c Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 12 Jan 2023 23:56:03 +0100
Subject: [PATCH 059/163] only execute da for one dataset at a time

---
 icu_benchmarks/models/domain_adaptation.py | 193 ++++++++++-----------
 icu_benchmarks/run.py                      |   2 +-
 2 files changed, 97 insertions(+), 98 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 6d53bd83..5fe7461c 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -109,6 +109,7 @@ def get_predictions_for_all_models(
 
 
 def domain_adaptation(
+    dataset: str,
     data_dir: Path,
     run_dir: Path,
     seed: int,
@@ -143,100 +144,98 @@ def domain_adaptation(
     gin_config_before_tuning = gin.config_str()
 
     # evaluate models on same test split
-    for dataset in datasets:
-        data_dir = task_dir / dataset
-        source_datasets = [d for d in datasets if d != dataset]
-        log_full_line(f"STARTING {dataset}", char="#", num_newlines=2)
-        for target_size in target_sizes:
-            gin.clear_config()
-            gin.parse_config(gin_config_before_tuning)
-            log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1)
-            gin.bind_parameter("preprocess.fold_size", target_size)
-            log_dir = run_dir / task / model / dataset / f"target_{target_size}"
-            log_dir.mkdir(parents=True, exist_ok=True)
-            choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug)
-            gin_config_with_target_hyperparameters = gin.config_str()
-            results = {}
-            for repetition in range(cv_repetitions_to_train):
-                for fold_index in range(cv_folds_to_train):
-                    gin.parse_config(gin_config_with_target_hyperparameters)
-                    results[f"{repetition}_{fold_index}"] = {}
-                    fold_results = results[f"{repetition}_{fold_index}"]
-
-                    data = preprocess_data(
-                        data_dir,
-                        seed=seed,
-                        debug=debug,
-                        use_cache=True,
-                        cv_repetitions=cv_repetitions,
-                        repetition_index=repetition,
-                        cv_folds=cv_folds,
-                        fold_index=fold_index,
-                    )
-
-                    log_dir_fold = log_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}"
-                    log_dir_fold.mkdir(parents=True, exist_ok=True)
-
-                    # evaluate target baselines
-                    target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
-                    
-                    test_predictions, test_labels = get_predictions_for_all_models(
-                        target_model,
-                        data,
-                        log_dir_fold,
-                        source_dir=model_path / task / model,
-                        seed=seed,
-                        source_datasets=source_datasets,
-                    )
-
-                    # evaluate source baselines
-                    for baseline, predictions in test_predictions.items():
-                        logging.info("Evaluating model: {}".format(baseline))
-                        fold_results[baseline] = calculate_metrics(predictions, test_labels)
-
-                    # evaluate convex combination of models
-                    test_predictions_list = list(test_predictions.values())
-                    test_predictions_list_without_target = test_predictions_list[1:]
-
-                    logging.info("Evaluating convex combination of models without target.")
-                    test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=weights)
-                    fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
-
-                    logging.info("Evaluating convex combination of models.")
-                    for t in target_weights:
-                        w = [t * sum(weights)] + weights
-                        logging.info(f"Evaluating target weight: {t}")
-                        test_pred = np.average(test_predictions_list, axis=0, weights=w)
-                        fold_results[f"convex_combination_{t}"] = calculate_metrics(test_pred, test_labels)
-
-                    log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
-                log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
-
-            source_metrics = {}
-            for result in results.values():
-                for source, source_stats in result.items():
-                    for metric, score in source_stats.items():
-                        if isinstance(score, (float, int)):
-                            source_metrics.setdefault(source, {}).setdefault(metric, []).append(score)
-
-            # Compute statistical metric over aggregated results
-            averaged_metrics = {}
-            for source, source_stats in source_metrics.items():
-                for metric, scores in source_stats.items():
-                    averaged_metrics.setdefault(source, {}).setdefault(metric, []).append({
-                        "avg": np.mean(scores),
-                        "std": np.std(scores),
-                        "CI_0.95": stats.t.interval(0.95, len(scores) - 1, loc=np.mean(scores), scale=stats.sem(scores)),
-                    })
-
-            with open(log_dir / "aggregated_source_metrics.json", "w") as f:
-                json.dump(results, f, cls=JsonResultLoggingEncoder)
-
-            with open(log_dir / "averaged_source_metrics.json", "w") as f:
-                json.dump(averaged_metrics, f, cls=JsonResultLoggingEncoder)
-
-            logging.info(f"Averaged results: {averaged_metrics}")
-            log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5)
-
-        log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=10)
- 
\ No newline at end of file
+    data_dir = task_dir / dataset
+    source_datasets = [d for d in datasets if d != dataset]
+    log_full_line(f"STARTING {dataset}", char="#", num_newlines=2)
+    for target_size in target_sizes:
+        gin.clear_config()
+        gin.parse_config(gin_config_before_tuning)
+        log_full_line(f"STARTING TARGET SIZE {target_size}", char="*", num_newlines=1)
+        gin.bind_parameter("preprocess.fold_size", target_size)
+        log_dir = run_dir / task / model / dataset / f"target_{target_size}"
+        log_dir.mkdir(parents=True, exist_ok=True)
+        choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug)
+        gin_config_with_target_hyperparameters = gin.config_str()
+        results = {}
+        for repetition in range(cv_repetitions_to_train):
+            for fold_index in range(cv_folds_to_train):
+                gin.parse_config(gin_config_with_target_hyperparameters)
+                results[f"{repetition}_{fold_index}"] = {}
+                fold_results = results[f"{repetition}_{fold_index}"]
+
+                data = preprocess_data(
+                    data_dir,
+                    seed=seed,
+                    debug=debug,
+                    use_cache=True,
+                    cv_repetitions=cv_repetitions,
+                    repetition_index=repetition,
+                    cv_folds=cv_folds,
+                    fold_index=fold_index,
+                )
+
+                log_dir_fold = log_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}"
+                log_dir_fold.mkdir(parents=True, exist_ok=True)
+
+                # evaluate target baselines
+                target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
+                
+                test_predictions, test_labels = get_predictions_for_all_models(
+                    target_model,
+                    data,
+                    log_dir_fold,
+                    source_dir=model_path / task / model,
+                    seed=seed,
+                    source_datasets=source_datasets,
+                )
+
+                # evaluate source baselines
+                for baseline, predictions in test_predictions.items():
+                    logging.info("Evaluating model: {}".format(baseline))
+                    fold_results[baseline] = calculate_metrics(predictions, test_labels)
+
+                # evaluate convex combination of models
+                test_predictions_list = list(test_predictions.values())
+                test_predictions_list_without_target = test_predictions_list[1:]
+
+                logging.info("Evaluating convex combination of models without target.")
+                test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=weights)
+                fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
+
+                logging.info("Evaluating convex combination of models.")
+                for t in target_weights:
+                    w = [t * sum(weights)] + weights
+                    logging.info(f"Evaluating target weight: {t}")
+                    test_pred = np.average(test_predictions_list, axis=0, weights=w)
+                    fold_results[f"convex_combination_{t}"] = calculate_metrics(test_pred, test_labels)
+
+                log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
+            log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
+
+        source_metrics = {}
+        for result in results.values():
+            for source, source_stats in result.items():
+                for metric, score in source_stats.items():
+                    if isinstance(score, (float, int)):
+                        source_metrics.setdefault(source, {}).setdefault(metric, []).append(score)
+
+        # Compute statistical metric over aggregated results
+        averaged_metrics = {}
+        for source, source_stats in source_metrics.items():
+            for metric, scores in source_stats.items():
+                averaged_metrics.setdefault(source, {}).setdefault(metric, []).append({
+                    "avg": np.mean(scores),
+                    "std": np.std(scores),
+                    "CI_0.95": stats.t.interval(0.95, len(scores) - 1, loc=np.mean(scores), scale=stats.sem(scores)),
+                })
+
+        with open(log_dir / "aggregated_source_metrics.json", "w") as f:
+            json.dump(results, f, cls=JsonResultLoggingEncoder)
+
+        with open(log_dir / "averaged_source_metrics.json", "w") as f:
+            json.dump(averaged_metrics, f, cls=JsonResultLoggingEncoder)
+
+        logging.info(f"Averaged results: {averaged_metrics}")
+        log_full_line(f"EVALUATED TARGET SIZE {target_size}", char="*", num_newlines=5)
+
+    log_full_line(f"EVALUATED {dataset}", char="#", num_newlines=5)
diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py
index 4db87271..bb76a49b 100644
--- a/icu_benchmarks/run.py
+++ b/icu_benchmarks/run.py
@@ -63,7 +63,7 @@ def main(my_args=tuple(sys.argv[1:])):
             else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")]
         )
         gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False)
-        domain_adaptation(args.data_dir, run_dir, args.seed, args.task_name, model)
+        domain_adaptation(name, args.data_dir, run_dir, args.seed, args.task_name, model)
         return
     else:
         reproducible = args.reproducible

From 45852579fe8949e84f6bd7b9eb8cec47ac5e02ae Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 00:14:09 +0100
Subject: [PATCH 060/163] remove run dir

---
 icu_benchmarks/run.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py
index bb76a49b..54ef17a9 100644
--- a/icu_benchmarks/run.py
+++ b/icu_benchmarks/run.py
@@ -56,14 +56,13 @@ def main(my_args=tuple(sys.argv[1:])):
         )
         return
     if args.command == "da":
-        run_dir = create_run_dir(args.log_dir)
         gin_config_files = (
             [Path(f"configs/experiments/{args.experiment}.gin")]
             if args.experiment
             else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")]
         )
         gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False)
-        domain_adaptation(name, args.data_dir, run_dir, args.seed, args.task_name, model)
+        domain_adaptation(name, args.data_dir, args.log_dir, args.seed, args.task_name, model)
         return
     else:
         reproducible = args.reproducible

From fc0d41e1c9173247ada672a99b8a0d62deec2fd4 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 14:04:50 +0100
Subject: [PATCH 061/163] Create da_to_csv.py

---
 scripts/results/da_to_csv.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 scripts/results/da_to_csv.py

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
new file mode 100644
index 00000000..35ac1973
--- /dev/null
+++ b/scripts/results/da_to_csv.py
@@ -0,0 +1,15 @@
+import json
+from pathlib import Path
+import csv
+
+models_dir = Path("../DA_logs")
+for metric in ["AUC", "PR"]:
+    for endpoint in models_dir.iterdir():
+        with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file:
+            writer = csv.writer(csv_file)
+            for model in endpoint.iterdir():
+                for target in model.iterdir():
+                    for target_size in target.iterdir():
+                        with open(target_size / 'averaged_source_metrics.json', 'r') as f:
+                            results = json.load(f)
+                            writer.writerow([model.name, target, target_size] + [source[metric] for source in results])

From 440aa15b8b4b4e857156b46fa91f0bd1abc84562 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 14:08:25 +0100
Subject: [PATCH 062/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 35ac1973..4ed32fb0 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -8,6 +8,8 @@
         with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file:
             writer = csv.writer(csv_file)
             for model in endpoint.iterdir():
+                if model.name == "LSTM":
+                    continue
                 for target in model.iterdir():
                     for target_size in target.iterdir():
                         with open(target_size / 'averaged_source_metrics.json', 'r') as f:

From 7a2583b56a42d10ee5ae3491fbfe259124b4e3ed Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 14:10:31 +0100
Subject: [PATCH 063/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 4ed32fb0..d303aefe 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -14,4 +14,4 @@
                     for target_size in target.iterdir():
                         with open(target_size / 'averaged_source_metrics.json', 'r') as f:
                             results = json.load(f)
-                            writer.writerow([model.name, target, target_size] + [source[metric] for source in results])
+                            writer.writerow([model.name, target, target_size] + [source_metrics[metric] for source_name, source_metrics in results.items()])

From 11d80e2e7c123f39ffd65e1877ffe2e5c3261f44 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 14:28:01 +0100
Subject: [PATCH 064/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index d303aefe..c91bd2e2 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -5,13 +5,14 @@
 models_dir = Path("../DA_logs")
 for metric in ["AUC", "PR"]:
     for endpoint in models_dir.iterdir():
-        with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file:
-            writer = csv.writer(csv_file)
-            for model in endpoint.iterdir():
-                if model.name == "LSTM":
-                    continue
-                for target in model.iterdir():
-                    for target_size in target.iterdir():
-                        with open(target_size / 'averaged_source_metrics.json', 'r') as f:
-                            results = json.load(f)
-                            writer.writerow([model.name, target, target_size] + [source_metrics[metric] for source_name, source_metrics in results.items()])
+        if endpoint.is_dir():
+            with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file:
+                writer = csv.writer(csv_file)
+                for model in endpoint.iterdir():
+                    if model.name == "LSTM":
+                        continue
+                    for target in model.iterdir():
+                        for target_size in target.iterdir():
+                            with open(target_size / 'averaged_source_metrics.json', 'r') as f:
+                                results = json.load(f)
+                                writer.writerow([model.name, target, target_size] + [source_metrics[metric] for source_name, source_metrics in results.items()])

From 18fe642610348760ec89f7f44478407c92f4a889 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 14:30:27 +0100
Subject: [PATCH 065/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index c91bd2e2..eff1dba5 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -15,4 +15,4 @@
                         for target_size in target.iterdir():
                             with open(target_size / 'averaged_source_metrics.json', 'r') as f:
                                 results = json.load(f)
-                                writer.writerow([model.name, target, target_size] + [source_metrics[metric] for source_name, source_metrics in results.items()])
+                                writer.writerow([model.name, target.name, target_size.name] + [source_metrics[metric] for source_name, source_metrics in results.items()])

From 3df1fcd7c285a3654e9d651a727ab623b48cbd6c Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 14:37:06 +0100
Subject: [PATCH 066/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index eff1dba5..ea09ff7a 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -15,4 +15,6 @@
                         for target_size in target.iterdir():
                             with open(target_size / 'averaged_source_metrics.json', 'r') as f:
                                 results = json.load(f)
-                                writer.writerow([model.name, target.name, target_size.name] + [source_metrics[metric] for source_name, source_metrics in results.items()])
+                                source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()]
+                                source_metrics = [metr for _, metr in source_metrics.items()]
+                                writer.writerow([model.name, target.name, target_size.name] + source_metrics)

From 1ce0f17a227505f59eb7504caf7c9e379d598020 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 14:40:47 +0100
Subject: [PATCH 067/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index ea09ff7a..575e649a 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -16,5 +16,5 @@
                             with open(target_size / 'averaged_source_metrics.json', 'r') as f:
                                 results = json.load(f)
                                 source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()]
-                                source_metrics = [metr for _, metr in source_metrics.items()]
+                                source_metrics = [(metr["avg"], metr["std"], metr["CI_0.95"]) for metr in source_metrics]
                                 writer.writerow([model.name, target.name, target_size.name] + source_metrics)

From 2e7ac54b5da4eddd184120a1ab679aac7b376267 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 14:41:43 +0100
Subject: [PATCH 068/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 575e649a..594e8770 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -16,5 +16,5 @@
                             with open(target_size / 'averaged_source_metrics.json', 'r') as f:
                                 results = json.load(f)
                                 source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()]
-                                source_metrics = [(metr["avg"], metr["std"], metr["CI_0.95"]) for metr in source_metrics]
+                                source_metrics = [(metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]) for metr in source_metrics]
                                 writer.writerow([model.name, target.name, target_size.name] + source_metrics)

From fd8082226dbed42a2fc2a3d8c63720de832b534d Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 14:44:12 +0100
Subject: [PATCH 069/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 594e8770..bce95ad2 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -16,5 +16,6 @@
                             with open(target_size / 'averaged_source_metrics.json', 'r') as f:
                                 results = json.load(f)
                                 source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()]
-                                source_metrics = [(metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]) for metr in source_metrics]
+                                source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics]
+                                source_metrics_flat = [item for sublist in source_metrics for item in sublist]
                                 writer.writerow([model.name, target.name, target_size.name] + source_metrics)

From 5b9ba0367fed38a6b8bc190bbb5e29ad7891cd9c Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 14:45:00 +0100
Subject: [PATCH 070/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index bce95ad2..283f2b12 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -18,4 +18,4 @@
                                 source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()]
                                 source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics]
                                 source_metrics_flat = [item for sublist in source_metrics for item in sublist]
-                                writer.writerow([model.name, target.name, target_size.name] + source_metrics)
+                                writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat)

From 22b7f61930df1a132e1dc9baf35cad239b111450 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 15:08:44 +0100
Subject: [PATCH 071/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 283f2b12..87f08da9 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -8,6 +8,13 @@
         if endpoint.is_dir():
             with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file:
                 writer = csv.writer(csv_file)
+                source_names = ['target', 'aumc', 'eicu', 'hirid', 'miiv', 'convex_combination_without_target', 'convex_combination_0.1', 'convex_combination_0.2', 'convex_combination_0.5', 'convex_combination_1', 'convex_combination_2', 'convex_combination_5']
+                stats = ['avg', 'std', 'CI_0.95']
+                # combine fieldnames and stats
+                full_fields = [f'{source}_{stat}' for source in source_names for stat in stats]
+                writer = csv.DictWriter(csv_file, fieldnames=full_fields)
+
+                writer.writeheader()
                 for model in endpoint.iterdir():
                     if model.name == "LSTM":
                         continue
@@ -15,7 +22,8 @@
                         for target_size in target.iterdir():
                             with open(target_size / 'averaged_source_metrics.json', 'r') as f:
                                 results = json.load(f)
-                                source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()]
-                                source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics]
-                                source_metrics_flat = [item for sublist in source_metrics for item in sublist]
-                                writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat)
+                                # source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()]
+                                # source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics]
+                                # source_metrics_flat = [item for sublist in source_metrics for item in sublist]
+                                # writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat)
+                                writer.writerow({f'{source}_{stat}': source_metrics[metric][stat] for source, source_metrics in results.items() for stat in stats})

From 095c79bedacad19dc499d55eefd24971ebe111bd Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 15:09:31 +0100
Subject: [PATCH 072/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 87f08da9..0cc3faff 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -26,4 +26,4 @@
                                 # source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics]
                                 # source_metrics_flat = [item for sublist in source_metrics for item in sublist]
                                 # writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat)
-                                writer.writerow({f'{source}_{stat}': source_metrics[metric][stat] for source, source_metrics in results.items() for stat in stats})
+                                writer.writerow({f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats})

From 88a79e0900ffd42f96ad65e6637e0bd5961ef09b Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 15:12:26 +0100
Subject: [PATCH 073/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 0cc3faff..67a35e74 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -8,11 +8,12 @@
         if endpoint.is_dir():
             with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file:
                 writer = csv.writer(csv_file)
+                info = ["model", "target", "target_size"]
                 source_names = ['target', 'aumc', 'eicu', 'hirid', 'miiv', 'convex_combination_without_target', 'convex_combination_0.1', 'convex_combination_0.2', 'convex_combination_0.5', 'convex_combination_1', 'convex_combination_2', 'convex_combination_5']
                 stats = ['avg', 'std', 'CI_0.95']
                 # combine fieldnames and stats
                 full_fields = [f'{source}_{stat}' for source in source_names for stat in stats]
-                writer = csv.DictWriter(csv_file, fieldnames=full_fields)
+                writer = csv.DictWriter(csv_file, fieldnames=info+full_fields)
 
                 writer.writeheader()
                 for model in endpoint.iterdir():
@@ -26,4 +27,7 @@
                                 # source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics]
                                 # source_metrics_flat = [item for sublist in source_metrics for item in sublist]
                                 # writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat)
-                                writer.writerow({f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats})
+
+                                info = [model.name, target.name, target_size.name]
+                                metrics_row = {f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats}
+                                writer.writerow(info + metrics_row)

From c8396b0427c1b37d4ccb653ad657c68325da0d02 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 15:13:39 +0100
Subject: [PATCH 074/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 67a35e74..5b88fba2 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -28,6 +28,10 @@
                                 # source_metrics_flat = [item for sublist in source_metrics for item in sublist]
                                 # writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat)
 
-                                info = [model.name, target.name, target_size.name]
+                                info = {
+                                    'model': model.name,
+                                    'target': target.name,
+                                    'target_size': target_size.name
+                                }
                                 metrics_row = {f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats}
                                 writer.writerow(info + metrics_row)

From 6e505bfe0ec68486c6e6976297cd8dc13bf0333b Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 15:14:53 +0100
Subject: [PATCH 075/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 5b88fba2..268da395 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -28,10 +28,11 @@
                                 # source_metrics_flat = [item for sublist in source_metrics for item in sublist]
                                 # writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat)
 
-                                info = {
+                                row_data = {
                                     'model': model.name,
                                     'target': target.name,
                                     'target_size': target_size.name
                                 }
-                                metrics_row = {f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats}
-                                writer.writerow(info + metrics_row)
+                                metrics_data = {f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats}
+                                row_data.update(metrics_data)
+                                writer.writerow(row_data)

From ea062c91b427c73b4f82b4f268630d865d3a7160 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 15:39:10 +0100
Subject: [PATCH 076/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 268da395..87ee4126 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -20,13 +20,10 @@
                     if model.name == "LSTM":
                         continue
                     for target in model.iterdir():
-                        for target_size in target.iterdir():
-                            with open(target_size / 'averaged_source_metrics.json', 'r') as f:
+                        target_sizes = ['target_500', 'target_1000', 'target_2000']
+                        for target_size in target_sizes:
+                            with open(target / target_size / 'averaged_source_metrics.json', 'r') as f:
                                 results = json.load(f)
-                                # source_metrics = [source_metrics[metric] for source_name, source_metrics in results.items()]
-                                # source_metrics = [[metr[0]["avg"], metr[0]["std"], metr[0]["CI_0.95"]] for metr in source_metrics]
-                                # source_metrics_flat = [item for sublist in source_metrics for item in sublist]
-                                # writer.writerow([model.name, target.name, target_size.name] + source_metrics_flat)
 
                                 row_data = {
                                     'model': model.name,

From f66c78785c5705ab2f61026228ed63ee388025c1 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 15:39:33 +0100
Subject: [PATCH 077/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 87ee4126..ddcb71cc 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -28,7 +28,7 @@
                                 row_data = {
                                     'model': model.name,
                                     'target': target.name,
-                                    'target_size': target_size.name
+                                    'target_size': target_size
                                 }
                                 metrics_data = {f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats}
                                 row_data.update(metrics_data)

From 6aeec62c1a4d771fa21a3a8248226c82d9e8401d Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 15:45:24 +0100
Subject: [PATCH 078/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index ddcb71cc..2322ca4a 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -30,6 +30,11 @@
                                     'target': target.name,
                                     'target_size': target_size
                                 }
-                                metrics_data = {f'{source}_{stat}': source_metrics[metric][0][stat] for source, source_metrics in results.items() for stat in stats}
-                                row_data.update(metrics_data)
+                                for stat in stats:
+                                    for source, source_metrics in results.items():
+                                        if stat == 'CI_0.95':
+                                            row_data[f'{source}_{stat}_min'] = source_metrics[metric][0][stat][0] * 100
+                                            row_data[f'{source}_{stat}_max'] = source_metrics[metric][0][stat][1] * 100
+                                        else:
+                                            row_data[f'{source}_{stat}'] = source_metrics[metric][0][stat] * 100
                                 writer.writerow(row_data)

From a3e68f6542a6f7297f74319d0951b926a36d3c07 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 15:45:52 +0100
Subject: [PATCH 079/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 2322ca4a..f8d686be 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -10,7 +10,7 @@
                 writer = csv.writer(csv_file)
                 info = ["model", "target", "target_size"]
                 source_names = ['target', 'aumc', 'eicu', 'hirid', 'miiv', 'convex_combination_without_target', 'convex_combination_0.1', 'convex_combination_0.2', 'convex_combination_0.5', 'convex_combination_1', 'convex_combination_2', 'convex_combination_5']
-                stats = ['avg', 'std', 'CI_0.95']
+                stats = ['avg', 'std', 'CI_0.95_min', 'CI_0.95_max']
                 # combine fieldnames and stats
                 full_fields = [f'{source}_{stat}' for source in source_names for stat in stats]
                 writer = csv.DictWriter(csv_file, fieldnames=info+full_fields)

From 87b7fbe307e4c1b100b89b8127c5f0dc17174669 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 13 Jan 2023 15:46:50 +0100
Subject: [PATCH 080/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index f8d686be..11e6d40b 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -10,6 +10,7 @@
                 writer = csv.writer(csv_file)
                 info = ["model", "target", "target_size"]
                 source_names = ['target', 'aumc', 'eicu', 'hirid', 'miiv', 'convex_combination_without_target', 'convex_combination_0.1', 'convex_combination_0.2', 'convex_combination_0.5', 'convex_combination_1', 'convex_combination_2', 'convex_combination_5']
+                stats_basis = ['avg', 'std', 'CI_0.95']
                 stats = ['avg', 'std', 'CI_0.95_min', 'CI_0.95_max']
                 # combine fieldnames and stats
                 full_fields = [f'{source}_{stat}' for source in source_names for stat in stats]
@@ -30,7 +31,7 @@
                                     'target': target.name,
                                     'target_size': target_size
                                 }
-                                for stat in stats:
+                                for stat in stats_basis:
                                     for source, source_metrics in results.items():
                                         if stat == 'CI_0.95':
                                             row_data[f'{source}_{stat}_min'] = source_metrics[metric][0][stat][0] * 100

From 2277198ece9e3932d8bc5a25f5d70d9656f33308 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 14 Jan 2023 15:50:32 +0100
Subject: [PATCH 081/163] remove evaluate and test_all

---
 icu_benchmarks/cross_validation.py | 49 ------------------
 icu_benchmarks/data/preprocess.py  | 80 +++++++++++-------------------
 icu_benchmarks/run.py              | 12 +----
 3 files changed, 31 insertions(+), 110 deletions(-)

diff --git a/icu_benchmarks/cross_validation.py b/icu_benchmarks/cross_validation.py
index 347c33a8..adbac6b0 100644
--- a/icu_benchmarks/cross_validation.py
+++ b/icu_benchmarks/cross_validation.py
@@ -90,52 +90,3 @@ def execute_repeated_cv(
         log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
 
     return agg_loss / (cv_repetitions_to_train * cv_folds_to_train)
-
-
-def evaluate(
-    data_dir: Path,
-    log_dir: Path,
-    seed: int,
-    source_dir: Path = None,
-    reproducible: bool = True,
-    debug: bool = False,
-    use_cache: bool = False,
-) -> float:
-    """Preprocesses data and trains a model for each fold.
-
-    Args:
-        data_dir: Path to the data directory.
-        log_dir: Path to the log directory.
-        seed: Random seed.
-        load_weights: Whether to load weights from source_dir.
-        source_dir: Path to the source directory.
-        cv_folds: Number of folds for cross validation.
-        cv_folds_to_train: Number of folds to use during training. If None, all folds are trained on.
-        reproducible: Whether to make torch reproducible.
-        debug: Whether to load less data and enable more logging.
-        use_cache: Whether to cache and use cached data.
-        test_on: Dataset to test on. Can be "test" or "val" (e.g. for hyperparameter tuning).
-
-    Returns:
-        The average loss of all folds.
-    """
-
-    data = preprocess_data(
-        data_dir,
-        seed=seed,
-        debug=debug,
-        use_cache=use_cache,
-        test_all=True,
-    )
-
-    run_dir_seed = log_dir / f"seed_{seed}"
-    run_dir_seed.mkdir(parents=True, exist_ok=True)
-
-    return train_common(
-        data,
-        log_dir=run_dir_seed,
-        load_weights=True,
-        source_dir=source_dir,
-        seed=seed,
-        reproducible=reproducible,
-    )
diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py
index 04563e9f..6f573d1d 100644
--- a/icu_benchmarks/data/preprocess.py
+++ b/icu_benchmarks/data/preprocess.py
@@ -27,7 +27,6 @@ def make_single_split(
     seed: int = 42,
     debug: bool = False,
     fold_size: int = None,
-    test_all: bool = False,
 ) -> dict[dict[pd.DataFrame]]:
     """Randomly split the data into training, validation, and test set.
 
@@ -40,7 +39,6 @@ def make_single_split(
         fold_index: Index of the fold to return.
         seed: Random seed.
         debug: Load less data if true.
-        test_all: If true, the test set will be the entire dataset.
 
     Returns:
         Input data divided into 'train', 'val', and 'test'.
@@ -52,42 +50,36 @@ def make_single_split(
         stays = stays.sample(frac=0.01, random_state=seed)
     labels = data["OUTCOME"][vars["LABEL"]].loc[stays.index]
 
-    if test_all:
-        split = {
-            "train": stays.iloc[0:0],
-            "val": stays.iloc[0:0],
-            "test": stays,
-        }
-    else:
-        outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed)
-        dev, test = list(outer_CV.split(stays, labels))[repetition_index]
+ 
+    outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed)
+    dev, test = list(outer_CV.split(stays, labels))[repetition_index]
 
-        if fold_size:
-            start_index = 0
-            end_index = fold_size
+    if fold_size:
+        start_index = 0
+        end_index = fold_size
+        pre_dev = dev[start_index:end_index]
+        leave_for_test = dev[end_index:]
+        pre_dev_labels = labels.iloc[pre_dev]
+        while pre_dev_labels.sum() < cv_folds:
+            start_index += fold_size
+            end_index += fold_size
             pre_dev = dev[start_index:end_index]
-            leave_for_test = dev[end_index:]
             pre_dev_labels = labels.iloc[pre_dev]
-            while pre_dev_labels.sum() < cv_folds:
-                start_index += fold_size
-                end_index += fold_size
-                pre_dev = dev[start_index:end_index]
-                pre_dev_labels = labels.iloc[pre_dev]
-                leave_for_test = np.append(dev[0:start_index], dev[end_index:])
-            dev = pre_dev
-            test = np.append(test, leave_for_test)
-
-        dev_stays = stays.iloc[dev]
-        dev_labels = labels.iloc[dev]
-
-        inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed)
-        train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index]
-
-        split = {
-            "train": dev_stays.iloc[train],
-            "val": dev_stays.iloc[val],
-            "test": stays.iloc[test],
-        }
+            leave_for_test = np.append(dev[0:start_index], dev[end_index:])
+        dev = pre_dev
+        test = np.append(test, leave_for_test)
+
+    dev_stays = stays.iloc[dev]
+    dev_labels = labels.iloc[dev]
+
+    inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed)
+    train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index]
+
+    split = {
+        "train": dev_stays.iloc[train],
+        "val": dev_stays.iloc[val],
+        "test": stays.iloc[test],
+    }
 
     data_split = {}
     for fold_name, fold in split.items():  # Loop through train / val / test
@@ -100,24 +92,17 @@ def make_single_split(
     return data_split
 
 
-def apply_recipe_to_splits(
-    recipe: Recipe, data: dict[dict[pd.DataFrame]], type: str, test_all: bool = False
-) -> dict[dict[pd.DataFrame]]:
+def apply_recipe_to_splits(recipe: Recipe, data: dict[dict[pd.DataFrame]], type: str) -> dict[dict[pd.DataFrame]]:
     """Fits and transforms the training data, then transforms the validation and test data with the recipe.
 
     Args:
         recipe: Object containing info about the data and steps.
         data: Dict containing 'train', 'val', and 'test' and types of data per split.
         type: Whether to apply recipe to dynamic data, static data or outcomes.
-        test_all: If true, the test set will be the entire dataset.
 
     Returns:
         Transformed data divided into 'train', 'val', and 'test'.
     """
-    if test_all:
-        data["test"][type] = recipe.prep(data["test"][type])
-        return data
-
     data["train"][type] = recipe.prep()
     data["val"][type] = recipe.bake(data["val"][type])
     data["test"][type] = recipe.bake(data["test"][type])
@@ -138,7 +123,6 @@ def preprocess_data(
     cv_folds: int = 5,
     fold_size: int = None,
     fold_index: int = 0,
-    test_all: bool = False,
 ) -> dict[dict[pd.DataFrame]]:
     """Perform loading, splitting, imputing and normalising of task data.
 
@@ -154,7 +138,6 @@ def preprocess_data(
         repetition_index: Index of the repetition to return.
         cv_folds: Number of folds to use for cross validation.
         fold_index: Index of the fold to return.
-        test_all: If true, the test set will be the entire dataset.
 
     Returns:
         Preprocessed data as DataFrame in a hierarchical dict with data type (STATIC/DYNAMIC/OUTCOME)
@@ -163,8 +146,6 @@ def preprocess_data(
     cache_dir = data_dir / "cache"
     if fold_size:
         cache_dir = cache_dir / f"T{fold_size}"
-    if test_all:
-        cache_dir = cache_dir / "test_complete"
     dumped_file_names = json.dumps(file_names, sort_keys=True)
     dumped_vars = json.dumps(vars, sort_keys=True)
     config_string = f"{dumped_file_names}{dumped_vars}{use_features}{seed}{repetition_index}{fold_index}{debug}".encode(
@@ -193,7 +174,6 @@ def preprocess_data(
         seed=seed,
         debug=debug,
         fold_size=fold_size,
-        test_all=test_all,
     )
 
     logging.info("Preprocessing static data.")
@@ -203,7 +183,7 @@ def preprocess_data(
     sta_rec.add_step(StepSklearn(SimpleImputer(missing_values=None, strategy="most_frequent"), sel=has_type("object")))
     sta_rec.add_step(StepSklearn(LabelEncoder(), sel=has_type("object"), columnwise=True))
 
-    data = apply_recipe_to_splits(sta_rec, data, "STATIC", test_all=test_all)
+    data = apply_recipe_to_splits(sta_rec, data, "STATIC")
 
     logging.info("Preprocessing dynamic data.")
     dyn_rec = Recipe(data["train"]["DYNAMIC"], [], vars["DYNAMIC"], vars["GROUP"], vars["SEQUENCE"])
@@ -217,7 +197,7 @@ def preprocess_data(
     dyn_rec.add_step(StepImputeFill(method="ffill"))
     dyn_rec.add_step(StepImputeFill(value=0))
 
-    data = apply_recipe_to_splits(dyn_rec, data, "DYNAMIC", test_all=test_all)
+    data = apply_recipe_to_splits(dyn_rec, data, "DYNAMIC")
 
     if use_cache and not cache_file.exists():
         if not cache_dir.exists():
diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py
index 54ef17a9..9e43c78b 100644
--- a/icu_benchmarks/run.py
+++ b/icu_benchmarks/run.py
@@ -9,7 +9,7 @@
 from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters
 from icu_benchmarks.models.domain_adaptation import domain_adaptation
 from scripts.plotting.utils import plot_aggregated_results
-from icu_benchmarks.cross_validation import execute_repeated_cv, evaluate
+from icu_benchmarks.cross_validation import execute_repeated_cv
 from icu_benchmarks.run_utils import (
     build_parser,
     create_run_dir,
@@ -45,16 +45,6 @@ def main(my_args=tuple(sys.argv[1:])):
         run_dir = create_run_dir(log_dir)
         source_dir = args.source_dir
         gin.parse_config_file(source_dir / "train_config.gin")
-        evaluate(
-            args.data_dir,
-            run_dir,
-            args.seed,
-            source_dir=source_dir,
-            reproducible=reproducible,
-            debug=args.debug,
-            use_cache=args.cache,
-        )
-        return
     if args.command == "da":
         gin_config_files = (
             [Path(f"configs/experiments/{args.experiment}.gin")]

From ccc0d34c2b7dad5cf608dc451859636820e3fe66 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 14 Jan 2023 15:51:47 +0100
Subject: [PATCH 082/163] Update LogisticRegression.gin

---
 configs/models/LogisticRegression.gin | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configs/models/LogisticRegression.gin b/configs/models/LogisticRegression.gin
index 9a18f798..0d18b8f7 100644
--- a/configs/models/LogisticRegression.gin
+++ b/configs/models/LogisticRegression.gin
@@ -18,6 +18,6 @@ model/hyperparameter.penalty = ["l1", "l2", "elasticnet"]
 model/hyperparameter.l1_ratio = (0.0, 1.0)
 
 tune_hyperparameters.scopes = ["model"]
-tune_hyperparameters.n_initial_points = 10
-tune_hyperparameters.n_calls = 100
-tune_hyperparameters.folds_to_tune_on = 3
+tune_hyperparameters.n_initial_points = 5
+tune_hyperparameters.n_calls = 30
+tune_hyperparameters.folds_to_tune_on = 2

From 395841f0625fdcd714955193a91e16006e500818 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 14 Jan 2023 15:52:25 +0100
Subject: [PATCH 083/163] Update preprocess.py

---
 icu_benchmarks/data/preprocess.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py
index 6f573d1d..96a6abfe 100644
--- a/icu_benchmarks/data/preprocess.py
+++ b/icu_benchmarks/data/preprocess.py
@@ -50,7 +50,6 @@ def make_single_split(
         stays = stays.sample(frac=0.01, random_state=seed)
     labels = data["OUTCOME"][vars["LABEL"]].loc[stays.index]
 
- 
     outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed)
     dev, test = list(outer_CV.split(stays, labels))[repetition_index]
 

From e1b3fe12cc3e3941ec7b35f849f0296ddd0dc214 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 14 Jan 2023 15:53:23 +0100
Subject: [PATCH 084/163] Update preprocess.py

---
 icu_benchmarks/data/preprocess.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py
index 96a6abfe..27427bdc 100644
--- a/icu_benchmarks/data/preprocess.py
+++ b/icu_benchmarks/data/preprocess.py
@@ -51,6 +51,8 @@ def make_single_split(
     labels = data["OUTCOME"][vars["LABEL"]].loc[stays.index]
 
     outer_CV = StratifiedKFold(cv_repetitions, shuffle=True, random_state=seed)
+    inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed)
+
     dev, test = list(outer_CV.split(stays, labels))[repetition_index]
 
     if fold_size:
@@ -71,7 +73,6 @@ def make_single_split(
     dev_stays = stays.iloc[dev]
     dev_labels = labels.iloc[dev]
 
-    inner_CV = StratifiedKFold(cv_folds, shuffle=True, random_state=seed)
     train, val = list(inner_CV.split(dev_stays, dev_labels))[fold_index]
 
     split = {

From 94969b713f72be8d10d31174a9ca176b660ce48f Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 14 Jan 2023 16:33:15 +0100
Subject: [PATCH 085/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 11e6d40b..fd0678f3 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -18,8 +18,6 @@
 
                 writer.writeheader()
                 for model in endpoint.iterdir():
-                    if model.name == "LSTM":
-                        continue
                     for target in model.iterdir():
                         target_sizes = ['target_500', 'target_1000', 'target_2000']
                         for target_size in target_sizes:

From c647be980def6de8a45072d1ca4ad6c09cc21f9c Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 14 Jan 2023 21:04:52 +0100
Subject: [PATCH 086/163] fix comments

---
 icu_benchmarks/models/domain_adaptation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 5fe7461c..168ec07c 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -177,7 +177,7 @@ def domain_adaptation(
                 log_dir_fold = log_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}"
                 log_dir_fold.mkdir(parents=True, exist_ok=True)
 
-                # evaluate target baselines
+                # train target model
                 target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
                 
                 test_predictions, test_labels = get_predictions_for_all_models(
@@ -189,10 +189,10 @@ def domain_adaptation(
                     source_datasets=source_datasets,
                 )
 
-                # evaluate source baselines
                 for baseline, predictions in test_predictions.items():
                     logging.info("Evaluating model: {}".format(baseline))
                     fold_results[baseline] = calculate_metrics(predictions, test_labels)
+                # evaluate baselines
 
                 # evaluate convex combination of models
                 test_predictions_list = list(test_predictions.values())

From 7d54d7ea6354126742dc7b5b9e43bbee43964dc6 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 14 Jan 2023 21:05:39 +0100
Subject: [PATCH 087/163] test different weights

---
 icu_benchmarks/models/domain_adaptation.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 168ec07c..a73775c3 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -136,9 +136,19 @@ def domain_adaptation(
     cv_folds = 5
     cv_folds_to_train = 5
     target_sizes = [500, 1000, 2000]
-    datasets = ["hirid", "aumc", "eicu", "miiv"]
+    datasets = ["miiv", "aumc", "eicu", "miiv"]
     target_weights = [0.1, 0.2, 0.5, 1, 2, 5]
-    weights = [1] * (len(datasets) - 1)
+    target_weights = [0.1, 0.2, 0.5, 1, 2, 5]
+    # weights = [1] * (len(datasets) - 1)
+    weights = [
+        [0, 1, 2, 1],
+        [0, 1, 5, 1],
+        [0, 1, 10, 1],
+        [1, 1, 1, 1],
+        [1, 1, 2, 1],
+        [1, 1, 5, 1],
+        [1, 1, 10, 1],
+    ]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
     gin_config_before_tuning = gin.config_str()
@@ -203,10 +213,11 @@ def domain_adaptation(
                 fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 
                 logging.info("Evaluating convex combination of models.")
-                for t in target_weights:
-                    w = [t * sum(weights)] + weights
+                for w in weights:
+                    # w =  weights + [t * sum(weights)]
                     logging.info(f"Evaluating target weight: {t}")
-                    test_pred = np.average(test_predictions_list, axis=0, weights=w)
+                    logging.info(f"Evaluating weights: {w}")
+                    test_pred = np.average(source_predictions_with_target, axis=0, weights=w)
                     fold_results[f"convex_combination_{t}"] = calculate_metrics(test_pred, test_labels)
 
                 log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)

From c6a54cec2ec430ccd734777b239a63fbf15a452c Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 14 Jan 2023 21:11:16 +0100
Subject: [PATCH 088/163] only plot avg

---
 scripts/results/da_to_csv.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index fd0678f3..75281df1 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -11,7 +11,9 @@
                 info = ["model", "target", "target_size"]
                 source_names = ['target', 'aumc', 'eicu', 'hirid', 'miiv', 'convex_combination_without_target', 'convex_combination_0.1', 'convex_combination_0.2', 'convex_combination_0.5', 'convex_combination_1', 'convex_combination_2', 'convex_combination_5']
                 stats_basis = ['avg', 'std', 'CI_0.95']
+                stats_basis = ['avg']
                 stats = ['avg', 'std', 'CI_0.95_min', 'CI_0.95_max']
+                stats = ['avg']
                 # combine fieldnames and stats
                 full_fields = [f'{source}_{stat}' for source in source_names for stat in stats]
                 writer = csv.DictWriter(csv_file, fieldnames=info+full_fields)

From ac1a7bb3f873f4a3f51c1fb23afb219351bf3f5d Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 14 Jan 2023 21:15:28 +0100
Subject: [PATCH 089/163] test other weights

---
 icu_benchmarks/models/domain_adaptation.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index a73775c3..36ff8ad5 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -136,8 +136,7 @@ def domain_adaptation(
     cv_folds = 5
     cv_folds_to_train = 5
     target_sizes = [500, 1000, 2000]
-    datasets = ["miiv", "aumc", "eicu", "miiv"]
-    target_weights = [0.1, 0.2, 0.5, 1, 2, 5]
+    datasets = ["aumc", "eicu", "hirid", "miiv"]
     target_weights = [0.1, 0.2, 0.5, 1, 2, 5]
     # weights = [1] * (len(datasets) - 1)
     weights = [
@@ -209,16 +208,16 @@ def domain_adaptation(
                 test_predictions_list_without_target = test_predictions_list[1:]
 
                 logging.info("Evaluating convex combination of models without target.")
-                test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=weights)
+                test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1])
                 fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 
                 logging.info("Evaluating convex combination of models.")
                 for w in weights:
                     # w =  weights + [t * sum(weights)]
-                    logging.info(f"Evaluating target weight: {t}")
+                    # logging.info(f"Evaluating target weight: {t}")
                     logging.info(f"Evaluating weights: {w}")
-                    test_pred = np.average(source_predictions_with_target, axis=0, weights=w)
-                    fold_results[f"convex_combination_{t}"] = calculate_metrics(test_pred, test_labels)
+                    test_pred = np.average(test_predictions_list, axis=0, weights=w)
+                    fold_results[f"convex_combination_{w}"] = calculate_metrics(test_pred, test_labels)
 
                 log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
             log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)

From da3fce1d361bd54602ae13d927cdac6e0bf12d34 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 14 Jan 2023 21:19:33 +0100
Subject: [PATCH 090/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 36ff8ad5..8d247c0d 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -163,7 +163,7 @@ def domain_adaptation(
         gin.bind_parameter("preprocess.fold_size", target_size)
         log_dir = run_dir / task / model / dataset / f"target_{target_size}"
         log_dir.mkdir(parents=True, exist_ok=True)
-        choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug)
+        choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug)
         gin_config_with_target_hyperparameters = gin.config_str()
         results = {}
         for repetition in range(cv_repetitions_to_train):

From c0f8c392c267b5eddb61398f1eb64a371c24122e Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 14 Jan 2023 22:07:53 +0100
Subject: [PATCH 091/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 75281df1..346b3126 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -20,10 +20,10 @@
 
                 writer.writeheader()
                 for model in endpoint.iterdir():
-                    for target in model.iterdir():
+                    for target in ["aumc", "eicu", "hirid", "miiv"]:
                         target_sizes = ['target_500', 'target_1000', 'target_2000']
                         for target_size in target_sizes:
-                            with open(target / target_size / 'averaged_source_metrics.json', 'r') as f:
+                            with open(model / target / target_size / 'averaged_source_metrics.json', 'r') as f:
                                 results = json.load(f)
 
                                 row_data = {

From 7d91bac44c99c478aecaf4f49d05dcf3f8c6000b Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 14 Jan 2023 22:10:27 +0100
Subject: [PATCH 092/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 346b3126..8d625cc2 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -28,7 +28,7 @@
 
                                 row_data = {
                                     'model': model.name,
-                                    'target': target.name,
+                                    'target': target,
                                     'target_size': target_size
                                 }
                                 for stat in stats_basis:

From 4c038854ff57e5651194c70e68cd7170ce439ceb Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 00:49:02 +0100
Subject: [PATCH 093/163] auc and loss based weight functions

---
 icu_benchmarks/models/domain_adaptation.py | 175 ++++++++++++++++-----
 1 file changed, 134 insertions(+), 41 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 8d247c0d..5ec3b687 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -1,3 +1,4 @@
+import inspect
 import json
 import os
 import random
@@ -8,6 +9,7 @@
 import pandas as pd
 from pathlib import Path
 import scipy.stats as stats
+from sklearn.metrics import log_loss, roc_auc_score
 
 from icu_benchmarks.data.loader import RICUDataset
 from icu_benchmarks.data.preprocess import preprocess_data
@@ -19,22 +21,13 @@
 from icu_benchmarks.run_utils import log_full_line
 
 
-def get_predictions_for_single_model(target_model: object, dataset: RICUDataset, model_dir: Path, log_dir: Path):
-    """Get predictions for a single model.
-
-    Args:
-        target_model: Model to get predictions for.
-        dataset: Dataset to get predictions for.
-        model_dir: Path to directory where model weights are stored.
-        log_dir: Path to directory where model output should be saved.
-
-    Returns:
-        Tuple of predictions and labels.
-    """
+def load_model(model_dir: Path, log_dir: Path):
+    """Load model from gin config."""
     gin.parse_config_file(model_dir / "train_config.gin")
-    if isinstance(target_model, DLWrapper):
+    model_type = gin.query_parameter("train_common.model")
+    if str(model_type) == "@DLWrapper()":
         model = DLWrapper()
-    else:
+    elif str(model_type) == "@MLWrapper()":
         model = MLWrapper()
     model.set_log_dir(log_dir)
     if (model_dir / "model.torch").is_file():
@@ -45,7 +38,22 @@ def get_predictions_for_single_model(target_model: object, dataset: RICUDataset,
         model.load_weights(model_dir / "model.joblib")
     else:
         raise Exception("No weights to load at path : {}".format(model_dir / "model.*"))
-    logging.info(f"Generating predictions for model : {model_dir}")
+    return model
+
+
+def get_predictions_for_single_model(dataset: RICUDataset, model_dir: Path, log_dir: Path):
+    """Get predictions for a single model.
+
+    Args:
+        target_model: Model to get predictions for.
+        dataset: Dataset to get predictions for.
+        model_dir: Path to directory where model weights are stored.
+        log_dir: Path to directory where model output should be saved.
+
+    Returns:
+        Tuple of predictions and labels.
+    """
+    model = load_model(model_dir, log_dir)
     return model.predict(dataset, None, None)
 
 
@@ -55,8 +63,8 @@ def calculate_metrics(predictions: np.ndarray, labels: np.ndarray):
         value = metric(labels, predictions)
         metric_results[name] = value
         # Only log float values
-        if isinstance(value, np.float):
-            logging.info("Test {}: {}".format(name, value))
+        # if isinstance(value, np.float):
+        #     logging.info("Test {}: {}".format(name, value))
     return metric_results
 
 
@@ -95,11 +103,10 @@ def get_predictions_for_all_models(
     _, test_labels = test_dataset.get_data_and_labels()
 
     test_predictions = {}
-    logging.info("Generating predictions for target")
     test_predictions["target"] = target_model.predict(test_dataset, None, None)
     for source in source_datasets:
         model_dir = source_dir / source
-        test_predictions[model_dir.name] = get_predictions_for_single_model(target_model, test_dataset, model_dir, log_dir)
+        test_predictions[model_dir.name] = get_predictions_for_single_model(test_dataset, model_dir, log_dir)
 
     for name, prediction in test_predictions.items():
         if isinstance(target_model, MLWrapper) and prediction.ndim == 2:
@@ -139,14 +146,23 @@ def domain_adaptation(
     datasets = ["aumc", "eicu", "hirid", "miiv"]
     target_weights = [0.1, 0.2, 0.5, 1, 2, 5]
     # weights = [1] * (len(datasets) - 1)
-    weights = [
-        [0, 1, 2, 1],
-        [0, 1, 5, 1],
-        [0, 1, 10, 1],
-        [1, 1, 1, 1],
-        [1, 1, 2, 1],
-        [1, 1, 5, 1],
-        [1, 1, 10, 1],
+    auc_functions = [
+        lambda x: (x-0.5) ** 1,
+        lambda x: (x-0.5) ** 2,
+        lambda x: (x-0.5) ** 3,
+        lambda x: (x-0.5) ** 4,
+        lambda x: (x-0.5) ** 5,
+        lambda x: ((2 ** (10*(x-0.5))) - 1),
+        lambda x: ((3 ** (10*(x-0.5))) - 1),
+    ]
+    loss_functions = [
+        lambda x: (1-x) ** 1,
+        lambda x: (1-x) ** 2,
+        lambda x: (1-x) ** 3,
+        lambda x: (1-x) ** 4,
+        lambda x: (1-x) ** 5,
+        lambda x: ((2 ** (10*(1-x))) - 1),
+        lambda x: ((3 ** (10*(1-x))) - 1),
     ]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
@@ -163,12 +179,12 @@ def domain_adaptation(
         gin.bind_parameter("preprocess.fold_size", target_size)
         log_dir = run_dir / task / model / dataset / f"target_{target_size}"
         log_dir.mkdir(parents=True, exist_ok=True)
-        choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug)
-        gin_config_with_target_hyperparameters = gin.config_str()
+        # choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug)
+        # gin_config_with_target_hyperparameters = gin.config_str()
         results = {}
         for repetition in range(cv_repetitions_to_train):
             for fold_index in range(cv_folds_to_train):
-                gin.parse_config(gin_config_with_target_hyperparameters)
+                # gin.parse_config(gin_config_with_target_hyperparameters)
                 results[f"{repetition}_{fold_index}"] = {}
                 fold_results = results[f"{repetition}_{fold_index}"]
 
@@ -187,8 +203,26 @@ def domain_adaptation(
                 log_dir_fold.mkdir(parents=True, exist_ok=True)
 
                 # train target model
-                target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
+                # target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
+                target_model = load_model(Path("../yaib_logs/DA") / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold)
                 
+                val_predictions, val_labels = get_predictions_for_all_models(
+                    target_model,
+                    data,
+                    log_dir_fold,
+                    source_dir=model_path / task / model,
+                    seed=seed,
+                    source_datasets=source_datasets,
+                    test_on="val",
+                )
+                val_losses = {}
+                val_aucs = {}
+                val_losses["target"] = log_loss(val_labels, val_predictions["target"])
+                val_aucs["target"] = roc_auc_score(val_labels, val_predictions["target"])
+                for baseline, predictions in val_predictions.items():
+                    val_losses[baseline] = log_loss(val_labels, predictions)
+                    val_aucs[baseline] = roc_auc_score(val_labels, predictions)
+
                 test_predictions, test_labels = get_predictions_for_all_models(
                     target_model,
                     data,
@@ -199,7 +233,7 @@ def domain_adaptation(
                 )
 
                 for baseline, predictions in test_predictions.items():
-                    logging.info("Evaluating model: {}".format(baseline))
+                    # logging.info("Evaluating model: {}".format(baseline))
                     fold_results[baseline] = calculate_metrics(predictions, test_labels)
                 # evaluate baselines
 
@@ -207,19 +241,78 @@ def domain_adaptation(
                 test_predictions_list = list(test_predictions.values())
                 test_predictions_list_without_target = test_predictions_list[1:]
 
-                logging.info("Evaluating convex combination of models without target.")
+                # logging.info("Evaluating convex combination of models without target.")
                 test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1])
                 fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 
-                logging.info("Evaluating convex combination of models.")
-                for w in weights:
-                    # w =  weights + [t * sum(weights)]
-                    # logging.info(f"Evaluating target weight: {t}")
-                    logging.info(f"Evaluating weights: {w}")
-                    test_pred = np.average(test_predictions_list, axis=0, weights=w)
-                    fold_results[f"convex_combination_{w}"] = calculate_metrics(test_pred, test_labels)
+                # logging.info("Evaluating convex combination of models.")
+                # for w in weights:
+                #     # w =  weights + [t * sum(weights)]
+                #     # logging.info(f"Evaluating target weight: {t}")
+                #     logging.info(f"Evaluating weights: {w}")
+                #     test_pred = np.average(test_predictions_list, axis=0, weights=w)
+                #     fold_results[f"convex_combination_{w}"] = calculate_metrics(test_pred, test_labels)
+
+                # find top three auc functions
+                rated_auc_functions = []
+                for f in auc_functions:
+                    f_str = inspect.getsource(f).replace(" ", "")[:-2]
+                    # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.")
+                    weights = [f(x) for x in val_aucs.values()]
+                    # logging.info(f"weights: {weights}")
+                    test_pred = np.average(test_predictions_list, axis=0, weights=weights)
+                    fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels)
+                    rated_auc_functions.append((f_str, fold_results[f"AUC_{f_str}"]["AUC"]))
+                rated_auc_functions.sort(key=lambda x: x[1], reverse=True)
+                
 
-                log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
+                # find top three loss functions
+                rated_loss_functions = []
+                for f in loss_functions:
+                    # strip whitespace
+                    f_str = inspect.getsource(f).replace(" ", "")[:-2]
+                    # logging.info(f"Evaluating convex combination of models with loss function {f_str}.")
+                    weights = [f(x) for x in val_losses.values()]
+                    # logging.info(f"losses: {val_losses.values()}")
+                    # logging.info(f"weights: {weights}")
+                    test_pred = np.average(test_predictions_list, axis=0, weights=weights)
+                    fold_results[f"loss_{f_str}"] = calculate_metrics(test_pred, test_labels)
+                    rated_loss_functions.append((f_str, fold_results[f"loss_{f_str}"]["AUC"]))
+                rated_loss_functions.sort(key=lambda x: x[1], reverse=True)
+
+                # logging.info(f"Top three AUC functions: {rated_auc_functions[:3]}")
+                # logging.info(f"Top three loss functions: {rated_loss_functions[:3]}")
+
+                log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)            
+            # average results over folds
+            agg_aucs = {}
+            for fold_results in results.values():
+                for source, metrics in fold_results.items():
+                    agg_aucs.setdefault(source, []).append(metrics["AUC"])
+
+            avg_aucs = {}
+            for source, aucs in agg_aucs.items():
+                avg_aucs[source] = np.mean(aucs)
+
+            # print baselines first, then top three AUC, then top three loss
+            for source, auc in avg_aucs.items():
+                if source in ["target", "convex_combination_without_target"] + datasets:
+                    logging.info(f"{source}: {auc}")
+            avg_aucs_list = sorted(avg_aucs.items(), key=lambda x: x[1], reverse=True)
+            i = 0
+            for source, auc in avg_aucs_list:
+                if "AUC" in source:
+                    i += 1
+                    logging.info(f"{source}: {auc}")
+                    if i == 3:
+                        break
+            i = 0
+            for source, auc in avg_aucs_list:
+                if "loss" in source:
+                    i += 1
+                    logging.info(f"{source}: {auc}")
+                    if i == 3:
+                        break
             log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
 
         source_metrics = {}

From 464ad05ddab6173d6f180d3aa42dd3c2535e6ec6 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 00:49:36 +0100
Subject: [PATCH 094/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 5ec3b687..16e155d6 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -204,7 +204,7 @@ def domain_adaptation(
 
                 # train target model
                 # target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
-                target_model = load_model(Path("../yaib_logs/DA") / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold)
+                target_model = load_model(Path("../DA_logs/") / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold)
                 
                 val_predictions, val_labels = get_predictions_for_all_models(
                     target_model,

From 45932cfd344dc8e01e5dad2ad1a8951f1786c8a4 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 01:05:39 +0100
Subject: [PATCH 095/163] cache predictions

---
 icu_benchmarks/models/domain_adaptation.py | 54 ++++++++++++++--------
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 16e155d6..156df6f8 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -166,6 +166,7 @@ def domain_adaptation(
     ]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
+    old_run_dir = Path("../DA_logs/")
     gin_config_before_tuning = gin.config_str()
 
     # evaluate models on same test split
@@ -204,17 +205,25 @@ def domain_adaptation(
 
                 # train target model
                 # target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
-                target_model = load_model(Path("../DA_logs/") / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold)
+                target_model = load_model(old_run_dir / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold)
                 
-                val_predictions, val_labels = get_predictions_for_all_models(
-                    target_model,
-                    data,
-                    log_dir_fold,
-                    source_dir=model_path / task / model,
-                    seed=seed,
-                    source_datasets=source_datasets,
-                    test_on="val",
-                )
+                # generate predictions and write to file if not already done
+                if not (log_dir_fold / "val_predictions.json").exists():
+                    val_predictions, val_labels = get_predictions_for_all_models(
+                        target_model,
+                        data,
+                        log_dir_fold,
+                        source_dir=model_path / task / model,
+                        seed=seed,
+                        source_datasets=source_datasets,
+                        test_on="val",
+                    )
+                    with open(log_dir_fold / "val_predictions.json", "w") as f:
+                        json.dump(val_predictions, f, cls=JsonResultLoggingEncoder)
+                else:
+                    with open(log_dir_fold / "val_predictions.json", "r") as f:
+                        val_predictions = json.load(f)
+                    _, val_labels = RICUDataset(data, split="val").get_data_and_labels()
                 val_losses = {}
                 val_aucs = {}
                 val_losses["target"] = log_loss(val_labels, val_predictions["target"])
@@ -223,14 +232,23 @@ def domain_adaptation(
                     val_losses[baseline] = log_loss(val_labels, predictions)
                     val_aucs[baseline] = roc_auc_score(val_labels, predictions)
 
-                test_predictions, test_labels = get_predictions_for_all_models(
-                    target_model,
-                    data,
-                    log_dir_fold,
-                    source_dir=model_path / task / model,
-                    seed=seed,
-                    source_datasets=source_datasets,
-                )
+                # generate predictions and write to file if not already done
+                if not (log_dir_fold / "test_predictions.json").exists():
+                    test_predictions, test_labels = get_predictions_for_all_models(
+                        target_model,
+                        data,
+                        log_dir_fold,
+                        source_dir=model_path / task / model,
+                        seed=seed,
+                        source_datasets=source_datasets,
+                    )
+                    with open(log_dir_fold / "test_predictions.json", "w") as f:
+                        json.dump(test_predictions, f, cls=JsonResultLoggingEncoder)
+                else:
+                    with open(log_dir_fold / "test_predictions.json", "r") as f:
+                        test_predictions = json.load(f)
+                    _, test_labels = RICUDataset(data, split="test").get_data_and_labels()
+
 
                 for baseline, predictions in test_predictions.items():
                     # logging.info("Evaluating model: {}".format(baseline))

From be5ad300cb0202d9b075c25a6d4dc65100222f44 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 01:38:24 +0100
Subject: [PATCH 096/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 156df6f8..527fb9b0 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -231,6 +231,7 @@ def domain_adaptation(
                 for baseline, predictions in val_predictions.items():
                     val_losses[baseline] = log_loss(val_labels, predictions)
                     val_aucs[baseline] = roc_auc_score(val_labels, predictions)
+                logging.info("Validation losses: %s", val_losses)
 
                 # generate predictions and write to file if not already done
                 if not (log_dir_fold / "test_predictions.json").exists():

From 5c8265a2c334f755ebb05a01d0c139b7af358cab Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 02:47:15 +0100
Subject: [PATCH 097/163] Update preprocess.py

---
 icu_benchmarks/data/preprocess.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/icu_benchmarks/data/preprocess.py b/icu_benchmarks/data/preprocess.py
index 27427bdc..11aec03e 100644
--- a/icu_benchmarks/data/preprocess.py
+++ b/icu_benchmarks/data/preprocess.py
@@ -123,6 +123,7 @@ def preprocess_data(
     cv_folds: int = 5,
     fold_size: int = None,
     fold_index: int = 0,
+    test_all: bool = False,
 ) -> dict[dict[pd.DataFrame]]:
     """Perform loading, splitting, imputing and normalising of task data.
 

From a834354284589831f26a1caf8f962f262ae0004e Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 02:53:09 +0100
Subject: [PATCH 098/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 527fb9b0..8f0ca8ce 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -166,7 +166,7 @@ def domain_adaptation(
     ]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
-    old_run_dir = Path("../DA_logs/")
+    old_run_dir = Path("../yaib_logs/DA")
     gin_config_before_tuning = gin.config_str()
 
     # evaluate models on same test split
@@ -280,6 +280,9 @@ def domain_adaptation(
                     weights = [f(x) for x in val_aucs.values()]
                     # logging.info(f"weights: {weights}")
                     test_pred = np.average(test_predictions_list, axis=0, weights=weights)
+                    print(f_str)
+                    print(test_pred.min())
+                    print(test_pred.max())
                     fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels)
                     rated_auc_functions.append((f_str, fold_results[f"AUC_{f_str}"]["AUC"]))
                 rated_auc_functions.sort(key=lambda x: x[1], reverse=True)

From 5d6e273bc3411b0179ab5734eccf14bf58f51fb8 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 02:53:43 +0100
Subject: [PATCH 099/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 8f0ca8ce..d360783f 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -166,7 +166,7 @@ def domain_adaptation(
     ]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
-    old_run_dir = Path("../yaib_logs/DA")
+    old_run_dir = Path("../DA_logs")
     gin_config_before_tuning = gin.config_str()
 
     # evaluate models on same test split

From 5ba94281476e43d8be5c6f50eb935475d26abb98 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 02:55:14 +0100
Subject: [PATCH 100/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index d360783f..ac7871e0 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -254,6 +254,8 @@ def domain_adaptation(
                 for baseline, predictions in test_predictions.items():
                     # logging.info("Evaluating model: {}".format(baseline))
                     fold_results[baseline] = calculate_metrics(predictions, test_labels)
+                    print(test_predictions.min())
+                    print(test_predictions.max())
                 # evaluate baselines
 
                 # evaluate convex combination of models

From a8b202129500a5073dbf7aee48bf377c2e13bf54 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 02:55:39 +0100
Subject: [PATCH 101/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index ac7871e0..b56c183d 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -254,8 +254,8 @@ def domain_adaptation(
                 for baseline, predictions in test_predictions.items():
                     # logging.info("Evaluating model: {}".format(baseline))
                     fold_results[baseline] = calculate_metrics(predictions, test_labels)
-                    print(test_predictions.min())
-                    print(test_predictions.max())
+                    print(predictions.min())
+                    print(predictions.max())
                 # evaluate baselines
 
                 # evaluate convex combination of models

From 0c97842330b913f9ea1b66a03439682f1c1399bf Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 02:56:29 +0100
Subject: [PATCH 102/163] Use built-in min()/max() for the prediction debug prints

---
 icu_benchmarks/models/domain_adaptation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index b56c183d..fcbb3b0e 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -254,8 +254,8 @@ def domain_adaptation(
                 for baseline, predictions in test_predictions.items():
                     # logging.info("Evaluating model: {}".format(baseline))
                     fold_results[baseline] = calculate_metrics(predictions, test_labels)
-                    print(predictions.min())
-                    print(predictions.max())
+                    print(min(predictions))
+                    print(max(predictions))
                 # evaluate baselines
 
                 # evaluate convex combination of models

From 95addb712cb40a06da56d0ebb145d52528e1d125 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 02:57:52 +0100
Subject: [PATCH 103/163] Print weights next to the f_str/test_pred debug output

---
 icu_benchmarks/models/domain_adaptation.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index fcbb3b0e..b5ecb056 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -285,6 +285,8 @@ def domain_adaptation(
                     print(f_str)
                     print(test_pred.min())
                     print(test_pred.max())
+                    print(weights)
+                    
                     fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels)
                     rated_auc_functions.append((f_str, fold_results[f"AUC_{f_str}"]["AUC"]))
                 rated_auc_functions.sort(key=lambda x: x[1], reverse=True)

From bbb693939494cb671717535018b1f085ad85826e Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 03:02:13 +0100
Subject: [PATCH 104/163] Log validation AUCs and min-max scale the weighted test prediction

---
 icu_benchmarks/models/domain_adaptation.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index b5ecb056..a8a63348 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -231,6 +231,7 @@ def domain_adaptation(
                 for baseline, predictions in val_predictions.items():
                     val_losses[baseline] = log_loss(val_labels, predictions)
                     val_aucs[baseline] = roc_auc_score(val_labels, predictions)
+                logging.info("Validation AUCS: %s", val_aucs)
                 logging.info("Validation losses: %s", val_losses)
 
                 # generate predictions and write to file if not already done
@@ -282,6 +283,7 @@ def domain_adaptation(
                     weights = [f(x) for x in val_aucs.values()]
                     # logging.info(f"weights: {weights}")
                     test_pred = np.average(test_predictions_list, axis=0, weights=weights)
+                    test_pred = (test_pred-np.min(test_pred))/(np.max(test_pred)-np.min(test_pred))
                     print(f_str)
                     print(test_pred.min())
                     print(test_pred.max())

From da34511f884470dd8b79830cdd9f1a56b0bb2104 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 03:13:13 +0100
Subject: [PATCH 105/163] Add weights.clip(min=0); remove test_pred averaging, scaling and prints

---
 icu_benchmarks/models/domain_adaptation.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index a8a63348..6039dbac 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -281,12 +281,9 @@ def domain_adaptation(
                     f_str = inspect.getsource(f).replace(" ", "")[:-2]
                     # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.")
                     weights = [f(x) for x in val_aucs.values()]
+                    weights.clip(min=0)
                     # logging.info(f"weights: {weights}")
-                    test_pred = np.average(test_predictions_list, axis=0, weights=weights)
-                    test_pred = (test_pred-np.min(test_pred))/(np.max(test_pred)-np.min(test_pred))
                     print(f_str)
-                    print(test_pred.min())
-                    print(test_pred.max())
                     print(weights)
                     
                     fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels)

From bf6c6b9b392d3978de31489d2b1abd6c038bdbf7 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 03:13:49 +0100
Subject: [PATCH 106/163] Build AUC weights as a NumPy array so clip() is available

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 6039dbac..93e793a0 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -280,7 +280,7 @@ def domain_adaptation(
                 for f in auc_functions:
                     f_str = inspect.getsource(f).replace(" ", "")[:-2]
                     # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.")
-                    weights = [f(x) for x in val_aucs.values()]
+                    weights = np.array([f(x) for x in val_aucs.values()])
                     weights.clip(min=0)
                     # logging.info(f"weights: {weights}")
                     print(f_str)

From 06e8e1ac5816fac7c60efdd3f89e020916c5c93a Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 03:14:51 +0100
Subject: [PATCH 107/163] Assign clipped weights and restore weighted average for test_pred

---
 icu_benchmarks/models/domain_adaptation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 93e793a0..3188c320 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -281,7 +281,8 @@ def domain_adaptation(
                     f_str = inspect.getsource(f).replace(" ", "")[:-2]
                     # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.")
                     weights = np.array([f(x) for x in val_aucs.values()])
-                    weights.clip(min=0)
+                    weights = weights.clip(min=0)
+                    test_pred = np.average(test_predictions_list, axis=0, weights=weights)
                     # logging.info(f"weights: {weights}")
                     print(f_str)
                     print(weights)

From 0df060ccc62186232c656a0e75b110a8d1b53d9e Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 03:15:23 +0100
Subject: [PATCH 108/163] Re-enable logging of AUC-function weights

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 3188c320..0616cdcd 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -283,7 +283,7 @@ def domain_adaptation(
                     weights = np.array([f(x) for x in val_aucs.values()])
                     weights = weights.clip(min=0)
                     test_pred = np.average(test_predictions_list, axis=0, weights=weights)
-                    # logging.info(f"weights: {weights}")
+                    logging.info(f"weights: {weights}")
                     print(f_str)
                     print(weights)
                     

From 52f6090756aa0fac341076d9a5c2a1eab85b885b Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 03:48:17 +0100
Subject: [PATCH 109/163] Aggregate validation losses/AUCs over folds; rate weighting functions per repetition

---
 icu_benchmarks/models/domain_adaptation.py | 114 ++++++++++++---------
 1 file changed, 65 insertions(+), 49 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 0616cdcd..0855655a 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -166,6 +166,7 @@ def domain_adaptation(
     ]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
+    # old_run_dir = Path("../yaib_logs/DA")
     old_run_dir = Path("../DA_logs")
     gin_config_before_tuning = gin.config_str()
 
@@ -184,6 +185,8 @@ def domain_adaptation(
         # gin_config_with_target_hyperparameters = gin.config_str()
         results = {}
         for repetition in range(cv_repetitions_to_train):
+            agg_val_losses = []
+            agg_val_aucs = []
             for fold_index in range(cv_folds_to_train):
                 # gin.parse_config(gin_config_with_target_hyperparameters)
                 results[f"{repetition}_{fold_index}"] = {}
@@ -255,8 +258,6 @@ def domain_adaptation(
                 for baseline, predictions in test_predictions.items():
                     # logging.info("Evaluating model: {}".format(baseline))
                     fold_results[baseline] = calculate_metrics(predictions, test_labels)
-                    print(min(predictions))
-                    print(max(predictions))
                 # evaluate baselines
 
                 # evaluate convex combination of models
@@ -267,6 +268,9 @@ def domain_adaptation(
                 test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1])
                 fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 
+                agg_val_losses.append(val_losses)
+                agg_val_aucs.append(val_aucs)
+
                 # logging.info("Evaluating convex combination of models.")
                 # for w in weights:
                 #     # w =  weights + [t * sum(weights)]
@@ -275,41 +279,53 @@ def domain_adaptation(
                 #     test_pred = np.average(test_predictions_list, axis=0, weights=w)
                 #     fold_results[f"convex_combination_{w}"] = calculate_metrics(test_pred, test_labels)
 
-                # find top three auc functions
-                rated_auc_functions = []
-                for f in auc_functions:
-                    f_str = inspect.getsource(f).replace(" ", "")[:-2]
-                    # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.")
-                    weights = np.array([f(x) for x in val_aucs.values()])
-                    weights = weights.clip(min=0)
-                    test_pred = np.average(test_predictions_list, axis=0, weights=weights)
-                    logging.info(f"weights: {weights}")
-                    print(f_str)
-                    print(weights)
-                    
-                    fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels)
-                    rated_auc_functions.append((f_str, fold_results[f"AUC_{f_str}"]["AUC"]))
-                rated_auc_functions.sort(key=lambda x: x[1], reverse=True)
-                
-
-                # find top three loss functions
-                rated_loss_functions = []
-                for f in loss_functions:
-                    # strip whitespace
-                    f_str = inspect.getsource(f).replace(" ", "")[:-2]
-                    # logging.info(f"Evaluating convex combination of models with loss function {f_str}.")
-                    weights = [f(x) for x in val_losses.values()]
-                    # logging.info(f"losses: {val_losses.values()}")
-                    # logging.info(f"weights: {weights}")
-                    test_pred = np.average(test_predictions_list, axis=0, weights=weights)
-                    fold_results[f"loss_{f_str}"] = calculate_metrics(test_pred, test_labels)
-                    rated_loss_functions.append((f_str, fold_results[f"loss_{f_str}"]["AUC"]))
-                rated_loss_functions.sort(key=lambda x: x[1], reverse=True)
-
                 # logging.info(f"Top three AUC functions: {rated_auc_functions[:3]}")
                 # logging.info(f"Top three loss functions: {rated_loss_functions[:3]}")
 
-                log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)            
+                log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
+
+            avg_val_losses = np.array([np.mean([x[source] for x in agg_val_losses]) for source in val_losses.keys()])
+            avg_val_aucs = {source: np.mean([x[source] for x in agg_val_aucs]) for source in val_aucs.keys()}
+            logging.info("Average validation losses: %s", dict(zip(val_losses.keys(), avg_val_losses)))
+            logging.info("Average validation AUCs: %s", dict(zip(val_aucs.keys(), avg_val_aucs)))
+
+            scaled_losses = 0.9 * avg_val_losses / np.max(avg_val_losses)
+            logging.info(f"scaled_losses: {scaled_losses}")
+
+            # find top three auc functions
+            rated_auc_functions = []
+            for f in auc_functions:
+                f_str = inspect.getsource(f).replace(" ", "")[:-2]
+                # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.")
+                weights = np.array([f(x) for x in avg_val_aucs.values()])
+                weights = weights.clip(min=0)
+                test_pred = np.average(test_predictions_list, axis=0, weights=weights)
+                # logging.info(f"weights: {weights}")
+                
+                fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels)
+                rated_auc_functions.append((f_str, fold_results[f"AUC_{f_str}"]["AUC"]))
+            rated_auc_functions.sort(key=lambda x: x[1], reverse=True)
+            # print top three auc functions
+            for f_str, auc in rated_auc_functions[:3]:
+                logging.info(f"{f_str}: {auc}")
+            
+
+            # find top three loss functions
+            rated_loss_functions = []
+            for f in loss_functions:
+                # strip whitespace
+                f_str = inspect.getsource(f).replace(" ", "")[:-2]
+                # logging.info(f"Evaluating convex combination of models with loss function {f_str}.")
+                weights = [f(x) for x in scaled_losses]
+                logging.info(f"weights: {weights}")
+                test_pred = np.average(test_predictions_list, axis=0, weights=weights)
+                fold_results[f"loss_{f_str}"] = calculate_metrics(test_pred, test_labels)
+                rated_loss_functions.append((f_str, fold_results[f"loss_{f_str}"]["AUC"]))
+            rated_loss_functions.sort(key=lambda x: x[1], reverse=True)
+            for f_str, auc in rated_auc_functions[:3]:
+                logging.info(f"{f_str}: {auc}")
+
+            
             # average results over folds
             agg_aucs = {}
             for fold_results in results.values():
@@ -324,21 +340,21 @@ def domain_adaptation(
             for source, auc in avg_aucs.items():
                 if source in ["target", "convex_combination_without_target"] + datasets:
                     logging.info(f"{source}: {auc}")
-            avg_aucs_list = sorted(avg_aucs.items(), key=lambda x: x[1], reverse=True)
-            i = 0
-            for source, auc in avg_aucs_list:
-                if "AUC" in source:
-                    i += 1
-                    logging.info(f"{source}: {auc}")
-                    if i == 3:
-                        break
-            i = 0
-            for source, auc in avg_aucs_list:
-                if "loss" in source:
-                    i += 1
-                    logging.info(f"{source}: {auc}")
-                    if i == 3:
-                        break
+            # avg_aucs_list = sorted(avg_aucs.items(), key=lambda x: x[1], reverse=True)
+            # i = 0
+            # for source, auc in avg_aucs_list:
+            #     if "AUC" in source:
+            #         i += 1
+            #         logging.info(f"{source}: {auc}")
+            #         if i == 3:
+            #             break
+            # i = 0
+            # for source, auc in avg_aucs_list:
+            #     if "loss" in source:
+            #         i += 1
+            #         logging.info(f"{source}: {auc}")
+            #         if i == 3:
+            #             break
             log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
 
         source_metrics = {}

From a9c83c1e192f324e6ff4c88c4aaa7ef2e17aabd1 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 03:52:09 +0100
Subject: [PATCH 110/163] Log top three loss functions instead of repeating the AUC functions

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 0855655a..bc77d7c8 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -322,7 +322,7 @@ def domain_adaptation(
                 fold_results[f"loss_{f_str}"] = calculate_metrics(test_pred, test_labels)
                 rated_loss_functions.append((f_str, fold_results[f"loss_{f_str}"]["AUC"]))
             rated_loss_functions.sort(key=lambda x: x[1], reverse=True)
-            for f_str, auc in rated_auc_functions[:3]:
+            for f_str, auc in rated_loss_functions[:3]:
                 logging.info(f"{f_str}: {auc}")
 
             

From 3db23a42b7e33150440696beb5b9f64f5e9bc9b6 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 04:18:19 +0100
Subject: [PATCH 111/163] Wrap scaled_losses in np.array; add loss-weighted source-only mixture

---
 icu_benchmarks/models/domain_adaptation.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index bc77d7c8..18c0980a 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -289,7 +289,7 @@ def domain_adaptation(
             logging.info("Average validation losses: %s", dict(zip(val_losses.keys(), avg_val_losses)))
             logging.info("Average validation AUCs: %s", dict(zip(val_aucs.keys(), avg_val_aucs)))
 
-            scaled_losses = 0.9 * avg_val_losses / np.max(avg_val_losses)
+            scaled_losses = np.array(0.9 * avg_val_losses / np.max(avg_val_losses))
             logging.info(f"scaled_losses: {scaled_losses}")
 
             # find top three auc functions
@@ -325,6 +325,12 @@ def domain_adaptation(
             for f_str, auc in rated_loss_functions[:3]:
                 logging.info(f"{f_str}: {auc}")
 
+            # evaluate source only mixture
+            logging.info("Evaluating loss weighted source only mixture.")
+            loss_based_weights = 1 - scaled_losses[1:]
+            test_pred = np.average(test_predictions_list_without_target, axis=0, weights=loss_based_weights)
+            fold_results[f"loss_based_source_only_mixture"] = calculate_metrics(test_pred, test_labels)
+            logging.info(f"auc: {fold_results[f'loss_based_source_only_mixture']['AUC']}")
             
             # average results over folds
             agg_aucs = {}

From 88d5ce59a394f4de7225dc4df4c6dcbc99725988 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 04:29:03 +0100
Subject: [PATCH 112/163] Add AUC-weighted source-only mixture; silence loss weights logging

---
 icu_benchmarks/models/domain_adaptation.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 18c0980a..0edf6108 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -317,7 +317,7 @@ def domain_adaptation(
                 f_str = inspect.getsource(f).replace(" ", "")[:-2]
                 # logging.info(f"Evaluating convex combination of models with loss function {f_str}.")
                 weights = [f(x) for x in scaled_losses]
-                logging.info(f"weights: {weights}")
+                # logging.info(f"weights: {weights}")
                 test_pred = np.average(test_predictions_list, axis=0, weights=weights)
                 fold_results[f"loss_{f_str}"] = calculate_metrics(test_pred, test_labels)
                 rated_loss_functions.append((f_str, fold_results[f"loss_{f_str}"]["AUC"]))
@@ -331,6 +331,12 @@ def domain_adaptation(
             test_pred = np.average(test_predictions_list_without_target, axis=0, weights=loss_based_weights)
             fold_results[f"loss_based_source_only_mixture"] = calculate_metrics(test_pred, test_labels)
             logging.info(f"auc: {fold_results[f'loss_based_source_only_mixture']['AUC']}")
+
+            logging.info("Evaluating auc weighted source only mixture.")
+            auc_based_weights = [avg_val_aucs.values() - 0.5][1:] ** 2
+            test_pred = np.average(test_predictions_list_without_target, axis=0, weights=auc_based_weights)
+            fold_results[f"auc_based_source_only_mixture"] = calculate_metrics(test_pred, test_labels)
+            logging.info(f"auc: {fold_results[f'auc_based_source_only_mixture']['AUC']}")
             
             # average results over folds
             agg_aucs = {}

From ac2eac1b7b2ea7a14f4bab07b2a1b23108ce9a79 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 04:31:45 +0100
Subject: [PATCH 113/163] Compute auc_based_weights from a NumPy array of the average AUCs

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 0edf6108..7c935ed4 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -333,7 +333,7 @@ def domain_adaptation(
             logging.info(f"auc: {fold_results[f'loss_based_source_only_mixture']['AUC']}")
 
             logging.info("Evaluating auc weighted source only mixture.")
-            auc_based_weights = [avg_val_aucs.values() - 0.5][1:] ** 2
+            auc_based_weights = (np.array(list(avg_val_aucs.values())) - 0.5)[1:] ** 2
             test_pred = np.average(test_predictions_list_without_target, axis=0, weights=auc_based_weights)
             fold_results[f"auc_based_source_only_mixture"] = calculate_metrics(test_pred, test_labels)
             logging.info(f"auc: {fold_results[f'auc_based_source_only_mixture']['AUC']}")

From fcc811f12714cb90e7cd61ee60f694d104b34f1d Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 14:27:34 +0100
Subject: [PATCH 114/163] test target with predictions

---
 icu_benchmarks/models/domain_adaptation.py | 42 +++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 7c935ed4..8fda4ae9 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -209,6 +209,17 @@ def domain_adaptation(
                 # train target model
                 # target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
                 target_model = load_model(old_run_dir / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold)
+
+                # get predictions for train set
+                train_predictions, train_labels = get_predictions_for_all_models(
+                    target_model,
+                    data,
+                    log_dir_fold,
+                    source_dir=model_path / task / model,
+                    seed=seed,
+                    source_datasets=source_datasets,
+                    test_on="train",
+                )
                 
                 # generate predictions and write to file if not already done
                 if not (log_dir_fold / "val_predictions.json").exists():
@@ -255,6 +266,35 @@ def domain_adaptation(
                     _, test_labels = RICUDataset(data, split="test").get_data_and_labels()
 
 
+
+                # join predictions with static data and train new model
+                gin.clear_config()
+                gin.parse_config(gin_config_before_tuning)
+                gin.bind_parameter("preprocess.fold_size", target_size)
+                data_with_predictions = preprocess_data(
+                    data_dir,
+                    seed=seed,
+                    debug=debug,
+                    use_cache=True,
+                    cv_repetitions=cv_repetitions,
+                    repetition_index=repetition,
+                    cv_folds=cv_folds,
+                    fold_index=fold_index,
+                )
+                data_with_predictions["train"]["STATIC"] = data_with_predictions["train"]["STATIC"].join(pd.DataFrame(list(train_predictions.values())[1:]).T)
+                data_with_predictions["val"]["STATIC"] = data_with_predictions["val"]["STATIC"].join(pd.DataFrame(list(val_predictions.values())[1:]).T)
+                data_with_predictions["test"]["STATIC"] = data_with_predictions["test"]["STATIC"].join(pd.DataFrame(list(test_predictions.values())[1:]).T)
+                target_model_with_predictions = MLWrapper()
+                target_model_with_predictions.set_log_dir(log_dir_fold)
+                target_model_with_predictions.train(RICUDataset(data_with_predictions, split="train"), RICUDataset(data_with_predictions, split="val"), "balanced", seed)
+                dataset_with_predictions = RICUDataset(data_with_predictions, split="test")
+                preds_w_preds = target_model_with_predictions.predict(dataset_with_predictions, None, None)
+                preds_w_preds = preds_w_preds[:, 1]
+                fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels)
+                logging.info(f"auc with preds: {fold_results[f'target_with_predictions']['AUC']}")
+                
+
+
                 for baseline, predictions in test_predictions.items():
                     # logging.info("Evaluating model: {}".format(baseline))
                     fold_results[baseline] = calculate_metrics(predictions, test_labels)
@@ -350,7 +390,7 @@ def domain_adaptation(
 
             # print baselines first, then top three AUC, then top three loss
             for source, auc in avg_aucs.items():
-                if source in ["target", "convex_combination_without_target"] + datasets:
+                if source in ["target", "convex_combination_without_target", "target_with_predictions"] + datasets:
                     logging.info(f"{source}: {auc}")
             # avg_aucs_list = sorted(avg_aucs.items(), key=lambda x: x[1], reverse=True)
             # i = 0

From f20715c8016f0c94716b64ef28bb63cab4af43d7 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 14:29:37 +0100
Subject: [PATCH 115/163] Select DLWrapper or MLWrapper from the train_common.model gin parameter

---
 icu_benchmarks/models/domain_adaptation.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 8fda4ae9..2da69bd7 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -284,7 +284,11 @@ def domain_adaptation(
                 data_with_predictions["train"]["STATIC"] = data_with_predictions["train"]["STATIC"].join(pd.DataFrame(list(train_predictions.values())[1:]).T)
                 data_with_predictions["val"]["STATIC"] = data_with_predictions["val"]["STATIC"].join(pd.DataFrame(list(val_predictions.values())[1:]).T)
                 data_with_predictions["test"]["STATIC"] = data_with_predictions["test"]["STATIC"].join(pd.DataFrame(list(test_predictions.values())[1:]).T)
-                target_model_with_predictions = MLWrapper()
+                model_type = gin.query_parameter("train_common.model")
+                if str(model_type) == "@DLWrapper()":
+                    target_model_with_predictions = DLWrapper()
+                elif str(model_type) == "@MLWrapper()":
+                    target_model_with_predictions = MLWrapper()
                 target_model_with_predictions.set_log_dir(log_dir_fold)
                 target_model_with_predictions.train(RICUDataset(data_with_predictions, split="train"), RICUDataset(data_with_predictions, split="val"), "balanced", seed)
                 dataset_with_predictions = RICUDataset(data_with_predictions, split="test")

From f2b2ac5fc46653cb966f711c095d315a93a8833f Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 14:51:40 +0100
Subject: [PATCH 116/163] Parse train_config.gin from the target model dir before retraining

---
 icu_benchmarks/models/domain_adaptation.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 2da69bd7..814444f8 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -208,7 +208,8 @@ def domain_adaptation(
 
                 # train target model
                 # target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
-                target_model = load_model(old_run_dir / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}", log_dir_fold)
+                target_model_dir = old_run_dir / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}"
+                target_model = load_model(target_model_dir, log_dir_fold)
 
                 # get predictions for train set
                 train_predictions, train_labels = get_predictions_for_all_models(
@@ -266,10 +267,10 @@ def domain_adaptation(
                     _, test_labels = RICUDataset(data, split="test").get_data_and_labels()
 
 
-
                 # join predictions with static data and train new model
                 gin.clear_config()
                 gin.parse_config(gin_config_before_tuning)
+                gin.parse_config_file(target_model_dir / "train_config.gin")
                 gin.bind_parameter("preprocess.fold_size", target_size)
                 data_with_predictions = preprocess_data(
                     data_dir,

From fdfca07cb779046727667a7c39831ba817a35e96 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 14:54:00 +0100
Subject: [PATCH 117/163] Stop re-parsing gin_config_before_tuning before retraining

---
 icu_benchmarks/models/domain_adaptation.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 814444f8..f097d457 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -269,7 +269,6 @@ def domain_adaptation(
 
                 # join predictions with static data and train new model
                 gin.clear_config()
-                gin.parse_config(gin_config_before_tuning)
                 gin.parse_config_file(target_model_dir / "train_config.gin")
                 gin.bind_parameter("preprocess.fold_size", target_size)
                 data_with_predictions = preprocess_data(

From 89a5a6c303afc5a60ad219bd1ee2d3578dd310f0 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 14:56:39 +0100
Subject: [PATCH 118/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index f097d457..14f6474e 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -270,6 +270,7 @@ def domain_adaptation(
                 # join predictions with static data and train new model
                 gin.clear_config()
                 gin.parse_config_file(target_model_dir / "train_config.gin")
+                gin.bind_parameter("Transformer.emb", 103)
                 gin.bind_parameter("preprocess.fold_size", target_size)
                 data_with_predictions = preprocess_data(
                     data_dir,

From 8cf0207b75ded5b6b2985a3d8658aa647f7c4258 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 14:58:58 +0100
Subject: [PATCH 119/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 14f6474e..f12a18f8 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -294,7 +294,8 @@ def domain_adaptation(
                 target_model_with_predictions.train(RICUDataset(data_with_predictions, split="train"), RICUDataset(data_with_predictions, split="val"), "balanced", seed)
                 dataset_with_predictions = RICUDataset(data_with_predictions, split="test")
                 preds_w_preds = target_model_with_predictions.predict(dataset_with_predictions, None, None)
-                preds_w_preds = preds_w_preds[:, 1]
+                if preds_w_preds.shape[1] == 2:
+                    preds_w_preds = preds_w_preds[:, 1]
                 fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels)
                 logging.info(f"auc with preds: {fold_results[f'target_with_predictions']['AUC']}")
                 

From d89edbf9776572bd8a4fd72f8907dfd13633f00f Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 15:02:53 +0100
Subject: [PATCH 120/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index f12a18f8..92782396 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -294,7 +294,7 @@ def domain_adaptation(
                 target_model_with_predictions.train(RICUDataset(data_with_predictions, split="train"), RICUDataset(data_with_predictions, split="val"), "balanced", seed)
                 dataset_with_predictions = RICUDataset(data_with_predictions, split="test")
                 preds_w_preds = target_model_with_predictions.predict(dataset_with_predictions, None, None)
-                if preds_w_preds.shape[1] == 2:
+                if isinstance(target_model_with_predictions, MLWrapper):
                     preds_w_preds = preds_w_preds[:, 1]
                 fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels)
                 logging.info(f"auc with preds: {fold_results[f'target_with_predictions']['AUC']}")

From 918890ab1b2d873a594bb3acb39113bae2299ac5 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 15:29:18 +0100
Subject: [PATCH 121/163] test cc with preds

---
 icu_benchmarks/models/domain_adaptation.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 92782396..383fa690 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -298,8 +298,8 @@ def domain_adaptation(
                     preds_w_preds = preds_w_preds[:, 1]
                 fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels)
                 logging.info(f"auc with preds: {fold_results[f'target_with_predictions']['AUC']}")
-                
 
+                
 
                 for baseline, predictions in test_predictions.items():
                     # logging.info("Evaluating model: {}".format(baseline))
@@ -314,6 +314,9 @@ def domain_adaptation(
                 test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1])
                 fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 
+                test_pred_with_preds = np.average([preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[.5,1,1,1])
+                fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels)
+
                 agg_val_losses.append(val_losses)
                 agg_val_aucs.append(val_aucs)
 
@@ -396,7 +399,7 @@ def domain_adaptation(
 
             # print baselines first, then top three AUC, then top three loss
             for source, auc in avg_aucs.items():
-                if source in ["target", "convex_combination_without_target", "target_with_predictions"] + datasets:
+                if source in ["target", "convex_combination_without_target", "target_with_predictions", "cc_with_preds"] + datasets:
                     logging.info(f"{source}: {auc}")
             # avg_aucs_list = sorted(avg_aucs.items(), key=lambda x: x[1], reverse=True)
             # i = 0

From 871d5622a07101511e418ff5fab23d6713bf054d Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 16:29:54 +0100
Subject: [PATCH 122/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 383fa690..8bfb714c 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -271,6 +271,7 @@ def domain_adaptation(
                 gin.clear_config()
                 gin.parse_config_file(target_model_dir / "train_config.gin")
                 gin.bind_parameter("Transformer.emb", 103)
+                gin.bind_parameter("LSTM.emb", 103)
                 gin.bind_parameter("preprocess.fold_size", target_size)
                 data_with_predictions = preprocess_data(
                     data_dir,

From cc837594d8f5beb9d4f0593f7e9539b52b326e8e Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 16:52:06 +0100
Subject: [PATCH 123/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 8bfb714c..effe108a 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -271,7 +271,7 @@ def domain_adaptation(
                 gin.clear_config()
                 gin.parse_config_file(target_model_dir / "train_config.gin")
                 gin.bind_parameter("Transformer.emb", 103)
-                gin.bind_parameter("LSTM.emb", 103)
+                gin.bind_parameter("LSTMNet.emb", 103)
                 gin.bind_parameter("preprocess.fold_size", target_size)
                 data_with_predictions = preprocess_data(
                     data_dir,

From 89d12a31be52258885a6adcb0356a1cb3c464cb2 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 15 Jan 2023 16:53:32 +0100
Subject: [PATCH 124/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index effe108a..f5028b45 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -271,7 +271,7 @@ def domain_adaptation(
                 gin.clear_config()
                 gin.parse_config_file(target_model_dir / "train_config.gin")
                 gin.bind_parameter("Transformer.emb", 103)
-                gin.bind_parameter("LSTMNet.emb", 103)
+                gin.bind_parameter("LSTMNet.input_dim", 103)
                 gin.bind_parameter("preprocess.fold_size", target_size)
                 data_with_predictions = preprocess_data(
                     data_dir,

From 6014ea49927ade698c29903d51ba75de8bdd2921 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Mon, 16 Jan 2023 12:42:52 +0100
Subject: [PATCH 125/163] boil down to relevant approaches

---
 icu_benchmarks/models/domain_adaptation.py | 254 ++++++---------------
 1 file changed, 75 insertions(+), 179 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index f5028b45..f9dc667b 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -144,26 +144,6 @@ def domain_adaptation(
     cv_folds_to_train = 5
     target_sizes = [500, 1000, 2000]
     datasets = ["aumc", "eicu", "hirid", "miiv"]
-    target_weights = [0.1, 0.2, 0.5, 1, 2, 5]
-    # weights = [1] * (len(datasets) - 1)
-    auc_functions = [
-        lambda x: (x-0.5) ** 1,
-        lambda x: (x-0.5) ** 2,
-        lambda x: (x-0.5) ** 3,
-        lambda x: (x-0.5) ** 4,
-        lambda x: (x-0.5) ** 5,
-        lambda x: ((2 ** (10*(x-0.5))) - 1),
-        lambda x: ((3 ** (10*(x-0.5))) - 1),
-    ]
-    loss_functions = [
-        lambda x: (1-x) ** 1,
-        lambda x: (1-x) ** 2,
-        lambda x: (1-x) ** 3,
-        lambda x: (1-x) ** 4,
-        lambda x: (1-x) ** 5,
-        lambda x: ((2 ** (10*(1-x))) - 1),
-        lambda x: ((3 ** (10*(1-x))) - 1),
-    ]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
     # old_run_dir = Path("../yaib_logs/DA")
@@ -184,9 +164,9 @@ def domain_adaptation(
         # choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug)
         # gin_config_with_target_hyperparameters = gin.config_str()
         results = {}
+        loss_weighted_results = {}
         for repetition in range(cv_repetitions_to_train):
             agg_val_losses = []
-            agg_val_aucs = []
             for fold_index in range(cv_folds_to_train):
                 # gin.parse_config(gin_config_with_target_hyperparameters)
                 results[f"{repetition}_{fold_index}"] = {}
@@ -206,66 +186,67 @@ def domain_adaptation(
                 log_dir_fold = log_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}"
                 log_dir_fold.mkdir(parents=True, exist_ok=True)
 
-                # train target model
-                # target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
+                # load or train target model
                 target_model_dir = old_run_dir / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}"
-                target_model = load_model(target_model_dir, log_dir_fold)
-
-                # get predictions for train set
-                train_predictions, train_labels = get_predictions_for_all_models(
-                    target_model,
-                    data,
-                    log_dir_fold,
-                    source_dir=model_path / task / model,
-                    seed=seed,
-                    source_datasets=source_datasets,
-                    test_on="train",
-                )
-                
-                # generate predictions and write to file if not already done
-                if not (log_dir_fold / "val_predictions.json").exists():
-                    val_predictions, val_labels = get_predictions_for_all_models(
-                        target_model,
-                        data,
-                        log_dir_fold,
-                        source_dir=model_path / task / model,
-                        seed=seed,
-                        source_datasets=source_datasets,
-                        test_on="val",
-                    )
-                    with open(log_dir_fold / "val_predictions.json", "w") as f:
-                        json.dump(val_predictions, f, cls=JsonResultLoggingEncoder)
+                if target_model_dir.exists():
+                    target_model = load_model(target_model_dir, log_dir_fold)
                 else:
-                    with open(log_dir_fold / "val_predictions.json", "r") as f:
-                        val_predictions = json.load(f)
-                    _, val_labels = RICUDataset(data, split="val").get_data_and_labels()
-                val_losses = {}
-                val_aucs = {}
+                    target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
+
+                def get_preds(split):
+                    if not (log_dir_fold / f"{split}_predictions.json").exists():
+                        predictions, labels = get_predictions_for_all_models(
+                            target_model,
+                            data,
+                            log_dir_fold,
+                            source_dir=model_path / task / model,
+                            seed=seed,
+                            source_datasets=source_datasets,
+                            test_on=split,
+                        )
+                        with open(log_dir_fold / f"{split}_predictions.json", "w") as f:
+                            json.dump(predictions, f, cls=JsonResultLoggingEncoder)
+                    else:
+                        with open(log_dir_fold / f"{split}_predictions.json", "r") as f:
+                            predictions = json.load(f)
+                        _, labels = RICUDataset(data, split=split).get_data_and_labels()
+                    return predictions, labels
+                
+                # get predictions for train set
+                train_predictions, train_labels = get_preds("train")
+                test_predictions, test_labels = get_preds("test")
+                val_predictions, val_labels = get_preds("val")
+                val_losses = {baseline: log_loss(val_labels, predictions) for baseline, predictions in val_predictions.items()}
                 val_losses["target"] = log_loss(val_labels, val_predictions["target"])
-                val_aucs["target"] = roc_auc_score(val_labels, val_predictions["target"])
-                for baseline, predictions in val_predictions.items():
-                    val_losses[baseline] = log_loss(val_labels, predictions)
-                    val_aucs[baseline] = roc_auc_score(val_labels, predictions)
-                logging.info("Validation AUCS: %s", val_aucs)
-                logging.info("Validation losses: %s", val_losses)
-
-                # generate predictions and write to file if not already done
-                if not (log_dir_fold / "test_predictions.json").exists():
-                    test_predictions, test_labels = get_predictions_for_all_models(
-                        target_model,
-                        data,
-                        log_dir_fold,
-                        source_dir=model_path / task / model,
-                        seed=seed,
-                        source_datasets=source_datasets,
-                    )
-                    with open(log_dir_fold / "test_predictions.json", "w") as f:
-                        json.dump(test_predictions, f, cls=JsonResultLoggingEncoder)
-                else:
-                    with open(log_dir_fold / "test_predictions.json", "r") as f:
-                        test_predictions = json.load(f)
-                    _, test_labels = RICUDataset(data, split="test").get_data_and_labels()
+                # logging.info("Validation AUCS: %s", val_aucs)
+                # logging.info("Validation losses: %s", val_losses)
+                agg_val_losses.append(val_losses)
 
+                # evaluate baselines
+                for baseline, predictions in test_predictions.items():
+                    # logging.info("Evaluating model: {}".format(baseline))
+                    fold_results[baseline] = calculate_metrics(predictions, test_labels)
+                
+                # evaluate convex combination of models without target
+                test_predictions_list = list(test_predictions.values())
+                test_predictions_list_without_target = test_predictions_list[1:]
+                test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1])
+                fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
+
+                # evaluate convex combination of models with target
+                weights = {
+                    "aumc": 10535,
+                    "eicu": 113382,
+                    "hirid": 12859,
+                    "mimic": 52045,
+                }
+                weights_without_target = [v for k, v in weights.items() if k != dataset]
+                target_weights = [0.5, 1, 2]
+                for t in target_weights:
+                    w =  [t * sum(weights_without_target)] + weights_without_target
+                    # logging.info(f"Evaluating target weight: {t}")
+                    test_pred = np.average(test_predictions_list, axis=0, weights=w)
+                    fold_results[f"target_weight_{t}"] = calculate_metrics(test_pred, test_labels)
 
                 # join predictions with static data and train new model
                 gin.clear_config()
@@ -298,95 +279,10 @@ def domain_adaptation(
                 if isinstance(target_model_with_predictions, MLWrapper):
                     preds_w_preds = preds_w_preds[:, 1]
                 fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels)
-                logging.info(f"auc with preds: {fold_results[f'target_with_predictions']['AUC']}")
-
-                
-
-                for baseline, predictions in test_predictions.items():
-                    # logging.info("Evaluating model: {}".format(baseline))
-                    fold_results[baseline] = calculate_metrics(predictions, test_labels)
-                # evaluate baselines
-
-                # evaluate convex combination of models
-                test_predictions_list = list(test_predictions.values())
-                test_predictions_list_without_target = test_predictions_list[1:]
-
-                # logging.info("Evaluating convex combination of models without target.")
-                test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1])
-                fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
-
                 test_pred_with_preds = np.average([preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[.5,1,1,1])
                 fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels)
 
-                agg_val_losses.append(val_losses)
-                agg_val_aucs.append(val_aucs)
-
-                # logging.info("Evaluating convex combination of models.")
-                # for w in weights:
-                #     # w =  weights + [t * sum(weights)]
-                #     # logging.info(f"Evaluating target weight: {t}")
-                #     logging.info(f"Evaluating weights: {w}")
-                #     test_pred = np.average(test_predictions_list, axis=0, weights=w)
-                #     fold_results[f"convex_combination_{w}"] = calculate_metrics(test_pred, test_labels)
-
-                # logging.info(f"Top three AUC functions: {rated_auc_functions[:3]}")
-                # logging.info(f"Top three loss functions: {rated_loss_functions[:3]}")
-
                 log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
-
-            avg_val_losses = np.array([np.mean([x[source] for x in agg_val_losses]) for source in val_losses.keys()])
-            avg_val_aucs = {source: np.mean([x[source] for x in agg_val_aucs]) for source in val_aucs.keys()}
-            logging.info("Average validation losses: %s", dict(zip(val_losses.keys(), avg_val_losses)))
-            logging.info("Average validation AUCs: %s", dict(zip(val_aucs.keys(), avg_val_aucs)))
-
-            scaled_losses = np.array(0.9 * avg_val_losses / np.max(avg_val_losses))
-            logging.info(f"scaled_losses: {scaled_losses}")
-
-            # find top three auc functions
-            rated_auc_functions = []
-            for f in auc_functions:
-                f_str = inspect.getsource(f).replace(" ", "")[:-2]
-                # logging.info(f"Evaluating convex combination of models with AUC function {f_str}.")
-                weights = np.array([f(x) for x in avg_val_aucs.values()])
-                weights = weights.clip(min=0)
-                test_pred = np.average(test_predictions_list, axis=0, weights=weights)
-                # logging.info(f"weights: {weights}")
-                
-                fold_results[f"AUC_{f_str}"] = calculate_metrics(test_pred, test_labels)
-                rated_auc_functions.append((f_str, fold_results[f"AUC_{f_str}"]["AUC"]))
-            rated_auc_functions.sort(key=lambda x: x[1], reverse=True)
-            # print top three auc functions
-            for f_str, auc in rated_auc_functions[:3]:
-                logging.info(f"{f_str}: {auc}")
-            
-
-            # find top three loss functions
-            rated_loss_functions = []
-            for f in loss_functions:
-                # strip whitespace
-                f_str = inspect.getsource(f).replace(" ", "")[:-2]
-                # logging.info(f"Evaluating convex combination of models with loss function {f_str}.")
-                weights = [f(x) for x in scaled_losses]
-                # logging.info(f"weights: {weights}")
-                test_pred = np.average(test_predictions_list, axis=0, weights=weights)
-                fold_results[f"loss_{f_str}"] = calculate_metrics(test_pred, test_labels)
-                rated_loss_functions.append((f_str, fold_results[f"loss_{f_str}"]["AUC"]))
-            rated_loss_functions.sort(key=lambda x: x[1], reverse=True)
-            for f_str, auc in rated_loss_functions[:3]:
-                logging.info(f"{f_str}: {auc}")
-
-            # evaluate source only mixture
-            logging.info("Evaluating loss weighted source only mixture.")
-            loss_based_weights = 1 - scaled_losses[1:]
-            test_pred = np.average(test_predictions_list_without_target, axis=0, weights=loss_based_weights)
-            fold_results[f"loss_based_source_only_mixture"] = calculate_metrics(test_pred, test_labels)
-            logging.info(f"auc: {fold_results[f'loss_based_source_only_mixture']['AUC']}")
-
-            logging.info("Evaluating auc weighted source only mixture.")
-            auc_based_weights = (np.array(list(avg_val_aucs.values())) - 0.5)[1:] ** 2
-            test_pred = np.average(test_predictions_list_without_target, axis=0, weights=auc_based_weights)
-            fold_results[f"auc_based_source_only_mixture"] = calculate_metrics(test_pred, test_labels)
-            logging.info(f"auc: {fold_results[f'auc_based_source_only_mixture']['AUC']}")
             
             # average results over folds
             agg_aucs = {}
@@ -398,25 +294,21 @@ def domain_adaptation(
             for source, aucs in agg_aucs.items():
                 avg_aucs[source] = np.mean(aucs)
 
+            avg_val_losses = np.array([np.mean([x[source] for x in agg_val_losses]) for source in val_losses.keys()])
+            logging.info("Average validation losses: %s", dict(zip(val_losses.keys(), avg_val_losses)))
+            scaled_losses = np.array(0.9 * avg_val_losses / np.max(avg_val_losses))
+            logging.info(f"scaled_losses: {scaled_losses}")
+
+            weights = [(1-x) for x in scaled_losses]
+            # logging.info(f"weights: {weights}")
+            test_pred = np.average(test_predictions_list, axis=0, weights=weights)
+            loss_weighted_results[repetition] = calculate_metrics(test_pred, test_labels)
+            avg_aucs["loss_weighted"] = calculate_metrics(test_pred, test_labels)["AUC"]
+
             # print baselines first, then top three AUC, then top three loss
             for source, auc in avg_aucs.items():
-                if source in ["target", "convex_combination_without_target", "target_with_predictions", "cc_with_preds"] + datasets:
-                    logging.info(f"{source}: {auc}")
-            # avg_aucs_list = sorted(avg_aucs.items(), key=lambda x: x[1], reverse=True)
-            # i = 0
-            # for source, auc in avg_aucs_list:
-            #     if "AUC" in source:
-            #         i += 1
-            #         logging.info(f"{source}: {auc}")
-            #         if i == 3:
-            #             break
-            # i = 0
-            # for source, auc in avg_aucs_list:
-            #     if "loss" in source:
-            #         i += 1
-            #         logging.info(f"{source}: {auc}")
-            #         if i == 3:
-            #             break
+                logging.info(f"{source}: {auc}")
+
             log_full_line(f"FINISHED CV REPETITION {repetition}", level=logging.INFO, char="=", num_newlines=3)
 
         source_metrics = {}
@@ -425,6 +317,10 @@ def domain_adaptation(
                 for metric, score in source_stats.items():
                     if isinstance(score, (float, int)):
                         source_metrics.setdefault(source, {}).setdefault(metric, []).append(score)
+        for loss_weighted_result in loss_weighted_results.values():
+            for metric, score in loss_weighted_result.items():
+                if isinstance(score, (float, int)):
+                    source_metrics.setdefault("loss_weighted", {}).setdefault(metric, []).append(score)
 
         # Compute statistical metric over aggregated results
         averaged_metrics = {}

From a6b31d4cae0b52250e8488a4ba217caf7653b989 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Mon, 16 Jan 2023 12:44:15 +0100
Subject: [PATCH 126/163] format

---
 icu_benchmarks/models/domain_adaptation.py | 57 +++++++++++++++-------
 1 file changed, 40 insertions(+), 17 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index f9dc667b..7062ef5f 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -187,7 +187,15 @@ def domain_adaptation(
                 log_dir_fold.mkdir(parents=True, exist_ok=True)
 
                 # load or train target model
-                target_model_dir = old_run_dir / task / model / dataset / f"target_{target_size}" / f"cv_rep_{repetition}" / f"fold_{fold_index}"
+                target_model_dir = (
+                    old_run_dir
+                    / task
+                    / model
+                    / dataset
+                    / f"target_{target_size}"
+                    / f"cv_rep_{repetition}"
+                    / f"fold_{fold_index}"
+                )
                 if target_model_dir.exists():
                     target_model = load_model(target_model_dir, log_dir_fold)
                 else:
@@ -211,7 +219,7 @@ def get_preds(split):
                             predictions = json.load(f)
                         _, labels = RICUDataset(data, split=split).get_data_and_labels()
                     return predictions, labels
-                
+
                 # get predictions for train set
                 train_predictions, train_labels = get_preds("train")
                 test_predictions, test_labels = get_preds("test")
@@ -226,11 +234,11 @@ def get_preds(split):
                 for baseline, predictions in test_predictions.items():
                     # logging.info("Evaluating model: {}".format(baseline))
                     fold_results[baseline] = calculate_metrics(predictions, test_labels)
-                
+
                 # evaluate convex combination of models without target
                 test_predictions_list = list(test_predictions.values())
                 test_predictions_list_without_target = test_predictions_list[1:]
-                test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1,1,1])
+                test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1])
                 fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 
                 # evaluate convex combination of models with target
@@ -243,7 +251,7 @@ def get_preds(split):
                 weights_without_target = [v for k, v in weights.items() if k != dataset]
                 target_weights = [0.5, 1, 2]
                 for t in target_weights:
-                    w =  [t * sum(weights_without_target)] + weights_without_target
+                    w = [t * sum(weights_without_target)] + weights_without_target
                     # logging.info(f"Evaluating target weight: {t}")
                     test_pred = np.average(test_predictions_list, axis=0, weights=w)
                     fold_results[f"target_weight_{t}"] = calculate_metrics(test_pred, test_labels)
@@ -264,26 +272,39 @@ def get_preds(split):
                     cv_folds=cv_folds,
                     fold_index=fold_index,
                 )
-                data_with_predictions["train"]["STATIC"] = data_with_predictions["train"]["STATIC"].join(pd.DataFrame(list(train_predictions.values())[1:]).T)
-                data_with_predictions["val"]["STATIC"] = data_with_predictions["val"]["STATIC"].join(pd.DataFrame(list(val_predictions.values())[1:]).T)
-                data_with_predictions["test"]["STATIC"] = data_with_predictions["test"]["STATIC"].join(pd.DataFrame(list(test_predictions.values())[1:]).T)
+                data_with_predictions["train"]["STATIC"] = data_with_predictions["train"]["STATIC"].join(
+                    pd.DataFrame(list(train_predictions.values())[1:]).T
+                )
+                data_with_predictions["val"]["STATIC"] = data_with_predictions["val"]["STATIC"].join(
+                    pd.DataFrame(list(val_predictions.values())[1:]).T
+                )
+                data_with_predictions["test"]["STATIC"] = data_with_predictions["test"]["STATIC"].join(
+                    pd.DataFrame(list(test_predictions.values())[1:]).T
+                )
                 model_type = gin.query_parameter("train_common.model")
                 if str(model_type) == "@DLWrapper()":
                     target_model_with_predictions = DLWrapper()
                 elif str(model_type) == "@MLWrapper()":
                     target_model_with_predictions = MLWrapper()
                 target_model_with_predictions.set_log_dir(log_dir_fold)
-                target_model_with_predictions.train(RICUDataset(data_with_predictions, split="train"), RICUDataset(data_with_predictions, split="val"), "balanced", seed)
+                target_model_with_predictions.train(
+                    RICUDataset(data_with_predictions, split="train"),
+                    RICUDataset(data_with_predictions, split="val"),
+                    "balanced",
+                    seed,
+                )
                 dataset_with_predictions = RICUDataset(data_with_predictions, split="test")
                 preds_w_preds = target_model_with_predictions.predict(dataset_with_predictions, None, None)
                 if isinstance(target_model_with_predictions, MLWrapper):
                     preds_w_preds = preds_w_preds[:, 1]
                 fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels)
-                test_pred_with_preds = np.average([preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[.5,1,1,1])
+                test_pred_with_preds = np.average(
+                    [preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[0.5, 1, 1, 1]
+                )
                 fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels)
 
                 log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
-            
+
             # average results over folds
             agg_aucs = {}
             for fold_results in results.values():
@@ -299,7 +320,7 @@ def get_preds(split):
             scaled_losses = np.array(0.9 * avg_val_losses / np.max(avg_val_losses))
             logging.info(f"scaled_losses: {scaled_losses}")
 
-            weights = [(1-x) for x in scaled_losses]
+            weights = [(1 - x) for x in scaled_losses]
             # logging.info(f"weights: {weights}")
             test_pred = np.average(test_predictions_list, axis=0, weights=weights)
             loss_weighted_results[repetition] = calculate_metrics(test_pred, test_labels)
@@ -326,11 +347,13 @@ def get_preds(split):
         averaged_metrics = {}
         for source, source_stats in source_metrics.items():
             for metric, scores in source_stats.items():
-                averaged_metrics.setdefault(source, {}).setdefault(metric, []).append({
-                    "avg": np.mean(scores),
-                    "std": np.std(scores),
-                    "CI_0.95": stats.t.interval(0.95, len(scores) - 1, loc=np.mean(scores), scale=stats.sem(scores)),
-                })
+                averaged_metrics.setdefault(source, {}).setdefault(metric, []).append(
+                    {
+                        "avg": np.mean(scores),
+                        "std": np.std(scores),
+                        "CI_0.95": stats.t.interval(0.95, len(scores) - 1, loc=np.mean(scores), scale=stats.sem(scores)),
+                    }
+                )
 
         with open(log_dir / "aggregated_source_metrics.json", "w") as f:
             json.dump(results, f, cls=JsonResultLoggingEncoder)

From 272747c04a8f41b9c25a231a7e6dde036084989c Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Mon, 16 Jan 2023 12:45:52 +0100
Subject: [PATCH 127/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 47 +++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 8d625cc2..efb0ae4f 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -6,36 +6,45 @@
 for metric in ["AUC", "PR"]:
     for endpoint in models_dir.iterdir():
         if endpoint.is_dir():
-            with open(models_dir / f'{endpoint.name}_{metric}_results.csv', 'w') as csv_file:
+            with open(models_dir / f"{endpoint.name}_{metric}_results.csv", "w") as csv_file:
                 writer = csv.writer(csv_file)
                 info = ["model", "target", "target_size"]
-                source_names = ['target', 'aumc', 'eicu', 'hirid', 'miiv', 'convex_combination_without_target', 'convex_combination_0.1', 'convex_combination_0.2', 'convex_combination_0.5', 'convex_combination_1', 'convex_combination_2', 'convex_combination_5']
-                stats_basis = ['avg', 'std', 'CI_0.95']
-                stats_basis = ['avg']
-                stats = ['avg', 'std', 'CI_0.95_min', 'CI_0.95_max']
-                stats = ['avg']
+                source_names = [
+                    "target",
+                    "aumc",
+                    "eicu",
+                    "hirid",
+                    "miiv",
+                    "convex_combination_without_target",
+                    "target_weight_0.5",
+                    "target_weight_1",
+                    "target_weight_2",
+                    "loss_weighted",
+                    "target_with_predictions",
+                    "cc_with_preds",
+                ]
+                stats_basis = ["avg", "std", "CI_0.95"]
+                stats_basis = ["avg"]
+                stats = ["avg", "std", "CI_0.95_min", "CI_0.95_max"]
+                stats = ["avg"]
                 # combine fieldnames and stats
-                full_fields = [f'{source}_{stat}' for source in source_names for stat in stats]
-                writer = csv.DictWriter(csv_file, fieldnames=info+full_fields)
+                full_fields = [f"{source}_{stat}" for source in source_names for stat in stats]
+                writer = csv.DictWriter(csv_file, fieldnames=info + full_fields)
 
                 writer.writeheader()
                 for model in endpoint.iterdir():
                     for target in ["aumc", "eicu", "hirid", "miiv"]:
-                        target_sizes = ['target_500', 'target_1000', 'target_2000']
+                        target_sizes = ["target_500", "target_1000", "target_2000"]
                         for target_size in target_sizes:
-                            with open(model / target / target_size / 'averaged_source_metrics.json', 'r') as f:
+                            with open(model / target / target_size / "averaged_source_metrics.json", "r") as f:
                                 results = json.load(f)
 
-                                row_data = {
-                                    'model': model.name,
-                                    'target': target,
-                                    'target_size': target_size
-                                }
+                                row_data = {"model": model.name, "target": target, "target_size": target_size}
                                 for stat in stats_basis:
                                     for source, source_metrics in results.items():
-                                        if stat == 'CI_0.95':
-                                            row_data[f'{source}_{stat}_min'] = source_metrics[metric][0][stat][0] * 100
-                                            row_data[f'{source}_{stat}_max'] = source_metrics[metric][0][stat][1] * 100
+                                        if stat == "CI_0.95":
+                                            row_data[f"{source}_{stat}_min"] = source_metrics[metric][0][stat][0] * 100
+                                            row_data[f"{source}_{stat}_max"] = source_metrics[metric][0][stat][1] * 100
                                         else:
-                                            row_data[f'{source}_{stat}'] = source_metrics[metric][0][stat] * 100
+                                            row_data[f"{source}_{stat}"] = source_metrics[metric][0][stat] * 100
                                 writer.writerow(row_data)

From 5d2d20ff37b1d66bc1e4363fc82c865579b99032 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Mon, 16 Jan 2023 13:32:42 +0100
Subject: [PATCH 128/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 27 ++++++++++------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 7062ef5f..96fc511a 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -161,14 +161,18 @@ def domain_adaptation(
         gin.bind_parameter("preprocess.fold_size", target_size)
         log_dir = run_dir / task / model / dataset / f"target_{target_size}"
         log_dir.mkdir(parents=True, exist_ok=True)
-        # choose_and_bind_hyperparameters(False, data_dir, log_dir, seed, debug=debug)
-        # gin_config_with_target_hyperparameters = gin.config_str()
+        target_model_dir = (old_run_dir / task / model / dataset / f"target_{target_size}")
+        if not (target_model_dir / "cv_rep_0" / "fold_0").exists():
+            choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug)
+        else:
+            gin.parse_config_file(target_model_dir / "cv_rep_0" / "fold_0" / "train_config.gin")
+        gin_config_with_target_hyperparameters = gin.config_str()
         results = {}
         loss_weighted_results = {}
         for repetition in range(cv_repetitions_to_train):
             agg_val_losses = []
             for fold_index in range(cv_folds_to_train):
-                # gin.parse_config(gin_config_with_target_hyperparameters)
+                gin.parse_config(gin_config_with_target_hyperparameters)
                 results[f"{repetition}_{fold_index}"] = {}
                 fold_results = results[f"{repetition}_{fold_index}"]
 
@@ -187,18 +191,11 @@ def domain_adaptation(
                 log_dir_fold.mkdir(parents=True, exist_ok=True)
 
                 # load or train target model
-                target_model_dir = (
-                    old_run_dir
-                    / task
-                    / model
-                    / dataset
-                    / f"target_{target_size}"
-                    / f"cv_rep_{repetition}"
-                    / f"fold_{fold_index}"
-                )
-                if target_model_dir.exists():
-                    target_model = load_model(target_model_dir, log_dir_fold)
+                target_model_dir_fold = target_model_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}"
+                if target_model_dir_fold.exists():
+                    target_model = load_model(target_model_dir_fold, log_dir_fold)
                 else:
+                    logging.info("Model not found, training new model.")
                     target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
 
                 def get_preds(split):
@@ -258,7 +255,7 @@ def get_preds(split):
 
                 # join predictions with static data and train new model
                 gin.clear_config()
-                gin.parse_config_file(target_model_dir / "train_config.gin")
+                gin.parse_config(gin_config_with_target_hyperparameters)
                 gin.bind_parameter("Transformer.emb", 103)
                 gin.bind_parameter("LSTMNet.input_dim", 103)
                 gin.bind_parameter("preprocess.fold_size", target_size)

From d896d560ce74b01347e277ab4ff024b32dca31be Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Mon, 16 Jan 2023 15:11:21 +0100
Subject: [PATCH 129/163] fix da for miiv

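---
Review note: the key rename "mimic" -> "miiv" is the actual fix here.
weights_without_target is built with `if k != dataset`, and the dataset
name used elsewhere in this function is "miiv", so with the old key the
miiv entry was never filtered out when miiv was the target.
The new `if dataset != "miiv": return` guard turns domain_adaptation()
into a no-op for every other dataset; it is removed again in
PATCH 133/163.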
 icu_benchmarks/models/domain_adaptation.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 96fc511a..cf19c8a6 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -138,6 +138,9 @@ def domain_adaptation(
     Raises:
         ValueError: If checkpoint is not None and the checkpoint does not exist.
     """
+    if dataset != "miiv":
+        return
+
     cv_repetitions = 5
     cv_repetitions_to_train = 5
     cv_folds = 5
@@ -243,7 +246,7 @@ def get_preds(split):
                     "aumc": 10535,
                     "eicu": 113382,
                     "hirid": 12859,
-                    "mimic": 52045,
+                    "miiv": 52045,
                 }
                 weights_without_target = [v for k, v in weights.items() if k != dataset]
                 target_weights = [0.5, 1, 2]

From 101f2620018128cb9927e4e3e2bad188ddb608f0 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Mon, 16 Jan 2023 23:41:38 +0100
Subject: [PATCH 130/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index efb0ae4f..cb960605 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 import csv
 
-models_dir = Path("../DA_logs")
+models_dir = Path("../DA_new")
 for metric in ["AUC", "PR"]:
     for endpoint in models_dir.iterdir():
         if endpoint.is_dir():

From ac098ce1aca13793a9703a53cf5518897138af70 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Tue, 17 Jan 2023 13:25:18 +0100
Subject: [PATCH 131/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index cf19c8a6..21bfe9d2 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -145,7 +145,8 @@ def domain_adaptation(
     cv_repetitions_to_train = 5
     cv_folds = 5
     cv_folds_to_train = 5
-    target_sizes = [500, 1000, 2000]
+    # target_sizes = [500, 1000, 2000]
+    target_sizes = [500]
     datasets = ["aumc", "eicu", "hirid", "miiv"]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")

From db0393f711406bda96e80ab7efdf0eaf6fd706dd Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Tue, 17 Jan 2023 16:37:13 +0100
Subject: [PATCH 132/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index cb960605..3b4fa36f 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -36,15 +36,16 @@
                     for target in ["aumc", "eicu", "hirid", "miiv"]:
                         target_sizes = ["target_500", "target_1000", "target_2000"]
                         for target_size in target_sizes:
-                            with open(model / target / target_size / "averaged_source_metrics.json", "r") as f:
-                                results = json.load(f)
+                            if (model / target / target_size).exists():
+                                with open(model / target / target_size / "averaged_source_metrics.json", "r") as f:
+                                    results = json.load(f)
 
-                                row_data = {"model": model.name, "target": target, "target_size": target_size}
-                                for stat in stats_basis:
-                                    for source, source_metrics in results.items():
-                                        if stat == "CI_0.95":
-                                            row_data[f"{source}_{stat}_min"] = source_metrics[metric][0][stat][0] * 100
-                                            row_data[f"{source}_{stat}_max"] = source_metrics[metric][0][stat][1] * 100
-                                        else:
-                                            row_data[f"{source}_{stat}"] = source_metrics[metric][0][stat] * 100
-                                writer.writerow(row_data)
+                                    row_data = {"model": model.name, "target": target, "target_size": target_size}
+                                    for stat in stats_basis:
+                                        for source, source_metrics in results.items():
+                                            if stat == "CI_0.95":
+                                                row_data[f"{source}_{stat}_min"] = source_metrics[metric][0][stat][0] * 100
+                                                row_data[f"{source}_{stat}_max"] = source_metrics[metric][0][stat][1] * 100
+                                            else:
+                                                row_data[f"{source}_{stat}"] = source_metrics[metric][0][stat] * 100
+                                    writer.writerow(row_data)

From 871810ecfd89206d2365e2dfa9f6b438b0184855 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Tue, 17 Jan 2023 17:08:51 +0100
Subject: [PATCH 133/163] Update domain_adaptation.py

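---
Review note: after this change loss_weighted_results[repetition] is
assigned twice in a row, first with the 1/loss weighting and then with
the (1/loss)**2 weighting, so the second assignment overwrites the
first. Only avg_aucs keeps both variants, under "loss_weighted" and
"squared_loss_weighted". If both are meant to be persisted, the squared
variant needs its own result container or key.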
 icu_benchmarks/models/domain_adaptation.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 21bfe9d2..8df6ad84 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -138,15 +138,11 @@ def domain_adaptation(
     Raises:
         ValueError: If checkpoint is not None and the checkpoint does not exist.
     """
-    if dataset != "miiv":
-        return
-
     cv_repetitions = 5
     cv_repetitions_to_train = 5
     cv_folds = 5
     cv_folds_to_train = 5
-    # target_sizes = [500, 1000, 2000]
-    target_sizes = [500]
+    target_sizes = [500, 1000, 2000]
     datasets = ["aumc", "eicu", "hirid", "miiv"]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
@@ -318,15 +314,19 @@ def get_preds(split):
 
             avg_val_losses = np.array([np.mean([x[source] for x in agg_val_losses]) for source in val_losses.keys()])
             logging.info("Average validation losses: %s", dict(zip(val_losses.keys(), avg_val_losses)))
-            scaled_losses = np.array(0.9 * avg_val_losses / np.max(avg_val_losses))
-            logging.info(f"scaled_losses: {scaled_losses}")
 
-            weights = [(1 - x) for x in scaled_losses]
-            # logging.info(f"weights: {weights}")
+            weights = 1 / avg_val_losses
+            logging.info(f"weights: {weights}")
             test_pred = np.average(test_predictions_list, axis=0, weights=weights)
             loss_weighted_results[repetition] = calculate_metrics(test_pred, test_labels)
             avg_aucs["loss_weighted"] = calculate_metrics(test_pred, test_labels)["AUC"]
 
+            weights = (1 / avg_val_losses) ** 2
+            logging.info(f"weights: {weights}")
+            test_pred = np.average(test_predictions_list, axis=0, weights=weights)
+            loss_weighted_results[repetition] = calculate_metrics(test_pred, test_labels)
+            avg_aucs["squared_loss_weighted"] = calculate_metrics(test_pred, test_labels)["AUC"]
+
             # print baselines first, then top three AUC, then top three loss
             for source, auc in avg_aucs.items():
                 logging.info(f"{source}: {auc}")

From f10e0bb40ca641a59d81563a4d1424baddcd63b6 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Tue, 17 Jan 2023 18:14:53 +0100
Subject: [PATCH 134/163] fix weight for combined

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 8df6ad84..50b30cc3 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -296,7 +296,7 @@ def get_preds(split):
                     preds_w_preds = preds_w_preds[:, 1]
                 fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels)
                 test_pred_with_preds = np.average(
-                    [preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[0.5, 1, 1, 1]
+                    [preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[0.5*sum(weights_without_target)] + weights_without_target
                 )
                 fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels)
 

From 3c83511e876ca66e2fa34d1e8dc6e7c501fcd68f Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Tue, 17 Jan 2023 19:16:53 +0100
Subject: [PATCH 135/163] include max prediction

---
 icu_benchmarks/models/domain_adaptation.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 50b30cc3..cc47f15b 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -238,6 +238,10 @@ def get_preds(split):
                 test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1])
                 fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 
+                # evaluate max probability
+                max_pred = np.max(test_predictions_list, axis=0)
+                fold_results[f"max_prediction"] = calculate_metrics(max_pred, test_labels)
+
                 # evaluate convex combination of models with target
                 weights = {
                     "aumc": 10535,

From a6a4551c529e162e5d70f8250aa646f6516ba029 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Tue, 17 Jan 2023 22:24:42 +0100
Subject: [PATCH 136/163] Update domain_adaptation.py

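---
Review note: logging.disable(logging.INFO) is only undone by the
logging.disable(logging.NOTSET) call after gp_minimize returns. If
gp_minimize or convex_model_combination raises, INFO logging stays
disabled for the rest of the process; a try/finally would avoid that.
The search space has len(datasets) dimensions and the resulting weight
vector is passed to np.average over val_predictions and
test_predictions_list, so both are presumably expected to hold exactly
len(datasets) entries (not visible in this hunk - to be confirmed).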
 icu_benchmarks/models/domain_adaptation.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index cc47f15b..9641d22c 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -9,7 +9,8 @@
 import pandas as pd
 from pathlib import Path
 import scipy.stats as stats
-from sklearn.metrics import log_loss, roc_auc_score
+from sklearn.metrics import log_loss
+from skopt import gp_minimize
 
 from icu_benchmarks.data.loader import RICUDataset
 from icu_benchmarks.data.preprocess import preprocess_data
@@ -304,6 +305,25 @@ def get_preds(split):
                 )
                 fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels)
 
+                def convex_model_combination(model_weights):
+                    val_pred = np.average(list(val_predictions.values()), axis=0, weights=model_weights)
+                    return log_loss(val_labels, val_pred)
+
+                logging.disable(logging.INFO)
+                res = gp_minimize(
+                    convex_model_combination,
+                    [(0.01, 1)] * len(datasets),
+                    n_calls=50,
+                    n_initial_points=10,
+                    random_state=seed,
+                    noise=1e-10,  # the models are deterministic, but noise is needed for the gp to work
+                )
+                logging.disable(logging.NOTSET)
+                best_model_weights = res.x
+                logging.info(best_model_weights)
+                test_pred = np.average(test_predictions_list, axis=0, weights=best_model_weights)
+                fold_results["bayes_opt"] = calculate_metrics(test_pred, test_labels)
+
                 log_full_line(f"FINISHED FOLD {fold_index}", level=logging.INFO)
 
             # average results over folds

From 2c590783cfc181ae10e746759d0d590cd91972bd Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Wed, 18 Jan 2023 01:28:57 +0100
Subject: [PATCH 137/163] format

---
 icu_benchmarks/models/domain_adaptation.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 9641d22c..c384530e 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -162,7 +162,7 @@ def domain_adaptation(
         gin.bind_parameter("preprocess.fold_size", target_size)
         log_dir = run_dir / task / model / dataset / f"target_{target_size}"
         log_dir.mkdir(parents=True, exist_ok=True)
-        target_model_dir = (old_run_dir / task / model / dataset / f"target_{target_size}")
+        target_model_dir = old_run_dir / task / model / dataset / f"target_{target_size}"
         if not (target_model_dir / "cv_rep_0" / "fold_0").exists():
             choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug)
         else:
@@ -301,7 +301,9 @@ def get_preds(split):
                     preds_w_preds = preds_w_preds[:, 1]
                 fold_results["target_with_predictions"] = calculate_metrics(preds_w_preds, test_labels)
                 test_pred_with_preds = np.average(
-                    [preds_w_preds] + test_predictions_list_without_target, axis=0, weights=[0.5*sum(weights_without_target)] + weights_without_target
+                    [preds_w_preds] + test_predictions_list_without_target,
+                    axis=0,
+                    weights=[0.5 * sum(weights_without_target)] + weights_without_target,
                 )
                 fold_results[f"cc_with_preds"] = calculate_metrics(test_pred_with_preds, test_labels)
 

From 479246a597ec1586ee0d0f1ebe5069be45efcfd9 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Wed, 18 Jan 2023 10:42:37 +0100
Subject: [PATCH 138/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index c384530e..2c415f54 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -143,7 +143,8 @@ def domain_adaptation(
     cv_repetitions_to_train = 5
     cv_folds = 5
     cv_folds_to_train = 5
-    target_sizes = [500, 1000, 2000]
+    # target_sizes = [500, 1000, 2000]
+    target_sizes = [1000, 2000]
     datasets = ["aumc", "eicu", "hirid", "miiv"]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")

From 0c2af6daa5ca17c7987a0b5e9040a03b02519ef8 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Wed, 18 Jan 2023 20:24:40 +0100
Subject: [PATCH 139/163] changes for sepsis

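---
Review note: the `if target_model_dir_fold.exists()` check is replaced
by a bare `except:` around load_model(). A bare except also catches
KeyboardInterrupt and SystemExit, and any unrelated failure inside
load_model() is reported as "Model not found" and silently followed by
a full retraining. Catching a specific exception type (or at least
`except Exception`) and logging the caught error would keep the
fallback without hiding real errors.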
 icu_benchmarks/models/domain_adaptation.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 2c415f54..c75851df 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -143,9 +143,9 @@ def domain_adaptation(
     cv_repetitions_to_train = 5
     cv_folds = 5
     cv_folds_to_train = 5
-    # target_sizes = [500, 1000, 2000]
-    target_sizes = [1000, 2000]
-    datasets = ["aumc", "eicu", "hirid", "miiv"]
+    target_sizes = [500, 1000, 2000]
+    # datasets = ["aumc", "eicu", "hirid", "miiv"]
+    datasets = ["aumc", "hirid"]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
     # old_run_dir = Path("../yaib_logs/DA")
@@ -164,7 +164,7 @@ def domain_adaptation(
         log_dir = run_dir / task / model / dataset / f"target_{target_size}"
         log_dir.mkdir(parents=True, exist_ok=True)
         target_model_dir = old_run_dir / task / model / dataset / f"target_{target_size}"
-        if not (target_model_dir / "cv_rep_0" / "fold_0").exists():
+        if not (target_model_dir / "cv_rep_0" / "fold_0" / "train_config.gin").exists():
             choose_and_bind_hyperparameters(True, data_dir, log_dir, seed, debug=debug)
         else:
             gin.parse_config_file(target_model_dir / "cv_rep_0" / "fold_0" / "train_config.gin")
@@ -194,9 +194,9 @@ def domain_adaptation(
 
                 # load or train target model
                 target_model_dir_fold = target_model_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}"
-                if target_model_dir_fold.exists():
+                try:
                     target_model = load_model(target_model_dir_fold, log_dir_fold)
-                else:
+                except:
                     logging.info("Model not found, training new model.")
                     target_model = train_common(data, log_dir=log_dir_fold, seed=seed, return_model=True)
 
@@ -236,9 +236,9 @@ def get_preds(split):
 
                 # evaluate convex combination of models without target
                 test_predictions_list = list(test_predictions.values())
-                test_predictions_list_without_target = test_predictions_list[1:]
-                test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1])
-                fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
+                # test_predictions_list_without_target = test_predictions_list[1:]
+                # test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1])
+                # fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 
                 # evaluate max probability
                 max_pred = np.max(test_predictions_list, axis=0)
@@ -247,9 +247,9 @@ def get_preds(split):
                 # evaluate convex combination of models with target
                 weights = {
                     "aumc": 10535,
-                    "eicu": 113382,
+                    # "eicu": 113382,
                     "hirid": 12859,
-                    "miiv": 52045,
+                    # "miiv": 52045,
                 }
                 weights_without_target = [v for k, v in weights.items() if k != dataset]
                 target_weights = [0.5, 1, 2]

From 4cff9eb4219ec7c9fd5bfe3faf13febf7a68ab00 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 19 Jan 2023 00:28:53 +0100
Subject: [PATCH 140/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index c75851df..c4e838cf 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -148,8 +148,8 @@ def domain_adaptation(
     datasets = ["aumc", "hirid"]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
-    # old_run_dir = Path("../yaib_logs/DA")
-    old_run_dir = Path("../DA_logs")
+    # old_run_dir = Path("../yaib_logs/DA_sep")
+    old_run_dir = Path("../DA_seps")
     gin_config_before_tuning = gin.config_str()
 
     # evaluate models on same test split
@@ -236,9 +236,9 @@ def get_preds(split):
 
                 # evaluate convex combination of models without target
                 test_predictions_list = list(test_predictions.values())
-                # test_predictions_list_without_target = test_predictions_list[1:]
-                # test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1])
-                # fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
+                test_predictions_list_without_target = test_predictions_list[1:]
+                test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1])
+                fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 
                 # evaluate max probability
                 max_pred = np.max(test_predictions_list, axis=0)

From 89eead011739d9967232a6a7cb51383f687509c7 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 19 Jan 2023 00:31:56 +0100
Subject: [PATCH 141/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 3b4fa36f..dfffef82 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -16,10 +16,12 @@
                     "hirid",
                     "miiv",
                     "convex_combination_without_target",
+                    "max_prediction_avg", 
                     "target_weight_0.5",
                     "target_weight_1",
                     "target_weight_2",
                     "loss_weighted",
+                    "bayes_opt_avg",
                     "target_with_predictions",
                     "cc_with_preds",
                 ]

From 00986de71f23cc641117633a1cf82dc434fa5626 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 19 Jan 2023 00:40:06 +0100
Subject: [PATCH 142/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index dfffef82..2529f3d4 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -16,12 +16,12 @@
                     "hirid",
                     "miiv",
                     "convex_combination_without_target",
-                    "max_prediction_avg", 
+                    "max_prediction", 
                     "target_weight_0.5",
                     "target_weight_1",
                     "target_weight_2",
                     "loss_weighted",
-                    "bayes_opt_avg",
+                    "bayes_opt",
                     "target_with_predictions",
                     "cc_with_preds",
                 ]

From 6aa022957096123d7acd14ba2d22d49fdbdaa995 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 19 Jan 2023 00:46:15 +0100
Subject: [PATCH 143/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index c4e838cf..9ba97369 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -143,13 +143,13 @@ def domain_adaptation(
     cv_repetitions_to_train = 5
     cv_folds = 5
     cv_folds_to_train = 5
-    target_sizes = [500, 1000, 2000]
-    # datasets = ["aumc", "eicu", "hirid", "miiv"]
-    datasets = ["aumc", "hirid"]
+    target_sizes = [500]
+    datasets = ["aumc", "eicu", "hirid", "miiv"]
+    # datasets = ["aumc", "hirid"]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
     # old_run_dir = Path("../yaib_logs/DA_sep")
-    old_run_dir = Path("../DA_seps")
+    old_run_dir = Path("../DA_new")
     gin_config_before_tuning = gin.config_str()
 
     # evaluate models on same test split
@@ -247,9 +247,9 @@ def get_preds(split):
                 # evaluate convex combination of models with target
                 weights = {
                     "aumc": 10535,
-                    # "eicu": 113382,
+                    "eicu": 113382,
                     "hirid": 12859,
-                    # "miiv": 52045,
+                    "miiv": 52045,
                 }
                 weights_without_target = [v for k, v in weights.items() if k != dataset]
                 target_weights = [0.5, 1, 2]

From 4e7174c39dddf7a6453ed43db8507c15037e5b9e Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 19 Jan 2023 00:48:53 +0100
Subject: [PATCH 144/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 9ba97369..f332268f 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -143,13 +143,13 @@ def domain_adaptation(
     cv_repetitions_to_train = 5
     cv_folds = 5
     cv_folds_to_train = 5
-    target_sizes = [500]
+    target_sizes = [500, 1000, 2000]
     datasets = ["aumc", "eicu", "hirid", "miiv"]
     # datasets = ["aumc", "hirid"]
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
     # old_run_dir = Path("../yaib_logs/DA_sep")
-    old_run_dir = Path("../DA_new")
+    old_run_dir = Path("../DA_seps")
     gin_config_before_tuning = gin.config_str()
 
     # evaluate models on same test split

From 7f84b2a101381e372476700195e4ffdaf50eae74 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 19 Jan 2023 11:46:03 +0100
Subject: [PATCH 145/163] correct logging for loss weighted

---
 icu_benchmarks/models/domain_adaptation.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index f332268f..1f02395b 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -170,8 +170,9 @@ def domain_adaptation(
             gin.parse_config_file(target_model_dir / "cv_rep_0" / "fold_0" / "train_config.gin")
         gin_config_with_target_hyperparameters = gin.config_str()
         results = {}
-        loss_weighted_results = {}
+        loss_weighted_results = []
         for repetition in range(cv_repetitions_to_train):
+            loss_weighted_results.append({})
             agg_val_losses = []
             for fold_index in range(cv_folds_to_train):
                 gin.parse_config(gin_config_with_target_hyperparameters)
@@ -345,14 +346,14 @@ def convex_model_combination(model_weights):
             weights = 1 / avg_val_losses
             logging.info(f"weights: {weights}")
             test_pred = np.average(test_predictions_list, axis=0, weights=weights)
-            loss_weighted_results[repetition] = calculate_metrics(test_pred, test_labels)
-            avg_aucs["loss_weighted"] = calculate_metrics(test_pred, test_labels)["AUC"]
+            loss_weighted_results[repetition]["loss_weighted"] = calculate_metrics(test_pred, test_labels)
+            avg_aucs["loss_weighted"] = loss_weighted_results[repetition]["loss_weighted"]["AUC"]
 
             weights = (1 / avg_val_losses) ** 2
             logging.info(f"weights: {weights}")
             test_pred = np.average(test_predictions_list, axis=0, weights=weights)
-            loss_weighted_results[repetition] = calculate_metrics(test_pred, test_labels)
-            avg_aucs["squared_loss_weighted"] = calculate_metrics(test_pred, test_labels)["AUC"]
+            loss_weighted_results[repetition]["squared_loss_weighted"] = calculate_metrics(test_pred, test_labels)
+            avg_aucs["squared_loss_weighted"] = loss_weighted_results[repetition]["squared_loss_weighted"]["AUC"]
 
             # print baselines first, then top three AUC, then top three loss
             for source, auc in avg_aucs.items():

From a30a56f1a5bb22ba5b6ef2d22a16f08c90912643 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 19 Jan 2023 11:46:17 +0100
Subject: [PATCH 146/163] only use source weights

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 1f02395b..e29f2f7e 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -252,7 +252,7 @@ def get_preds(split):
                     "hirid": 12859,
                     "miiv": 52045,
                 }
-                weights_without_target = [v for k, v in weights.items() if k != dataset]
+                weights_without_target = [v for k, v in weights.items() if k in source_datasets]
                 target_weights = [0.5, 1, 2]
                 for t in target_weights:
                     w = [t * sum(weights_without_target)] + weights_without_target

From 16fba8af266b59933b59fe52ddddae05c5bcadab Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 19 Jan 2023 11:50:30 +0100
Subject: [PATCH 147/163] use debug to set source datasets

---
 icu_benchmarks/models/domain_adaptation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index e29f2f7e..e45a83e0 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -144,8 +144,8 @@ def domain_adaptation(
     cv_folds = 5
     cv_folds_to_train = 5
     target_sizes = [500, 1000, 2000]
-    datasets = ["aumc", "eicu", "hirid", "miiv"]
-    # datasets = ["aumc", "hirid"]
+    datasets = ["aumc", "hirid"] if debug else ["aumc", "eicu", "hirid", "miiv"]
+    debug = False
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
     # old_run_dir = Path("../yaib_logs/DA_sep")

From 4d40c7b3dfbd5376195c4efc66c17bc6bb7cdda3 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 19 Jan 2023 11:51:14 +0100
Subject: [PATCH 148/163] Update run.py

---
 icu_benchmarks/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py
index 9e43c78b..0617b01e 100644
--- a/icu_benchmarks/run.py
+++ b/icu_benchmarks/run.py
@@ -52,7 +52,7 @@ def main(my_args=tuple(sys.argv[1:])):
             else [Path(f"configs/models/{model}.gin"), Path(f"configs/tasks/{task}.gin")]
         )
         gin.parse_config_files_and_bindings(gin_config_files, args.gin_bindings, finalize_config=False)
-        domain_adaptation(name, args.data_dir, args.log_dir, args.seed, args.task_name, model)
+        domain_adaptation(name, args.data_dir, args.log_dir, args.seed, args.task_name, model, debug=args.debug)
         return
     else:
         reproducible = args.reproducible

From b351a7056f4f721911592c04facf76a04056e582 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 19 Jan 2023 11:52:32 +0100
Subject: [PATCH 149/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index e45a83e0..74e75e41 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -145,11 +145,12 @@ def domain_adaptation(
     cv_folds_to_train = 5
     target_sizes = [500, 1000, 2000]
     datasets = ["aumc", "hirid"] if debug else ["aumc", "eicu", "hirid", "miiv"]
+    # old_run_dir = Path("../yaib_logs/DA_sep")
+    old_run_dir = Path("../DA_seps") if debug else Path("../DA_new")
     debug = False
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
-    # old_run_dir = Path("../yaib_logs/DA_sep")
-    old_run_dir = Path("../DA_seps")
+    
     gin_config_before_tuning = gin.config_str()
 
     # evaluate models on same test split

From c881d876b6fb786ec5d80af0f4d6c3a04bb791ba Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Thu, 19 Jan 2023 13:45:38 +0100
Subject: [PATCH 150/163] fix loss_weighted

---
 icu_benchmarks/models/domain_adaptation.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 74e75e41..f5dc7d99 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -368,10 +368,11 @@ def convex_model_combination(model_weights):
                 for metric, score in source_stats.items():
                     if isinstance(score, (float, int)):
                         source_metrics.setdefault(source, {}).setdefault(metric, []).append(score)
-        for loss_weighted_result in loss_weighted_results.values():
-            for metric, score in loss_weighted_result.items():
-                if isinstance(score, (float, int)):
-                    source_metrics.setdefault("loss_weighted", {}).setdefault(metric, []).append(score)
+        for loss_weighted_result in loss_weighted_results:
+            for source, source_stats in loss_weighted_result.items():
+                for metric, score in source_stats.items():
+                    if isinstance(score, (float, int)):
+                        source_metrics.setdefault(source, {}).setdefault(metric, []).append(score)
 
         # Compute statistical metric over aggregated results
         averaged_metrics = {}

From 467bd74c7f707c29708afd3a4349bab65dc7c7df Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 20 Jan 2023 00:12:05 +0100
Subject: [PATCH 151/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index f5dc7d99..ba7e5d69 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -239,8 +239,9 @@ def get_preds(split):
                 # evaluate convex combination of models without target
                 test_predictions_list = list(test_predictions.values())
                 test_predictions_list_without_target = test_predictions_list[1:]
-                test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1])
-                fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
+                if not debug:
+                    test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1])
+                    fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 
                 # evaluate max probability
                 max_pred = np.max(test_predictions_list, axis=0)

From 9f6f4a62717bb958c9b6766dc45aa61cb9d140f3 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 20 Jan 2023 11:34:09 +0100
Subject: [PATCH 152/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index ba7e5d69..62081f19 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -139,15 +139,15 @@ def domain_adaptation(
     Raises:
         ValueError: If checkpoint is not None and the checkpoint does not exist.
     """
+    is_sepsis = task == "sepsis"
     cv_repetitions = 5
     cv_repetitions_to_train = 5
     cv_folds = 5
     cv_folds_to_train = 5
     target_sizes = [500, 1000, 2000]
-    datasets = ["aumc", "hirid"] if debug else ["aumc", "eicu", "hirid", "miiv"]
+    datasets = ["aumc", "hirid"] if is_sepsis else ["aumc", "eicu", "hirid", "miiv"]
     # old_run_dir = Path("../yaib_logs/DA_sep")
-    old_run_dir = Path("../DA_seps") if debug else Path("../DA_new")
-    debug = False
+    old_run_dir = Path("../DA_seps") if is_sepsis else Path("../DA_new")
     task_dir = data_dir / task
     model_path = Path("../yaib_models/best_models/")
     
@@ -239,7 +239,7 @@ def get_preds(split):
                 # evaluate convex combination of models without target
                 test_predictions_list = list(test_predictions.values())
                 test_predictions_list_without_target = test_predictions_list[1:]
-                if not debug:
+                if not is_sepsis:
                     test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1])
                     fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 

From b543be642a4a135921b6adbdd64d6f378d71a1ba Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 20 Jan 2023 12:08:00 +0100
Subject: [PATCH 153/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 2529f3d4..1dc2dd26 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -21,6 +21,7 @@
                     "target_weight_1",
                     "target_weight_2",
                     "loss_weighted",
+                    "squared_loss_weighted_avg",
                     "bayes_opt",
                     "target_with_predictions",
                     "cc_with_preds",

From 8bc39b866ce0020c053ec846b5697441dd889743 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Fri, 20 Jan 2023 12:09:21 +0100
Subject: [PATCH 154/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 1dc2dd26..7251694d 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -21,7 +21,7 @@
                     "target_weight_1",
                     "target_weight_2",
                     "loss_weighted",
-                    "squared_loss_weighted_avg",
+                    "squared_loss_weighted",
                     "bayes_opt",
                     "target_with_predictions",
                     "cc_with_preds",

From 45454a61b3f5525f6b72e89d0027793f481b01b5 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 21 Jan 2023 15:40:53 +0100
Subject: [PATCH 155/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 62081f19..54085ab0 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -145,7 +145,7 @@ def domain_adaptation(
     cv_folds = 5
     cv_folds_to_train = 5
     target_sizes = [500, 1000, 2000]
-    datasets = ["aumc", "hirid"] if is_sepsis else ["aumc", "eicu", "hirid", "miiv"]
+    datasets = ["aumc", "hirid", "miiv"] if is_sepsis else ["aumc", "eicu", "hirid", "miiv"]
     # old_run_dir = Path("../yaib_logs/DA_sep")
     old_run_dir = Path("../DA_seps") if is_sepsis else Path("../DA_new")
     task_dir = data_dir / task

From e4d739f1af6f486385a08792420a618c5f22b39e Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sat, 21 Jan 2023 20:53:40 +0100
Subject: [PATCH 156/163] Update domain_adaptation.py

---
 icu_benchmarks/models/domain_adaptation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 54085ab0..2566f651 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -239,9 +239,9 @@ def get_preds(split):
                 # evaluate convex combination of models without target
                 test_predictions_list = list(test_predictions.values())
                 test_predictions_list_without_target = test_predictions_list[1:]
-                if not is_sepsis:
-                    test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=[1, 1, 1])
-                    fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
+                weights_without_target = [1, 1] if is_sepsis else [1, 1, 1]
+                test_pred_without_target = np.average(test_predictions_list_without_target, axis=0, weights=weights_without_target)
+                fold_results[f"convex_combination_without_target"] = calculate_metrics(test_pred_without_target, test_labels)
 
                 # evaluate max probability
                 max_pred = np.max(test_predictions_list, axis=0)

From 870214a83e50d714d5f6fb0695516c29f4cbea8d Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Sun, 22 Jan 2023 22:43:13 +0100
Subject: [PATCH 157/163] Update da_to_csv.py

---
 scripts/results/da_to_csv.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/da_to_csv.py
index 7251694d..e380e578 100644
--- a/scripts/results/da_to_csv.py
+++ b/scripts/results/da_to_csv.py
@@ -26,10 +26,8 @@
                     "target_with_predictions",
                     "cc_with_preds",
                 ]
-                stats_basis = ["avg", "std", "CI_0.95"]
-                stats_basis = ["avg"]
-                stats = ["avg", "std", "CI_0.95_min", "CI_0.95_max"]
-                stats = ["avg"]
+                stats_basis = ["avg", "std"]
+                stats = ["avg", "std"]
                 # combine fieldnames and stats
                 full_fields = [f"{source}_{stat}" for source in source_names for stat in stats]
                 writer = csv.DictWriter(csv_file, fieldnames=info + full_fields)
@@ -37,10 +35,11 @@
                 writer.writeheader()
                 for model in endpoint.iterdir():
                     for target in ["aumc", "eicu", "hirid", "miiv"]:
-                        target_sizes = ["target_500", "target_1000", "target_2000"]
+                        target_sizes = [500, 1000, 2000]
                         for target_size in target_sizes:
-                            if (model / target / target_size).exists():
-                                with open(model / target / target_size / "averaged_source_metrics.json", "r") as f:
+                            target_str = f"target_{target_size}"
+                            if (model / target / target_str).exists():
+                                with open(model / target / target_str / "averaged_source_metrics.json", "r") as f:
                                     results = json.load(f)
 
                                     row_data = {"model": model.name, "target": target, "target_size": target_size}

From e069f6b1129df81cfdf5b70c326467d6c69fea2c Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Mon, 23 Jan 2023 15:07:55 +0100
Subject: [PATCH 158/163] rename script

---
 scripts/results/{da_to_csv.py => mortality_to_csv.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename scripts/results/{da_to_csv.py => mortality_to_csv.py} (100%)

diff --git a/scripts/results/da_to_csv.py b/scripts/results/mortality_to_csv.py
similarity index 100%
rename from scripts/results/da_to_csv.py
rename to scripts/results/mortality_to_csv.py

From c0c733b9d14991e1e194b3b8dc087b0233147833 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Mon, 23 Jan 2023 15:08:41 +0100
Subject: [PATCH 159/163] Create sepsis_to_csv.py

---
 scripts/results/sepsis_to_csv.py | 53 ++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 scripts/results/sepsis_to_csv.py

diff --git a/scripts/results/sepsis_to_csv.py b/scripts/results/sepsis_to_csv.py
new file mode 100644
index 00000000..65ad0a56
--- /dev/null
+++ b/scripts/results/sepsis_to_csv.py
@@ -0,0 +1,53 @@
+import json
+from pathlib import Path
+import csv
+
+models_dir = Path("../DA_sep_new")  # results root; one CSV per sub-directory and metric is written into it
+for metric in ["AUC", "PR"]:
+    for endpoint in models_dir.iterdir():
+        if endpoint.is_dir():
+            with open(models_dir / f"{endpoint.name}_{metric}_results.csv", "w", newline="") as csv_file:
+                # newline="" above: the csv module emits its own line endings (no blank rows on Windows).
+                info = ["model", "target", "target_size"]
+                source_names = [
+                    "target",
+                    "aumc",
+                    "eicu",
+                    "hirid",
+                    "miiv",
+                    "convex_combination_without_target",
+                    "max_prediction", 
+                    "target_weight_0.5",
+                    "target_weight_1",
+                    "target_weight_2",
+                    "loss_weighted",
+                    "squared_loss_weighted",
+                    "bayes_opt",
+                    "target_with_predictions",
+                    "cc_with_preds",
+                ]
+                stats_basis = ["avg", "std"]
+                stats = ["avg", "std"]
+                # combine fieldnames and stats
+                full_fields = [f"{source}_{stat}" for source in source_names for stat in stats]
+                writer = csv.DictWriter(csv_file, fieldnames=info + full_fields)
+
+                writer.writeheader()
+                for model in endpoint.iterdir():
+                    for target in ["aumc", "hirid", "miiv"]:
+                        target_sizes = [500, 1000, 2000]
+                        for target_size in target_sizes:
+                            target_str = f"target_{target_size}"
+                            if (model / target / target_str).exists():
+                                with open(model / target / target_str / "averaged_source_metrics.json", "r") as f:
+                                    results = json.load(f)
+
+                                    row_data = {"model": model.name, "target": target, "target_size": target_size}
+                                    for stat in stats_basis:
+                                        for source, source_metrics in results.items():
+                                            if stat == "CI_0.95":  # not reached with the current stats_basis
+                                                row_data[f"{source}_{stat}_min"] = source_metrics[metric][0][stat][0] * 100
+                                                row_data[f"{source}_{stat}_max"] = source_metrics[metric][0][stat][1] * 100
+                                            else:
+                                                row_data[f"{source}_{stat}"] = source_metrics[metric][0][stat] * 100
+                                    writer.writerow(row_data)

From c1e555af99b16680d49bec0a58f150a82cce71a8 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Mon, 23 Jan 2023 15:18:22 +0100
Subject: [PATCH 160/163] Update sepsis_to_csv.py

---
 scripts/results/sepsis_to_csv.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/results/sepsis_to_csv.py b/scripts/results/sepsis_to_csv.py
index 65ad0a56..813c3aeb 100644
--- a/scripts/results/sepsis_to_csv.py
+++ b/scripts/results/sepsis_to_csv.py
@@ -12,7 +12,6 @@
                 source_names = [
                     "target",
                     "aumc",
-                    "eicu",
                     "hirid",
                     "miiv",
                     "convex_combination_without_target",

From 6a7300d905a40dc9d34d23d7c6e44b901262b9f1 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Wed, 25 Jan 2023 16:58:25 +0100
Subject: [PATCH 161/163] tables to latex

---
 scripts/results/da_results_to_latex.py     | 69 ++++++++++++++++++++++
 scripts/results/da_results_to_latex_sep.py | 69 ++++++++++++++++++++++
 2 files changed, 138 insertions(+)
 create mode 100644 scripts/results/da_results_to_latex.py
 create mode 100644 scripts/results/da_results_to_latex_sep.py

diff --git a/scripts/results/da_results_to_latex.py b/scripts/results/da_results_to_latex.py
new file mode 100644
index 00000000..a2e8cbd6
--- /dev/null
+++ b/scripts/results/da_results_to_latex.py
@@ -0,0 +1,69 @@
+import csv
+
+rawNamesMap = {  # raw result-source key -> display name for the LaTeX tables; dict_to_latex skips keys missing here
+  "target": "Target",
+  "aumc": "AUMCdb",
+  "eicu": "eICU",
+  "hirid": "HiRID",
+  "miiv": "MIMIC-IV",
+  "convex_combination_without_target": "Convex UDA",
+  "max_prediction": "Max Pooling",
+  "target_weight_0.5": "Weighted $\\alpha=1/3$",
+  "target_weight_2": "Weighted $\\alpha=2/3$",
+  "loss_weighted": "Weighted Loss",
+  "bayes_opt": "Weighted Bayes",
+  "target_with_predictions": "Prediction-Feature",
+  "cc_with_preds": "Combined",
+}
+
+def csv_to_dict(file_name):
+    """Group the rows of a results CSV as {(target, target_size): {model: remaining score columns}}."""
+    id_columns = ('target', 'target_size', 'model')
+    tables = {}
+    with open(file_name, 'r', newline='') as file:  # newline='' as the csv module documentation requires
+        for row in csv.DictReader(file):
+            scores = {key: value for key, value in row.items() if key not in id_columns}
+            tables.setdefault((row['target'], row['target_size']), {})[row['model']] = scores
+    return tables
+
+
+def dict_to_latex(combination, data, metric):
+    """Return the LaTeX source of one table: a row per named score source, a column per model.
+
+    Args:
+        combination: (target dataset, target size) key as produced by csv_to_dict.
+        data: Mapping of model name to its {"<source>_avg" / "<source>_std": value} columns.
+        metric: "auc" labels the caption AUROC; any other value labels it AUPRC.
+    """
+    models = list(data)
+    label = "AUROC" if metric == "auc" else "AUPRC"
+    table = '\\begin{table}[h]\n\\centering\n\\footnotesize'
+    table += '\\caption{{Sepsis prediction on {0} with target size {1}, {2} with standard deviation.}}\n'.format(rawNamesMap[combination[0]], combination[1], label)
+    table += '\\begin{tabular}{l|' + 'c' * len(models) + '}\n'
+    table += '\\textbf{' + '} & \\textbf{'.join(['Model'] + models) + '}\\\\\n'
+    table += '\\hline\n'
+    # Row order follows the columns of the last model (unchanged); empty data now yields a table without rows.
+    for score_name in (data[models[-1]] if models else ()):
+        if "_avg" not in score_name:
+            continue
+        raw_name = score_name.split("_avg")[0]
+        if raw_name == combination[0] or raw_name not in rawNamesMap:
+            continue  # skip the source named like the target dataset and sources without a display name
+        values = [rawNamesMap[raw_name]]
+        for model in models:
+            avg = "{:.2f}".format(float(data[model][score_name]))
+            std = "{:.2f}".format(float(data[model][f"{raw_name}_std"]))
+            values.append(f"${avg} \\pm {std}$")  # backslash escaped: a bare "\p" is an invalid escape sequence
+        table += ' & '.join(values) + '\\\\\n'
+    table += '\\end{tabular}\n\\end{table}\n'
+    return table
+
+if __name__ == '__main__':
+    # Print every (target, target size) table for the "auc" CSV first, then for the "pr" CSV.
+    for metric in ["auc", "pr"]:
+        tables = csv_to_dict(f'../yaib_logs/sep_{metric}.csv')
+        for key, row in tables.items():
+            print(dict_to_latex(key, row, metric))
+        # a run of blank lines separates the output of the two metrics
+        print('\n' * 5)
+        
diff --git a/scripts/results/da_results_to_latex_sep.py b/scripts/results/da_results_to_latex_sep.py
new file mode 100644
index 00000000..c0c0aafe
--- /dev/null
+++ b/scripts/results/da_results_to_latex_sep.py
@@ -0,0 +1,69 @@
+import csv
+
+rawNamesMap = {  # raw identifier from the results CSV -> label printed in the LaTeX tables
+  "target": "Target",
+  "aumc": "AUMCdb",
+  "eicu": "eICU",
+  "hirid": "HiRID",
+  "miiv": "MIMIC-IV",
+  "convex_combination_without_target": "Convex UDA",
+  "max_prediction": "Max Pooling",
+  "target_weight_0.5": "Weighted $\\alpha=1/3$",  # NOTE(review): presumably alpha = w / (1 + w) for weight w; confirm
+  "target_weight_2": "Weighted $\\alpha=2/3$",
+  "loss_weighted": "Weighted Loss",
+  "bayes_opt": "Weighted Bayes",
+  "target_with_predictions": "Prediction-Feature",
+  "cc_with_preds": "Combined",
+}  # score columns whose prefix is missing here are left out of the tables by dict_to_latex
+
+def csv_to_dict(file_name):
+    """Group CSV rows as {target: {target_size: remaining columns}}; later rows win on a clash."""
+    id_columns = ('target', 'target_size', 'model')
+    grouped = {}
+    with open(file_name, 'r') as handle:
+        for record in csv.DictReader(handle):
+            scores = {column: cell for column, cell in record.items() if column not in id_columns}
+            grouped.setdefault(record['target'], {})[record['target_size']] = scores
+    return grouped
+
+
+def dict_to_latex(combination, data, metric):
+    """Build a LaTeX table with one column per target size and one row per named `*_avg` score.
+
+    `combination` is the target name (a plain string); `data` maps target size -> CSV score row.
+    """
+    headers = ['Target Size', *data]
+    table = '\\begin{table}[h]\n'
+    table += '\\centering\n'
+    table += '\\footnotesize'
+    table += '\\caption{{Sepsis prediction on {0} with LGBM, {1} with standard deviation.}}\n'.format(rawNamesMap[combination], "AUROC" if metric == "auc" else "AUPRC")
+    table += '\\begin{tabular}{l|' + 'c' * (len(headers) - 1) + '}\n'
+    table += '\\textbf{' + '} & \\textbf{'.join(headers) + '}\\\\\n'
+    table += '\\hline\n'
+    # Rows are expected to share the CSV's columns, so the last row names the scores.
+    score_names = data[headers[-1]] if data else ()
+    for score_name in score_names:
+        if "_avg" not in score_name:
+            continue
+        raw_name = score_name.split("_avg")[0]
+        # `combination` is the target name itself (a string), so it is compared whole.
+        if raw_name == combination or raw_name not in rawNamesMap:
+            continue
+        values = [rawNamesMap[raw_name]]
+        for scores in data.values():
+            values.append("${:.2f} \\pm {:.2f}$".format(float(scores[score_name]), float(scores[f"{raw_name}_std"])))
+        table += ' & '.join(values) + '\\\\\n'
+
+    table += '\\end{tabular}\n'
+    table += '\\end{table}\n'
+    return table
+
+if __name__ == '__main__':
+    # Print one batch of tables per metric; each metric has its own results CSV
+    # (the path is relative to the directory the script is started from).
+    for metric in ("auc", "pr"):
+        tables = csv_to_dict(f'../yaib_logs/sep_{metric}.csv')
+        for combination, rows in tables.items():
+            print(dict_to_latex(combination, rows, metric))
+        print('\n' * 5)
+        

From 60ae45fb9e628fe6d437fd1d22260f8ad1e7aeb5 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Tue, 8 Aug 2023 16:49:30 +0200
Subject: [PATCH 162/163] import domain adaptation

---
 icu_benchmarks/run.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/icu_benchmarks/run.py b/icu_benchmarks/run.py
index 21fce158..0a47cf7b 100644
--- a/icu_benchmarks/run.py
+++ b/icu_benchmarks/run.py
@@ -22,6 +22,7 @@
     setup_logging,
 )
 from icu_benchmarks.contants import RunMode
+from icu_benchmarks.models import domain_adaptation
 
 
 @gin.configurable("Run")

From b0583c719282462a56b78d516022887e71c44d48 Mon Sep 17 00:00:00 2001
From: Hendrik Schmidt <hendriknas@gmail.com>
Date: Tue, 8 Aug 2023 17:08:18 +0200
Subject: [PATCH 163/163] make train run

---
 icu_benchmarks/models/domain_adaptation.py | 33 +++++++++-------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/icu_benchmarks/models/domain_adaptation.py b/icu_benchmarks/models/domain_adaptation.py
index 2566f651..8df55050 100644
--- a/icu_benchmarks/models/domain_adaptation.py
+++ b/icu_benchmarks/models/domain_adaptation.py
@@ -12,14 +12,14 @@
 from sklearn.metrics import log_loss
 from skopt import gp_minimize
 
-from icu_benchmarks.data.loader import RICUDataset
-from icu_benchmarks.data.preprocess import preprocess_data
-from icu_benchmarks.hyperparameter_tuning import choose_and_bind_hyperparameters
-from icu_benchmarks.models.metric_constants import MLMetrics
+from icu_benchmarks.data.loader import PredictionDataset
+from icu_benchmarks.data.preprocessor import Preprocessor, DefaultClassificationPreprocessor
+from icu_benchmarks.tuning.hyperparameters import choose_and_bind_hyperparameters
 from icu_benchmarks.models.train import train_common
 from icu_benchmarks.models.wrappers import DLWrapper, MLWrapper
 from icu_benchmarks.models.utils import JsonResultLoggingEncoder
 from icu_benchmarks.run_utils import log_full_line
+from .constants import MLMetrics
 
 
 def load_model(model_dir: Path, log_dir: Path):
@@ -42,7 +42,7 @@ def load_model(model_dir: Path, log_dir: Path):
     return model
 
 
-def get_predictions_for_single_model(dataset: RICUDataset, model_dir: Path, log_dir: Path):
+def get_predictions_for_single_model(dataset: PredictionDataset, model_dir: Path, log_dir: Path):
     """Get predictions for a single model.
 
     Args:
@@ -100,7 +100,7 @@ def get_predictions_for_all_models(
         torch.backends.cudnn.deterministic = True
         torch.backends.cudnn.benchmark = False
 
-    test_dataset = RICUDataset(data, split=test_on)
+    test_dataset = PredictionDataset(data, split=test_on)
     _, test_labels = test_dataset.get_data_and_labels()
 
     test_predictions = {}
@@ -153,6 +153,8 @@ def domain_adaptation(
     
     gin_config_before_tuning = gin.config_str()
 
+    preprocessor = preprocessor(use_static_features=True)
+
     # evaluate models on same test split
     data_dir = task_dir / dataset
     source_datasets = [d for d in datasets if d != dataset]
@@ -180,16 +182,7 @@ def domain_adaptation(
                 results[f"{repetition}_{fold_index}"] = {}
                 fold_results = results[f"{repetition}_{fold_index}"]
 
-                data = preprocess_data(
-                    data_dir,
-                    seed=seed,
-                    debug=debug,
-                    use_cache=True,
-                    cv_repetitions=cv_repetitions,
-                    repetition_index=repetition,
-                    cv_folds=cv_folds,
-                    fold_index=fold_index,
-                )
+                data = preprocessor.apply(data, vars)
 
                 log_dir_fold = log_dir / f"cv_rep_{repetition}" / f"fold_{fold_index}"
                 log_dir_fold.mkdir(parents=True, exist_ok=True)
@@ -218,7 +211,7 @@ def get_preds(split):
                     else:
                         with open(log_dir_fold / f"{split}_predictions.json", "r") as f:
                             predictions = json.load(f)
-                        _, labels = RICUDataset(data, split=split).get_data_and_labels()
+                        _, labels = PredictionDataset(data, split=split).get_data_and_labels()
                     return predictions, labels
 
                 # get predictions for train set
@@ -294,12 +287,12 @@ def get_preds(split):
                     target_model_with_predictions = MLWrapper()
                 target_model_with_predictions.set_log_dir(log_dir_fold)
                 target_model_with_predictions.train(
-                    RICUDataset(data_with_predictions, split="train"),
-                    RICUDataset(data_with_predictions, split="val"),
+                    PredictionDataset(data_with_predictions, split="train"),
+                    PredictionDataset(data_with_predictions, split="val"),
                     "balanced",
                     seed,
                 )
-                dataset_with_predictions = RICUDataset(data_with_predictions, split="test")
+                dataset_with_predictions = PredictionDataset(data_with_predictions, split="test")
                 preds_w_preds = target_model_with_predictions.predict(dataset_with_predictions, None, None)
                 if isinstance(target_model_with_predictions, MLWrapper):
                     preds_w_preds = preds_w_preds[:, 1]