From 07b3d9b8722a31a83547fce4b38eedbdc1e1b402 Mon Sep 17 00:00:00 2001
From: Johannes Haux <jo.mobile.2011@gmail.com>
Date: Wed, 26 Feb 2020 15:29:26 +0100
Subject: [PATCH 1/5] :tada: Wandb sweeps for edflow

The great functionality of wandb sweeps cannot be used out of the box
with edflow. The new `edprep` command helps with this. Now you can
1. Define a sweep from your wandb client
2. edprep the sweep.yaml so that it is compatible with edflow
3. run the wandb sweep
For more details see the docstring in edprep.
---
 edflow/args.py                        |  63 +++++++++++++++
 edflow/config/commandline_kwargs.py   |  15 +++-
 edflow/edflow                         |  35 +-------
 edflow/edprep                         | 110 ++++++++++++++++++++++++++
 edflow/iterators/template_iterator.py |   4 +-
 setup.py                              |   1 +
 6 files changed, 193 insertions(+), 35 deletions(-)
 create mode 100644 edflow/args.py
 create mode 100644 edflow/edprep

diff --git a/edflow/args.py b/edflow/args.py
new file mode 100644
index 0000000..ffdedf5
--- /dev/null
+++ b/edflow/args.py
@@ -0,0 +1,63 @@
+import argparse
+
+
+def get_parser(**parser_kwargs):
+    def str2bool(v):
+        if isinstance(v, bool):
+           return v
+        if v.lower() in ('yes', 'true', 't', 'y', '1'):
+            return True
+        elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+            return False
+        else:
+            raise argparse.ArgumentTypeError('Boolean value expected.')
+
+    parser = argparse.ArgumentParser(**parser_kwargs)
+    parser.add_argument(
+        "-n", "--name", metavar="description", help="postfix of log directory."
+    )
+    parser.add_argument(
+        "-b",
+        "--base",
+        nargs="*",
+        metavar="base_config.yaml",
+        help="paths to base configs. Loaded from left-to-right. "
+        "Parameters can be overwritten or added with command-line options of the form `--key value`.",
+        default=None,
+    )
+    parser.add_argument(
+        "-t", "--train",
+        type=str2bool, const=True, default=False, nargs='?',
+        help="run in training mode"
+    )
+    parser.add_argument("-p", "--project", help="path to existing project")
+    parser.add_argument("-c", "--checkpoint", help="path to existing checkpoint")
+    parser.add_argument(
+        "-r", "--retrain",
+        type=str2bool, const=True, default=False, nargs='?',
+        help="reset global step"
+    )
+    parser.add_argument(
+        "-l",
+        "--log_level",
+        metavar="LEVEL",
+        type=str,
+        choices=["warn", "info", "debug", "critical"],
+        default="info",
+        help="set the logging level.",
+    )
+    parser.add_argument("-d", "--debug", type=str2bool, nargs='?', const=True,
+            default=False, help="enable post-mortem debugging")
+    parser.add_argument(
+        "-w",
+        "--wandb_sweep",
+        nargs='?',
+        const=True,
+        type=str2bool,
+        default=False,
+        help="Process additional arguments supplied by wandb's sweep mechanism,"
+        "i.e. replace dots ('.') with slashes ('/') in the argument name: "
+        "--par.at.level=3 => --par/at/level 3",
+    )
+
+    return parser
diff --git a/edflow/config/commandline_kwargs.py b/edflow/config/commandline_kwargs.py
index 6d7b3d4..ad34e0f 100644
--- a/edflow/config/commandline_kwargs.py
+++ b/edflow/config/commandline_kwargs.py
@@ -19,7 +19,17 @@ def replace(k):
     walk(config, replace, inplace=True)
 
 
-def parse_unknown_args(unknown):
+def parse_unknown_args(unknown, is_wandb_sweep):
+    if is_wandb_sweep:
+        unknown_ = unknown
+        unknown = []
+        for u in unknown_:
+            if '=' in u:
+                key, val = u.split('=')
+                unknown += [key, val]
+            else:
+                unknown += [u]
+
     kwargs = {}
     for i in range(len(unknown)):
         key = unknown[i]
@@ -42,6 +52,9 @@ def parse_unknown_args(unknown):
             while key[0] == "-":
                 key = key[1:]
 
+            if is_wandb_sweep:
+                key = key.replace('.', '/')
+
             # Store key key pairs
             kwargs[key] = value
 
diff --git a/edflow/edflow b/edflow/edflow
index a763005..40c0214 100644
--- a/edflow/edflow
+++ b/edflow/edflow
@@ -18,6 +18,7 @@ from edflow.custom_logging import run, get_logger  # noqa
 from edflow.hooks.checkpoint_hooks.common import get_latest_checkpoint  # noqa
 from edflow.config import parse_unknown_args, update_config
 from edflow.util import retrieve
+from edflow.args import get_parser
 
 
 def load_config(base_configs, additional_kwargs):
@@ -124,40 +125,10 @@ def main(opt, additional_kwargs):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "-n", "--name", metavar="description", help="postfix of log directory."
-    )
-    parser.add_argument(
-        "-b",
-        "--base",
-        nargs="*",
-        metavar="base_config.yaml",
-        help="paths to base configs. Loaded from left-to-right. "
-        "Parameters can be overwritten or added with command-line options of the form `--key value`.",
-        default=None,
-    )
-    parser.add_argument(
-        "-t", "--train", action="store_true", help="run in training mode"
-    )
-    parser.add_argument("-p", "--project", help="path to existing project")
-    parser.add_argument("-c", "--checkpoint", help="path to existing checkpoint")
-    parser.add_argument(
-        "-r", "--retrain", action="store_true", help="reset global step"
-    )
-    parser.add_argument(
-        "-l",
-        "--log_level",
-        metavar="LEVEL",
-        type=str,
-        choices=["warn", "info", "debug", "critical"],
-        default="info",
-        help="set the logging level.",
-    )
-    parser.add_argument("-d", "--debug", action="store_true", help="enable post-mortem debugging")
+    parser = get_parser()
 
     opt, unknown = parser.parse_known_args()
-    additional_kwargs = parse_unknown_args(unknown)
+    additional_kwargs = parse_unknown_args(unknown, opt.wandb_sweep)
 
     if not opt.debug:
         main(opt, additional_kwargs)
diff --git a/edflow/edprep b/edflow/edprep
new file mode 100644
index 0000000..30bb825
--- /dev/null
+++ b/edflow/edprep
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+'''
+This script is meant to be used together with wandb's great sweep
+functionality. The workflow looks as follows:
+
+
+1. Create a sweep file from your wandb web client. You can put in anything
+    under ``Settings/Training Script`` or leave it as it is. This value will be
+    changed to match you local edflow later on. Choose all parameters you want
+    to sweep over and nothgin else. Don't worry about the dots in the preview,
+    these will be fixed automatically later on.
+
+2. Download the sweep file to your prefered location.
+
+3. Navigate to the script location and run ``edprep <sweep_file.yaml> [all
+    additional parameters]. Add all additional parameters as you would when
+    running ``edflow``. Usually those include ``-t -b
+    <path/to/some/config.yaml> -n <sweep name>``. You can also add any other
+    parameter as if working with edflow. All parameters are added to the sweep
+    as constants, which are not varied.
+
+4. Now follow the wandb manual: run ``wandb sweep <sweep_file.yaml>``
+
+5. copy the command from the output of step 4 and run it. It should look
+    something like this: ``wandb agent <sweep_id>``.
+'''
+
+import os
+import argparse  # noqa
+import yaml  # noqa
+import time
+import subprocess
+
+from edflow.config import parse_unknown_args
+from edflow.args import get_parser
+
+
+def load_sweep(sweep_file_path):
+    '''Loads a yaml file in read only mode'''
+    with open(sweep_file_path, 'r') as sf:
+        content = yaml.safe_load(sf)
+
+    return content
+
+
+def prepare_content(content, opt, additional_kwargs):
+    '''Exchanges the program parameter of the sweep file with the local edflow
+    executable and adds all commandline parameters as constant parameters for
+    the sweep arguments.
+
+    Parameters
+    ----------
+    content : dict
+        content of the sweep file. Needs at least the key ``parameters``.
+    opt : Namespace
+        Parsed arguments from the commandline.
+    additional_kwargs : dict
+        Preprocessed addtitional commandline kwargs.
+
+    Returns
+    -------
+    content : dict
+        Updated version of the content.
+    '''
+    edexec = subprocess.check_output("which edflow", shell=True).decode("utf-8")
+    edexec = edexec.replace('\n', '')
+    content['program'] = edexec
+
+    for key, value in additional_kwargs.items():
+        _add_const_parameter(content, key, value)
+
+    for [key, value] in opt._get_kwargs():
+        if value is not None:
+            _add_const_parameter(content, key, value)
+
+    return content
+
+def _add_const_parameter(content, key, value):
+    par_dict = {'value': value, 'distribution': 'constant'}
+    content['parameters'][key] = par_dict
+
+
+def store_sweep(content, sweep_file_path):
+    '''Stores the updated sweep file and makes a backup of the old one.'''
+    os.rename(sweep_file_path, f'.{sweep_file_path}.{time.time()}.backup')
+    with open(sweep_file_path, 'w') as sf:
+        sf.write(yaml.safe_dump(content, indent=2))
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(
+            parents=[get_parser(add_help=False)],
+            description=__doc__ + '\nNote: the option ``--wand_sweep`` will '
+                'always be set to True!',
+            formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument('sweep', metavar='SWEEP.YAML',
+            help='Sweep File as created by wandb. See '
+            'https://docs.wandb.com/sweeps for more.')
+
+    opt, unknown = parser.parse_known_args()
+
+    if not opt.wandb_sweep:
+        opt.wandb_sweep = True
+
+    additional_kwargs = parse_unknown_args(unknown, opt.wandb_sweep)
+
+    content = load_sweep(opt.sweep)
+    content = prepare_content(content, opt, additional_kwargs)
+    store_sweep(content, opt.sweep)
diff --git a/edflow/iterators/template_iterator.py b/edflow/iterators/template_iterator.py
index eacb079..ce0ab6b 100644
--- a/edflow/iterators/template_iterator.py
+++ b/edflow/iterators/template_iterator.py
@@ -72,9 +72,9 @@ def __init__(self, *args, **kwargs):
                 )
 
                 os.environ["WANDB_RESUME"] = "allow"
-                os.environ["WANDB_RUN_ID"] = ProjectManager.root.strip("/").replace(
+                os.environ.setdefault("WANDB_RUN_ID", ProjectManager.root.strip("/").replace(
                     "/", "-"
-                )
+                ))
                 wandb_project = set_default(
                     self.config, "integrations/wandb/project", None
                 )
diff --git a/setup.py b/setup.py
index f63d75f..1724f7e 100644
--- a/setup.py
+++ b/setup.py
@@ -58,6 +58,7 @@
         "edflow/edeval",
         "edflow/edsetup",
         "edflow/edexplore",
+        "edflow/edprep",
     ],
     python_requires=">=3.6",
     classifiers=[

From c3254113ada40fd0f829d496dd1d361d859ca927 Mon Sep 17 00:00:00 2001
From: Johannes Haux <jo.mobile.2011@gmail.com>
Date: Wed, 26 Feb 2020 15:30:39 +0100
Subject: [PATCH 2/5] :memo: Changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b5fce4e..ef7a001 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 ### Added
+- Wandb sweeps! Prepare your sweep with wandb, edprep your sweep file and go sweepin'!
 - Root parameter for image and numpy loader of the meta dataset. `root` is prepended to the given paths and thus allows for smaller label arrays
 - Category loader allows to convert a given label into a more expressive category, which is specifed in the dataset's `meta.yaml`
 - Debug options: `debug/disable_integrations=True`, `debug/max_examples=5 batches`.

From 93eed15f9a882087957a9bc25b69b29fdedf9a92 Mon Sep 17 00:00:00 2001
From: Johannes Haux <jo.mobile.2011@gmail.com>
Date: Wed, 26 Feb 2020 16:18:53 +0100
Subject: [PATCH 3/5] :bug: Fixes list argument bug

This is specific only to the --base argument of the edflow and thus
edprep command. The current hack is to simply join a list using a space.
This is hacky and will probably lead to errors in the future, should the
commandline interface be extended.
---
 edflow/edprep | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/edflow/edprep b/edflow/edprep
index 30bb825..4c1aa57 100644
--- a/edflow/edprep
+++ b/edflow/edprep
@@ -76,6 +76,8 @@ def prepare_content(content, opt, additional_kwargs):
     return content
 
 def _add_const_parameter(content, key, value):
+    if isinstance(value, list):
+        value = ' '.join(value)
     par_dict = {'value': value, 'distribution': 'constant'}
     content['parameters'][key] = par_dict
 

From fe6e9106a80b7290a5b61d6a60fcf2c9904291d9 Mon Sep 17 00:00:00 2001
From: Johannes Haux <jo.mobile.2011@gmail.com>
Date: Wed, 26 Feb 2020 16:20:35 +0100
Subject: [PATCH 4/5] formatting

---
 edflow/args.py                        | 41 ++++++++++++++++++---------
 edflow/config/commandline_kwargs.py   |  6 ++--
 edflow/iterators/template_iterator.py |  6 ++--
 3 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/edflow/args.py b/edflow/args.py
index ffdedf5..30c4661 100644
--- a/edflow/args.py
+++ b/edflow/args.py
@@ -4,13 +4,13 @@
 def get_parser(**parser_kwargs):
     def str2bool(v):
         if isinstance(v, bool):
-           return v
-        if v.lower() in ('yes', 'true', 't', 'y', '1'):
+            return v
+        if v.lower() in ("yes", "true", "t", "y", "1"):
             return True
-        elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+        elif v.lower() in ("no", "false", "f", "n", "0"):
             return False
         else:
-            raise argparse.ArgumentTypeError('Boolean value expected.')
+            raise argparse.ArgumentTypeError("Boolean value expected.")
 
     parser = argparse.ArgumentParser(**parser_kwargs)
     parser.add_argument(
@@ -26,16 +26,24 @@ def str2bool(v):
         default=None,
     )
     parser.add_argument(
-        "-t", "--train",
-        type=str2bool, const=True, default=False, nargs='?',
-        help="run in training mode"
+        "-t",
+        "--train",
+        type=str2bool,
+        const=True,
+        default=False,
+        nargs="?",
+        help="run in training mode",
     )
     parser.add_argument("-p", "--project", help="path to existing project")
     parser.add_argument("-c", "--checkpoint", help="path to existing checkpoint")
     parser.add_argument(
-        "-r", "--retrain",
-        type=str2bool, const=True, default=False, nargs='?',
-        help="reset global step"
+        "-r",
+        "--retrain",
+        type=str2bool,
+        const=True,
+        default=False,
+        nargs="?",
+        help="reset global step",
     )
     parser.add_argument(
         "-l",
@@ -46,12 +54,19 @@ def str2bool(v):
         default="info",
         help="set the logging level.",
     )
-    parser.add_argument("-d", "--debug", type=str2bool, nargs='?', const=True,
-            default=False, help="enable post-mortem debugging")
+    parser.add_argument(
+        "-d",
+        "--debug",
+        type=str2bool,
+        nargs="?",
+        const=True,
+        default=False,
+        help="enable post-mortem debugging",
+    )
     parser.add_argument(
         "-w",
         "--wandb_sweep",
-        nargs='?',
+        nargs="?",
         const=True,
         type=str2bool,
         default=False,
diff --git a/edflow/config/commandline_kwargs.py b/edflow/config/commandline_kwargs.py
index ad34e0f..8b3e701 100644
--- a/edflow/config/commandline_kwargs.py
+++ b/edflow/config/commandline_kwargs.py
@@ -24,8 +24,8 @@ def parse_unknown_args(unknown, is_wandb_sweep):
         unknown_ = unknown
         unknown = []
         for u in unknown_:
-            if '=' in u:
-                key, val = u.split('=')
+            if "=" in u:
+                key, val = u.split("=")
                 unknown += [key, val]
             else:
                 unknown += [u]
@@ -53,7 +53,7 @@ def parse_unknown_args(unknown, is_wandb_sweep):
                 key = key[1:]
 
             if is_wandb_sweep:
-                key = key.replace('.', '/')
+                key = key.replace(".", "/")
 
             # Store key key pairs
             kwargs[key] = value
diff --git a/edflow/iterators/template_iterator.py b/edflow/iterators/template_iterator.py
index ce0ab6b..191c270 100644
--- a/edflow/iterators/template_iterator.py
+++ b/edflow/iterators/template_iterator.py
@@ -72,9 +72,9 @@ def __init__(self, *args, **kwargs):
                 )
 
                 os.environ["WANDB_RESUME"] = "allow"
-                os.environ.setdefault("WANDB_RUN_ID", ProjectManager.root.strip("/").replace(
-                    "/", "-"
-                ))
+                os.environ.setdefault(
+                    "WANDB_RUN_ID", ProjectManager.root.strip("/").replace("/", "-")
+                )
                 wandb_project = set_default(
                     self.config, "integrations/wandb/project", None
                 )

From 33350d6006c4cf6ef17cfb981cf292f5105a0c85 Mon Sep 17 00:00:00 2001
From: Johannes Haux <jo.mobile.2011@gmail.com>
Date: Thu, 27 Feb 2020 11:01:49 +0100
Subject: [PATCH 5/5] :bug: Fixes default behavior to be backward compatible
 and adds test

The additional test confirms that a mixture of wandb-type (depth by
dot) and edflow-type (depth by slash) arguments are converted to purely
edflow-type arguments, resulting in a correct config dict.
---
 edflow/config/commandline_kwargs.py |  2 +-
 tests/test_config.py                | 48 ++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/edflow/config/commandline_kwargs.py b/edflow/config/commandline_kwargs.py
index 8b3e701..7ebdb14 100644
--- a/edflow/config/commandline_kwargs.py
+++ b/edflow/config/commandline_kwargs.py
@@ -19,7 +19,7 @@ def replace(k):
     walk(config, replace, inplace=True)
 
 
-def parse_unknown_args(unknown, is_wandb_sweep):
+def parse_unknown_args(unknown, is_wandb_sweep=False):
     if is_wandb_sweep:
         unknown_ = unknown
         unknown = []
diff --git a/tests/test_config.py b/tests/test_config.py
index 9c2387d..6875545 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -50,7 +50,7 @@ def test_basic_parsing():
 
     unknown = parse_unknown_args(unknown)
 
-    assert not "a" in unknown
+    assert "a" not in unknown
     ref = {
         "b": "b",
         "c": 12.5,
@@ -110,3 +110,49 @@ def test_config_format():
     print(config)
     ref = {"a": {"b": 1.0}, "x": 1.0}
     assert config == ref
+
+
+def test_wandb_input():
+    """
+    This test confirms, that a mixture of wandb-type (depth by dot) and
+    edflow-type (depth by slash) arguments are converted to purely edflow-type
+    arguemnts, resulting in a correct config dict.
+    """
+    import argparse
+
+    A = argparse.ArgumentParser()
+
+    A.add_argument("--a", default="a", type=str)
+
+    passed = [
+        "--a",
+        "c",
+        "--l",
+        "abc",
+        "--m",
+        "{'asd': 3.5}",
+        "--abc/def",
+        "1.0",
+        "--abc/def/ghi",
+        "2.0",
+        "--abc.jkl",
+        "3.0",
+        "--xyz.0",
+        "4.0",
+    ]
+
+    print(passed)
+    args, unknown = A.parse_known_args(passed)
+
+    unknown = parse_unknown_args(unknown, is_wandb_sweep=True)
+
+    assert "a" not in unknown
+    ref = {
+        "l": "abc",
+        "m": {"asd": 3.5},
+        "abc/def": 1.0,
+        "abc/def/ghi": 2.0,
+        "abc/jkl": 3.0,
+        "xyz/0": 4.0,
+    }
+    assert ref == unknown