pesser · pesser · May 12, 2020 · Feb 26, 2020 · Feb 26, 2020 · Feb 26, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 ### Added
+- Wandb sweeps! Prepare your sweep with wandb, edprep your sweep file and go sweepin'!
 - Root parameter for image and numpy loader of the meta dataset. `root` is prepended to the given paths and thus allows for smaller label arrays
 - Category loader allows to convert a given label into a more expressive category, which is specifed in the dataset's `meta.yaml`
 - Debug options: `debug/disable_integrations=True`, `debug/max_examples=5 batches`.

diff --git a/edflow/args.py b/edflow/args.py
@@ -0,0 +1,78 @@
+import argparse
+
+
+def get_parser(**parser_kwargs):
+    def str2bool(v):
+        if isinstance(v, bool):
+            return v
+        if v.lower() in ("yes", "true", "t", "y", "1"):
+            return True
+        elif v.lower() in ("no", "false", "f", "n", "0"):
+            return False
+        else:
+            raise argparse.ArgumentTypeError("Boolean value expected.")
+
+    parser = argparse.ArgumentParser(**parser_kwargs)
+    parser.add_argument(
+        "-n", "--name", metavar="description", help="postfix of log directory."
+    )
+    parser.add_argument(
+        "-b",
+        "--base",
+        nargs="*",
+        metavar="base_config.yaml",
+        help="paths to base configs. Loaded from left-to-right. "
+        "Parameters can be overwritten or added with command-line options of the form `--key value`.",
+        default=None,
+    )
+    parser.add_argument(
+        "-t",
+        "--train",
+        type=str2bool,
+        const=True,
+        default=False,
+        nargs="?",
+        help="run in training mode",
+    )
+    parser.add_argument("-p", "--project", help="path to existing project")
+    parser.add_argument("-c", "--checkpoint", help="path to existing checkpoint")
+    parser.add_argument(
+        "-r",
+        "--retrain",
+        type=str2bool,
+        const=True,
+        default=False,
+        nargs="?",
+        help="reset global step",
+    )
+    parser.add_argument(
+        "-l",
+        "--log_level",
+        metavar="LEVEL",
+        type=str,
+        choices=["warn", "info", "debug", "critical"],
+        default="info",
+        help="set the logging level.",
+    )
+    parser.add_argument(
+        "-d",
+        "--debug",
+        type=str2bool,
+        nargs="?",
+        const=True,
+        default=False,
+        help="enable post-mortem debugging",
+    )
+    parser.add_argument(
+        "-w",
+        "--wandb_sweep",
+        nargs="?",
+        const=True,
+        type=str2bool,
+        default=False,
+        help="Process additional arguments supplied by wandb's sweep mechanism,"
+        "i.e. replace dots ('.') with slashes ('/') in the argument name: "
+        "--par.at.level=3 => --par/at/level 3",
+    )
+
+    return parser
diff --git a/edflow/config/commandline_kwargs.py b/edflow/config/commandline_kwargs.py
@@ -19,7 +19,17 @@ def replace(k):
     walk(config, replace, inplace=True)
 
 
-def parse_unknown_args(unknown):
+def parse_unknown_args(unknown, is_wandb_sweep=False):
+    if is_wandb_sweep:
+        unknown_ = unknown
+        unknown = []
+        for u in unknown_:
+            if "=" in u:
+                key, val = u.split("=")
+                unknown += [key, val]
+            else:
+                unknown += [u]
+
     kwargs = {}
     for i in range(len(unknown)):
         key = unknown[i]
@@ -42,6 +52,9 @@ def parse_unknown_args(unknown):
             while key[0] == "-":
                 key = key[1:]
 
+            if is_wandb_sweep:
+                key = key.replace(".", "/")
+
             # Store key key pairs
             kwargs[key] = value
 

diff --git a/edflow/edflow b/edflow/edflow
@@ -18,6 +18,7 @@ from edflow.custom_logging import run, get_logger  # noqa
 from edflow.hooks.checkpoint_hooks.common import get_latest_checkpoint  # noqa
 from edflow.config import parse_unknown_args, update_config
 from edflow.util import retrieve
+from edflow.args import get_parser
 
 
 def load_config(base_configs, additional_kwargs):
@@ -124,40 +125,10 @@ def main(opt, additional_kwargs):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "-n", "--name", metavar="description", help="postfix of log directory."
-    )
-    parser.add_argument(
-        "-b",
-        "--base",
-        nargs="*",
-        metavar="base_config.yaml",
-        help="paths to base configs. Loaded from left-to-right. "
-        "Parameters can be overwritten or added with command-line options of the form `--key value`.",
-        default=None,
-    )
-    parser.add_argument(
-        "-t", "--train", action="store_true", help="run in training mode"
-    )
-    parser.add_argument("-p", "--project", help="path to existing project")
-    parser.add_argument("-c", "--checkpoint", help="path to existing checkpoint")
-    parser.add_argument(
-        "-r", "--retrain", action="store_true", help="reset global step"
-    )
-    parser.add_argument(
-        "-l",
-        "--log_level",
-        metavar="LEVEL",
-        type=str,
-        choices=["warn", "info", "debug", "critical"],
-        default="info",
-        help="set the logging level.",
-    )
-    parser.add_argument("-d", "--debug", action="store_true", help="enable post-mortem debugging")
+    parser = get_parser()
 
     opt, unknown = parser.parse_known_args()
-    additional_kwargs = parse_unknown_args(unknown)
+    additional_kwargs = parse_unknown_args(unknown, opt.wandb_sweep)
 
     if not opt.debug:
         main(opt, additional_kwargs)

diff --git a/edflow/edprep b/edflow/edprep
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+'''
+This script is meant to be used together with wandb's great sweep
+functionality. The workflow looks as follows:
+
+
+1. Create a sweep file from your wandb web client. You can put in anything
+    under ``Settings/Training Script`` or leave it as it is. This value will be
+    changed to match your local edflow later on. Choose all parameters you want
+    to sweep over and nothing else. Don't worry about the dots in the preview,
+    these will be fixed automatically later on.
+
+2. Download the sweep file to your preferred location.
+
+3. Navigate to the script location and run ``edprep <sweep_file.yaml> [all
+    additional parameters]``. Add all additional parameters as you would when
+    running ``edflow``. Usually those include ``-t -b
+    <path/to/some/config.yaml> -n <sweep name>``. You can also add any other
+    parameter as if working with edflow. All parameters are added to the sweep
+    as constants, which are not varied.
+
+4. Now follow the wandb manual: run ``wandb sweep <sweep_file.yaml>``
+
+5. Copy the command from the output of step 4 and run it. It should look
+    something like this: ``wandb agent <sweep_id>``.
+'''
+
+import os
+import argparse  # noqa
+import yaml  # noqa
+import time
+import subprocess
+
+from edflow.config import parse_unknown_args
+from edflow.args import get_parser
+
+
+def load_sweep(sweep_file_path):
+    '''Loads a yaml file in read only mode'''
+    with open(sweep_file_path, 'r') as sf:
+        content = yaml.safe_load(sf)
+
+    return content
+
+
+def prepare_content(content, opt, additional_kwargs):
+    '''Exchanges the program parameter of the sweep file with the local edflow
+    executable and adds all commandline parameters as constant parameters for
+    the sweep arguments.
+
+    Parameters
+    ----------
+    content : dict
+        content of the sweep file. Needs at least the key ``parameters``.
+    opt : Namespace
+        Parsed arguments from the commandline.
+    additional_kwargs : dict
+        Preprocessed additional commandline kwargs.
+
+    Returns
+    -------
+    content : dict
+        Updated version of the content.
+    '''
+    edexec = subprocess.check_output("which edflow", shell=True).decode("utf-8")
+    edexec = edexec.replace('\n', '')
+    content['program'] = edexec
+
+    for key, value in additional_kwargs.items():
+        _add_const_parameter(content, key, value)
+
+    for [key, value] in opt._get_kwargs():
+        if value is not None:
+            _add_const_parameter(content, key, value)
+
+    return content
+
+def _add_const_parameter(content, key, value):
+    if isinstance(value, list):
+        value = ' '.join(value)
+    par_dict = {'value': value, 'distribution': 'constant'}
+    content['parameters'][key] = par_dict
+
+
+def store_sweep(content, sweep_file_path):
+    '''Stores the updated sweep file and makes a backup of the old one.'''
+    os.rename(sweep_file_path, f'.{sweep_file_path}.{time.time()}.backup')
+    with open(sweep_file_path, 'w') as sf:
+        sf.write(yaml.safe_dump(content, indent=2))
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(
+            parents=[get_parser(add_help=False)],
+            description=__doc__ + '\nNote: the option ``--wand_sweep`` will '
+                'always be set to True!',
+            formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument('sweep', metavar='SWEEP.YAML',
+            help='Sweep File as created by wandb. See '
+            'https://docs.wandb.com/sweeps for more.')
+
+    opt, unknown = parser.parse_known_args()
+
+    if not opt.wandb_sweep:
+        opt.wandb_sweep = True
+
+    additional_kwargs = parse_unknown_args(unknown, opt.wandb_sweep)
+
+    content = load_sweep(opt.sweep)
+    content = prepare_content(content, opt, additional_kwargs)
+    store_sweep(content, opt.sweep)
diff --git a/edflow/iterators/template_iterator.py b/edflow/iterators/template_iterator.py
@@ -72,8 +72,8 @@ def __init__(self, *args, **kwargs):
                 )
 
                 os.environ["WANDB_RESUME"] = "allow"
-                os.environ["WANDB_RUN_ID"] = ProjectManager.root.strip("/").replace(
-                    "/", "-"
+                os.environ.setdefault(
+                    "WANDB_RUN_ID", ProjectManager.root.strip("/").replace("/", "-")
                 )
                 wandb_project = set_default(
                     self.config, "integrations/wandb/project", None

diff --git a/setup.py b/setup.py
@@ -58,6 +58,7 @@
         "edflow/edeval",
         "edflow/edsetup",
         "edflow/edexplore",
+        "edflow/edprep",
     ],
     python_requires=">=3.6",
     classifiers=[

diff --git a/tests/test_config.py b/tests/test_config.py
@@ -50,7 +50,7 @@ def test_basic_parsing():
 
     unknown = parse_unknown_args(unknown)
 
-    assert not "a" in unknown
+    assert "a" not in unknown
     ref = {
         "b": "b",
         "c": 12.5,
@@ -110,3 +110,49 @@ def test_config_format():
     print(config)
     ref = {"a": {"b": 1.0}, "x": 1.0}
     assert config == ref
+
+
+def test_wandb_input():
+    """
+    This test confirms that a mixture of wandb-type (depth by dot) and
+    edflow-type (depth by slash) arguments is converted to purely edflow-type
+    arguments, resulting in a correct config dict.
+    """
+    import argparse
+
+    A = argparse.ArgumentParser()
+
+    A.add_argument("--a", default="a", type=str)
+
+    passed = [
+        "--a",
+        "c",
+        "--l",
+        "abc",
+        "--m",
+        "{'asd': 3.5}",
+        "--abc/def",
+        "1.0",
+        "--abc/def/ghi",
+        "2.0",
+        "--abc.jkl",
+        "3.0",
+        "--xyz.0",
+        "4.0",
+    ]
+
+    print(passed)
+    args, unknown = A.parse_known_args(passed)
+
+    unknown = parse_unknown_args(unknown, is_wandb_sweep=True)
+
+    assert "a" not in unknown
+    ref = {
+        "l": "abc",
+        "m": {"asd": 3.5},
+        "abc/def": 1.0,
+        "abc/def/ghi": 2.0,
+        "abc/jkl": 3.0,
+        "xyz/0": 4.0,
+    }
+    assert ref == unknown