From 07b3d9b8722a31a83547fce4b38eedbdc1e1b402 Mon Sep 17 00:00:00 2001 From: Johannes Haux Date: Wed, 26 Feb 2020 15:29:26 +0100 Subject: [PATCH 1/5] :tada: Wandb sweeps for edflow The great functionality of wandb sweeps cannot be used out of the box with edflow. The new `edprep` command helps with this. Now you can 1. Define a sweep from your wandb client 2. edprep the sweep.yaml so that it is compatible with edflow 3. run the wandb sweep For more details see the docstring in edprep --- edflow/args.py | 63 +++++++++++++++ edflow/config/commandline_kwargs.py | 15 +++- edflow/edflow | 35 +------- edflow/edprep | 110 ++++++++++++++++++++++++++ edflow/iterators/template_iterator.py | 4 +- setup.py | 1 + 6 files changed, 193 insertions(+), 35 deletions(-) create mode 100644 edflow/args.py create mode 100644 edflow/edprep diff --git a/edflow/args.py b/edflow/args.py new file mode 100644 index 0000000..ffdedf5 --- /dev/null +++ b/edflow/args.py @@ -0,0 +1,63 @@ +import argparse + + +def get_parser(**parser_kwargs): + def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + parser = argparse.ArgumentParser(**parser_kwargs) + parser.add_argument( + "-n", "--name", metavar="description", help="postfix of log directory." + ) + parser.add_argument( + "-b", + "--base", + nargs="*", + metavar="base_config.yaml", + help="paths to base configs. Loaded from left-to-right. 
" + "Parameters can be overwritten or added with command-line options of the form `--key value`.", + default=None, + ) + parser.add_argument( + "-t", "--train", + type=str2bool, const=True, default=False, nargs='?', + help="run in training mode" + ) + parser.add_argument("-p", "--project", help="path to existing project") + parser.add_argument("-c", "--checkpoint", help="path to existing checkpoint") + parser.add_argument( + "-r", "--retrain", + type=str2bool, const=True, default=False, nargs='?', + help="reset global step" + ) + parser.add_argument( + "-l", + "--log_level", + metavar="LEVEL", + type=str, + choices=["warn", "info", "debug", "critical"], + default="info", + help="set the logging level.", + ) + parser.add_argument("-d", "--debug", type=str2bool, nargs='?', const=True, + default=False, help="enable post-mortem debugging") + parser.add_argument( + "-w", + "--wandb_sweep", + nargs='?', + const=True, + type=str2bool, + default=False, + help="Process additional arguments supplied by wandb's sweep mechanism," + "i.e. 
replace dots ('.') with slashes ('/') in the argument name: " + "--par.at.level=3 => --par/at/level 3", + ) + + return parser diff --git a/edflow/config/commandline_kwargs.py b/edflow/config/commandline_kwargs.py index 6d7b3d4..ad34e0f 100644 --- a/edflow/config/commandline_kwargs.py +++ b/edflow/config/commandline_kwargs.py @@ -19,7 +19,17 @@ def replace(k): walk(config, replace, inplace=True) -def parse_unknown_args(unknown): +def parse_unknown_args(unknown, is_wandb_sweep): + if is_wandb_sweep: + unknown_ = unknown + unknown = [] + for u in unknown_: + if '=' in u: + key, val = u.split('=') + unknown += [key, val] + else: + unknown += [u] + kwargs = {} for i in range(len(unknown)): key = unknown[i] @@ -42,6 +52,9 @@ def parse_unknown_args(unknown): while key[0] == "-": key = key[1:] + if is_wandb_sweep: + key = key.replace('.', '/') + # Store key key pairs kwargs[key] = value diff --git a/edflow/edflow b/edflow/edflow index a763005..40c0214 100644 --- a/edflow/edflow +++ b/edflow/edflow @@ -18,6 +18,7 @@ from edflow.custom_logging import run, get_logger # noqa from edflow.hooks.checkpoint_hooks.common import get_latest_checkpoint # noqa from edflow.config import parse_unknown_args, update_config from edflow.util import retrieve +from edflow.args import get_parser def load_config(base_configs, additional_kwargs): @@ -124,40 +125,10 @@ def main(opt, additional_kwargs): if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "-n", "--name", metavar="description", help="postfix of log directory." - ) - parser.add_argument( - "-b", - "--base", - nargs="*", - metavar="base_config.yaml", - help="paths to base configs. Loaded from left-to-right. 
" - "Parameters can be overwritten or added with command-line options of the form `--key value`.", - default=None, - ) - parser.add_argument( - "-t", "--train", action="store_true", help="run in training mode" - ) - parser.add_argument("-p", "--project", help="path to existing project") - parser.add_argument("-c", "--checkpoint", help="path to existing checkpoint") - parser.add_argument( - "-r", "--retrain", action="store_true", help="reset global step" - ) - parser.add_argument( - "-l", - "--log_level", - metavar="LEVEL", - type=str, - choices=["warn", "info", "debug", "critical"], - default="info", - help="set the logging level.", - ) - parser.add_argument("-d", "--debug", action="store_true", help="enable post-mortem debugging") + parser = get_parser() opt, unknown = parser.parse_known_args() - additional_kwargs = parse_unknown_args(unknown) + additional_kwargs = parse_unknown_args(unknown, opt.wandb_sweep) if not opt.debug: main(opt, additional_kwargs) diff --git a/edflow/edprep b/edflow/edprep new file mode 100644 index 0000000..30bb825 --- /dev/null +++ b/edflow/edprep @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +''' +This script is meant to be used together with wandb's great sweep +functionality. The workflow looks as follows: + + +1. Create a sweep file from your wandb web client. You can put in anything + under ``Settings/Training Script`` or leave it as it is. This value will be + changed to match you local edflow later on. Choose all parameters you want + to sweep over and nothgin else. Don't worry about the dots in the preview, + these will be fixed automatically later on. + +2. Download the sweep file to your prefered location. + +3. Navigate to the script location and run ``edprep [all + additional parameters]. Add all additional parameters as you would when + running ``edflow``. Usually those include ``-t -b + -n ``. You can also add any other + parameter as if working with edflow. 
All parameters are added to the sweep + as constants, which are not varied. + +4. Now follow the wandb manual: run ``wandb sweep `` + +5. copy the command from the output of step 4 and run it. It should look + something like this: ``wandb agent ``. +''' + +import os +import argparse # noqa +import yaml # noqa +import time +import subprocess + +from edflow.config import parse_unknown_args +from edflow.args import get_parser + + +def load_sweep(sweep_file_path): + '''Loads a yaml file in read only mode''' + with open(sweep_file_path, 'r') as sf: + content = yaml.safe_load(sf) + + return content + + +def prepare_content(content, opt, additional_kwargs): + '''Exchanges the program parameter of the sweep file with the local edflow + executable and adds all commandline parameters as constant parameters for + the sweep arguments. + + Parameters + ---------- + content : dict + content of the sweep file. Needs at least the key ``parameters``. + opt : Namespace + Parsed arguments from the commandline. + additional_kwargs : dict + Preprocessed addtitional commandline kwargs. + + Returns + ------- + content : dict + Updated version of the content. 
+ ''' + edexec = subprocess.check_output("which edflow", shell=True).decode("utf-8") + edexec = edexec.replace('\n', '') + content['program'] = edexec + + for key, value in additional_kwargs.items(): + _add_const_parameter(content, key, value) + + for [key, value] in opt._get_kwargs(): + if value is not None: + _add_const_parameter(content, key, value) + + return content + +def _add_const_parameter(content, key, value): + par_dict = {'value': value, 'distribution': 'constant'} + content['parameters'][key] = par_dict + + +def store_sweep(content, sweep_file_path): + '''Stores the updated sweep file and makes a backup of the old one.''' + os.rename(sweep_file_path, f'.{sweep_file_path}.{time.time()}.backup') + with open(sweep_file_path, 'w') as sf: + sf.write(yaml.safe_dump(content, indent=2)) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser( + parents=[get_parser(add_help=False)], + description=__doc__ + '\nNote: the option ``--wand_sweep`` will ' + 'always be set to True!', + formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument('sweep', metavar='SWEEP.YAML', + help='Sweep File as created by wandb. 
See ' + 'https://docs.wandb.com/sweeps for more.') + + opt, unknown = parser.parse_known_args() + + if not opt.wandb_sweep: + opt.wandb_sweep = True + + additional_kwargs = parse_unknown_args(unknown, opt.wandb_sweep) + + content = load_sweep(opt.sweep) + content = prepare_content(content, opt, additional_kwargs) + store_sweep(content, opt.sweep) diff --git a/edflow/iterators/template_iterator.py b/edflow/iterators/template_iterator.py index eacb079..ce0ab6b 100644 --- a/edflow/iterators/template_iterator.py +++ b/edflow/iterators/template_iterator.py @@ -72,9 +72,9 @@ def __init__(self, *args, **kwargs): ) os.environ["WANDB_RESUME"] = "allow" - os.environ["WANDB_RUN_ID"] = ProjectManager.root.strip("/").replace( + os.environ.setdefault("WANDB_RUN_ID", ProjectManager.root.strip("/").replace( "/", "-" - ) + )) wandb_project = set_default( self.config, "integrations/wandb/project", None ) diff --git a/setup.py b/setup.py index f63d75f..1724f7e 100644 --- a/setup.py +++ b/setup.py @@ -58,6 +58,7 @@ "edflow/edeval", "edflow/edsetup", "edflow/edexplore", + "edflow/edprep", ], python_requires=">=3.6", classifiers=[ From c3254113ada40fd0f829d496dd1d361d859ca927 Mon Sep 17 00:00:00 2001 From: Johannes Haux Date: Wed, 26 Feb 2020 15:30:39 +0100 Subject: [PATCH 2/5] :memo: Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b5fce4e..ef7a001 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Wandb sweeps! Prepare your sweep with wandb, edprep your sweep file and go sweepin'! - Root parameter for image and numpy loader of the meta dataset. 
`root` is prepended to the given paths and thus allows for smaller label arrays - Category loader allows to convert a given label into a more expressive category, which is specifed in the dataset's `meta.yaml` - Debug options: `debug/disable_integrations=True`, `debug/max_examples=5 batches`. From 93eed15f9a882087957a9bc25b69b29fdedf9a92 Mon Sep 17 00:00:00 2001 From: Johannes Haux Date: Wed, 26 Feb 2020 16:18:53 +0100 Subject: [PATCH 3/5] :bug: Fixes list argument bug This is specific only to the --base argument of the edflow and thus edprep command. The current hack is to simply join a list using a space. This is hacky and will probably lead to errors in the future, should the commandline interface be extended. --- edflow/edprep | 2 ++ 1 file changed, 2 insertions(+) diff --git a/edflow/edprep b/edflow/edprep index 30bb825..4c1aa57 100644 --- a/edflow/edprep +++ b/edflow/edprep @@ -76,6 +76,8 @@ def prepare_content(content, opt, additional_kwargs): return content def _add_const_parameter(content, key, value): + if isinstance(value, list): + value = ' '.join(value) par_dict = {'value': value, 'distribution': 'constant'} content['parameters'][key] = par_dict From fe6e9106a80b7290a5b61d6a60fcf2c9904291d9 Mon Sep 17 00:00:00 2001 From: Johannes Haux Date: Wed, 26 Feb 2020 16:20:35 +0100 Subject: [PATCH 4/5] formatting --- edflow/args.py | 41 ++++++++++++++++++--------- edflow/config/commandline_kwargs.py | 6 ++-- edflow/iterators/template_iterator.py | 6 ++-- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/edflow/args.py b/edflow/args.py index ffdedf5..30c4661 100644 --- a/edflow/args.py +++ b/edflow/args.py @@ -4,13 +4,13 @@ def get_parser(**parser_kwargs): def str2bool(v): if isinstance(v, bool): - return v - if v.lower() in ('yes', 'true', 't', 'y', '1'): + return v + if v.lower() in ("yes", "true", "t", "y", "1"): return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): + elif v.lower() in ("no", "false", "f", "n", "0"): return False else: 
- raise argparse.ArgumentTypeError('Boolean value expected.') + raise argparse.ArgumentTypeError("Boolean value expected.") parser = argparse.ArgumentParser(**parser_kwargs) parser.add_argument( @@ -26,16 +26,24 @@ def str2bool(v): default=None, ) parser.add_argument( - "-t", "--train", - type=str2bool, const=True, default=False, nargs='?', - help="run in training mode" + "-t", + "--train", + type=str2bool, + const=True, + default=False, + nargs="?", + help="run in training mode", ) parser.add_argument("-p", "--project", help="path to existing project") parser.add_argument("-c", "--checkpoint", help="path to existing checkpoint") parser.add_argument( - "-r", "--retrain", - type=str2bool, const=True, default=False, nargs='?', - help="reset global step" + "-r", + "--retrain", + type=str2bool, + const=True, + default=False, + nargs="?", + help="reset global step", ) parser.add_argument( "-l", @@ -46,12 +54,19 @@ def str2bool(v): default="info", help="set the logging level.", ) - parser.add_argument("-d", "--debug", type=str2bool, nargs='?', const=True, - default=False, help="enable post-mortem debugging") + parser.add_argument( + "-d", + "--debug", + type=str2bool, + nargs="?", + const=True, + default=False, + help="enable post-mortem debugging", + ) parser.add_argument( "-w", "--wandb_sweep", - nargs='?', + nargs="?", const=True, type=str2bool, default=False, diff --git a/edflow/config/commandline_kwargs.py b/edflow/config/commandline_kwargs.py index ad34e0f..8b3e701 100644 --- a/edflow/config/commandline_kwargs.py +++ b/edflow/config/commandline_kwargs.py @@ -24,8 +24,8 @@ def parse_unknown_args(unknown, is_wandb_sweep): unknown_ = unknown unknown = [] for u in unknown_: - if '=' in u: - key, val = u.split('=') + if "=" in u: + key, val = u.split("=") unknown += [key, val] else: unknown += [u] @@ -53,7 +53,7 @@ def parse_unknown_args(unknown, is_wandb_sweep): key = key[1:] if is_wandb_sweep: - key = key.replace('.', '/') + key = key.replace(".", "/") # Store key key 
pairs kwargs[key] = value diff --git a/edflow/iterators/template_iterator.py b/edflow/iterators/template_iterator.py index ce0ab6b..191c270 100644 --- a/edflow/iterators/template_iterator.py +++ b/edflow/iterators/template_iterator.py @@ -72,9 +72,9 @@ def __init__(self, *args, **kwargs): ) os.environ["WANDB_RESUME"] = "allow" - os.environ.setdefault("WANDB_RUN_ID", ProjectManager.root.strip("/").replace( - "/", "-" - )) + os.environ.setdefault( + "WANDB_RUN_ID", ProjectManager.root.strip("/").replace("/", "-") + ) wandb_project = set_default( self.config, "integrations/wandb/project", None ) From 33350d6006c4cf6ef17cfb981cf292f5105a0c85 Mon Sep 17 00:00:00 2001 From: Johannes Haux Date: Thu, 27 Feb 2020 11:01:49 +0100 Subject: [PATCH 5/5] :bug: Fixes default behavior to be backward compatible and adds test The additional test confirms that a mixture of wandb-type (depth by dot) and edflow-type (depth by slash) arguments are converted to purely edflow-type arguments, resulting in a correct config dict. 
--- edflow/config/commandline_kwargs.py | 2 +- tests/test_config.py | 48 ++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/edflow/config/commandline_kwargs.py b/edflow/config/commandline_kwargs.py index 8b3e701..7ebdb14 100644 --- a/edflow/config/commandline_kwargs.py +++ b/edflow/config/commandline_kwargs.py @@ -19,7 +19,7 @@ def replace(k): walk(config, replace, inplace=True) -def parse_unknown_args(unknown, is_wandb_sweep): +def parse_unknown_args(unknown, is_wandb_sweep=False): if is_wandb_sweep: unknown_ = unknown unknown = [] diff --git a/tests/test_config.py b/tests/test_config.py index 9c2387d..6875545 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -50,7 +50,7 @@ def test_basic_parsing(): unknown = parse_unknown_args(unknown) - assert not "a" in unknown + assert "a" not in unknown ref = { "b": "b", "c": 12.5, @@ -110,3 +110,49 @@ def test_config_format(): print(config) ref = {"a": {"b": 1.0}, "x": 1.0} assert config == ref + + +def test_wandb_input(): + """ + This test confirms, that a mixture of wandb-type (depth by dot) and + edflow-type (depth by slash) arguments are converted to purely edflow-type + arguemnts, resulting in a correct config dict. + """ + import argparse + + A = argparse.ArgumentParser() + + A.add_argument("--a", default="a", type=str) + + passed = [ + "--a", + "c", + "--l", + "abc", + "--m", + "{'asd': 3.5}", + "--abc/def", + "1.0", + "--abc/def/ghi", + "2.0", + "--abc.jkl", + "3.0", + "--xyz.0", + "4.0", + ] + + print(passed) + args, unknown = A.parse_known_args(passed) + + unknown = parse_unknown_args(unknown, is_wandb_sweep=True) + + assert "a" not in unknown + ref = { + "l": "abc", + "m": {"asd": 3.5}, + "abc/def": 1.0, + "abc/def/ghi": 2.0, + "abc/jkl": 3.0, + "xyz/0": 4.0, + } + assert ref == unknown