def get_parser(**parser_kwargs):
    """Build the argument parser shared by the edflow command-line tools.

    Boolean switches (``--train``, ``--retrain``, ``--debug``,
    ``--wandb_sweep``) can be given as a bare flag (``-t``) or with an
    explicit value (``--train=false``). The explicit form is needed because
    wandb's sweep agent always passes arguments as ``--key=value``.

    Parameters
    ----------
    **parser_kwargs
        Forwarded unchanged to :class:`argparse.ArgumentParser`, e.g.
        ``add_help=False`` when the parser is used as a parent parser.

    Returns
    -------
    parser : argparse.ArgumentParser
        Parser knowing the options ``name``, ``base``, ``train``,
        ``project``, ``checkpoint``, ``retrain``, ``log_level``, ``debug``
        and ``wandb_sweep``.
    """

    def str2bool(v):
        """Convert a commandline token to a bool.

        Raises ``argparse.ArgumentTypeError`` for unknown spellings so that
        argparse reports a proper usage error.
        """
        if isinstance(v, bool):
            return v
        lowered = v.lower()
        if lowered in ("yes", "true", "t", "y", "1"):
            return True
        if lowered in ("no", "false", "f", "n", "0"):
            return False
        raise argparse.ArgumentTypeError("Boolean value expected.")

    # Shared settings of all boolean switches: a bare flag yields ``const``,
    # an omitted flag yields ``default``, an explicit value goes through
    # ``str2bool``.
    bool_flag = {"type": str2bool, "nargs": "?", "const": True, "default": False}

    parser = argparse.ArgumentParser(**parser_kwargs)
    parser.add_argument(
        "-n", "--name", metavar="description", help="postfix of log directory."
    )
    parser.add_argument(
        "-b",
        "--base",
        nargs="*",
        metavar="base_config.yaml",
        help="paths to base configs. Loaded from left-to-right. "
        "Parameters can be overwritten or added with command-line options of the form `--key value`.",
        default=None,
    )
    parser.add_argument("-t", "--train", help="run in training mode", **bool_flag)
    parser.add_argument("-p", "--project", help="path to existing project")
    parser.add_argument("-c", "--checkpoint", help="path to existing checkpoint")
    parser.add_argument("-r", "--retrain", help="reset global step", **bool_flag)
    parser.add_argument(
        "-l",
        "--log_level",
        metavar="LEVEL",
        type=str,
        choices=["warn", "info", "debug", "critical"],
        default="info",
        help="set the logging level.",
    )
    parser.add_argument(
        "-d", "--debug", help="enable post-mortem debugging", **bool_flag
    )
    parser.add_argument(
        "-w",
        "--wandb_sweep",
        # The trailing space after the comma matters: adjacent string
        # literals are concatenated verbatim.
        help="Process additional arguments supplied by wandb's sweep mechanism, "
        "i.e. replace dots ('.') with slashes ('/') in the argument name: "
        "--par.at.level=3 => --par/at/level 3",
        **bool_flag,
    )

    return parser
" - "Parameters can be overwritten or added with command-line options of the form `--key value`.", - default=None, - ) - parser.add_argument( - "-t", "--train", action="store_true", help="run in training mode" - ) - parser.add_argument("-p", "--project", help="path to existing project") - parser.add_argument("-c", "--checkpoint", help="path to existing checkpoint") - parser.add_argument( - "-r", "--retrain", action="store_true", help="reset global step" - ) - parser.add_argument( - "-l", - "--log_level", - metavar="LEVEL", - type=str, - choices=["warn", "info", "debug", "critical"], - default="info", - help="set the logging level.", - ) - parser.add_argument("-d", "--debug", action="store_true", help="enable post-mortem debugging") + parser = get_parser() opt, unknown = parser.parse_known_args() - additional_kwargs = parse_unknown_args(unknown) + additional_kwargs = parse_unknown_args(unknown, opt.wandb_sweep) if not opt.debug: main(opt, additional_kwargs) diff --git a/edflow/edprep b/edflow/edprep new file mode 100644 index 0000000..4c1aa57 --- /dev/null +++ b/edflow/edprep @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +''' +This script is meant to be used together with wandb's great sweep +functionality. The workflow looks as follows: + + +1. Create a sweep file from your wandb web client. You can put in anything + under ``Settings/Training Script`` or leave it as it is. This value will be + changed to match you local edflow later on. Choose all parameters you want + to sweep over and nothgin else. Don't worry about the dots in the preview, + these will be fixed automatically later on. + +2. Download the sweep file to your prefered location. + +3. Navigate to the script location and run ``edprep [all + additional parameters]. Add all additional parameters as you would when + running ``edflow``. Usually those include ``-t -b + -n ``. You can also add any other + parameter as if working with edflow. 
def load_sweep(sweep_file_path):
    '''Loads a sweep definition from a yaml file.

    Parameters
    ----------
    sweep_file_path : str
        Path to the sweep file, opened in read only mode.

    Returns
    -------
    content : object
        Whatever ``yaml.safe_load`` yields for the file.
    '''
    with open(sweep_file_path, 'r') as sf:
        return yaml.safe_load(sf)


def prepare_content(content, opt, additional_kwargs):
    '''Exchanges the program parameter of the sweep file with the local edflow
    executable and adds all commandline parameters as constant parameters for
    the sweep arguments.

    Parameters
    ----------
    content : dict
        Content of the sweep file. A missing or empty ``parameters`` entry is
        created on demand.
    opt : Namespace
        Parsed arguments from the commandline. Every attribute which is not
        ``None`` is added as a constant parameter.
    additional_kwargs : dict
        Preprocessed additional commandline kwargs.

    Returns
    -------
    content : dict
        Updated version of the content (the same object, modified in place).

    Raises
    ------
    subprocess.CalledProcessError
        If ``which edflow`` exits with a non-zero status.
    '''
    # The sweep has to call the edflow executable of *this* environment.
    edexec = subprocess.check_output("which edflow", shell=True).decode("utf-8")
    content['program'] = edexec.strip()

    for key, value in additional_kwargs.items():
        _add_const_parameter(content, key, value)

    # Parsed options are written last, so they win over an additional kwarg
    # with the same name.
    # NOTE(review): when called from the script's main section ``opt`` also
    # carries the positional ``sweep`` path, which ends up as a constant
    # parameter as well - confirm this is intended.
    for key, value in vars(opt).items():
        if value is not None:
            _add_const_parameter(content, key, value)

    return content


def _add_const_parameter(content, key, value):
    '''Adds ``key`` to the sweep parameters as a constant with value ``value``.

    Lists are flattened to one space separated string. ``content`` is
    modified in place.
    '''
    if isinstance(value, list):
        # str() guards against non-string list items.
        value = ' '.join(str(v) for v in value)

    # A sweep file without (or with an empty) ``parameters`` section must not
    # crash the preparation.
    if content.get('parameters') is None:
        content['parameters'] = {}

    content['parameters'][key] = {'value': value, 'distribution': 'constant'}


def _backup_path(sweep_file_path, timestamp):
    '''Returns the hidden backup location right next to the sweep file.'''
    # Only the file name gets the leading dot. Prefixing the whole path would
    # point into a non-existent directory as soon as the path has a directory
    # component.
    directory, filename = os.path.split(sweep_file_path)
    return os.path.join(directory, f'.{filename}.{timestamp}.backup')


def store_sweep(content, sweep_file_path):
    '''Stores the updated sweep file and makes a backup of the old one.

    The old file is renamed to ``.<name>.<unix time>.backup`` in the same
    directory before the new content is written to ``sweep_file_path``.
    '''
    os.rename(sweep_file_path, _backup_path(sweep_file_path, time.time()))
    with open(sweep_file_path, 'w') as sf:
        sf.write(yaml.safe_dump(content, indent=2))
def test_wandb_input():
    """
    Checks that wandb-type keys (depth by dot) and edflow-type keys (depth by
    slash) can be mixed on the commandline and all end up as edflow-type keys
    in the parsed kwargs, while arguments known to the parser are left out.
    """
    import argparse

    known_parser = argparse.ArgumentParser()
    known_parser.add_argument("--a", default="a", type=str)

    # (flag, raw value) pairs, flattened below into an argv-like list.
    pairs = [
        ("--a", "c"),
        ("--l", "abc"),
        ("--m", "{'asd': 3.5}"),
        ("--abc/def", "1.0"),
        ("--abc/def/ghi", "2.0"),
        ("--abc.jkl", "3.0"),
        ("--xyz.0", "4.0"),
    ]
    passed = [token for pair in pairs for token in pair]

    print(passed)
    _, leftover = known_parser.parse_known_args(passed)

    parsed = parse_unknown_args(leftover, is_wandb_sweep=True)

    # ``--a`` is consumed by the parser and must not show up again.
    assert "a" not in parsed

    expected = {
        "l": "abc",
        "m": {"asd": 3.5},
        "abc/def": 1.0,
        "abc/def/ghi": 2.0,
        "abc/jkl": 3.0,
        "xyz/0": 4.0,
    }
    assert expected == parsed