From 5782eb5aaa1455a6c6a264d6469d40276d9c2f6a Mon Sep 17 00:00:00 2001 From: basile Date: Tue, 14 Mar 2023 14:46:54 -0400 Subject: [PATCH 1/6] add assume ready for image (and extra-inputs) --- datalad_container/containers_run.py | 65 +++++++++++++++++++---------- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index c3db792..6feffa4 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -26,6 +26,7 @@ from datalad.utils import ensure_iter from datalad_container.find_container import find_container_ +from datalad.support.constraints import EnsureChoice lgr = logging.getLogger("datalad.containers.containers_run") @@ -41,9 +42,19 @@ metavar="NAME", doc="""Specify the name of or a path to a known container to use for execution, in case multiple containers are configured."""), + assume_ready=Parameter( + args=("--assume-ready",), + nargs="*", + constraints=EnsureChoice(None, "image", "inputs", "outputs", "extra-inputs"), + doc="""Assume that inputs do not need to be retrieved and/or outputs do not + need to unlocked or removed, or containers/extra-inputs to be retrieved, + before running the command. This option allows + you to avoid the expense of these preparation steps if you know that they + are unnecessary."""), ) + @build_doc # all commands must be derived from Interface class ContainersRun(Interface): @@ -79,7 +90,7 @@ class ContainersRun(Interface): @eval_results def __call__(cmd, container_name=None, dataset=None, inputs=None, outputs=None, message=None, expand=None, - explicit=False, sidecar=None): + explicit=False, sidecar=None, assume_ready=None): from unittest.mock import \ patch # delayed, since takes long (~600ms for yoh) pwd, _ = get_command_pwds(dataset) @@ -154,28 +165,39 @@ def __call__(cmd, container_name=None, dataset=None, # just prepend and pray cmd = container['path'] + ' ' + cmd + assume_ready = assume_ready or [] extra_inputs = [] - for extra_input in ensure_iter(container.get("extra-input",[]), set): - try: - xi_kwargs = dict( - img_dspath=image_dspath, - img_dirpath=op.dirname(image_path) or ".", - ) - extra_inputs.append(extra_input.format(**xi_kwargs)) - except KeyError as exc: - yield get_status_dict( - 'run', - ds=ds, - status='error', - message=( - 'Unrecognized extra_input placeholder: %s. ' - 'See containers-add for information on known ones: %s', - exc, - ", ".join(xi_kwargs))) - return + if not "extra-inputs" in assume_ready: + for extra_input in ensure_iter(container.get("extra-input",[]), set): + try: + xi_kwargs = dict( + img_dspath=image_dspath, + img_dirpath=op.dirname(image_path) or ".", + ) + extra_inputs.append(extra_input.format(**xi_kwargs)) + except KeyError as exc: + yield get_status_dict( + 'run', + ds=ds, + status='error', + message=( + 'Unrecognized extra_input placeholder: %s. ' + 'See containers-add for information on known ones: %s', + exc, + ", ".join(xi_kwargs))) + return + assume_ready = [ar for ar in assume_ready if ar != 'extra-inputs'] + + if not "image" in assume_ready: + extra_inputs.append(image_path) + assume_ready = [ar for ar in assume_ready if ar != 'image'] lgr.debug("extra_inputs = %r", extra_inputs) + # we can assume "both" with params choices and filtering above + if len(assume_ready) > 1: + assume_ready='both' + with patch.dict('os.environ', {CONTAINER_NAME_ENVVAR: container['name']}): # fire! @@ -183,10 +205,11 @@ def __call__(cmd, container_name=None, dataset=None, cmd=cmd, dataset=dataset or (ds if ds.path == pwd else None), inputs=inputs, - extra_inputs=[image_path] + extra_inputs, + extra_inputs=extra_inputs, outputs=outputs, message=message, expand=expand, explicit=explicit, - sidecar=sidecar): + sidecar=sidecar, + assume_ready=assume_ready): yield r From 1aea8abb418222c404aa682b57d3e9726a3365cf Mon Sep 17 00:00:00 2001 From: basile Date: Wed, 15 Mar 2023 11:49:35 -0400 Subject: [PATCH 2/6] explicit check, convert list to string/None in all cases --- datalad_container/containers_run.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 6feffa4..5efd0f0 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -194,9 +194,12 @@ def __call__(cmd, container_name=None, dataset=None, lgr.debug("extra_inputs = %r", extra_inputs) - # we can assume "both" with params choices and filtering above - if len(assume_ready) > 1: - assume_ready='both' + if len(assume_ready) == 0: + assume_ready = None + elif len(assume_ready) == 1: + assume_ready = assume_ready[0] + elif "inputs" in assume_ready and "outputs" in assume_ready: + assume_ready = "both" with patch.dict('os.environ', {CONTAINER_NAME_ENVVAR: container['name']}): From 804cc0a23d7f525bbeb36f09d483866795e911a8 Mon Sep 17 00:00:00 2001 From: Basile Date: Tue, 21 Mar 2023 14:14:53 -0400 Subject: [PATCH 3/6] Update datalad_container/containers_run.py Co-authored-by: Yaroslav Halchenko --- datalad_container/containers_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 5efd0f0..0222183 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -167,7 +167,7 @@ def __call__(cmd, container_name=None, dataset=None, assume_ready = assume_ready or [] extra_inputs = [] - if not "extra-inputs" in assume_ready: + if "extra-inputs" not in assume_ready: for extra_input in ensure_iter(container.get("extra-input",[]), set): try: xi_kwargs = dict( From 28e6eb71c4d2ba598fbff53340a6545dfde69569 Mon Sep 17 00:00:00 2001 From: basile Date: Tue, 21 Mar 2023 14:18:40 -0400 Subject: [PATCH 4/6] fixes following YH review --- datalad_container/containers_run.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 0222183..4d139d9 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -186,11 +186,14 @@ def __call__(cmd, container_name=None, dataset=None, exc, ", ".join(xi_kwargs))) return - assume_ready = [ar for ar in assume_ready if ar != 'extra-inputs'] + else: + # filter the whole list as .remove only removes first instance + assume_ready = [ar for ar in assume_ready if ar != 'extra-inputs'] - if not "image" in assume_ready: + if "image" not in assume_ready: extra_inputs.append(image_path) - assume_ready = [ar for ar in assume_ready if ar != 'image'] + else: + assume_ready = [ar for ar in assume_ready if ar != 'image'] lgr.debug("extra_inputs = %r", extra_inputs) From 04aab52b4bafbb50dc29f9f212a580e963a89d0c Mon Sep 17 00:00:00 2001 From: Basile Date: Tue, 21 Mar 2023 14:18:09 -0400 Subject: [PATCH 5/6] Update datalad_container/containers_run.py Co-authored-by: Yaroslav Halchenko --- datalad_container/containers_run.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 4d139d9..fb6887e 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -197,12 +197,14 @@ def __call__(cmd, container_name=None, dataset=None, lgr.debug("extra_inputs = %r", extra_inputs) - if len(assume_ready) == 0: + if not assume_ready: assume_ready = None elif len(assume_ready) == 1: assume_ready = assume_ready[0] elif "inputs" in assume_ready and "outputs" in assume_ready: assume_ready = "both" + else: + raise ValueError(f"Ended up with assume_ready={assume_ready!r} which must have not happened") with patch.dict('os.environ', {CONTAINER_NAME_ENVVAR: container['name']}): From 037de79eeef1b7e084bdb9f1f2012c1bdcc5a0d7 Mon Sep 17 00:00:00 2001 From: bpinsard Date: Tue, 19 Mar 2024 10:58:34 -0400 Subject: [PATCH 6/6] add tests for assume_ready --- datalad_container/tests/test_run.py | 73 +++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/datalad_container/tests/test_run.py b/datalad_container/tests/test_run.py index c3aa6e0..879e91e 100644 --- a/datalad_container/tests/test_run.py +++ b/datalad_container/tests/test_run.py @@ -276,6 +276,79 @@ def test_extra_inputs(path=None): ) == set(runinfo.get("extra_inputs", set())) +@with_tree( + tree={ + "container.img": "image file", + "input.txt": "input data", + "overlay1.img": "overlay1", + } +) +def test_assume_ready(path=None): + ds = Dataset(path).create(force=True, **common_kwargs) + ds.containers_add( + "mycontainer", + image="container.img", + call_fmt="echo image={img} cmd={cmd} img_dspath={img_dspath} img_dirpath={img_dirpath} > out.log", + extra_input=["overlay1.img"], + **common_kwargs + ) + ds.save(**common_kwargs) + # assume image is ready + ds.containers_run( + "XXX", + container_name="mycontainer", + assume_ready=['image'], + **common_kwargs) + ok_file_has_content( + os.path.join(ds.repo.path, "out.log"), + "image=container.img", + re_=True, + ) + commit_msg = ds.repo.call_git(["show", "--format=%B"]) + cmd, runinfo = get_run_info(ds, commit_msg) + assert "container.img" not in runinfo.get("extra_inputs", []) + + # fails if erroneous assume_ready value + with pytest.raises(ValueError): + ds.containers_run( + "XXX", + inputs=['input.txt'], + container_name="mycontainer", + assume_ready=['inputsssstypo', 'outputs'], + **common_kwargs) + + # fail when output is assume ready but is not unlocked + with pytest.raises(IncompleteResultsError): + ds.containers_run( + "XXX", + inputs=['input.txt'], + outputs=['out.log'], + container_name="mycontainer", + assume_ready=['inputs', 'outputs'], + **common_kwargs) + + # assume inputs as ready, pass to regular `run` + ds.containers_run( + "YYY", + inputs=['input.txt'], + outputs=['out.log'], + container_name="mycontainer", + assume_ready=['inputs'], + **common_kwargs) + commit_msg = ds.repo.call_git(["show", "--format=%B"]) + cmd, runinfo = get_run_info(ds, commit_msg) + + ds.containers_run( + "ZZZ", + container_name="mycontainer", + outputs=['out.log'], + assume_ready=['extra-inputs'], + **common_kwargs) + commit_msg = ds.repo.call_git(["show", "--format=%B"]) + cmd, runinfo = get_run_info(ds, commit_msg) + assert 'overlay1.img' not in runinfo.get("extra_inputs", []) + + @skip_if_no_network @with_tree(tree={"subdir": {"in": "innards"}}) def test_run_no_explicit_dataset(path=None):