diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index c65596a..bb856a7 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -73,7 +73,7 @@ via the ``environment_sources`` key.
 Once this has been done, your environment
-file can be be opened using ``{{ app_module }} open env-source``.
+file can be opened using ``{{ app_module }} open env-source``.
 
-Below is an example environments file that defines an environment for running Pyton scripts.
+Below is an example environments file that defines an environment for running Python scripts.
 
 Domain-specific tools can be added to the environments file as required,
 each with their own setup instructions for loading that tool on your machine.
@@ -88,3 +88,45 @@
     start: 1
     stop: 32
     parallel_mode: null
+
+Note also that any {{ app_name }} environment which activates a Python virtual environment
+as part of its ``setup``
+must also have the {{ app_name }} Python package installed,
+and it must be the same version as the one used to submit the workflow.
+In practice, this is most easily achieved by creating one Python virtual environment
+and using it in each of these {{ app_name }} environments, as well as to submit workflows.
+
+Tips for SLURM
+**************
+
+{{ app_name }} currently has a fault whereby it does not select a SLURM partition
+based on the resources requested in your workflow file.
+As such, you must define the partition manually in your workflow files, e.g.
+
+.. code-block:: yaml
+
+    resources:
+      any:
+        scheduler_args:
+          directives:
+            --time: 00:30:00
+            --partition: serial
+
+Note that many SLURM schedulers also require a time limit to be specified, as shown above.
+
+A `default time limit and partition `_
+can be set in the config file, which will be used for tasks that don't have these set explicitly
+in a ``resources`` block like the example above.
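+
+For illustration, such defaults might look something like this in the config
+file (a sketch only; the exact keys depend on your {{ app_name }} version,
+so check the linked page for the precise form):
+
+.. code-block:: yaml
+
+    schedulers:
+      slurm:
+        defaults:
+          directives:
+            --time: 00:30:00
+            --partition: serial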
\ No newline at end of file
diff --git a/docs/source/user/tutorials/tutorial_resources/advanced_workflow.yaml b/docs/source/user/getting_started/advanced_workflow.yaml
similarity index 76%
rename from docs/source/user/tutorials/tutorial_resources/advanced_workflow.yaml
rename to docs/source/user/getting_started/advanced_workflow.yaml
index 5f26e32..6b2e453 100644
--- a/docs/source/user/tutorials/tutorial_resources/advanced_workflow.yaml
+++ b/docs/source/user/getting_started/advanced_workflow.yaml
@@ -1,6 +1,6 @@
 template_components:
   task_schemas:
-  - objective: process_some_data
+  - objective: process_data
     inputs:
     - parameter: input_data
     outputs:
@@ -10,13 +10,13 @@
       - input_file: my_input_file
         from_inputs:
         - input_data
-        script: <>
+        script: <>
       environments:
       - scope:
           type: any
         environment: python_env
       script_exe: python_script
-      script: <>
+      script: <>
       save_files:
       - processed_file
     output_file_parsers:
@@ -24,7 +24,7 @@
       parsed_output:
         from_files:
         - my_input_file
        - processed_file
-        script: <>
+        script: <>
        save_files:
        - parsed_output
@@ -33,16 +33,17 @@
     - parameter: input_data
     - parameter: path
     actions:
-    - script: <>
+    - script: <>
      script_data_in: direct
      script_exe: python_script
-      save_files: 
+      save_files:
      - my_input_file
      environments:
      - scope:
          type: any
        environment: python_env
-    - script: <>
+      requires_dir: true
+    - script: <>
      script_exe: python_script
      environments:
      - scope:
@@ -50,6 +51,7 @@
         environment: python_env
       save_files:
       - processed_file
+      requires_dir: true
 
   command_files:
   - label: my_input_file
@@ -64,7 +66,7 @@
       name: processed_file.json
 
 tasks:
-- schema: process_some_data
+- schema: process_data
   inputs:
     input_data: [1, 2, 3, 4]
 - schema: process_data_without_input_file_generator
diff --git a/docs/source/user/getting_started/advanced_workflow_concepts.rst b/docs/source/user/getting_started/advanced_workflow_concepts.rst
index 01feaee..c4be50a 100644
--- a/docs/source/user/getting_started/advanced_workflow_concepts.rst
+++ b/docs/source/user/getting_started/advanced_workflow_concepts.rst
@@ -12,13 +12,10 @@ Requesting resources can be done using a ``resources`` block, either for the who
 .. code-block:: yaml
 
     resources:
       any:
-        scheduler: sge # Setting the scheduler is not normally needed because a
-                       # `default_scheduler` will be set in the config file.
-        shell_args:
-          executable_args: ["--login"]
         scheduler_args:
           directives:
-            -l: short
+            --time: 1:00:00
+            --partition: multicore
 
 or at the task level
@@ -67,22 +64,28 @@ resources, and will run the command which matches those resources.
 
 There are lots of :ref:`resource options ` available that can be requested.
 
-Scheduler arguments can be passed like this e.g. to target high memory nodes:
+Scheduler arguments can be passed like this, e.g. to set a time limit of 1 hour:
 
 .. code-block:: yaml
 
     resources:
-      any:
-        num_cores: 10
-        SGE_parallel_env: smp.pe
-        scheduler_args:
-          directives:
-            -l: mem512
+      any:
+        scheduler_args:
+          directives:
+            --time: 1:00:00
+        num_cores: 10
 
 Anything specified under `directives` is passed directly to the scheduler
 as a jobscript command (i.e. isn't processed by {{ app_name }} at all).
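+
+For example, with the directives above, the generated jobscript would contain a
+line roughly like the following (a sketch; the exact form depends on your
+scheduler and shell configuration):
+
+.. code-block:: bash
+
+    #SBATCH --time=1:00:00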
 
 If you have set resource options at the top level (for the whole workflow),
 but would like to "unset" them for a particular task,
-
 you can pass an empty dictionary:
@@ -93,7 +96,6 @@ you can pass an empty dictionary:
 
       num_cores: 16
       scheduler_args:
         directives: {} # "Clear" any previous directives which have been set.
-    inputs:
 
 Task sequences
@@ -141,41 +143,9 @@ Then whichever parameters are linked with the group in the task schema will be r
 
 Here is an example workflow using sequences and groups that you might wish to run
 to solidify your understanding
 
-.. code-block:: yaml
-
-    # groups_workflow.yaml
-
-    template_components:
-      task_schemas:
-      - objective: s1
-        inputs:
-        - parameter: p1
-        outputs:
-        - parameter: p2
-        actions:
-        - commands:
-          - command: echo $(( <> + 1 )) # This is printed to stdout
-          - command: echo $(( <> + 1 )) # This is captured as p2
-            stdout: <>
-      - objective: s2
-        inputs:
-        - parameter: p2
-          group: my_group
-        outputs:
-        - parameter: p3
-        actions:
-        - commands:
-          - command: echo <> # This one is printed to stdout
-          - command: echo $(( <> )) # This is captured as p3
-            stdout: <>
-    tasks:
-    - schema: s1
-      sequences:
-      - path: inputs.p1
-        values: [1, 2]
-      groups:
-      - name: my_group
-    - schema: s2
+.. literalinclude:: groups_workflow.yaml
+   :language: yaml
+
 
 Task schema shortcuts
@@ -243,63 +213,30 @@ This is because an output file parser only has one named output parameter, so a
 dictionary isn't needed to distinguish different output parameters.
 
 The :ref:`previous example ` has been reworked and
-expanded below to demonstrate ``input_file_generators`` and ``output_file_parsers``.
+expanded below to demonstrate ``input_file_generators`` and ``output_file_parsers``,
+along with the alternative code which would be needed to achieve the same result
+as the input file generator:
 
-.. code-block:: yaml
+.. literalinclude:: advanced_workflow.yaml
+   :language: yaml
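+
+For reference, the general shape of the ``output_file_parsers`` block is shown
+below, repeated from the workflow above (the key under it is the single named
+output parameter):
+
+.. code-block:: yaml
+
+    output_file_parsers:
+      parsed_output:
+        from_files:
+        - my_input_file
+        - processed_file
+        script: <>
+        save_files:
+        - parsed_output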
-
-    # workflow.yaml
-    template_components:
-      task_schemas:
-      - objective: process_some_data
-        inputs:
-        - parameter: input_data
-        outputs:
-        - parameter: parsed_output
-        actions:
-        - input_file_generators:
-          - input_file: my_input_file
-            from_inputs:
-            - input_data
-            script: <>
-          environments:
-          - scope:
-              type: any
-            environment: python_env
-          script_exe: python_script
-          script: <>
-          save_files:
-          - processed_file
-          output_file_parsers:
-            parsed_output:
-              from_files:
-              - my_input_file
-              - processed_file
-              script: <>
-              save_files:
-              - parsed_output
-
-This workflow uses the same python scripts as before, with the addition of
-
-.. code-block:: python
-
-    # parse_output.py
-
-    import json
-    def parse_output(my_input_file: str, processed_file: str):
-        """Do some post-processing of data files.
-
-        In this instance, we're just making a dictionary containing both the input
-        and output data.
-        """
-        with open(my_input_file, "r") as f:
-            input_data = json.load(f)
-        with open(processed_file, "r") as f:
-            processed_data = json.load(f)
-
-        combined_data = {"input_data": input_data, "output_data": processed_data}
-        # Save file so we can look at the data
-        with open("parsed_output.json", "w") as f:
-            json.dump(combined_data, f, indent=2)
-
-        return {"parsed_output": combined_data}
 
+This workflow uses the same Python scripts as before, with the addition of ``parse_output.py``:
+
+.. literalinclude:: parse_output.py
+   :language: python
diff --git a/docs/source/user/getting_started/command_files_example.yaml b/docs/source/user/getting_started/command_files_example.yaml
new file mode 100644
index 0000000..615fb07
--- /dev/null
+++ b/docs/source/user/getting_started/command_files_example.yaml
@@ -0,0 +1,41 @@
+# command_files_example.yaml
+template_components:
+  task_schemas:
+  - objective: process_data
+    inputs:
+    - parameter: input_data
+    - parameter: path
+      default_value: input_file.json
+    actions:
+    - script: <>
+      script_data_in: direct
+      script_exe: python_script
+      save_files: # A copy of any command files listed here will be saved in the artifacts directory
+      - my_input_file
+      environments:
+      - scope:
+          type: any
+        environment: python_env
+    - script: <>
+      script_exe: python_script
+      environments:
+      - scope:
+          type: any
+        environment: python_env
+      save_files:
+      - processed_file
+
+  command_files:
+  - label: my_input_file
+    name:
+      name: input_file.json
+  - label: processed_file
+    name:
+      name: processed_file.json
+
+
+tasks:
+- schema: process_data
+  inputs:
+    input_data: [1, 2, 3, 4]
+    path: input_file.json
diff --git a/docs/source/user/getting_started/custom_task_schemas_and_workflows.rst b/docs/source/user/getting_started/custom_task_schemas_and_workflows.rst
index 5861851..f52ef7c 100644
--- a/docs/source/user/getting_started/custom_task_schemas_and_workflows.rst
+++ b/docs/source/user/getting_started/custom_task_schemas_and_workflows.rst
@@ -111,120 +111,36 @@ So if any parameters saved in json files (or passed directly) are needed as inpu
 {{ app_name }} can pass them directly or via json as specified in the task schema.
 An example is given of both combinations.
 
-To run this example, create a ``workflow.yaml`` file with the contents below,
-along with the ``json_in_json_out.py``, ``json_in_direct_out.py``, and ``mixed_in_direct_out.py`` files.
+To run this example, create a ``read_save_workflow.yaml`` file with the contents below,
+along with the ``json_in_json_out.py``, ``json_in_direct_out.py``, and ``mixed_in_json_out.py`` files.
 
+.. literalinclude:: read_save_workflow.yaml
+   :language: yaml
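+
+Note the per-parameter form of ``script_data_in`` in the last schema, which is
+what lets a script mix direct and json inputs (repeated from the workflow
+above for emphasis):
+
+.. code-block:: yaml
+
+    script_data_in:
+      p3: direct # previously saved as json in task read_and_save_using_json
+      p4: json # previously saved directly in task read_json_from_another_task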
 
-.. code-block:: yaml
-
-    # workflow.yaml
-    template_components:
-      task_schemas:
-      - objective: read_and_save_using_json
-        inputs:
-        - parameter: p1
-        - parameter: p2
-        actions:
-        - script: <>
-          script_data_in: json
-          script_data_out: json
-          script_exe: python_script
-          environments:
-          - scope:
-              type: any
-            environment: python_env
-        outputs:
-        - parameter: p3
-      - objective: read_json_from_another_task
-        inputs:
-        - parameter: p3
-        actions:
-        - script: <>
-          script_data_in: json
-          script_data_out: direct
-          script_exe: python_script
-          environments:
-          - scope:
-              type: any
-            environment: python_env
-        outputs:
-        - parameter: p4
-      - objective: pass_mixed_from_another_task
-        inputs:
-        - parameter: p3
-        - parameter: p4
-        actions:
-        - script: <>
-          script_data_in:
-            p3: direct # previously saved as json in task read_and_save_using_json
-            p4: json # previously saved directly in task read_json_from_another_task
-          script_data_out: direct
-          script_exe: python_script
-          environments:
-          - scope:
-              type: any
-            environment: python_env
-        outputs:
-        - parameter: p5
-
-    tasks:
-    - schema: read_and_save_using_json
-      inputs:
-        p1: 1
-        p2: 2
-    - schema: read_json_from_another_task
-    - schema: pass_mixed_from_another_task
-
-.. code-block:: python
-
-    # json_in_json_out.py
-    import json
-
-    def json_in_json_out(_input_files, _output_files):
-        with open(_input_files["json"]) as json_data:
-            inputs = json.load(json_data)
-        p1 = inputs["p1"]
-        p2 = inputs["p2"]
-
-        p3 = p1 + p2
-        with open(_output_files["json"], 'w') as f:
-            json.dump({"p3": p3}, f)
+.. literalinclude:: json_in_json_out.py
+   :language: python
 
-.. code-block:: python
-
-    # json_in_direct_out.py
-    import json
-
-    def json_in_direct_out(_input_files):
-        with open(_input_files["json"]) as json_data:
-            inputs = json.load(json_data)
-        p3 = inputs["p3"]
-        p4 = p3 + 1
-        print(f"{p3=}")
-        print(f"{p4=}")
-
-        return {"p4": p4}
+.. literalinclude:: json_in_direct_out.py
+   :language: python
 
-.. code-block:: python
-
-    # mixed_in_json_out.py
-    import json
-
-    def mixed_in_direct_out(p3, _input_files):
-        with open(_input_files["json"]) as json_data:
-            inputs = json.load(json_data)
-        p4 = inputs["p4"]
-        p5 = p3 + p4
-
-        print(f"{p3=}")
-        print(f"{p4=}")
-        print(f"{p5=}")
-
-        return {"p5": p5}
+.. literalinclude:: mixed_in_json_out.py
+   :language: python
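+
+As with the other examples, you can then run this workflow using
+``{{ app_package_name }} go read_save_workflow.yaml``.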
 
-The particular variables names used to pass parameters using json/HDF5 depend on
+The particular variable names used to pass parameters using json/HDF5 depend on
 which language is being used.
@@ -285,78 +201,22 @@ Example workflow
 
 .. _command_files_example_workflow:
 
 Here we have an example workflow which illustrates use of command files.
-To run this example, create a ``workflow.yaml`` file with the contents below,
+To run this example, create a ``command_files_example.yaml`` file with the contents below,
 along with the ``generate_input_file.py`` and ``process_input_file.py`` files.
 Modify the paths to the python scripts under the ``action`` keys to give the full path to your files.
-You can then run the workflow using ``{{ app_package_name }} go workflow.yaml``.
+You can then run the workflow using ``{{ app_package_name }} go command_files_example.yaml``.
 
+.. literalinclude:: command_files_example.yaml
+   :language: yaml
 
-.. code-block:: yaml
-
-    # workflow.yaml
-    template_components:
-      task_schemas:
-      - objective: process_data
-        inputs:
-        - parameter: input_data
-        - parameter: path
-          default_value: input_file.json
-        actions:
-        - script: <>
-          script_data_in: direct
-          script_exe: python_script
-          save_files: # A copy of any command files listed here will be saved in the the artifacts directory
-          - my_input_file
-          environments:
-          - scope:
-              type: any
-            environment: python_env
-        - script: <>
-          script_exe: python_script
-          environments:
-          - scope:
-              type: any
-            environment: python_env
-          save_files:
-          - processed_file
-
-      command_files:
-      - label: my_input_file
-        name:
-          name: input_file.json
-      - label: processed_file
-        name:
-          name: processed_file.json
-
-    tasks:
-    - schema: process_data
-      inputs:
-        input_data: [1, 2, 3, 4]
-        path: input_file.json
 
-.. code-block:: python
-
-    # generate_input_file.py
-    import json
-    def generate_input_file(path: str, input_data: list):
-        """Generate an input file"""
-        with open(path, "w") as f:
-            json.dump(input_data, f, indent=2)
+.. literalinclude:: generate_input_file.py
+   :language: python
 
-.. code-block:: python
-
-    # process_input_file.py
-    import json
-    def process_input_file():
-        """Process an input file.
-
-        This could be a materials science simulation for example.
-        """
-        with open("input_file.json", "r") as f:
-            data = json.load(f)
-        data = [item * 2 for item in data]
-        with open("processed_file.json", "w") as f:
-            json.dump(data, f, indent=2)
+.. literalinclude:: process_input_file.py
+   :language: python
diff --git a/docs/source/user/tutorials/tutorial_resources/generate_input_file.py b/docs/source/user/getting_started/generate_input_file.py
similarity index 87%
rename from docs/source/user/tutorials/tutorial_resources/generate_input_file.py
rename to docs/source/user/getting_started/generate_input_file.py
index 78ab6f5..1fba4d6 100644
--- a/docs/source/user/tutorials/tutorial_resources/generate_input_file.py
+++ b/docs/source/user/getting_started/generate_input_file.py
@@ -1,6 +1,6 @@
+# generate_input_file.py
 import json
-
 
 def generate_input_file(path: str, input_data: list):
     """Generate an input file"""
     with open(path, "w") as f:
diff --git a/docs/source/user/tutorials/tutorial_resources/groups.yaml b/docs/source/user/getting_started/groups_workflow.yaml
similarity index 94%
rename from docs/source/user/tutorials/tutorial_resources/groups.yaml
rename to docs/source/user/getting_started/groups_workflow.yaml
index d924d1e..1b56231 100644
--- a/docs/source/user/tutorials/tutorial_resources/groups.yaml
+++ b/docs/source/user/getting_started/groups_workflow.yaml
@@ -20,7 +20,7 @@ template_components:
     - commands:
       - command: echo <> # This one is printed to stdout
       - command: echo $(( <> )) # This is captured as p3
-        stdout: <>
+        stdout: <>
 tasks:
 - schema: s1
   sequences:
diff --git a/docs/source/user/getting_started/json_in_direct_out.py b/docs/source/user/getting_started/json_in_direct_out.py
new file mode 100644
index 0000000..9082e2a
--- /dev/null
+++ b/docs/source/user/getting_started/json_in_direct_out.py
@@ -0,0 +1,13 @@
+# json_in_direct_out.py
+import json
+
+def json_in_direct_out(_input_files):
+    with open(_input_files["json"]) as json_data:
+        inputs = json.load(json_data)
+    p3 = inputs["p3"]
+    p4 = p3 + 1
+
+    print(f"{p3=}")
+    print(f"{p4=}")
+
+    return {"p4": p4}
diff --git a/docs/source/user/getting_started/json_in_json_out.py b/docs/source/user/getting_started/json_in_json_out.py
new file mode 100644
index 0000000..6b0fc97
--- /dev/null
+++ b/docs/source/user/getting_started/json_in_json_out.py
@@ -0,0 +1,12 @@
+# json_in_json_out.py
+import json
+
+def json_in_json_out(_input_files, _output_files):
+    with open(_input_files["json"]) as json_data:
+        inputs = json.load(json_data)
+    p1 = inputs["p1"]
+    p2 = inputs["p2"]
+
+    p3 = p1 + p2
+    with open(_output_files["json"], 'w') as f:
+        json.dump({"p3": p3}, f)
diff --git a/docs/source/user/getting_started/mixed_in_json_out.py b/docs/source/user/getting_started/mixed_in_json_out.py
new file mode 100644
index 0000000..fae4d71
--- /dev/null
+++ b/docs/source/user/getting_started/mixed_in_json_out.py
@@ -0,0 +1,14 @@
+# mixed_in_json_out.py
+import json
+
+def mixed_in_direct_out(p3, _input_files):
+    with open(_input_files["json"]) as json_data:
+        inputs = json.load(json_data)
+    p4 = inputs["p4"]
+    p5 = p3 + p4
+
+    print(f"{p3=}")
+    print(f"{p4=}")
+    print(f"{p5=}")
+
+    return {"p5": p5}
diff --git a/docs/source/user/tutorials/tutorial_resources/parse_output.py b/docs/source/user/getting_started/parse_output.py
similarity index 99%
rename from docs/source/user/tutorials/tutorial_resources/parse_output.py
rename to docs/source/user/getting_started/parse_output.py
index ab54151..2e1d2a1 100644
--- a/docs/source/user/tutorials/tutorial_resources/parse_output.py
+++ b/docs/source/user/getting_started/parse_output.py
@@ -1,6 +1,6 @@
+# parse_output.py
 import json
-
 
 def parse_output(my_input_file: str, processed_file: str):
     """Do some post-processing of data files.
diff --git a/docs/source/user/tutorials/tutorial_resources/process_input_file.py b/docs/source/user/getting_started/process_input_file.py
similarity index 93%
rename from docs/source/user/tutorials/tutorial_resources/process_input_file.py
rename to docs/source/user/getting_started/process_input_file.py
index e6ecd85..6526931 100644
--- a/docs/source/user/tutorials/tutorial_resources/process_input_file.py
+++ b/docs/source/user/getting_started/process_input_file.py
@@ -1,6 +1,6 @@
+# process_input_file.py
 import json
-
 
 def process_input_file():
     """Process an input file.
 
diff --git a/docs/source/user/getting_started/read_save_workflow.yaml b/docs/source/user/getting_started/read_save_workflow.yaml
new file mode 100644
index 0000000..ba8dd49
--- /dev/null
+++ b/docs/source/user/getting_started/read_save_workflow.yaml
@@ -0,0 +1,57 @@
+# read_save_workflow.yaml
+template_components:
+  task_schemas:
+  - objective: read_and_save_using_json
+    inputs:
+    - parameter: p1
+    - parameter: p2
+    actions:
+    - script: <>
+      script_data_in: json
+      script_data_out: json
+      script_exe: python_script
+      environments:
+      - scope:
+          type: any
+        environment: python_env
+    outputs:
+    - parameter: p3
+  - objective: read_json_from_another_task
+    inputs:
+    - parameter: p3
+    actions:
+    - script: <>
+      script_data_in: json
+      script_data_out: direct
+      script_exe: python_script
+      environments:
+      - scope:
+          type: any
+        environment: python_env
+    outputs:
+    - parameter: p4
+  - objective: pass_mixed_from_another_task
+    inputs:
+    - parameter: p3
+    - parameter: p4
+    actions:
+    - script: <>
+      script_data_in:
+        p3: direct # previously saved as json in task read_and_save_using_json
+        p4: json # previously saved directly in task read_json_from_another_task
+      script_data_out: direct
+      script_exe: python_script
+      environments:
+      - scope:
+          type: any
+        environment: python_env
+    outputs:
+    - parameter: p5
+
+tasks:
+- schema: read_and_save_using_json
+  inputs:
+    p1: 1
+    p2: 2
+- schema: read_json_from_another_task
+- schema: pass_mixed_from_another_task
diff --git a/docs/source/user/getting_started/running_workflows.rst b/docs/source/user/getting_started/running_workflows.rst
index 22fce0d..81fa8b3 100644
--- a/docs/source/user/getting_started/running_workflows.rst
+++ b/docs/source/user/getting_started/running_workflows.rst
@@ -2,7 +2,7 @@
 Demo workflows
 ----------------
 
-A good way to get started with {{ app_name }} is to run a built-in demo workflows.
+A good way to get started with {{ app_name }} is to run one of the built-in demo workflows.
 This will also test your installation, configuration, and some of your environments.
 
 Submit a workflow
diff --git a/docs/source/user/tutorials/tutorial_resources/greet.py b/docs/source/user/tutorials/tutorial_resources/greet.py
deleted file mode 100644
index 6f3fa1f..0000000
--- a/docs/source/user/tutorials/tutorial_resources/greet.py
+++ /dev/null
@@ -1,3 +0,0 @@
-def greet(greeting: str, name: str):
-    """Return a greeting"""
-    return {"string_to_print": f"{greeting}, {name}!"}
diff --git a/docs/source/user/tutorials/tutorial_resources/hello.yaml b/docs/source/user/tutorials/tutorial_resources/hello.yaml
deleted file mode 100644
index 36e5a88..0000000
--- a/docs/source/user/tutorials/tutorial_resources/hello.yaml
+++ /dev/null
@@ -1,89 +0,0 @@
-template_components:
-  task_schemas:
-  - objective: greet
-    inputs:
-    - parameter: name
-      default_value: World
-    - parameter: greeting
-      default_value: Hello
-    actions:
-    - commands:
-      - command: echo "<>, <>!" > printed_string.txt
-
-  - objective: python_greet
-    inputs:
-    - parameter: name
-      default_value: World
-    - parameter: greeting
-      default_value: Hello
-    outputs:
-    - parameter: string_to_print
-    actions:
-    - script: <>
-      script_data_in: direct
-      script_data_out: direct
-      script_exe: python_script
-      environments:
-      - scope:
-          type: any
-        environment: python_env
-
-  - objective: print
-    inputs:
-    - parameter: string_to_print
-    actions:
-    - commands:
-      - command: echo "<>" > printed_string.txt
-
-  # This schema uses the environment `temp_python_env`
-  # which loads a python venv.
-  # This is shown in `envs.yaml` in this repo.
-  - objective: which_python
-    actions:
-    - commands:
-      - command: which python
-      environments:
-      - scope:
-          type: any
-        environment: temp_python_env
-
-# Workflow
-tasks:
-- schema: greet
-- schema: greet
-  inputs:
-    greeting: What's up
-    name: doc
-- schema: python_greet
-  inputs:
-    greeting: Howdy
-    name: partner
-- schema: print
-- schema: print
-  inputs:
-    string_to_print: another string to print!
-- schema: print
-  # Explicitly reference output parameter from a task
-  input_sources:
-    string_to_print: task.python_greet
-- schema: print
-  input_sources:
-    # Note that local variable will appear first, regardless of its position in the list
-    string_to_print: [task.python_greet, local]
-  inputs:
-    string_to_print: Yet another string to print!
-- schema: which_python
-- schema: greet
-  sequences:
-  - path: inputs.greeting
-    values:
-    - hey
-    - see ya later
-    - in a while
-    nesting_order: 0
-  - path: inputs.name
-    values:
-    - you
-    - alligator
-    - crocodile
-    nesting_order: 1