diff --git a/src/queens/drivers/jobscript.py b/src/queens/drivers/jobscript.py index 7989fef9..a7637af7 100644 --- a/src/queens/drivers/jobscript.py +++ b/src/queens/drivers/jobscript.py @@ -106,17 +106,18 @@ def __init__( Args: parameters (Parameters): Parameters object. - input_templates (str, Path, dict): Path(s) to simulation input template. - jobscript_template (str, Path): Path to jobscript template or read-in jobscript - template. - executable (str, Path): Path to main executable of respective software. + input_templates (str, Path, dict): (Local) path(s) to simulation input template. + jobscript_template (str, Path): (Local) path to jobscript template or read-in jobscript + template. + executable (str, Path): Path to main executable of respective software. Is a remote + path when using the Cluster scheduler. files_to_copy (list, opt): Files or directories to copy to experiment_dir. data_processor (obj, opt): Instance of data processor class. gradient_data_processor (obj, opt): Instance of data processor class for gradient data. jobscript_file_name (str, opt): Jobscript file name (default: 'jobscript.sh'). extra_options (dict, opt): Extra options to inject into jobscript template. raise_error_on_jobscript_failure (bool, opt): Whether to raise an error for a non-zero - jobscript exit code. + jobscript exit code. 
""" super().__init__(parameters=parameters, files_to_copy=files_to_copy) self.input_templates = self.create_input_templates_dict(input_templates) diff --git a/src/queens/utils/config_directories.py b/src/queens/utils/config_directories.py index a12ae2dc..1497a733 100644 --- a/src/queens/utils/config_directories.py +++ b/src/queens/utils/config_directories.py @@ -67,7 +67,13 @@ def experiment_directory( if experiment_base_directory is None: experiment_base_directory = base_directory() else: - experiment_base_directory = Path(experiment_base_directory) + # Replace ~ with home directory if necessary + experiment_base_directory_str = str(experiment_base_directory) + if experiment_base_directory_str.startswith("~"): + experiment_base_directory_str = experiment_base_directory_str.replace( + "~", str(Path().home()), 1 + ) + experiment_base_directory = Path(experiment_base_directory_str) experiment_dir = experiment_base_directory / experiment_name return experiment_dir, experiment_dir.exists() diff --git a/tests/integration_tests/cluster/test_dask_cluster.py b/tests/integration_tests/cluster/test_cluster.py similarity index 64% rename from tests/integration_tests/cluster/test_dask_cluster.py rename to tests/integration_tests/cluster/test_cluster.py index 1c1aaa7d..5b3a64d8 100644 --- a/tests/integration_tests/cluster/test_dask_cluster.py +++ b/tests/integration_tests/cluster/test_cluster.py @@ -14,7 +14,6 @@ # """Test remote 4C simulations with ensight data-processor.""" -import getpass import json import logging import os @@ -24,8 +23,8 @@ import numpy as np import pytest +from testbook import testbook -import queens.schedulers.cluster as cluster_scheduler # pylint: disable=consider-using-from-import from queens.data_processors.pvd_file import PvdFile from queens.distributions.uniform import Uniform from queens.drivers import Jobscript @@ -34,6 +33,7 @@ from queens.models.simulation import Simulation from queens.parameters.parameters import Parameters from 
queens.schedulers.cluster import Cluster +from queens.utils.config_directories import experiment_directory from queens.utils.io import load_result from queens.utils.path import relative_path_from_root from queens.utils.remote_operations import RemoteConnection @@ -46,6 +46,8 @@ BRUTEFORCE_CLUSTER_TYPE = "bruteforce" CHARON_CLUSTER_TYPE = "charon" +PYTEST_BASE_DIR_CLUSTER = "~/queens-tests" + @pytest.mark.parametrize( "cluster", @@ -56,101 +58,14 @@ ], indirect=True, ) -class TestDaskCluster: +class TestCluster: """Test class collecting all test with Dask jobqueue clusters and 4C. - NOTE: we use a class here since our fixture are set to autouse, but we only want to call them - for these tests. + NOTE: we use a class here to parametrize each tests with the different clusters. """ - def pytest_base_directory_on_cluster(self): - """Remote directory containing several pytest runs.""" - return "$HOME/queens-tests" - - @pytest.fixture(name="queens_base_directory_on_cluster") - def fixture_queens_base_directory_on_cluster(self, pytest_id): - """Remote directory containing all experiments of a single pytest run. - - This directory is conceptually equivalent to the usual base - directory for non-pytest runs, i.e., production experiments. The - goal is to separate the testing data from production data of the - user. - """ - return self.pytest_base_directory_on_cluster() + f"/{pytest_id}" - - @pytest.fixture(name="mock_experiment_dir", autouse=True) - def fixture_mock_experiment_dir( - self, monkeypatch, cluster_settings, queens_base_directory_on_cluster - ): - """Mock the experiment directory of a test on the cluster. - - NOTE: It is necessary to mock the whole experiment_directory method. - Otherwise, the mock is not loaded properly remote. - This is in contrast to the local mocking where it suffices to mock - config_directories.BASE_DATA_DIR. - Note that we also rely on this local mock here! 
- """ - - def patch_experiments_directory(experiment_name, experiment_base_directory=None): - """Base directory for all experiments on the computing machine.""" - if experiment_base_directory is None: - experiment_base_directory = Path( - queens_base_directory_on_cluster.replace("$HOME", str(Path.home())) - ) - else: - raise ValueError( - "This mock function does not support specifying 'experiment_base_directory'. " - "It must be called with 'experiment_base_directory=None'." - ) - experiments_dir = experiment_base_directory / experiment_name - return experiments_dir, experiments_dir.exists() - - monkeypatch.setattr(cluster_scheduler, "experiment_directory", patch_experiments_directory) - _logger.debug("Mocking of dask experiment_directory was successful.") - _logger.debug( - "dask experiment_directory is mocked to '%s/' on %s@%s", - queens_base_directory_on_cluster, - cluster_settings["user"], - cluster_settings["host"], - ) - - return patch_experiments_directory - - @pytest.fixture(name="experiment_dir") - def fixture_experiment_dir(self, test_name, remote_connection, mock_experiment_dir): - """Fixture providing the remote experiment directory.""" - experiment_dir, _ = remote_connection.run_function(mock_experiment_dir, test_name, None) - return experiment_dir - - @pytest.fixture(name="_create_experiment_dir") - def fixture_create_experiment_dir(self, remote_connection, experiment_dir): - """Fixture providing the remote experiment directory.""" - - def create_experiment_dir_and_assert_it_exists(): - """Create experiment directory on remote and assert it exists.""" - os.mkdir(experiment_dir) - return experiment_dir.exists() - - assert remote_connection.run_function(create_experiment_dir_and_assert_it_exists) - - @pytest.fixture(name="cluster_kwargs") - def fixture_cluster_kwargs(self, cluster_settings, remote_connection, test_name): - """Keyword arguments to initialize the cluster scheduler.""" - return { - "workload_manager": cluster_settings["workload_manager"], 
- "walltime": "00:10:00", - "num_jobs": 1, - "min_jobs": 1, - "num_procs": 1, - "num_nodes": 1, - "remote_connection": remote_connection, - "cluster_internal_address": cluster_settings["cluster_internal_address"], - "experiment_name": test_name, - "queue": cluster_settings.get("queue"), - "job_script_prologue": cluster_settings.get("job_script_prologue"), - } - - def test_new_experiment_dir(self, cluster_kwargs, remote_connection, experiment_dir): + @staticmethod + def test_new_experiment_dir(cluster_kwargs, remote_connection, experiment_dir): """Test cluster init when experiment dir does not exist.""" experiment_dir_exists = remote_connection.run_function(experiment_dir.exists) assert not experiment_dir_exists @@ -160,12 +75,14 @@ def test_new_experiment_dir(self, cluster_kwargs, remote_connection, experiment_ experiment_dir_exists = remote_connection.run_function(experiment_dir.exists) assert experiment_dir_exists - def test_overwriting_existing_experiment_dir(self, cluster_kwargs, _create_experiment_dir): + @staticmethod + def test_overwriting_existing_experiment_dir(cluster_kwargs, _create_experiment_dir): """Test cluster init when overwriting experiment dir via flag.""" Cluster(**cluster_kwargs, overwrite_existing_experiment=True) + @staticmethod def test_no_prompt_input_for_existing_experiment_dir( - self, cluster_kwargs, mocker, _create_experiment_dir + cluster_kwargs, mocker, _create_experiment_dir ): """Test cluster init when not overwriting experiment dir via flag. @@ -178,8 +95,9 @@ def test_no_prompt_input_for_existing_experiment_dir( Cluster(**cluster_kwargs, overwrite_existing_experiment=False) assert exit_info.value.code == 1 + @staticmethod def test_empty_prompt_input_for_existing_experiment_dir( - self, cluster_kwargs, mocker, _create_experiment_dir + cluster_kwargs, mocker, _create_experiment_dir ): """Test cluster init when not overwriting experiment dir via flag. 
@@ -193,9 +111,10 @@ def test_empty_prompt_input_for_existing_experiment_dir( Cluster(**cluster_kwargs, overwrite_existing_experiment=False) assert exit_info.value.code == 1 + @staticmethod @pytest.mark.parametrize("user_input", ["y", "yes"]) def test_y_prompt_input_for_existing_experiment_dir( - self, cluster_kwargs, mocker, user_input, _create_experiment_dir + cluster_kwargs, mocker, user_input, _create_experiment_dir ): """Test cluster init when not overwriting experiment dir via flag. @@ -245,32 +164,30 @@ def experiment_dir_exists_and_contents(experiment_dir): ): assert file_before == file_after + @staticmethod def test_fourc_mc_cluster( - self, third_party_inputs, - cluster_settings, cluster_kwargs, remote_connection, - fourc_cluster_path, + basic_jobscript_kwargs, fourc_example_expected_output, global_settings, tmp_path, ): """Test remote 4C simulations with DASK jobqueue and MC iterator. - Test for remote 4C simulations on a remote cluster in combination - with + Test for remote 4C simulations on a remote cluster in combination with - DASK jobqueue cluster - Monte-Carlo (MC) iterator - - 4C ensight data-processor. + - PVD data processor. 
Args: third_party_inputs (Path): Path to the 4C input files - cluster_settings (dict): Cluster settings cluster_kwargs (dict): Keyword arguments to initialize the cluster scheduler remote_connection (RemoteConnection): Remote connection object - fourc_cluster_path (Path): paths to 4C executable on the cluster + basic_jobscript_kwargs (dict): Basic keyword arguments to initialize the jobscript + driver that are constant for all cluster tests fourc_example_expected_output (np.ndarray): Expected output for the MC samples global_settings (GlobalSettings): object containing experiment name and tmp_path tmp_path (Path): Temporary path for storing remote data locally @@ -293,10 +210,8 @@ def test_fourc_mc_cluster( driver = Jobscript( parameters=parameters, input_templates=fourc_input_file_template, - jobscript_template=cluster_settings["jobscript_template"], - executable=fourc_cluster_path, data_processor=data_processor, - extra_options={"cluster_script": cluster_settings["cluster_script_path"]}, + **basic_jobscript_kwargs, ) model = Simulation(scheduler=scheduler, driver=driver) iterator = MonteCarlo( @@ -320,7 +235,7 @@ def test_fourc_mc_cluster( scheduler.copy_files_from_experiment_dir(local_data_path) # The remote data has to be deleted before the assertion - self.delete_simulation_data(remote_connection) + delete_old_simulation_data(remote_connection) # assert statements np.testing.assert_array_almost_equal( @@ -342,21 +257,65 @@ def test_fourc_mc_cluster( # The extracted local data should match the expected output np.testing.assert_array_almost_equal(local_data, fourc_example_expected_output, decimal=6) - def delete_simulation_data(self, remote_connection): - """Delete simulation data on the cluster. 
+ @staticmethod + @testbook( + "tutorials/5_grid_iterator_4c_remote/5_grid_iterator_4c_remote.ipynb", + ) + def test_4c_remote_tutorial( + tb, + tmp_path, + test_name, + basic_jobscript_kwargs, + remote_connection_kwargs, + minimal_cluster_kwargs, + ): + """Test for tutorial 3: Remote 4C simulation with grid iterator. - This approach deletes test simulation data older than seven days - Args: - remote_connection (RemoteConnection): connection to remote cluster. + The notebook is run with injected lines of code to replace placeholders. + It is checked that the replaced dict entries already exist in the notebook. """ - # Delete data from tests older then 1 week - command = ( - "find " - + str(self.pytest_base_directory_on_cluster()) - + " -mindepth 1 -maxdepth 1 -mtime +7 -type d -exec rm -rv {} \\;" + kwargs_dicts = [basic_jobscript_kwargs, remote_connection_kwargs, minimal_cluster_kwargs] + dict_names = [ + "jobscript_driver_kwargs", + "remote_connection_kwargs", + "cluster_scheduler_kwargs", + ] + + injected_cell = """from pathlib import PosixPath""" + + for kwargs_dict, dict_name in zip(kwargs_dicts, dict_names): + dict_name_injected = f"{dict_name}_injected" + injected_cell += f""" +{dict_name_injected} = {kwargs_dict} +if not {dict_name}.keys() == {dict_name_injected}.keys(): + raise KeyError( + f"The keys of the injected dictionary are not the same as the keys of the " + f"placeholder dictionary in the notebook.\\n" + f"Injected keys: {{{dict_name_injected}.keys()}}\\n" + f"Placeholder keys: {{{dict_name}.keys()}}" + ) +{dict_name} = {dict_name_injected} + """ + + # replace placeholder dicts + tb.inject(injected_cell, after=6, run=False) + # replace experiment name and output dir + tb.inject( + f"experiment_name = {test_name!r}\noutput_dir = {tmp_path!r}", + after=8, + run=False, ) - result = remote_connection.run(command, in_stream=False) - _logger.debug("Deleting old simulation data:\n%s", result.stdout) + # assert expected output + tb.inject( + 
"np.testing.assert_allclose(max_displacement_magnitude_per_run, " + "[0.17606783, 0.22969808, 0.27944426, 0.22969808, 0.2782447, 0.32395894, 0.27944426, " + "0.32395894, 0.36635981])", + after=14, + run=False, + ) + + # run the notebook + tb.execute() @dataclass(frozen=True) @@ -399,7 +358,6 @@ class ClusterConfig: cluster_script_path=Path("/lnm/share/donottouch.sh"), ) - BRUTEFORCE_CONFIG = ClusterConfig( name="bruteforce", host="bruteforce.lnm.ed.tum.de", @@ -409,6 +367,7 @@ class ClusterConfig: default_python_path="$HOME/anaconda/miniconda/envs/queens/bin/python", cluster_script_path=Path("/lnm/share/donottouch.sh"), ) + CHARON_CONFIG = ClusterConfig( name="charon", host="charon.bauv.unibw-muenchen.de", @@ -427,11 +386,7 @@ class ClusterConfig: } -# CLUSTER TESTS ------------------------------------------------------------------------------------ -@pytest.fixture(name="user", scope="session") -def fixture_user(): - """Name of user calling the test suite.""" - return getpass.getuser() +# CLUSTER TESTS FIXTURES --------------------------------------------------------------------------- @pytest.fixture(name="remote_user", scope="session") @@ -449,6 +404,19 @@ def fixture_gateway(pytestconfig): return gateway +@pytest.fixture(name="remote_python", scope="session") +def fixture_remote_python(pytestconfig): + """Path to the Python environment on remote host.""" + return pytestconfig.getoption("remote_python") + + +@pytest.fixture(name="remote_queens_repository", scope="session") +def fixture_remote_queens_repository(pytestconfig): + """Path to the queens repository on remote host.""" + remote_queens = pytestconfig.getoption("remote_queens_repository", skip=True) + return remote_queens + + @pytest.fixture(name="cluster", scope="session") def fixture_cluster(request): """Name of the cluster to run a test on. 
@@ -459,44 +427,34 @@ def fixture_cluster(request): return request.param -@pytest.fixture(name="cluster_settings", scope="session") -def fixture_cluster_settings( - cluster, remote_user, gateway, remote_python, remote_queens_repository -): - """All cluster settings.""" - settings = CLUSTER_CONFIGS.get(cluster).dict() - _logger.debug("raw cluster config: %s", settings) - settings["cluster"] = cluster - settings["user"] = remote_user - settings["remote_python"] = remote_python - settings["remote_queens_repository"] = remote_queens_repository - settings["gateway"] = gateway - return settings +@pytest.fixture(name="cluster_config", scope="session") +def fixture_cluster_config(cluster): + """The cluster configuration for the given cluster.""" + config = CLUSTER_CONFIGS.get(cluster).dict() + _logger.debug("Cluster config: %s", config) + return config -@pytest.fixture(name="remote_python", scope="session") -def fixture_remote_python(pytestconfig): - """Path to the Python environment on remote host.""" - return pytestconfig.getoption("remote_python") +@pytest.fixture(name="remote_connection_kwargs", scope="session") +def fixture_remote_connection_kwargs( + cluster_config, remote_user, remote_python, remote_queens_repository, gateway +): + """Keyword arguments to initialize the remote connection.""" + remote_connection_kwargs = { + "host": cluster_config["host"], + "user": remote_user, + "remote_python": remote_python, + "remote_queens_repository": remote_queens_repository, + "gateway": gateway, + } + _logger.debug("Remote connection kwargs: %s", remote_connection_kwargs) + return remote_connection_kwargs @pytest.fixture(name="remote_connection", scope="session") -def fixture_remote_connection(cluster_settings): +def fixture_remote_connection(remote_connection_kwargs): """A fabric connection to a remote host.""" - return RemoteConnection( - host=cluster_settings["host"], - user=cluster_settings["user"], - remote_python=cluster_settings["remote_python"], - 
remote_queens_repository=cluster_settings["remote_queens_repository"], - gateway=cluster_settings["gateway"], - ) - - -@pytest.fixture(name="remote_queens_repository", scope="session") -def fixture_remote_queens_repository(pytestconfig): - """Path to the queens repository on remote host.""" - remote_queens = pytestconfig.getoption("remote_queens_repository", skip=True) - return remote_queens + return RemoteConnection(**remote_connection_kwargs) @pytest.fixture(name="fourc_cluster_path", scope="session") @@ -515,3 +473,101 @@ def fixture_fourc_cluster_path(remote_connection): Path(find_result.stdout.rstrip()) return fourc + + +@pytest.fixture(name="experiment_base_dir_cluster", scope="session") +def fixture_experiment_base_dir_cluster(pytest_id): + """Remote directory containing all experiments of a single pytest run. + + This directory is conceptually equivalent to the usual base + directory for non-pytest runs, i.e., production experiments. The + goal is to separate the testing data from production data of the + user. 
+ """ + return PYTEST_BASE_DIR_CLUSTER + f"/{pytest_id}" + + +@pytest.fixture(name="experiment_dir") +def fixture_experiment_dir(global_settings, remote_connection, experiment_base_dir_cluster): + """Fixture providing the remote experiment directory.""" + experiment_dir, _ = remote_connection.run_function( + experiment_directory, + global_settings.experiment_name, + experiment_base_dir_cluster, + ) + return experiment_dir + + +@pytest.fixture(name="_create_experiment_dir") +def fixture_create_experiment_dir(remote_connection, experiment_dir): + """Fixture providing the remote experiment directory.""" + + def create_experiment_dir_and_assert_it_exists(): + """Create experiment directory on remote and assert it exists.""" + os.mkdir(experiment_dir) + return experiment_dir.exists() + + assert remote_connection.run_function(create_experiment_dir_and_assert_it_exists) + + +@pytest.fixture(name="minimal_cluster_kwargs", scope="session") +def fixture_minimal_cluster_kwargs(cluster_config, experiment_base_dir_cluster): + """Basic keyword arguments to initialize the cluster scheduler. + + These kwargs are constant for all cluster tests. 
+ """ + return { + "workload_manager": cluster_config["workload_manager"], + "queue": cluster_config.get("queue"), + "cluster_internal_address": cluster_config["cluster_internal_address"], + "experiment_base_dir": experiment_base_dir_cluster, + "job_script_prologue": cluster_config.get("job_script_prologue"), + } + + +@pytest.fixture(name="cluster_kwargs") +def fixture_cluster_kwargs(minimal_cluster_kwargs, remote_connection, test_name): + """Keyword arguments to initialize the cluster scheduler.""" + return minimal_cluster_kwargs | { + "walltime": "00:10:00", + "num_jobs": 1, + "min_jobs": 1, + "num_procs": 1, + "num_nodes": 1, + "remote_connection": remote_connection, + "experiment_name": test_name, + } + + +@pytest.fixture(name="basic_jobscript_kwargs", scope="session") +def fixture_basic_jobscript_kwargs(cluster_config, fourc_cluster_path): + """Basic keyword arguments to initialize the jobscript driver. + + These kwargs are constant for all cluster tests. + """ + return { + "jobscript_template": cluster_config["jobscript_template"], + "executable": fourc_cluster_path, + "extra_options": {"cluster_script": cluster_config["cluster_script_path"]}, + } + + +# CLUSTER TESTS FUNCTIONS -------------------------------------------------------------------------- + + +def delete_old_simulation_data(remote_connection): + """Delete old simulation data on the cluster. + + This approach deletes test simulation data older than seven days. + + Args: + remote_connection (RemoteConnection): connection to remote cluster. 
+ """ + # Delete data from tests older than 1 week + command = ( + "find " + + PYTEST_BASE_DIR_CLUSTER + + " -mindepth 1 -maxdepth 1 -mtime +7 -type d -exec rm -rv {} \\;" + ) + result = remote_connection.run(command, in_stream=False) + _logger.debug("Deleting old simulation data:\n%s", result.stdout) diff --git a/tests/tutorial_tests/test_5_grid_iterator_4c_remote.py b/tests/tutorial_tests/test_5_grid_iterator_4c_remote.py new file mode 100644 index 00000000..af193e5b --- /dev/null +++ b/tests/tutorial_tests/test_5_grid_iterator_4c_remote.py @@ -0,0 +1,21 @@ +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# Copyright (c) 2024-2025, QUEENS contributors. +# +# This file is part of QUEENS. +# +# QUEENS is free software: you can redistribute it and/or modify it under the terms of the GNU +# Lesser General Public License as published by the Free Software Foundation, either version 3 of +# the License, or (at your option) any later version. QUEENS is distributed in the hope that it will +# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You +# should have received a copy of the GNU Lesser General Public License along with QUEENS. If not, +# see . +# +"""Placeholder for integration test of 5_grid_iterator_4c_remote tutorial. + +This file ensures that the tutorial is not run by tests/tutorial_tests/test_tutorials.py since +those tests are only run locally and do not have access to a remote cluster, which is needed for +this tutorial. +The actual test of this tutorial is in tests/integration_tests/cluster/test_dask_cluster.py. 
+""" diff --git a/tests/tutorial_tests/test_tutorials.py b/tests/tutorial_tests/test_tutorials.py index 3c562aef..a2237572 100644 --- a/tests/tutorial_tests/test_tutorials.py +++ b/tests/tutorial_tests/test_tutorials.py @@ -27,7 +27,7 @@ "paths_to_tutorial_notebooks", [ str(patch) - for patch in sorted(Path("tutorials").glob("*.ipynb")) + for patch in sorted(Path("tutorials").rglob("*.ipynb")) if patch.stem not in { t.stem.removeprefix("test_") for t in Path("tests/tutorial_tests").glob("test_*.py") diff --git a/tutorials/5_grid_iterator_4c_remote/5_grid_iterator_4c_remote.ipynb b/tutorials/5_grid_iterator_4c_remote/5_grid_iterator_4c_remote.ipynb new file mode 100644 index 00000000..c4532ec7 --- /dev/null +++ b/tutorials/5_grid_iterator_4c_remote/5_grid_iterator_4c_remote.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# 5. Remote Computing with 4C\n", + "\n", + "In this tutorial, you will use the `Grid` iterator to probe the response surface of a 4C model.\n", + "The 4C simulations will run remotely on a cluster. \n", + "\n", + "> **Disclaimer:**\n", + "> You will only be able to execute the following notebook if you have access to a remote computing resource and fill in the placeholders accordingly." + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "## Set up the remote machine\n", + "\n", + "Remote computing with QUEENS is enabled via SSH port forwarding, so a few initial steps are necessary:\n", + "\n", + "1. Make sure both your local and your remote machine have an SSH key under `~/.ssh/id_rsa.pub`.\n", + " In case either of them does not have one yet, you can generate an SSH key on the respective machine via:\n", + "\n", + " ```bash\n", + " # execute on local or remote machine:\n", + " ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa\n", + " ```\n", + "\n", + "1. Connecting via SSH from the local to the remote machine needs to work without a password. 
\n", + " Therefore, you need to copy the public key of the local machine to the `authorized_keys` file of the remote machine:\n", + "\n", + " ```bash\n", + " # execute on local machine:\n", + " ssh-copy-id -i ~/.ssh/id_rsa.pub @\n", + " ```\n", + "\n", + "1. To enable passwordless onto the localhost on the remote machine itself, you also need to copy the ssh-key of the remote machine to its `authorized_keys` file:\n", + "\n", + " ```bash\n", + " # execute on remote machine:\n", + " cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys\n", + " ```\n", + "\n", + "> **Troubleshooting**:\n", + "> If you are still asked for your password after these steps, verify that:\n", + "> \n", + "> - The `~/.ssh` directory has permissions `700`.\n", + "> To set the permissions correctly, execute `chmod -R 700 ~/.ssh`.\n", + "> - The home directory on your remote machine has permissions `700`.\n", + "> To set the permissions correctly, execute `chmod 700 ~` on the remote machine.\n", + "\n", + "4. Clone the QUEENS repository on the remote machine.\n", + "\n", + "5. Install the same QUEENS environment that you are using on your local machine also on the remote machine (see [here](https://queens-py.github.io/queens/introduction.html#installation)). You will later have to pass the path to its python executable via `remote_python`.\n", + "\n", + "> **Subclassing**:\n", + "> If you want to run QUEENS remotely with custom classes that inherit from QUEENS objects, you need to ensure that these classes are available in the remote QUEENS environment.\n", + "> You can do this in one of the following ways:\n", + ">\n", + "> 1. *Recommended:* Include the custom classes in the local QUEENS repository. They will be automatically copied to the remote QUEENS repository at the start of the QUEENS runs.\n", + "> 2. Manually synchronize the custom classes between your local and your remote machine, e.g., via an additional repository and installing this repository in the remote QUEENS environment." 
+ ] + }, + { + "cell_type": "markdown", + "id": "2", + "metadata": {}, + "source": [ + "## Set up the QUEENS experiment\n", + "\n", + "In the following section, we will set up the QUEENS experiment for cluster usage:\n", + "\n", + "1. To evaluate 4C remotely on a cluster, use a `Jobscript` driver instead of a `Fourc` driver and adjust the paths:\n", + "\n", + " - `input_templates`: The local path to your 4C input template. QUEENS will copy it to the remote machine for you.\n", + " - `jobscript_template`: The local path to your jobscript template. For inspiration, check out our templates under `templates/jobscripts/`.\n", + " - `executable`: The absolute path to the 4C executable on the remote machine." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "jobscript_driver_kwargs = {\n", + " \"jobscript_template\": \"\",\n", + " \"executable\": \"\",\n", + " \"extra_options\": {}, # optional\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "\n", + "2. Switch to the `Cluster` scheduler and set up a `RemoteConnection` for it:\n", + " - `remote_python`: The absolute path to the Python executable of your QUEENS Python environment on the remote machine.\n", + " You can check your available environments via `conda info --envs`.\n", + " The path typically looks like `/home///envs/queens/bin/python`.\n", + " - `remote_queens_repository`: The absolute path to your QUEENS repository on the remote machine." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "remote_connection_kwargs = {\n", + " \"host\": \"\",\n", + " \"user\": \"\",\n", + " \"remote_python\": \"\", \n", + " \"remote_queens_repository\": \"\",\n", + " \"gateway\": None, # optional\n", + "}\n", + "cluster_scheduler_kwargs = {\n", + " \"workload_manager\": \"\", \n", + " \"queue\": \"\",\n", + " \"cluster_internal_address\": \"\",\n", + " \"experiment_base_dir\": None, # optional\n", + " \"job_script_prologue\": None, # optional list of commands executed before starting a worker\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "### Run the example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "experiment_name = \"grid_iterator_4c_remote\"\n", + "output_dir = \"./\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "from queens.data_processors import PvdFile\n", + "from queens.distributions import Uniform\n", + "from queens.drivers import Jobscript\n", + "from queens.global_settings import GlobalSettings\n", + "from queens.iterators import Grid\n", + "from queens.main import run_iterator\n", + "from queens.models import Simulation\n", + "from queens.parameters.parameters import Parameters\n", + "from queens.schedulers import Cluster\n", + "from queens.utils.remote_operations import RemoteConnection\n", + "from queens.utils.path import relative_path_from_root\n", + "\n", + "\n", + "with GlobalSettings(\n", + " experiment_name=experiment_name, output_dir=output_dir, debug=False\n", + ") as gs:\n", + " # Parameters parameterizing a Neumann BC\n", + " parameter_1 = Uniform(lower_bound=0.0, upper_bound=1.0)\n", + " parameter_2 = Uniform(lower_bound=0.0, upper_bound=1.0)\n", + " parameters = Parameters(parameter_1=parameter_1, 
parameter_2=parameter_2)\n", + "\n", + " # The data processor extracts the displacement vectors (with x, y, z component) of all nodes at \n", + " # the last time step of the simulation\n", + " data_processor = PvdFile(\n", + " field_name=\"displacement\",\n", + " file_name_identifier=\"*.pvd\",\n", + " file_options_dict={},\n", + " )\n", + "\n", + " # Establish an SSH connection to the cluster\n", + " remote_connection = RemoteConnection(**remote_connection_kwargs) \n", + " \n", + " scheduler = Cluster(\n", + " experiment_name,\n", + " walltime=\"00:10:00\", \n", + " remote_connection=remote_connection, \n", + " num_jobs=9,\n", + " min_jobs=1,\n", + " num_procs=1, \n", + " num_nodes=1,\n", + " **cluster_scheduler_kwargs,\n", + " )\n", + "\n", + " # The driver handles the actual evaluation of 4C\n", + " driver = Jobscript(\n", + " parameters=parameters,\n", + " data_processor=data_processor,\n", + " input_templates=relative_path_from_root(\n", + " \"tutorials/5_grid_iterator_4c_remote/solid_runtime_hex8.4C.yaml\"\n", + " ), \n", + " **jobscript_driver_kwargs,\n", + " )\n", + " \n", + " model = Simulation(scheduler, driver)\n", + " \n", + " # Analysis setup\n", + " grid_design = {\n", + " \"parameter_1\": {\n", + " \"num_grid_points\": 3,\n", + " \"axis_type\": \"lin\",\n", + " \"data_type\": \"FLOAT\",\n", + " },\n", + " \"parameter_2\": {\n", + " \"num_grid_points\": 3,\n", + " \"axis_type\": \"lin\",\n", + " \"data_type\": \"FLOAT\",\n", + " },\n", + " }\n", + " iterator = Grid(\n", + " model,\n", + " parameters,\n", + " global_settings=gs,\n", + " grid_design=grid_design,\n", + " result_description={\"write_results\": True, \"plot_results\": False},\n", + " )\n", + "\n", + " # Run the analysis\n", + " run_iterator(iterator, gs)" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "### Evaluate the results\n", + "\n", + "Look at the results and analyze them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from pathlib import Path\n", + "\n", + "from queens.utils.io import load_result\n", + "\n", + "# Load the results\n", + "result_file = Path(output_dir) / f\"{experiment_name}.pickle\"\n", + "results = load_result(result_file)\n", + "\n", + "# This yields the displacement components (x, y, z) for each of the 9 grid points on each node \n", + "# of each element \n", + "# (Here: 2 elements with 8 nodes each, the output is written for each element individually), \n", + "# so the resulting array is expected to have the shape (9, 16, 3)\n", + "raw_displacements = results[\"raw_output_data\"][\"result\"]\n", + "\n", + "# Compute the displacement magnitudes for each run on each node of each element.\n", + "# The resulting array has shape (9, 16).\n", + "point_wise_displacement_magnitudes = np.sqrt(np.sum(raw_displacements ** 2, axis=-1))\n", + "\n", + "# Finally, we compute the maximum displacement that was achieved in each run.\n", + "# The resulting array has shape (9,).\n", + "max_displacement_magnitude_per_run = np.max(point_wise_displacement_magnitudes, axis=1)\n", + "print(max_displacement_magnitude_per_run)" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "## Where do I find all the data on the cluster?\n", + "\n", + "The data is stored -- equivalently to the local runs -- in a folder with the following nomenclature: \n", + "`$HOME/queens-experiments/<experiment_name>/<job_id>`\n", + "\n", + "For example, you can find the data of the first simulation of this QUEENS experiment in the folder `$HOME/queens-experiments/grid_iterator_4c_remote/1`\n", + "\n", + "Feel free to take a look around and to find the logged 4C console output of one of the simulations."
+ ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, + "source": [ + "## Lessons learned\n", + "\n", + "You have learned how to run 4C simulations remotely on a cluster:\n", + "\n", + "1. Use a `Jobscript` driver and ensure correct paths to the executables:\n", + " - `executable` refers to a path on the cluster.\n", + " - `input_templates` refers to a local path, making it very easy to adjust the file.\n", + "1. Use a `Cluster` scheduler and supply all necessary options.\n", + "1. The location of the QUEENS data on the cluster is `$HOME/queens-experiments`." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "queens", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + }, + "nbsphinx": { + "execute": "never" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/5_grid_iterator_4c_remote/solid_runtime_hex8.4C.yaml b/tutorials/5_grid_iterator_4c_remote/solid_runtime_hex8.4C.yaml new file mode 100644 index 00000000..b9c687c7 --- /dev/null +++ b/tutorials/5_grid_iterator_4c_remote/solid_runtime_hex8.4C.yaml @@ -0,0 +1,79 @@ +TITLE: + - "This is a simple test that tests the extrapolation of stresses from Gauss points to nodes for " + - "a hex8 discretization" +PROBLEM TYPE: + PROBLEMTYPE: "Structure" +IO: + OUTPUT_SPRING: true + STRUCT_STRESS: "Cauchy" + STRUCT_STRAIN: "GL" + VERBOSITY: "Standard" +IO/RUNTIME VTK OUTPUT: + INTERVAL_STEPS: 1 + OUTPUT_DATA_FORMAT: ascii +IO/RUNTIME VTK OUTPUT/STRUCTURE: + OUTPUT_STRUCTURE: true + DISPLACEMENT: true + STRESS_STRAIN: true +SOLVER 1: + SOLVER: "Superlu" + NAME: "Structure_Solver" +STRUCTURAL DYNAMIC: + INT_STRATEGY: "Standard" + DYNAMICTYPE: "Statics" + TIMESTEP: 0.5 + NUMSTEP: 2 + MAXTIME: 1 + TOLDISP: 1e-09 + TOLRES:
1e-09 + LOADLIN: true + LINEAR_SOLVER: 1 +STRUCT NOX/Printing: + Inner Iteration: false + Outer Iteration StatusTest: false +MATERIALS: + - MAT: 1 + MAT_Struct_StVenantKirchhoff: + YOUNG: 100 + NUE: 0 + DENS: 0 +FUNCT1: + - COMPONENT: 0 + SYMBOLIC_FUNCTION_OF_SPACE_TIME: "(1+{{ parameter_1 }}*y+{{ parameter_2 }}*z)*t" +DESIGN SURF NEUMANN CONDITIONS: + - E: 2 + NUMDOF: 3 + ONOFF: [1, 0, 0] + VAL: [10, 0, 0] + FUNCT: [1, 0, 0] +DESIGN SURF DIRICH CONDITIONS: + - E: 1 + NUMDOF: 3 + ONOFF: [1, 1, 1] + VAL: [0, 0, 0] + FUNCT: [0, 0, 0] +DSURF-NODE TOPOLOGY: + - "NODE 3 DSURFACE 1" + - "NODE 1 DSURFACE 1" + - "NODE 4 DSURFACE 1" + - "NODE 2 DSURFACE 1" + - "NODE 10 DSURFACE 2" + - "NODE 12 DSURFACE 2" + - "NODE 9 DSURFACE 2" + - "NODE 11 DSURFACE 2" +NODE COORDS: + - "NODE 1 COORD 0.0 0.0 0.0" + - "NODE 2 COORD 0.0 1.0 0.0" + - "NODE 3 COORD 0.0 0.0 1.0" + - "NODE 4 COORD 0.0 1.0 1.0" + - "NODE 5 COORD 1.0 0.0 0.0" + - "NODE 6 COORD 1.0 1.0 0.0" + - "NODE 7 COORD 1.0 0.0 1.0" + - "NODE 8 COORD 1.0 1.0 1.0" + - "NODE 9 COORD 2.0 0.0 0.0" + - "NODE 10 COORD 2.0 1.0 0.0" + - "NODE 11 COORD 2.0 0.0 1.0" + - "NODE 12 COORD 2.0 1.0 1.0" +STRUCTURE ELEMENTS: + - "1 SOLID HEX8 1 5 6 2 3 7 8 4 MAT 1 KINEM nonlinear" + - "2 SOLID HEX8 5 9 10 6 7 11 12 8 MAT 1 KINEM nonlinear"