diff --git a/news/test-download-all.rst b/news/test-download-all.rst
new file mode 100644
index 000000000..f0c31b08a
--- /dev/null
+++ b/news/test-download-all.rst
@@ -0,0 +1,23 @@
+**Added:**
+
+* Added ``openfe test --download-only`` flag, which caches all test data stored remotely.
+
+**Changed:**
+
+*
+
+**Deprecated:**
+
+*
+
+**Removed:**
+
+*
+
+**Fixed:**
+
+*
+
+**Security:**
+
+*
diff --git a/src/openfe/data/__init__.py b/src/openfe/data/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/openfe/data/_downloader.py b/src/openfe/data/_downloader.py
new file mode 100644
index 000000000..7bfb0d661
--- /dev/null
+++ b/src/openfe/data/_downloader.py
@@ -0,0 +1,43 @@
+import pooch
+
+from ._registry import zenodo_data_registry
+
+
+def retrieve_registry_data(zenodo_registry: list[dict], path: str) -> None:
+    """Helper function for pulling all test data up-front.
+
+    Every entry is fetched with ``pooch.retrieve`` and stored in ``path``
+    under the entry's own ``fname``; ``.tar.gz`` and ``.zip`` files are unpacked.
+
+    Parameters
+    ----------
+    zenodo_registry : list[dict]
+        Registry entries to fetch. Each entry must provide the keys
+        ``base_url``, ``fname`` and ``known_hash``.
+    path : str
+        path to store the data - usually a pooch.os_cache instance.
+
+    """
+    downloader = pooch.DOIDownloader(progressbar=True)
+
+    def _infer_processor(fname: str):
+        # Archives are unpacked on retrieval; any other file is stored as-is.
+        if fname.endswith("tar.gz"):
+            return pooch.Untar()
+        elif fname.endswith("zip"):
+            return pooch.Unzip()
+        else:
+            return None
+
+    for d in zenodo_registry:
+        # Join with exactly one "/" so that an entry whose ``base_url`` lacks
+        # the trailing slash still resolves to "<doi>/<fname>".
+        url = d["base_url"].rstrip("/") + "/" + d["fname"]
+        pooch.retrieve(
+            url=url,
+            known_hash=d["known_hash"],
+            fname=d["fname"],
+            processor=_infer_processor(d["fname"]),
+            downloader=downloader,
+            path=path,
+        )
diff --git a/src/openfe/data/_registry.py b/src/openfe/data/_registry.py
new file mode 100644
index 000000000..7a87814dd
--- /dev/null
+++ b/src/openfe/data/_registry.py
@@ -0,0 +1,25 @@
+import pooch
+
+POOCH_CACHE = pooch.os_cache("openfe")
+
+# Every ``base_url`` ends with "/" so that ``base_url + fname`` is a valid URL.
+zenodo_rfe_simulation_nc = dict(
+    base_url="doi:10.5281/zenodo.15375081/",
+    fname="simulation.nc",
+    known_hash="md5:bc4e842b47de17704d804ae345b91599",
+)
+zenodo_t4_lysozyme_traj = dict(
+    base_url="doi:10.5281/zenodo.15212342/",
+    fname="t4_lysozyme_trajectory.zip",
+    known_hash="sha256:e985d055db25b5468491e169948f641833a5fbb67a23dbb0a00b57fb7c0e59c8",
+)
+zenodo_industry_benchmark_systems = dict(
+    base_url="doi:10.5281/zenodo.15212342/",
+    fname="industry_benchmark_systems.zip",
+    known_hash="sha256:2bb5eee36e29b718b96bf6e9350e0b9957a592f6c289f77330cbb6f4311a07bd",
+)
+zenodo_data_registry = [
+    zenodo_rfe_simulation_nc,
+    zenodo_t4_lysozyme_traj,
+    zenodo_industry_benchmark_systems,
+]
diff --git a/src/openfe/tests/conftest.py b/src/openfe/tests/conftest.py
index 4a75fc03b..deaad59e3 100644
--- a/src/openfe/tests/conftest.py
+++ b/src/openfe/tests/conftest.py
@@ -22,13 +22,12 @@
 from rdkit.Chem import AllChem
 
 import openfe
+from openfe.data._registry import POOCH_CACHE
 from openfe.protocols.openmm_rfe import RelativeHybridTopologyProtocol
 from openfe.protocols.openmm_rfe._rfe_utils.relative import HybridTopologyFactory
 from openfe.protocols.openmm_utils.serialization import deserialize
 from openfe.tests.protocols.openmm_rfe.helpers import make_htf
 
-POOCH_CACHE =
pooch.os_cache("openfe")
-
 
 class SlowTests:
     """Plugin for handling fixtures that skips slow tests
diff --git a/src/openfe/tests/protocols/conftest.py b/src/openfe/tests/protocols/conftest.py
index ab1405aa7..b5f302947 100644
--- a/src/openfe/tests/protocols/conftest.py
+++ b/src/openfe/tests/protocols/conftest.py
@@ -17,8 +17,12 @@
 from rdkit.Geometry import Point3D
 
 import openfe
-
-from ..conftest import POOCH_CACHE
+from openfe.data._registry import (
+    POOCH_CACHE,
+    zenodo_industry_benchmark_systems,
+    zenodo_rfe_simulation_nc,
+    zenodo_t4_lysozyme_traj,
+)
 
 
 @pytest.fixture
@@ -284,55 +288,56 @@ def septop_json() -> str:
         return f.read().decode()  # type: ignore
 
 
-zenodo_industry_benchmarks_data = pooch.create(
+# The fetchers below are built from the entries in openfe.data._registry.
+pooch_industry_benchmark_systems = pooch.create(
     path=POOCH_CACHE,
-    base_url="doi:10.5281/zenodo.15212342",
+    base_url=zenodo_industry_benchmark_systems["base_url"],
     registry={
-        "industry_benchmark_systems.zip": "sha256:2bb5eee36e29b718b96bf6e9350e0b9957a592f6c289f77330cbb6f4311a07bd"
+        zenodo_industry_benchmark_systems["fname"]: zenodo_industry_benchmark_systems["known_hash"]
     },
 )
 
 
 @pytest.fixture
 def industry_benchmark_files():
-    zenodo_industry_benchmarks_data.fetch("industry_benchmark_systems.zip", processor=pooch.Unzip())
+    pooch_industry_benchmark_systems.fetch(
+        "industry_benchmark_systems.zip", processor=pooch.Unzip()
+    )
     cache_dir = pathlib.Path(
         POOCH_CACHE / "industry_benchmark_systems.zip.unzip/industry_benchmark_systems"
     )
     return cache_dir
 
 
-zenodo_restraint_data = pooch.create(
+pooch_t4_lysozyme = pooch.create(
     path=POOCH_CACHE,
-    base_url="doi:10.5281/zenodo.15212342",
-    registry={
-        "t4_lysozyme_trajectory.zip": "sha256:e985d055db25b5468491e169948f641833a5fbb67a23dbb0a00b57fb7c0e59c8"
-    },
+    base_url=zenodo_t4_lysozyme_traj["base_url"],
+    registry={zenodo_t4_lysozyme_traj["fname"]: zenodo_t4_lysozyme_traj["known_hash"]},
 )
 
 
 # session scope for downstream reuse
 @pytest.fixture(scope="session")
 def t4_lysozyme_trajectory_dir():
-    zenodo_restraint_data.fetch("t4_lysozyme_trajectory.zip", processor=pooch.Unzip())
+    pooch_t4_lysozyme.fetch("t4_lysozyme_trajectory.zip", processor=pooch.Unzip())
     cache_dir = pathlib.Path(
         POOCH_CACHE / "t4_lysozyme_trajectory.zip.unzip/t4_lysozyme_trajectory"
     )
     return cache_dir
 
 
-RFE_OUTPUT = pooch.create(
-    path=POOCH_CACHE,
-    base_url="doi:10.6084/m9.figshare.24101655",
-    registry={
-        "simulation.nc": "92361a0864d4359a75399470135f56642b72c605069a4c33dbc4be6f91f28b31",
-    },
-)
-
-
 @pytest.fixture
 def simulation_nc():
-    return RFE_OUTPUT.fetch("simulation.nc")
+    # ``fname`` must match the name used by ``retrieve_registry_data`` (i.e.
+    # ``openfe test --download-only``); without it pooch.retrieve prepends a
+    # URL hash to the cached file name and the pre-downloaded copy is never
+    # found.
+    return pooch.retrieve(
+        url=zenodo_rfe_simulation_nc["base_url"] + zenodo_rfe_simulation_nc["fname"],
+        known_hash=zenodo_rfe_simulation_nc["known_hash"],
+        fname=zenodo_rfe_simulation_nc["fname"],
+        path=POOCH_CACHE,
+    )
 
 
 @pytest.fixture
diff --git a/src/openfe/tests/protocols/restraints/test_geometry_boresch.py b/src/openfe/tests/protocols/restraints/test_geometry_boresch.py
index 493de92ac..5231deef5 100644
--- a/src/openfe/tests/protocols/restraints/test_geometry_boresch.py
+++ b/src/openfe/tests/protocols/restraints/test_geometry_boresch.py
@@ -9,6 +9,7 @@
 from openff.units import unit
 from rdkit import Chem
 
+from openfe.data._registry import POOCH_CACHE
 from openfe.protocols.restraint_utils.geometry.boresch.geometry import (
     BoreschRestraintGeometry,
     find_boresch_restraint,
diff --git a/src/openfe/tests/protocols/restraints/test_geometry_boresch_host.py b/src/openfe/tests/protocols/restraints/test_geometry_boresch_host.py
index d511b3d81..5f89cf5aa 100644
--- a/src/openfe/tests/protocols/restraints/test_geometry_boresch_host.py
+++ b/src/openfe/tests/protocols/restraints/test_geometry_boresch_host.py
@@ -10,6 +10,7 @@
 from numpy.testing import assert_equal
 from openff.units import unit
 
+from openfe.data._registry import POOCH_CACHE
 from openfe.protocols.restraint_utils.geometry.boresch.host import (
     EvaluateBoreschAtoms,
     EvaluateHostAtoms1,
@@ -24,7 +25,7 @@
     is_collinear,
 )
 
-from ...conftest import HAS_INTERNET, POOCH_CACHE
+from ...conftest
import HAS_INTERNET @pytest.fixture diff --git a/src/openfe/tests/protocols/restraints/test_geometry_utils.py b/src/openfe/tests/protocols/restraints/test_geometry_utils.py index d3cf312b0..077c32c24 100644 --- a/src/openfe/tests/protocols/restraints/test_geometry_utils.py +++ b/src/openfe/tests/protocols/restraints/test_geometry_utils.py @@ -11,6 +11,7 @@ from openff.units import unit from rdkit import Chem +from openfe.data._registry import POOCH_CACHE from openfe.protocols.restraint_utils.geometry.utils import ( CentroidDistanceSort, FindHostAtoms, diff --git a/src/openfe/tests/protocols/restraints/test_omm_restraints.py b/src/openfe/tests/protocols/restraints/test_omm_restraints.py index 99ad1b5fa..4017e9270 100644 --- a/src/openfe/tests/protocols/restraints/test_omm_restraints.py +++ b/src/openfe/tests/protocols/restraints/test_omm_restraints.py @@ -9,6 +9,7 @@ from openff.units import unit from openmmtools.states import ThermodynamicState +from openfe.data._registry import POOCH_CACHE from openfe.protocols.restraint_utils.openmm.omm_restraints import ( BoreschRestraint, BoreschRestraintGeometry, @@ -26,7 +27,7 @@ FlatBottomRestraintSettings, ) -from ...conftest import HAS_INTERNET, POOCH_CACHE +from ...conftest import HAS_INTERNET def test_parameter_state_default(): diff --git a/src/openfe/tests/protocols/test_openmmutils.py b/src/openfe/tests/protocols/test_openmmutils.py index 46528f912..d3838bfdd 100644 --- a/src/openfe/tests/protocols/test_openmmutils.py +++ b/src/openfe/tests/protocols/test_openmmutils.py @@ -23,6 +23,7 @@ from pymbar.utils import ParameterError import openfe +from openfe.data._registry import POOCH_CACHE from openfe.protocols.openmm_rfe.equil_rfe_settings import ( IntegratorSettings, OpenMMSolvationSettings, @@ -40,7 +41,8 @@ HAS_NAGL, HAS_OPENEYE, ) -from openfe.tests.conftest import HAS_INTERNET, POOCH_CACHE + +from ..conftest import HAS_INTERNET @pytest.mark.parametrize( diff --git a/src/openfecli/commands/test.py 
b/src/openfecli/commands/test.py
index d659ed802..47cd06d9b 100644
--- a/src/openfecli/commands/test.py
+++ b/src/openfecli/commands/test.py
@@ -4,13 +4,23 @@
 import click
 import pytest
 
+from openfe.data import _downloader
+from openfe.data._registry import zenodo_data_registry as api_test_data_registry
 from openfecli import OFECommandPlugin
+from openfecli.data._registry import POOCH_CACHE
+from openfecli.data._registry import zenodo_data_registry as cli_test_data_registry
 from openfecli.utils import write
 
 
 @click.command("test", short_help="Run the OpenFE test suite")
 @click.option('--long', is_flag=True, default=False, help="Run additional tests (takes much longer)")  # fmt: skip
-def test(long):
+@click.option(
+    "--download-only",
+    is_flag=True,
+    default=False,
+    help="Download data to the cache if not already present (this is helpful if internet is unreliable). If all data exists in the cache, only the cache location is shown.",
+)
+def test(long, download_only):
     """
     Run the OpenFE test suite. This first checks that OpenFE is correctly
     imported, and then runs the main test suite, which should take several
@@ -22,6 +32,16 @@ def test(long):
     terminals, these show as green or yellow. Warnings are not a concern.
     However, You should not see anything that fails or errors (red).
     """
+
+    if download_only:
+        # Populate the cache with the remote data of both the CLI and the API
+        # test suites, then exit without running any tests.
+        click.echo(f"Checking for test data in cache location:\n{POOCH_CACHE}")
+        _downloader.retrieve_registry_data(
+            cli_test_data_registry + api_test_data_registry, POOCH_CACHE
+        )
+        sys.exit(0)
+
     try:
         old_env = dict(os.environ)
         os.environ["OFE_SLOW_TESTS"] = str(long)
diff --git a/src/openfecli/data/__init__.py b/src/openfecli/data/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/openfecli/data/_registry.py b/src/openfecli/data/_registry.py
new file mode 100644
index 000000000..f9b811f73
--- /dev/null
+++ b/src/openfecli/data/_registry.py
@@ -0,0 +1,40 @@
+"""Registry for all remotely-stored CLI test data."""
+
+# Re-export the API cache location instead of redefining it, so that
+# ``openfe test --download-only`` and both test suites always agree on
+# where remote data is stored.
+from openfe.data._registry import POOCH_CACHE  # noqa: F401
+
+zenodo_cmet_data = dict(
+    base_url="doi:10.5281/zenodo.15200083/",
+    fname="cmet_results.tar.gz",
+    known_hash="md5:a4ca67a907f744c696b09660dc1eb8ec",
+)
+zenodo_rbfe_serial_data = dict(
+    base_url="doi:10.5281/zenodo.15042470/",
+    fname="rbfe_results_serial_repeats.tar.gz",
+    known_hash="md5:2355ecc80e03242a4c7fcbf20cb45487",
+)
+zenodo_rbfe_parallel_data = dict(
+    base_url="doi:10.5281/zenodo.15042470/",
+    fname="rbfe_results_parallel_repeats.tar.gz",
+    known_hash="md5:ff7313e14eb6f2940c6ffd50f2192181",
+)
+zenodo_abfe_data = dict(
+    base_url="doi:10.5281/zenodo.17348229/",
+    fname="abfe_results.zip",
+    known_hash="md5:547f896e867cce61979d75b7e082f6ba",
+)
+zenodo_septop_data = dict(
+    base_url="doi:10.5281/zenodo.17435569/",
+    fname="septop_results.zip",
+    known_hash="md5:2cfa18da59a20228f5c75a1de6ec879e",
+)
+
+zenodo_data_registry = [
+    zenodo_cmet_data,
+    zenodo_rbfe_serial_data,
+    zenodo_rbfe_parallel_data,
+    zenodo_abfe_data,
+    zenodo_septop_data,
+]
diff --git a/src/openfecli/tests/commands/test_gather.py b/src/openfecli/tests/commands/test_gather.py
index 1cae95aa8..9584482a9 100644
--- a/src/openfecli/tests/commands/test_gather.py
+++ b/src/openfecli/tests/commands/test_gather.py
@@ -17,23 +17,34 @@
 )
 from openfecli.commands.gather_abfe
import gather_abfe
 from openfecli.commands.gather_septop import gather_septop
+from openfecli.data._registry import (
+    POOCH_CACHE,
+    zenodo_abfe_data,
+    zenodo_cmet_data,
+    zenodo_rbfe_parallel_data,
+    zenodo_rbfe_serial_data,
+    zenodo_septop_data,
+)
 
 from ..conftest import HAS_INTERNET
 from ..utils import assert_click_success
 
-POOCH_CACHE = pooch.os_cache("openfe")
-ZENODO_RBFE_DATA = pooch.create(
+pooch_rbfe_serial = pooch.create(
     path=POOCH_CACHE,
-    base_url="doi:10.5281/zenodo.15042470",
-    registry={
-        "rbfe_results_serial_repeats.tar.gz": "md5:2355ecc80e03242a4c7fcbf20cb45487",
-        "rbfe_results_parallel_repeats.tar.gz": "md5:ff7313e14eb6f2940c6ffd50f2192181",
-    },
+    base_url=zenodo_rbfe_serial_data["base_url"],
+    registry={zenodo_rbfe_serial_data["fname"]: zenodo_rbfe_serial_data["known_hash"]},
 )
-ZENODO_CMET_DATA = pooch.create(
+
+pooch_rbfe_parallel = pooch.create(
+    path=POOCH_CACHE,
+    base_url=zenodo_rbfe_parallel_data["base_url"],
+    registry={zenodo_rbfe_parallel_data["fname"]: zenodo_rbfe_parallel_data["known_hash"]},
+)
+
+pooch_cmet = pooch.create(
     path=POOCH_CACHE,
-    base_url="doi:10.5281/zenodo.15200083",
-    registry={"cmet_results.tar.gz": "md5:a4ca67a907f744c696b09660dc1eb8ec"},
+    base_url=zenodo_cmet_data["base_url"],
+    registry={zenodo_cmet_data["fname"]: zenodo_cmet_data["known_hash"]},
 )
 
 
@@ -226,19 +237,9 @@ def test_no_results_found():
 """
 
 
-@pytest.fixture
-def rbfe_result_dir() -> pathlib.Path:
-    def _rbfe_result_dir(dataset) -> str:
-        ZENODO_RBFE_DATA.fetch(f"{dataset}.tar.gz", processor=pooch.Untar())
-        cache_dir = pathlib.Path(POOCH_CACHE) / f"{dataset}.tar.gz.untar/{dataset}/"
-        return cache_dir
-
-    return _rbfe_result_dir
-
-
 @pytest.fixture
 def cmet_result_dir() -> pathlib.Path:
-    ZENODO_CMET_DATA.fetch("cmet_results.tar.gz", processor=pooch.Untar())
+    pooch_cmet.fetch("cmet_results.tar.gz", processor=pooch.Untar())
     result_dir = pathlib.Path(POOCH_CACHE) / "cmet_results.tar.gz.untar/cmet_results/"
     return result_dir
 
@@ -347,14 +348,36 @@ def test_write_to_file(self, cmet_result_dir):
         assert pathlib.Path(fname).is_file()
 
 
+@pytest.fixture
+def rbfe_results_serial_dir() -> pathlib.Path:
+    """Fetch and untar ``rbfe_results_serial_repeats.tar.gz``; return the extracted directory."""
+    pooch_rbfe_serial.fetch("rbfe_results_serial_repeats.tar.gz", processor=pooch.Untar())
+    result_dir = (
+        pathlib.Path(POOCH_CACHE)
+        / "rbfe_results_serial_repeats.tar.gz.untar/rbfe_results_serial_repeats/"
+    )
+    return result_dir
+
+
+@pytest.fixture
+def rbfe_results_parallel_dir() -> pathlib.Path:
+    """Fetch and untar ``rbfe_results_parallel_repeats.tar.gz``; return the extracted directory."""
+    pooch_rbfe_parallel.fetch("rbfe_results_parallel_repeats.tar.gz", processor=pooch.Untar())
+    result_dir = (
+        pathlib.Path(POOCH_CACHE)
+        / "rbfe_results_parallel_repeats.tar.gz.untar/rbfe_results_parallel_repeats/"
+    )
+    return result_dir
+
+
 @pytest.mark.skipif(
     not os.path.exists(POOCH_CACHE) and not HAS_INTERNET,
     reason="Internet seems to be unavailable and test data is not cached locally.",
 )
-@pytest.mark.parametrize("dataset", ["rbfe_results_serial_repeats", "rbfe_results_parallel_repeats"])  # fmt: skip
+@pytest.mark.parametrize("dataset", ["rbfe_results_serial_dir", "rbfe_results_parallel_dir"])  # fmt: skip
 @pytest.mark.parametrize("report", ["", "dg", "ddg", "raw"])
 @pytest.mark.parametrize("input_mode", ["directory", "filepaths"])
-def test_rbfe_gather(rbfe_result_dir, dataset, report, input_mode):
+def test_rbfe_gather(request, dataset, report, input_mode):
     expected = {
         "": _RBFE_EXPECTED_DG,
         "dg": _RBFE_EXPECTED_DG,
@@ -368,7 +391,8 @@ def test_rbfe_gather(rbfe_result_dir, dataset, report, input_mode):
     else:
         args = []
 
-    results = rbfe_result_dir(dataset)
+    # ``dataset`` is the name of a fixture; resolve it to that fixture's value.
+    results = request.getfixturevalue(dataset)
     if input_mode == "directory":
         results = [str(results)]
     elif input_mode == "filepaths":
@@ -383,11 +407,11 @@ def test_rbfe_gather(rbfe_result_dir, dataset, report, input_mode):
     assert set(expected.split(b"\n")) == actual_lines
 
 
-def test_rbfe_gather_single_repeats_dg_error(rbfe_result_dir):
+def test_rbfe_gather_single_repeats_dg_error(rbfe_results_parallel_dir):
     """A single repeat is insufficient for a dg calculation - should fail cleanly."""
 
     runner = CliRunner()
-    results = rbfe_result_dir("rbfe_results_parallel_repeats")
+    results = rbfe_results_parallel_dir
     args = ["report", "dg"]
     cli_result = runner.invoke(gather, [f"{results}/replicate_0"] + args + ["--tsv"])
     assert cli_result.exit_code == 1
@@ -399,9 +423,9 @@ def test_rbfe_gather_single_repeats_dg_error(rbfe_result_dir):
 )
 class TestRBFEGatherFailedEdges:
     @pytest.fixture()
-    def results_paths_serial_missing_legs(self, rbfe_result_dir) -> str:
+    def results_paths_serial_missing_legs(self, rbfe_results_serial_dir) -> str:
         """Example output data, with replicates run in serial and two missing results JSONs."""
-        result_dir = rbfe_result_dir("rbfe_results_serial_repeats")
+        result_dir = rbfe_results_serial_dir
         results = glob.glob(f"{result_dir}/*", recursive=True)
 
         files_to_skip = [
@@ -442,13 +466,13 @@ def test_allow_partial_msg_not_printed(self, results_paths_serial_missing_legs:
 
 ZENODO_ABFE_DATA = pooch.create(
     path=POOCH_CACHE,
-    base_url="doi:10.5281/zenodo.17348229",
-    registry={"abfe_results.zip": "md5:547f896e867cce61979d75b7e082f6ba"},
+    base_url=zenodo_abfe_data["base_url"],
+    registry={zenodo_abfe_data["fname"]: zenodo_abfe_data["known_hash"]},
 )
 ZENODO_SEPTOP_DATA = pooch.create(
     path=POOCH_CACHE,
-    base_url="doi:10.5281/zenodo.17435569",
-    registry={"septop_results.zip": "md5:2cfa18da59a20228f5c75a1de6ec879e"},
+    base_url=zenodo_septop_data["base_url"],
+    registry={zenodo_septop_data["fname"]: zenodo_septop_data["known_hash"]},
 )
 
 