diff --git a/openfecli/tests/commands/test_gather.py b/openfecli/tests/commands/test_gather.py index f6c067cd9..bbde8246b 100644 --- a/openfecli/tests/commands/test_gather.py +++ b/openfecli/tests/commands/test_gather.py @@ -1,3 +1,4 @@ +from typing import Callable from click.testing import CliRunner from importlib import resources import tarfile @@ -6,10 +7,10 @@ import pytest import pooch from ..utils import assert_click_success +from ..conftest import HAS_INTERNET from openfecli.commands.gather import ( gather, format_estimate_uncertainty, _get_column, - _generate_bad_legs_error_message, ) @pytest.mark.parametrize('est,unc,unc_prec,est_str,unc_str', [ @@ -134,30 +135,29 @@ def test_get_column(val, col): solvent lig_ejm_46 lig_jmc_28 23.3 0.8 solvent lig_ejm_46 lig_jmc_28 23.4 0.8 """ - -@pytest.fixture() -def results_dir_serial(tmpdir)->str: - """Example output data, with replicates run in serial (3 replicates per results JSON).""" - with tmpdir.as_cwd(): - with resources.files('openfecli.tests.data') as d: - tar = tarfile.open(d / 'rbfe_results.tar.gz', mode='r') - tar.extractall('.') - - return os.path.abspath(tar.getnames()[0]) - -@pytest.fixture() -def results_dir_parallel(tmpdir)->str: - """Example output data, with replicates run in serial (3 replicates per results JSON).""" - with tmpdir.as_cwd(): - with resources.files('openfecli.tests.data') as d: - tar = tarfile.open(d / 'rbfe_results_parallel.tar.gz', mode='r') - tar.extractall('.') - - return os.path.abspath(tar.getnames()[0]) - -@pytest.mark.parametrize('data_fixture', ['results_dir_serial', 'results_dir_parallel']) +POOCH_CACHE = pooch.os_cache('openfe') +ZENODO_RBFE_DATA = pooch.create( + path = POOCH_CACHE, + base_url="doi:10.5281/zenodo.14884797", + registry={ + "rbfe_results_serial_repeats.tar.gz": "md5:d7c5e04786d03e1280a74639c2981546", + "rbfe_results_parallel_repeats.tar.gz": "md5:cc54afe32b56232339a9315f4c3d6d91"}, + ) + +@pytest.fixture +def rbfe_result_dir()->pathlib.Path: + def _rbfe_result_dir(dataset)->str: + ZENODO_RBFE_DATA.fetch(f'{dataset}.tar.gz', processor=pooch.Untar()) + cache_dir = pathlib.Path(pooch.os_cache('openfe'))/f'{dataset}.tar.gz.untar/{dataset}/' + return cache_dir + + return _rbfe_result_dir + +@pytest.mark.skipif(not os.path.exists(POOCH_CACHE) and not HAS_INTERNET,reason="Internet seems to be unavailable and test data is not cached locally.") +@pytest.mark.parametrize('dataset', ['rbfe_results_serial_repeats', 'rbfe_results_parallel_repeats']) @pytest.mark.parametrize('report', ["", "dg", "ddg", "raw"]) -def test_gather(request, data_fixture, report): +def test_gather(rbfe_result_dir, dataset, report): + expected = { "": _EXPECTED_DG, "dg": _EXPECTED_DG, @@ -171,30 +171,30 @@ def test_gather(request, data_fixture, report): else: args = [] - results_dir = request.getfixturevalue(data_fixture) - result = runner.invoke(gather, [results_dir] + args + ['-o', '-']) + results_dir = rbfe_result_dir(dataset) + result = runner.invoke(gather, [str(results_dir)] + args + ['-o', '-']) assert_click_success(result) actual_lines = set(result.stdout_bytes.split(b'\n')) assert set(expected.split(b'\n')) == actual_lines +@pytest.mark.skipif(not os.path.exists(POOCH_CACHE) and not HAS_INTERNET,reason="Internet seems to be unavailable and test data is not cached locally.") class TestGatherFailedEdges: @pytest.fixture() - def results_dir_serial_missing_legs(self, tmpdir)->str: - """Example output data, with replicates run in serial and one deleted results JSON.""" - with tmpdir.as_cwd(): - with resources.files('openfecli.tests.data') as d: - tar = tarfile.open(d / 'rbfe_results.tar.gz', mode='r') - tar.extractall('.') - - results_dir_path = os.path.abspath(tar.getnames()[0]) - files_to_remove = ["rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json", - "rbfe_lig_ejm_46_solvent_lig_jmc_28_solvent.json" - ] - for fname in files_to_remove: - (pathlib.Path(results_dir_path)/ fname).unlink() - return results_dir_path + def results_dir_serial_missing_legs(self, rbfe_result_dir, tmpdir)->str: + """Example output data, with replicates run in serial and two missing results JSONs.""" + # TODO: update to return a list of paths without doing this symlink mess, when gather supports it. + rbfe_result_dir = rbfe_result_dir('rbfe_results_serial_repeats') + tmp_results_dir = tmpdir + files_to_skip = ["rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json", + "rbfe_lig_ejm_46_solvent_lig_jmc_28_solvent.json" + ] + for item in os.listdir(rbfe_result_dir): + if item not in files_to_skip: + os.symlink(rbfe_result_dir/item, tmp_results_dir/item) + + return str(tmp_results_dir) def test_missing_leg_error(self, results_dir_serial_missing_legs: str): runner = CliRunner() @@ -213,9 +213,3 @@ def test_missing_leg_allow_partial(self, results_dir_serial_missing_legs: str): result = runner.invoke(gather, [results_dir_serial_missing_legs] + ['--allow-partial', '-o', '-']) assert_click_success(result) - -RBFE_RESULTS = pooch.create( - pooch.os_cache('openfe'), - base_url="doi:10.6084/m9.figshare.25148945", - registry={"results.tar.gz": "bf27e728935b31360f95188f41807558156861f6d89b8a47854502a499481da3"}, -) diff --git a/openfecli/tests/data/rbfe_results.tar.gz b/openfecli/tests/data/rbfe_results.tar.gz index 7be120c40..a399b7ddb 100644 Binary files a/openfecli/tests/data/rbfe_results.tar.gz and b/openfecli/tests/data/rbfe_results.tar.gz differ diff --git a/openfecli/tests/data/rbfe_results_parallel.tar.gz b/openfecli/tests/data/rbfe_results_parallel.tar.gz deleted file mode 100644 index fd033bc17..000000000 Binary files a/openfecli/tests/data/rbfe_results_parallel.tar.gz and /dev/null differ diff --git a/openfecli/tests/data/restructure_results_data.ipynb b/openfecli/tests/data/restructure_results_data.ipynb deleted file mode 100644 index 92aa67d22..000000000 --- a/openfecli/tests/data/restructure_results_data.ipynb +++ /dev/null @@ -1,245 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "8d1899bc-337a-4024-9fa3-9cfbc452e091", - "metadata": {}, - "outputs": [], - "source": [ - "import json \n", - "from gufe.tokenization import JSON_HANDLER\n", - "import numpy as np\n", - "import os \n", - "import shutil\n", - "from pathlib import Path" - ] - }, - { - "cell_type": "markdown", - "id": "a82b8123-521a-4ca3-a2cf-f73b6504fa14", - "metadata": {}, - "source": [ - "for this dataset, we know we have 3 replicates run in serial for each leg. We want to manipulate the data so that it is equivalent to the output if we re-ran this dataset with each leg run in parallel, with the following directory structure:\n", - "\n", - "```\n", - "results/\n", - " transformations_0/\n", - " rbfe_lig_ejm_31_complex_lig_ejm_42_complex/\n", - " shared_[hashA]_attempt_0/\n", - " rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json\n", - " transformations_1/\n", - " rbfe_lig_ejm_31_complex_lig_ejm_42_complex/\n", - " shared_[hashB]_attempt_0/\n", - " rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json\n", - " transformations_2/\n", - " rbfe_lig_ejm_31_complex_lig_ejm_42_complex/\n", - " shared_[hashC]_attempt_0/\n", - " rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c6ed7fe-b42c-4781-b356-85799e25356f", - "metadata": {}, - "outputs": [], - "source": [ - "def load_json(fpath):\n", - " return json.load(open(fpath, 'r'), cls=JSON_HANDLER.decoder)\n", - "\n", - "def dump_json(data, fpath):\n", - " with open(fpath, \"w\") as f:\n", - " json.dump(data, f, cls=JSON_HANDLER.encoder)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8eba246a-6123-4d8e-8fd8-2de516fbf881", - "metadata": {}, - "outputs": [], - "source": [ - "orig_dir = Path(\"results/\")\n", - "new_dir = Path(\"results_parallel/\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ab4f2587-9b15-422d-9faa-e11ff98fd491", - "metadata": {}, - "outputs": [], - "source": [ - "leg_names = []\n", - "for name in os.listdir(orig_dir):\n", - " if name.endswith(\".json\"):\n", - " continue\n", - " leg_names.append(name)\n", - "leg_names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "311a7f0e-9c91-47ae-9e09-0e1bef03aca8", - "metadata": {}, - "outputs": [], - "source": [ - "! rm -rf $new_dir\n", - "for leg in leg_names:\n", - " json_data = load_json(orig_dir/f\"{leg}.json\")\n", - " srckey_to_protocol = {}\n", - " srckey_to_unit_results = {}\n", - " srckey_to_estimate = {}\n", - " ## collect results on a per-replicate basis\n", - " for k in json_data['protocol_result']['data']: \n", - " rep_source_key = json_data['protocol_result']['data'][k][0]['source_key']\n", - " \n", - " # keep only the data for this replicate\n", - " rep_result = json_data['protocol_result'].copy()\n", - " rep_result['data']={k:json_data['protocol_result']['data'][k]}\n", - " srckey_to_protocol[rep_source_key] = rep_result\n", - "\n", - " # pull just the estimate value so we can put it at the top of the output\n", - " srckey_to_estimate[rep_source_key] = rep_result['data'][k][0]['outputs']['unit_estimate']\n", - " \n", - " for k in json_data['unit_results']:\n", - " rep_source_key = json_data['unit_results'][k]['source_key']\n", - "\n", - " rep_unit_result = json_data['unit_results'].copy()\n", - " rep_unit_result = {k: json_data['unit_results'][k]}\n", - " srckey_to_unit_results[rep_source_key] = rep_unit_result\n", - " \n", - " assert srckey_to_protocol.keys() == srckey_to_unit_results.keys()\n", - " \n", - " ## write to the new directory\n", - " for n, sk in enumerate(sorted(srckey_to_protocol.keys())):\n", - " rep_dir = new_dir/f\"replicate_{n}\"\n", - " os.makedirs(rep_dir/leg)\n", - " \n", - " # build up the data for this replicate\n", - " replicate_data = {'estimate': srckey_to_estimate[sk],\n", - " 'uncertainty': np.std(srckey_to_estimate[sk]),\n", - " 'protocol_result': srckey_to_protocol[sk],\n", - " 'unit_results': srckey_to_unit_results[sk]}\n", - " \n", - " # write!\n", - " dump_json(replicate_data, rep_dir/f\"{leg}.json\")\n", - " working_dir_name = f\"shared_{sk}_attempt_0\"\n", - " ## TODO: make this work for arbitrary number of attempts \n", - " # os.symlink(orig_dir/leg/working_dir_name, rep_dir/leg/working_dir_name)\n", - " shutil.copytree(orig_dir/leg/working_dir_name, rep_dir/leg/working_dir_name)\n" - ] - }, - { - "cell_type": "markdown", - "id": "f864dcb3-bebf-425b-9154-bffc2b0e3f07", - "metadata": {}, - "source": [ - "## check that objects reload correctly" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6c20639c-8ba7-457a-bf8a-76c64aef4a38", - "metadata": {}, - "outputs": [], - "source": [ - "import openfe" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9cba8316-5500-4d5e-a84d-d72d09ba2a42", - "metadata": {}, - "outputs": [], - "source": [ - "json_reloaded = load_json(\"results_parallel/replicate_0/easy_rbfe_lig_ejm_31_solvent_lig_ejm_47_solvent.json\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c0e90b45-ae83-41c1-8748-0a8c1466b378", - "metadata": {}, - "outputs": [], - "source": [ - "json_reloaded['estimate'], json_reloaded['uncertainty']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c0ce2bc6-d960-4521-b71c-316be0557e9d", - "metadata": {}, - "outputs": [], - "source": [ - "pr_reloaded = openfe.ProtocolResult.from_dict(json_reloaded['protocol_result'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2fbb695-d4ef-45bd-af53-2ef9d0bc8e0a", - "metadata": {}, - "outputs": [], - "source": [ - "pr_reloaded.data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "19662eaa-46de-4eb0-8c78-ddd6c68b12db", - "metadata": {}, - "outputs": [], - "source": [ - "first_pur_key = list(json_reloaded['unit_results'].keys())[0]\n", - "pur_reloaded = openfe.ProtocolUnit.from_dict(json_reloaded['unit_results'][first_pur_key])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0154fda2-4c1a-4064-8bcc-03aeecf13365", - "metadata": {}, - "outputs": [], - "source": [ - "pur_reloaded" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3f2bbc84-f59c-40b9-a176-9a733ff275c1", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/openfecli/tests/test_fetchables.py b/openfecli/tests/test_fetchables.py index a30e7e247..5feead121 100644 --- a/openfecli/tests/test_fetchables.py +++ b/openfecli/tests/test_fetchables.py @@ -23,11 +23,11 @@ def fetchable_test(fetchable): assert (pathlib.Path("output-dir") / path).exists() -def test_rhfe_tutorial(): +def test_rbfe_tutorial(): fetchable_test(RBFE_TUTORIAL) -def test_rhfe_tutorial_results(): +def test_rbfe_tutorial_results(): fetchable_test(RBFE_TUTORIAL_RESULTS) -def test_rhfe_showcase(): +def test_rbfe_showcase(): fetchable_test(RBFE_SHOWCASE)