From 6826faf3494390130856e7037b1e30d2cf585295 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Fri, 8 Aug 2025 14:59:42 +0200 Subject: [PATCH 1/5] Move pooch import into Registry --- src/ess/reduce/data.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/ess/reduce/data.py b/src/ess/reduce/data.py index 3a618c0f..90ee919c 100644 --- a/src/ess/reduce/data.py +++ b/src/ess/reduce/data.py @@ -1,10 +1,19 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2024 Scipp contributors (https://github.com/scipp) -import pooch class Registry: - def __init__(self, instrument: str, files: dict[str, str], version: str): + """A registry for data files. + + Note + ---- + This class requires [Pooch](https://www.fatiando.org/pooch/latest/) which + is not a hard dependency of ESSreduce and needs to be installed separately. + """ + + def __init__(self, instrument: str, files: dict[str, str], version: str) -> None: + import pooch + self._registry = pooch.create( path=pooch.os_cache(f'ess/{instrument}'), env=f'ESS_{instrument.upper()}_DATA_DIR', @@ -14,8 +23,10 @@ def __init__(self, instrument: str, files: dict[str, str], version: str): registry=files, retry_if_failed=3, ) + self._unzip_processor = pooch.Unzip() - def __contains__(self, key): + def __contains__(self, key: str) -> bool: + """Return True if the key is in the registry.""" return key in self._registry.registry def get_path(self, name: str, unzip: bool = False) -> str: @@ -28,8 +39,15 @@ def get_path(self, name: str, unzip: bool = False) -> str: Name of the file to get the path for. unzip: If `True`, unzip the file before returning the path. + + Returns + ------- + : + The Path to the file. """ - return self._registry.fetch(name, processor=pooch.Unzip() if unzip else None) + return self._registry.fetch( + name, processor=self._unzip_processor if unzip else None + ) _bifrost_registry = Registry( From aadecdc715b459d64c15e4ba612ee62e53d76410 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Fri, 8 Aug 2025 15:00:40 +0200 Subject: [PATCH 2/5] Return Path from get_path --- src/ess/reduce/data.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/ess/reduce/data.py b/src/ess/reduce/data.py index 90ee919c..6d919f63 100644 --- a/src/ess/reduce/data.py +++ b/src/ess/reduce/data.py @@ -1,5 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2024 Scipp contributors (https://github.com/scipp) +# Copyright (c) 2025 Scipp contributors (https://github.com/scipp) + +from pathlib import Path class Registry: @@ -29,7 +31,7 @@ def __contains__(self, key: str) -> bool: """Return True if the key is in the registry.""" return key in self._registry.registry - def get_path(self, name: str, unzip: bool = False) -> str: + def get_path(self, name: str, unzip: bool = False) -> Path: """ Get the path to a file in the registry. @@ -45,8 +47,10 @@ def get_path(self, name: str, unzip: bool = False) -> str: : The Path to the file. """ - return self._registry.fetch( - name, processor=self._unzip_processor if unzip else None + return Path( + self._registry.fetch( + name, processor=self._unzip_processor if unzip else None + ) ) @@ -94,37 +98,37 @@ def get_path(self, name: str, unzip: bool = False) -> str: ) -def bifrost_simulated_elastic() -> str: +def bifrost_simulated_elastic() -> Path: """McStas simulation with elastic incoherent scattering + phonon.""" return _bifrost_registry.get_path('BIFROST_20240914T053723.h5') -def loki_tutorial_sample_run_60250() -> str: +def loki_tutorial_sample_run_60250() -> Path: """Sample run with sample and sample holder/can, no transmission monitor in beam.""" return _loki_registry.get_path('60250-2022-02-28_2215.nxs') -def loki_tutorial_sample_run_60339() -> str: +def loki_tutorial_sample_run_60339() -> Path: """Sample run with sample and sample holder/can, no transmission monitor in beam.""" return _loki_registry.get_path('60339-2022-02-28_2215.nxs') -def loki_tutorial_background_run_60248() -> str: +def loki_tutorial_background_run_60248() -> Path: """Background run with sample holder/can only, no transmission monitor.""" return _loki_registry.get_path('60248-2022-02-28_2215.nxs') -def loki_tutorial_background_run_60393() -> str: +def loki_tutorial_background_run_60393() -> Path: """Background run with sample holder/can only, no transmission monitor.""" return _loki_registry.get_path('60393-2022-02-28_2215.nxs') -def loki_tutorial_sample_transmission_run() -> str: +def loki_tutorial_sample_transmission_run() -> Path: """Sample transmission run (sample + sample holder/can + transmission monitor).""" return _loki_registry.get_path('60394-2022-02-28_2215.nxs') -def dream_coda_test_file() -> str: +def dream_coda_test_file() -> Path: """CODA file for DREAM where most pulses have been removed. See ``tools/shrink_nexus.py``. From 3da89aab5adbf528fadff5d549fc8a54a552eb06 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Fri, 8 Aug 2025 15:05:49 +0200 Subject: [PATCH 3/5] Cache get_path --- src/ess/reduce/data.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/ess/reduce/data.py b/src/ess/reduce/data.py index 6d919f63..0c3e70a2 100644 --- a/src/ess/reduce/data.py +++ b/src/ess/reduce/data.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2025 Scipp contributors (https://github.com/scipp) +from functools import cache from pathlib import Path @@ -31,9 +32,19 @@ def __contains__(self, key: str) -> bool: """Return True if the key is in the registry.""" return key in self._registry.registry + @cache # noqa: B019 def get_path(self, name: str, unzip: bool = False) -> Path: - """ - Get the path to a file in the registry. + """Get the path to a file in the registry. + + Downloads the file if necessary. + + Note that return values of this method are cached to avoid recomputing + potentially expensive checksums. + This usually means that the ``Registry`` object itself gets stored until the + Python interpreter shuts down. + However, registries are small and do not own resources. + It is anyway expected that the registry objects are stored at + module scope and live until program exit. Parameters ---------- From 14f95e2b4eba5acb79b0c1fe64142a803cec93dc Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Fri, 8 Aug 2025 15:06:47 +0200 Subject: [PATCH 4/5] Module docs --- src/ess/reduce/data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ess/reduce/data.py b/src/ess/reduce/data.py index 0c3e70a2..ab38d229 100644 --- a/src/ess/reduce/data.py +++ b/src/ess/reduce/data.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2025 Scipp contributors (https://github.com/scipp) +"""Data files bundled with ESSreduce.""" from functools import cache from pathlib import Path From 571a839b57ada08642cb1d435db9a6d32b646dc9 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Fri, 8 Aug 2025 15:08:44 +0200 Subject: [PATCH 5/5] Make retry_if_failed an arg --- src/ess/reduce/data.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/ess/reduce/data.py b/src/ess/reduce/data.py index ab38d229..f065c2a2 100644 --- a/src/ess/reduce/data.py +++ b/src/ess/reduce/data.py @@ -15,7 +15,13 @@ class Registry: is not a hard dependency of ESSreduce and needs to be installed separately. """ - def __init__(self, instrument: str, files: dict[str, str], version: str) -> None: + def __init__( + self, + instrument: str, + files: dict[str, str], + version: str, + retry_if_failed: int = 3, + ) -> None: import pooch self._registry = pooch.create( @@ -25,7 +31,7 @@ def __init__(self, instrument: str, files: dict[str, str], version: str) -> None + '{version}/', version=version, registry=files, - retry_if_failed=3, + retry_if_failed=retry_if_failed, ) self._unzip_processor = pooch.Unzip()