Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 56 additions & 16 deletions src/ess/reduce/data.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,75 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 Scipp contributors (https://github.com/scipp)
import pooch
# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
"""Data files bundled with ESSreduce."""

from functools import cache
from pathlib import Path


class Registry:
def __init__(self, instrument: str, files: dict[str, str], version: str):
"""A registry for data files.

Note
----
This class requires [Pooch](https://www.fatiando.org/pooch/latest/) which
is not a hard dependency of ESSreduce and needs to be installed separately.
"""

def __init__(
self,
instrument: str,
files: dict[str, str],
version: str,
retry_if_failed: int = 3,
) -> None:
import pooch

self._registry = pooch.create(
path=pooch.os_cache(f'ess/{instrument}'),
env=f'ESS_{instrument.upper()}_DATA_DIR',
base_url=f'https://public.esss.dk/groups/scipp/ess/{instrument}/'
+ '{version}/',
version=version,
registry=files,
retry_if_failed=3,
retry_if_failed=retry_if_failed,
)
self._unzip_processor = pooch.Unzip()

def __contains__(self, key):
def __contains__(self, key: str) -> bool:
"""Return True if the key is in the registry."""
return key in self._registry.registry

def get_path(self, name: str, unzip: bool = False) -> str:
"""
Get the path to a file in the registry.
@cache # noqa: B019
def get_path(self, name: str, unzip: bool = False) -> Path:
Comment on lines +42 to +43
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there cases where caching is bad, for example because the file was corrupted or deleted? I am thinking more of users running tutorials than of the unit tests.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Depends. There are a couple of scenarios:

  • If the file is bad when calling get_path, it redownloads the file.
  • If the download fails, it raises an exception and the result is not cached. So calling get_path again attempts the download again.
  • Likewise, if the download succeeds but file access fails, we get an exception and the result is not cached.
  • If the first call to get_path succeeds and the file gets corrupted or removed afterwards, a second call to get_path will simply return the same path without any checks.

Did I miss a scenario?

Only the last case is a problem. But is it really a relevant problem? The files are fairly well hidden and it is unlikely for people to touch them accidentally.

"""Get the path to a file in the registry.

Downloads the file if necessary.

Note that return values of this method are cached to avoid recomputing
potentially expensive checksums.
This usually means that the ``Registry`` object itself gets stored until the
Python interpreter shuts down.
However, registries are small and do not own resources.
It is anyway expected that the registry objects are stored at
module scope and live until program exit.

Parameters
----------
name:
Name of the file to get the path for.
unzip:
If `True`, unzip the file before returning the path.

Returns
-------
:
The Path to the file.
"""
return self._registry.fetch(name, processor=pooch.Unzip() if unzip else None)
return Path(
self._registry.fetch(
name, processor=self._unzip_processor if unzip else None
)
)


_bifrost_registry = Registry(
Expand Down Expand Up @@ -76,37 +116,37 @@ def get_path(self, name: str, unzip: bool = False) -> str:
)


def bifrost_simulated_elastic() -> str:
def bifrost_simulated_elastic() -> Path:
"""McStas simulation with elastic incoherent scattering + phonon."""
return _bifrost_registry.get_path('BIFROST_20240914T053723.h5')


def loki_tutorial_sample_run_60250() -> str:
def loki_tutorial_sample_run_60250() -> Path:
"""Sample run with sample and sample holder/can, no transmission monitor in beam."""
return _loki_registry.get_path('60250-2022-02-28_2215.nxs')


def loki_tutorial_sample_run_60339() -> str:
def loki_tutorial_sample_run_60339() -> Path:
"""Sample run with sample and sample holder/can, no transmission monitor in beam."""
return _loki_registry.get_path('60339-2022-02-28_2215.nxs')


def loki_tutorial_background_run_60248() -> str:
def loki_tutorial_background_run_60248() -> Path:
"""Background run with sample holder/can only, no transmission monitor."""
return _loki_registry.get_path('60248-2022-02-28_2215.nxs')


def loki_tutorial_background_run_60393() -> str:
def loki_tutorial_background_run_60393() -> Path:
"""Background run with sample holder/can only, no transmission monitor."""
return _loki_registry.get_path('60393-2022-02-28_2215.nxs')


def loki_tutorial_sample_transmission_run() -> str:
def loki_tutorial_sample_transmission_run() -> Path:
"""Sample transmission run (sample + sample holder/can + transmission monitor)."""
return _loki_registry.get_path('60394-2022-02-28_2215.nxs')


def dream_coda_test_file() -> str:
def dream_coda_test_file() -> Path:
"""CODA file for DREAM where most pulses have been removed.

See ``tools/shrink_nexus.py``.
Expand Down
Loading