-
Notifications
You must be signed in to change notification settings - Fork 4
Support setting techniques from ExPaNDS ontology label #321
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7ab2ce0
e97f370
853fd01
e41b1fb
1e9eabd
38ce792
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,3 +26,6 @@ __pycache__/ | |
| Pipfile | ||
| venv | ||
| .venv | ||
|
|
||
| # Ontologies | ||
| *.jsonld | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -72,6 +72,7 @@ Submodules | |
| :recursive: | ||
|
|
||
| model | ||
| ontology | ||
| testing | ||
| typing | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,122 @@ | ||
| # SPDX-License-Identifier: BSD-3-Clause | ||
| # Copyright (c) 2025 SciCat Project (https://github.com/SciCatProject/scitacean) | ||
| """Tools for working with ontologies.""" | ||
|
|
||
| import bz2 | ||
| import importlib.resources | ||
| import json | ||
| import re | ||
| from functools import cache | ||
|
|
||
| from ..model import Technique | ||
|
|
||
|
|
||
def _load_ontology(name: str) -> object:
    """Read and decode an ontology bundled with the package.

    The ontology is looked up as ``<name>.json.bz2`` in the resources of the
    ``scitacean.ontology`` package, decompressed with bz2, and parsed as JSON.

    Note that the ontology file was generated using a script in
    ``tools/ontologies`` in the Scitacean repository.

    Parameters
    ----------
    name:
        Base name of the resource, without the ``.json.bz2`` suffix.

    Returns
    -------
    :
        The decoded JSON content.
    """
    resource = importlib.resources.files("scitacean.ontology").joinpath(
        f"{name}.json.bz2"
    )
    with resource.open("rb") as compressed, bz2.open(compressed, "rb") as stream:
        payload = stream.read()
    # Both streams are closed at this point; only the in-memory bytes remain.
    return json.loads(payload)
|
|
||
|
|
||
@cache
def expands_techniques() -> dict[str, list[str]]:
    """Return the ExPaNDS experimental techniques ontology.

    The ontology is loaded from the package resources on the first call.
    Because the function is wrapped in ``functools.cache``, later calls return
    the very same dict object, so it should not be modified in place.

    Returns
    -------
    :
        A dict mapping from technique ids (IRIs) to labels.
        The first element of the list is the primary label.
        All labels are lowercase and contain no leading or trailing whitespace.
    """
    techniques = _load_ontology("expands_techniques")
    # ``_load_ontology`` is typed as returning ``object``; the concrete layout
    # is determined by the bundled file.
    return techniques  # type: ignore[return-value]
|
|
||
|
|
||
def find_technique(label_or_iri: str) -> Technique:
    """Build a Technique model for an entry of the ExPaNDS ontology.

    The entry is selected either by one of its labels or by its IRI, see the
    `ExPaNDS experimental techniques ontology <https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html>`_.

    Parameters
    ----------
    label_or_iri:
        One of:

        - Technique *label* from the ExPaNDS ontology. The input is first
          converted to lowercase and leading and trailing whitespace is
          removed.
        - Technique *IRI* from the ExPaNDS ontology. Must exactly match an IRI
          in the ontology.

    Returns
    -------
    :
        The loaded technique encoded as:

        .. code-block:: python

            Technique(name=label, pid=iri)

        where ``label`` is the primary label of the matched entry.

    Raises
    ------
    ValueError
        If the label or IRI is not found in the ontology, or if a label
        matches more than one technique.
    """
    # Inputs shaped like a PaNET IRI are looked up by key; everything else is
    # treated as a label.
    lookup = _lookup_iri if _is_iri(label_or_iri) else _lookup_label
    return lookup(label_or_iri)
|
|
||
|
|
||
def _lookup_label(label: str) -> Technique:
    """Resolve a technique by label.

    The label is stripped and lowercased before it is compared against all
    labels (primary and alternative) of every technique in the ontology.

    Raises
    ------
    ValueError
        If no technique or more than one technique carries the label.
    """
    label = label.strip().lower()

    matches = []
    for iri, labels in expands_techniques().items():
        if label in labels:
            # Always report the primary label, even when matching an alternative.
            matches.append((iri, labels[0]))

    if not matches:
        raise ValueError(
            f"Unknown technique label: '{label}'\n"
            "See the ExPaNDS experimental technique ontology for allowed labels at "
            "https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html"
        )

    if len(matches) > 1:
        candidate_iris = [iri for iri, _ in matches]
        raise ValueError(
            f"Found multiple techniques with label '{label}': {candidate_iris}. "
            "Please specify the exact IRI instead or construct a Technique model "
            "manually.\n"
            "See the ExPaNDS experimental technique ontology for allowed labels at "
            "https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html"
        )

    ((matched_iri, primary_label),) = matches
    return Technique(pid=matched_iri, name=primary_label)
|
|
||
|
|
||
def _lookup_iri(iri: str) -> Technique:
    """Resolve a technique by its IRI.

    The IRI must be a key of the ontology; the technique is named after the
    primary (first) label of that entry.

    Raises
    ------
    ValueError
        If the IRI is not a key of the ontology.
    """
    try:
        labels = expands_techniques()[iri]
    except KeyError:
        # Hide the KeyError; the ValueError carries all relevant information.
        raise ValueError(
            f"Unknown technique IRI: '{iri}'\n"
            "See the ExPaNDS experimental technique ontology for allowed labels at "
            "https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html"
        ) from None
    primary_label = labels[0]
    return Technique(pid=iri, name=primary_label)
|
|
||
|
|
||
| _IRI_REGEX = re.compile(r"^https?://purl\.org/pan-science/PaNET/PaNET\d+$") | ||
|
|
||
|
|
||
| def _is_iri(iri: str) -> bool: | ||
| return bool(_IRI_REGEX.match(iri)) | ||
|
|
||
|
|
||
| __all__ = ["expands_techniques", "find_technique"] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can maybe add a test if the ontology file is up to date in the CI or will it not be ever changed...?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It will change. I wouldn't want that check in the regular tests, but I can try and set it up in CI.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not really sure how to do this. The JSONLD file does not encode the version. I would have to parse that out of the HTML, TTL, or N Triples file. Or clone the github repo. But then I don't know if the deployed version is the same as the repo.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we simply check if the binary files that we make from the jsonld files are exactly the same? But we can skip it I guess, if we ever want it to be updated, we will probably be asked to do so by instrument scientists or SIMS team.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we download and parse the whole ontology every time we use it, then we don't even need to store it in the package. I wanted to avoid that so that we don't need an internet connection to make local datasets. Admittedly, that is a niche use case. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,69 @@ | ||
| # SPDX-License-Identifier: BSD-3-Clause | ||
| # Copyright (c) 2025 SciCat Project (https://github.com/SciCatProject/scitacean) | ||
| import pytest | ||
|
|
||
| from scitacean import model, ontology | ||
|
|
||
|
|
||
def test_can_load_expands_technique_ontology() -> None:
    """The bundled ontology loads and has normalized keys and labels."""
    loaded = ontology.expands_techniques()
    assert len(loaded) > 0

    # Every key must look like an HTTP(S) IRI.
    assert all(key.startswith("http") for key in loaded)

    # Labels must be lowercase and free of surrounding whitespace.
    every_label = [label for labels in loaded.values() for label in labels]
    assert all(label.islower() for label in every_label)
    assert all(label == label.strip() for label in every_label)
|
|
||
|
|
||
def test_can_look_up_technique_by_label() -> None:
    """A primary label resolves to the technique with that label."""
    found = ontology.find_technique("small angle neutron scattering")
    assert found == model.Technique(
        pid="http://purl.org/pan-science/PaNET/PaNET01189",
        name="small angle neutron scattering",
    )
|
|
||
|
|
||
def test_can_look_up_technique_by_label_is_case_insensitive() -> None:
    """Label lookup ignores the case of the input; the result is lowercase."""
    found = ontology.find_technique("Total Scattering")
    assert found == model.Technique(
        pid="http://purl.org/pan-science/PaNET/PaNET01190",
        name="total scattering",
    )
|
|
||
|
|
||
def test_can_look_up_technique_by_alternative_label() -> None:
    """Primary and alternative labels all resolve to the same technique."""
    expected = model.Technique(
        pid="http://purl.org/pan-science/PaNET/PaNET01102",
        name="x-ray single crystal diffraction",
    )
    queries = (
        "x-ray single crystal diffraction",  # primary label
        "SXRD",
        "sxrd",
        "single crystal x-ray diffraction ",  # trailing space is intentional
    )
    for query in queries:
        assert ontology.find_technique(query) == expected
|
|
||
|
|
||
def test_can_look_up_technique_by_iri() -> None:
    """An IRI resolves to the technique named by its primary label."""
    iri = "http://purl.org/pan-science/PaNET/PaNET01239"
    found = ontology.find_technique(iri)
    assert found == model.Technique(pid=iri, name="neutron reflectometry")
|
|
||
|
|
||
def test_lookup_rejects_ambiguous_label() -> None:
    """A label shared by several techniques is rejected."""
    ambiguous_label = "diffraction"
    with pytest.raises(ValueError, match="multiple techniques"):
        ontology.find_technique(ambiguous_label)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this function is for notebooks, maybe we can wrap it in html repr, otherwise ignore me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is meant as a 'backend' implementation. I don't think it is very useful to show this list in a notebook directly. The link to the actual ontology seems more useful.