SciCatProject · jl-wynen · Dec 15, 2025 · Dec 12, 2025 · Dec 12, 2025 · Dec 15, 2025
diff --git a/.gitignore b/.gitignore
@@ -26,3 +26,6 @@ __pycache__/
 Pipfile
 venv
 .venv
+
+# Ontologies
+*.jsonld
diff --git a/docs/reference/index.rst b/docs/reference/index.rst
@@ -72,6 +72,7 @@ Submodules
    :recursive:
 
    model
+   ontology
    testing
    typing
 

diff --git a/src/scitacean/_dataset_fields.py b/src/scitacean/_dataset_fields.py
@@ -11,6 +11,7 @@
 
 from __future__ import annotations
 
+from collections.abc import Iterable
 from dataclasses import dataclass
 from datetime import UTC, datetime
 from typing import Any, ClassVar, Literal, TypeVar
@@ -29,6 +30,7 @@
     Technique,
     construct,
 )
+from .ontology import find_technique
 from .pid import PID
 
 M = TypeVar("M", bound=BaseModel)
@@ -54,6 +56,12 @@ def _parse_remote_path(path: str | RemotePath | None) -> RemotePath | None:
     return RemotePath(path)
 
 
+def _parse_techniques(arg: Iterable[str | Technique] | None) -> list[Technique] | None:
+    """Convert user input into a list of ``Technique`` models.
+
+    ``Technique`` instances are passed through unchanged.
+    Strings are resolved with ``find_technique``.
+    ``None`` is returned as is.
+
+    A bare string is treated as a single technique.
+    Without this, it would be iterated character by character because
+    ``str`` is itself an ``Iterable[str]``, and the lookup would fail with
+    a misleading error about a one-letter label.
+    """
+    if arg is None:
+        return None
+    if isinstance(arg, str):
+        arg = [arg]
+    return [t if isinstance(t, Technique) else find_technique(t) for t in arg]
+
+
 def _validate_checksum_algorithm(algorithm: str | None) -> str | None:
     if algorithm is None:
         return algorithm
@@ -615,7 +623,7 @@ def __init__(
         source_folder: RemotePath | str | None = None,
         source_folder_host: str | None = None,
         start_time: datetime | None = None,
-        techniques: list[Technique] | None = None,
+        techniques: Iterable[str | Technique] | None = None,
         used_software: list[str] | None = None,
         validation_status: str | None = None,
         meta: dict[str, Any] | None = None,
@@ -656,7 +664,7 @@ def __init__(
         self._source_folder = _parse_remote_path(source_folder)
         self._source_folder_host = source_folder_host
         self._start_time = start_time
-        self._techniques = techniques
+        self._techniques = _parse_techniques(techniques)
         self._used_software = used_software
         self._validation_status = validation_status
         self._api_version = None
@@ -1033,9 +1041,9 @@ def techniques(self) -> list[Technique] | None:
         return self._techniques
 
     @techniques.setter
-    def techniques(self, techniques: list[Technique] | None) -> None:
+    def techniques(self, techniques: Iterable[str | Technique] | None) -> None:
         """Stores the metadata information for techniques."""
-        self._techniques = techniques
+        self._techniques = _parse_techniques(techniques)
 
     @property
     def updated_at(self) -> datetime | None:

diff --git a/src/scitacean/ontology/__init__.py b/src/scitacean/ontology/__init__.py
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2025 SciCat Project (https://github.com/SciCatProject/scitacean)
+"""Tools for working with ontologies."""
+
+import bz2
+import importlib.resources
+import json
+import re
+from functools import cache
+
+from ..model import Technique
+
+
+def _load_ontology(name: str) -> object:
+    """Read and decode an ontology file from the package resources.
+
+    The file ``<name>.json.bz2`` is looked up in the resources of the
+    ``scitacean.ontology`` package, decompressed with bz2, and parsed as JSON.
+
+    Note that the ontology file was generated using a script in
+    ``tools/ontologies`` in the Scitacean repository.
+    """
+    package_files = importlib.resources.files("scitacean.ontology")
+    resource = package_files.joinpath(f"{name}.json.bz2")
+    with resource.open("rb") as raw_f, bz2.open(raw_f, "rb") as f:
+        return json.load(f)
+
+
+@cache
+def expands_techniques() -> dict[str, list[str]]:
+    """Return the ExPaNDS experimental techniques ontology.
+
+    The result is cached, so every call returns the same dict object.
+
+    Returns
+    -------
+    :
+        A dict that maps each technique id (IRI) to a list of its labels.
+        The primary label is the first element of each list.
+        All labels are lowercase and contain no leading or trailing whitespace.
+    """
+    ontology = _load_ontology("expands_techniques")
+    return ontology  # type: ignore[return-value]
+
+
+def find_technique(label_or_iri: str) -> Technique:
+    """Build a Technique model for a technique given by ontology label or IRI.
+
+    The technique is looked up in the
+    `ExPaNDS experimental techniques ontology <https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html>`_.
+
+    Parameters
+    ----------
+    label_or_iri:
+        Either of:
+
+        - A technique *label* from the ExPaNDS ontology. Before the lookup,
+          the input is converted to lowercase and leading and trailing
+          whitespace is removed.
+        - A technique *IRI* from the ExPaNDS ontology. It must exactly match
+          an IRI in the ontology.
+
+    Returns
+    -------
+    :
+        The technique that was found, encoded as:
+
+        .. code-block:: python
+
+            Technique(name=label, pid=iri)
+
+    Raises
+    ------
+    ValueError
+        If the ontology does not contain the label or IRI.
+    """
+    # Inputs that have the shape of a PaNET IRI are never treated as labels.
+    lookup = _lookup_iri if _is_iri(label_or_iri) else _lookup_label
+    return lookup(label_or_iri)
+
+
+def _lookup_label(label: str) -> Technique:
+    """Find the unique technique that has the given label.
+
+    The label is stripped and lowercased before it is compared with the
+    labels in the ontology.
+    Raises ``ValueError`` if no technique or more than one technique
+    has the label.
+    """
+    label = label.strip().lower()
+    found: list[tuple[str, str]] = []
+    for iri, labels in expands_techniques().items():
+        if label in labels:
+            # Always report the primary label, even for alternative-label matches.
+            found.append((iri, labels[0]))
+
+    if not found:
+        raise ValueError(
+            f"Unknown technique label: '{label}'\n"
+            "See the ExPaNDS experimental technique ontology for allowed labels at "
+            "https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html"
+        )
+    if len(found) > 1:
+        raise ValueError(
+            f"Found multiple techniques with label '{label}': {[f[0] for f in found]}. "
+            "Please specify the exact IRI instead or construct a Technique model "
+            "manually.\n"
+            "See the ExPaNDS experimental technique ontology for allowed labels at "
+            "https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html"
+        )
+    matched_iri, primary_label = found[0]
+    return Technique(pid=matched_iri, name=primary_label)
+
+
+def _lookup_iri(iri: str) -> Technique:
+    """Build a Technique from an IRI, using the primary label as its name.
+
+    Raises ``ValueError`` if the IRI is not a key of the ontology.
+    """
+    try:
+        primary_label = expands_techniques()[iri][0]
+    except KeyError:
+        # Hide the KeyError; the ValueError carries all relevant information.
+        raise ValueError(
+            f"Unknown technique IRI: '{iri}'\n"
+            "See the ExPaNDS experimental technique ontology for allowed labels at "
+            "https://expands-eu.github.io/ExPaNDS-experimental-techniques-ontology/index-en.html"
+        ) from None
+    else:
+        return Technique(name=primary_label, pid=iri)
+
+
+# Shape of technique IRIs in the ontology: a PaNET base URL and a numeric id.
+_IRI_REGEX = re.compile(r"^https?://purl\.org/pan-science/PaNET/PaNET\d+$")
+
+
+def _is_iri(iri: str) -> bool:
+    """Return True if the string has the shape of a PaNET technique IRI."""
+    return _IRI_REGEX.match(iri) is not None
+
+
+__all__ = ["expands_techniques", "find_technique"]
diff --git a/src/scitacean/ontology/expands_techniques.json.bz2 b/src/scitacean/ontology/expands_techniques.json.bz2
diff --git a/tests/dataset_fields_test.py b/tests/dataset_fields_test.py
@@ -18,6 +18,7 @@
     DownloadDataFile,
     DownloadDataset,
     DownloadOrigDatablock,
+    Technique,
     UploadDerivedDataset,
     UploadRawDataset,
 )
@@ -430,12 +431,12 @@ def test_orcid_validation_valid(good_orcid: str) -> None:
     dset = Dataset(
         type="raw",
         name="test ORCID",
-        contact_email="jan-lukas.wynen@ess.eu",
+        contact_email="mail.person@sci.uni",
         creation_location="scitacean/tests",
         creation_time="2142-04-02T16:44:56",
-        owner="Jan-Lukas Wynen",
+        owner="Mustrum Ridcully",
         owner_group="ess",
-        principal_investigator="jan-lukas.wynen@ess.eu",
+        principal_investigator="mail.person@sci.uni",
         source_folder=RemotePath("/hex/source62"),
         orcid_of_owner=good_orcid,
     )
@@ -454,16 +455,64 @@ def test_orcid_validation_valid(good_orcid: str) -> None:
 def test_orcid_validation_missing_url(bad_orcid: str) -> None:
     dset = Dataset(
         type="raw",
-        contact_email="jan-lukas.wynen@ess.eu",
+        contact_email="mail.person@sci.uni",
         creation_time="2142-04-02T16:44:56",
-        owner="Jan-Lukas Wynen",
+        owner="Mustrum Ridcully",
         owner_group="ess",
-        principal_investigator="jan-lukas.wynen@ess.eu",
+        principal_investigator="mail.person@sci.uni",
         source_folder=RemotePath("/hex/source62"),
         orcid_of_owner=bad_orcid,
     )
     with pytest.raises(pydantic.ValidationError):
         dset.make_upload_model()
 
 
-# TODO technique
+def test_technique_set_model() -> None:
+    """A Technique model passed to Dataset is stored as given."""
+    custom_technique = Technique(name="Test Technique", pid="test/technique")
+    dataset = Dataset(
+        type="raw",
+        owner="Mustrum Ridcully",
+        owner_group="ess",
+        contact_email="mail.person@sci.uni",
+        principal_investigator="mail.person@sci.uni",
+        creation_time="2142-04-02T16:44:56",
+        source_folder=RemotePath("/hex/source62"),
+        techniques=[custom_technique],
+    )
+    assert dataset.techniques == [custom_technique]
+
+
+def test_technique_set_label() -> None:
+    """A label string passed to Dataset is resolved to the ontology technique."""
+    dset = Dataset(
+        type="raw",
+        contact_email="mail.person@sci.uni",
+        creation_time="2142-04-02T16:44:56",
+        owner="Mustrum Ridcully",
+        owner_group="ess",
+        principal_investigator="mail.person@sci.uni",
+        source_folder=RemotePath("/hex/source62"),
+        techniques=["neutron powder diffraction"],
+    )
+    expected = Technique(
+        pid="http://purl.org/pan-science/PaNET/PaNET01100",
+        name="neutron powder diffraction",
+    )
+    assert dset.techniques == [expected]
+
+
+def test_technique_set_invalid_label_raises_value_error() -> None:
+    """Assigning a label that is not in the ontology raises ValueError."""
+    dataset = Dataset(
+        type="raw",
+        owner="Mustrum Ridcully",
+        owner_group="ess",
+        contact_email="mail.person@sci.uni",
+        principal_investigator="mail.person@sci.uni",
+        creation_time="2142-04-02T16:44:56",
+        source_folder=RemotePath("/hex/source62"),
+    )
+    unknown_labels = ["bad technique"]
+    with pytest.raises(ValueError, match="Unknown technique"):
+        dataset.techniques = unknown_labels
diff --git a/tests/ontology_test.py b/tests/ontology_test.py
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2025 SciCat Project (https://github.com/SciCatProject/scitacean)
+import pytest
+
+from scitacean import model, ontology
+
+
+def test_can_load_expands_technique_ontology() -> None:
+    """The ontology loads, is non-empty, and has well-formed IRIs and labels."""
+    loaded = ontology.expands_techniques()
+    assert len(loaded) > 0
+
+    # Every key must look like an IRI.
+    for iri in loaded:
+        assert iri.startswith("http")
+
+    # Every label must be lowercase and free of surrounding whitespace.
+    all_labels = [label for labels in loaded.values() for label in labels]
+    for label in all_labels:
+        assert label.islower()
+    for label in all_labels:
+        assert label.strip() == label
+
+
+def test_can_look_up_technique_by_label() -> None:
+    """A primary label resolves to the technique with the matching IRI."""
+    expected = model.Technique(
+        name="small angle neutron scattering",
+        pid="http://purl.org/pan-science/PaNET/PaNET01189",
+    )
+    found = ontology.find_technique("small angle neutron scattering")
+    assert found == expected
+
+
+def test_can_look_up_technique_by_label_is_case_insensitive() -> None:
+    """A label with uppercase letters resolves to the lowercase ontology entry."""
+    expected = model.Technique(
+        name="total scattering",
+        pid="http://purl.org/pan-science/PaNET/PaNET01190",
+    )
+    found = ontology.find_technique("Total Scattering")
+    assert found == expected
+
+
+def test_can_look_up_technique_by_alternative_label() -> None:
+    """Primary and alternative labels all resolve to the same technique."""
+    expected = model.Technique(
+        name="x-ray single crystal diffraction",
+        pid="http://purl.org/pan-science/PaNET/PaNET01102",
+    )
+    queries = (
+        "x-ray single crystal diffraction",  # primary label
+        "SXRD",
+        "sxrd",
+        "single crystal x-ray diffraction ",  # trailing space is intentional
+    )
+    results = [ontology.find_technique(query) for query in queries]
+    for result in results:
+        assert result == expected
+
+
+def test_can_look_up_technique_by_iri() -> None:
+    """An IRI resolves to the technique named by its primary label."""
+    iri = "http://purl.org/pan-science/PaNET/PaNET01239"
+    expected = model.Technique(name="neutron reflectometry", pid=iri)
+    assert ontology.find_technique(iri) == expected
+
+
+def test_lookup_rejects_ambiguous_label() -> None:
+    """A label shared by several techniques is rejected with ValueError."""
+    ambiguous_label = "diffraction"
+    with pytest.raises(ValueError, match="multiple techniques"):
+        ontology.find_technique(ambiguous_label)
diff --git a/tools/model-generation/spec/dataset-fields.yml b/tools/model-generation/spec/dataset-fields.yml
@@ -34,6 +34,9 @@ conversions:
   sourceFolder:
     func: _parse_remote_path
     arg_type: RemotePath | str
+  techniques:
+    func: _parse_techniques
+    arg_type: Iterable[str | Technique]
 
 # Mark those fields as read-only in addition to those identified as read only from the schema.
 # Read-only fields must be None in uploads.
-Original file line number
+Diff line change
@@ Expand Up / @@ -26,3 +26,6 @@ __pycache__/ @@
     Pipfile
     venv
     .venv
+    # Ontologies
+    *.jsonld