From d7aee857153f410569bc98c121cd8e4e5e0309a0 Mon Sep 17 00:00:00 2001 From: hsolbrig Date: Thu, 2 Feb 2023 14:24:55 -0600 Subject: [PATCH 1/3] Proposal for CurieNamespace with embedded catalog. It is my understanding that we are supposed to use the curies package, but it isn't clear how one would add maps incrementally (see: tests/test_utils/test_curienamespace.py#113 as an example). Any solution that passes test_curienamespace.py should do. --- linkml_runtime/utils/curienamespace.py | 37 +- tests/test_utils/input/CurieNamespace_test.py | 371 ++++++++++++++++++ tests/test_utils/test_curienamespace.py | 116 ++++++ 3 files changed, 517 insertions(+), 7 deletions(-) create mode 100644 tests/test_utils/input/CurieNamespace_test.py diff --git a/linkml_runtime/utils/curienamespace.py b/linkml_runtime/utils/curienamespace.py index fc342daa..066825d1 100644 --- a/linkml_runtime/utils/curienamespace.py +++ b/linkml_runtime/utils/curienamespace.py @@ -1,16 +1,39 @@ -from typing import Optional, Union +from logging import warning +from typing import Optional, Union, Dict from rdflib import Namespace, URIRef class CurieNamespace(Namespace): - def __new__(cls, prefix: str, value: Union[str, URIRef]): - value = str(value) - try: - rt = str.__new__(cls, value) - except UnicodeDecodeError: - rt = str.__new__(cls, value, 'utf-8') + # We would prefer to use curies.Converter here, but there doesn't appear to be any way to build it incrementally + catalog: Dict[str, "CurieNamespace"] = dict() + + @classmethod + def to_curie(cls, uri: Union[str, URIRef]) -> str: + uri = str(uri) + candidate_ns = "" + for prefix, ns in cls.catalog.items(): + if uri.startswith(ns) and len(ns) > len(candidate_ns): + candidate_ns = ns + if candidate_ns: + return candidate_ns.curie(uri[len(candidate_ns):]) + return None + + @classmethod + def to_uri(cls, curie: str) -> Optional[URIRef]: + prefix, localname = curie.split(':', 1) + ns = CurieNamespace.catalog.get(prefix, None) + return ns[localname] if ns else None + + def __new__(cls, prefix: str, ns: Union[str, bytes, URIRef]) -> "CurieNamespace": + rt = Namespace.__new__(cls, str(ns) if not isinstance(ns, bytes) else ns) rt.prefix = prefix + if prefix in CurieNamespace.catalog: + if CurieNamespace.catalog[prefix] != str(rt): + # prefix is bound to a different namespace + warning(f"Prefix: {prefix} already references {CurieNamespace.catalog[prefix]} - not updated to {rt}") + else: + CurieNamespace.catalog[prefix] = rt return rt def curie(self, reference: Optional[str] = '') -> str: diff --git a/tests/test_utils/input/CurieNamespace_test.py b/tests/test_utils/input/CurieNamespace_test.py new file mode 100644 index 00000000..ec2c569d --- /dev/null +++ b/tests/test_utils/input/CurieNamespace_test.py @@ -0,0 +1,371 @@ +# Auto generated from None by pythongen.py version: 0.9.0 +# Generation date: 2023-01-31T13:57:38 +# Schema: inline-dict-test +# +# id: http://example.org +# description: test +# license: https://creativecommons.org/publicdomain/zero/1.0/ + +import dataclasses +import sys +import re +from pprint import pprint + +from jsonasobj2 import JsonObj, as_dict +from typing import Optional, List, Union, Dict, ClassVar, Any +from dataclasses import dataclass +from linkml_runtime.linkml_model.meta import EnumDefinition, PermissibleValue, PvFormulaOptions + +from linkml_runtime.utils.slot import Slot +from linkml_runtime.utils.metamodelcore import empty_list, empty_dict, bnode +from linkml_runtime.utils.yamlutils import YAMLRoot, extended_str, extended_float, extended_int +from linkml_runtime.utils.dataclass_extensions_376 import dataclasses_init_fn_with_kwargs +from linkml_runtime.utils.formatutils import camelcase, underscore, sfx +from linkml_runtime.utils.enumerations import EnumDefinitionImpl +from rdflib import Namespace, URIRef, DC +from linkml_runtime.utils.curienamespace import CurieNamespace +from linkml_runtime.utils.metamodelcore import Bool, Decimal, ElementIdentifier, NCName, NodeIdentifier, URI, URIorCURIE, XSDDate, XSDDateTime, XSDTime + +metamodel_version = "1.7.0" +version = None + +# Overwrite dataclasses _init_fn to add **kwargs in __init__ +dataclasses._init_fn = dataclasses_init_fn_with_kwargs + +# Namespaces +LINKML = CurieNamespace('linkml', 'https://w3id.org/linkml/') +SHEX = CurieNamespace('shex', 'http://www.w3.org/ns/shex#') +XSD = CurieNamespace('xsd', 'http://www.w3.org/2001/XMLSchema#') +DEFAULT_ = CurieNamespace('', 'http://example.org/') + + +# Types +class String(str): + """ A character string """ + type_class_uri = XSD.string + type_class_curie = "xsd:string" + type_name = "string" + type_model_uri = URIRef("http://example.org/String") + + +class Integer(int): + """ An integer """ + type_class_uri = XSD.integer + type_class_curie = "xsd:integer" + type_name = "integer" + type_model_uri = URIRef("http://example.org/Integer") + + +class Boolean(Bool): + """ A binary (true or false) value """ + type_class_uri = XSD.boolean + type_class_curie = "xsd:boolean" + type_name = "boolean" + type_model_uri = URIRef("http://example.org/Boolean") + + +class Float(float): + """ A real number that conforms to the xsd:float specification """ + type_class_uri = XSD.float + type_class_curie = "xsd:float" + type_name = "float" + type_model_uri = URIRef("http://example.org/Float") + + +class Double(float): + """ A real number that conforms to the xsd:double specification """ + type_class_uri = XSD.double + type_class_curie = "xsd:double" + type_name = "double" + type_model_uri = URIRef("http://example.org/Double") + + +class Decimal(Decimal): + """ A real number with arbitrary precision that conforms to the xsd:decimal specification """ + type_class_uri = XSD.decimal + type_class_curie = "xsd:decimal" + type_name = "decimal" + type_model_uri = URIRef("http://example.org/Decimal") + + +class Time(XSDTime): + """ A time object represents a (local) time of day, independent of any particular day """ + type_class_uri = XSD.dateTime + type_class_curie = "xsd:dateTime" + type_name = "time" + type_model_uri = URIRef("http://example.org/Time") + + +class Date(XSDDate): + """ a date (year, month and day) in an idealized calendar """ + type_class_uri = XSD.date + type_class_curie = "xsd:date" + type_name = "date" + type_model_uri = URIRef("http://example.org/Date") + + +class Datetime(XSDDateTime): + """ The combination of a date and time """ + type_class_uri = XSD.dateTime + type_class_curie = "xsd:dateTime" + type_name = "datetime" + type_model_uri = URIRef("http://example.org/Datetime") + + +class DateOrDatetime(str): + """ Either a date or a datetime """ + type_class_uri = LINKML.DateOrDatetime + type_class_curie = "linkml:DateOrDatetime" + type_name = "date_or_datetime" + type_model_uri = URIRef("http://example.org/DateOrDatetime") + + +class Uriorcurie(URIorCURIE): + """ a URI or a CURIE """ + type_class_uri = XSD.anyURI + type_class_curie = "xsd:anyURI" + type_name = "uriorcurie" + type_model_uri = URIRef("http://example.org/Uriorcurie") + + +class Uri(URI): + """ a complete URI """ + type_class_uri = XSD.anyURI + type_class_curie = "xsd:anyURI" + type_name = "uri" + type_model_uri = URIRef("http://example.org/Uri") + + +class Ncname(NCName): + """ Prefix part of CURIE """ + type_class_uri = XSD.string + type_class_curie = "xsd:string" + type_name = "ncname" + type_model_uri = URIRef("http://example.org/Ncname") + + +class Objectidentifier(ElementIdentifier): + """ A URI or CURIE that represents an object in the model. """ + type_class_uri = SHEX.iri + type_class_curie = "shex:iri" + type_name = "objectidentifier" + type_model_uri = URIRef("http://example.org/Objectidentifier") + + +class Nodeidentifier(NodeIdentifier): + """ A URI, CURIE or BNODE that represents a node in a model. """ + type_class_uri = SHEX.nonLiteral + type_class_curie = "shex:nonLiteral" + type_name = "nodeidentifier" + type_model_uri = URIRef("http://example.org/Nodeidentifier") + + +# Class references +class NamedThingId(extended_str): + pass + + +class PersonId(NamedThingId): + pass + + +class OrganisationId(NamedThingId): + pass + + +class NonProfitId(OrganisationId): + pass + + +class ForProfitId(OrganisationId): + pass + + +@dataclass +class NamedThing(YAMLRoot): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = URIRef("http://example.org/NamedThing") + class_class_curie: ClassVar[str] = None + class_name: ClassVar[str] = "NamedThing" + class_model_curie: ClassVar[str] = "None" + class_model_uri: ClassVar[URIRef] = URIRef("http://example.org/NamedThing") + + id: Union[str, NamedThingId] = None + full_name: Optional[str] = None + thingtype: URIorCURIE = dataclasses.field(init=False) + + @property + def thingtype(self) -> URIorCURIE: + return URIorCURIE(self.class_class_uri) + + @thingtype.setter + def thingtype(self, v: Any) -> None: + pass + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self._is_empty(self.id): + self.MissingRequiredField("id") + if not isinstance(self.id, NamedThingId): + self.id = NamedThingId(self.id) + + if self.full_name is not None and not isinstance(self.full_name, str): + self.full_name = str(self.full_name) + + super().__post_init__(**kwargs) + + + + def __new__(cls, *args, **kwargs): + uri = kwargs.pop("thingtype", cls.class_class_uri) + target_cls = YAMLRoot._class_for_uri(uri) + if target_cls is None: + # Should this be a warning instead? If so, what do we do with the new URI? + raise ValueError( + f"Wrong type designator value: class NamedThing has no subclass with class_class_uri='{uri}'") + return super().__new__(target_cls, *args, **kwargs) + + + +@dataclass +class Person(NamedThing): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = URIRef("http://testbreaker/not-the-uri-you-expect") + class_class_curie: ClassVar[str] = None + class_name: ClassVar[str] = "Person" + class_model_curie: ClassVar[str] = "None" + class_model_uri: ClassVar[URIRef] = URIRef("http://example.org/Person") + + id: Union[str, PersonId] = None + height: Optional[int] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self._is_empty(self.id): + self.MissingRequiredField("id") + if not isinstance(self.id, PersonId): + self.id = PersonId(self.id) + + if self.height is not None and not isinstance(self.height, int): + self.height = int(self.height) + + super().__post_init__(**kwargs) + + + +@dataclass +class Organisation(NamedThing): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = URIRef("http://example.org/Organisation") + class_class_curie: ClassVar[str] = None + class_name: ClassVar[str] = "Organisation" + class_model_curie: ClassVar[str] = "None" + class_model_uri: ClassVar[URIRef] = URIRef("http://example.org/Organisation") + + id: Union[str, OrganisationId] = None + number_of_employees: Optional[int] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self._is_empty(self.id): + self.MissingRequiredField("id") + if not isinstance(self.id, OrganisationId): + self.id = OrganisationId(self.id) + + if self.number_of_employees is not None and not isinstance(self.number_of_employees, int): + self.number_of_employees = int(self.number_of_employees) + + super().__post_init__(**kwargs) + + + +@dataclass +class NonProfit(Organisation): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = URIRef("http://example.org/NonProfit") + class_class_curie: ClassVar[str] = None + class_name: ClassVar[str] = "NonProfit" + class_model_curie: ClassVar[str] = "None" + class_model_uri: ClassVar[URIRef] = URIRef("http://example.org/NonProfit") + + id: Union[str, NonProfitId] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self._is_empty(self.id): + self.MissingRequiredField("id") + if not isinstance(self.id, NonProfitId): + self.id = NonProfitId(self.id) + + super().__post_init__(**kwargs) + + + +@dataclass +class ForProfit(Organisation): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = URIRef("http://example.org/ForProfit") + class_class_curie: ClassVar[str] = None + class_name: ClassVar[str] = "ForProfit" + class_model_curie: ClassVar[str] = "None" + class_model_uri: ClassVar[URIRef] = URIRef("http://example.org/ForProfit") + + id: Union[str, ForProfitId] = None + target_profit_margin: Optional[float] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self._is_empty(self.id): + self.MissingRequiredField("id") + if not isinstance(self.id, ForProfitId): + self.id = ForProfitId(self.id) + + if self.target_profit_margin is not None and not isinstance(self.target_profit_margin, float): + self.target_profit_margin = float(self.target_profit_margin) + + super().__post_init__(**kwargs) + + + +@dataclass +class Container(YAMLRoot): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = URIRef("http://example.org/Container") + class_class_curie: ClassVar[str] = None + class_name: ClassVar[str] = "Container" + class_model_curie: ClassVar[str] = "None" + class_model_uri: ClassVar[URIRef] = URIRef("http://example.org/Container") + + things: Optional[Union[Dict[Union[str, NamedThingId], Union[dict, NamedThing]], List[Union[dict, NamedThing]]]] = empty_dict() + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + self._normalize_inlined_as_list(slot_name="things", slot_type=NamedThing, key_name="id", keyed=True) + + super().__post_init__(**kwargs) + + + +@dataclass +class ContainerWithOneSibling(YAMLRoot): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = URIRef("http://example.org/ContainerWithOneSibling") + class_class_curie: ClassVar[str] = None + class_name: ClassVar[str] = "ContainerWithOneSibling" + class_model_curie: ClassVar[str] = "None" + class_model_uri: ClassVar[URIRef] = URIRef("http://example.org/ContainerWithOneSibling") + + persons: Optional[Union[Dict[Union[str, PersonId], Union[dict, Person]], List[Union[dict, Person]]]] = empty_dict() + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + self._normalize_inlined_as_list(slot_name="persons", slot_type=Person, key_name="id", keyed=True) + + super().__post_init__(**kwargs) + + + +# Enumerations + + +# Slots diff --git a/tests/test_utils/test_curienamespace.py b/tests/test_utils/test_curienamespace.py index 3acec7ce..df128c4a 100644 --- a/tests/test_utils/test_curienamespace.py +++ b/tests/test_utils/test_curienamespace.py @@ -1,8 +1,77 @@ import unittest +from contextlib import redirect_stderr +from io import StringIO from rdflib import URIRef from linkml_runtime.utils.curienamespace import CurieNamespace +from tests.support.test_environment import TestEnvironmentTestCase + +model = schema_str = """ +id: http://example.org +name: inline-dict-test +imports: + - https://w3id.org/linkml/types +# prefixes: +# x: http://example.org/ +# default_prefix: x +default_range: string +description: test + +classes: + NamedThing: + slots: + - id + - full_name + - thingtype + Person: + is_a: NamedThing + class_uri: "http://testbreaker/not-the-uri-you-expect" + slots: + - height + Organisation: + is_a: NamedThing + slots: + - number_of_employees + NonProfit: + is_a: Organisation + ForProfit: + is_a: Organisation + slots: + - target_profit_margin + Container: + tree_root: true + slots: + - things + ContainerWithOneSibling: + slots: + - persons +slots: + id: + identifier: true + range: string + required: true + thingtype: + designates_type: true + range: uriorcurie + full_name: + range: string + target_profit_margin: + range: float + height: + range: integer + number_of_employees: + range: integer + things: + range: NamedThing + multivalued: true + inlined_as_list: true + persons: + range: Person + multivalued: true + inlined_as_list: true +""" + class CurieNamespaceTestCase(unittest.TestCase): @@ -19,5 +88,52 @@ def test_curie_as_curie(self): BFO = CurieNamespace('bfo', "http://purl.obolibrary.org/obo/BFO_") self.assertEqual("bfo:curie", BFO.curie) + def test_curie_catalog(self): + """ Test the CurieNamespace curie to uri and uri to curi conversions""" + from tests.test_utils.input.CurieNamespace_test import Person + # Make sure the import doesn't get factored out + Person(id="Fred") + + # Test bidirectional conversion + self.assertEqual('rdf:chaos', + CurieNamespace.to_curie('http://www.w3.org/1999/02/22-rdf-syntax-ns#chaos')) + self.assertEqual(URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#penguins'), + CurieNamespace.to_uri('rdf:penguins')) + + # Test missing URI and CURIE + self.assertIsNone(CurieNamespace.to_curie('http://nothing.org/never')) + self.assertIsNone(CurieNamespace.to_uri('abcd:efgh')) + + # Test the default namespace + self.assertEqual('http://example.org/ttfn', str(CurieNamespace.to_uri(':ttfn'))) + self.assertEqual(':foul_soap', str(CurieNamespace.to_curie('http://example.org/foul_soap'))) + + # Make sure we pick the longest path + self.assertEqual(':inst#probe', CurieNamespace.to_curie(URIRef('http://example.org/inst#probe'))) + CurieNamespace('long_ex', 'http://example.org/inst#') + self.assertEqual('long_ex:probe', CurieNamespace.to_curie(URIRef('http://example.org/inst#probe'))) + + # Test incremental add + CurieNamespace('tester', URIRef('http://fester.bester/tester#')) + self.assertEqual('tester:hip_boots', CurieNamespace.to_curie('http://fester.bester/tester#hip_boots')) + + # Test multiple prefixes for same suffix + CurieNamespace('ns17', URIRef('http://fester.bester/tester#')) + self.assertEqual('tester:hip_boots', CurieNamespace.to_curie('http://fester.bester/tester#hip_boots')) + self.assertEqual('http://fester.bester/tester#hip_boots', str(CurieNamespace.to_uri('tester:hip_boots'))) + self.assertEqual('http://fester.bester/tester#hip_boots', str(CurieNamespace.to_uri('ns17:hip_boots'))) + + # Test multiple uris for same prefix + # The following should be benign + CurieNamespace('tester', URIRef('http://fester.bester/tester#')) + + # Issue warnings for now on this + # TODO: test that we log the following + # 'Prefix: tester already references http://fester.bester/tester# - + # not updated to http://fester.notsogood/tester#' + CurieNamespace('tester', URIRef('http://fester.notsogood/tester#')) + + + if __name__ == '__main__': unittest.main() From f56629a291ae93adcb4287215d828e25893fb39e Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Wed, 1 Mar 2023 17:13:20 +0100 Subject: [PATCH 2/3] version of PR !244 that uses curies --- linkml_runtime/utils/curienamespace.py | 91 ++++++++++++++----- pyproject.toml | 2 +- tests/test_utils/input/CurieNamespace_test.py | 4 +- tests/test_utils/test_curienamespace.py | 37 ++++---- 4 files changed, 88 insertions(+), 46 deletions(-) diff --git a/linkml_runtime/utils/curienamespace.py b/linkml_runtime/utils/curienamespace.py index 066825d1..04a6c415 100644 --- a/linkml_runtime/utils/curienamespace.py +++ b/linkml_runtime/utils/curienamespace.py @@ -1,40 +1,81 @@ from logging import warning -from typing import Optional, Union, Dict +from typing import Optional, Union, Dict, List from rdflib import Namespace, URIRef +from curies import Converter, Record + +class CurieNamespaceCatalog(object): + catalog: Dict[str, "CurieNamespace"] + def __init__(self) -> None: + self.namespaces = [] + self._converter: Optional[Converter] = None + + @property + def converter(self): + if not self._converter: + self._converter = self._buildConverter() + return self._converter + + def _buildConverter(self): + records = [] + namespaces_to_treat = self.namespaces[:] + while len(namespaces_to_treat) > 0: + ns = namespaces_to_treat.pop(0) + prefix = ns.prefix + uri = str(ns) + all_prefixes = [prefix] + all_uris = [uri] + iteration_needed = True + while iteration_needed: + iteration_needed = False + for possible_synonym in namespaces_to_treat[:]: + if possible_synonym.prefix in all_prefixes: + all_uris.append(str(possible_synonym)) + namespaces_to_treat.remove(possible_synonym) + iteration_needed = True + if str(possible_synonym) in all_uris: + all_prefixes.append(possible_synonym.prefix) + namespaces_to_treat.remove(possible_synonym) + iteration_needed = True + records.append(Record(prefix, uri , [x for x in all_prefixes if not x == prefix], [x for x in all_uris if not x == uri])) + return Converter(records=records) -class CurieNamespace(Namespace): - # We would prefer to use curies.Converter here, but there doesn't appear to be any way to build it incrementally - catalog: Dict[str, "CurieNamespace"] = dict() - @classmethod - def to_curie(cls, uri: Union[str, URIRef]) -> str: - uri = str(uri) - candidate_ns = "" - for prefix, ns in cls.catalog.items(): - if uri.startswith(ns) and len(ns) > len(candidate_ns): - candidate_ns = ns - if candidate_ns: - return candidate_ns.curie(uri[len(candidate_ns):]) - return None + def to_curie(self, uri: Union[str, URIRef]) -> str: + return self.converter.compress(uri) + + def to_uri(self, curie: str) -> Optional[URIRef]: + expanded = self.converter.expand(curie) + return None if expanded is None else URIRef(expanded) + + def add_namespace(self,ns: "CurieNamespace"): + self.namespaces.append(ns) + self._converter = None + @classmethod - def to_uri(cls, curie: str) -> Optional[URIRef]: - prefix, localname = curie.split(':', 1) - ns = CurieNamespace.catalog.get(prefix, None) - return ns[localname] if ns else None + def create(cls, *namespaces: List["CurieNamespace"]): + cat = CurieNamespaceCatalog() + [cat.add_namespace(x) for x in namespaces] + return cat + + def clear(self): + self.catalog = dict() + + def as_dict(self): + return self.catalog.copy() + - def __new__(cls, prefix: str, ns: Union[str, bytes, URIRef]) -> "CurieNamespace": +class CurieNamespace(Namespace): + def __new__(cls, prefix: str, ns: Union[str, URIRef]): rt = Namespace.__new__(cls, str(ns) if not isinstance(ns, bytes) else ns) rt.prefix = prefix - if prefix in CurieNamespace.catalog: - if CurieNamespace.catalog[prefix] != str(rt): - # prefix is bound to a different namespace - warning(f"Prefix: {prefix} already references {CurieNamespace.catalog[prefix]} - not updated to {rt}") - else: - CurieNamespace.catalog[prefix] = rt return rt def curie(self, reference: Optional[str] = '') -> str: return self.prefix + ':' + reference + + def addTo(self, catalog: CurieNamespaceCatalog) -> "CurieNamespace": + catalog.add_namespace(self) + return self diff --git a/pyproject.toml b/pyproject.toml index 8514ae21..c3b7bde3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ pyyaml = "*" rdflib = ">=6.0.0" requests = "*" prefixmaps = ">=0.1.4" -curies = "^0.4.0" +curies = "^0.4.3" [tool.poetry.dev-dependencies] coverage = "^6.2" diff --git a/tests/test_utils/input/CurieNamespace_test.py b/tests/test_utils/input/CurieNamespace_test.py index ec2c569d..8df2bd65 100644 --- a/tests/test_utils/input/CurieNamespace_test.py +++ b/tests/test_utils/input/CurieNamespace_test.py @@ -23,7 +23,7 @@ from linkml_runtime.utils.formatutils import camelcase, underscore, sfx from linkml_runtime.utils.enumerations import EnumDefinitionImpl from rdflib import Namespace, URIRef, DC -from linkml_runtime.utils.curienamespace import CurieNamespace +from linkml_runtime.utils.curienamespace import CurieNamespace, CurieNamespaceCatalog from linkml_runtime.utils.metamodelcore import Bool, Decimal, ElementIdentifier, NCName, NodeIdentifier, URI, URIorCURIE, XSDDate, XSDDateTime, XSDTime metamodel_version = "1.7.0" @@ -37,7 +37,7 @@ SHEX = CurieNamespace('shex', 'http://www.w3.org/ns/shex#') XSD = CurieNamespace('xsd', 'http://www.w3.org/2001/XMLSchema#') DEFAULT_ = CurieNamespace('', 'http://example.org/') - +namespaceCatalog = CurieNamespaceCatalog.create(LINKML, SHEX, XSD, DEFAULT_) # Types class String(str): diff --git a/tests/test_utils/test_curienamespace.py b/tests/test_utils/test_curienamespace.py index df128c4a..cc5f9ceb 100644 --- a/tests/test_utils/test_curienamespace.py +++ b/tests/test_utils/test_curienamespace.py @@ -90,48 +90,49 @@ def test_curie_as_curie(self): def test_curie_catalog(self): """ Test the CurieNamespace curie to uri and uri to curi conversions""" - from tests.test_utils.input.CurieNamespace_test import Person + from tests.test_utils.input.CurieNamespace_test import Person, namespaceCatalog # Make sure the import doesn't get factored out Person(id="Fred") # Test bidirectional conversion + CurieNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#").addTo(namespaceCatalog) self.assertEqual('rdf:chaos', - CurieNamespace.to_curie('http://www.w3.org/1999/02/22-rdf-syntax-ns#chaos')) + namespaceCatalog.to_curie('http://www.w3.org/1999/02/22-rdf-syntax-ns#chaos')) self.assertEqual(URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#penguins'), - CurieNamespace.to_uri('rdf:penguins')) + namespaceCatalog.to_uri('rdf:penguins')) # Test missing URI and CURIE - self.assertIsNone(CurieNamespace.to_curie('http://nothing.org/never')) - self.assertIsNone(CurieNamespace.to_uri('abcd:efgh')) + self.assertIsNone(namespaceCatalog.to_curie('http://nothing.org/never')) + self.assertIsNone(namespaceCatalog.to_uri('abcd:efgh')) # Test the default namespace - self.assertEqual('http://example.org/ttfn', str(CurieNamespace.to_uri(':ttfn'))) - self.assertEqual(':foul_soap', str(CurieNamespace.to_curie('http://example.org/foul_soap'))) + self.assertEqual('http://example.org/ttfn', str(namespaceCatalog.to_uri(':ttfn'))) + self.assertEqual(':foul_soap', str(namespaceCatalog.to_curie('http://example.org/foul_soap'))) # Make sure we pick the longest path - self.assertEqual(':inst#probe', CurieNamespace.to_curie(URIRef('http://example.org/inst#probe'))) - CurieNamespace('long_ex', 'http://example.org/inst#') - self.assertEqual('long_ex:probe', CurieNamespace.to_curie(URIRef('http://example.org/inst#probe'))) + self.assertEqual(':inst#probe', namespaceCatalog.to_curie(URIRef('http://example.org/inst#probe'))) + CurieNamespace('long_ex', 'http://example.org/inst#').addTo(namespaceCatalog) + self.assertEqual('long_ex:probe', namespaceCatalog.to_curie(URIRef('http://example.org/inst#probe'))) # Test incremental add - CurieNamespace('tester', URIRef('http://fester.bester/tester#')) - self.assertEqual('tester:hip_boots', CurieNamespace.to_curie('http://fester.bester/tester#hip_boots')) + CurieNamespace('tester', URIRef('http://fester.bester/tester#')).addTo(namespaceCatalog) + self.assertEqual('tester:hip_boots', namespaceCatalog.to_curie('http://fester.bester/tester#hip_boots')) # Test multiple prefixes for same suffix - CurieNamespace('ns17', URIRef('http://fester.bester/tester#')) - self.assertEqual('tester:hip_boots', CurieNamespace.to_curie('http://fester.bester/tester#hip_boots')) - self.assertEqual('http://fester.bester/tester#hip_boots', str(CurieNamespace.to_uri('tester:hip_boots'))) - self.assertEqual('http://fester.bester/tester#hip_boots', str(CurieNamespace.to_uri('ns17:hip_boots'))) + CurieNamespace('ns17', URIRef('http://fester.bester/tester#')).addTo(namespaceCatalog) + self.assertEqual('tester:hip_boots', namespaceCatalog.to_curie('http://fester.bester/tester#hip_boots')) + self.assertEqual('http://fester.bester/tester#hip_boots', str(namespaceCatalog.to_uri('tester:hip_boots'))) + self.assertEqual('http://fester.bester/tester#hip_boots', str(namespaceCatalog.to_uri('ns17:hip_boots'))) # Test multiple uris for same prefix # The following should be benign - CurieNamespace('tester', URIRef('http://fester.bester/tester#')) + CurieNamespace('tester', URIRef('http://fester.bester/tester#')).addTo(namespaceCatalog) # Issue warnings for now on this # TODO: test that we log the following # 'Prefix: tester already references http://fester.bester/tester# - # not updated to http://fester.notsogood/tester#' - CurieNamespace('tester', URIRef('http://fester.notsogood/tester#')) + CurieNamespace('tester', URIRef('http://fester.notsogood/tester#')).addTo(namespaceCatalog) From 814b03eaf5d61b7e2e66c30ababfb8924772a92b Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Thu, 23 Mar 2023 12:06:48 +0100 Subject: [PATCH 3/3] cleanup and add documentation --- linkml_runtime/utils/curienamespace.py | 42 +++++++++++++++++++++----- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/linkml_runtime/utils/curienamespace.py b/linkml_runtime/utils/curienamespace.py index 04a6c415..25feb9ad 100644 --- a/linkml_runtime/utils/curienamespace.py +++ b/linkml_runtime/utils/curienamespace.py @@ -5,13 +5,21 @@ from curies import Converter, Record class CurieNamespaceCatalog(object): - catalog: Dict[str, "CurieNamespace"] + """ + A CurieNamespaceCatalog is a catalog of CurieNamespace objects + its main purpose is to convert between uri's and curies for the namespaces in the catalog + """ def __init__(self) -> None: self.namespaces = [] self._converter: Optional[Converter] = None @property def converter(self): + """ + return a curies.Converter that knows all namespaces. + When multiple namespaces have the same prefix, they are added as uri synonyms to the converter. + When there are two prefixes leading to the same uri, they are added as prefix synonyms to the converter. + """ if not self._converter: self._converter = self._buildConverter() return self._converter @@ -43,28 +51,48 @@ def _buildConverter(self): - def to_curie(self, uri: Union[str, URIRef]) -> str: + def to_curie(self, uri: Union[str, URIRef]) -> Optional[str]: + """ + Compress a URI to a CURIE, if possible. + + :param uri: + A string representing a valid uniform resource identifier (URI) + :returns: + A compact URI if this converter could find an appropriate URI prefix, otherwise None. + + """ + if isinstance(uri, URIRef): + uri = str(uri) return self.converter.compress(uri) def to_uri(self, curie: str) -> Optional[URIRef]: + """ + Expand a CURIE to a URI, if possible. + + :param curie: + A string representing a compact URI + :returns: + A URIRef if this converter contains a URI prefix for the prefix in this CURIE, otherwise None + """ expanded = self.converter.expand(curie) return None if expanded is None else URIRef(expanded) def add_namespace(self,ns: "CurieNamespace"): + """ + Adds a new namespace to the catalog. + """ self.namespaces.append(ns) self._converter = None @classmethod def create(cls, *namespaces: List["CurieNamespace"]): + """ + creates a new catalog from the given namespaces + """ cat = CurieNamespaceCatalog() [cat.add_namespace(x) for x in namespaces] return cat - def clear(self): - self.catalog = dict() - - def as_dict(self): - return self.catalog.copy() class CurieNamespace(Namespace):