From 7ba4e55c9d886a3c3eb1efe15095d1a64b414ad4 Mon Sep 17 00:00:00 2001 From: Anatoly Scherbakov Date: Wed, 18 Oct 2023 19:34:56 +0400 Subject: [PATCH 1/8] Add submodules --- .gitmodules | 11 +++++++++++ specifications/json-ld-api | 1 + specifications/json-ld-framing | 1 + specifications/rdf-canon | 1 + 4 files changed, 14 insertions(+) create mode 100644 .gitmodules create mode 160000 specifications/json-ld-api create mode 160000 specifications/json-ld-framing create mode 160000 specifications/rdf-canon diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..ea90020 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,11 @@ +[submodule "specifications/json-ld-api"] + path = specifications/json-ld-api + url = git@github.com:w3c/json-ld-api.git + +[submodule "specifications/json-ld-framing"] + path = specifications/json-ld-framing + url = git@github.com:w3c/json-ld-framing.git + +[submodule "specifications/rdf-canon"] + path = specifications/rdf-canon + url = git@github.com:w3c/rdf-canon.git diff --git a/specifications/json-ld-api b/specifications/json-ld-api new file mode 160000 index 0000000..6bf9ef4 --- /dev/null +++ b/specifications/json-ld-api @@ -0,0 +1 @@ +Subproject commit 6bf9ef4e2135c0146a8120b9732f67809503bb9c diff --git a/specifications/json-ld-framing b/specifications/json-ld-framing new file mode 160000 index 0000000..c01b175 --- /dev/null +++ b/specifications/json-ld-framing @@ -0,0 +1 @@ +Subproject commit c01b17540361040f2ee1f990aff138b5e81bbf5d diff --git a/specifications/rdf-canon b/specifications/rdf-canon new file mode 160000 index 0000000..0503fac --- /dev/null +++ b/specifications/rdf-canon @@ -0,0 +1 @@ +Subproject commit 0503facfaa0825686afc1f533f487816de54d9b7 From ac97d27b32a1db443c1bbe695c373142ce14fdee Mon Sep 17 00:00:00 2001 From: Anatoly Scherbakov Date: Wed, 18 Oct 2023 19:35:07 +0400 Subject: [PATCH 2/8] Update `.gitignore` --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index 
022ac42..665eab2 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,9 @@ lib/PyLD.egg-info profiler tests/test_caching.py tests/data/test_caching.json + +# Local Python version with `pyenv` +.python-version + +# PyCharm & other JetBrains IDEs +.idea From c84a9fcd5122e5355a60e1fab62dc44be129f470 Mon Sep 17 00:00:00 2001 From: Anatoly Scherbakov Date: Sun, 12 Nov 2023 18:21:37 +0200 Subject: [PATCH 3/8] Use spec tests from git modules as default test targets --- tests/runtests.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/runtests.py b/tests/runtests.py index 08ed0d3..1e45803 100644 --- a/tests/runtests.py +++ b/tests/runtests.py @@ -17,6 +17,7 @@ import unittest import re from argparse import ArgumentParser +from pathlib import Path from unittest import TextTestResult sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lib')) @@ -32,9 +33,20 @@ LOCAL_BASES = [ 'https://w3c.github.io/json-ld-api/tests', 'https://w3c.github.io/json-ld-framing/tests', - 'https://github.com/json-ld/normalization/tests' + 'https://w3c.github.io/rdf-canon/tests/', ] + +def default_test_targets() -> list[Path]: + """Default test directories from specifications.""" + specifications = Path(__file__).parent.parent / 'specifications' + return [ + specifications / 'json-ld-api/tests', + specifications / 'json-ld-framing/tests', + specifications / 'rdf-canon/tests', + ] + + class TestRunner(unittest.TextTestRunner): """ Loads test manifests and runs tests. 
@@ -95,18 +107,7 @@ def main(self): test_targets = self.options.tests else: # default to find known sibling test dirs - test_targets = [] - sibling_dirs = [ - '../json-ld-api/tests/', - '../json-ld-framing/tests/', - '../normalization/tests/', - ] - for dir in sibling_dirs: - if os.path.exists(dir): - print('Test dir found', dir) - test_targets.append(dir) - else: - print('Test dir not found', dir) + test_targets = default_test_targets() # ensure a manifest or a directory was specified if len(test_targets) == 0: From c39ca9e59e98384dbc7a427837be34ad7e6256ad Mon Sep 17 00:00:00 2001 From: Anatoly Scherbakov Date: Sun, 12 Nov 2023 18:24:26 +0200 Subject: [PATCH 4/8] =?UTF-8?q?`list[=E2=80=A6]`=20=E2=86=92=20`List[?= =?UTF-8?q?=E2=80=A6]`=20because=20this=20form=20is=20for=20Python=20?= =?UTF-8?q?=E2=A9=BE=203.10=20only?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/runtests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/runtests.py b/tests/runtests.py index 1e45803..e618513 100644 --- a/tests/runtests.py +++ b/tests/runtests.py @@ -18,6 +18,7 @@ import re from argparse import ArgumentParser from pathlib import Path +from typing import List from unittest import TextTestResult sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lib')) @@ -37,7 +38,7 @@ ] -def default_test_targets() -> list[Path]: +def default_test_targets() -> List[Path]: """Default test directories from specifications.""" specifications = Path(__file__).parent.parent / 'specifications' return [ From 3ebcff9d2638de2f677745b7ed0454a7e9371b9b Mon Sep 17 00:00:00 2001 From: Anatoly Scherbakov Date: Sun, 12 Nov 2023 18:34:35 +0200 Subject: [PATCH 5/8] Note about Git submodules in `README.rst` --- README.rst | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index f1b1a50..8db9e53 100644 --- a/README.rst +++ b/README.rst @@ -190,16 +190,21 @@ Tests This 
library includes a sample testing utility which may be used to verify that changes to the processor maintain the correct output. -To run the sample tests you will need to get the test suite files by cloning -the ``json-ld-api``, ``json-ld-framing``, and ``normalization`` repositories -hosted on GitHub: +To run the sample tests you will need the test suite files provided in the ``json-ld-api``, +``json-ld-framing``, and ``rdf-canon`` repositories hosted on GitHub: - https://github.com/w3c/json-ld-api - https://github.com/w3c/json-ld-framing -- https://github.com/json-ld/normalization +- https://github.com/w3c/rdf-canon -If the suites repositories are available as sibling directories of the PyLD -source directory, then all the tests can be run with the following: +They are included beneath the ``specifications`` directory of this repository as Git submodules. By default, ``git clone`` does +not retrieve submodules; to download them, please issue the following command: + +.. code-block:: bash + + git submodule update --init --recursive + +If the suite repositories are available, then all the tests can be run with the following: .. 
code-block:: bash From 43ed457fca861637da7a78c45102a7a7df9d29a7 Mon Sep 17 00:00:00 2001 From: Anatoly Scherbakov Date: Sun, 12 Nov 2023 23:45:53 +0200 Subject: [PATCH 6/8] Document addition of `on_key_dropped` argument and its behavior --- docs/decisions/on-key-dropped-argument.md | 30 ++++++++++ docs/decisions/on-key-dropped.md | 69 +++++++++++++++++++++++ docs/decisions/resolution.yaml | 5 ++ 3 files changed, 104 insertions(+) create mode 100644 docs/decisions/on-key-dropped-argument.md create mode 100644 docs/decisions/on-key-dropped.md create mode 100644 docs/decisions/resolution.yaml diff --git a/docs/decisions/on-key-dropped-argument.md b/docs/decisions/on-key-dropped-argument.md new file mode 100644 index 0000000..3c73fa0 --- /dev/null +++ b/docs/decisions/on-key-dropped-argument.md @@ -0,0 +1,30 @@ +--- +$id: on-key-dropped-argument +title: Pass on_key_dropped as a named argument to expand() +date: 2023-11-12 +author: anatoly-scherbakov +issue: 50 +adr:is-blocked-by: on-key-dropped-handler +--- + +# Pass `on_key_dropped` as a named argument to `expand()` + +## Context + +We need to pass the value of `on_key_dropped` handler to `jsonld.expand()` somehow. + +### :x: Use `options` dictionary + +That dictionary contradicts Python conventions. + +### :heavy_check_mark: Add a named argument + +That's what a Python developer would expect, in most cases. + +## Decision + +Add as a named argument. + +## Consequences + +Improve developer experience, even though it is a bit inconsistent. 
diff --git a/docs/decisions/on-key-dropped.md b/docs/decisions/on-key-dropped.md new file mode 100644 index 0000000..fd1f170 --- /dev/null +++ b/docs/decisions/on-key-dropped.md @@ -0,0 +1,69 @@ +--- +$id: on-key-dropped-handler +title: Call a customizable handler when a key is ignored during JSON-LD expansion +author: anatoly-scherbakov +date: 2023-11-12 +issue: 50 +--- + +# Call a customizable handler when a key is ignored during JSON-LD expansion + +## Context + +If a key in a JSON-LD document does not map to an absolute IRI then it is ignored. Knowing when this happens can be valuable for debugging, so silently ignoring such keys is undesirable. + +Essentially, we need to provide the developer with a means to react to each ignored key. How? + +### Use cases + +* Simplify debugging of JSON-LD files, +* Alert about ignored keys in IDEs/editors/linters, +* … + +### :x: Raise an `Exception` + +```mermaid +graph LR + subgraph any ["JSON-LD documents in the wild"] + arbitrary("Extra key
in a JSON-LD document") --implies--> invalid("The document
is now invalid") + end + + raise{"Raise an Exception"} --> each("On each ignored key") --> any --> + impractical("That is
impractical") --> failure("Failure") +``` + +### :x: Just log every key + +Does not support the "Alert about ignored keys in IDEs/editors/linters" use case. + +### :x: Export the set of ignored keys as part of `expand()` return value + +```mermaid +graph LR + when{"When to export?"} --always--> waste("Waste RAM") --> failure("Failure") + when --"only when requested"-->typing("Change return value type
based on inputs") --contradicting--> typing-system("Python typing system") --> failure +``` + +### :x: Export the set of ignored keys in a mutable argument to `expand()` + +The author of this document + +* believes this approach contradicts Python conventions and practice, +* does not know of any popular Python libraries using such an approach, +* is certain that developers will not praise this API. + +### :heavy_check_mark: Call a handler on each ignored key + +* This will enable the developer to process each ignored key as they see fit, +* is a common practice (see `map` function, for instance). + +Let's call the handler `on_key_dropped`. + + +## Decision + +Pass a `callable` named `on_key_dropped` to `jsonld.expand(…)`. + +## Consequences + +Simplify debugging and permit custom handling of ignored keys in application code. diff --git a/docs/decisions/resolution.yaml b/docs/decisions/resolution.yaml new file mode 100644 index 0000000..ccbad60 --- /dev/null +++ b/docs/decisions/resolution.yaml @@ -0,0 +1,5 @@ +adr:resolution: + - decision: on-key-dropped-argument + by: anatoly-scherbakov + date: 2023-11-12 + status: accepted From 88530147ef9ecf742eb217b5bfe833781ccf4c46 Mon Sep 17 00:00:00 2001 From: Anatoly Scherbakov Date: Sun, 12 Nov 2023 23:46:11 +0200 Subject: [PATCH 7/8] Implement `on_key_dropped` argument --- lib/pyld/jsonld.py | 70 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py index 49d6212..5d31e1e 100644 --- a/lib/pyld/jsonld.py +++ b/lib/pyld/jsonld.py @@ -14,6 +14,7 @@ .. 
moduleauthor:: Gregg Kellogg """ +import logging import copy import hashlib import json @@ -22,6 +23,8 @@ import traceback import warnings import uuid +from typing import Optional, Callable + from .context_resolver import ContextResolver from c14n.Canonicalize import canonicalize from cachetools import LRUCache @@ -32,6 +35,8 @@ from frozendict import frozendict from pyld.__about__ import (__copyright__, __license__, __version__) +logger = logging.getLogger('pyld.jsonld') + def cmp(a, b): return (a > b) - (a < b) @@ -117,6 +122,19 @@ def cmp(a, b): # Initial contexts, defined on first access INITIAL_CONTEXTS = {} + +# Handler to call if a key was dropped during expansion +OnKeyDropped = Callable[[Optional[str]], ...] + + +def log_on_key_dropped(key: Optional[str]): + """Default behavior on ignored JSON-LD keys is to log them.""" + logger.debug( + 'Key `%s` was not mapped to an absolute IRI and was ignored.', + key, + ) + + def compact(input_, ctx, options=None): """ Performs JSON-LD compaction. @@ -142,7 +160,11 @@ def compact(input_, ctx, options=None): return JsonLdProcessor().compact(input_, ctx, options) -def expand(input_, options=None): +def expand( + input_, + options=None, + on_key_dropped: OnKeyDropped = log_on_key_dropped, +): """ Performs JSON-LD expansion. @@ -157,10 +179,17 @@ def expand(input_, options=None): defaults to 'json-ld-1.1'. [documentLoader(url, options)] the document loader (default: _default_document_loader). + :param [on_key_dropped] Callable to invoke for every JSON-LD key that was + ignored. :return: the expanded JSON-LD output. """ - return JsonLdProcessor().expand(input_, options) + return JsonLdProcessor( + on_key_dropped=on_key_dropped, + ).expand( + input_=input_, + options=options, + ) def flatten(input_, ctx=None, options=None): @@ -645,18 +674,23 @@ def unparse_url(parsed): return rval -class JsonLdProcessor(object): +class JsonLdProcessor: """ A JSON-LD processor. 
""" - def __init__(self): + def __init__(self, on_key_dropped: OnKeyDropped = log_on_key_dropped): """ Initialize the JSON-LD processor. + + :param [on_key_dropped] Callable to invoke for every JSON-LD key that + was ignored. """ # processor-specific RDF parsers self.rdf_parsers = None + self.on_key_dropped = on_key_dropped + def compact(self, input_, ctx, options): """ Performs JSON-LD compaction. @@ -2191,10 +2225,15 @@ def _compact(self, active_ctx, active_property, element, options): return element def _expand( - self, active_ctx, active_property, element, options, - inside_list=False, - inside_index=False, - type_scoped_ctx=None): + self, + active_ctx, + active_property, + element, + options, + inside_list=False, + inside_index=False, + type_scoped_ctx=None, + ): """ Recursively expands an element using the given context. Any context in the element will be removed. All context URLs must have been retrieved @@ -2234,7 +2273,8 @@ def _expand( active_ctx, active_property, e, options, inside_list=inside_list, inside_index=inside_index, - type_scoped_ctx=type_scoped_ctx) + type_scoped_ctx=type_scoped_ctx, + ) if inside_list and _is_array(e): e = {'@list': e} # drop None values @@ -2460,10 +2500,11 @@ def _expand_object( active_ctx, key, vocab=True) # drop non-absolute IRI keys that aren't keywords - if (expanded_property is None or - not ( - _is_absolute_iri(expanded_property) or - _is_keyword(expanded_property))): + if expanded_property is None or ( + not _is_absolute_iri(expanded_property) + and not _is_keyword(expanded_property) + ): + self.on_key_dropped(expanded_property) continue if _is_keyword(expanded_property): @@ -3411,7 +3452,8 @@ def _expand_index_map(self, active_ctx, active_property, value, index_key, as_gr JsonLdProcessor.arrayify(v), options, inside_list=False, - inside_index=True) + inside_index=True, + ) expanded_key = None if property_index: From de462b0595e850bc77830a2c24f41e4995a47845 Mon Sep 17 00:00:00 2001 From: Anatoly Scherbakov Date: Sun, 
12 Nov 2023 23:46:49 +0200 Subject: [PATCH 8/8] Adapt tests by @pchampin to cover dropped keys handler --- tests/expand/on_key_dropped.py | 83 ++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 tests/expand/on_key_dropped.py diff --git a/tests/expand/on_key_dropped.py b/tests/expand/on_key_dropped.py new file mode 100644 index 0000000..554770a --- /dev/null +++ b/tests/expand/on_key_dropped.py @@ -0,0 +1,83 @@ +import unittest +from typing import Any + +import pyld.jsonld as jsonld + + +def raise_this(value: Any): + raise ValueError(value) + + +class TestOnKeyDropped(unittest.TestCase): + """ + Tests for on_key_dropped argument and logic in JSON-LD expand algorithm. + + Original implementation is © pchampin. + """ + + CTX = {"foo": {"@id": "http://example.com/foo"}} + DATA = {"fooo": "bar"} + RESULT = [] + + def test_silently_ignored(self): + got = jsonld.expand( + self.DATA, + {'expandContext': self.CTX}, + ) + self.assertEqual(got, self.RESULT) + + def test_strict_fails(self): + with self.assertRaises(ValueError): + jsonld.expand( + self.DATA, + {'expandContext': self.CTX}, + on_key_dropped=raise_this, + ) + + def test_dropped_keys(self): + dropped_keys = set() + got = jsonld.expand( + self.DATA, + {'expandContext': self.CTX}, + on_key_dropped=dropped_keys.add, + ) + self.assertEqual(got, self.RESULT) + self.assertSetEqual(dropped_keys, {"fooo"}) + + DATA2 = { + "@id": "foo", "foo": "bar", "fooo": "baz", + "http://example.com/other": "blah"} + RESULT2 = [{ + "@id": u"foo", + "http://example.com/foo": [{"@value": "bar"}], + "http://example.com/other": [{"@value": "blah"}], + }] + + def test_silently_ignored_2(self): + got = jsonld.expand( + self.DATA2, + {'expandContext': self.CTX}, + ) + self.assertEqual(got, self.RESULT2) + + def test_strict_fails_2(self): + with self.assertRaises(ValueError): + jsonld.expand( + self.DATA2, + {'expandContext': self.CTX}, + on_key_dropped=raise_this, + ) + + def test_dropped_keys_2(self): + 
dropped_keys = set() + got = jsonld.expand( + self.DATA2, + {'expandContext': self.CTX}, + on_key_dropped=dropped_keys.add, + ) + self.assertEqual(got, self.RESULT2) + self.assertSetEqual(dropped_keys, {"fooo"}) + + +if __name__ == "__main__": + unittest.main()