diff --git a/.gitignore b/.gitignore
index 022ac422..665eab28 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,9 @@ lib/PyLD.egg-info
profiler
tests/test_caching.py
tests/data/test_caching.json
+
+# Local Python version with `pyenv`
+.python-version
+
+# PyCharm & other JetBrains IDEs
+.idea
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..ea900208
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,11 @@
+[submodule "specifications/json-ld-api"]
+ path = specifications/json-ld-api
+ url = https://github.com/w3c/json-ld-api.git
+
+[submodule "specifications/json-ld-framing"]
+ path = specifications/json-ld-framing
+ url = https://github.com/w3c/json-ld-framing.git
+
+[submodule "specifications/rdf-canon"]
+ path = specifications/rdf-canon
+ url = https://github.com/w3c/rdf-canon.git
diff --git a/README.rst b/README.rst
index f1b1a50b..8db9e531 100644
--- a/README.rst
+++ b/README.rst
@@ -190,16 +190,21 @@ Tests
This library includes a sample testing utility which may be used to verify
that changes to the processor maintain the correct output.
-To run the sample tests you will need to get the test suite files by cloning
-the ``json-ld-api``, ``json-ld-framing``, and ``normalization`` repositories
-hosted on GitHub:
+To run the sample tests you will need the test suite files provided in the ``json-ld-api``,
+``json-ld-framing``, and ``rdf-canon`` repositories hosted on GitHub:
- https://github.com/w3c/json-ld-api
- https://github.com/w3c/json-ld-framing
-- https://github.com/json-ld/normalization
+- https://github.com/w3c/rdf-canon
-If the suites repositories are available as sibling directories of the PyLD
-source directory, then all the tests can be run with the following:
+They are included beneath the ``specifications`` directory of this repository as Git submodules. By default, ``git clone`` does
+not retrieve submodules; to download them, run the following command:
+
+.. code-block:: bash
+
+ git submodule update --init --recursive
+
+Once the test suite repositories are available, all the tests can be run with the following:
.. code-block:: bash
diff --git a/docs/decisions/on-key-dropped-argument.md b/docs/decisions/on-key-dropped-argument.md
new file mode 100644
index 00000000..3c73fa01
--- /dev/null
+++ b/docs/decisions/on-key-dropped-argument.md
@@ -0,0 +1,30 @@
+---
+$id: on-key-dropped-argument
+title: Pass on_key_dropped as a named argument to expand()
+date: 2023-11-12
+author: anatoly-scherbakov
+issue: 50
+adr:is-blocked-by: on-key-dropped-handler
+---
+
+# Pass `on_key_dropped` as a named argument to `expand()`
+
+## Context
+
+We need to pass the value of `on_key_dropped` handler to `jsonld.expand()` somehow.
+
+### :x: Use `options` dictionary
+
+Passing the handler through the `options` dictionary contradicts Python conventions.
+
+### :heavy_check_mark: Add a named argument
+
+That's what a Python developer would expect, in most cases.
+
+## Decision
+
+Add as a named argument.
+
+## Consequences
+
+Improves developer experience, even though it is a bit inconsistent with the other settings, which are still passed through the `options` dictionary.
diff --git a/docs/decisions/on-key-dropped.md b/docs/decisions/on-key-dropped.md
new file mode 100644
index 00000000..fd1f1706
--- /dev/null
+++ b/docs/decisions/on-key-dropped.md
@@ -0,0 +1,69 @@
+---
+$id: on-key-dropped-handler
+title: Call a customizable handler when a key is ignored during JSON-LD expansion
+author: anatoly-scherbakov
+date: 2023-11-12
+issue: 50
+---
+
+# Call a customizable handler when a key is ignored during JSON-LD expansion
+
+## Context
+
+If a key in a JSON-LD document does not map to an absolute IRI, it is ignored. Knowing about such keys is valuable for debugging, so ignoring them silently is undesirable.
+
+Essentially, we need to give the developer a means to react to each ignored key. How?
+
+### Use cases
+
+* Simplify debugging of JSON-LD files,
+* Alert about ignored keys in IDEs/editors/linters,
+* …
+
+### :x: Raise an `Exception`
+
+```mermaid
+graph LR
+ subgraph any ["JSON-LD documents in the wild"]
+ arbitrary("Extra key<br/>in a JSON-LD document") --implies--> invalid("The document<br/>is now invalid")
+ end
+
+ raise{"Raise an Exception"} --> each("On each ignored key") --> any -->
+ impractical("That is<br/>impractical") --> failure("Failure")
+```
+
+### :x: Just log every key
+
+Does not support the "Alert about ignored keys in IDEs/editors/linters" use case.
+
+### :x: Export the set of ignored keys as part of `expand()` return value
+
+```mermaid
+graph LR
+ when{"When to export?"} --always--> waste("Waste RAM") --> failure("Failure")
+ when --"only when requested"-->typing("Change return value type<br/>based on inputs") --contradicting--> typing-system("Python typing system") --> failure
+```
+
+### :x: Export the set of ignored keys in a mutable argument to `expand()`
+
+The author of this document
+
+* believes this approach contradicts Python conventions and practice,
+* does not know of any popular Python libraries using such an approach,
+* is certain that developers will not praise this API.
+
+### :heavy_check_mark: Call a handler on each ignored key
+
+* This will enable the developer to process each ignored key as they see fit,
+* is a common practice (see `map` function, for instance).
+
+Let's call the handler `on_key_dropped`.
+
+
+## Decision
+
+Pass a `callable` named `on_key_dropped` to `jsonld.expand(…)`.
+
+## Consequences
+
+Simplify debugging and permit custom handling of ignored keys in application code.
diff --git a/docs/decisions/resolution.yaml b/docs/decisions/resolution.yaml
new file mode 100644
index 00000000..ccbad600
--- /dev/null
+++ b/docs/decisions/resolution.yaml
@@ -0,0 +1,5 @@
+adr:resolution:
+ - decision: on-key-dropped-argument
+ by: anatoly-scherbakov
+ date: 2023-11-12
+ status: accepted
diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py
index 49d62122..5d31e1ed 100644
--- a/lib/pyld/jsonld.py
+++ b/lib/pyld/jsonld.py
@@ -14,6 +14,7 @@
.. moduleauthor:: Gregg Kellogg
"""
+import logging
import copy
import hashlib
import json
@@ -22,6 +23,8 @@
import traceback
import warnings
import uuid
+from typing import Optional, Callable
+
from .context_resolver import ContextResolver
from c14n.Canonicalize import canonicalize
from cachetools import LRUCache
@@ -32,6 +35,8 @@
from frozendict import frozendict
from pyld.__about__ import (__copyright__, __license__, __version__)
+logger = logging.getLogger('pyld.jsonld')
+
def cmp(a, b):
return (a > b) - (a < b)
@@ -117,6 +122,19 @@ def cmp(a, b):
# Initial contexts, defined on first access
INITIAL_CONTEXTS = {}
+
+# Handler called with each key dropped during expansion (may be None); its return value is ignored
+OnKeyDropped = Callable[[Optional[str]], object]
+
+
+def log_on_key_dropped(key: Optional[str]):
+ """Default `on_key_dropped` handler: log the ignored key at DEBUG level."""
+ logger.debug(
+ 'Key `%s` was not mapped to an absolute IRI and was ignored.',
+ key,
+ )
+
+
def compact(input_, ctx, options=None):
"""
Performs JSON-LD compaction.
@@ -142,7 +160,11 @@ def compact(input_, ctx, options=None):
return JsonLdProcessor().compact(input_, ctx, options)
-def expand(input_, options=None):
+def expand(
+ input_,
+ options=None,
+ on_key_dropped: OnKeyDropped = log_on_key_dropped,
+):
"""
Performs JSON-LD expansion.
@@ -157,10 +179,17 @@ def expand(input_, options=None):
defaults to 'json-ld-1.1'.
[documentLoader(url, options)] the document loader
(default: _default_document_loader).
+ :param [on_key_dropped] Callable to invoke for every JSON-LD key that was
+ ignored.
:return: the expanded JSON-LD output.
"""
- return JsonLdProcessor().expand(input_, options)
+ return JsonLdProcessor(
+ on_key_dropped=on_key_dropped,
+ ).expand(
+ input_=input_,
+ options=options,
+ )
def flatten(input_, ctx=None, options=None):
@@ -645,18 +674,23 @@ def unparse_url(parsed):
return rval
-class JsonLdProcessor(object):
+class JsonLdProcessor:
"""
A JSON-LD processor.
"""
- def __init__(self):
+ def __init__(self, on_key_dropped: OnKeyDropped = log_on_key_dropped):
"""
Initialize the JSON-LD processor.
+
+ :param [on_key_dropped] Callable to invoke for every JSON-LD key that
+ was ignored.
"""
# processor-specific RDF parsers
self.rdf_parsers = None
+ self.on_key_dropped = on_key_dropped
+
def compact(self, input_, ctx, options):
"""
Performs JSON-LD compaction.
@@ -2191,10 +2225,15 @@ def _compact(self, active_ctx, active_property, element, options):
return element
def _expand(
- self, active_ctx, active_property, element, options,
- inside_list=False,
- inside_index=False,
- type_scoped_ctx=None):
+ self,
+ active_ctx,
+ active_property,
+ element,
+ options,
+ inside_list=False,
+ inside_index=False,
+ type_scoped_ctx=None,
+ ):
"""
Recursively expands an element using the given context. Any context in
the element will be removed. All context URLs must have been retrieved
@@ -2234,7 +2273,8 @@ def _expand(
active_ctx, active_property, e, options,
inside_list=inside_list,
inside_index=inside_index,
- type_scoped_ctx=type_scoped_ctx)
+ type_scoped_ctx=type_scoped_ctx,
+ )
if inside_list and _is_array(e):
e = {'@list': e}
# drop None values
@@ -2460,10 +2500,11 @@ def _expand_object(
active_ctx, key, vocab=True)
# drop non-absolute IRI keys that aren't keywords
- if (expanded_property is None or
- not (
- _is_absolute_iri(expanded_property) or
- _is_keyword(expanded_property))):
+ if expanded_property is None or (
+ not _is_absolute_iri(expanded_property)
+ and not _is_keyword(expanded_property)
+ ):
+ self.on_key_dropped(expanded_property)
continue
if _is_keyword(expanded_property):
@@ -3411,7 +3452,8 @@ def _expand_index_map(self, active_ctx, active_property, value, index_key, as_gr
JsonLdProcessor.arrayify(v),
options,
inside_list=False,
- inside_index=True)
+ inside_index=True,
+ )
expanded_key = None
if property_index:
diff --git a/specifications/json-ld-api b/specifications/json-ld-api
new file mode 160000
index 00000000..6bf9ef4e
--- /dev/null
+++ b/specifications/json-ld-api
@@ -0,0 +1 @@
+Subproject commit 6bf9ef4e2135c0146a8120b9732f67809503bb9c
diff --git a/specifications/json-ld-framing b/specifications/json-ld-framing
new file mode 160000
index 00000000..c01b1754
--- /dev/null
+++ b/specifications/json-ld-framing
@@ -0,0 +1 @@
+Subproject commit c01b17540361040f2ee1f990aff138b5e81bbf5d
diff --git a/specifications/rdf-canon b/specifications/rdf-canon
new file mode 160000
index 00000000..0503facf
--- /dev/null
+++ b/specifications/rdf-canon
@@ -0,0 +1 @@
+Subproject commit 0503facfaa0825686afc1f533f487816de54d9b7
diff --git a/tests/expand/on_key_dropped.py b/tests/expand/on_key_dropped.py
new file mode 100644
index 00000000..554770a0
--- /dev/null
+++ b/tests/expand/on_key_dropped.py
@@ -0,0 +1,83 @@
+import unittest
+from typing import Any
+
+import pyld.jsonld as jsonld
+
+
+def raise_this(value: Any):
+ raise ValueError(value)
+
+
+class TestOnKeyDropped(unittest.TestCase):
+ """
+ Tests for on_key_dropped argument and logic in JSON-LD expand algorithm.
+
+ Original implementation is © pchampin.
+ """
+
+ CTX = {"foo": {"@id": "http://example.com/foo"}}
+ DATA = {"fooo": "bar"}
+ RESULT = []
+
+ def test_silently_ignored(self):
+ got = jsonld.expand(
+ self.DATA,
+ {'expandContext': self.CTX},
+ )
+ self.assertEqual(got, self.RESULT)
+
+ def test_strict_fails(self):
+ with self.assertRaises(ValueError):
+ jsonld.expand(
+ self.DATA,
+ {'expandContext': self.CTX},
+ on_key_dropped=raise_this,
+ )
+
+ def test_dropped_keys(self):
+ dropped_keys = set()
+ got = jsonld.expand(
+ self.DATA,
+ {'expandContext': self.CTX},
+ on_key_dropped=dropped_keys.add,
+ )
+ self.assertEqual(got, self.RESULT)
+ self.assertSetEqual(dropped_keys, {"fooo"})
+
+ DATA2 = {
+ "@id": "foo", "foo": "bar", "fooo": "baz",
+ "http://example.com/other": "blah"}
+ RESULT2 = [{
+ "@id": u"foo",
+ "http://example.com/foo": [{"@value": "bar"}],
+ "http://example.com/other": [{"@value": "blah"}],
+ }]
+
+ def test_silently_ignored_2(self):
+ got = jsonld.expand(
+ self.DATA2,
+ {'expandContext': self.CTX},
+ )
+ self.assertEqual(got, self.RESULT2)
+
+ def test_strict_fails_2(self):
+ with self.assertRaises(ValueError):
+ jsonld.expand(
+ self.DATA2,
+ {'expandContext': self.CTX},
+ on_key_dropped=raise_this,
+ )
+
+ def test_dropped_keys_2(self):
+ dropped_keys = set()
+ got = jsonld.expand(
+ self.DATA2,
+ {'expandContext': self.CTX},
+ on_key_dropped=dropped_keys.add,
+ )
+ self.assertEqual(got, self.RESULT2)
+ self.assertSetEqual(dropped_keys, {"fooo"})
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/runtests.py b/tests/runtests.py
index 08ed0d34..e618513c 100644
--- a/tests/runtests.py
+++ b/tests/runtests.py
@@ -17,6 +17,8 @@
import unittest
import re
from argparse import ArgumentParser
+from pathlib import Path
+from typing import List
from unittest import TextTestResult
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lib'))
@@ -32,9 +34,20 @@
LOCAL_BASES = [
'https://w3c.github.io/json-ld-api/tests',
'https://w3c.github.io/json-ld-framing/tests',
- 'https://github.com/json-ld/normalization/tests'
+ 'https://w3c.github.io/rdf-canon/tests/',
]
+
+def default_test_targets() -> List[Path]:
+ """Return the test directories of the `specifications` submodules (existence is not checked)."""
+ specifications = Path(__file__).parent.parent / 'specifications'
+ return [
+ specifications / 'json-ld-api/tests',
+ specifications / 'json-ld-framing/tests',
+ specifications / 'rdf-canon/tests',
+ ]
+
+
class TestRunner(unittest.TextTestRunner):
"""
Loads test manifests and runs tests.
@@ -95,18 +108,7 @@ def main(self):
test_targets = self.options.tests
else:
# default to find known sibling test dirs
- test_targets = []
- sibling_dirs = [
- '../json-ld-api/tests/',
- '../json-ld-framing/tests/',
- '../normalization/tests/',
- ]
- for dir in sibling_dirs:
- if os.path.exists(dir):
- print('Test dir found', dir)
- test_targets.append(dir)
- else:
- print('Test dir not found', dir)
+ test_targets = default_test_targets()
# ensure a manifest or a directory was specified
if len(test_targets) == 0: