From 4bfcb7c0f371bc3df4b3b6d91ba4da888895d962 Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 7 Aug 2025 10:46:52 +0100 Subject: [PATCH 01/29] Version 2 WIP [skip ci] --- jsonpath/env.py | 30 +- jsonpath/filter.py | 22 +- jsonpath/lex.py | 96 +-- jsonpath/match.py | 13 + jsonpath/parse.py | 294 ++++---- jsonpath/path.py | 55 +- jsonpath/segments.py | 128 ++++ jsonpath/selectors.py | 657 ++++++----------- jsonpath/stream.py | 15 + jsonpath/token.py | 117 ++-- pyproject.toml | 3 +- tests/test_filter_expression_caching.py | 41 +- tests/test_find_reference.py | 253 +++---- tests/test_lex.py | 818 +++++++++++++++------- tests/test_walk_filter_expression_tree.py | 14 +- 15 files changed, 1348 insertions(+), 1208 deletions(-) create mode 100644 jsonpath/segments.py diff --git a/jsonpath/env.py b/jsonpath/env.py index d951c90..8542b32 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -25,9 +25,9 @@ from .filter import UNDEFINED from .filter import VALUE_TYPE_EXPRESSIONS from .filter import FilterExpression +from .filter import FilterQuery from .filter import FunctionExtension from .filter import InfixExpression -from .filter import Path from .fluent_api import Query from .function_extensions import ExpressionType from .function_extensions import FilterFunction @@ -40,8 +40,8 @@ from .path import JSONPath from .stream import TokenStream from .token import TOKEN_EOF -from .token import TOKEN_FAKE_ROOT from .token import TOKEN_INTERSECTION +from .token import TOKEN_PSEUDO_ROOT from .token import TOKEN_UNION from .token import Token @@ -92,7 +92,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`. ## Class attributes Attributes: - fake_root_token (str): The pattern used to select a "fake" root node, one level + pseudo_root_token (str): The pattern used to select a "fake" root node, one level above the real root node. filter_context_token (str): The pattern used to select extra filter context data. Defaults to `"_"`. 
@@ -117,7 +117,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`. # These should be unescaped strings. `re.escape` will be called # on them automatically when compiling lexer rules. - fake_root_token = "^" + pseudo_root_token = "^" filter_context_token = "_" intersection_token = "&" key_token = "#" @@ -180,9 +180,9 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 """ tokens = self.lexer.tokenize(path) stream = TokenStream(tokens) - fake_root = stream.current.kind == TOKEN_FAKE_ROOT + pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT _path: Union[JSONPath, CompoundJSONPath] = JSONPath( - env=self, selectors=self.parser.parse(stream), fake_root=fake_root + env=self, segments=self.parser.parse(stream), pseudo_root=pseudo_root ) if stream.current.kind != TOKEN_EOF: @@ -197,22 +197,22 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 if stream.current.kind == TOKEN_UNION: stream.next_token() - fake_root = stream.current.kind == TOKEN_FAKE_ROOT + pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT _path = _path.union( JSONPath( env=self, - selectors=self.parser.parse(stream), - fake_root=fake_root, + segments=self.parser.parse(stream), + pseudo_root=pseudo_root, ) ) elif stream.current.kind == TOKEN_INTERSECTION: stream.next_token() - fake_root = stream.current.kind == TOKEN_FAKE_ROOT + pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT _path = _path.intersection( JSONPath( env=self, - selectors=self.parser.parse(stream), - fake_root=fake_root, + segments=self.parser.parse(stream), + pseudo_root=pseudo_root, ) ) else: # pragma: no cover @@ -456,7 +456,7 @@ def check_well_typedness( if typ == ExpressionType.VALUE: if not ( isinstance(arg, VALUE_TYPE_EXPRESSIONS) - or (isinstance(arg, Path) and arg.path.singular_query()) + or (isinstance(arg, FilterQuery) and arg.path.singular_query()) or (self._function_return_type(arg) == ExpressionType.VALUE) ): raise 
JSONPathTypeError( @@ -464,13 +464,13 @@ def check_well_typedness( token=token, ) elif typ == ExpressionType.LOGICAL: - if not isinstance(arg, (Path, InfixExpression)): + if not isinstance(arg, (FilterQuery, InfixExpression)): raise JSONPathTypeError( f"{token.value}() argument {idx} must be of LogicalType", token=token, ) elif typ == ExpressionType.NODES and not ( - isinstance(arg, Path) + isinstance(arg, FilterQuery) or self._function_return_type(arg) == ExpressionType.NODES ): raise JSONPathTypeError( diff --git a/jsonpath/filter.py b/jsonpath/filter.py index 0556cbf..5f9adb2 100644 --- a/jsonpath/filter.py +++ b/jsonpath/filter.py @@ -23,7 +23,6 @@ from .function_extensions import FilterFunction from .match import NodeList from .selectors import Filter as FilterSelector -from .selectors import ListSelector from .serialize import canonical_string if TYPE_CHECKING: @@ -494,7 +493,7 @@ def set_children(self, children: List[FilterExpression]) -> None: self._expr.set_children(children) -class Path(FilterExpression, ABC): +class FilterQuery(FilterExpression, ABC): """Base expression for all _sub paths_ found in filter expressions.""" __slots__ = ("path",) @@ -504,17 +503,14 @@ def __init__(self, path: JSONPath) -> None: super().__init__() def __eq__(self, other: object) -> bool: - return isinstance(other, Path) and str(self) == str(other) + return isinstance(other, FilterQuery) and str(self) == str(other) def children(self) -> List[FilterExpression]: _children: List[FilterExpression] = [] - for segment in self.path.selectors: - if isinstance(segment, ListSelector): - _children.extend( - selector.expression - for selector in segment.items - if isinstance(selector, FilterSelector) - ) + for segment in self.path.segments: + for selector in segment.selectors: + if isinstance(selector, FilterSelector): + _children.append(selector.expression) return _children def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 @@ -522,7 +518,7 @@ def 
set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG00 return -class SelfPath(Path): +class RelativeFilterQuery(FilterQuery): """A JSONPath starting at the current node.""" __slots__ = () @@ -572,7 +568,7 @@ async def evaluate_async(self, context: FilterContext) -> object: ) -class RootPath(Path): +class RootFilterQuery(FilterQuery): """A JSONPath starting at the root node.""" __slots__ = () @@ -606,7 +602,7 @@ async def evaluate_async(self, context: FilterContext) -> object: ) -class FilterContextPath(Path): +class FilterContextPath(FilterQuery): """A JSONPath starting at the root of any extra context data.""" __slots__ = () diff --git a/jsonpath/lex.py b/jsonpath/lex.py index 4c4422d..837f6dc 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -10,41 +10,40 @@ from .exceptions import JSONPathSyntaxError from .token import TOKEN_AND -from .token import TOKEN_BARE_PROPERTY +from .token import TOKEN_COLON from .token import TOKEN_COMMA from .token import TOKEN_CONTAINS from .token import TOKEN_DDOT -from .token import TOKEN_DOT_PROPERTY +from .token import TOKEN_DOT from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EQ -from .token import TOKEN_FAKE_ROOT +from .token import TOKEN_ERROR from .token import TOKEN_FALSE from .token import TOKEN_FILTER from .token import TOKEN_FILTER_CONTEXT from .token import TOKEN_FLOAT -from .token import TOKEN_FUNCTION from .token import TOKEN_GE from .token import TOKEN_GT -from .token import TOKEN_ILLEGAL from .token import TOKEN_IN from .token import TOKEN_INT from .token import TOKEN_INTERSECTION from .token import TOKEN_KEY from .token import TOKEN_KEYS +from .token import TOKEN_KEYS_FILTER +from .token import TOKEN_LBRACKET from .token import TOKEN_LE from .token import TOKEN_LG -from .token import TOKEN_LIST_SLICE -from .token import TOKEN_LIST_START from .token import TOKEN_LPAREN from .token import TOKEN_LT from .token import TOKEN_MISSING +from .token import TOKEN_NAME from .token 
import TOKEN_NE from .token import TOKEN_NIL from .token import TOKEN_NONE from .token import TOKEN_NOT from .token import TOKEN_NULL from .token import TOKEN_OR -from .token import TOKEN_PROPERTY +from .token import TOKEN_PSEUDO_ROOT from .token import TOKEN_RBRACKET from .token import TOKEN_RE from .token import TOKEN_RE_FLAGS @@ -53,13 +52,10 @@ from .token import TOKEN_RPAREN from .token import TOKEN_SELF from .token import TOKEN_SINGLE_QUOTE_STRING -from .token import TOKEN_SKIP -from .token import TOKEN_SLICE_START -from .token import TOKEN_SLICE_STEP -from .token import TOKEN_SLICE_STOP from .token import TOKEN_TRUE from .token import TOKEN_UNDEFINED from .token import TOKEN_UNION +from .token import TOKEN_WHITESPACE from .token import TOKEN_WILD from .token import Token @@ -86,8 +82,9 @@ class attributes. Then setting `lexer_class` on a `JSONPathEnvironment`. """ key_pattern = r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*" + name_pattern = key_pattern # XXX: - # `not` or ! + # ! or `not` logical_not_pattern = r"(?:not\b)|!" # && or `and` @@ -102,28 +99,17 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: self.double_quote_pattern = r'"(?P(?:(?!(?(?:(?!(?{self.key_pattern})" - - self.slice_list_pattern = ( - r"(?P\-?\d*)\s*" - r":\s*(?P\-?\d*)\s*" - r"(?::\s*(?P\-?\d*))?" 
- ) - + # TODO: separate re literal tokens # /pattern/ or /pattern/flags self.re_pattern = r"/(?P.+?)/(?P[aims]*)" - # func( - self.function_pattern = r"(?P[a-z][a-z_0-9]+)\(\s*" - self.rules = self.compile_rules() def compile_rules(self) -> Pattern[str]: """Prepare regular expression rules.""" env_tokens = [ (TOKEN_ROOT, self.env.root_token), - (TOKEN_FAKE_ROOT, self.env.fake_root_token), + (TOKEN_PSEUDO_ROOT, self.env.pseudo_root_token), (TOKEN_SELF, self.env.self_token), (TOKEN_KEY, self.env.key_token), (TOKEN_UNION, self.env.union_token), @@ -136,12 +122,10 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern), (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern), (TOKEN_RE_PATTERN, self.re_pattern), - (TOKEN_LIST_SLICE, self.slice_list_pattern), - (TOKEN_FUNCTION, self.function_pattern), - (TOKEN_DOT_PROPERTY, self.dot_property_pattern), (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"), (TOKEN_INT, r"-?\d+(?P[eE][+\-]?\d+)?\b"), (TOKEN_DDOT, r"\.\."), + (TOKEN_DOT, r"\."), (TOKEN_AND, self.logical_and_pattern), (TOKEN_OR, self.logical_or_pattern), *[ @@ -153,6 +137,7 @@ def compile_rules(self) -> Pattern[str]: ], (TOKEN_WILD, r"\*"), (TOKEN_FILTER, r"\?"), + (TOKEN_KEYS_FILTER, r"~\?"), # TODO: get from env (TOKEN_IN, r"in\b"), (TOKEN_TRUE, r"[Tt]rue\b"), (TOKEN_FALSE, r"[Ff]alse\b"), @@ -162,9 +147,10 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_CONTAINS, r"contains\b"), (TOKEN_UNDEFINED, r"undefined\b"), (TOKEN_MISSING, r"missing\b"), - (TOKEN_LIST_START, r"\["), + (TOKEN_LBRACKET, r"\["), (TOKEN_RBRACKET, r"]"), (TOKEN_COMMA, r","), + (TOKEN_COLON, r":"), (TOKEN_EQ, r"=="), (TOKEN_NE, r"!="), (TOKEN_LG, r"<>"), @@ -173,12 +159,12 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_RE, r"=~"), (TOKEN_LT, r"<"), (TOKEN_GT, r">"), - (TOKEN_NOT, self.logical_not_pattern), - (TOKEN_BARE_PROPERTY, self.key_pattern), + (TOKEN_NOT, self.logical_not_pattern), # Must go after "!=" + (TOKEN_NAME, self.key_pattern), # 
Must go after reserved words (TOKEN_LPAREN, r"\("), (TOKEN_RPAREN, r"\)"), - (TOKEN_SKIP, r"[ \n\t\r\.]+"), - (TOKEN_ILLEGAL, r"."), + (TOKEN_WHITESPACE, r"[ \n\t\r]+"), + (TOKEN_ERROR, r"."), ] return re.compile( @@ -194,35 +180,7 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 kind = match.lastgroup assert kind is not None - if kind == TOKEN_DOT_PROPERTY: - yield _token( - kind=TOKEN_PROPERTY, - value=match.group("G_PROP"), - index=match.start("G_PROP"), - ) - elif kind == TOKEN_BARE_PROPERTY: - yield _token( - kind=TOKEN_BARE_PROPERTY, - value=match.group(), - index=match.start(), - ) - elif kind == TOKEN_LIST_SLICE: - yield _token( - kind=TOKEN_SLICE_START, - value=match.group("G_LSLICE_START"), - index=match.start("G_LSLICE_START"), - ) - yield _token( - kind=TOKEN_SLICE_STOP, - value=match.group("G_LSLICE_STOP"), - index=match.start("G_LSLICE_STOP"), - ) - yield _token( - kind=TOKEN_SLICE_STEP, - value=match.group("G_LSLICE_STEP") or "", - index=match.start("G_LSLICE_STEP"), - ) - elif kind == TOKEN_DOUBLE_QUOTE_STRING: + if kind == TOKEN_DOUBLE_QUOTE_STRING: yield _token( kind=TOKEN_DOUBLE_QUOTE_STRING, value=match.group("G_DQUOTE"), @@ -264,19 +222,11 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 value=match.group(), index=match.start(), ) - elif kind == TOKEN_FUNCTION: - yield _token( - kind=TOKEN_FUNCTION, - value=match.group("G_FUNC"), - index=match.start("G_FUNC"), - ) - elif kind == TOKEN_SKIP: - continue - elif kind == TOKEN_ILLEGAL: + elif kind == TOKEN_ERROR: raise JSONPathSyntaxError( f"unexpected token {match.group()!r}", token=_token( - TOKEN_ILLEGAL, + TOKEN_ERROR, value=match.group(), index=match.start(), ), diff --git a/jsonpath/match.py b/jsonpath/match.py index dea2fee..964dff4 100644 --- a/jsonpath/match.py +++ b/jsonpath/match.py @@ -11,6 +11,7 @@ from typing import Union from .pointer import JSONPointer +from .serialize import canonical_string FilterContextVars = Mapping[str, Any] PathPart = 
Union[int, str] @@ -69,6 +70,18 @@ def add_child(self, *children: JSONPathMatch) -> None: """Append one or more children to this match.""" self.children.extend(children) + def new_child(self, obj: object, key: Union[int, str]) -> JSONPathMatch: + """Return a new JSONPathMatch instance with this instance as its parent.""" + return self.__class__( + filter_context=self.filter_context(), + obj=obj, + parent=self, + parts=self.parts + (key,), + path=self.path + + f"[{canonical_string(key) if isinstance(key, str) else key}]", + root=self.root, + ) + def filter_context(self) -> FilterContextVars: """Return filter context data for this match.""" return self._filter_context diff --git a/jsonpath/parse.py b/jsonpath/parse.py index eaef7fc..82ccea5 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -8,9 +8,9 @@ from typing import Callable from typing import Dict from typing import Iterable +from typing import Iterator from typing import List from typing import Optional -from typing import Union from jsonpath.function_extensions.filter_function import ExpressionType from jsonpath.function_extensions.filter_function import FilterFunction @@ -25,6 +25,7 @@ from .filter import BooleanExpression from .filter import FilterContextPath from .filter import FilterExpression +from .filter import FilterQuery from .filter import FloatLiteral from .filter import FunctionExtension from .filter import InfixExpression @@ -32,31 +33,31 @@ from .filter import ListLiteral from .filter import Literal from .filter import Nil -from .filter import Path from .filter import PrefixExpression from .filter import RegexLiteral -from .filter import RootPath -from .filter import SelfPath +from .filter import RelativeFilterQuery +from .filter import RootFilterQuery from .filter import StringLiteral from .path import JSONPath +from .segments import JSONPathChildSegment +from .segments import JSONPathRecursiveDescentSegment +from .segments import JSONPathSegment from .selectors import Filter from 
.selectors import IndexSelector from .selectors import JSONPathSelector from .selectors import KeysSelector -from .selectors import ListSelector from .selectors import PropertySelector -from .selectors import RecursiveDescentSelector from .selectors import SliceSelector from .selectors import WildSelector from .token import TOKEN_AND -from .token import TOKEN_BARE_PROPERTY +from .token import TOKEN_COLON from .token import TOKEN_COMMA from .token import TOKEN_CONTAINS from .token import TOKEN_DDOT +from .token import TOKEN_DOT from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EOF from .token import TOKEN_EQ -from .token import TOKEN_FAKE_ROOT from .token import TOKEN_FALSE from .token import TOKEN_FILTER from .token import TOKEN_FILTER_CONTEXT @@ -69,19 +70,20 @@ from .token import TOKEN_INTERSECTION from .token import TOKEN_KEY from .token import TOKEN_KEYS +from .token import TOKEN_LBRACKET from .token import TOKEN_LE from .token import TOKEN_LG -from .token import TOKEN_LIST_START from .token import TOKEN_LPAREN from .token import TOKEN_LT from .token import TOKEN_MISSING +from .token import TOKEN_NAME from .token import TOKEN_NE from .token import TOKEN_NIL from .token import TOKEN_NONE from .token import TOKEN_NOT from .token import TOKEN_NULL from .token import TOKEN_OR -from .token import TOKEN_PROPERTY +from .token import TOKEN_PSEUDO_ROOT from .token import TOKEN_RBRACKET from .token import TOKEN_RE from .token import TOKEN_RE_FLAGS @@ -90,9 +92,6 @@ from .token import TOKEN_RPAREN from .token import TOKEN_SELF from .token import TOKEN_SINGLE_QUOTE_STRING -from .token import TOKEN_SLICE_START -from .token import TOKEN_SLICE_STEP -from .token import TOKEN_SLICE_STOP from .token import TOKEN_TRUE from .token import TOKEN_UNDEFINED from .token import TOKEN_UNION @@ -145,7 +144,6 @@ class Parser: """A JSONPath parser bound to a JSONPathEnvironment.""" PRECEDENCE_LOWEST = 1 - PRECEDENCE_LOGICALRIGHT = 2 PRECEDENCE_LOGICAL_OR = 3 
PRECEDENCE_LOGICAL_AND = 4 PRECEDENCE_RELATIONAL = 5 @@ -236,14 +234,13 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: self.token_map: Dict[str, Callable[[TokenStream], FilterExpression]] = { TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, - TOKEN_FAKE_ROOT: self.parse_root_path, + TOKEN_PSEUDO_ROOT: self.parse_root_path, TOKEN_FALSE: self.parse_boolean, TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, TOKEN_FLOAT: self.parse_float_literal, TOKEN_FUNCTION: self.parse_function_extension, TOKEN_INT: self.parse_integer_literal, TOKEN_KEY: self.parse_current_key, - TOKEN_LIST_START: self.parse_list_literal, TOKEN_LPAREN: self.parse_grouped_expression, TOKEN_MISSING: self.parse_undefined, TOKEN_NIL: self.parse_nil, @@ -274,7 +271,7 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: str, Callable[[TokenStream], FilterExpression] ] = { TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, - TOKEN_FAKE_ROOT: self.parse_root_path, + TOKEN_PSEUDO_ROOT: self.parse_root_path, TOKEN_FALSE: self.parse_boolean, TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, TOKEN_FLOAT: self.parse_float_literal, @@ -290,10 +287,11 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: TOKEN_TRUE: self.parse_boolean, } - def parse(self, stream: TokenStream) -> Iterable[JSONPathSelector]: + def parse(self, stream: TokenStream) -> Iterator[JSONPathSegment]: """Parse a JSONPath from a stream of tokens.""" - if stream.current.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT}: + if stream.current.kind in {TOKEN_ROOT, TOKEN_PSEUDO_ROOT}: stream.next_token() + yield from self.parse_path(stream, in_filter=False) if stream.current.kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION): @@ -307,37 +305,34 @@ def parse_path( stream: TokenStream, *, in_filter: bool = False, - ) -> Iterable[JSONPathSelector]: + ) -> Iterable[JSONPathSegment]: """Parse a top-level JSONPath, or one that is nested in a filter.""" while True: - if stream.current.kind in (TOKEN_PROPERTY, 
TOKEN_BARE_PROPERTY): - yield PropertySelector( - env=self.env, - token=stream.current, - name=stream.current.value, - shorthand=True, - ) - elif stream.current.kind == TOKEN_SLICE_START: - yield self.parse_slice(stream) - elif stream.current.kind == TOKEN_WILD: - yield WildSelector( - env=self.env, - token=stream.current, - shorthand=True, + if stream.current.kind == TOKEN_DDOT: + token = stream.next_token() + selectors = self.parse_selectors(stream) + if not selectors: + raise JSONPathSyntaxError( + "missing selector for recursive descent segment", + token=stream.current, + ) + yield JSONPathRecursiveDescentSegment( + env=self.env, token=token, selectors=selectors ) - elif stream.current.kind == TOKEN_KEYS: - yield KeysSelector( - env=self.env, - token=stream.current, - shorthand=True, + elif ( + stream.skip(TOKEN_DOT) + and stream.current.kind + in { + TOKEN_NAME, + TOKEN_WILD, + TOKEN_KEYS, + } + ) or stream.current.kind == TOKEN_LBRACKET: + token = stream.current + selectors = self.parse_selectors(stream) + yield JSONPathChildSegment( + env=self.env, token=token, selectors=selectors ) - elif stream.current.kind == TOKEN_DDOT: - yield RecursiveDescentSelector( - env=self.env, - token=stream.current, - ) - elif stream.current.kind == TOKEN_LIST_START: - yield self.parse_selector_list(stream) else: if in_filter: stream.push(stream.current) @@ -345,95 +340,112 @@ def parse_path( stream.next_token() + def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: + if stream.current.kind == TOKEN_NAME: + return ( + PropertySelector( + env=self.env, + token=stream.current, + name=stream.current.value, + shorthand=True, + ), + ) + + if stream.current.kind == TOKEN_WILD: + return (WildSelector(env=self.env, token=stream.current, shorthand=True),) + + if stream.current.kind == TOKEN_KEYS: + return ( + KeysSelector( + env=self.env, + token=stream.current, + shorthand=True, + ), + ) + + if stream.current.kind == TOKEN_LBRACKET: + return 
tuple(self.parse_bracketed_selection(stream)) + + return () + def parse_slice(self, stream: TokenStream) -> SliceSelector: """Parse a slice JSONPath expression from a stream of tokens.""" - start_token = stream.next_token() - stream.expect(TOKEN_SLICE_STOP) - stop_token = stream.next_token() - stream.expect(TOKEN_SLICE_STEP) - step_token = stream.current - - if not start_token.value: - start: Optional[int] = None - else: - start = int(start_token.value) - - if not stop_token.value: - stop: Optional[int] = None - else: - stop = int(stop_token.value) - - if not step_token.value: - step: Optional[int] = None - else: - step = int(step_token.value) + tok = stream.current + start: Optional[int] = None + stop: Optional[int] = None + step: Optional[int] = None + + def _maybe_index(token: Token) -> bool: + if token.kind == TOKEN_INT: + if len(token.value) > 1 and token.value.startswith(("0", "-0")): + raise JSONPathSyntaxError( + f"invalid index {token.value!r}", token=token + ) + return True + return False + + # 1: or : + if _maybe_index(stream.current): + start = int(stream.current.value) + stream.next_token() + + stream.expect(TOKEN_COLON) + stream.next_token() + + # 1 or 1: or : or ? + if _maybe_index(stream.current): + stop = int(stream.current.value) + stream.next_token() + if stream.current.kind == TOKEN_COLON: + stream.next_token() + elif stream.current.kind == TOKEN_COLON: + stream.expect(TOKEN_COLON) + stream.next_token() + + # 1 or ? 
+ if _maybe_index(stream.current): + step = int(stream.current.value) + stream.next_token() + + stream.push(stream.current) return SliceSelector( env=self.env, - token=start_token, + token=tok, start=start, stop=stop, step=step, ) - def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR0912 - """Parse a comma separated list JSONPath selectors from a stream of tokens.""" - tok = stream.next_token() - list_items: List[ - Union[ - IndexSelector, - KeysSelector, - PropertySelector, - SliceSelector, - WildSelector, - Filter, - ] - ] = [] + def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]: # noqa: PLR0912 + """Parse a comma separated list of JSONPath selectors.""" + tok = stream.next_token() # Skip LBRACKET + selectors: List[JSONPathSelector] = [] while stream.current.kind != TOKEN_RBRACKET: if stream.current.kind == TOKEN_INT: - if ( - len(stream.current.value) > 1 - and stream.current.value.startswith("0") - ) or stream.current.value.startswith("-0"): - raise JSONPathSyntaxError( - "leading zero in index selector", token=stream.current + if stream.peek.kind == TOKEN_COLON: + selectors.append(self.parse_slice(stream)) + else: + if ( + len(stream.current.value) > 1 + and stream.current.value.startswith("0") + ) or stream.current.value.startswith("-0"): + raise JSONPathSyntaxError( + "leading zero in index selector", token=stream.current + ) + selectors.append( + IndexSelector( + env=self.env, + token=stream.current, + index=int(stream.current.value), + ) ) - list_items.append( - IndexSelector( - env=self.env, - token=stream.current, - index=int(stream.current.value), - ) - ) - elif stream.current.kind == TOKEN_BARE_PROPERTY: - list_items.append( - PropertySelector( - env=self.env, - token=stream.current, - name=stream.current.value, - shorthand=False, - ), - ) - elif stream.current.kind == TOKEN_KEYS: - list_items.append( - KeysSelector( - env=self.env, - token=stream.current, - shorthand=False, - ) - ) elif 
stream.current.kind in ( TOKEN_DOUBLE_QUOTE_STRING, TOKEN_SINGLE_QUOTE_STRING, ): - if self.RE_INVALID_NAME_SELECTOR.search(stream.current.value): - raise JSONPathSyntaxError( - f"invalid name selector {stream.current.value!r}", - token=stream.current, - ) - - list_items.append( + selectors.append( PropertySelector( env=self.env, token=stream.current, @@ -441,10 +453,10 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR shorthand=False, ), ) - elif stream.current.kind == TOKEN_SLICE_START: - list_items.append(self.parse_slice(stream)) + elif stream.current.kind == TOKEN_COLON: + selectors.append(self.parse_slice(stream)) elif stream.current.kind == TOKEN_WILD: - list_items.append( + selectors.append( WildSelector( env=self.env, token=stream.current, @@ -452,7 +464,7 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR ) ) elif stream.current.kind == TOKEN_FILTER: - list_items.append(self.parse_filter(stream)) + selectors.append(self.parse_filter_selector(stream)) elif stream.current.kind == TOKEN_EOF: raise JSONPathSyntaxError( "unexpected end of query", token=stream.current @@ -470,26 +482,20 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR ) if stream.peek.kind != TOKEN_RBRACKET: - # TODO: error message .. 
expected a comma or logical operator stream.expect_peek(TOKEN_COMMA) stream.next_token() - - if stream.peek.kind == TOKEN_RBRACKET: - raise JSONPathSyntaxError( - "unexpected trailing comma", - token=stream.peek, - ) + stream.expect_peek_not(TOKEN_RBRACKET, "unexpected trailing comma") stream.next_token() - if not list_items: + if not selectors: raise JSONPathSyntaxError("empty bracketed segment", token=tok) - return ListSelector(env=self.env, token=tok, items=list_items) + return selectors - def parse_filter(self, stream: TokenStream) -> Filter: + def parse_filter_selector(self, stream: TokenStream) -> Filter: tok = stream.next_token() - expr = self.parse_filter_selector(stream) + expr = self.parse_filter_expression(stream) if self.env.well_typed and isinstance(expr, FunctionExtension): func = self.env.function_extensions.get(expr.name) @@ -537,7 +543,9 @@ def parse_prefix_expression(self, stream: TokenStream) -> FilterExpression: assert tok.kind == TOKEN_NOT return PrefixExpression( operator="!", - right=self.parse_filter_selector(stream, precedence=self.PRECEDENCE_PREFIX), + right=self.parse_filter_expression( + stream, precedence=self.PRECEDENCE_PREFIX + ), ) def parse_infix_expression( @@ -545,7 +553,7 @@ def parse_infix_expression( ) -> FilterExpression: tok = stream.next_token() precedence = self.PRECEDENCES.get(tok.kind, self.PRECEDENCE_LOWEST) - right = self.parse_filter_selector(stream, precedence) + right = self.parse_filter_expression(stream, precedence) operator = self.BINARY_OPERATORS[tok.kind] if self.env.well_typed and operator in self.COMPARISON_OPERATORS: @@ -570,7 +578,7 @@ def parse_infix_expression( def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression: stream.next_token() - expr = self.parse_filter_selector(stream) + expr = self.parse_filter_expression(stream) stream.next_token() while stream.current.kind != TOKEN_RPAREN: @@ -592,18 +600,18 @@ def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression: 
def parse_root_path(self, stream: TokenStream) -> FilterExpression: root = stream.next_token() - return RootPath( + return RootFilterQuery( JSONPath( env=self.env, - selectors=self.parse_path(stream, in_filter=True), - fake_root=root.kind == TOKEN_FAKE_ROOT, + segments=self.parse_path(stream, in_filter=True), + pseudo_root=root.kind == TOKEN_PSEUDO_ROOT, ) ) def parse_self_path(self, stream: TokenStream) -> FilterExpression: stream.next_token() - return SelfPath( - JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True)) + return RelativeFilterQuery( + JSONPath(env=self.env, segments=self.parse_path(stream, in_filter=True)) ) def parse_current_key(self, _: TokenStream) -> FilterExpression: @@ -612,7 +620,7 @@ def parse_current_key(self, _: TokenStream) -> FilterExpression: def parse_filter_context_path(self, stream: TokenStream) -> FilterExpression: stream.next_token() return FilterContextPath( - JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True)) + JSONPath(env=self.env, segments=self.parse_path(stream, in_filter=True)) ) def parse_regex(self, stream: TokenStream) -> FilterExpression: @@ -680,7 +688,7 @@ def parse_function_extension(self, stream: TokenStream) -> FilterExpression: self.env.validate_function_extension_signature(tok, function_arguments), ) - def parse_filter_selector( + def parse_filter_expression( self, stream: TokenStream, precedence: int = PRECEDENCE_LOWEST ) -> FilterExpression: try: @@ -728,7 +736,7 @@ def _decode_string_literal(self, token: Token) -> str: def _raise_for_non_comparable_function( self, expr: FilterExpression, token: Token ) -> None: - if isinstance(expr, Path) and not expr.path.singular_query(): + if isinstance(expr, FilterQuery) and not expr.path.singular_query(): raise JSONPathTypeError("non-singular query is not comparable", token=token) if isinstance(expr, FunctionExtension): diff --git a/jsonpath/path.py b/jsonpath/path.py index 9cf3d98..2b9cf70 100644 --- a/jsonpath/path.py +++ 
b/jsonpath/path.py @@ -18,15 +18,15 @@ from jsonpath.fluent_api import Query from jsonpath.match import FilterContextVars from jsonpath.match import JSONPathMatch +from jsonpath.segments import JSONPathRecursiveDescentSegment from jsonpath.selectors import IndexSelector -from jsonpath.selectors import ListSelector from jsonpath.selectors import PropertySelector if TYPE_CHECKING: from io import IOBase from .env import JSONPathEnvironment - from .selectors import JSONPathSelector + from .segments import JSONPathSegment class JSONPath: @@ -34,9 +34,9 @@ class JSONPath: Arguments: env: The `JSONPathEnvironment` this path is bound to. - selectors: An iterable of `JSONPathSelector` objects, as generated by + segments: An iterable of `JSONPathSegment` instances, as generated by a `Parser`. - fake_root: Indicates if target JSON values should be wrapped in a single- + pseudo_root: Indicates if target JSON values should be wrapped in a single- element array, so as to make the target root value selectable. @@ -45,29 +45,27 @@ class JSONPath: selectors: The `JSONPathSelector` instances that make up this path. 
""" - __slots__ = ("env", "fake_root", "selectors") + __slots__ = ("env", "pseudo_root", "segments") def __init__( self, *, env: JSONPathEnvironment, - selectors: Iterable[JSONPathSelector], - fake_root: bool = False, + segments: Iterable[JSONPathSegment], + pseudo_root: bool = False, ) -> None: self.env = env - self.selectors = tuple(selectors) - self.fake_root = fake_root + self.segments = tuple(segments) + self.pseudo_root = pseudo_root def __str__(self) -> str: - return self.env.root_token + "".join( - str(selector) for selector in self.selectors - ) + return self.env.root_token + "".join(str(segment) for segment in self.segments) def __eq__(self, __value: object) -> bool: - return isinstance(__value, JSONPath) and self.selectors == __value.selectors + return isinstance(__value, JSONPath) and self.segments == __value.segments def __hash__(self) -> int: - return hash(self.selectors) + return hash(self.segments) def findall( self, @@ -128,7 +126,7 @@ def finditer( matches: Iterable[JSONPathMatch] = [ JSONPathMatch( filter_context=filter_context or {}, - obj=[_data] if self.fake_root else _data, + obj=[_data] if self.pseudo_root else _data, parent=None, path=self.env.root_token, parts=(), @@ -136,8 +134,8 @@ def finditer( ) ] - for selector in self.selectors: - matches = selector.resolve(matches) + for segment in self.segments: + matches = segment.resolve(matches) return matches @@ -167,7 +165,7 @@ async def finditer_async( async def root_iter() -> AsyncIterable[JSONPathMatch]: yield self.env.match_class( filter_context=filter_context or {}, - obj=[_data] if self.fake_root else _data, + obj=[_data] if self.pseudo_root else _data, parent=None, path=self.env.root_token, parts=(), @@ -176,8 +174,8 @@ async def root_iter() -> AsyncIterable[JSONPathMatch]: matches: AsyncIterable[JSONPathMatch] = root_iter() - for selector in self.selectors: - matches = selector.resolve_async(matches) + for segment in self.segments: + matches = segment.resolve_async(matches) return 
matches @@ -237,20 +235,21 @@ def query( def empty(self) -> bool: """Return `True` if this path has no selectors.""" - return not bool(self.selectors) + return not bool(self.segments) def singular_query(self) -> bool: """Return `True` if this JSONPath query is a singular query.""" - for selector in self.selectors: - if isinstance(selector, (PropertySelector, IndexSelector)): - continue - if ( - isinstance(selector, ListSelector) - and len(selector.items) == 1 - and isinstance(selector.items[0], (PropertySelector, IndexSelector)) + for segment in self.segments: + if isinstance(segment, JSONPathRecursiveDescentSegment): + return False + + if len(segment.selectors) == 1 and isinstance( + segment.selectors[0], (PropertySelector, IndexSelector) ): continue + return False + return True diff --git a/jsonpath/segments.py b/jsonpath/segments.py new file mode 100644 index 0000000..8aeb892 --- /dev/null +++ b/jsonpath/segments.py @@ -0,0 +1,128 @@ +"""JSONPath child and descendant segment definitions.""" + +from __future__ import annotations + +from abc import ABC +from abc import abstractmethod +from typing import TYPE_CHECKING +from typing import AsyncIterable +from typing import Iterable +from typing import Mapping +from typing import Sequence +from typing import Tuple + +if TYPE_CHECKING: + from .env import JSONPathEnvironment + from .match import JSONPathMatch + from .selectors import JSONPathSelector + from .token import Token + + +class JSONPathSegment(ABC): + """Base class for all JSONPath segments.""" + + __slots__ = ("env", "token", "selectors") + + def __init__( + self, + *, + env: JSONPathEnvironment, + token: Token, + selectors: Tuple[JSONPathSelector, ...], + ) -> None: + self.env = env + self.token = token + self.selectors = selectors + + @abstractmethod + def resolve(self, nodes: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: + """Apply this segment to each `JSONPathMatch` in _nodes_.""" + + @abstractmethod + def resolve_async( + self, nodes: 
AsyncIterable[JSONPathMatch] + ) -> AsyncIterable[JSONPathMatch]: + """An async version of `resolve`.""" + + +class JSONPathChildSegment(JSONPathSegment): + """The JSONPath child selection segment.""" + + def resolve(self, nodes: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: + """Select children of each node in _nodes_.""" + for node in nodes: + for selector in self.selectors: + yield from selector.resolve(node) + + async def resolve_async( + self, nodes: AsyncIterable[JSONPathMatch] + ) -> AsyncIterable[JSONPathMatch]: + """An async version of `resolve`.""" + async for node in nodes: + for selector in self.selectors: + async for match in selector.resolve_async(node): + yield match + + def __str__(self) -> str: + return f"[{', '.join(str(itm) for itm in self.selectors)}]" + + def __eq__(self, __value: object) -> bool: + return ( + isinstance(__value, JSONPathChildSegment) + and self.selectors == __value.selectors + and self.token == __value.token + ) + + def __hash__(self) -> int: + return hash((self.selectors, self.token)) + + +class JSONPathRecursiveDescentSegment(JSONPathSegment): + """The JSONPath recursive descent segment.""" + + def resolve(self, nodes: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: + """Select descendants of each node in _nodes_.""" + for node in nodes: + for _node in self._visit(node): + for selector in self.selectors: + yield from selector.resolve(_node) + + async def resolve_async( + self, nodes: AsyncIterable[JSONPathMatch] + ) -> AsyncIterable[JSONPathMatch]: + """An async version of `resolve`.""" + async for node in nodes: + for _node in self._visit(node): + for selector in self.selectors: + async for match in selector.resolve_async(_node): + yield match + + def _visit(self, node: JSONPathMatch, depth: int = 1) -> Iterable[JSONPathMatch]: + """Depth-first, pre-order node traversal.""" + # TODO: check for recursion limit + + yield node + + if isinstance(node.obj, Mapping): + for name, val in node.obj.items(): + if 
isinstance(val, (Mapping, Sequence)): + _node = node.new_child(val, name) + yield from self._visit(_node, depth + 1) + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, item in enumerate(node.obj): + if isinstance(item, (Mapping, Sequence)): + _node = node.new_child(item, i) + yield from self._visit(_node, depth + 1) + + def __str__(self) -> str: + return f"..[{', '.join(str(itm) for itm in self.selectors)}]" + + def __eq__(self, __value: object) -> bool: + return ( + isinstance(__value, JSONPathRecursiveDescentSegment) + and self.selectors == __value.selectors + and self.token == __value.token + ) + + def __hash__(self) -> int: + return hash(("..", self.selectors, self.token)) diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index 44007e9..89e2490 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -11,9 +11,7 @@ from typing import Any from typing import AsyncIterable from typing import Iterable -from typing import List from typing import Optional -from typing import TypeVar from typing import Union from .exceptions import JSONPathIndexError @@ -39,13 +37,11 @@ def __init__(self, *, env: JSONPathEnvironment, token: Token) -> None: self.token = token @abstractmethod - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: """Apply the segment/selector to each node in _matches_. Arguments: - matches: Nodes matched by preceding segments/selectors. This is like - a lazy _NodeList_, as described in RFC 9535, but each match carries - more than the node's value and location. + node: A node matched by preceding segments/selectors. 
Returns: The `JSONPathMatch` instances created by applying this selector to each @@ -53,9 +49,7 @@ def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: """ @abstractmethod - def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: + def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: """An async version of `resolve`.""" @@ -93,46 +87,29 @@ def __eq__(self, __value: object) -> bool: def __hash__(self) -> int: return hash((self.name, self.token)) - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - if not isinstance(match.obj, Mapping): - continue - + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): with suppress(KeyError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=self.env.getitem(match.obj, self.name), - parent=match, - parts=match.parts + (self.name,), - path=match.path + f"[{canonical_string(self.name)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - if not isinstance(match.obj, Mapping): - continue + match = node.new_child(self.env.getitem(node.obj, self.name), self.name) + node.add_child(match) + yield match + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): with suppress(KeyError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=await self.env.getitem_async(match.obj, self.name), - parent=match, - parts=match.parts + (self.name,), - path=match.path + f"[{canonical_string(self.name)}]", - root=match.root, + match = node.new_child( + await self.env.getitem_async(node.obj, self.name), self.name ) - match.add_child(_match) - yield _match + node.add_child(match) + 
yield match class IndexSelector(JSONPathSelector): """Select an element from an array by index. + XXX: Change to make unquoted keys/properties a "singular path selector" + https://github.com/ietf-wg-jsonpath/draft-ietf-jsonpath-base/issues/522 + Considering we don't require mapping (JSON object) keys/properties to be quoted, and that we support mappings with numeric keys, we also check to see if the "index" is a mapping key, which is non-standard. @@ -172,65 +149,41 @@ def _normalized_index(self, obj: Sequence[object]) -> int: return len(obj) + self.index return self.index - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - if isinstance(match.obj, Mapping): - # Try the string representation of the index as a key. - with suppress(KeyError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=self.env.getitem(match.obj, self._as_key), - parent=match, - parts=match.parts + (self._as_key,), - path=f"{match.path}['{self.index}']", - root=match.root, - ) - match.add_child(_match) - yield _match - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - norm_index = self._normalized_index(match.obj) - with suppress(IndexError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=self.env.getitem(match.obj, self.index), - parent=match, - parts=match.parts + (norm_index,), - path=match.path + f"[{norm_index}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - if isinstance(match.obj, Mapping): - # Try the string representation of the index as a key. 
- with suppress(KeyError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=await self.env.getitem_async(match.obj, self._as_key), - parent=match, - parts=match.parts + (self._as_key,), - path=f"{match.path}['{self.index}']", - root=match.root, - ) - match.add_child(_match) - yield _match - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - norm_index = self._normalized_index(match.obj) - with suppress(IndexError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=await self.env.getitem_async(match.obj, self.index), - parent=match, - parts=match.parts + (norm_index,), - path=match.path + f"[{norm_index}]", - root=match.root, - ) - match.add_child(_match) - yield _match + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + # Try the string representation of the index as a key. + with suppress(KeyError): + match = node.new_child( + self.env.getitem(node.obj, self._as_key), self.index + ) + node.add_child(match) + yield match + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + norm_index = self._normalized_index(node.obj) + with suppress(IndexError): + match = node.new_child( + self.env.getitem(node.obj, self.index), norm_index + ) + node.add_child(match) + yield match + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + # Try the string representation of the index as a key. 
+ with suppress(KeyError): + match = node.new_child( + await self.env.getitem_async(node.obj, self._as_key), self.index + ) + node.add_child(match) + yield match + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + norm_index = self._normalized_index(node.obj) + with suppress(IndexError): + match = node.new_child( + await self.env.getitem_async(node.obj, self.index), norm_index + ) + node.add_child(match) + yield match class KeysSelector(JSONPathSelector): @@ -260,30 +213,26 @@ def __eq__(self, __value: object) -> bool: def __hash__(self) -> int: return hash(self.token) - def _keys(self, match: JSONPathMatch) -> Iterable[JSONPathMatch]: - if isinstance(match.obj, Mapping): - for i, key in enumerate(match.obj.keys()): - _match = self.env.match_class( - filter_context=match.filter_context(), + def _keys(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + for i, key in enumerate(node.obj.keys()): + match = node.__class__( + filter_context=node.filter_context(), obj=key, - parent=match, - parts=match.parts + (f"{self.env.keys_selector_token}{key}",), - path=f"{match.path}[{self.env.keys_selector_token}][{i}]", - root=match.root, + parent=node, + parts=node.parts + (f"{self.env.keys_selector_token}{key}",), + path=f"{node.path}[{self.env.keys_selector_token}][{i}]", + root=node.root, ) - match.add_child(_match) - yield _match + node.add_child(match) + yield match - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - yield from self._keys(match) + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + yield from self._keys(node) - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - for _match in self._keys(match): - yield _match + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + for match in self._keys(node): + yield 
match class SliceSelector(JSONPathSelector): @@ -327,47 +276,29 @@ def _check_range(self, *indices: Optional[int]) -> None: ): raise JSONPathIndexError("index out of range", token=self.token) - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - if not isinstance(match.obj, Sequence) or self.slice.step == 0: - continue + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if not isinstance(node.obj, Sequence) or self.slice.step == 0: + return - for norm_index, obj in zip( # noqa: B905 - range(*self.slice.indices(len(match.obj))), - self.env.getitem(match.obj, self.slice), - ): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=obj, - parent=match, - parts=match.parts + (norm_index,), - path=f"{match.path}[{norm_index}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - if not isinstance(match.obj, Sequence) or self.slice.step == 0: - continue - - for norm_index, obj in zip( # noqa: B905 - range(*self.slice.indices(len(match.obj))), - await self.env.getitem_async(match.obj, self.slice), - ): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=obj, - parent=match, - parts=match.parts + (norm_index,), - path=f"{match.path}[{norm_index}]", - root=match.root, - ) - match.add_child(_match) - yield _match + for norm_index, obj in zip( # noqa: B905 + range(*self.slice.indices(len(node.obj))), + self.env.getitem(node.obj, self.slice), + ): + match = node.new_child(obj, norm_index) + node.add_child(match) + yield match + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if not isinstance(node.obj, Sequence) or self.slice.step == 0: + return + + for norm_index, obj in zip( # noqa: B905 + range(*self.slice.indices(len(node.obj))), + await 
self.env.getitem_async(node.obj, self.slice), + ): + match = node.new_child(obj, norm_index) + node.add_child(match) + yield match class WildSelector(JSONPathSelector): @@ -390,191 +321,31 @@ def __eq__(self, __value: object) -> bool: def __hash__(self) -> int: return hash(self.token) - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - if isinstance(match.obj, str): - continue - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - elif isinstance(match.obj, Sequence): - for i, val in enumerate(match.obj): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - elif isinstance(match.obj, Sequence): - for i, val in enumerate(match.obj): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - -class RecursiveDescentSelector(JSONPathSelector): - """A JSONPath selector that visits all nodes recursively. - - NOTE: Strictly this is a "segment", not a "selector". 
- """ - - def __str__(self) -> str: - return ".." - - def __eq__(self, __value: object) -> bool: - return ( - isinstance(__value, RecursiveDescentSelector) - and self.token == __value.token - ) - - def __hash__(self) -> int: - return hash(self.token) - - def _expand(self, match: JSONPathMatch) -> Iterable[JSONPathMatch]: - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - if isinstance(val, str): - pass - elif isinstance(val, (Mapping, Sequence)): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - yield from self._expand(_match) - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - for i, val in enumerate(match.obj): - if isinstance(val, str): - pass - elif isinstance(val, (Mapping, Sequence)): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match - yield from self._expand(_match) - - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - yield match - yield from self._expand(match) - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - yield match - for _match in self._expand(match): - yield _match - - -T = TypeVar("T") - - -async def _alist(it: List[T]) -> AsyncIterable[T]: - for item in it: - yield item - - -class ListSelector(JSONPathSelector): - """A bracketed list of selectors, the results of which are concatenated together. - - NOTE: Strictly this is a "segment", not a "selector". 
- """ - - __slots__ = ("items",) - - def __init__( - self, - *, - env: JSONPathEnvironment, - token: Token, - items: List[ - Union[ - SliceSelector, - KeysSelector, - IndexSelector, - PropertySelector, - WildSelector, - Filter, - ] - ], - ) -> None: - super().__init__(env=env, token=token) - self.items = tuple(items) - - def __str__(self) -> str: - return f"[{', '.join(str(itm) for itm in self.items)}]" - - def __eq__(self, __value: object) -> bool: - return ( - isinstance(__value, ListSelector) - and self.items == __value.items - and self.token == __value.token - ) - - def __hash__(self) -> int: - return hash((self.items, self.token)) - - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match_ in matches: - for item in self.items: - yield from item.resolve([match_]) - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match_ in matches: - for item in self.items: - async for m in item.resolve_async(_alist([match_])): - yield m + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + for key, val in node.obj.items(): + match = node.new_child(val, key) + node.add_child(match) + yield match + + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, val in enumerate(node.obj): + match = node.new_child(val, i) + node.add_child(match) + yield match + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + for key, val in node.obj.items(): + match = node.new_child(val, key) + node.add_child(match) + yield match + + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, val in enumerate(node.obj): + match = node.new_child(val, i) + node.add_child(match) + yield match class Filter(JSONPathSelector): @@ -607,132 +378,98 @@ def __eq__(self, __value: object) -> bool: def __hash__(self) -> int: return 
hash((str(self.expression), self.token)) - def resolve( # noqa: PLR0912 - self, matches: Iterable[JSONPathMatch] - ) -> Iterable[JSONPathMatch]: + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: if self.cacheable_nodes and self.env.filter_caching: expr = self.expression.cache_tree() else: expr = self.expression - for match in matches: - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - context = FilterContext( - env=self.env, - current=val, - root=match.root, - extra_context=match.filter_context(), - current_key=key, - ) - try: - if expr.evaluate(context): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - except JSONPathTypeError as err: - if not err.token: - err.token = self.token - raise - - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - for i, obj in enumerate(match.obj): - context = FilterContext( - env=self.env, - current=obj, - root=match.root, - extra_context=match.filter_context(), - current_key=i, - ) - try: - if expr.evaluate(context): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=obj, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match - except JSONPathTypeError as err: - if not err.token: - err.token = self.token - raise - - async def resolve_async( # noqa: PLR0912 - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + for key, val in node.obj.items(): + context = FilterContext( + env=self.env, + current=val, + root=node.root, + extra_context=node.filter_context(), + current_key=key, + ) + try: + if expr.evaluate(context): + match = node.new_child(val, key) + node.add_child(match) + yield match + except 
JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, obj in enumerate(node.obj): + context = FilterContext( + env=self.env, + current=obj, + root=node.root, + extra_context=node.filter_context(), + current_key=i, + ) + try: + if expr.evaluate(context): + match = node.new_child(obj, i) + node.add_child(match) + yield match + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: if self.cacheable_nodes and self.env.filter_caching: expr = self.expression.cache_tree() else: expr = self.expression - async for match in matches: - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - context = FilterContext( - env=self.env, - current=val, - root=match.root, - extra_context=match.filter_context(), - current_key=key, - ) - - try: - result = await expr.evaluate_async(context) - except JSONPathTypeError as err: - if not err.token: - err.token = self.token - raise - - if result: - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - for i, obj in enumerate(match.obj): - context = FilterContext( - env=self.env, - current=obj, - root=match.root, - extra_context=match.filter_context(), - current_key=i, - ) - - try: - result = await expr.evaluate_async(context) - except JSONPathTypeError as err: - if not err.token: - err.token = self.token - raise - if result: - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=obj, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield 
_match + if isinstance(node.obj, Mapping): + for key, val in node.obj.items(): + context = FilterContext( + env=self.env, + current=val, + root=node.root, + extra_context=node.filter_context(), + current_key=key, + ) + + try: + result = await expr.evaluate_async(context) + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + + if result: + match = node.new_child(val, key) + node.add_child(match) + yield match + + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, obj in enumerate(node.obj): + context = FilterContext( + env=self.env, + current=obj, + root=node.root, + extra_context=node.filter_context(), + current_key=i, + ) + + try: + result = await expr.evaluate_async(context) + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + if result: + match = node.new_child(obj, i) + node.add_child(match) + yield match class FilterContext: diff --git a/jsonpath/stream.py b/jsonpath/stream.py index 4a38afb..0a6e052 100644 --- a/jsonpath/stream.py +++ b/jsonpath/stream.py @@ -4,6 +4,7 @@ from collections import deque from typing import Deque from typing import Iterator +from typing import Optional from .exceptions import JSONPathSyntaxError from .token import TOKEN_EOF @@ -97,3 +98,17 @@ def expect_peek(self, *typ: str) -> None: f"expected {_typ}, found {self.peek.kind!r}", token=self.peek, ) + + def expect_peek_not(self, typ: str, message: str) -> None: + """Raise an exception if the next token kind of _typ_.""" + if self.peek.kind == typ: + raise JSONPathSyntaxError(message, token=self.peek) + + def eat(self, *typ: str) -> Token: + self.expect(*typ) + return self.next_token() + + def skip(self, *typ: str) -> Optional[Token]: + if self.current.kind in typ: + return self.next_token() + return None diff --git a/jsonpath/token.py b/jsonpath/token.py index a2392e3..c9f6592 100644 --- a/jsonpath/token.py +++ b/jsonpath/token.py @@ -1,74 +1,69 @@ """JSONPath tokens.""" + import sys 
from typing import Tuple # Utility tokens -TOKEN_EOF = sys.intern("EOF") -TOKEN_ILLEGAL = sys.intern("ILLEGAL") -TOKEN_SKIP = sys.intern("SKIP") +TOKEN_EOF = sys.intern("TOKEN_EOF") +TOKEN_WHITESPACE = sys.intern("TOKEN_WHITESPACE") +TOKEN_ERROR = sys.intern("TOKEN_ERROR") # JSONPath expression tokens -TOKEN_COLON = sys.intern("COLON") -TOKEN_COMMA = sys.intern("COMMA") -TOKEN_DDOT = sys.intern("DDOT") -TOKEN_DOT = sys.intern("DOT") -TOKEN_DOT_INDEX = sys.intern("DINDEX") -TOKEN_DOT_PROPERTY = sys.intern("DOT_PROPERTY") -TOKEN_FILTER = sys.intern("FILTER") -TOKEN_FAKE_ROOT = sys.intern("FAKE_ROOT") -TOKEN_KEY = sys.intern("KEY") -TOKEN_KEYS = sys.intern("KEYS") -TOKEN_RBRACKET = sys.intern("RBRACKET") -TOKEN_BARE_PROPERTY = sys.intern("BARE_PROPERTY") -TOKEN_LIST_SLICE = sys.intern("LSLICE") -TOKEN_LIST_START = sys.intern("LBRACKET") -TOKEN_PROPERTY = sys.intern("PROP") -TOKEN_ROOT = sys.intern("ROOT") -TOKEN_SLICE_START = sys.intern("SLICE_START") -TOKEN_SLICE_STEP = sys.intern("SLICE_STEP") -TOKEN_SLICE_STOP = sys.intern("SLICE_STOP") -TOKEN_WILD = sys.intern("WILD") +TOKEN_COLON = sys.intern("TOKEN_COLON") +TOKEN_COMMA = sys.intern("TOKEN_COMMA") +TOKEN_DDOT = sys.intern("TOKEN_DDOT") +TOKEN_DOT = sys.intern("TOKEN_DOT") +TOKEN_FILTER = sys.intern("TOKEN_FILTER") +TOKEN_KEY = sys.intern("TOKEN_KEY") +TOKEN_KEYS = sys.intern("TOKEN_KEYS") +TOKEN_KEYS_FILTER = sys.intern("TOKEN_KEYS_FILTER") +TOKEN_LBRACKET = sys.intern("TOKEN_LBRACKET") +TOKEN_PSEUDO_ROOT = sys.intern("TOKEN_PSEUDO_ROOT") +TOKEN_RBRACKET = sys.intern("TOKEN_RBRACKET") +TOKEN_ROOT = sys.intern("TOKEN_ROOT") +TOKEN_WILD = sys.intern("TOKEN_WILD") +TOKEN_NAME = sys.intern("TOKEN_NAME") # An object property/key or a function name # Filter expression tokens -TOKEN_AND = sys.intern("AND") -TOKEN_BLANK = sys.intern("BLANK") -TOKEN_CONTAINS = sys.intern("CONTAINS") -TOKEN_FILTER_CONTEXT = sys.intern("FILTER_CONTEXT") -TOKEN_FUNCTION = sys.intern("FUNCTION") -TOKEN_EMPTY = sys.intern("EMPTY") -TOKEN_EQ = 
sys.intern("EQ") -TOKEN_FALSE = sys.intern("FALSE") -TOKEN_FLOAT = sys.intern("FLOAT") -TOKEN_GE = sys.intern("GE") -TOKEN_GT = sys.intern("GT") -TOKEN_IN = sys.intern("IN") -TOKEN_INT = sys.intern("INT") -TOKEN_LE = sys.intern("LE") -TOKEN_LG = sys.intern("LG") -TOKEN_LPAREN = sys.intern("LPAREN") -TOKEN_LT = sys.intern("LT") -TOKEN_NE = sys.intern("NE") -TOKEN_NIL = sys.intern("NIL") -TOKEN_NONE = sys.intern("NONE") -TOKEN_NOT = sys.intern("NOT") -TOKEN_NULL = sys.intern("NULL") -TOKEN_OP = sys.intern("OP") -TOKEN_OR = sys.intern("OR") -TOKEN_RE = sys.intern("RE") -TOKEN_RE_FLAGS = sys.intern("RE_FLAGS") -TOKEN_RE_PATTERN = sys.intern("RE_PATTERN") -TOKEN_RPAREN = sys.intern("RPAREN") -TOKEN_SELF = sys.intern("SELF") -TOKEN_STRING = sys.intern("STRING") -TOKEN_DOUBLE_QUOTE_STRING = sys.intern("DOUBLE_QUOTE_STRING") -TOKEN_SINGLE_QUOTE_STRING = sys.intern("SINGLE_QUOTE_STRING") -TOKEN_TRUE = sys.intern("TRUE") -TOKEN_UNDEFINED = sys.intern("UNDEFINED") -TOKEN_MISSING = sys.intern("MISSING") +TOKEN_AND = sys.intern("TOKEN_AND") +TOKEN_BLANK = sys.intern("TOKEN_BLANK") +TOKEN_CONTAINS = sys.intern("TOKEN_CONTAINS") +TOKEN_DOUBLE_QUOTE_STRING = sys.intern("TOKEN_DOUBLE_QUOTE_STRING") +TOKEN_EMPTY = sys.intern("TOKEN_EMPTY") +TOKEN_EQ = sys.intern("TOKEN_EQ") +TOKEN_FALSE = sys.intern("TOKEN_FALSE") +TOKEN_FILTER_CONTEXT = sys.intern("TOKEN_FILTER_CONTEXT") +TOKEN_FLOAT = sys.intern("TOKEN_FLOAT") +TOKEN_FUNCTION = sys.intern("TOKEN_FUNCTION") +TOKEN_GE = sys.intern("TOKEN_GE") +TOKEN_GT = sys.intern("TOKEN_GT") +TOKEN_IN = sys.intern("TOKEN_IN") +TOKEN_INT = sys.intern("TOKEN_INT") +TOKEN_LE = sys.intern("TOKEN_LE") +TOKEN_LG = sys.intern("TOKEN_LG") +TOKEN_LPAREN = sys.intern("TOKEN_LPAREN") +TOKEN_LT = sys.intern("TOKEN_LT") +TOKEN_MISSING = sys.intern("TOKEN_MISSING") +TOKEN_NE = sys.intern("TOKEN_NE") +TOKEN_NIL = sys.intern("TOKEN_NIL") +TOKEN_NONE = sys.intern("TOKEN_NONE") +TOKEN_NOT = sys.intern("TOKEN_NOT") +TOKEN_NULL = sys.intern("TOKEN_NULL") +TOKEN_OP = 
sys.intern("TOKEN_OP") +TOKEN_OR = sys.intern("TOKEN_OR") +TOKEN_RE = sys.intern("TOKEN_RE") +TOKEN_RE_FLAGS = sys.intern("TOKEN_RE_FLAGS") +TOKEN_RE_PATTERN = sys.intern("TOKEN_RE_PATTERN") +TOKEN_RPAREN = sys.intern("TOKEN_RPAREN") +TOKEN_SELF = sys.intern("TOKEN_SELF") +TOKEN_SINGLE_QUOTE_STRING = sys.intern("TOKEN_SINGLE_QUOTE_STRING") +TOKEN_STRING = sys.intern("TOKEN_STRING") +TOKEN_TRUE = sys.intern("TOKEN_TRUE") +TOKEN_UNDEFINED = sys.intern("TOKEN_UNDEFINED") # Extension tokens -TOKEN_UNION = sys.intern("UNION") -TOKEN_INTERSECTION = sys.intern("INTERSECT") +TOKEN_INTERSECTION = sys.intern("TOKEN_INTERSECTION") +TOKEN_UNION = sys.intern("TOKEN_UNION") class Token: @@ -99,7 +94,7 @@ def __init__( def __repr__(self) -> str: # pragma: no cover return ( - f"Token(kind={self.kind!r}, value={self.value!r}, " + f"Token(kind={self.kind}, value={self.value!r}, " f"index={self.index}, path={self.path!r})" ) diff --git a/pyproject.toml b/pyproject.toml index a801434..23268f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,7 +87,8 @@ omit = ["jsonpath/__about__.py", "tests/compliance.py", "tests/consensus.py"] exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] [tool.mypy] -files = "jsonpath" +files = ["jsonpath", "tests"] +exclude = ["tests/nts"] python_version = "3.11" disallow_subclassing_any = true disallow_untyped_calls = true diff --git a/tests/test_filter_expression_caching.py b/tests/test_filter_expression_caching.py index 9d1b3cf..31534c2 100644 --- a/tests/test_filter_expression_caching.py +++ b/tests/test_filter_expression_caching.py @@ -1,4 +1,5 @@ """Filter expression caching test cases.""" + from unittest import mock from jsonpath import JSONPath @@ -9,10 +10,10 @@ from jsonpath.filter import FilterExpression from jsonpath.filter import InfixExpression from jsonpath.filter import IntegerLiteral -from jsonpath.filter import RootPath -from jsonpath.filter import SelfPath +from jsonpath.filter import RelativeFilterQuery 
+from jsonpath.filter import RootFilterQuery +from jsonpath.segments import JSONPathChildSegment from jsonpath.selectors import Filter as FilterSelector -from jsonpath.selectors import ListSelector def test_cache_root_path() -> None: @@ -20,9 +21,9 @@ def test_cache_root_path() -> None: env = JSONPathEnvironment() path = env.compile("$.some[?@.a < $.thing].a") assert isinstance(path, JSONPath) - selection_list = path.selectors[1] - assert isinstance(selection_list, ListSelector) - filter_selector = selection_list.items[0] + segment = path.segments[1] + assert isinstance(segment, JSONPathChildSegment) + filter_selector = segment.selectors[0] assert isinstance(filter_selector, FilterSelector) assert filter_selector.cacheable_nodes is True @@ -31,17 +32,17 @@ def test_cache_root_path() -> None: assert isinstance(expr, BooleanExpression) expr = expr.expression assert isinstance(expr, InfixExpression) - assert isinstance(expr.left, SelfPath) - assert isinstance(expr.right, RootPath) + assert isinstance(expr.left, RelativeFilterQuery) + assert isinstance(expr.right, RootFilterQuery) # A caching copy of the original expression tree. 
expr = filter_selector.expression.cache_tree() assert isinstance(expr, BooleanExpression) expr = expr.expression assert isinstance(expr, InfixExpression) - assert isinstance(expr.left, SelfPath) + assert isinstance(expr.left, RelativeFilterQuery) assert isinstance(expr.right, CachingFilterExpression) - assert isinstance(expr.right._expr, RootPath) # noqa: SLF001 + assert isinstance(expr.right._expr, RootFilterQuery) # noqa: SLF001 def test_root_path_cache() -> None: @@ -75,9 +76,9 @@ def test_cache_context_path() -> None: env = JSONPathEnvironment() path = env.compile("$.some[?_.thing > @.a].a") assert isinstance(path, JSONPath) - selection_list = path.selectors[1] - assert isinstance(selection_list, ListSelector) - filter_selector = selection_list.items[0] + segment = path.segments[1] + assert isinstance(segment, JSONPathChildSegment) + filter_selector = segment.selectors[0] assert isinstance(filter_selector, FilterSelector) assert filter_selector.cacheable_nodes is True @@ -87,7 +88,7 @@ def test_cache_context_path() -> None: expr = expr.expression assert isinstance(expr, InfixExpression) assert isinstance(expr.left, FilterContextPath) - assert isinstance(expr.right, SelfPath) + assert isinstance(expr.right, RelativeFilterQuery) # A caching copy of the original expression tree. 
expr = filter_selector.expression.cache_tree() @@ -96,7 +97,7 @@ def test_cache_context_path() -> None: assert isinstance(expr, InfixExpression) assert isinstance(expr.left, CachingFilterExpression) assert isinstance(expr.left._expr, FilterContextPath) # noqa: SLF001 - assert isinstance(expr.right, SelfPath) + assert isinstance(expr.right, RelativeFilterQuery) def test_context_path_cache() -> None: @@ -146,9 +147,9 @@ def test_uncacheable_filter() -> None: env = JSONPathEnvironment(filter_caching=True) path = env.compile("$.some[?@.a > 2 and @.b < 4].a") assert isinstance(path, JSONPath) - selection_list = path.selectors[1] - assert isinstance(selection_list, ListSelector) - filter_selector = selection_list.items[0] + segment = path.segments[1] + assert isinstance(segment, JSONPathChildSegment) + filter_selector = segment.selectors[0] assert isinstance(filter_selector, FilterSelector) assert filter_selector.cacheable_nodes is False @@ -159,7 +160,7 @@ def test_uncacheable_filter() -> None: assert isinstance(expr, InfixExpression) assert isinstance(expr.left, InfixExpression) assert isinstance(expr.right, InfixExpression) - assert isinstance(expr.left.left, SelfPath) + assert isinstance(expr.left.left, RelativeFilterQuery) assert isinstance(expr.left.right, IntegerLiteral) - assert isinstance(expr.right.left, SelfPath) + assert isinstance(expr.right.left, RelativeFilterQuery) assert isinstance(expr.right.right, IntegerLiteral) diff --git a/tests/test_find_reference.py b/tests/test_find_reference.py index cbc7bf0..83a050d 100644 --- a/tests/test_find_reference.py +++ b/tests/test_find_reference.py @@ -2,6 +2,7 @@ See https://goessner.net/articles/JsonPath/ """ + import asyncio import dataclasses import operator @@ -220,132 +221,132 @@ class Case: }, ], ), - Case( - description="root descent", - path="$..", - data=REFERENCE_DATA, - want=[ - { - "store": { - "book": [ - { - "category": "reference", - "author": "Nigel Rees", - "title": "Sayings of the Century", - 
"price": 8.95, - }, - { - "category": "fiction", - "author": "Evelyn Waugh", - "title": "Sword of Honour", - "price": 12.99, - }, - { - "category": "fiction", - "author": "Herman Melville", - "title": "Moby Dick", - "isbn": "0-553-21311-3", - "price": 8.99, - }, - { - "category": "fiction", - "author": "J. R. R. Tolkien", - "title": "The Lord of the Rings", - "isbn": "0-395-19395-8", - "price": 22.99, - }, - ], - "bicycle": {"color": "red", "price": 19.95}, - } - }, - { - "book": [ - { - "category": "reference", - "author": "Nigel Rees", - "title": "Sayings of the Century", - "price": 8.95, - }, - { - "category": "fiction", - "author": "Evelyn Waugh", - "title": "Sword of Honour", - "price": 12.99, - }, - { - "category": "fiction", - "author": "Herman Melville", - "title": "Moby Dick", - "isbn": "0-553-21311-3", - "price": 8.99, - }, - { - "category": "fiction", - "author": "J. R. R. Tolkien", - "title": "The Lord of the Rings", - "isbn": "0-395-19395-8", - "price": 22.99, - }, - ], - "bicycle": {"color": "red", "price": 19.95}, - }, - [ - { - "category": "reference", - "author": "Nigel Rees", - "title": "Sayings of the Century", - "price": 8.95, - }, - { - "category": "fiction", - "author": "Evelyn Waugh", - "title": "Sword of Honour", - "price": 12.99, - }, - { - "category": "fiction", - "author": "Herman Melville", - "title": "Moby Dick", - "isbn": "0-553-21311-3", - "price": 8.99, - }, - { - "category": "fiction", - "author": "J. R. R. Tolkien", - "title": "The Lord of the Rings", - "isbn": "0-395-19395-8", - "price": 22.99, - }, - ], - { - "category": "reference", - "author": "Nigel Rees", - "title": "Sayings of the Century", - "price": 8.95, - }, - { - "category": "fiction", - "author": "Evelyn Waugh", - "title": "Sword of Honour", - "price": 12.99, - }, - { - "category": "fiction", - "author": "Herman Melville", - "title": "Moby Dick", - "isbn": "0-553-21311-3", - "price": 8.99, - }, - { - "category": "fiction", - "author": "J. R. R. 
Tolkien", - "title": "The Lord of the Rings", - "isbn": "0-395-19395-8", - "price": 22.99, - }, - {"color": "red", "price": 19.95}, - ], - ), + # Case( + # description="root descent", + # path="$..", + # data=REFERENCE_DATA, + # want=[ + # { + # "store": { + # "book": [ + # { + # "category": "reference", + # "author": "Nigel Rees", + # "title": "Sayings of the Century", + # "price": 8.95, + # }, + # { + # "category": "fiction", + # "author": "Evelyn Waugh", + # "title": "Sword of Honour", + # "price": 12.99, + # }, + # { + # "category": "fiction", + # "author": "Herman Melville", + # "title": "Moby Dick", + # "isbn": "0-553-21311-3", + # "price": 8.99, + # }, + # { + # "category": "fiction", + # "author": "J. R. R. Tolkien", + # "title": "The Lord of the Rings", + # "isbn": "0-395-19395-8", + # "price": 22.99, + # }, + # ], + # "bicycle": {"color": "red", "price": 19.95}, + # } + # }, + # { + # "book": [ + # { + # "category": "reference", + # "author": "Nigel Rees", + # "title": "Sayings of the Century", + # "price": 8.95, + # }, + # { + # "category": "fiction", + # "author": "Evelyn Waugh", + # "title": "Sword of Honour", + # "price": 12.99, + # }, + # { + # "category": "fiction", + # "author": "Herman Melville", + # "title": "Moby Dick", + # "isbn": "0-553-21311-3", + # "price": 8.99, + # }, + # { + # "category": "fiction", + # "author": "J. R. R. Tolkien", + # "title": "The Lord of the Rings", + # "isbn": "0-395-19395-8", + # "price": 22.99, + # }, + # ], + # "bicycle": {"color": "red", "price": 19.95}, + # }, + # [ + # { + # "category": "reference", + # "author": "Nigel Rees", + # "title": "Sayings of the Century", + # "price": 8.95, + # }, + # { + # "category": "fiction", + # "author": "Evelyn Waugh", + # "title": "Sword of Honour", + # "price": 12.99, + # }, + # { + # "category": "fiction", + # "author": "Herman Melville", + # "title": "Moby Dick", + # "isbn": "0-553-21311-3", + # "price": 8.99, + # }, + # { + # "category": "fiction", + # "author": "J. R. R. 
Tolkien", + # "title": "The Lord of the Rings", + # "isbn": "0-395-19395-8", + # "price": 22.99, + # }, + # ], + # { + # "category": "reference", + # "author": "Nigel Rees", + # "title": "Sayings of the Century", + # "price": 8.95, + # }, + # { + # "category": "fiction", + # "author": "Evelyn Waugh", + # "title": "Sword of Honour", + # "price": 12.99, + # }, + # { + # "category": "fiction", + # "author": "Herman Melville", + # "title": "Moby Dick", + # "isbn": "0-553-21311-3", + # "price": 8.99, + # }, + # { + # "category": "fiction", + # "author": "J. R. R. Tolkien", + # "title": "The Lord of the Rings", + # "isbn": "0-395-19395-8", + # "price": 22.99, + # }, + # {"color": "red", "price": 19.95}, + # ], + # ), Case( description="(reference) all elements", path="$..*", diff --git a/tests/test_lex.py b/tests/test_lex.py index 14727ac..b3335c8 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -7,28 +7,28 @@ from jsonpath import JSONPathEnvironment from jsonpath.exceptions import JSONPathSyntaxError from jsonpath.token import TOKEN_AND -from jsonpath.token import TOKEN_BARE_PROPERTY +from jsonpath.token import TOKEN_COLON from jsonpath.token import TOKEN_COMMA from jsonpath.token import TOKEN_DDOT +from jsonpath.token import TOKEN_DOT from jsonpath.token import TOKEN_DOUBLE_QUOTE_STRING from jsonpath.token import TOKEN_EQ -from jsonpath.token import TOKEN_FAKE_ROOT from jsonpath.token import TOKEN_FALSE from jsonpath.token import TOKEN_FILTER from jsonpath.token import TOKEN_FLOAT -from jsonpath.token import TOKEN_FUNCTION from jsonpath.token import TOKEN_GT from jsonpath.token import TOKEN_IN from jsonpath.token import TOKEN_INT from jsonpath.token import TOKEN_INTERSECTION from jsonpath.token import TOKEN_KEYS -from jsonpath.token import TOKEN_LIST_START +from jsonpath.token import TOKEN_LBRACKET from jsonpath.token import TOKEN_LPAREN from jsonpath.token import TOKEN_LT +from jsonpath.token import TOKEN_NAME from jsonpath.token import TOKEN_NIL from 
jsonpath.token import TOKEN_NOT from jsonpath.token import TOKEN_OR -from jsonpath.token import TOKEN_PROPERTY +from jsonpath.token import TOKEN_PSEUDO_ROOT from jsonpath.token import TOKEN_RBRACKET from jsonpath.token import TOKEN_RE from jsonpath.token import TOKEN_RE_FLAGS @@ -37,11 +37,9 @@ from jsonpath.token import TOKEN_RPAREN from jsonpath.token import TOKEN_SELF from jsonpath.token import TOKEN_SINGLE_QUOTE_STRING -from jsonpath.token import TOKEN_SLICE_START -from jsonpath.token import TOKEN_SLICE_STEP -from jsonpath.token import TOKEN_SLICE_STOP from jsonpath.token import TOKEN_TRUE from jsonpath.token import TOKEN_UNION +from jsonpath.token import TOKEN_WHITESPACE from jsonpath.token import TOKEN_WILD from jsonpath.token import Token @@ -57,33 +55,33 @@ class Case: Case( description="just root", path="$", - want=[ - Token(kind=TOKEN_ROOT, value="$", index=0, path="$"), - ], + want=[Token(kind=TOKEN_ROOT, value="$", index=0, path="$")], ), Case( - description="just fake root", + description="just pseudo-root", path="^", - want=[ - Token(kind=TOKEN_FAKE_ROOT, value="^", index=0, path="^"), - ], + want=[Token(kind=TOKEN_PSEUDO_ROOT, value="^", index=0, path="^")], ), Case( description="root dot property", path="$.some.thing", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.some.thing"), - Token(kind=TOKEN_PROPERTY, value="some", index=2, path="$.some.thing"), - Token(kind=TOKEN_PROPERTY, value="thing", index=7, path="$.some.thing"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.some.thing"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$.some.thing"), + Token(kind=TOKEN_DOT, value=".", index=6, path="$.some.thing"), + Token(kind=TOKEN_NAME, value="thing", index=7, path="$.some.thing"), ], ), Case( - description="fake root dot property", + description="pseudo root dot property", path="^.some.thing", want=[ - Token(kind=TOKEN_FAKE_ROOT, value="^", index=0, path="^.some.thing"), - Token(kind=TOKEN_PROPERTY, value="some", index=2, 
path="^.some.thing"), - Token(kind=TOKEN_PROPERTY, value="thing", index=7, path="^.some.thing"), + Token(kind=TOKEN_PSEUDO_ROOT, value="^", index=0, path="^.some.thing"), + Token(kind=TOKEN_DOT, value=".", index=1, path="^.some.thing"), + Token(kind=TOKEN_NAME, value="some", index=2, path="^.some.thing"), + Token(kind=TOKEN_DOT, value=".", index=6, path="^.some.thing"), + Token(kind=TOKEN_NAME, value="thing", index=7, path="^.some.thing"), ], ), Case( @@ -91,15 +89,11 @@ class Case: path="$[some][thing]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[some][thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[some][thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="some", index=2, path="$[some][thing]" - ), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[some][thing]"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$[some][thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=6, path="$[some][thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=7, path="$[some][thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="thing", index=8, path="$[some][thing]" - ), + Token(kind=TOKEN_LBRACKET, value="[", index=7, path="$[some][thing]"), + Token(kind=TOKEN_NAME, value="thing", index=8, path="$[some][thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=13, path="$[some][thing]"), ], ), @@ -108,7 +102,7 @@ class Case: path='$["some"]', want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path='$["some"]'), - Token(kind=TOKEN_LIST_START, value="[", index=1, path='$["some"]'), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path='$["some"]'), Token( kind=TOKEN_DOUBLE_QUOTE_STRING, value="some", index=3, path='$["some"]' ), @@ -120,7 +114,7 @@ class Case: path="$['some']", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$['some']"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$['some']"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$['some']"), Token( kind=TOKEN_SINGLE_QUOTE_STRING, value="some", 
index=3, path="$['some']" ), @@ -132,15 +126,12 @@ class Case: path="$.[some][thing]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[some][thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[some][thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="some", index=3, path="$.[some][thing]" - ), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[some][thing]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[some][thing]"), + Token(kind=TOKEN_NAME, value="some", index=3, path="$.[some][thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=7, path="$.[some][thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=8, path="$.[some][thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="thing", index=9, path="$.[some][thing]" - ), + Token(kind=TOKEN_LBRACKET, value="[", index=8, path="$.[some][thing]"), + Token(kind=TOKEN_NAME, value="thing", index=9, path="$.[some][thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=14, path="$.[some][thing]"), ], ), @@ -149,7 +140,7 @@ class Case: path="$[1]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[1]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[1]"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[1]"), Token(kind=TOKEN_INT, value="1", index=2, path="$[1]"), Token(kind=TOKEN_RBRACKET, value="]", index=3, path="$[1]"), ], @@ -159,7 +150,8 @@ class Case: path="$.[1]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[1]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[1]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[1]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[1]"), Token(kind=TOKEN_INT, value="1", index=3, path="$.[1]"), Token(kind=TOKEN_RBRACKET, value="]", index=4, path="$.[1]"), ], @@ -168,10 +160,8 @@ class Case: description="empty slice", path="[:]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[:]"), - Token(kind=TOKEN_SLICE_START, value="", index=1, path="[:]"), - 
Token(kind=TOKEN_SLICE_STOP, value="", index=2, path="[:]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="[:]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[:]"), + Token(kind=TOKEN_COLON, value=":", index=1, path="[:]"), Token(kind=TOKEN_RBRACKET, value="]", index=2, path="[:]"), ], ), @@ -179,10 +169,9 @@ class Case: description="empty slice empty step", path="[::]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[::]"), - Token(kind=TOKEN_SLICE_START, value="", index=1, path="[::]"), - Token(kind=TOKEN_SLICE_STOP, value="", index=2, path="[::]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=3, path="[::]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[::]"), + Token(kind=TOKEN_COLON, value=":", index=1, path="[::]"), + Token(kind=TOKEN_COLON, value=":", index=2, path="[::]"), Token(kind=TOKEN_RBRACKET, value="]", index=3, path="[::]"), ], ), @@ -190,10 +179,9 @@ class Case: description="slice empty stop", path="[1:]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[1:]"), - Token(kind=TOKEN_SLICE_START, value="1", index=1, path="[1:]"), - Token(kind=TOKEN_SLICE_STOP, value="", index=3, path="[1:]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="[1:]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[1:]"), + Token(kind=TOKEN_INT, value="1", index=1, path="[1:]"), + Token(kind=TOKEN_COLON, value=":", index=2, path="[1:]"), Token(kind=TOKEN_RBRACKET, value="]", index=3, path="[1:]"), ], ), @@ -201,10 +189,9 @@ class Case: description="slice empty start", path="[:-1]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[:-1]"), - Token(kind=TOKEN_SLICE_START, value="", index=1, path="[:-1]"), - Token(kind=TOKEN_SLICE_STOP, value="-1", index=2, path="[:-1]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="[:-1]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[:-1]"), + Token(kind=TOKEN_COLON, value=":", index=1, path="[:-1]"), + 
Token(kind=TOKEN_INT, value="-1", index=2, path="[:-1]"), Token(kind=TOKEN_RBRACKET, value="]", index=4, path="[:-1]"), ], ), @@ -212,10 +199,10 @@ class Case: description="slice start and stop", path="[1:7]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[1:7]"), - Token(kind=TOKEN_SLICE_START, value="1", index=1, path="[1:7]"), - Token(kind=TOKEN_SLICE_STOP, value="7", index=3, path="[1:7]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="[1:7]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[1:7]"), + Token(kind=TOKEN_INT, value="1", index=1, path="[1:7]"), + Token(kind=TOKEN_COLON, value=":", index=2, path="[1:7]"), + Token(kind=TOKEN_INT, value="7", index=3, path="[1:7]"), Token(kind=TOKEN_RBRACKET, value="]", index=4, path="[1:7]"), ], ), @@ -223,10 +210,12 @@ class Case: description="slice start, stop and step", path="[1:7:2]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[1:7:2]"), - Token(kind=TOKEN_SLICE_START, value="1", index=1, path="[1:7:2]"), - Token(kind=TOKEN_SLICE_STOP, value="7", index=3, path="[1:7:2]"), - Token(kind=TOKEN_SLICE_STEP, value="2", index=5, path="[1:7:2]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[1:7:2]"), + Token(kind=TOKEN_INT, value="1", index=1, path="[1:7:2]"), + Token(kind=TOKEN_COLON, value=":", index=2, path="[1:7:2]"), + Token(kind=TOKEN_INT, value="7", index=3, path="[1:7:2]"), + Token(kind=TOKEN_COLON, value=":", index=4, path="[1:7:2]"), + Token(kind=TOKEN_INT, value="2", index=5, path="[1:7:2]"), Token(kind=TOKEN_RBRACKET, value="]", index=6, path="[1:7:2]"), ], ), @@ -235,6 +224,7 @@ class Case: path="$.*", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.*"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.*"), Token(kind=TOKEN_WILD, value="*", index=2, path="$.*"), ], ), @@ -243,7 +233,7 @@ class Case: path="$[*]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[*]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, 
path="$[*]"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[*]"), Token(kind=TOKEN_WILD, value="*", index=2, path="$[*]"), Token(kind=TOKEN_RBRACKET, value="]", index=3, path="$[*]"), ], @@ -253,7 +243,8 @@ class Case: path="$.[*]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[*]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[*]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[*]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[*]"), Token(kind=TOKEN_WILD, value="*", index=3, path="$.[*]"), Token(kind=TOKEN_RBRACKET, value="]", index=4, path="$.[*]"), ], @@ -272,7 +263,7 @@ class Case: want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$..thing"), Token(kind=TOKEN_DDOT, value="..", index=1, path="$..thing"), - Token(kind=TOKEN_BARE_PROPERTY, value="thing", index=3, path="$..thing"), + Token(kind=TOKEN_NAME, value="thing", index=3, path="$..thing"), ], ), Case( @@ -281,7 +272,8 @@ class Case: want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$...thing"), Token(kind=TOKEN_DDOT, value="..", index=1, path="$...thing"), - Token(kind=TOKEN_PROPERTY, value="thing", index=4, path="$...thing"), + Token(kind=TOKEN_DOT, value=".", index=3, path="$...thing"), + Token(kind=TOKEN_NAME, value="thing", index=4, path="$...thing"), ], ), Case( @@ -289,7 +281,7 @@ class Case: path="$[1,4,5]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[1,4,5]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[1,4,5]"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[1,4,5]"), Token(kind=TOKEN_INT, value="1", index=2, path="$[1,4,5]"), Token(kind=TOKEN_COMMA, value=",", index=3, path="$[1,4,5]"), Token(kind=TOKEN_INT, value="4", index=4, path="$[1,4,5]"), @@ -303,12 +295,12 @@ class Case: path="$[1,4:9]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[1,4:9]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[1,4:9]"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, 
path="$[1,4:9]"), Token(kind=TOKEN_INT, value="1", index=2, path="$[1,4:9]"), Token(kind=TOKEN_COMMA, value=",", index=3, path="$[1,4:9]"), - Token(kind=TOKEN_SLICE_START, value="4", index=4, path="$[1,4:9]"), - Token(kind=TOKEN_SLICE_STOP, value="9", index=6, path="$[1,4:9]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="$[1,4:9]"), + Token(kind=TOKEN_INT, value="4", index=4, path="$[1,4:9]"), + Token(kind=TOKEN_COLON, value=":", index=5, path="$[1,4:9]"), + Token(kind=TOKEN_INT, value="9", index=6, path="$[1,4:9]"), Token(kind=TOKEN_RBRACKET, value="]", index=7, path="$[1,4:9]"), ], ), @@ -317,14 +309,10 @@ class Case: path="$[some,thing]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[some,thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[some,thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="some", index=2, path="$[some,thing]" - ), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[some,thing]"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$[some,thing]"), Token(kind=TOKEN_COMMA, value=",", index=6, path="$[some,thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="thing", index=7, path="$[some,thing]" - ), + Token(kind=TOKEN_NAME, value="thing", index=7, path="$[some,thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=12, path="$[some,thing]"), ], ), @@ -333,11 +321,13 @@ class Case: path="$.[?(@.some)]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[?(@.some)]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[?(@.some)]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[?(@.some)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[?(@.some)]"), Token(kind=TOKEN_FILTER, value="?", index=3, path="$.[?(@.some)]"), Token(kind=TOKEN_LPAREN, value="(", index=4, path="$.[?(@.some)]"), Token(kind=TOKEN_SELF, value="@", index=5, path="$.[?(@.some)]"), - Token(kind=TOKEN_PROPERTY, value="some", index=7, path="$.[?(@.some)]"), + Token(kind=TOKEN_DOT, value=".", 
index=6, path="$.[?(@.some)]"), + Token(kind=TOKEN_NAME, value="some", index=7, path="$.[?(@.some)]"), Token(kind=TOKEN_RPAREN, value=")", index=11, path="$.[?(@.some)]"), Token(kind=TOKEN_RBRACKET, value="]", index=12, path="$.[?(@.some)]"), ], @@ -347,11 +337,13 @@ class Case: path="$.[?($.some)]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[?($.some)]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[?($.some)]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[?($.some)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[?($.some)]"), Token(kind=TOKEN_FILTER, value="?", index=3, path="$.[?($.some)]"), Token(kind=TOKEN_LPAREN, value="(", index=4, path="$.[?($.some)]"), Token(kind=TOKEN_ROOT, value="$", index=5, path="$.[?($.some)]"), - Token(kind=TOKEN_PROPERTY, value="some", index=7, path="$.[?($.some)]"), + Token(kind=TOKEN_DOT, value=".", index=6, path="$.[?($.some)]"), + Token(kind=TOKEN_NAME, value="some", index=7, path="$.[?($.some)]"), Token(kind=TOKEN_RPAREN, value=")", index=11, path="$.[?($.some)]"), Token(kind=TOKEN_RBRACKET, value="]", index=12, path="$.[?($.some)]"), ], @@ -361,11 +353,12 @@ class Case: path="$.[?(@[1])]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[?(@[1])]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[?(@[1])]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[?(@[1])]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[?(@[1])]"), Token(kind=TOKEN_FILTER, value="?", index=3, path="$.[?(@[1])]"), Token(kind=TOKEN_LPAREN, value="(", index=4, path="$.[?(@[1])]"), Token(kind=TOKEN_SELF, value="@", index=5, path="$.[?(@[1])]"), - Token(kind=TOKEN_LIST_START, value="[", index=6, path="$.[?(@[1])]"), + Token(kind=TOKEN_LBRACKET, value="[", index=6, path="$.[?(@[1])]"), Token(kind=TOKEN_INT, value="1", index=7, path="$.[?(@[1])]"), Token(kind=TOKEN_RBRACKET, value="]", index=8, path="$.[?(@[1])]"), Token(kind=TOKEN_RPAREN, value=")", index=9, 
path="$.[?(@[1])]"), @@ -376,43 +369,41 @@ class Case: description="filter self dot property equality with float", path="[?(@.some == 1.1)]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=9, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1.1)]"), Token( - kind=TOKEN_PROPERTY, value="some", index=5, path="[?(@.some == 1.1)]" + kind=TOKEN_WHITESPACE, value=" ", index=12, path="[?(@.some == 1.1)]" ), - Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_FLOAT, value="1.1", index=13, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_RPAREN, value=")", index=16, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_RBRACKET, value="]", index=17, path="[?(@.some == 1.1)]"), ], ), Case( - description=( - "filter self dot property equality with float in scientific notation" - ), + description="filter self dot property equality float in scientific notation", path="[?(@.some == 1.1e10)]", want=[ Token( - kind=TOKEN_LIST_START, - value="[", - index=0, - path="[?(@.some == 1.1e10)]", + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1.1e10)]" ), Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some == 1.1e10)]"), - Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(@.some == 1.1e10)]", - ), + Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some == 1.1e10)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.some == 1.1e10)]"), + 
Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some == 1.1e10)]"), + Token(kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1.1e10)]"), Token( - kind=TOKEN_PROPERTY, value="some", index=5, path="[?(@.some == 1.1e10)]" + kind=TOKEN_WHITESPACE, value=" ", index=9, path="[?(@.some == 1.1e10)]" ), Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1.1e10)]"), + Token( + kind=TOKEN_WHITESPACE, value=" ", index=12, path="[?(@.some == 1.1e10)]" + ), Token( kind=TOKEN_FLOAT, value="1.1e10", index=13, path="[?(@.some == 1.1e10)]" ), @@ -426,14 +417,16 @@ class Case: description="filter self index equality with float", path="[?(@[1] == 1.1)]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[?(@[1] == 1.1)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@[1] == 1.1)]"), - Token(kind=TOKEN_LIST_START, value="[", index=4, path="[?(@[1] == 1.1)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=4, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_INT, value="1", index=5, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_RBRACKET, value="]", index=6, path="[?(@[1] == 1.1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=7, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_EQ, value="==", index=8, path="[?(@[1] == 1.1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=10, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_FLOAT, value="1.1", index=11, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_RPAREN, value=")", index=14, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_RBRACKET, value="]", index=15, path="[?(@[1] == 1.1)]"), @@ -443,12 +436,15 @@ class Case: description="filter self dot property equality with int", path="[?(@.some == 1)]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[?(@.some == 1)]"), + 
Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1)]"), Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some == 1)]"), Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some == 1)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.some == 1)]"), - Token(kind=TOKEN_PROPERTY, value="some", index=5, path="[?(@.some == 1)]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some == 1)]"), + Token(kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=9, path="[?(@.some == 1)]"), Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=12, path="[?(@.some == 1)]"), Token(kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1)]"), Token(kind=TOKEN_RPAREN, value=")", index=14, path="[?(@.some == 1)]"), Token(kind=TOKEN_RBRACKET, value="]", index=15, path="[?(@.some == 1)]"), @@ -458,29 +454,19 @@ class Case: description="filter self dot property equality with int in scientific notation", path="[?(@.some == 1e10)]", want=[ - Token( - kind=TOKEN_LIST_START, - value="[", - index=0, - path="[?(@.some == 1e10)]", - ), - Token( - kind=TOKEN_FILTER, - value="?", - index=1, - path="[?(@.some == 1e10)]", - ), - Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(@.some == 1e10)]", - ), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1e10)]"), + Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some == 1e10)]"), + Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some == 1e10)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.some == 1e10)]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some == 1e10)]"), + Token(kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1e10)]"), Token( - kind=TOKEN_PROPERTY, value="some", index=5, path="[?(@.some == 1e10)]" + kind=TOKEN_WHITESPACE, value=" ", index=9, path="[?(@.some == 1e10)]" ), Token(kind=TOKEN_EQ, 
value="==", index=10, path="[?(@.some == 1e10)]"), + Token( + kind=TOKEN_WHITESPACE, value=" ", index=12, path="[?(@.some == 1e10)]" + ), Token(kind=TOKEN_INT, value="1e10", index=13, path="[?(@.some == 1e10)]"), Token(kind=TOKEN_RPAREN, value=")", index=17, path="[?(@.some == 1e10)]"), Token(kind=TOKEN_RBRACKET, value="]", index=18, path="[?(@.some == 1e10)]"), @@ -491,36 +477,37 @@ class Case: path="[?(@.some =~ /foo|bar/i)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some =~ /foo|bar/i)]", ), Token( - kind=TOKEN_FILTER, - value="?", - index=1, - path="[?(@.some =~ /foo|bar/i)]", + kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some =~ /foo|bar/i)]" ), Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(@.some =~ /foo|bar/i)]", + kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some =~ /foo|bar/i)]" ), Token( kind=TOKEN_SELF, value="@", index=3, path="[?(@.some =~ /foo|bar/i)]" ), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some =~ /foo|bar/i)]"), Token( - kind=TOKEN_PROPERTY, - value="some", - index=5, + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some =~ /foo|bar/i)]" + ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, path="[?(@.some =~ /foo|bar/i)]", ), Token( - kind=TOKEN_RE, - value="=~", - index=10, + kind=TOKEN_RE, value="=~", index=10, path="[?(@.some =~ /foo|bar/i)]" + ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, path="[?(@.some =~ /foo|bar/i)]", ), Token( @@ -536,10 +523,7 @@ class Case: path="[?(@.some =~ /foo|bar/i)]", ), Token( - kind=TOKEN_RPAREN, - value=")", - index=23, - path="[?(@.some =~ /foo|bar/i)]", + kind=TOKEN_RPAREN, value=")", index=23, path="[?(@.some =~ /foo|bar/i)]" ), Token( kind=TOKEN_RBRACKET, @@ -554,12 +538,14 @@ class Case: path="$.some | $.thing", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.some | $.thing"), - Token(kind=TOKEN_PROPERTY, value="some", index=2, path="$.some | $.thing"), + 
Token(kind=TOKEN_DOT, value=".", index=1, path="$.some | $.thing"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$.some | $.thing"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=6, path="$.some | $.thing"), Token(kind=TOKEN_UNION, value="|", index=7, path="$.some | $.thing"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=8, path="$.some | $.thing"), Token(kind=TOKEN_ROOT, value="$", index=9, path="$.some | $.thing"), - Token( - kind=TOKEN_PROPERTY, value="thing", index=11, path="$.some | $.thing" - ), + Token(kind=TOKEN_DOT, value=".", index=10, path="$.some | $.thing"), + Token(kind=TOKEN_NAME, value="thing", index=11, path="$.some | $.thing"), ], ), Case( @@ -570,31 +556,64 @@ class Case: kind=TOKEN_ROOT, value="$", index=0, path="$.some | $.thing | $.other" ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, value=".", index=1, path="$.some | $.thing | $.other" + ), + Token( + kind=TOKEN_NAME, value="some", index=2, path="$.some | $.thing | $.other", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=6, + path="$.some | $.thing | $.other", + ), Token( kind=TOKEN_UNION, value="|", index=7, path="$.some | $.thing | $.other" ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=8, + path="$.some | $.thing | $.other", + ), Token( kind=TOKEN_ROOT, value="$", index=9, path="$.some | $.thing | $.other" ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, value=".", index=10, path="$.some | $.thing | $.other" + ), + Token( + kind=TOKEN_NAME, value="thing", index=11, path="$.some | $.thing | $.other", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=16, + path="$.some | $.thing | $.other", + ), Token( kind=TOKEN_UNION, value="|", index=17, path="$.some | $.thing | $.other" ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=18, + path="$.some | $.thing | $.other", + ), Token( kind=TOKEN_ROOT, value="$", index=19, path="$.some | $.thing | $.other" ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, value=".", index=20, path="$.some | 
$.thing | $.other" + ), + Token( + kind=TOKEN_NAME, value="other", index=21, path="$.some | $.thing | $.other", @@ -606,12 +625,14 @@ class Case: path="$.some & $.thing", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.some & $.thing"), - Token(kind=TOKEN_PROPERTY, value="some", index=2, path="$.some & $.thing"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.some & $.thing"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$.some & $.thing"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=6, path="$.some & $.thing"), Token(kind=TOKEN_INTERSECTION, value="&", index=7, path="$.some & $.thing"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=8, path="$.some & $.thing"), Token(kind=TOKEN_ROOT, value="$", index=9, path="$.some & $.thing"), - Token( - kind=TOKEN_PROPERTY, value="thing", index=11, path="$.some & $.thing" - ), + Token(kind=TOKEN_DOT, value=".", index=10, path="$.some & $.thing"), + Token(kind=TOKEN_NAME, value="thing", index=11, path="$.some & $.thing"), ], ), Case( @@ -619,7 +640,7 @@ class Case: path="[?(@.some > 1 and @.some < 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some > 1 and @.some < 5)]", @@ -643,29 +664,59 @@ class Case: path="[?(@.some > 1 and @.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some > 1 and @.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_GT, value=">", index=10, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=11, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_INT, value="1", index=12, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=13, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( 
kind=TOKEN_AND, value="and", index=14, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -673,17 +724,35 @@ class Case: path="[?(@.some > 1 and @.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=19, + path="[?(@.some > 1 and @.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=20, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_LT, value="<", index=25, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=26, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -709,7 +778,7 @@ class Case: path="[?(@.some == 1 or @.some == 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1 or @.some == 5)]", @@ -733,29 +802,59 @@ class Case: path="[?(@.some == 1 or @.some == 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some == 1 or @.some == 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_OR, value="or", index=15, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some == 1 or @.some == 
5)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -763,17 +862,35 @@ class Case: path="[?(@.some == 1 or @.some == 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=19, + path="[?(@.some == 1 or @.some == 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=20, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=25, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=27, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -799,7 +916,7 @@ class Case: path="[?(@.some == 1 || @.some == 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1 || @.some == 5)]", @@ -823,29 +940,59 @@ class Case: path="[?(@.some == 1 || @.some == 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some == 1 || @.some == 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_OR, value="||", index=15, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -853,17 +1000,35 @@ class Case: path="[?(@.some == 1 || @.some == 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, 
+ value=".", + index=19, + path="[?(@.some == 1 || @.some == 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=20, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=25, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=27, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -889,33 +1054,34 @@ class Case: path="[?(@.thing in [1, '1'])]", want=[ Token( - kind=TOKEN_LIST_START, - value="[", - index=0, - path="[?(@.thing in [1, '1'])]", + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.thing in [1, '1'])]" ), Token( - kind=TOKEN_FILTER, - value="?", - index=1, - path="[?(@.thing in [1, '1'])]", + kind=TOKEN_FILTER, value="?", index=1, path="[?(@.thing in [1, '1'])]" ), Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(@.thing in [1, '1'])]", + kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.thing in [1, '1'])]" ), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.thing in [1, '1'])]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.thing in [1, '1'])]"), Token( - kind=TOKEN_PROPERTY, - value="thing", - index=5, + kind=TOKEN_NAME, value="thing", index=5, path="[?(@.thing in [1, '1'])]" + ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=10, path="[?(@.thing in [1, '1'])]", ), Token(kind=TOKEN_IN, value="in", index=11, path="[?(@.thing in [1, '1'])]"), Token( - kind=TOKEN_LIST_START, + kind=TOKEN_WHITESPACE, + value=" ", + index=13, + path="[?(@.thing in [1, '1'])]", + ), + Token( + kind=TOKEN_LBRACKET, value="[", index=14, path="[?(@.thing in [1, '1'])]", @@ -924,6 +1090,12 @@ class Case: Token( kind=TOKEN_COMMA, value=",", index=16, path="[?(@.thing in [1, '1'])]" ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.thing in [1, '1'])]", + ), Token( kind=TOKEN_SINGLE_QUOTE_STRING, 
value="1", @@ -937,10 +1109,7 @@ class Case: path="[?(@.thing in [1, '1'])]", ), Token( - kind=TOKEN_RPAREN, - value=")", - index=22, - path="[?(@.thing in [1, '1'])]", + kind=TOKEN_RPAREN, value=")", index=22, path="[?(@.thing in [1, '1'])]" ), Token( kind=TOKEN_RBRACKET, @@ -955,7 +1124,7 @@ class Case: path="[?(@.some == 1 or not @.some < 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1 or not @.some < 5)]", @@ -979,35 +1148,71 @@ class Case: path="[?(@.some == 1 or not @.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some == 1 or not @.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_OR, value="or", index=15, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_NOT, value="not", index=18, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=21, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -1015,17 +1220,35 @@ class Case: path="[?(@.some == 1 or not @.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=23, + path="[?(@.some == 1 or not @.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=24, path="[?(@.some == 1 or 
not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=28, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_LT, value="<", index=29, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=30, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -1051,7 +1274,7 @@ class Case: path="[?(@.some == 1 or !@.some < 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1 or !@.some < 5)]", @@ -1075,29 +1298,59 @@ class Case: path="[?(@.some == 1 or !@.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some == 1 or !@.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_OR, value="or", index=15, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_NOT, value="!", @@ -1111,17 +1364,35 @@ class Case: path="[?(@.some == 1 or !@.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=20, + path="[?(@.some == 1 or !@.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=21, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=25, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_LT, 
value="<", index=26, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=27, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -1146,26 +1417,15 @@ class Case: description="filter true and false", path="[?(true == false)]", want=[ - Token( - kind=TOKEN_LIST_START, - value="[", - index=0, - path="[?(true == false)]", - ), - Token( - kind=TOKEN_FILTER, - value="?", - index=1, - path="[?(true == false)]", - ), - Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(true == false)]", - ), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(true == false)]"), + Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(true == false)]"), + Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(true == false)]"), Token(kind=TOKEN_TRUE, value="true", index=3, path="[?(true == false)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=7, path="[?(true == false)]"), Token(kind=TOKEN_EQ, value="==", index=8, path="[?(true == false)]"), + Token( + kind=TOKEN_WHITESPACE, value=" ", index=10, path="[?(true == false)]" + ), Token(kind=TOKEN_FALSE, value="false", index=11, path="[?(true == false)]"), Token(kind=TOKEN_RPAREN, value=")", index=16, path="[?(true == false)]"), Token(kind=TOKEN_RBRACKET, value="]", index=17, path="[?(true == false)]"), @@ -1176,7 +1436,7 @@ class Case: path="[?(nil == none && nil == null)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(nil == none && nil == null)]", @@ -1199,36 +1459,72 @@ class Case: index=3, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=6, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_EQ, value="==", index=7, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_NIL, value="none", index=10, path="[?(nil == none && nil == 
null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_AND, value="&&", index=15, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_NIL, value="nil", index=18, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=21, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_EQ, value="==", index=22, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_NIL, value="null", @@ -1254,7 +1550,7 @@ class Case: path="$['some', 'thing']", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$['some', 'thing']"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$['some', 'thing']"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$['some', 'thing']"), Token( kind=TOKEN_SINGLE_QUOTE_STRING, value="some", @@ -1262,6 +1558,7 @@ class Case: path="$['some', 'thing']", ), Token(kind=TOKEN_COMMA, value=",", index=8, path="$['some', 'thing']"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=9, path="$['some', 'thing']"), Token( kind=TOKEN_SINGLE_QUOTE_STRING, value="thing", @@ -1282,13 +1579,19 @@ class Case: path="$.some[?(length(@.thing) < 2)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=1, + path="$.some[?(length(@.thing) < 2)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=2, path="$.some[?(length(@.thing) < 2)]", ), Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=6, path="$.some[?(length(@.thing) < 2)]", @@ -1306,11 +1609,17 @@ class Case: path="$.some[?(length(@.thing) < 2)]", ), Token( - kind=TOKEN_FUNCTION, + kind=TOKEN_NAME, value="length", index=9, path="$.some[?(length(@.thing) < 2)]", ), + Token( + kind=TOKEN_LPAREN, + value="(", + 
index=15, + path="$.some[?(length(@.thing) < 2)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -1318,7 +1627,13 @@ class Case: path="$.some[?(length(@.thing) < 2)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=17, + path="$.some[?(length(@.thing) < 2)]", + ), + Token( + kind=TOKEN_NAME, value="thing", index=18, path="$.some[?(length(@.thing) < 2)]", @@ -1329,12 +1644,24 @@ class Case: index=23, path="$.some[?(length(@.thing) < 2)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="$.some[?(length(@.thing) < 2)]", + ), Token( kind=TOKEN_LT, value="<", index=25, path="$.some[?(length(@.thing) < 2)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=26, + path="$.some[?(length(@.thing) < 2)]", + ), Token( kind=TOKEN_INT, value="2", @@ -1360,7 +1687,9 @@ class Case: path="$.thing.~", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.thing.~"), - Token(kind=TOKEN_PROPERTY, value="thing", index=2, path="$.thing.~"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.thing.~"), + Token(kind=TOKEN_NAME, value="thing", index=2, path="$.thing.~"), + Token(kind=TOKEN_DOT, value=".", index=7, path="$.thing.~"), Token(kind=TOKEN_KEYS, value="~", index=8, path="$.thing.~"), ], ), @@ -1369,8 +1698,9 @@ class Case: path="$.thing[~]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.thing[~]"), - Token(kind=TOKEN_PROPERTY, value="thing", index=2, path="$.thing[~]"), - Token(kind=TOKEN_LIST_START, value="[", index=7, path="$.thing[~]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.thing[~]"), + Token(kind=TOKEN_NAME, value="thing", index=2, path="$.thing[~]"), + Token(kind=TOKEN_LBRACKET, value="[", index=7, path="$.thing[~]"), Token(kind=TOKEN_KEYS, value="~", index=8, path="$.thing[~]"), Token(kind=TOKEN_RBRACKET, value="]", index=9, path="$.thing[~]"), ], @@ -1378,81 +1708,49 @@ class Case: Case( description="implicit root selector, name selector starts with `and`", path="anderson", - want=[ - 
Token(kind=TOKEN_BARE_PROPERTY, value="anderson", index=0, path="anderson"), - ], + want=[Token(kind=TOKEN_NAME, value="anderson", index=0, path="anderson")], ), Case( description="implicit root selector, name selector starts with `or`", path="order", - want=[ - Token(kind=TOKEN_BARE_PROPERTY, value="order", index=0, path="order"), - ], + want=[Token(kind=TOKEN_NAME, value="order", index=0, path="order")], ), Case( description="implicit root selector, name selector starts with `true`", path="trueblue", - want=[ - Token(kind=TOKEN_BARE_PROPERTY, value="trueblue", index=0, path="trueblue"), - ], + want=[Token(kind=TOKEN_NAME, value="trueblue", index=0, path="trueblue")], ), Case( description="implicit root selector, name selector starts with `false`", path="falsehood", - want=[ - Token( - kind=TOKEN_BARE_PROPERTY, value="falsehood", index=0, path="falsehood" - ), - ], + want=[Token(kind=TOKEN_NAME, value="falsehood", index=0, path="falsehood")], ), Case( description="implicit root selector, name selector starts with `not`", path="nottingham", - want=[ - Token( - kind=TOKEN_BARE_PROPERTY, value="nottingham", index=0, path="nottingham" - ), - ], + want=[Token(kind=TOKEN_NAME, value="nottingham", index=0, path="nottingham")], ), Case( description="implicit root selector, name selector starts with `null`", path="nullable", - want=[ - Token(kind=TOKEN_BARE_PROPERTY, value="nullable", index=0, path="nullable"), - ], + want=[Token(kind=TOKEN_NAME, value="nullable", index=0, path="nullable")], ), Case( description="implicit root selector, name selector starts with `none`", path="nonexpert", - want=[ - Token( - kind=TOKEN_BARE_PROPERTY, value="nonexpert", index=0, path="nonexpert" - ), - ], + want=[Token(kind=TOKEN_NAME, value="nonexpert", index=0, path="nonexpert")], ), Case( description="implicit root selector, name selector starts with `undefined`", path="undefinedness", want=[ - Token( - kind=TOKEN_BARE_PROPERTY, - value="undefinedness", - index=0, - path="undefinedness", 
- ), + Token(kind=TOKEN_NAME, value="undefinedness", index=0, path="undefinedness") ], ), Case( description="implicit root selector, name selector starts with `missing`", path="missingly", - want=[ - Token( - kind=TOKEN_BARE_PROPERTY, - value="missingly", - index=0, - path="missingly", - ), - ], + want=[Token(kind=TOKEN_NAME, value="missingly", index=0, path="missingly")], ), ] diff --git a/tests/test_walk_filter_expression_tree.py b/tests/test_walk_filter_expression_tree.py index b8059ec..0dad737 100644 --- a/tests/test_walk_filter_expression_tree.py +++ b/tests/test_walk_filter_expression_tree.py @@ -1,4 +1,5 @@ """Test that we can traverse filter expression trees.""" + import dataclasses import operator from typing import List @@ -9,7 +10,6 @@ from jsonpath.filter import FilterExpression from jsonpath.filter import walk from jsonpath.selectors import Filter as FilterSelector -from jsonpath.selectors import ListSelector @dataclasses.dataclass @@ -63,13 +63,11 @@ def test_is_volatile(case: Case) -> None: assert isinstance(path, jsonpath.JSONPath) filter_selectors: List[FilterSelector] = [] - for segment in path.selectors: - if isinstance(segment, ListSelector): - filter_selectors.extend( - selector - for selector in segment.items - if isinstance(selector, FilterSelector) - ) + + for segment in path.segments: + for selector in segment.selectors: + if isinstance(selector, FilterSelector): + filter_selectors.append(selector) assert len(filter_selectors) == 1 assert is_volatile(filter_selectors[0].expression) is case.want From 33fe76d0c30f0fe0faab948edc3f0610587db3c9 Mon Sep 17 00:00:00 2001 From: James Prior Date: Fri, 8 Aug 2025 21:46:17 +0100 Subject: [PATCH 02/29] Rewrite parser WIP [skip ci] --- jsonpath/env.py | 33 +-- jsonpath/lex.py | 37 +++- jsonpath/parse.py | 444 +++++++++++++++++++++------------------ jsonpath/selectors.py | 38 ++-- jsonpath/stream.py | 142 ++++++------- jsonpath/token.py | 3 +- pyproject.toml | 2 +- tests/test_compliance.py | 22 +- 
tests/test_find.py | 18 +- tests/test_lex.py | 3 +- 10 files changed, 396 insertions(+), 346 deletions(-) diff --git a/jsonpath/env.py b/jsonpath/env.py index 8542b32..770c614 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -92,8 +92,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`. ## Class attributes Attributes: - pseudo_root_token (str): The pattern used to select a "fake" root node, one level - above the real root node. + pseudo_root_token (str): The pattern used to select a "fake" root node, one + level above the real root node. filter_context_token (str): The pattern used to select extra filter context data. Defaults to `"_"`. intersection_token (str): The pattern used as the intersection operator. @@ -180,24 +180,25 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 """ tokens = self.lexer.tokenize(path) stream = TokenStream(tokens) - pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT + pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path: Union[JSONPath, CompoundJSONPath] = JSONPath( env=self, segments=self.parser.parse(stream), pseudo_root=pseudo_root ) - if stream.current.kind != TOKEN_EOF: + # TODO: better! 
+ if stream.current().kind != TOKEN_EOF: _path = CompoundJSONPath(env=self, path=_path) - while stream.current.kind != TOKEN_EOF: - if stream.peek.kind == TOKEN_EOF: + while stream.current().kind != TOKEN_EOF: + if stream.peek().kind == TOKEN_EOF: # trailing union or intersection raise JSONPathSyntaxError( - f"expected a path after {stream.current.value!r}", - token=stream.current, + f"expected a path after {stream.current().value!r}", + token=stream.current(), ) - if stream.current.kind == TOKEN_UNION: - stream.next_token() - pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT + if stream.current().kind == TOKEN_UNION: + stream.next() + pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path = _path.union( JSONPath( env=self, @@ -205,9 +206,9 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 pseudo_root=pseudo_root, ) ) - elif stream.current.kind == TOKEN_INTERSECTION: - stream.next_token() - pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT + elif stream.current().kind == TOKEN_INTERSECTION: + stream.next() + pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path = _path.intersection( JSONPath( env=self, @@ -218,8 +219,8 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 else: # pragma: no cover # Parser.parse catches this too raise JSONPathSyntaxError( # noqa: TRY003 - f"unexpected token {stream.current.value!r}", - token=stream.current, + f"unexpected token {stream.current().value!r}", + token=stream.current(), ) return _path diff --git a/jsonpath/lex.py b/jsonpath/lex.py index 837f6dc..6583589 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -15,6 +15,7 @@ from .token import TOKEN_CONTAINS from .token import TOKEN_DDOT from .token import TOKEN_DOT +from .token import TOKEN_DOT_PROPERTY from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EQ from .token import TOKEN_ERROR @@ -22,6 +23,7 @@ from .token import TOKEN_FILTER from .token import 
TOKEN_FILTER_CONTEXT from .token import TOKEN_FLOAT +from .token import TOKEN_FUNCTION from .token import TOKEN_GE from .token import TOKEN_GT from .token import TOKEN_IN @@ -82,7 +84,6 @@ class attributes. Then setting `lexer_class` on a `JSONPathEnvironment`. """ key_pattern = r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*" - name_pattern = key_pattern # XXX: # ! or `not` logical_not_pattern = r"(?:not\b)|!" @@ -99,10 +100,15 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: self.double_quote_pattern = r'"(?P(?:(?!(?(?:(?!(?\.)(?P{self.key_pattern})" + # /pattern/ or /pattern/flags self.re_pattern = r"/(?P.+?)/(?P[aims]*)" + # func( + self.function_pattern = r"(?P[a-z][a-z_0-9]+)(?P\()" + self.rules = self.compile_rules() def compile_rules(self) -> Pattern[str]: @@ -122,6 +128,7 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern), (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern), (TOKEN_RE_PATTERN, self.re_pattern), + (TOKEN_DOT_PROPERTY, self.dot_property_pattern), (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"), (TOKEN_INT, r"-?\d+(?P[eE][+\-]?\d+)?\b"), (TOKEN_DDOT, r"\.\."), @@ -160,6 +167,7 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_LT, r"<"), (TOKEN_GT, r">"), (TOKEN_NOT, self.logical_not_pattern), # Must go after "!=" + (TOKEN_FUNCTION, self.function_pattern), (TOKEN_NAME, self.key_pattern), # Must go after reserved words (TOKEN_LPAREN, r"\("), (TOKEN_RPAREN, r"\)"), @@ -180,7 +188,18 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 kind = match.lastgroup assert kind is not None - if kind == TOKEN_DOUBLE_QUOTE_STRING: + if kind == TOKEN_DOT_PROPERTY: + yield _token( + kind=TOKEN_DOT, + value=match.group("G_DOT"), + index=match.start("G_DOT"), + ) + yield _token( + kind=TOKEN_NAME, + value=match.group("G_PROP"), + index=match.start("G_PROP"), + ) + elif kind == TOKEN_DOUBLE_QUOTE_STRING: yield _token( kind=TOKEN_DOUBLE_QUOTE_STRING, value=match.group("G_DQUOTE"), @@ 
-222,6 +241,18 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 value=match.group(), index=match.start(), ) + elif kind == TOKEN_FUNCTION: + yield _token( + kind=TOKEN_FUNCTION, + value=match.group("G_FUNC"), + index=match.start("G_FUNC"), + ) + + yield _token( + kind=TOKEN_LPAREN, + value=match.group("G_FUNC_PAREN"), + index=match.start("G_FUNC_PAREN"), + ) elif kind == TOKEN_ERROR: raise JSONPathSyntaxError( f"unexpected token {match.group()!r}", diff --git a/jsonpath/parse.py b/jsonpath/parse.py index 82ccea5..c1bb0d4 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -289,212 +289,240 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: def parse(self, stream: TokenStream) -> Iterator[JSONPathSegment]: """Parse a JSONPath from a stream of tokens.""" - if stream.current.kind in {TOKEN_ROOT, TOKEN_PSEUDO_ROOT}: - stream.next_token() + # TODO: Optionally require TOKEN_ROOT + if stream.current().kind in {TOKEN_ROOT, TOKEN_PSEUDO_ROOT}: + stream.next() - yield from self.parse_path(stream, in_filter=False) + # TODO: Support "bare" paths. Those without a leading dot for shorthand + # selectors - if stream.current.kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION): + yield from self.parse_path(stream) + + if stream.current().kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION): raise JSONPathSyntaxError( - f"unexpected token {stream.current.value!r}", - token=stream.current, + f"unexpected token {stream.current().value!r}", + token=stream.current(), ) - def parse_path( - self, - stream: TokenStream, - *, - in_filter: bool = False, - ) -> Iterable[JSONPathSegment]: - """Parse a top-level JSONPath, or one that is nested in a filter.""" + def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]: + """Parse a JSONPath query string. + + This method assumes the root, current or pseudo root identifier has + already been consumed. 
+ """ while True: - if stream.current.kind == TOKEN_DDOT: - token = stream.next_token() + stream.skip_whitespace() + if stream.current().kind == TOKEN_DOT: + # Consume the dot. + stream.next() + # Assert that dot is followed by shorthand selector without whitespace. + stream.expect(TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS) + token = stream.current() + selectors = self.parse_selectors(stream) + yield JSONPathChildSegment( + env=self.env, token=token, selectors=selectors + ) + elif stream.current().kind == TOKEN_DDOT: + token = stream.next() selectors = self.parse_selectors(stream) if not selectors: raise JSONPathSyntaxError( "missing selector for recursive descent segment", - token=stream.current, + token=stream.current(), ) yield JSONPathRecursiveDescentSegment( env=self.env, token=token, selectors=selectors ) - elif ( - stream.skip(TOKEN_DOT) - and stream.current.kind - in { - TOKEN_NAME, - TOKEN_WILD, - TOKEN_KEYS, - } - ) or stream.current.kind == TOKEN_LBRACKET: - token = stream.current + elif stream.current().kind == TOKEN_LBRACKET: + token = stream.current() selectors = self.parse_selectors(stream) yield JSONPathChildSegment( env=self.env, token=token, selectors=selectors ) else: - if in_filter: - stream.push(stream.current) break - stream.next_token() - def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: - if stream.current.kind == TOKEN_NAME: + token = stream.next() + + if token.kind == TOKEN_NAME: return ( PropertySelector( env=self.env, - token=stream.current, - name=stream.current.value, + token=token, + name=token.value, shorthand=True, ), ) - if stream.current.kind == TOKEN_WILD: - return (WildSelector(env=self.env, token=stream.current, shorthand=True),) + if token.kind == TOKEN_WILD: + return ( + WildSelector( + env=self.env, + token=token, + shorthand=True, + ), + ) - if stream.current.kind == TOKEN_KEYS: + if token.kind == TOKEN_KEYS: return ( KeysSelector( env=self.env, - token=stream.current, + token=token, shorthand=True, 
), ) - if stream.current.kind == TOKEN_LBRACKET: + if token.kind == TOKEN_LBRACKET: + stream.pos -= 1 return tuple(self.parse_bracketed_selection(stream)) return () - def parse_slice(self, stream: TokenStream) -> SliceSelector: - """Parse a slice JSONPath expression from a stream of tokens.""" - tok = stream.current - start: Optional[int] = None - stop: Optional[int] = None - step: Optional[int] = None - - def _maybe_index(token: Token) -> bool: - if token.kind == TOKEN_INT: - if len(token.value) > 1 and token.value.startswith(("0", "-0")): - raise JSONPathSyntaxError( - f"invalid index {token.value!r}", token=token - ) - return True - return False - - # 1: or : - if _maybe_index(stream.current): - start = int(stream.current.value) - stream.next_token() - - stream.expect(TOKEN_COLON) - stream.next_token() - - # 1 or 1: or : or ? - if _maybe_index(stream.current): - stop = int(stream.current.value) - stream.next_token() - if stream.current.kind == TOKEN_COLON: - stream.next_token() - elif stream.current.kind == TOKEN_COLON: - stream.expect(TOKEN_COLON) - stream.next_token() - - # 1 or ? 
- if _maybe_index(stream.current): - step = int(stream.current.value) - stream.next_token() - - stream.push(stream.current) - - return SliceSelector( - env=self.env, - token=tok, - start=start, - stop=stop, - step=step, - ) - def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]: # noqa: PLR0912 """Parse a comma separated list of JSONPath selectors.""" - tok = stream.next_token() # Skip LBRACKET + segment_token = stream.eat(TOKEN_LBRACKET) selectors: List[JSONPathSelector] = [] - while stream.current.kind != TOKEN_RBRACKET: - if stream.current.kind == TOKEN_INT: - if stream.peek.kind == TOKEN_COLON: + while True: + stream.skip_whitespace() + token = stream.current() + + if token.kind == TOKEN_RBRACKET: + break + + if token.kind == TOKEN_INT: + if ( + stream.peek().kind == TOKEN_COLON + or stream.peek(2).kind == TOKEN_COLON + ): selectors.append(self.parse_slice(stream)) else: - if ( - len(stream.current.value) > 1 - and stream.current.value.startswith("0") - ) or stream.current.value.startswith("-0"): - raise JSONPathSyntaxError( - "leading zero in index selector", token=stream.current - ) + self._raise_for_leading_zero(token) selectors.append( IndexSelector( env=self.env, - token=stream.current, - index=int(stream.current.value), + token=token, + index=int(token.value), ) ) - elif stream.current.kind in ( + stream.next() + elif token.kind in ( TOKEN_DOUBLE_QUOTE_STRING, TOKEN_SINGLE_QUOTE_STRING, ): selectors.append( PropertySelector( env=self.env, - token=stream.current, - name=self._decode_string_literal(stream.current), + token=token, + name=self._decode_string_literal(token), shorthand=False, ), ) - elif stream.current.kind == TOKEN_COLON: + stream.next() + elif token.kind == TOKEN_COLON: selectors.append(self.parse_slice(stream)) - elif stream.current.kind == TOKEN_WILD: + elif token.kind == TOKEN_WILD: selectors.append( WildSelector( env=self.env, - token=stream.current, + token=token, shorthand=False, ) ) - elif 
stream.current.kind == TOKEN_FILTER: - selectors.append(self.parse_filter_selector(stream)) - elif stream.current.kind == TOKEN_EOF: - raise JSONPathSyntaxError( - "unexpected end of query", token=stream.current + stream.next() + elif token.kind == TOKEN_KEYS: + selectors.append( + KeysSelector(env=self.env, token=token, shorthand=False) ) + stream.next() + elif token.kind == TOKEN_FILTER: + selectors.append(self.parse_filter_selector(stream)) + elif token.kind == TOKEN_EOF: + raise JSONPathSyntaxError("unexpected end of query", token=token) else: raise JSONPathSyntaxError( - f"unexpected token in bracketed selection {stream.current.kind!r}", - token=stream.current, + f"unexpected token in bracketed selection {token.kind!r}", + token=token, ) - if stream.peek.kind == TOKEN_EOF: - raise JSONPathSyntaxError( - "unexpected end of selector list", - token=stream.current, - ) + # XXX: + # if stream.peek().kind == TOKEN_EOF: + # raise JSONPathSyntaxError( + # "unexpected end of segment", + # token=stream.current(), + # ) - if stream.peek.kind != TOKEN_RBRACKET: - stream.expect_peek(TOKEN_COMMA) - stream.next_token() - stream.expect_peek_not(TOKEN_RBRACKET, "unexpected trailing comma") + stream.skip_whitespace() - stream.next_token() + if stream.current().kind != TOKEN_RBRACKET: + stream.eat(TOKEN_COMMA) + stream.skip_whitespace() + if stream.current().kind == TOKEN_RBRACKET: + raise JSONPathSyntaxError( + "unexpected trailing comma", token=stream.current() + ) + + stream.eat(TOKEN_RBRACKET) if not selectors: - raise JSONPathSyntaxError("empty bracketed segment", token=tok) + raise JSONPathSyntaxError("empty bracketed segment", token=segment_token) return selectors + def parse_slice(self, stream: TokenStream) -> SliceSelector: + """Parse a slice JSONPath expression from a stream of tokens.""" + token = stream.current() + start: Optional[int] = None + stop: Optional[int] = None + step: Optional[int] = None + + def _maybe_index(token: Token) -> bool: + if token.kind == 
TOKEN_INT: + if len(token.value) > 1 and token.value.startswith(("0", "-0")): + raise JSONPathSyntaxError( + f"invalid index {token.value!r}", token=token + ) + return True + return False + + # 1: or : + if _maybe_index(stream.current()): + start = int(stream.current().value) + stream.next() + + stream.skip_whitespace() + stream.expect(TOKEN_COLON) + stream.next() + stream.skip_whitespace() + + # 1 or 1: or : or ? + if _maybe_index(stream.current()): + stop = int(stream.current().value) + stream.next() + stream.skip_whitespace() + if stream.current().kind == TOKEN_COLON: + stream.next() + elif stream.current().kind == TOKEN_COLON: + stream.expect(TOKEN_COLON) + stream.next() + + # 1 or ? + stream.skip_whitespace() + if _maybe_index(stream.current()): + step = int(stream.current().value) + stream.next() + + return SliceSelector( + env=self.env, + token=token, + start=start, + stop=stop, + step=step, + ) + def parse_filter_selector(self, stream: TokenStream) -> Filter: - tok = stream.next_token() + token = stream.eat(TOKEN_FILTER) expr = self.parse_filter_expression(stream) if self.env.well_typed and isinstance(expr, FunctionExtension): @@ -505,42 +533,44 @@ def parse_filter_selector(self, stream: TokenStream) -> Filter: and func.return_type == ExpressionType.VALUE ): raise JSONPathTypeError( - f"result of {expr.name}() must be compared", token=tok + f"result of {expr.name}() must be compared", token=token ) if isinstance(expr, (Literal, Nil)): raise JSONPathSyntaxError( "filter expression literals outside of " "function expressions must be compared", - token=tok, + token=token, ) - return Filter(env=self.env, token=tok, expression=BooleanExpression(expr)) + return Filter(env=self.env, token=token, expression=BooleanExpression(expr)) def parse_boolean(self, stream: TokenStream) -> FilterExpression: - if stream.current.kind == TOKEN_TRUE: + if stream.next().kind == TOKEN_TRUE: return TRUE return FALSE - def parse_nil(self, _: TokenStream) -> FilterExpression: + def 
parse_nil(self, stream: TokenStream) -> FilterExpression: + stream.next() return NIL - def parse_undefined(self, _: TokenStream) -> FilterExpression: + def parse_undefined(self, stream: TokenStream) -> FilterExpression: + stream.next() return UNDEFINED_LITERAL def parse_string_literal(self, stream: TokenStream) -> FilterExpression: - return StringLiteral(value=self._decode_string_literal(stream.current)) + return StringLiteral(value=self._decode_string_literal(stream.next())) def parse_integer_literal(self, stream: TokenStream) -> FilterExpression: # Convert to float first to handle scientific notation. - return IntegerLiteral(value=int(float(stream.current.value))) + return IntegerLiteral(value=int(float(stream.next().value))) def parse_float_literal(self, stream: TokenStream) -> FilterExpression: - return FloatLiteral(value=float(stream.current.value)) + return FloatLiteral(value=float(stream.next().value)) def parse_prefix_expression(self, stream: TokenStream) -> FilterExpression: - tok = stream.next_token() - assert tok.kind == TOKEN_NOT + token = stream.next() + assert token.kind == TOKEN_NOT return PrefixExpression( operator="!", right=self.parse_filter_expression( @@ -551,169 +581,173 @@ def parse_prefix_expression(self, stream: TokenStream) -> FilterExpression: def parse_infix_expression( self, stream: TokenStream, left: FilterExpression ) -> FilterExpression: - tok = stream.next_token() - precedence = self.PRECEDENCES.get(tok.kind, self.PRECEDENCE_LOWEST) + token = stream.next() + precedence = self.PRECEDENCES.get(token.kind, self.PRECEDENCE_LOWEST) right = self.parse_filter_expression(stream, precedence) - operator = self.BINARY_OPERATORS[tok.kind] + operator = self.BINARY_OPERATORS[token.kind] if self.env.well_typed and operator in self.COMPARISON_OPERATORS: - self._raise_for_non_comparable_function(left, tok) - self._raise_for_non_comparable_function(right, tok) + self._raise_for_non_comparable_function(left, token) + 
self._raise_for_non_comparable_function(right, token) if operator not in self.INFIX_LITERAL_OPERATORS: if isinstance(left, (Literal, Nil)): raise JSONPathSyntaxError( "filter expression literals outside of " "function expressions must be compared", - token=tok, + token=token, ) if isinstance(right, (Literal, Nil)): raise JSONPathSyntaxError( "filter expression literals outside of " "function expressions must be compared", - token=tok, + token=token, ) return InfixExpression(left, operator, right) def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression: - stream.next_token() + stream.eat(TOKEN_LPAREN) expr = self.parse_filter_expression(stream) - stream.next_token() - while stream.current.kind != TOKEN_RPAREN: - if stream.current.kind == TOKEN_EOF: - raise JSONPathSyntaxError( - "unbalanced parentheses", token=stream.current - ) + while stream.current().kind != TOKEN_RPAREN: + token = stream.current() + if token.kind == TOKEN_EOF: + raise JSONPathSyntaxError("unbalanced parentheses", token=token) - if stream.current.kind not in self.BINARY_OPERATORS: + if token.kind not in self.BINARY_OPERATORS: raise JSONPathSyntaxError( - f"expected an expression, found '{stream.current.value}'", - token=stream.current, + f"expected an expression, found '{token.value}'", + token=token, ) expr = self.parse_infix_expression(stream, expr) - stream.expect(TOKEN_RPAREN) + stream.eat(TOKEN_RPAREN) return expr def parse_root_path(self, stream: TokenStream) -> FilterExpression: - root = stream.next_token() + root = stream.next() return RootFilterQuery( JSONPath( env=self.env, - segments=self.parse_path(stream, in_filter=True), + segments=self.parse_path(stream), pseudo_root=root.kind == TOKEN_PSEUDO_ROOT, ) ) def parse_self_path(self, stream: TokenStream) -> FilterExpression: - stream.next_token() + stream.next() return RelativeFilterQuery( - JSONPath(env=self.env, segments=self.parse_path(stream, in_filter=True)) + JSONPath(env=self.env, 
segments=self.parse_path(stream)) ) - def parse_current_key(self, _: TokenStream) -> FilterExpression: + def parse_current_key(self, stream: TokenStream) -> FilterExpression: + stream.next() return CURRENT_KEY def parse_filter_context_path(self, stream: TokenStream) -> FilterExpression: - stream.next_token() + stream.next() return FilterContextPath( - JSONPath(env=self.env, segments=self.parse_path(stream, in_filter=True)) + JSONPath(env=self.env, segments=self.parse_path(stream)) ) def parse_regex(self, stream: TokenStream) -> FilterExpression: - pattern = stream.current.value + pattern = stream.current().value flags = 0 - if stream.peek.kind == TOKEN_RE_FLAGS: - stream.next_token() - for flag in set(stream.current.value): + if stream.peek().kind == TOKEN_RE_FLAGS: + stream.next() + for flag in set(stream.next().value): flags |= self.RE_FLAG_MAP[flag] return RegexLiteral(value=re.compile(pattern, flags)) def parse_list_literal(self, stream: TokenStream) -> FilterExpression: - stream.next_token() + stream.eat(TOKEN_LBRACKET) list_items: List[FilterExpression] = [] - while stream.current.kind != TOKEN_RBRACKET: + while stream.current().kind != TOKEN_RBRACKET: try: - list_items.append(self.list_item_map[stream.current.kind](stream)) + list_items.append(self.list_item_map[stream.current().kind](stream)) except KeyError as err: raise JSONPathSyntaxError( - f"unexpected {stream.current.value!r}", - token=stream.current, + f"unexpected {stream.current().value!r}", + token=stream.current(), ) from err - if stream.peek.kind != TOKEN_RBRACKET: + if stream.peek().kind != TOKEN_RBRACKET: stream.expect_peek(TOKEN_COMMA) - stream.next_token() + stream.next() - stream.next_token() + stream.next() + stream.eat(TOKEN_RBRACKET) return ListLiteral(list_items) def parse_function_extension(self, stream: TokenStream) -> FilterExpression: function_arguments: List[FilterExpression] = [] - tok = stream.next_token() + function_token = stream.next() + stream.eat(TOKEN_LPAREN) + + while 
True: + stream.skip_whitespace() + token = stream.current() + + if token.kind == TOKEN_RPAREN: + break - while stream.current.kind != TOKEN_RPAREN: try: - func = self.function_argument_map[stream.current.kind] + func = self.function_argument_map[token.kind] except KeyError as err: raise JSONPathSyntaxError( - f"unexpected {stream.current.value!r}", - token=stream.current, + f"unexpected {token.value!r}", token=token ) from err expr = func(stream) + stream.skip_whitespace() - # The argument could be a comparison or logical expression - peek_kind = stream.peek.kind - while peek_kind in self.BINARY_OPERATORS: - stream.next_token() + while stream.current().kind in self.BINARY_OPERATORS: expr = self.parse_infix_expression(stream, expr) - peek_kind = stream.peek.kind function_arguments.append(expr) + stream.skip_whitespace() - if stream.peek.kind != TOKEN_RPAREN: - stream.expect_peek(TOKEN_COMMA) - stream.next_token() + if stream.current().kind != TOKEN_RPAREN: + stream.eat(TOKEN_COMMA) - stream.next_token() + stream.eat(TOKEN_RPAREN) return FunctionExtension( - tok.value, - self.env.validate_function_extension_signature(tok, function_arguments), + function_token.value, + self.env.validate_function_extension_signature( + function_token, function_arguments + ), ) def parse_filter_expression( self, stream: TokenStream, precedence: int = PRECEDENCE_LOWEST ) -> FilterExpression: + stream.skip_whitespace() + token = stream.current() + try: - left = self.token_map[stream.current.kind](stream) + left = self.token_map[token.kind](stream) except KeyError as err: - if stream.current.kind in (TOKEN_EOF, TOKEN_RBRACKET): + if token.kind in (TOKEN_EOF, TOKEN_RBRACKET): msg = "end of expression" else: - msg = repr(stream.current.value) - raise JSONPathSyntaxError( - f"unexpected {msg}", token=stream.current - ) from err + msg = repr(token.value) + raise JSONPathSyntaxError(f"unexpected {msg}", token=token) from err while True: - peek_kind = stream.peek.kind + stream.skip_whitespace() 
+ kind = stream.current().kind + if ( - peek_kind in (TOKEN_EOF, TOKEN_RBRACKET) - or self.PRECEDENCES.get(peek_kind, self.PRECEDENCE_LOWEST) < precedence + kind not in self.BINARY_OPERATORS + or self.PRECEDENCES.get(kind, self.PRECEDENCE_LOWEST) < precedence ): break - if peek_kind not in self.BINARY_OPERATORS: - return left - - stream.next_token() left = self.parse_infix_expression(stream, left) return left @@ -748,3 +782,9 @@ def _raise_for_non_comparable_function( raise JSONPathTypeError( f"result of {expr.name}() is not comparable", token ) + + def _raise_for_leading_zero(self, token: Token) -> None: + if ( + len(token.value) > 1 and token.value.startswith("0") + ) or token.value.startswith("-0"): + raise JSONPathSyntaxError("leading zero in index selector", token=token) diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index 89e2490..d13071b 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -150,15 +150,16 @@ def _normalized_index(self, obj: Sequence[object]) -> int: return self.index def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: - if isinstance(node.obj, Mapping): - # Try the string representation of the index as a key. - with suppress(KeyError): - match = node.new_child( - self.env.getitem(node.obj, self._as_key), self.index - ) - node.add_child(match) - yield match - elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + # TODO: Optionally try string representation of int + # if isinstance(node.obj, Mapping): + # # Try the string representation of the index as a key. 
+ # with suppress(KeyError): + # match = node.new_child( + # self.env.getitem(node.obj, self._as_key), self.index + # ) + # node.add_child(match) + # yield match + if isinstance(node.obj, Sequence) and not isinstance(node.obj, str): norm_index = self._normalized_index(node.obj) with suppress(IndexError): match = node.new_child( @@ -168,15 +169,16 @@ def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: yield match async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: - if isinstance(node.obj, Mapping): - # Try the string representation of the index as a key. - with suppress(KeyError): - match = node.new_child( - await self.env.getitem_async(node.obj, self._as_key), self.index - ) - node.add_child(match) - yield match - elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + # XXX + # if isinstance(node.obj, Mapping): + # # Try the string representation of the index as a key. + # with suppress(KeyError): + # match = node.new_child( + # await self.env.getitem_async(node.obj, self._as_key), self.index + # ) + # node.add_child(match) + # yield match + if isinstance(node.obj, Sequence) and not isinstance(node.obj, str): norm_index = self._normalized_index(node.obj) with suppress(IndexError): match = node.new_child( diff --git a/jsonpath/stream.py b/jsonpath/stream.py index 0a6e052..775c95f 100644 --- a/jsonpath/stream.py +++ b/jsonpath/stream.py @@ -1,114 +1,94 @@ -# noqa: D100 +"""Step through a stream of tokens.""" + from __future__ import annotations -from collections import deque -from typing import Deque -from typing import Iterator -from typing import Optional +from typing import Iterable from .exceptions import JSONPathSyntaxError from .token import TOKEN_EOF +from .token import TOKEN_WHITESPACE from .token import Token -# ruff: noqa: D102 - class TokenStream: - """Step through or iterate a stream of tokens.""" - - def __init__(self, token_iter: Iterator[Token]): - self.iter = token_iter - self._pushed: 
Deque[Token] = deque() - self.current = Token("", "", -1, "") - next(self) - - class TokenStreamIterator: - """An iterable token stream.""" - - def __init__(self, stream: TokenStream): - self.stream = stream - - def __iter__(self) -> Iterator[Token]: - return self - - def __next__(self) -> Token: - tok = self.stream.current - if tok.kind is TOKEN_EOF: - self.stream.close() - raise StopIteration - next(self.stream) - return tok - - def __iter__(self) -> Iterator[Token]: - return self.TokenStreamIterator(self) - - def __next__(self) -> Token: - tok = self.current - if self._pushed: - self.current = self._pushed.popleft() - elif self.current.kind is not TOKEN_EOF: - try: - self.current = next(self.iter) - except StopIteration: - self.close() - return tok + """Step through a stream of tokens.""" + + def __init__(self, token_iter: Iterable[Token]): + self.tokens = list(token_iter) + self.pos = 0 + self.eof = Token(TOKEN_EOF, "", -1, self.tokens[0].path) def __str__(self) -> str: # pragma: no cover return f"current: {self.current}\nnext: {self.peek}" - def next_token(self) -> Token: - """Return the next token from the stream.""" - return next(self) - - @property - def peek(self) -> Token: - """Look at the next token.""" - current = next(self) - result = self.current - self.push(current) - return result - - def push(self, tok: Token) -> None: - """Push a token back to the stream.""" - self._pushed.append(self.current) - self.current = tok - - def close(self) -> None: - """Close the stream.""" - self.current = Token(TOKEN_EOF, "", -1, "") + def current(self) -> Token: + """Return the token at the current position in the stream.""" + try: + return self.tokens[self.pos] + except IndexError: + return self.eof + + def next(self) -> Token: + """Return the token at the current position and advance the pointer.""" + try: + token = self.tokens[self.pos] + self.pos += 1 + return token + except IndexError: + return self.eof + + def peek(self, offset: int = 1) -> Token: + """Return 
the token at current position plus the offset.
+
+        Does not advance the pointer.
+        """
+        try:
+            return self.tokens[self.pos + offset]
+        except IndexError:
+            return self.eof
+
+    def eat(self, kind: str, message: str | None = None) -> Token:
+        """Assert the type of the current token and advance the pointer."""
+        token = self.next()
+        if token.kind != kind:
+            raise JSONPathSyntaxError(
+                message or f"expected {kind}, found {token.kind!r}",
+                token=token,
+            )
+        return token
 
     def expect(self, *typ: str) -> None:
-        if self.current.kind not in typ:
+        """Raise an exception if the current token is not in `typ`."""
+        token = self.current()
+        if token.kind not in typ:
             if len(typ) == 1:
                 _typ = repr(typ[0])
             else:
                 _typ = f"one of {typ!r}"
             raise JSONPathSyntaxError(
-                f"expected {_typ}, found {self.current.kind!r}",
-                token=self.current,
+                f"expected {_typ}, found {token.kind!r}",
+                token=token,
             )
 
     def expect_peek(self, *typ: str) -> None:
-        if self.peek.kind not in typ:
+        """Raise an exception if the next token is not in `typ`."""
+        token = self.peek()
+        if token.kind not in typ:
             if len(typ) == 1:
                 _typ = repr(typ[0])
             else:
                 _typ = f"one of {typ!r}"
             raise JSONPathSyntaxError(
-                f"expected {_typ}, found {self.peek.kind!r}",
-                token=self.peek,
+                f"expected {_typ}, found {token.kind!r}",
+                token=token,
             )
 
     def expect_peek_not(self, typ: str, message: str) -> None:
         """Raise an exception if the next token kind of _typ_."""
-        if self.peek.kind == typ:
-            raise JSONPathSyntaxError(message, token=self.peek)
-
-    def eat(self, *typ: str) -> Token:
-        self.expect(*typ)
-        return self.next_token()
+        if self.peek().kind == typ:
+            raise JSONPathSyntaxError(message, token=self.peek())
 
-    def skip(self, *typ: str) -> Optional[Token]:
-        if self.current.kind in typ:
-            return self.next_token()
-        return None
+    def skip_whitespace(self) -> None:
+        """Skip whitespace."""
+        if self.current().kind == TOKEN_WHITESPACE:
+            self.pos += 1
 diff --git a/jsonpath/stream.py b/jsonpath/stream.py
index 0a6e052..775c95f 
100644 --- a/jsonpath/token.py +++ b/jsonpath/token.py @@ -22,7 +22,8 @@ TOKEN_RBRACKET = sys.intern("TOKEN_RBRACKET") TOKEN_ROOT = sys.intern("TOKEN_ROOT") TOKEN_WILD = sys.intern("TOKEN_WILD") -TOKEN_NAME = sys.intern("TOKEN_NAME") # An object property/key or a function name +TOKEN_NAME = sys.intern("TOKEN_NAME") +TOKEN_DOT_PROPERTY = sys.intern("TOKEN_DOT_PROPERTY") # Filter expression tokens TOKEN_AND = sys.intern("TOKEN_AND") diff --git a/pyproject.toml b/pyproject.toml index 23268f8..fe30c78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,7 +88,7 @@ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] [tool.mypy] files = ["jsonpath", "tests"] -exclude = ["tests/nts"] +exclude = ["tests/nts", "tests/cts"] python_version = "3.11" disallow_subclassing_any = true disallow_untyped_calls = true diff --git a/tests/test_compliance.py b/tests/test_compliance.py index 38592cb..6430f07 100644 --- a/tests/test_compliance.py +++ b/tests/test_compliance.py @@ -35,10 +35,10 @@ class Case: SKIP = { - "basic, no leading whitespace": "flexible whitespace policy", + # "basic, no leading whitespace": "flexible whitespace policy", "basic, no trailing whitespace": "flexible whitespace policy", - "basic, bald descendant segment": "almost has a consensus", - "filter, index segment on object, selects nothing": "flexible selector policy", + # "basic, bald descendant segment": "almost has a consensus", + # "filter, index segment on object, selects nothing": "flexible selector policy", "functions, match, dot matcher on \\u2028": "standard library re policy", "functions, match, dot matcher on \\u2029": "standard library re policy", "functions, search, dot matcher on \\u2028": "standard library re policy", @@ -76,14 +76,14 @@ class Case: "name selector, double quotes, non-surrogate surrogate": "expected behavior policy", "name selector, double quotes, surrogate supplementary": "expected behavior policy", "name selector, double quotes, supplementary 
surrogate": "expected behavior policy", - "whitespace, selectors, space between dot and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, newline between dot and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, tab between dot and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, return between dot and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, space between recursive descent and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, newline between recursive descent and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, tab between recursive descent and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, return between recursive descent and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, space between dot and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, newline between dot and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, tab between dot and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, return between dot and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, space between recursive descent and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, newline between recursive descent and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, tab between recursive descent and name": "flexible whitespace policy", # noqa: E501 + # "whitespace, selectors, return between recursive descent and name": "flexible whitespace policy", # noqa: E501 } diff --git a/tests/test_find.py b/tests/test_find.py index 140a291..ef399b4 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -21,12 +21,6 @@ class Case: TEST_CASES = [ - Case( - description="property key that looks like an index", - 
path="$[some][0]", - data={"some": {"0": "thing"}}, - want=["thing"], - ), Case( description="slice a mapping", path="$.some[0:4]", @@ -58,14 +52,14 @@ class Case: want=[{"foo": 1}, {"foo": 2}], ), Case( - description="select root value using fake root", - path="^[?@some.thing > 7]", + description="select root value using pseudo root", + path="^[?@.some.thing > 7]", data={"some": {"thing": 42}}, want=[{"some": {"thing": 42}}], ), Case( - description="fake root in a filter query", - path="^[?@some.thing > value(^.*.num)]", + description="pseudo root in a filter query", + path="^[?@.some.thing > value(^.*.num)]", data={"some": {"thing": 42}, "num": 7}, want=[{"some": {"thing": 42}, "num": 7}], ), @@ -129,13 +123,13 @@ class Case: ), Case( description="quoted reserved word, and", - path="['and']", + path="$['and']", data={"and": [1, 2, 3]}, want=[[1, 2, 3]], ), Case( description="quoted reserved word, or", - path="['or']", + path="$['or']", data={"or": [1, 2, 3]}, want=[[1, 2, 3]], ), diff --git a/tests/test_lex.py b/tests/test_lex.py index b3335c8..8241a04 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -16,6 +16,7 @@ from jsonpath.token import TOKEN_FALSE from jsonpath.token import TOKEN_FILTER from jsonpath.token import TOKEN_FLOAT +from jsonpath.token import TOKEN_FUNCTION from jsonpath.token import TOKEN_GT from jsonpath.token import TOKEN_IN from jsonpath.token import TOKEN_INT @@ -1609,7 +1610,7 @@ class Case: path="$.some[?(length(@.thing) < 2)]", ), Token( - kind=TOKEN_NAME, + kind=TOKEN_FUNCTION, value="length", index=9, path="$.some[?(length(@.thing) < 2)]", From c7a10af0873df6e3c4d65ee882b2cb4878f1d814 Mon Sep 17 00:00:00 2001 From: James Prior Date: Sat, 9 Aug 2025 09:28:01 +0100 Subject: [PATCH 03/29] Fix canonical paths, compound paths and list literals --- jsonpath/env.py | 5 +++ jsonpath/parse.py | 53 +++++++++++++++---------- jsonpath/selectors.py | 14 ++----- jsonpath/stream.py | 3 +- tests/test_env.py | 5 ++- tests/test_errors.py | 7 +++- 
tests/test_filter_expression_caching.py | 4 +- tests/test_parse.py | 18 --------- 8 files changed, 52 insertions(+), 57 deletions(-) diff --git a/jsonpath/env.py b/jsonpath/env.py index 770c614..7719fff 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -185,6 +185,9 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 env=self, segments=self.parser.parse(stream), pseudo_root=pseudo_root ) + # TODO: Optionally raise for trailing whitespace + stream.skip_whitespace() + # TODO: better! if stream.current().kind != TOKEN_EOF: _path = CompoundJSONPath(env=self, path=_path) @@ -198,6 +201,7 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 if stream.current().kind == TOKEN_UNION: stream.next() + stream.skip_whitespace() pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path = _path.union( JSONPath( @@ -208,6 +212,7 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 ) elif stream.current().kind == TOKEN_INTERSECTION: stream.next() + stream.skip_whitespace() pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path = _path.intersection( JSONPath( diff --git a/jsonpath/parse.py b/jsonpath/parse.py index c1bb0d4..3e08c2a 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -241,6 +241,7 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: TOKEN_FUNCTION: self.parse_function_extension, TOKEN_INT: self.parse_integer_literal, TOKEN_KEY: self.parse_current_key, + TOKEN_LBRACKET: self.parse_list_literal, TOKEN_LPAREN: self.parse_grouped_expression, TOKEN_MISSING: self.parse_undefined, TOKEN_NIL: self.parse_nil, @@ -293,9 +294,6 @@ def parse(self, stream: TokenStream) -> Iterator[JSONPathSegment]: if stream.current().kind in {TOKEN_ROOT, TOKEN_PSEUDO_ROOT}: stream.next() - # TODO: Support "bare" paths. 
Those without a leading dot for shorthand - # selectors - yield from self.parse_path(stream) if stream.current().kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION): @@ -312,9 +310,9 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]: """ while True: stream.skip_whitespace() - if stream.current().kind == TOKEN_DOT: - # Consume the dot. - stream.next() + _token = stream.current() + if _token.kind == TOKEN_DOT: + stream.eat(TOKEN_DOT) # Assert that dot is followed by shorthand selector without whitespace. stream.expect(TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS) token = stream.current() @@ -322,8 +320,8 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]: yield JSONPathChildSegment( env=self.env, token=token, selectors=selectors ) - elif stream.current().kind == TOKEN_DDOT: - token = stream.next() + elif _token.kind == TOKEN_DDOT: + token = stream.eat(TOKEN_DDOT) selectors = self.parse_selectors(stream) if not selectors: raise JSONPathSyntaxError( @@ -333,7 +331,14 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]: yield JSONPathRecursiveDescentSegment( env=self.env, token=token, selectors=selectors ) - elif stream.current().kind == TOKEN_LBRACKET: + elif _token.kind == TOKEN_LBRACKET: + selectors = self.parse_selectors(stream) + yield JSONPathChildSegment( + env=self.env, token=_token, selectors=selectors + ) + elif _token.kind in {TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS}: + # A non-standard "bare" path. One without a leading identifier (`$`, + # `@`, `^` or `_`). 
token = stream.current() selectors = self.parse_selectors(stream) yield JSONPathChildSegment( @@ -377,6 +382,7 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: stream.pos -= 1 return tuple(self.parse_bracketed_selection(stream)) + stream.pos -= 1 return () def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]: # noqa: PLR0912 @@ -446,15 +452,14 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto token=token, ) - # XXX: - # if stream.peek().kind == TOKEN_EOF: - # raise JSONPathSyntaxError( - # "unexpected end of segment", - # token=stream.current(), - # ) - stream.skip_whitespace() + if stream.current().kind == TOKEN_EOF: + raise JSONPathSyntaxError( + "unexpected end of segment", + token=stream.current(), + ) + if stream.current().kind != TOKEN_RBRACKET: stream.eat(TOKEN_COMMA) stream.skip_whitespace() @@ -665,7 +670,12 @@ def parse_list_literal(self, stream: TokenStream) -> FilterExpression: stream.eat(TOKEN_LBRACKET) list_items: List[FilterExpression] = [] - while stream.current().kind != TOKEN_RBRACKET: + while True: + stream.skip_whitespace() + + if stream.current().kind == TOKEN_RBRACKET: + break + try: list_items.append(self.list_item_map[stream.current().kind](stream)) except KeyError as err: @@ -674,11 +684,10 @@ def parse_list_literal(self, stream: TokenStream) -> FilterExpression: token=stream.current(), ) from err - if stream.peek().kind != TOKEN_RBRACKET: - stream.expect_peek(TOKEN_COMMA) - stream.next() - - stream.next() + stream.skip_whitespace() + if stream.current().kind != TOKEN_RBRACKET: + stream.eat(TOKEN_COMMA) + stream.skip_whitespace() stream.eat(TOKEN_RBRACKET) return ListLiteral(list_items) diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index d13071b..362bc9f 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -71,11 +71,7 @@ def __init__( self.shorthand = shorthand def __str__(self) -> str: - return ( - 
f"[{canonical_string(self.name)}]" - if self.shorthand - else f"{canonical_string(self.name)}" - ) + return canonical_string(self.name) def __eq__(self, __value: object) -> bool: return ( @@ -203,11 +199,7 @@ def __init__( self.shorthand = shorthand def __str__(self) -> str: - return ( - f"[{self.env.keys_selector_token}]" - if self.shorthand - else self.env.keys_selector_token - ) + return self.env.keys_selector_token def __eq__(self, __value: object) -> bool: return isinstance(__value, KeysSelector) and self.token == __value.token @@ -315,7 +307,7 @@ def __init__( self.shorthand = shorthand def __str__(self) -> str: - return "[*]" if self.shorthand else "*" + return "*" def __eq__(self, __value: object) -> bool: return isinstance(__value, WildSelector) and self.token == __value.token diff --git a/jsonpath/stream.py b/jsonpath/stream.py index 775c95f..93ddf93 100644 --- a/jsonpath/stream.py +++ b/jsonpath/stream.py @@ -16,7 +16,8 @@ class TokenStream: def __init__(self, token_iter: Iterable[Token]): self.tokens = list(token_iter) self.pos = 0 - self.eof = Token(TOKEN_EOF, "", -1, self.tokens[0].path) + path = self.tokens[0].path if self.tokens else "" + self.eof = Token(TOKEN_EOF, "", -1, path) def __str__(self) -> str: # pragma: no cover return f"current: {self.current}\nnext: {self.peek}" diff --git a/tests/test_env.py b/tests/test_env.py index 5908baa..51b91d8 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -1,4 +1,5 @@ """JSONPathEnvironment API test cases.""" + import asyncio from typing import List @@ -178,7 +179,7 @@ def test_custom_fake_root_identifier_token() -> None: """Test that we can change the non-standard fake root identifier.""" class MyJSONPathEnvironment(JSONPathEnvironment): - fake_root_token = "$$" + pseudo_root_token = "$$" env = MyJSONPathEnvironment() data = {"foo": {"a": 1, "b": 2, "c": 3}} @@ -191,7 +192,7 @@ def test_disable_fake_root_identifier() -> None: """Test that we can disable the non-standard fake root identifier.""" 
class MyJSONPathEnvironment(JSONPathEnvironment): - fake_root_token = "" + pseudo_root_token = "" env = MyJSONPathEnvironment() with pytest.raises(JSONPathSyntaxError): diff --git a/tests/test_errors.py b/tests/test_errors.py index 1d1f46a..8ff5913 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -15,7 +15,7 @@ def env() -> JSONPathEnvironment: def test_unclosed_selection_list(env: JSONPathEnvironment) -> None: - with pytest.raises(JSONPathSyntaxError, match=r"unexpected end of selector list"): + with pytest.raises(JSONPathSyntaxError, match=r"unexpected end of segment"): env.compile("$[1,2") @@ -39,6 +39,11 @@ def test_unbalanced_parens(env: JSONPathEnvironment) -> None: env.compile("$[?((@.foo)]") +def test_root_dot(env: JSONPathEnvironment) -> None: + with pytest.raises(JSONPathSyntaxError): + env.compile("$.") + + class FilterLiteralTestCase(NamedTuple): description: str query: str diff --git a/tests/test_filter_expression_caching.py b/tests/test_filter_expression_caching.py index 31534c2..0ba64ea 100644 --- a/tests/test_filter_expression_caching.py +++ b/tests/test_filter_expression_caching.py @@ -50,7 +50,7 @@ def test_root_path_cache() -> None: env = JSONPathEnvironment(filter_caching=True) data = {"some": [{"a": 1}, {"a": 99}, {"a": 2}, {"a": 3}]} with mock.patch( - "jsonpath.filter.RootPath.evaluate", return_value=10 + "jsonpath.filter.RootFilterQuery.evaluate", return_value=10 ) as mock_root_path: path = env.compile("$.some[?@.a < $.thing].a") rv = path.findall(data) @@ -63,7 +63,7 @@ def test_root_path_no_cache() -> None: env = JSONPathEnvironment(filter_caching=False) data = {"some": [{"a": 1}, {"a": 99}, {"a": 2}, {"a": 3}]} with mock.patch( - "jsonpath.filter.RootPath.evaluate", return_value=10 + "jsonpath.filter.RootFilterQuery.evaluate", return_value=10 ) as mock_root_path: path = env.compile("$.some[?@.a < $.thing].a") rv = path.findall(data) diff --git a/tests/test_parse.py b/tests/test_parse.py index 96949a1..8415b74 100644 --- 
a/tests/test_parse.py +++ b/tests/test_parse.py @@ -16,10 +16,8 @@ class Case: TEST_CASES = [ Case(description="empty", path="", want="$"), Case(description="just root", path="$", want="$"), - Case(description="root dot", path="$.", want="$"), Case(description="implicit root dot property", path=".thing", want="$['thing']"), Case(description="root dot property", path="$.thing", want="$['thing']"), - Case(description="root bracket property", path="$[thing]", want="$['thing']"), Case( description="root double quoted property", path='$["thing"]', want="$['thing']" ), @@ -31,40 +29,24 @@ class Case: path="$['anything{!%']", want="$['anything{!%']", ), - Case(description="root dot bracket property", path="$.[thing]", want="$['thing']"), Case(description="root bracket index", path="$[1]", want="$[1]"), Case(description="root slice", path="$[1:-1]", want="$[1:-1:1]"), - Case(description="root dot slice", path="$.[1:-1]", want="$[1:-1:1]"), Case(description="root slice with step", path="$[1:-1:2]", want="$[1:-1:2]"), Case(description="root slice with empty start", path="$[:-1]", want="$[:-1:1]"), Case(description="root slice with empty stop", path="$[1:]", want="$[1::1]"), Case(description="root dot wild", path="$.*", want="$[*]"), Case(description="root bracket wild", path="$[*]", want="$[*]"), - Case(description="root dot bracket wild", path="$.[*]", want="$[*]"), - Case(description="root descend", path="$..", want="$.."), - Case(description="root dot descend", path="$...", want="$.."), Case(description="root selector list", path="$[1,2]", want="$[1, 2]"), - Case(description="root dot selector list", path="$.[1,2]", want="$[1, 2]"), Case( description="root selector list with slice", path="$[1,5:-1:1]", want="$[1, 5:-1:1]", ), - Case( - description="root selector list with properties", - path="$[some,thing]", - want="$['some', 'thing']", - ), Case( description="root selector list with quoted properties", path="$[\"some\",'thing']", want="$['some', 'thing']", ), - Case( - 
description="implicit root selector list with mixed selectors", - path='$["some",thing, 1, 2:-2:2]', - want="$['some', 'thing', 1, 2:-2:2]", - ), Case( description="filter self dot property", path="[?(@.thing)]", From e338a0c9b0e729100c8794efab13428f866df0b7 Mon Sep 17 00:00:00 2001 From: James Prior Date: Sun, 10 Aug 2025 07:48:29 +0100 Subject: [PATCH 04/29] Remove shorthand arguments to Property, Wild and Keys selectors --- jsonpath/parse.py | 16 ++-------------- jsonpath/selectors.py | 27 +++++---------------------- 2 files changed, 7 insertions(+), 36 deletions(-) diff --git a/jsonpath/parse.py b/jsonpath/parse.py index 3e08c2a..499db46 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -356,7 +356,6 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: env=self.env, token=token, name=token.value, - shorthand=True, ), ) @@ -365,7 +364,6 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: WildSelector( env=self.env, token=token, - shorthand=True, ), ) @@ -374,7 +372,6 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: KeysSelector( env=self.env, token=token, - shorthand=True, ), ) @@ -422,25 +419,16 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto env=self.env, token=token, name=self._decode_string_literal(token), - shorthand=False, ), ) stream.next() elif token.kind == TOKEN_COLON: selectors.append(self.parse_slice(stream)) elif token.kind == TOKEN_WILD: - selectors.append( - WildSelector( - env=self.env, - token=token, - shorthand=False, - ) - ) + selectors.append(WildSelector(env=self.env, token=token)) stream.next() elif token.kind == TOKEN_KEYS: - selectors.append( - KeysSelector(env=self.env, token=token, shorthand=False) - ) + selectors.append(KeysSelector(env=self.env, token=token)) stream.next() elif token.kind == TOKEN_FILTER: selectors.append(self.parse_filter_selector(stream)) diff --git a/jsonpath/selectors.py 
b/jsonpath/selectors.py index 362bc9f..e3db8c3 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -56,19 +56,11 @@ def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: class PropertySelector(JSONPathSelector): """A shorthand or bracketed property selector.""" - __slots__ = ("name", "shorthand") + __slots__ = ("name",) - def __init__( - self, - *, - env: JSONPathEnvironment, - token: Token, - name: str, - shorthand: bool, - ) -> None: + def __init__(self, *, env: JSONPathEnvironment, token: Token, name: str) -> None: super().__init__(env=env, token=token) self.name = name - self.shorthand = shorthand def __str__(self) -> str: return canonical_string(self.name) @@ -190,13 +182,10 @@ class KeysSelector(JSONPathSelector): NOTE: This is a non-standard selector. """ - __slots__ = ("shorthand",) + __slots__ = () - def __init__( - self, *, env: JSONPathEnvironment, token: Token, shorthand: bool - ) -> None: + def __init__(self, *, env: JSONPathEnvironment, token: Token) -> None: super().__init__(env=env, token=token) - self.shorthand = shorthand def __str__(self) -> str: return self.env.keys_selector_token @@ -298,13 +287,7 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc class WildSelector(JSONPathSelector): """Select all items from a sequence/array or values from a mapping/object.""" - __slots__ = ("shorthand",) - - def __init__( - self, *, env: JSONPathEnvironment, token: Token, shorthand: bool - ) -> None: - super().__init__(env=env, token=token) - self.shorthand = shorthand + __slots__ = () def __str__(self) -> str: return "*" From b4cb9c2170791d8fb59afbfb37ddcdb6dc4ce69b Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 11 Aug 2025 21:08:34 +0100 Subject: [PATCH 05/29] Add "key" and "keys filter" JSONPath selectors --- jsonpath/env.py | 3 + jsonpath/lex.py | 23 ++++++- jsonpath/parse.py | 41 +++++++++--- jsonpath/selectors.py | 143 +++++++++++++++++++++++++++++++++++++++++- 
jsonpath/token.py | 2 + tests/test_find.py | 6 ++ 6 files changed, 206 insertions(+), 12 deletions(-) diff --git a/jsonpath/env.py b/jsonpath/env.py index 7719fff..3c4d04d 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -102,6 +102,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`. filtering a mapping or sequence. Defaults to `"#"`. keys_selector_token (str): The pattern used as the "keys" selector. Defaults to `"~"`. + keys_filter_token (str): The pattern used as the "keys filter" selector. + Defaults to `"~?"`. lexer_class: The lexer to use when tokenizing path strings. max_int_index (int): The maximum integer allowed when selecting array items by index. Defaults to `(2**53) - 1`. @@ -122,6 +124,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`. intersection_token = "&" key_token = "#" keys_selector_token = "~" + keys_filter_token = "~?" root_token = "$" self_token = "@" union_token = "|" diff --git a/jsonpath/lex.py b/jsonpath/lex.py index 6583589..3c3dfb0 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -15,6 +15,7 @@ from .token import TOKEN_CONTAINS from .token import TOKEN_DDOT from .token import TOKEN_DOT +from .token import TOKEN_DOT_KEY_PROPERTY from .token import TOKEN_DOT_PROPERTY from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EQ @@ -30,6 +31,7 @@ from .token import TOKEN_INT from .token import TOKEN_INTERSECTION from .token import TOKEN_KEY +from .token import TOKEN_KEY_NAME from .token import TOKEN_KEYS from .token import TOKEN_KEYS_FILTER from .token import TOKEN_LBRACKET @@ -103,6 +105,13 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: # .thing self.dot_property_pattern = rf"(?P\.)(?P{self.key_pattern})" + # .~thing + self.dot_key_pattern = ( + r"(?P\.)" + rf"(?P{re.escape(env.keys_selector_token)})" + rf"(?P{self.key_pattern})" + ) + # /pattern/ or /pattern/flags self.re_pattern = r"/(?P.+?)/(?P[aims]*)" @@ -122,12 +131,14 @@ def compile_rules(self) -> 
Pattern[str]: (TOKEN_INTERSECTION, self.env.intersection_token), (TOKEN_FILTER_CONTEXT, self.env.filter_context_token), (TOKEN_KEYS, self.env.keys_selector_token), + (TOKEN_KEYS_FILTER, self.env.keys_filter_token), ] rules = [ (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern), (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern), (TOKEN_RE_PATTERN, self.re_pattern), + (TOKEN_DOT_KEY_PROPERTY, self.dot_key_pattern), (TOKEN_DOT_PROPERTY, self.dot_property_pattern), (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"), (TOKEN_INT, r"-?\d+(?P[eE][+\-]?\d+)?\b"), @@ -144,7 +155,6 @@ def compile_rules(self) -> Pattern[str]: ], (TOKEN_WILD, r"\*"), (TOKEN_FILTER, r"\?"), - (TOKEN_KEYS_FILTER, r"~\?"), # TODO: get from env (TOKEN_IN, r"in\b"), (TOKEN_TRUE, r"[Tt]rue\b"), (TOKEN_FALSE, r"[Ff]alse\b"), @@ -199,6 +209,17 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 value=match.group("G_PROP"), index=match.start("G_PROP"), ) + elif kind == TOKEN_DOT_KEY_PROPERTY: + yield _token( + kind=TOKEN_DOT, + value=match.group("G_DOT_KEY"), + index=match.start("G_DOT_KEY"), + ) + yield _token( + kind=TOKEN_KEY_NAME, + value=match.group("G_PROP_KEY"), + index=match.start("G_PROP_KEY"), + ) elif kind == TOKEN_DOUBLE_QUOTE_STRING: yield _token( kind=TOKEN_DOUBLE_QUOTE_STRING, diff --git a/jsonpath/parse.py b/jsonpath/parse.py index 499db46..a0c6d97 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -11,6 +11,7 @@ from typing import Iterator from typing import List from typing import Optional +from typing import Union from jsonpath.function_extensions.filter_function import ExpressionType from jsonpath.function_extensions.filter_function import FilterFunction @@ -45,6 +46,8 @@ from .selectors import Filter from .selectors import IndexSelector from .selectors import JSONPathSelector +from .selectors import KeySelector +from .selectors import KeysFilter from .selectors import KeysSelector from .selectors import PropertySelector from .selectors import SliceSelector 
@@ -69,7 +72,9 @@ from .token import TOKEN_INT from .token import TOKEN_INTERSECTION from .token import TOKEN_KEY +from .token import TOKEN_KEY_NAME from .token import TOKEN_KEYS +from .token import TOKEN_KEYS_FILTER from .token import TOKEN_LBRACKET from .token import TOKEN_LE from .token import TOKEN_LG @@ -314,15 +319,15 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]: if _token.kind == TOKEN_DOT: stream.eat(TOKEN_DOT) # Assert that dot is followed by shorthand selector without whitespace. - stream.expect(TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS) + stream.expect(TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS, TOKEN_KEY_NAME) token = stream.current() - selectors = self.parse_selectors(stream) + selectors = self.parse_selector(stream) yield JSONPathChildSegment( env=self.env, token=token, selectors=selectors ) elif _token.kind == TOKEN_DDOT: token = stream.eat(TOKEN_DDOT) - selectors = self.parse_selectors(stream) + selectors = self.parse_selector(stream) if not selectors: raise JSONPathSyntaxError( "missing selector for recursive descent segment", @@ -332,22 +337,22 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]: env=self.env, token=token, selectors=selectors ) elif _token.kind == TOKEN_LBRACKET: - selectors = self.parse_selectors(stream) + selectors = self.parse_selector(stream) yield JSONPathChildSegment( env=self.env, token=_token, selectors=selectors ) - elif _token.kind in {TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS}: + elif _token.kind in {TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS, TOKEN_KEY_NAME}: # A non-standard "bare" path. One without a leading identifier (`$`, # `@`, `^` or `_`). 
token = stream.current() - selectors = self.parse_selectors(stream) + selectors = self.parse_selector(stream) yield JSONPathChildSegment( env=self.env, token=token, selectors=selectors ) else: break - def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: + def parse_selector(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: token = stream.next() if token.kind == TOKEN_NAME: @@ -359,6 +364,15 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: ), ) + if token.kind == TOKEN_KEY_NAME: + return ( + KeySelector( + env=self.env, + token=token, + key=token.value, + ), + ) + if token.kind == TOKEN_WILD: return ( WildSelector( @@ -432,6 +446,8 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto stream.next() elif token.kind == TOKEN_FILTER: selectors.append(self.parse_filter_selector(stream)) + elif token.kind == TOKEN_KEYS_FILTER: + selectors.append(self.parse_filter_selector(stream, keys=True)) elif token.kind == TOKEN_EOF: raise JSONPathSyntaxError("unexpected end of query", token=token) else: @@ -514,8 +530,10 @@ def _maybe_index(token: Token) -> bool: step=step, ) - def parse_filter_selector(self, stream: TokenStream) -> Filter: - token = stream.eat(TOKEN_FILTER) + def parse_filter_selector( + self, stream: TokenStream, *, keys: bool = False + ) -> Union[Filter, KeysFilter]: + token = stream.next() expr = self.parse_filter_expression(stream) if self.env.well_typed and isinstance(expr, FunctionExtension): @@ -536,6 +554,11 @@ def parse_filter_selector(self, stream: TokenStream) -> Filter: token=token, ) + if keys: + return KeysFilter( + env=self.env, token=token, expression=BooleanExpression(expr) + ) + return Filter(env=self.env, token=token, expression=BooleanExpression(expr)) def parse_boolean(self, stream: TokenStream) -> FilterExpression: diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index e3db8c3..04ee4ae 100644 --- a/jsonpath/selectors.py +++ 
b/jsonpath/selectors.py @@ -176,10 +176,57 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc yield match +class KeySelector(JSONPathSelector): + """Select a single mapping/object name/key. + + NOTE: This is a non-standard selector. + + See https://jg-rp.github.io/json-p3/guides/jsonpath-extra#key-selector. + """ + + __slots__ = ("key",) + + def __init__(self, *, env: JSONPathEnvironment, token: Token, key: str) -> None: + super().__init__(env=env, token=token) + self.key = key + + def __str__(self) -> str: + return f"{self.env.keys_selector_token}{canonical_string(self.key)}" + + def __eq__(self, __value: object) -> bool: + return ( + isinstance(__value, KeySelector) + and self.token == __value.token + and self.key == __value.key + ) + + def __hash__(self) -> int: + return hash((self.token, self.key)) + + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping) and self.key in node.obj: + match = node.__class__( + filter_context=node.filter_context(), + obj=self.key, + parent=node, + parts=node.parts + (f"{self.env.keys_selector_token}{self.key}",), + path=f"{node.path}[{self}]", + root=node.root, + ) + node.add_child(match) + yield match + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + for _node in self.resolve(node): + yield _node + + class KeysSelector(JSONPathSelector): """Select mapping/object keys/properties. NOTE: This is a non-standard selector. 
+ + See https://jg-rp.github.io/json-p3/guides/jsonpath-extra#keys-selector """ __slots__ = () @@ -198,13 +245,13 @@ def __hash__(self) -> int: def _keys(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: if isinstance(node.obj, Mapping): - for i, key in enumerate(node.obj.keys()): + for key in node.obj: match = node.__class__( filter_context=node.filter_context(), obj=key, parent=node, parts=node.parts + (f"{self.env.keys_selector_token}{key}",), - path=f"{node.path}[{self.env.keys_selector_token}][{i}]", + path=f"{node.path}[{self.env.keys_selector_token}{canonical_string(key)}]", root=node.root, ) node.add_child(match) @@ -449,6 +496,98 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc yield match +class KeysFilter(JSONPathSelector): + """Selects names from an object’s name/value members. + + NOTE: This is a non-standard selector. + + See https://jg-rp.github.io/json-p3/guides/jsonpath-extra#keys-filter-selector + """ + + __slots__ = ("expression",) + + def __init__( + self, + *, + env: JSONPathEnvironment, + token: Token, + expression: BooleanExpression, + ) -> None: + super().__init__(env=env, token=token) + self.expression = expression + + def __str__(self) -> str: + return f"~?{self.expression}" + + def __eq__(self, __value: object) -> bool: + return ( + isinstance(__value, Filter) + and self.expression == __value.expression + and self.token == __value.token + ) + + def __hash__(self) -> int: + return hash(("~", str(self.expression), self.token)) + + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.value, Mapping): + for key, val in node.value.items(): + context = FilterContext( + env=self.env, + current=val, + root=node.root, + extra_context=node.filter_context(), + current_key=key, + ) + + try: + if self.expression.evaluate(context): + match = node.__class__( + filter_context=node.filter_context(), + obj=key, + parent=node, + parts=node.parts + + 
(f"{self.env.keys_selector_token}{key}",), + path=f"{node.path}[{self.env.keys_selector_token}{canonical_string(key)}]", + root=node.root, + ) + node.add_child(match) + yield match + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.value, Mapping): + for key, val in node.value.items(): + context = FilterContext( + env=self.env, + current=val, + root=node.root, + extra_context=node.filter_context(), + current_key=key, + ) + + try: + if await self.expression.evaluate_async(context): + match = node.__class__( + filter_context=node.filter_context(), + obj=key, + parent=node, + parts=node.parts + + (f"{self.env.keys_selector_token}{key}",), + path=f"{node.path}[{self.env.keys_selector_token}{canonical_string(key)}]", + root=node.root, + ) + node.add_child(match) + yield match + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + + class FilterContext: """Contextual information and data for evaluating a filter expression.""" diff --git a/jsonpath/token.py b/jsonpath/token.py index 6650b9c..e9d39e0 100644 --- a/jsonpath/token.py +++ b/jsonpath/token.py @@ -24,6 +24,8 @@ TOKEN_WILD = sys.intern("TOKEN_WILD") TOKEN_NAME = sys.intern("TOKEN_NAME") TOKEN_DOT_PROPERTY = sys.intern("TOKEN_DOT_PROPERTY") +TOKEN_DOT_KEY_PROPERTY = sys.intern("TOKEN_DOT_KEY_PROPERTY") +TOKEN_KEY_NAME = sys.intern("TOKEN_KEY_NAME") # Filter expression tokens TOKEN_AND = sys.intern("TOKEN_AND") diff --git a/tests/test_find.py b/tests/test_find.py index ef399b4..b564103 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -51,6 +51,12 @@ class Case: }, want=[{"foo": 1}, {"foo": 2}], ), + Case( + description="filter current key, array data", + path="$.abc[?(# >= 1)]", + data={"abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": []}, + want=[2, 3], + ), Case( description="select root value using pseudo root", 
path="^[?@.some.thing > 7]", From 7a55c02bd54c683c693cb7f40580e98d3947148d Mon Sep 17 00:00:00 2001 From: James Prior Date: Tue, 12 Aug 2025 21:04:32 +0100 Subject: [PATCH 06/29] Test "extra" JSONPath syntax --- jsonpath/parse.py | 28 +++++- tests/test_find_extra.py | 135 ++++++++++++++++++++++++++++ tests/test_find_extra_examples.py | 145 ++++++++++++++++++++++++++++++ 3 files changed, 305 insertions(+), 3 deletions(-) create mode 100644 tests/test_find_extra.py create mode 100644 tests/test_find_extra_examples.py diff --git a/jsonpath/parse.py b/jsonpath/parse.py index a0c6d97..cfd50ce 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -352,7 +352,7 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]: else: break - def parse_selector(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: + def parse_selector(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: # noqa: PLR0911 token = stream.next() if token.kind == TOKEN_NAME: @@ -382,6 +382,15 @@ def parse_selector(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: ) if token.kind == TOKEN_KEYS: + if stream.current().kind == TOKEN_NAME: + return ( + KeySelector( + env=self.env, + token=token, + key=self._decode_string_literal(stream.next()), + ), + ) + return ( KeysSelector( env=self.env, @@ -442,8 +451,21 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto selectors.append(WildSelector(env=self.env, token=token)) stream.next() elif token.kind == TOKEN_KEYS: - selectors.append(KeysSelector(env=self.env, token=token)) - stream.next() + stream.eat(TOKEN_KEYS) + if stream.current().kind in ( + TOKEN_DOUBLE_QUOTE_STRING, + TOKEN_SINGLE_QUOTE_STRING, + ): + selectors.append( + KeySelector( + env=self.env, + token=token, + key=self._decode_string_literal(stream.next()), + ) + ) + else: + selectors.append(KeysSelector(env=self.env, token=token)) + elif token.kind == TOKEN_FILTER: selectors.append(self.parse_filter_selector(stream)) elif 
token.kind == TOKEN_KEYS_FILTER: diff --git a/tests/test_find_extra.py b/tests/test_find_extra.py new file mode 100644 index 0000000..edb3771 --- /dev/null +++ b/tests/test_find_extra.py @@ -0,0 +1,135 @@ +import asyncio +import dataclasses +import operator +from typing import Any +from typing import List +from typing import Mapping +from typing import Sequence +from typing import Union + +import pytest + +from jsonpath import JSONPathEnvironment + + +@dataclasses.dataclass +class Case: + description: str + path: str + data: Union[Sequence[Any], Mapping[str, Any]] + want: Union[Sequence[Any], Mapping[str, Any]] + + +TEST_CASES = [ + Case( + description="keys from an object", + path="$.some[~]", + data={"some": {"other": "foo", "thing": "bar"}}, + want=["other", "thing"], + ), + Case( + description="shorthand keys from an object", + path="$.some.~", + data={"some": {"other": "foo", "thing": "bar"}}, + want=["other", "thing"], + ), + Case( + description="keys from an array", + path="$.some[~]", + data={"some": ["other", "thing"]}, + want=[], + ), + Case( + description="shorthand keys from an array", + path="$.some.~", + data={"some": ["other", "thing"]}, + want=[], + ), + Case( + description="recurse object keys", + path="$..~", + data={"some": {"thing": "else", "foo": {"bar": "baz"}}}, + want=["some", "thing", "foo", "bar"], + ), + Case( + description="current key of an object", + path="$.some[?match(#, '^b.*')]", + data={"some": {"foo": "a", "bar": "b", "baz": "c", "qux": "d"}}, + want=["b", "c"], + ), + Case( + description="current key of an array", + path="$.some[?# > 1]", + data={"some": ["other", "thing", "foo", "bar"]}, + want=["foo", "bar"], + ), + Case( + description="filter keys from an object", + path="$.some[~?match(@, '^b.*')]", + data={"some": {"other": "foo", "thing": "bar"}}, + want=["thing"], + ), + Case( + description="singular key from an object", + path="$.some[~'other']", + data={"some": {"other": "foo", "thing": "bar"}}, + want=["other"], + ), + 
Case( + description="singular key from an object, does not exist", + path="$.some[~'else']", + data={"some": {"other": "foo", "thing": "bar"}}, + want=[], + ), + Case( + description="singular key from an array", + path="$.some[~'1']", + data={"some": ["foo", "bar"]}, + want=[], + ), + Case( + description="singular key from an object, shorthand", + path="$.some.~other", + data={"some": {"other": "foo", "thing": "bar"}}, + want=["other"], + ), + Case( + description="recursive key from an object", + path="$.some..[~'other']", + data={"some": {"other": "foo", "thing": "bar", "else": {"other": "baz"}}}, + want=["other", "other"], + ), + Case( + description="recursive key from an object, shorthand", + path="$.some..~other", + data={"some": {"other": "foo", "thing": "bar", "else": {"other": "baz"}}}, + want=["other", "other"], + ), + Case( + description="recursive key from an object, does not exist", + path="$.some..[~'nosuchthing']", + data={"some": {"other": "foo", "thing": "bar", "else": {"other": "baz"}}}, + want=[], + ), +] + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment() + + +@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) +def test_find_extra(env: JSONPathEnvironment, case: Case) -> None: + path = env.compile(case.path) + assert path.findall(case.data) == case.want + + +@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) +def test_find_extra_async(env: JSONPathEnvironment, case: Case) -> None: + path = env.compile(case.path) + + async def coro() -> List[object]: + return await path.findall_async(case.data) + + assert asyncio.run(coro()) == case.want diff --git a/tests/test_find_extra_examples.py b/tests/test_find_extra_examples.py new file mode 100644 index 0000000..6f47327 --- /dev/null +++ b/tests/test_find_extra_examples.py @@ -0,0 +1,145 @@ +import asyncio +import dataclasses +import operator +from typing import Any +from typing import List +from typing 
import Mapping +from typing import Sequence +from typing import Union + +import pytest + +from jsonpath import JSONPathEnvironment + + +@dataclasses.dataclass +class Case: + description: str + path: str + data: Union[Sequence[Any], Mapping[str, Any]] + want: Union[Sequence[Any], Mapping[str, Any]] + want_paths: List[str] + + +TEST_CASES = [ + Case( + description="key selector, key of nested object", + path="$.a[0].~c", + data={ + "a": [{"b": "x", "c": "z"}, {"b": "y"}], + }, + want=["c"], + want_paths=["$['a'][0][~'c']"], + ), + Case( + description="key selector, key does not exist", + path="$.a[1].~c", + data={ + "a": [{"b": "x", "c": "z"}, {"b": "y"}], + }, + want=[], + want_paths=[], + ), + Case( + description="key selector, descendant, single quoted key", + path="$..[~'b']", + data={ + "a": [{"b": "x", "c": "z"}, {"b": "y"}], + }, + want=["b", "b"], + want_paths=["$['a'][0][~'b']", "$['a'][1][~'b']"], + ), + Case( + description="key selector, descendant, double quoted key", + path='$..[~"b"]', + data={ + "a": [{"b": "x", "c": "z"}, {"b": "y"}], + }, + want=["b", "b"], + want_paths=["$['a'][0][~'b']", "$['a'][1][~'b']"], + ), + Case( + description="keys selector, object key", + path="$.a[0].~", + data={ + "a": [{"b": "x", "c": "z"}, {"b": "y"}], + }, + want=["b", "c"], + want_paths=["$['a'][0][~'b']", "$['a'][0][~'c']"], + ), + Case( + description="keys selector, array key", + path="$.a.~", + data={ + "a": [{"b": "x", "c": "z"}, {"b": "y"}], + }, + want=[], + want_paths=[], + ), + Case( + description="keys selector, descendant keys", + path="$..[~]", + data={ + "a": [{"b": "x", "c": "z"}, {"b": "y"}], + }, + want=["a", "b", "c", "b"], + want_paths=["$[~'a']", "$['a'][0][~'b']", "$['a'][0][~'c']", "$['a'][1][~'b']"], + ), + Case( + description="keys filter selector, conditionally select object keys", + path="$.*[~?length(@) > 2]", + data=[{"a": [1, 2, 3], "b": [4, 5]}, {"c": {"x": [1, 2]}}, {"d": [1, 2, 3]}], + want=["a", "d"], + want_paths=["$[0][~'a']", 
"$[2][~'d']"], + ), + Case( + description="keys filter selector, existence test", + path="$.*[~?@.x]", + data=[{"a": [1, 2, 3], "b": [4, 5]}, {"c": {"x": [1, 2]}}, {"d": [1, 2, 3]}], + want=["c"], + want_paths=["$[1][~'c']"], + ), + Case( + description="keys filter selector, keys from an array", + path="$[~?(true == true)]", + data=[{"a": [1, 2, 3], "b": [4, 5]}, {"c": {"x": [1, 2]}}, {"d": [1, 2, 3]}], + want=[], + want_paths=[], + ), + Case( + description="current key identifier, match on object names", + path="$[?match(#, '^ab.*') && length(@) > 0 ]", + data={"abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": []}, + want=[[1, 2, 3], [6]], + want_paths=["$['abc']", "$['abx']"], + ), + Case( + description="current key identifier, compare current array index", + path="$.abc[?(# >= 1)]", + data={"abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": []}, + want=[2, 3], + want_paths=["$['abc'][1]", "$['abc'][2]"], + ), +] + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment() + + +@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) +def test_find_extra_examples(env: JSONPathEnvironment, case: Case) -> None: + path = env.compile(case.path) + assert path.findall(case.data) == case.want + assert list(path.query(case.data).locations()) == case.want_paths + + +@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) +def test_find_extra_async(env: JSONPathEnvironment, case: Case) -> None: + path = env.compile(case.path) + + async def coro() -> List[object]: + return await path.findall_async(case.data) + + assert asyncio.run(coro()) == case.want From ea84ed97b3f3f9a9e938a62818896d1ff783b445 Mon Sep 17 00:00:00 2001 From: James Prior Date: Wed, 13 Aug 2025 09:29:27 +0100 Subject: [PATCH 07/29] Singular query selector stub [skip ci] --- docs/singular_query_selector.md | 43 ++++++++++++++++++++++++++ jsonpath/parse.py | 54 ++++++++++++++++++++++++--------- jsonpath/selectors.py | 40 
++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 15 deletions(-) create mode 100644 docs/singular_query_selector.md diff --git a/docs/singular_query_selector.md b/docs/singular_query_selector.md new file mode 100644 index 0000000..3d6ce67 --- /dev/null +++ b/docs/singular_query_selector.md @@ -0,0 +1,43 @@ +# Singular Query Selector + +The singular query selector consist of an embedded absolute singular query, the result of which is used as an object member name or array element index. + +If the embedded query resolves to a string or int value, at most one object member value or array element value is selected. Otherwise the singular query selector selects nothing. + +## Syntax + +``` +selector = name-selector / + wildcard-selector / + slice-selector / + index-selector / + filter-selector / + singular-query-selector + +singular-query-selector = abs-singular-query +``` + +## Examples + +```json +{ + "a": { + "j": [1, 2, 3], + "p": { + "q": [4, 5, 6] + } + }, + "b": ["j", "p", "q"], + "c d": { + "x": { + "y": 1 + } + } +} +``` + +| Query | Result | Result Path | Comment | +| --------------------- | ------------------ | ---------------- | ----------------------------------------------------------------- | +| `$.a[$.b[1]]` | `{"q": [4, 5, 6]}` | `$['a']['p']` | Object name from embedded singular query | +| `$.a.j[$['c d'].x.y]` | `2` | `$['a']['j'][1]` | Array index from embedded singular query | +| `$.a[$.b]` | | | Embedded singular query does not resolve to a string or int value | diff --git a/jsonpath/parse.py b/jsonpath/parse.py index cfd50ce..26d1126 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -50,6 +50,7 @@ from .selectors import KeysFilter from .selectors import KeysSelector from .selectors import PropertySelector +from .selectors import SingularQuerySelector from .selectors import SliceSelector from .selectors import WildSelector from .token import TOKEN_AND @@ -239,7 +240,7 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: 
self.token_map: Dict[str, Callable[[TokenStream], FilterExpression]] = { TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, - TOKEN_PSEUDO_ROOT: self.parse_root_path, + TOKEN_PSEUDO_ROOT: self.parse_absolute_query, TOKEN_FALSE: self.parse_boolean, TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, TOKEN_FLOAT: self.parse_float_literal, @@ -254,8 +255,8 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: TOKEN_NOT: self.parse_prefix_expression, TOKEN_NULL: self.parse_nil, TOKEN_RE_PATTERN: self.parse_regex, - TOKEN_ROOT: self.parse_root_path, - TOKEN_SELF: self.parse_self_path, + TOKEN_ROOT: self.parse_absolute_query, + TOKEN_SELF: self.parse_relative_query, TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal, TOKEN_TRUE: self.parse_boolean, TOKEN_UNDEFINED: self.parse_undefined, @@ -277,7 +278,7 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: str, Callable[[TokenStream], FilterExpression] ] = { TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, - TOKEN_PSEUDO_ROOT: self.parse_root_path, + TOKEN_PSEUDO_ROOT: self.parse_absolute_query, TOKEN_FALSE: self.parse_boolean, TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, TOKEN_FLOAT: self.parse_float_literal, @@ -287,8 +288,8 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: TOKEN_NIL: self.parse_nil, TOKEN_NONE: self.parse_nil, TOKEN_NULL: self.parse_nil, - TOKEN_ROOT: self.parse_root_path, - TOKEN_SELF: self.parse_self_path, + TOKEN_ROOT: self.parse_absolute_query, + TOKEN_SELF: self.parse_relative_query, TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal, TOKEN_TRUE: self.parse_boolean, } @@ -299,7 +300,7 @@ def parse(self, stream: TokenStream) -> Iterator[JSONPathSegment]: if stream.current().kind in {TOKEN_ROOT, TOKEN_PSEUDO_ROOT}: stream.next() - yield from self.parse_path(stream) + yield from self.parse_query(stream) if stream.current().kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION): raise JSONPathSyntaxError( @@ -307,7 +308,7 @@ def parse(self, stream: 
TokenStream) -> Iterator[JSONPathSegment]: token=stream.current(), ) - def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]: + def parse_query(self, stream: TokenStream) -> Iterable[JSONPathSegment]: """Parse a JSONPath query string. This method assumes the root, current or pseudo root identifier has @@ -405,7 +406,7 @@ def parse_selector(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: stream.pos -= 1 return () - def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]: # noqa: PLR0912 + def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]: # noqa: PLR0912, PLR0915 """Parse a comma separated list of JSONPath selectors.""" segment_token = stream.eat(TOKEN_LBRACKET) selectors: List[JSONPathSelector] = [] @@ -470,6 +471,8 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto selectors.append(self.parse_filter_selector(stream)) elif token.kind == TOKEN_KEYS_FILTER: selectors.append(self.parse_filter_selector(stream, keys=True)) + elif token.kind in (TOKEN_ROOT, TOKEN_NAME): + selectors.append(self.parse_singular_query_selector(stream)) elif token.kind == TOKEN_EOF: raise JSONPathSyntaxError("unexpected end of query", token=token) else: @@ -664,20 +667,41 @@ def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression: stream.eat(TOKEN_RPAREN) return expr - def parse_root_path(self, stream: TokenStream) -> FilterExpression: + def parse_absolute_query(self, stream: TokenStream) -> FilterExpression: root = stream.next() return RootFilterQuery( JSONPath( env=self.env, - segments=self.parse_path(stream), + segments=self.parse_query(stream), pseudo_root=root.kind == TOKEN_PSEUDO_ROOT, ) ) - def parse_self_path(self, stream: TokenStream) -> FilterExpression: - stream.next() + def parse_relative_query(self, stream: TokenStream) -> FilterExpression: + stream.eat(TOKEN_SELF) return RelativeFilterQuery( - JSONPath(env=self.env, 
segments=self.parse_path(stream)) + JSONPath(env=self.env, segments=self.parse_query(stream)) + ) + + def parse_singular_query_selector( + self, stream: TokenStream + ) -> SingularQuerySelector: + # TODO: optionally require root identifier + token = ( + stream.next() if stream.current().kind == TOKEN_ROOT else stream.current() + ) + + query = JSONPath(env=self.env, segments=self.parse_query(stream)) + + if not query.singular_query(): + raise JSONPathSyntaxError( + "embedded query selectors must be singular queries", token=token + ) + + return SingularQuerySelector( + env=self.env, + token=token, + query=query, ) def parse_current_key(self, stream: TokenStream) -> FilterExpression: @@ -687,7 +711,7 @@ def parse_current_key(self, stream: TokenStream) -> FilterExpression: def parse_filter_context_path(self, stream: TokenStream) -> FilterExpression: stream.next() return FilterContextPath( - JSONPath(env=self.env, segments=self.parse_path(stream)) + JSONPath(env=self.env, segments=self.parse_query(stream)) ) def parse_regex(self, stream: TokenStream) -> FilterExpression: diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index 04ee4ae..63e3134 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -22,6 +22,7 @@ from .env import JSONPathEnvironment from .filter import BooleanExpression from .match import JSONPathMatch + from .path import JSONPath from .token import Token # ruff: noqa: D102 @@ -372,6 +373,45 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc yield match +class SingularQuerySelector(JSONPathSelector): + """An embedded absolute query. + + The result of the embedded query is used as an object member name or array element + index. + + NOTE: This is a non-standard selector. 
+ """ + + __slots__ = ("query",) + + def __init__( + self, *, env: JSONPathEnvironment, token: Token, query: JSONPath + ) -> None: + super().__init__(env=env, token=token) + self.query = query + + def __str__(self) -> str: + return str(self.query) + + def __eq__(self, __value: object) -> bool: + return ( + isinstance(__value, SingularQuerySelector) + and self.query == __value.query + and self.token == __value.token + ) + + def __hash__(self) -> int: + return hash((self.query, self.token)) + + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + # TODO: + raise Exception("not implemented") + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + # TODO: + raise Exception("not implemented") + + class Filter(JSONPathSelector): """Filter sequence/array items or mapping/object values with a filter expression.""" From 9a1886eef9a99345b595fbfbdb9f2ca7cd282348 Mon Sep 17 00:00:00 2001 From: James Prior Date: Wed, 13 Aug 2025 17:47:28 +0100 Subject: [PATCH 08/29] Implement the singular query selector --- jsonpath/selectors.py | 94 ++++++++++++++++++++++++++----- tests/test_errors.py | 5 ++ tests/test_find_extra.py | 40 +++++++++++++ tests/test_find_extra_examples.py | 33 +++++++++++ 4 files changed, 159 insertions(+), 13 deletions(-) diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index 63e3134..b7bd90e 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -16,6 +16,7 @@ from .exceptions import JSONPathIndexError from .exceptions import JSONPathTypeError +from .match import NodeList from .serialize import canonical_string if TYPE_CHECKING: @@ -94,15 +95,7 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc class IndexSelector(JSONPathSelector): - """Select an element from an array by index. 
- - XXX: Change to make unquoted keys/properties a "singular path selector" - https://github.com/ietf-wg-jsonpath/draft-ietf-jsonpath-base/issues/522 - - Considering we don't require mapping (JSON object) keys/properties to - be quoted, and that we support mappings with numeric keys, we also check - to see if the "index" is a mapping key, which is non-standard. - """ + """Select an element from an array by index.""" __slots__ = ("index", "_as_key") @@ -404,12 +397,87 @@ def __hash__(self) -> int: return hash((self.query, self.token)) def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: - # TODO: - raise Exception("not implemented") + if isinstance(node.obj, Mapping): + nodes = NodeList(self.query.finditer(node.root)) + + if nodes.empty(): + return + + value = nodes[0].value + + if not isinstance(value, str): + return + + with suppress(KeyError): + match = node.new_child(self.env.getitem(node.obj, value), value) + node.add_child(match) + yield match + + if isinstance(node.obj, Sequence): + nodes = NodeList(self.query.finditer(node.root)) + + if nodes.empty(): + return + + value = nodes[0].value + + if not isinstance(value, int): + return + + index = self._normalized_index(node.obj, value) + + with suppress(IndexError): + match = node.new_child(self.env.getitem(node.obj, index), index) + node.add_child(match) + yield match async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: - # TODO: - raise Exception("not implemented") + if isinstance(node.obj, Mapping): + nodes = NodeList( + [match async for match in await self.query.finditer_async(node.root)] + ) + + if nodes.empty(): + return + + value = nodes[0].value + + if not isinstance(value, str): + return + + with suppress(KeyError): + match = node.new_child( + await self.env.getitem_async(node.obj, value), value + ) + node.add_child(match) + yield match + + if isinstance(node.obj, Sequence): + nodes = NodeList( + [match async for match in await 
self.query.finditer_async(node.root)] + ) + + if nodes.empty(): + return + + value = nodes[0].value + + if not isinstance(value, int): + return + + index = self._normalized_index(node.obj, value) + + with suppress(IndexError): + match = node.new_child( + await self.env.getitem_async(node.obj, index), index + ) + node.add_child(match) + yield match + + def _normalized_index(self, obj: Sequence[object], index: int) -> int: + if index < 0 and len(obj) >= abs(index): + return len(obj) + index + return index class Filter(JSONPathSelector): diff --git a/tests/test_errors.py b/tests/test_errors.py index 8ff5913..2c88135 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -44,6 +44,11 @@ def test_root_dot(env: JSONPathEnvironment) -> None: env.compile("$.") +def test_embedded_query_is_not_singular(env: JSONPathEnvironment) -> None: + with pytest.raises(JSONPathSyntaxError): + env.compile("$.a[$.*]") + + class FilterLiteralTestCase(NamedTuple): description: str query: str diff --git a/tests/test_find_extra.py b/tests/test_find_extra.py index edb3771..69074ce 100644 --- a/tests/test_find_extra.py +++ b/tests/test_find_extra.py @@ -111,6 +111,46 @@ class Case: data={"some": {"other": "foo", "thing": "bar", "else": {"other": "baz"}}}, want=[], ), + Case( + description="object name from embedded singular query resolving to nothing", + path="$.a[$.foo]", + data={ + "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, + "b": ["j", "p", "q"], + "c d": {"x": {"y": 1}}, + }, + want=[], + ), + Case( + description="array index from embedded singular query resolving to nothing", + path="$.b[$.foo]", + data={ + "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, + "b": ["j", "p", "q"], + "c d": {"x": {"y": 1}}, + }, + want=[], + ), + Case( + description="array index from embedded singular query is not an int", + path="$.b[$.a.z]", + data={ + "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}, "z": "foo"}, + "b": ["j", "p", "q"], + "c d": {"x": {"y": 1}}, + }, + want=[], + ), + Case( + 
description="array index from embedded singular query is negative", + path="$.b[$.a.z]", + data={ + "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}, "z": -1}, + "b": ["j", "p", "q"], + "c d": {"x": {"y": 1}}, + }, + want=["q"], + ), ] diff --git a/tests/test_find_extra_examples.py b/tests/test_find_extra_examples.py index 6f47327..b43b967 100644 --- a/tests/test_find_extra_examples.py +++ b/tests/test_find_extra_examples.py @@ -120,6 +120,39 @@ class Case: want=[2, 3], want_paths=["$['abc'][1]", "$['abc'][2]"], ), + Case( + description="object name from embedded singular query", + path="$.a[$.b[1]]", + data={ + "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, + "b": ["j", "p", "q"], + "c d": {"x": {"y": 1}}, + }, + want=[{"q": [4, 5, 6]}], + want_paths=["$['a']['p']"], + ), + Case( + description="array index from embedded singular query", + path="$.a.j[$['c d'].x.y]", + data={ + "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, + "b": ["j", "p", "q"], + "c d": {"x": {"y": 1}}, + }, + want=[2], + want_paths=["$['a']['j'][1]"], + ), + Case( + description="embedded singular query does not resolve to a string or int value", + path="$.a[$.b]", + data={ + "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, + "b": ["j", "p", "q"], + "c d": {"x": {"y": 1}}, + }, + want=[], + want_paths=[], + ), ] From 616f438abec4013bf25122f3cdc7a48a22f37cfa Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 14 Aug 2025 10:21:38 +0100 Subject: [PATCH 09/29] Rename some selector classes and tidy. 
--- jsonpath/env.py | 6 +- jsonpath/filter.py | 116 +++++++++++----------- jsonpath/parse.py | 74 +++++++------- jsonpath/path.py | 7 +- jsonpath/selectors.py | 40 ++++---- pyproject.toml | 2 + tests/test_filter_expression_caching.py | 18 ++-- tests/test_walk_filter_expression_tree.py | 4 +- 8 files changed, 136 insertions(+), 131 deletions(-) diff --git a/jsonpath/env.py b/jsonpath/env.py index 3c4d04d..1eb28b4 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -24,7 +24,7 @@ from .exceptions import JSONPathTypeError from .filter import UNDEFINED from .filter import VALUE_TYPE_EXPRESSIONS -from .filter import FilterExpression +from .filter import BaseExpression from .filter import FilterQuery from .filter import FunctionExtension from .filter import InfixExpression @@ -449,7 +449,7 @@ def check_well_typedness( self, token: Token, func: FilterFunction, - args: List[FilterExpression], + args: List[BaseExpression], ) -> None: """Check the well-typedness of a function's arguments at compile-time.""" # Correct number of arguments? @@ -487,7 +487,7 @@ def check_well_typedness( token=token, ) - def _function_return_type(self, expr: FilterExpression) -> Optional[ExpressionType]: + def _function_return_type(self, expr: BaseExpression) -> Optional[ExpressionType]: """Return the type returned from a filter function. 
If _expr_ is not a `FunctionExtension` or the registered function definition is diff --git a/jsonpath/filter.py b/jsonpath/filter.py index 5f9adb2..b8905e9 100644 --- a/jsonpath/filter.py +++ b/jsonpath/filter.py @@ -29,10 +29,8 @@ from .path import JSONPath from .selectors import FilterContext -# ruff: noqa: D102, PLW1641 - -class FilterExpression(ABC): +class BaseExpression(ABC): """Base class for all filter expression nodes.""" __slots__ = ("volatile",) @@ -59,11 +57,11 @@ async def evaluate_async(self, context: FilterContext) -> object: """An async version of `evaluate`.""" @abstractmethod - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: """Return a list of direct child expressions.""" @abstractmethod - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 """Update this expression's child expressions. _children_ is assumed to have the same number of items as is returned @@ -71,7 +69,7 @@ def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG00 """ -class Nil(FilterExpression): +class Nil(BaseExpression): """The constant `nil`. Also aliased as `null` and `None`, sometimes. 
@@ -94,10 +92,10 @@ def evaluate(self, _: FilterContext) -> None: async def evaluate_async(self, _: FilterContext) -> None: return None - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [] - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 return @@ -125,7 +123,7 @@ def __repr__(self) -> str: UNDEFINED = _Undefined() -class Undefined(FilterExpression): +class Undefined(BaseExpression): """The constant `undefined`.""" __slots__ = () @@ -146,10 +144,10 @@ def evaluate(self, _: FilterContext) -> object: async def evaluate_async(self, _: FilterContext) -> object: return UNDEFINED - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [] - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 return @@ -158,7 +156,7 @@ def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG00 LITERAL_EXPRESSION_T = TypeVar("LITERAL_EXPRESSION_T") -class Literal(FilterExpression, Generic[LITERAL_EXPRESSION_T]): +class FilterExpressionLiteral(BaseExpression, Generic[LITERAL_EXPRESSION_T]): """Base class for filter expression literals.""" __slots__ = ("value",) @@ -182,14 +180,14 @@ def evaluate(self, _: FilterContext) -> LITERAL_EXPRESSION_T: async def evaluate_async(self, _: FilterContext) -> LITERAL_EXPRESSION_T: return self.value - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [] - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 return -class BooleanLiteral(Literal[bool]): +class BooleanLiteral(FilterExpressionLiteral[bool]): """A Boolean `True` or `False`.""" __slots__ = () @@ -201,7 
+199,7 @@ class BooleanLiteral(Literal[bool]): FALSE = BooleanLiteral(value=False) -class StringLiteral(Literal[str]): +class StringLiteral(FilterExpressionLiteral[str]): """A string literal.""" __slots__ = () @@ -210,19 +208,19 @@ def __str__(self) -> str: return canonical_string(self.value) -class IntegerLiteral(Literal[int]): +class IntegerLiteral(FilterExpressionLiteral[int]): """An integer literal.""" __slots__ = () -class FloatLiteral(Literal[float]): +class FloatLiteral(FilterExpressionLiteral[float]): """A float literal.""" __slots__ = () -class RegexLiteral(Literal[Pattern[str]]): +class RegexLiteral(FilterExpressionLiteral[Pattern[str]]): """A regex literal.""" __slots__ = () @@ -245,12 +243,12 @@ def __str__(self) -> str: return f"/{self.value.pattern}/{''.join(flags)}" -class ListLiteral(FilterExpression): +class ListLiteral(BaseExpression): """A list literal.""" __slots__ = ("items",) - def __init__(self, items: List[FilterExpression]) -> None: + def __init__(self, items: List[BaseExpression]) -> None: self.items = items super().__init__() @@ -267,19 +265,19 @@ def evaluate(self, context: FilterContext) -> object: async def evaluate_async(self, context: FilterContext) -> object: return [await item.evaluate_async(context) for item in self.items] - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return self.items - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 self.items = children -class PrefixExpression(FilterExpression): +class PrefixExpression(BaseExpression): """An expression composed of a prefix operator and another expression.""" __slots__ = ("operator", "right") - def __init__(self, operator: str, right: FilterExpression): + def __init__(self, operator: str, right: BaseExpression): self.operator = operator self.right = right super().__init__() @@ -305,24 +303,24 @@ def evaluate(self, 
context: FilterContext) -> object: async def evaluate_async(self, context: FilterContext) -> object: return self._evaluate(context, await self.right.evaluate_async(context)) - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [self.right] - def set_children(self, children: List[FilterExpression]) -> None: + def set_children(self, children: List[BaseExpression]) -> None: assert len(children) == 1 self.right = children[0] -class InfixExpression(FilterExpression): +class InfixExpression(BaseExpression): """A pair of expressions and a comparison or logical operator.""" __slots__ = ("left", "operator", "right", "logical") def __init__( self, - left: FilterExpression, + left: BaseExpression, operator: str, - right: FilterExpression, + right: BaseExpression, ): self.left = left self.operator = operator @@ -365,10 +363,10 @@ async def evaluate_async(self, context: FilterContext) -> bool: return context.env.compare(left, self.operator, right) - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [self.left, self.right] - def set_children(self, children: List[FilterExpression]) -> None: + def set_children(self, children: List[BaseExpression]) -> None: assert len(children) == 2 # noqa: PLR2004 self.left = children[0] self.right = children[1] @@ -380,19 +378,19 @@ def set_children(self, children: List[FilterExpression]) -> None: PRECEDENCE_PREFIX = 7 -class BooleanExpression(FilterExpression): - """An expression that always evaluates to `True` or `False`.""" +class FilterExpression(BaseExpression): + """An expression that evaluates to `True` or `False`.""" __slots__ = ("expression",) - def __init__(self, expression: FilterExpression): + def __init__(self, expression: BaseExpression): self.expression = expression super().__init__() - def cache_tree(self) -> BooleanExpression: + def cache_tree(self) -> FilterExpression: """Return a copy of _self.expression_ augmented with caching 
nodes.""" - def _cache_tree(expr: FilterExpression) -> FilterExpression: + def _cache_tree(expr: BaseExpression) -> BaseExpression: children = expr.children() if expr.volatile: _expr = copy.copy(expr) @@ -403,7 +401,7 @@ def _cache_tree(expr: FilterExpression) -> FilterExpression: _expr.set_children([_cache_tree(child) for child in children]) return _expr - return BooleanExpression(_cache_tree(copy.copy(self.expression))) + return FilterExpression(_cache_tree(copy.copy(self.expression))) def cacheable_nodes(self) -> bool: """Return `True` if there are any cacheable nodes in this expression tree.""" @@ -417,11 +415,11 @@ def __str__(self) -> str: def __eq__(self, other: object) -> bool: return ( - isinstance(other, BooleanExpression) and self.expression == other.expression + isinstance(other, FilterExpression) and self.expression == other.expression ) def _canonical_string( - self, expression: FilterExpression, parent_precedence: int + self, expression: BaseExpression, parent_precedence: int ) -> str: if isinstance(expression, InfixExpression): if expression.operator == "&&": @@ -453,15 +451,15 @@ def evaluate(self, context: FilterContext) -> bool: async def evaluate_async(self, context: FilterContext) -> bool: return context.env.is_truthy(await self.expression.evaluate_async(context)) - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [self.expression] - def set_children(self, children: List[FilterExpression]) -> None: + def set_children(self, children: List[BaseExpression]) -> None: assert len(children) == 1 self.expression = children[0] -class CachingFilterExpression(FilterExpression): +class CachingFilterExpression(BaseExpression): """A FilterExpression wrapper that caches the result.""" __slots__ = ( @@ -471,7 +469,7 @@ class CachingFilterExpression(FilterExpression): _UNSET = object() - def __init__(self, expression: FilterExpression): + def __init__(self, expression: BaseExpression): self.volatile = False 
self._expr = expression self._cached: object = self._UNSET @@ -486,14 +484,14 @@ async def evaluate_async(self, context: FilterContext) -> object: self._cached = await self._expr.evaluate_async(context) return self._cached - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return self._expr.children() - def set_children(self, children: List[FilterExpression]) -> None: + def set_children(self, children: List[BaseExpression]) -> None: self._expr.set_children(children) -class FilterQuery(FilterExpression, ABC): +class FilterQuery(BaseExpression, ABC): """Base expression for all _sub paths_ found in filter expressions.""" __slots__ = ("path",) @@ -505,15 +503,15 @@ def __init__(self, path: JSONPath) -> None: def __eq__(self, other: object) -> bool: return isinstance(other, FilterQuery) and str(self) == str(other) - def children(self) -> List[FilterExpression]: - _children: List[FilterExpression] = [] + def children(self) -> List[BaseExpression]: + _children: List[BaseExpression] = [] for segment in self.path.segments: for selector in segment.selectors: if isinstance(selector, FilterSelector): _children.append(selector.expression) return _children - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 # self.path has its own cache return @@ -637,12 +635,12 @@ async def evaluate_async(self, context: FilterContext) -> object: ) -class FunctionExtension(FilterExpression): +class FunctionExtension(BaseExpression): """A filter function.""" __slots__ = ("name", "args") - def __init__(self, name: str, args: Sequence[FilterExpression]) -> None: + def __init__(self, name: str, args: Sequence[BaseExpression]) -> None: self.name = name self.args = args super().__init__() @@ -709,15 +707,15 @@ def _unpack_node_lists( for obj in args ] - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: 
return list(self.args) - def set_children(self, children: List[FilterExpression]) -> None: + def set_children(self, children: List[BaseExpression]) -> None: assert len(children) == len(self.args) self.args = children -class CurrentKey(FilterExpression): +class CurrentKey(BaseExpression): """The key/property or index associated with the current object.""" __slots__ = () @@ -740,17 +738,17 @@ def evaluate(self, context: FilterContext) -> object: async def evaluate_async(self, context: FilterContext) -> object: return self.evaluate(context) - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [] - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 return CURRENT_KEY = CurrentKey() -def walk(expr: FilterExpression) -> Iterable[FilterExpression]: +def walk(expr: BaseExpression) -> Iterable[BaseExpression]: """Walk the filter expression tree starting at _expr_.""" yield expr for child in expr.children(): @@ -760,7 +758,7 @@ def walk(expr: FilterExpression) -> Iterable[FilterExpression]: VALUE_TYPE_EXPRESSIONS = ( Nil, Undefined, - Literal, + FilterExpressionLiteral, ListLiteral, CurrentKey, ) diff --git a/jsonpath/parse.py b/jsonpath/parse.py index 26d1126..b0a1725 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -23,16 +23,16 @@ from .filter import NIL from .filter import TRUE from .filter import UNDEFINED_LITERAL -from .filter import BooleanExpression +from .filter import BaseExpression from .filter import FilterContextPath from .filter import FilterExpression +from .filter import FilterExpressionLiteral from .filter import FilterQuery from .filter import FloatLiteral from .filter import FunctionExtension from .filter import InfixExpression from .filter import IntegerLiteral from .filter import ListLiteral -from .filter import Literal from .filter import Nil from .filter import PrefixExpression from 
.filter import RegexLiteral @@ -49,10 +49,10 @@ from .selectors import KeySelector from .selectors import KeysFilter from .selectors import KeysSelector -from .selectors import PropertySelector +from .selectors import NameSelector from .selectors import SingularQuerySelector from .selectors import SliceSelector -from .selectors import WildSelector +from .selectors import WildcardSelector from .token import TOKEN_AND from .token import TOKEN_COLON from .token import TOKEN_COMMA @@ -238,7 +238,7 @@ class Parser: def __init__(self, *, env: JSONPathEnvironment) -> None: self.env = env - self.token_map: Dict[str, Callable[[TokenStream], FilterExpression]] = { + self.token_map: Dict[str, Callable[[TokenStream], BaseExpression]] = { TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, TOKEN_PSEUDO_ROOT: self.parse_absolute_query, TOKEN_FALSE: self.parse_boolean, @@ -262,7 +262,7 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: TOKEN_UNDEFINED: self.parse_undefined, } - self.list_item_map: Dict[str, Callable[[TokenStream], FilterExpression]] = { + self.list_item_map: Dict[str, Callable[[TokenStream], BaseExpression]] = { TOKEN_FALSE: self.parse_boolean, TOKEN_FLOAT: self.parse_float_literal, TOKEN_INT: self.parse_integer_literal, @@ -275,7 +275,7 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: } self.function_argument_map: Dict[ - str, Callable[[TokenStream], FilterExpression] + str, Callable[[TokenStream], BaseExpression] ] = { TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, TOKEN_PSEUDO_ROOT: self.parse_absolute_query, @@ -358,7 +358,7 @@ def parse_selector(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: if token.kind == TOKEN_NAME: return ( - PropertySelector( + NameSelector( env=self.env, token=token, name=token.value, @@ -376,7 +376,7 @@ def parse_selector(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: if token.kind == TOKEN_WILD: return ( - WildSelector( + WildcardSelector( env=self.env, token=token, ), @@ -439,7 
+439,7 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto TOKEN_SINGLE_QUOTE_STRING, ): selectors.append( - PropertySelector( + NameSelector( env=self.env, token=token, name=self._decode_string_literal(token), @@ -449,7 +449,7 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto elif token.kind == TOKEN_COLON: selectors.append(self.parse_slice(stream)) elif token.kind == TOKEN_WILD: - selectors.append(WildSelector(env=self.env, token=token)) + selectors.append(WildcardSelector(env=self.env, token=token)) stream.next() elif token.kind == TOKEN_KEYS: stream.eat(TOKEN_KEYS) @@ -572,7 +572,7 @@ def parse_filter_selector( f"result of {expr.name}() must be compared", token=token ) - if isinstance(expr, (Literal, Nil)): + if isinstance(expr, (FilterExpressionLiteral, Nil)): raise JSONPathSyntaxError( "filter expression literals outside of " "function expressions must be compared", @@ -581,35 +581,35 @@ def parse_filter_selector( if keys: return KeysFilter( - env=self.env, token=token, expression=BooleanExpression(expr) + env=self.env, token=token, expression=FilterExpression(expr) ) - return Filter(env=self.env, token=token, expression=BooleanExpression(expr)) + return Filter(env=self.env, token=token, expression=FilterExpression(expr)) - def parse_boolean(self, stream: TokenStream) -> FilterExpression: + def parse_boolean(self, stream: TokenStream) -> BaseExpression: if stream.next().kind == TOKEN_TRUE: return TRUE return FALSE - def parse_nil(self, stream: TokenStream) -> FilterExpression: + def parse_nil(self, stream: TokenStream) -> BaseExpression: stream.next() return NIL - def parse_undefined(self, stream: TokenStream) -> FilterExpression: + def parse_undefined(self, stream: TokenStream) -> BaseExpression: stream.next() return UNDEFINED_LITERAL - def parse_string_literal(self, stream: TokenStream) -> FilterExpression: + def parse_string_literal(self, stream: TokenStream) -> BaseExpression: return 
StringLiteral(value=self._decode_string_literal(stream.next())) - def parse_integer_literal(self, stream: TokenStream) -> FilterExpression: + def parse_integer_literal(self, stream: TokenStream) -> BaseExpression: # Convert to float first to handle scientific notation. return IntegerLiteral(value=int(float(stream.next().value))) - def parse_float_literal(self, stream: TokenStream) -> FilterExpression: + def parse_float_literal(self, stream: TokenStream) -> BaseExpression: return FloatLiteral(value=float(stream.next().value)) - def parse_prefix_expression(self, stream: TokenStream) -> FilterExpression: + def parse_prefix_expression(self, stream: TokenStream) -> BaseExpression: token = stream.next() assert token.kind == TOKEN_NOT return PrefixExpression( @@ -620,8 +620,8 @@ def parse_prefix_expression(self, stream: TokenStream) -> FilterExpression: ) def parse_infix_expression( - self, stream: TokenStream, left: FilterExpression - ) -> FilterExpression: + self, stream: TokenStream, left: BaseExpression + ) -> BaseExpression: token = stream.next() precedence = self.PRECEDENCES.get(token.kind, self.PRECEDENCE_LOWEST) right = self.parse_filter_expression(stream, precedence) @@ -632,13 +632,13 @@ def parse_infix_expression( self._raise_for_non_comparable_function(right, token) if operator not in self.INFIX_LITERAL_OPERATORS: - if isinstance(left, (Literal, Nil)): + if isinstance(left, (FilterExpressionLiteral, Nil)): raise JSONPathSyntaxError( "filter expression literals outside of " "function expressions must be compared", token=token, ) - if isinstance(right, (Literal, Nil)): + if isinstance(right, (FilterExpressionLiteral, Nil)): raise JSONPathSyntaxError( "filter expression literals outside of " "function expressions must be compared", @@ -647,7 +647,7 @@ def parse_infix_expression( return InfixExpression(left, operator, right) - def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression: + def parse_grouped_expression(self, stream: TokenStream) -> 
BaseExpression: stream.eat(TOKEN_LPAREN) expr = self.parse_filter_expression(stream) @@ -667,7 +667,7 @@ def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression: stream.eat(TOKEN_RPAREN) return expr - def parse_absolute_query(self, stream: TokenStream) -> FilterExpression: + def parse_absolute_query(self, stream: TokenStream) -> BaseExpression: root = stream.next() return RootFilterQuery( JSONPath( @@ -677,7 +677,7 @@ def parse_absolute_query(self, stream: TokenStream) -> FilterExpression: ) ) - def parse_relative_query(self, stream: TokenStream) -> FilterExpression: + def parse_relative_query(self, stream: TokenStream) -> BaseExpression: stream.eat(TOKEN_SELF) return RelativeFilterQuery( JSONPath(env=self.env, segments=self.parse_query(stream)) @@ -704,17 +704,17 @@ def parse_singular_query_selector( query=query, ) - def parse_current_key(self, stream: TokenStream) -> FilterExpression: + def parse_current_key(self, stream: TokenStream) -> BaseExpression: stream.next() return CURRENT_KEY - def parse_filter_context_path(self, stream: TokenStream) -> FilterExpression: + def parse_filter_context_path(self, stream: TokenStream) -> BaseExpression: stream.next() return FilterContextPath( JSONPath(env=self.env, segments=self.parse_query(stream)) ) - def parse_regex(self, stream: TokenStream) -> FilterExpression: + def parse_regex(self, stream: TokenStream) -> BaseExpression: pattern = stream.current().value flags = 0 if stream.peek().kind == TOKEN_RE_FLAGS: @@ -723,9 +723,9 @@ def parse_regex(self, stream: TokenStream) -> FilterExpression: flags |= self.RE_FLAG_MAP[flag] return RegexLiteral(value=re.compile(pattern, flags)) - def parse_list_literal(self, stream: TokenStream) -> FilterExpression: + def parse_list_literal(self, stream: TokenStream) -> BaseExpression: stream.eat(TOKEN_LBRACKET) - list_items: List[FilterExpression] = [] + list_items: List[BaseExpression] = [] while True: stream.skip_whitespace() @@ -749,8 +749,8 @@ def 
parse_list_literal(self, stream: TokenStream) -> FilterExpression: stream.eat(TOKEN_RBRACKET) return ListLiteral(list_items) - def parse_function_extension(self, stream: TokenStream) -> FilterExpression: - function_arguments: List[FilterExpression] = [] + def parse_function_extension(self, stream: TokenStream) -> BaseExpression: + function_arguments: List[BaseExpression] = [] function_token = stream.next() stream.eat(TOKEN_LPAREN) @@ -791,7 +791,7 @@ def parse_function_extension(self, stream: TokenStream) -> FilterExpression: def parse_filter_expression( self, stream: TokenStream, precedence: int = PRECEDENCE_LOWEST - ) -> FilterExpression: + ) -> BaseExpression: stream.skip_whitespace() token = stream.current() @@ -834,7 +834,7 @@ def _decode_string_literal(self, token: Token) -> str: return token.value def _raise_for_non_comparable_function( - self, expr: FilterExpression, token: Token + self, expr: BaseExpression, token: Token ) -> None: if isinstance(expr, FilterQuery) and not expr.path.singular_query(): raise JSONPathTypeError("non-singular query is not comparable", token=token) diff --git a/jsonpath/path.py b/jsonpath/path.py index 2b9cf70..3f4e950 100644 --- a/jsonpath/path.py +++ b/jsonpath/path.py @@ -20,7 +20,7 @@ from jsonpath.match import JSONPathMatch from jsonpath.segments import JSONPathRecursiveDescentSegment from jsonpath.selectors import IndexSelector -from jsonpath.selectors import PropertySelector +from jsonpath.selectors import NameSelector if TYPE_CHECKING: from io import IOBase @@ -244,7 +244,7 @@ def singular_query(self) -> bool: return False if len(segment.selectors) == 1 and isinstance( - segment.selectors[0], (PropertySelector, IndexSelector) + segment.selectors[0], (NameSelector, IndexSelector) ): continue @@ -471,6 +471,9 @@ def intersection(self, path: JSONPath) -> CompoundJSONPath: paths=self.paths + ((self.env.intersection_token, path),), ) + # TODO: implement empty and singular for CompoundJSONPath + # TODO: add a `segments` 
property returning segments from all paths + T = TypeVar("T") diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index b7bd90e..4a64d75 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -21,13 +21,11 @@ if TYPE_CHECKING: from .env import JSONPathEnvironment - from .filter import BooleanExpression + from .filter import FilterExpression from .match import JSONPathMatch from .path import JSONPath from .token import Token -# ruff: noqa: D102 - class JSONPathSelector(ABC): """Base class for all JSONPath segments and selectors.""" @@ -55,8 +53,8 @@ def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: """An async version of `resolve`.""" -class PropertySelector(JSONPathSelector): - """A shorthand or bracketed property selector.""" +class NameSelector(JSONPathSelector): + """Select at most one object member value given an object member name.""" __slots__ = ("name",) @@ -69,7 +67,7 @@ def __str__(self) -> str: def __eq__(self, __value: object) -> bool: return ( - isinstance(__value, PropertySelector) + isinstance(__value, NameSelector) and self.name == __value.name and self.token == __value.token ) @@ -95,7 +93,7 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc class IndexSelector(JSONPathSelector): - """Select an element from an array by index.""" + """Select at most one array element value given an index.""" __slots__ = ("index", "_as_key") @@ -171,7 +169,11 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc class KeySelector(JSONPathSelector): - """Select a single mapping/object name/key. + """Select at most one name from an object member, given the name. + + The key selector is introduced to facilitate valid normalized paths for nodes + produced by the "keys selector" and the "keys filter selector". It is not expected + to be of much use elsewhere. NOTE: This is a non-standard selector. 
@@ -216,7 +218,7 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc class KeysSelector(JSONPathSelector): - """Select mapping/object keys/properties. + """Select all names from an object's name/value members. NOTE: This is a non-standard selector. @@ -260,7 +262,7 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc class SliceSelector(JSONPathSelector): - """Sequence slicing selector.""" + """Select array elements given a start index, a stop index and a step.""" __slots__ = ("slice",) @@ -325,8 +327,8 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc yield match -class WildSelector(JSONPathSelector): - """Select all items from a sequence/array or values from a mapping/object.""" +class WildcardSelector(JSONPathSelector): + """Select nodes of all children of an object or array.""" __slots__ = () @@ -334,7 +336,7 @@ def __str__(self) -> str: return "*" def __eq__(self, __value: object) -> bool: - return isinstance(__value, WildSelector) and self.token == __value.token + return isinstance(__value, WildcardSelector) and self.token == __value.token def __hash__(self) -> int: return hash(self.token) @@ -413,7 +415,7 @@ def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: node.add_child(match) yield match - if isinstance(node.obj, Sequence): + if isinstance(node.obj, Sequence) and not isinstance(node.obj, str): nodes = NodeList(self.query.finditer(node.root)) if nodes.empty(): @@ -452,7 +454,7 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc node.add_child(match) yield match - if isinstance(node.obj, Sequence): + if isinstance(node.obj, Sequence) and not isinstance(node.obj, str): nodes = NodeList( [match async for match in await self.query.finditer_async(node.root)] ) @@ -481,7 +483,7 @@ def _normalized_index(self, obj: Sequence[object], index: int) -> int: class Filter(JSONPathSelector): - """Filter sequence/array items or 
mapping/object values with a filter expression.""" + """Select array elements or object values according to a filter expression.""" __slots__ = ("expression", "cacheable_nodes") @@ -490,7 +492,7 @@ def __init__( *, env: JSONPathEnvironment, token: Token, - expression: BooleanExpression, + expression: FilterExpression, ) -> None: super().__init__(env=env, token=token) self.expression = expression @@ -605,7 +607,7 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc class KeysFilter(JSONPathSelector): - """Selects names from an object’s name/value members. + """Selects names from an object's name/value members. NOTE: This is a non-standard selector. @@ -619,7 +621,7 @@ def __init__( *, env: JSONPathEnvironment, token: Token, - expression: BooleanExpression, + expression: FilterExpression, ) -> None: super().__init__(env=env, token=token) self.expression = expression diff --git a/pyproject.toml b/pyproject.toml index fe30c78..8dc6475 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -186,4 +186,6 @@ convention = "google" [tool.ruff.lint.per-file-ignores] "jsonpath/__about__.py" = ["D100"] "jsonpath/__init__.py" = ["D104"] +"jsonpath/selectors.py" = ["D102"] +"jsonpath/filter.py" = ["D102", "PLW1641"] "tests/*" = ["D100", "D101", "D104", "D103"] diff --git a/tests/test_filter_expression_caching.py b/tests/test_filter_expression_caching.py index 0ba64ea..b91f7a3 100644 --- a/tests/test_filter_expression_caching.py +++ b/tests/test_filter_expression_caching.py @@ -4,7 +4,7 @@ from jsonpath import JSONPath from jsonpath import JSONPathEnvironment -from jsonpath.filter import BooleanExpression +from jsonpath.filter import BaseExpression from jsonpath.filter import CachingFilterExpression from jsonpath.filter import FilterContextPath from jsonpath.filter import FilterExpression @@ -28,8 +28,8 @@ def test_cache_root_path() -> None: assert filter_selector.cacheable_nodes is True # The original expression tree without caching nodes. 
- expr: FilterExpression = filter_selector.expression - assert isinstance(expr, BooleanExpression) + expr: BaseExpression = filter_selector.expression + assert isinstance(expr, FilterExpression) expr = expr.expression assert isinstance(expr, InfixExpression) assert isinstance(expr.left, RelativeFilterQuery) @@ -37,7 +37,7 @@ def test_cache_root_path() -> None: # A caching copy of the original expression tree. expr = filter_selector.expression.cache_tree() - assert isinstance(expr, BooleanExpression) + assert isinstance(expr, FilterExpression) expr = expr.expression assert isinstance(expr, InfixExpression) assert isinstance(expr.left, RelativeFilterQuery) @@ -83,8 +83,8 @@ def test_cache_context_path() -> None: assert filter_selector.cacheable_nodes is True # The original expression tree without caching nodes. - expr: FilterExpression = filter_selector.expression - assert isinstance(expr, BooleanExpression) + expr: BaseExpression = filter_selector.expression + assert isinstance(expr, FilterExpression) expr = expr.expression assert isinstance(expr, InfixExpression) assert isinstance(expr.left, FilterContextPath) @@ -92,7 +92,7 @@ def test_cache_context_path() -> None: # A caching copy of the original expression tree. expr = filter_selector.expression.cache_tree() - assert isinstance(expr, BooleanExpression) + assert isinstance(expr, FilterExpression) expr = expr.expression assert isinstance(expr, InfixExpression) assert isinstance(expr.left, CachingFilterExpression) @@ -154,8 +154,8 @@ def test_uncacheable_filter() -> None: assert filter_selector.cacheable_nodes is False # The original expression tree without caching nodes. 
- expr: FilterExpression = filter_selector.expression - assert isinstance(expr, BooleanExpression) + expr: BaseExpression = filter_selector.expression + assert isinstance(expr, FilterExpression) expr = expr.expression assert isinstance(expr, InfixExpression) assert isinstance(expr.left, InfixExpression) diff --git a/tests/test_walk_filter_expression_tree.py b/tests/test_walk_filter_expression_tree.py index 0dad737..90acb6d 100644 --- a/tests/test_walk_filter_expression_tree.py +++ b/tests/test_walk_filter_expression_tree.py @@ -7,7 +7,7 @@ import pytest import jsonpath -from jsonpath.filter import FilterExpression +from jsonpath.filter import BaseExpression from jsonpath.filter import walk from jsonpath.selectors import Filter as FilterSelector @@ -53,7 +53,7 @@ class Case: ] -def is_volatile(expr: FilterExpression) -> bool: +def is_volatile(expr: BaseExpression) -> bool: return any(expr.volatile for expr in walk(expr)) From a5605a17662ac1996403c301a6aedcc6158f96cd Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 14 Aug 2025 12:52:26 +0100 Subject: [PATCH 10/29] Introduce strict mode and use regex if available --- jsonpath/env.py | 18 ++++++++++-- jsonpath/function_extensions/match.py | 5 +++- jsonpath/function_extensions/search.py | 5 +++- jsonpath/parse.py | 26 +++++++++++++++-- jsonpath/selectors.py | 35 +++++++++++------------ jsonpath/stream.py | 4 ++- pyproject.toml | 8 ++++-- tests/test_compliance.py | 39 +++++++++++--------------- tests/test_strictness.py | 26 +++++++++++++++++ 9 files changed, 115 insertions(+), 51 deletions(-) create mode 100644 tests/test_strictness.py diff --git a/jsonpath/env.py b/jsonpath/env.py index 1eb28b4..e723c61 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -88,7 +88,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`. well-typedness as compile time. **New in version 0.10.0** - + strict: When `True`, follow RFC 9535 strictly. 
+ **New in version 2.0.0** ## Class attributes Attributes: @@ -143,6 +144,7 @@ def __init__( self, filter_caching: bool = True, unicode_escape: bool = True, well_typed: bool = True, + strict: bool = False, ) -> None: self.filter_caching: bool = filter_caching """Enable or disable filter expression caching.""" @@ -154,6 +156,14 @@ def __init__( self.well_typed: bool = well_typed """Control well-typedness checks on filter function expressions.""" + self.strict: bool = strict + """When `True`, follow RFC 9535 strictly. + + This includes things like enforcing a leading root identifier and + ensuring there's no leading or trailing whitespace when parsing a + JSONPath query. + """ + self.lexer: Lexer = self.lexer_class(env=self) """The lexer bound to this environment.""" @@ -188,8 +198,10 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 env=self, segments=self.parser.parse(stream), pseudo_root=pseudo_root ) - # TODO: Optionally raise for trailing whitespace - stream.skip_whitespace() + if stream.skip_whitespace() and self.strict: + raise JSONPathSyntaxError( + "unexpected whitespace", token=stream.tokens[stream.pos - 1] + ) # TODO: better! 
if stream.current().kind != TOKEN_EOF: diff --git a/jsonpath/function_extensions/match.py b/jsonpath/function_extensions/match.py index 7bc8749..e8e5f2b 100644 --- a/jsonpath/function_extensions/match.py +++ b/jsonpath/function_extensions/match.py @@ -1,6 +1,9 @@ """The standard `match` function extension.""" -import re +try: + import regex as re +except ImportError: + import re # type: ignore from jsonpath.function_extensions import ExpressionType from jsonpath.function_extensions import FilterFunction diff --git a/jsonpath/function_extensions/search.py b/jsonpath/function_extensions/search.py index ed88635..5b61e8f 100644 --- a/jsonpath/function_extensions/search.py +++ b/jsonpath/function_extensions/search.py @@ -1,6 +1,9 @@ """The standard `search` function extension.""" -import re +try: + import regex as re +except ImportError: + import re # type: ignore from jsonpath.function_extensions import ExpressionType from jsonpath.function_extensions import FilterFunction diff --git a/jsonpath/parse.py b/jsonpath/parse.py index b0a1725..cd78ecb 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -101,6 +101,7 @@ from .token import TOKEN_TRUE from .token import TOKEN_UNDEFINED from .token import TOKEN_UNION +from .token import TOKEN_WHITESPACE from .token import TOKEN_WILD from .token import Token @@ -295,10 +296,29 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: } def parse(self, stream: TokenStream) -> Iterator[JSONPathSegment]: - """Parse a JSONPath from a stream of tokens.""" - # TODO: Optionally require TOKEN_ROOT - if stream.current().kind in {TOKEN_ROOT, TOKEN_PSEUDO_ROOT}: + """Parse a JSONPath query from a stream of tokens.""" + if stream.skip_whitespace() and self.env.strict: + raise JSONPathSyntaxError( + "unexpected leading whitespace", token=stream.current() + ) + + if ( + self.env.strict + and len(stream.tokens) + and stream.tokens[-1].kind == TOKEN_WHITESPACE + ): + raise JSONPathSyntaxError( + "unexpected trailing whitespace", 
token=stream.tokens[-1] + ) + + token = stream.current() + + if token.kind == TOKEN_ROOT or ( + token.kind == TOKEN_PSEUDO_ROOT and not self.env.strict + ): stream.next() + elif self.env.strict: + stream.expect(TOKEN_ROOT) yield from self.parse_query(stream) diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index 4a64d75..3eb033f 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -130,15 +130,15 @@ def _normalized_index(self, obj: Sequence[object]) -> int: return self.index def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: - # TODO: Optionally try string representation of int - # if isinstance(node.obj, Mapping): - # # Try the string representation of the index as a key. - # with suppress(KeyError): - # match = node.new_child( - # self.env.getitem(node.obj, self._as_key), self.index - # ) - # node.add_child(match) - # yield match + # Optionally try string representation of int + if not self.env.strict and isinstance(node.obj, Mapping): + # Try the string representation of the index as a key. + with suppress(KeyError): + match = node.new_child( + self.env.getitem(node.obj, self._as_key), self.index + ) + node.add_child(match) + yield match if isinstance(node.obj, Sequence) and not isinstance(node.obj, str): norm_index = self._normalized_index(node.obj) with suppress(IndexError): @@ -149,15 +149,14 @@ def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: yield match async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: - # XXX - # if isinstance(node.obj, Mapping): - # # Try the string representation of the index as a key. - # with suppress(KeyError): - # match = node.new_child( - # await self.env.getitem_async(node.obj, self._as_key), self.index - # ) - # node.add_child(match) - # yield match + if not self.env.strict and isinstance(node.obj, Mapping): + # Try the string representation of the index as a key. 
+ with suppress(KeyError): + match = node.new_child( + await self.env.getitem_async(node.obj, self._as_key), self.index + ) + node.add_child(match) + yield match if isinstance(node.obj, Sequence) and not isinstance(node.obj, str): norm_index = self._normalized_index(node.obj) with suppress(IndexError): diff --git a/jsonpath/stream.py b/jsonpath/stream.py index 93ddf93..a703d9c 100644 --- a/jsonpath/stream.py +++ b/jsonpath/stream.py @@ -89,7 +89,9 @@ def expect_peek_not(self, typ: str, message: str) -> None: if self.peek().kind == typ: raise JSONPathSyntaxError(message, token=self.peek()) - def skip_whitespace(self) -> None: + def skip_whitespace(self) -> bool: """Skip whitespace.""" if self.current().kind == TOKEN_WHITESPACE: self.pos += 1 + return True + return False diff --git a/pyproject.toml b/pyproject.toml index 8dc6475..97a2916 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,11 +48,12 @@ include = ["/jsonpath"] dependencies = [ "pytest", "pytest-cov", - "black", "mypy", - "ipython", + "regex", + "iregexp-check", "pyyaml", "types-pyyaml", + "types-regex", "twine", "ruff", ] @@ -78,6 +79,9 @@ dependencies = ["black", "mkdocs", "mkdocstrings[python]", "mkdocs-material"] build = "mkdocs build --clean --strict" serve = "mkdocs serve --dev-addr localhost:8000" +[tool.hatch.envs.no-regex] +dependencies = ["pytest"] + [tool.coverage.run] branch = true parallel = true diff --git a/tests/test_compliance.py b/tests/test_compliance.py index 6430f07..9463571 100644 --- a/tests/test_compliance.py +++ b/tests/test_compliance.py @@ -18,7 +18,9 @@ import pytest -import jsonpath +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathError +from jsonpath import NodeList @dataclass @@ -35,10 +37,6 @@ class Case: SKIP = { - # "basic, no leading whitespace": "flexible whitespace policy", - "basic, no trailing whitespace": "flexible whitespace policy", - # "basic, bald descendant segment": "almost has a consensus", - # "filter, index segment on object, 
selects nothing": "flexible selector policy", "functions, match, dot matcher on \\u2028": "standard library re policy", "functions, match, dot matcher on \\u2029": "standard library re policy", "functions, search, dot matcher on \\u2028": "standard library re policy", @@ -76,14 +74,6 @@ class Case: "name selector, double quotes, non-surrogate surrogate": "expected behavior policy", "name selector, double quotes, surrogate supplementary": "expected behavior policy", "name selector, double quotes, supplementary surrogate": "expected behavior policy", - # "whitespace, selectors, space between dot and name": "flexible whitespace policy", # noqa: E501 - # "whitespace, selectors, newline between dot and name": "flexible whitespace policy", # noqa: E501 - # "whitespace, selectors, tab between dot and name": "flexible whitespace policy", # noqa: E501 - # "whitespace, selectors, return between dot and name": "flexible whitespace policy", # noqa: E501 - # "whitespace, selectors, space between recursive descent and name": "flexible whitespace policy", # noqa: E501 - # "whitespace, selectors, newline between recursive descent and name": "flexible whitespace policy", # noqa: E501 - # "whitespace, selectors, tab between recursive descent and name": "flexible whitespace policy", # noqa: E501 - # "whitespace, selectors, return between recursive descent and name": "flexible whitespace policy", # noqa: E501 } @@ -101,13 +91,18 @@ def invalid_cases() -> List[Case]: return [case for case in cases() if case.invalid_selector] +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=True) + + @pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name")) -def test_compliance(case: Case) -> None: +def test_compliance(env: JSONPathEnvironment, case: Case) -> None: if case.name in SKIP: pytest.skip(reason=SKIP[case.name]) assert case.document is not None - nodes = jsonpath.NodeList(jsonpath.finditer(case.selector, case.document)) + nodes = 
NodeList(env.finditer(case.selector, case.document)) if case.results is not None: assert case.results_paths is not None @@ -120,14 +115,14 @@ def test_compliance(case: Case) -> None: @pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name")) -def test_compliance_async(case: Case) -> None: +def test_compliance_async(env: JSONPathEnvironment, case: Case) -> None: if case.name in SKIP: pytest.skip(reason=SKIP[case.name]) - async def coro() -> jsonpath.NodeList: + async def coro() -> NodeList: assert case.document is not None - it = await jsonpath.finditer_async(case.selector, case.document) - return jsonpath.NodeList([node async for node in it]) + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) nodes = asyncio.run(coro()) @@ -142,9 +137,9 @@ async def coro() -> jsonpath.NodeList: @pytest.mark.parametrize("case", invalid_cases(), ids=operator.attrgetter("name")) -def test_invalid_selectors(case: Case) -> None: +def test_invalid_selectors(env: JSONPathEnvironment, case: Case) -> None: if case.name in SKIP: pytest.skip(reason=SKIP[case.name]) - with pytest.raises(jsonpath.JSONPathError): - jsonpath.compile(case.selector) + with pytest.raises(JSONPathError): + env.compile(case.selector) diff --git a/tests/test_strictness.py b/tests/test_strictness.py new file mode 100644 index 0000000..6e7480b --- /dev/null +++ b/tests/test_strictness.py @@ -0,0 +1,26 @@ +import pytest + +from jsonpath import JSONPathEnvironment + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +def test_leading_whitespace(env: JSONPathEnvironment) -> None: + query = " $.a" + data = {"a": 1} + assert env.findall(query, data) == [1] + + +def test_trailing_whitespace(env: JSONPathEnvironment) -> None: + query = "$.a " + data = {"a": 1} + assert env.findall(query, data) == [1] + + +def test_index_as_object_name(env: JSONPathEnvironment) -> None: + query = "$.a[0]" + data = 
{"a": {"0": 1}} + assert env.findall(query, data) == [1] From e98453d2ab6f6536bc6c5d5fd55c911f983a1bf1 Mon Sep 17 00:00:00 2001 From: James Prior Date: Sat, 16 Aug 2025 09:44:43 +0100 Subject: [PATCH 11/29] Add strict lexer rules --- jsonpath/env.py | 31 +++++++++++-- jsonpath/function_extensions/_pattern.py | 31 +++++++++++++ jsonpath/function_extensions/match.py | 7 ++- jsonpath/function_extensions/search.py | 7 ++- jsonpath/lex.py | 58 +++++++++++++++++++++++- tests/test_compliance.py | 35 ++++++++------ 6 files changed, 147 insertions(+), 22 deletions(-) create mode 100644 jsonpath/function_extensions/_pattern.py diff --git a/jsonpath/env.py b/jsonpath/env.py index e723c61..98ff94f 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -2,7 +2,20 @@ from __future__ import annotations -import re +try: + import regex # noqa: F401 + + REGEX_AVAILABLE = True +except ImportError: + REGEX_AVAILABLE = False + +try: + import iregexp_check # noqa: F401 + + IREGEXP_AVAILABLE = True +except ImportError: + IREGEXP_AVAILABLE = False + from decimal import Decimal from operator import getitem from typing import TYPE_CHECKING @@ -90,6 +103,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`. **New in version 0.10.0** strict: When `True`, follow RFC 9535 strictly. **New in version 2.0.0** + ## Class attributes Attributes: @@ -160,10 +174,20 @@ def __init__( """When `True`, follow RFC 9535 strictly. This includes things like enforcing a leading root identifier and - ensuring there's not leading or trailing whitespace when parsing a + ensuring there's no leading or trailing whitespace when parsing a JSONPath query. """ + self.regex_available: bool = REGEX_AVAILABLE + """When `True`, the third party `regex` package is available.""" + + self.iregexp_available: bool = IREGEXP_AVAILABLE + """When `True`, the iregexp_check package is available. + + iregexp_check will be used to validate regular expressions against RFC 9485, + if available. 
+ """ + self.lexer: Lexer = self.lexer_class(env=self) """The lexer bound to this environment.""" @@ -589,7 +613,8 @@ def compare( # noqa: PLR0911 return left in right if operator == "contains" and isinstance(left, (Mapping, Sequence)): return right in left - if operator == "=~" and isinstance(right, re.Pattern) and isinstance(left, str): + if operator == "=~" and hasattr(right, "fullmatch") and isinstance(left, str): + # Right should be a regex.Pattern or an re.Pattern. return bool(right.fullmatch(left)) return False diff --git a/jsonpath/function_extensions/_pattern.py b/jsonpath/function_extensions/_pattern.py new file mode 100644 index 0000000..a42a689 --- /dev/null +++ b/jsonpath/function_extensions/_pattern.py @@ -0,0 +1,31 @@ +from typing import List + + +def map_re(pattern: str) -> str: + escaped = False + char_class = False + parts: List[str] = [] + for ch in pattern: + if escaped: + parts.append(ch) + escaped = False + continue + + if ch == ".": + if not char_class: + parts.append(r"(?:(?![\r\n])\P{Cs}|\p{Cs}\p{Cs})") + else: + parts.append(ch) + elif ch == "\\": + escaped = True + parts.append(ch) + elif ch == "[": + char_class = True + parts.append(ch) + elif ch == "]": + char_class = False + parts.append(ch) + else: + parts.append(ch) + + return "".join(parts) diff --git a/jsonpath/function_extensions/match.py b/jsonpath/function_extensions/match.py index e8e5f2b..cc7069f 100644 --- a/jsonpath/function_extensions/match.py +++ b/jsonpath/function_extensions/match.py @@ -8,6 +8,8 @@ from jsonpath.function_extensions import ExpressionType from jsonpath.function_extensions import FilterFunction +from ._pattern import map_re + class Match(FilterFunction): """A type-aware implementation of the standard `match` function.""" @@ -18,7 +20,8 @@ class Match(FilterFunction): def __call__(self, string: str, pattern: str) -> bool: """Return `True` if _string_ matches _pattern_, or `False` otherwise.""" try: - # re.fullmatch caches compiled patterns internally - 
return bool(re.fullmatch(pattern, string)) + # XXX: re.fullmatch caches compiled patterns internally, but `map_re` is not + # cached. + return bool(re.fullmatch(map_re(pattern), string)) except (TypeError, re.error): return False diff --git a/jsonpath/function_extensions/search.py b/jsonpath/function_extensions/search.py index 5b61e8f..c554600 100644 --- a/jsonpath/function_extensions/search.py +++ b/jsonpath/function_extensions/search.py @@ -8,6 +8,8 @@ from jsonpath.function_extensions import ExpressionType from jsonpath.function_extensions import FilterFunction +from ._pattern import map_re + class Search(FilterFunction): """A type-aware implementation of the standard `search` function.""" @@ -18,7 +20,8 @@ class Search(FilterFunction): def __call__(self, string: str, pattern: str) -> bool: """Return `True` if _string_ contains _pattern_, or `False` otherwise.""" try: - # re.search caches compiled patterns internally - return bool(re.search(pattern, string)) + # XXX: re.search caches compiled patterns internally, but `map_re` is not + # cached. 
+ return bool(re.search(map_re(pattern), string)) except (TypeError, re.error): return False diff --git a/jsonpath/lex.py b/jsonpath/lex.py index 3c3dfb0..f414b34 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -118,7 +118,7 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: # func( self.function_pattern = r"(?P[a-z][a-z_0-9]+)(?P\()" - self.rules = self.compile_rules() + self.rules = self.compile_strict_rules() if env.strict else self.compile_rules() def compile_rules(self) -> Pattern[str]: """Prepare regular expression rules.""" @@ -190,6 +190,62 @@ def compile_rules(self) -> Pattern[str]: re.DOTALL, ) + def compile_strict_rules(self) -> Pattern[str]: + """Prepare regular expression rules in strict mode.""" + env_tokens = [ + (TOKEN_ROOT, self.env.root_token), + (TOKEN_SELF, self.env.self_token), + ] + + rules = [ + (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern), + (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern), + (TOKEN_DOT_KEY_PROPERTY, self.dot_key_pattern), + (TOKEN_DOT_PROPERTY, self.dot_property_pattern), + (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"), + (TOKEN_INT, r"-?\d+(?P[eE][+\-]?\d+)?\b"), + (TOKEN_DDOT, r"\.\."), + (TOKEN_DOT, r"\."), + (TOKEN_AND, r"&&"), + (TOKEN_OR, r"\|\|"), + *[ + (token, re.escape(pattern)) + for token, pattern in sorted( + env_tokens, key=lambda x: len(x[1]), reverse=True + ) + if pattern + ], + (TOKEN_WILD, r"\*"), + (TOKEN_FILTER, r"\?"), + (TOKEN_TRUE, r"true\b"), + (TOKEN_FALSE, r"false\b"), + (TOKEN_NULL, r"null\b"), + (TOKEN_LBRACKET, r"\["), + (TOKEN_RBRACKET, r"]"), + (TOKEN_COMMA, r","), + (TOKEN_COLON, r":"), + (TOKEN_EQ, r"=="), + (TOKEN_NE, r"!="), + (TOKEN_LG, r"<>"), + (TOKEN_LE, r"<="), + (TOKEN_GE, r">="), + (TOKEN_RE, r"=~"), + (TOKEN_LT, r"<"), + (TOKEN_GT, r">"), + (TOKEN_NOT, r"!"), # Must go after "!=" + (TOKEN_FUNCTION, self.function_pattern), + (TOKEN_NAME, self.key_pattern), # Must go after reserved words + (TOKEN_LPAREN, r"\("), + (TOKEN_RPAREN, r"\)"), + 
(TOKEN_WHITESPACE, r"[ \n\t\r]+"), + (TOKEN_ERROR, r"."), + ] + + return re.compile( + "|".join(f"(?P<{token}>{pattern})" for token, pattern in rules), + re.DOTALL, + ) + def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 """Generate a sequence of tokens from a JSONPath string.""" _token = partial(Token, path=path) diff --git a/tests/test_compliance.py b/tests/test_compliance.py index 9463571..4308b30 100644 --- a/tests/test_compliance.py +++ b/tests/test_compliance.py @@ -37,17 +37,7 @@ class Case: SKIP = { - "functions, match, dot matcher on \\u2028": "standard library re policy", - "functions, match, dot matcher on \\u2029": "standard library re policy", - "functions, search, dot matcher on \\u2028": "standard library re policy", - "functions, search, dot matcher on \\u2029": "standard library re policy", - "functions, match, filter, match function, unicode char class, uppercase": "\\p not supported", # noqa: E501 - "functions, match, filter, match function, unicode char class negated, uppercase": "\\P not supported", # noqa: E501 - "functions, search, filter, search function, unicode char class, uppercase": "\\p not supported", # noqa: E501 - "functions, search, filter, search function, unicode char class negated, uppercase": "\\P not supported", # noqa: E501 - "filter, equals number, decimal fraction, no fractional digit": "expected behavior policy", # noqa: E501 - "filter, equals number, decimal fraction, no int digit": "expected behavior policy", - "filter, equals number, invalid no int digit": "expected behavior policy", + # "filter, equals number, invalid no int digit": "expected behavior policy", "filter, equals number, invalid 00": "expected behavior policy", "filter, equals number, invalid leading 0": "expected behavior policy", "filter, equals number, invalid no fractional digit": "expected behavior policy", @@ -63,9 +53,9 @@ class Case: "slice selector, step, minus space": "expected behavior policy", "slice selector, step, -0": "expected 
behavior policy", "slice selector, step, leading -0": "expected behavior policy", - "filter, true, incorrectly capitalized": "flexible literal policy", - "filter, false, incorrectly capitalized": "flexible literal policy", - "filter, null, incorrectly capitalized": "flexible literal policy", + # "filter, true, incorrectly capitalized": "flexible literal policy", + # "filter, false, incorrectly capitalized": "flexible literal policy", + # "filter, null, incorrectly capitalized": "flexible literal policy", "name selector, double quotes, single high surrogate": "expected behavior policy", "name selector, double quotes, single low surrogate": "expected behavior policy", "name selector, double quotes, high high surrogate": "expected behavior policy", @@ -76,6 +66,17 @@ class Case: "name selector, double quotes, supplementary surrogate": "expected behavior policy", } +# CTS test that will only pass if the third party `regex` package is installed. +REGEX_ONLY = { + "functions, match, filter, match function, unicode char class, uppercase", + "functions, match, filter, match function, unicode char class negated, uppercase", + "functions, search, filter, search function, unicode char class, uppercase", + "functions, search, filter, search function, unicode char class negated, uppercase", +} + +# TODO: Test compliance without strict mode. Assert expected failures. 
+# TODO: Test runner in `no-regexp` env + def cases() -> List[Case]: with open("tests/cts/cts.json", encoding="utf8") as fd: @@ -98,6 +99,9 @@ def env() -> JSONPathEnvironment: @pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name")) def test_compliance(env: JSONPathEnvironment, case: Case) -> None: + if not env.regex_available and case.name in REGEX_ONLY: + pytest.skip(reason="requires regex package") + if case.name in SKIP: pytest.skip(reason=SKIP[case.name]) @@ -116,6 +120,9 @@ def test_compliance(env: JSONPathEnvironment, case: Case) -> None: @pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name")) def test_compliance_async(env: JSONPathEnvironment, case: Case) -> None: + if not env.regex_available and case.name in REGEX_ONLY: + pytest.skip(reason="requires regex package") + if case.name in SKIP: pytest.skip(reason=SKIP[case.name]) From 7e09eaffb44240a696966084afef090af06c0df4 Mon Sep 17 00:00:00 2001 From: James Prior Date: Sat, 16 Aug 2025 19:29:48 +0100 Subject: [PATCH 12/29] Test for compliance in strict and lax mode --- .github/workflows/tests-no-regex.yaml | 19 ++++ jsonpath/function_extensions/match.py | 16 ++- jsonpath/function_extensions/search.py | 16 ++- jsonpath/lex.py | 27 ++--- jsonpath/parse.py | 25 ++++- jsonpath/unescape.py | 134 +++++++++++++++++++++++ pyproject.toml | 1 + tests/test_compliance.py | 145 +++++++++++++++---------- 8 files changed, 299 insertions(+), 84 deletions(-) create mode 100644 .github/workflows/tests-no-regex.yaml create mode 100644 jsonpath/unescape.py diff --git a/.github/workflows/tests-no-regex.yaml b/.github/workflows/tests-no-regex.yaml new file mode 100644 index 0000000..8969b45 --- /dev/null +++ b/.github/workflows/tests-no-regex.yaml @@ -0,0 +1,19 @@ +name: test-no-regex +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + 
with: + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade hatch + - run: hatch -e no-regex run test diff --git a/jsonpath/function_extensions/match.py b/jsonpath/function_extensions/match.py index cc7069f..696c5dd 100644 --- a/jsonpath/function_extensions/match.py +++ b/jsonpath/function_extensions/match.py @@ -2,9 +2,13 @@ try: import regex as re + + REGEX_AVAILABLE = True except ImportError: import re # type: ignore + REGEX_AVAILABLE = False + from jsonpath.function_extensions import ExpressionType from jsonpath.function_extensions import FilterFunction @@ -19,9 +23,15 @@ class Match(FilterFunction): def __call__(self, string: str, pattern: str) -> bool: """Return `True` if _string_ matches _pattern_, or `False` otherwise.""" + # XXX: re.fullmatch caches compiled patterns internally, but `map_re` is not + # cached. + if REGEX_AVAILABLE: + try: + pattern = map_re(pattern) + except TypeError: + return False + try: - # XXX: re.fullmatch caches compiled patterns internally, but `map_re` is not - # cached. - return bool(re.fullmatch(map_re(pattern), string)) + return bool(re.fullmatch(pattern, string)) except (TypeError, re.error): return False diff --git a/jsonpath/function_extensions/search.py b/jsonpath/function_extensions/search.py index c554600..8efb8bf 100644 --- a/jsonpath/function_extensions/search.py +++ b/jsonpath/function_extensions/search.py @@ -2,9 +2,13 @@ try: import regex as re + + REGEX_AVAILABLE = True except ImportError: import re # type: ignore + REGEX_AVAILABLE = False + from jsonpath.function_extensions import ExpressionType from jsonpath.function_extensions import FilterFunction @@ -19,9 +23,15 @@ class Search(FilterFunction): def __call__(self, string: str, pattern: str) -> bool: """Return `True` if _string_ contains _pattern_, or `False` otherwise.""" + # XXX: re.search caches compiled patterns internally, but `map_re` is not + # cached. 
+ if REGEX_AVAILABLE: + try: + pattern = map_re(pattern) + except TypeError: + return False + try: - # XXX: re.search caches compiled patterns internally, but `map_re` is not - # cached. - return bool(re.search(map_re(pattern), string)) + return bool(re.search(pattern, string)) except (TypeError, re.error): return False diff --git a/jsonpath/lex.py b/jsonpath/lex.py index f414b34..2d71d63 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -140,8 +140,11 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_RE_PATTERN, self.re_pattern), (TOKEN_DOT_KEY_PROPERTY, self.dot_key_pattern), (TOKEN_DOT_PROPERTY, self.dot_property_pattern), - (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"), - (TOKEN_INT, r"-?\d+(?P[eE][+\-]?\d+)?\b"), + ( + TOKEN_FLOAT, + r"(:?-?[0-9]+\.[0-9]+(?:[eE][+-]?[0-9]+)?)|(-?[0-9]+[eE]-[0-9]+)", + ), + (TOKEN_INT, r"-?[0-9]+(?:[eE]\+?[0-9]+)?"), (TOKEN_DDOT, r"\.\."), (TOKEN_DOT, r"\."), (TOKEN_AND, self.logical_and_pattern), @@ -202,8 +205,11 @@ def compile_strict_rules(self) -> Pattern[str]: (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern), (TOKEN_DOT_KEY_PROPERTY, self.dot_key_pattern), (TOKEN_DOT_PROPERTY, self.dot_property_pattern), - (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"), - (TOKEN_INT, r"-?\d+(?P[eE][+\-]?\d+)?\b"), + ( + TOKEN_FLOAT, + r"(:?-?[0-9]+\.[0-9]+(?:[eE][+-]?[0-9]+)?)|(-?[0-9]+[eE]-[0-9]+)", + ), + (TOKEN_INT, r"-?[0-9]+(?:[eE]\+?[0-9]+)?"), (TOKEN_DDOT, r"\.\."), (TOKEN_DOT, r"\."), (TOKEN_AND, r"&&"), @@ -288,19 +294,6 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 value=match.group("G_SQUOTE"), index=match.start("G_SQUOTE"), ) - elif kind == TOKEN_INT: - if match.group("G_EXP") and match.group("G_EXP")[1] == "-": - yield _token( - kind=TOKEN_FLOAT, - value=match.group(), - index=match.start(), - ) - else: - yield _token( - kind=TOKEN_INT, - value=match.group(), - index=match.start(), - ) elif kind == TOKEN_RE_PATTERN: yield _token( kind=TOKEN_RE_PATTERN, diff --git a/jsonpath/parse.py 
b/jsonpath/parse.py index cd78ecb..039c6e6 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -104,6 +104,7 @@ from .token import TOKEN_WHITESPACE from .token import TOKEN_WILD from .token import Token +from .unescape import unescape_string if TYPE_CHECKING: from .env import JSONPathEnvironment @@ -623,11 +624,23 @@ def parse_string_literal(self, stream: TokenStream) -> BaseExpression: return StringLiteral(value=self._decode_string_literal(stream.next())) def parse_integer_literal(self, stream: TokenStream) -> BaseExpression: + token = stream.next() + value = token.value + + if self.env.strict and value.startswith("0") and len(value) > 1: + raise JSONPathSyntaxError("invalid integer literal", token=token) + # Convert to float first to handle scientific notation. - return IntegerLiteral(value=int(float(stream.next().value))) + return IntegerLiteral(value=int(float(value))) def parse_float_literal(self, stream: TokenStream) -> BaseExpression: - return FloatLiteral(value=float(stream.next().value)) + token = stream.next() + value = token.value + + if value.startswith("0") and len(value.split(".")[0]) > 1: + raise JSONPathSyntaxError("invalid float literal", token=token) + + return FloatLiteral(value=float(value)) def parse_prefix_expression(self, stream: TokenStream) -> BaseExpression: token = stream.next() @@ -839,11 +852,19 @@ def parse_filter_expression( return left def _decode_string_literal(self, token: Token) -> str: + if self.env.strict: + return unescape_string( + token.value, + token, + "'" if token.kind == TOKEN_SINGLE_QUOTE_STRING else '"', + ) + if self.env.unicode_escape: if token.kind == TOKEN_SINGLE_QUOTE_STRING: value = token.value.replace('"', '\\"').replace("\\'", "'") else: value = token.value + try: rv = json.loads(f'"{value}"') assert isinstance(rv, str) diff --git a/jsonpath/unescape.py b/jsonpath/unescape.py new file mode 100644 index 0000000..584e3c2 --- /dev/null +++ b/jsonpath/unescape.py @@ -0,0 +1,134 @@ +r"""Replace `\uXXXX` escape 
sequences with Unicode code points.""" + +from typing import List +from typing import Tuple + +from .exceptions import JSONPathSyntaxError +from .token import Token + + +def unescape_string(value: str, token: Token, quote: str) -> str: + """Return `value` with escape sequences replaced with Unicode code points.""" + unescaped: List[str] = [] + index = 0 + + while index < len(value): + ch = value[index] + if ch == "\\": + index += 1 + _ch, index = _decode_escape_sequence(value, index, token, quote) + unescaped.append(_ch) + else: + _string_from_codepoint(ord(ch), token) + unescaped.append(ch) + index += 1 + return "".join(unescaped) + + +def _decode_escape_sequence( # noqa: PLR0911 + value: str, index: int, token: Token, quote: str +) -> Tuple[str, int]: + try: + ch = value[index] + except IndexError as err: + raise JSONPathSyntaxError("incomplete escape sequence", token=token) from err + + if ch == quote: + return quote, index + if ch == "\\": + return "\\", index + if ch == "/": + return "/", index + if ch == "b": + return "\x08", index + if ch == "f": + return "\x0c", index + if ch == "n": + return "\n", index + if ch == "r": + return "\r", index + if ch == "t": + return "\t", index + if ch == "u": + codepoint, index = _decode_hex_char(value, index, token) + return _string_from_codepoint(codepoint, token), index + + raise JSONPathSyntaxError( + f"unknown escape sequence at index {token.index + index - 1}", + token=token, + ) + + +def _decode_hex_char(value: str, index: int, token: Token) -> Tuple[int, int]: + length = len(value) + + if index + 4 >= length: + raise JSONPathSyntaxError( + f"incomplete escape sequence at index {token.index + index - 1}", + token=token, + ) + + index += 1 # move past 'u' + codepoint = _parse_hex_digits(value[index : index + 4], token) + + if _is_low_surrogate(codepoint): + raise JSONPathSyntaxError( + f"unexpected low surrogate at index {token.index + index - 1}", + token=token, + ) + + if _is_high_surrogate(codepoint): + # expect a 
surrogate pair + if not ( + index + 9 < length and value[index + 4] == "\\" and value[index + 5] == "u" + ): + raise JSONPathSyntaxError( + f"incomplete escape sequence at index {token.index + index - 2}", + token=token, + ) + + low_surrogate = _parse_hex_digits(value[index + 6 : index + 10], token) + + if not _is_low_surrogate(low_surrogate): + raise JSONPathSyntaxError( + f"unexpected codepoint at index {token.index + index + 4}", + token=token, + ) + + codepoint = 0x10000 + (((codepoint & 0x03FF) << 10) | (low_surrogate & 0x03FF)) + + return (codepoint, index + 9) + + return (codepoint, index + 3) + + +def _parse_hex_digits(digits: str, token: Token) -> int: + codepoint = 0 + for digit in digits.encode(): + codepoint <<= 4 + if digit >= 48 and digit <= 57: + codepoint |= digit - 48 + elif digit >= 65 and digit <= 70: + codepoint |= digit - 65 + 10 + elif digit >= 97 and digit <= 102: + codepoint |= digit - 97 + 10 + else: + raise JSONPathSyntaxError( + "invalid \\uXXXX escape sequence", + token=token, + ) + return codepoint + + +def _string_from_codepoint(codepoint: int, token: Token) -> str: + if codepoint <= 0x1F: + raise JSONPathSyntaxError("invalid character", token=token) + return chr(codepoint) + + +def _is_high_surrogate(codepoint: int) -> bool: + return codepoint >= 0xD800 and codepoint <= 0xDBFF + + +def _is_low_surrogate(codepoint: int) -> bool: + return codepoint >= 0xDC00 and codepoint <= 0xDFFF diff --git a/pyproject.toml b/pyproject.toml index 97a2916..ca89351 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -192,4 +192,5 @@ convention = "google" "jsonpath/__init__.py" = ["D104"] "jsonpath/selectors.py" = ["D102"] "jsonpath/filter.py" = ["D102", "PLW1641"] +"jsonpath/unescape.py" = ["PLR2004"] "tests/*" = ["D100", "D101", "D104", "D103"] diff --git a/tests/test_compliance.py b/tests/test_compliance.py index 4308b30..77f81f7 100644 --- a/tests/test_compliance.py +++ b/tests/test_compliance.py @@ -22,6 +22,43 @@ from jsonpath import JSONPathError 
from jsonpath import NodeList +# CTS tests that are expected to fail when JSONPathEnvironment.strict is False. +XFAIL_INVALID = { + "basic, no leading whitespace", + "basic, no trailing whitespace", + "filter, equals number, invalid 00", + "filter, equals number, invalid leading 0", + "filter, true, incorrectly capitalized", + "filter, false, incorrectly capitalized", + "filter, null, incorrectly capitalized", + "name selector, double quotes, single high surrogate", + "name selector, double quotes, single low surrogate", + "name selector, double quotes, high high surrogate", + "name selector, double quotes, low low surrogate", + "name selector, double quotes, surrogate non-surrogate", + "name selector, double quotes, non-surrogate surrogate", + "name selector, double quotes, surrogate supplementary", + "name selector, double quotes, supplementary surrogate", +} + +XFAIL_VALID = { + "filter, index segment on object, selects nothing", +} + +# CTS test that will only pass if the third party `regex` package is installed. 
+REGEX_ONLY = { + "functions, match, dot matcher on \\u2028", + "functions, match, dot matcher on \\u2029", + "functions, search, dot matcher on \\u2028", + "functions, search, dot matcher on \\u2029", + "functions, match, filter, match function, unicode char class, uppercase", + "functions, match, filter, match function, unicode char class negated, uppercase", + "functions, search, filter, search function, unicode char class, uppercase", + "functions, search, filter, search function, unicode char class negated, uppercase", +} + +# TODO: Test runner in `no-regexp` env + @dataclass class Case: @@ -36,60 +73,18 @@ class Case: tags: List[str] = field(default_factory=list) -SKIP = { - # "filter, equals number, invalid no int digit": "expected behavior policy", - "filter, equals number, invalid 00": "expected behavior policy", - "filter, equals number, invalid leading 0": "expected behavior policy", - "filter, equals number, invalid no fractional digit": "expected behavior policy", - "filter, equals number, invalid no fractional digit e": "expected behavior policy", - "slice selector, start, leading 0": "expected behavior policy", - "slice selector, start, -0": "expected behavior policy", - "slice selector, start, leading -0": "expected behavior policy", - "slice selector, end, leading 0": "expected behavior policy", - "slice selector, end, minus space": "expected behavior policy", - "slice selector, end, -0": "expected behavior policy", - "slice selector, end, leading -0": "expected behavior policy", - "slice selector, step, leading 0": "expected behavior policy", - "slice selector, step, minus space": "expected behavior policy", - "slice selector, step, -0": "expected behavior policy", - "slice selector, step, leading -0": "expected behavior policy", - # "filter, true, incorrectly capitalized": "flexible literal policy", - # "filter, false, incorrectly capitalized": "flexible literal policy", - # "filter, null, incorrectly capitalized": "flexible literal policy", - 
"name selector, double quotes, single high surrogate": "expected behavior policy", - "name selector, double quotes, single low surrogate": "expected behavior policy", - "name selector, double quotes, high high surrogate": "expected behavior policy", - "name selector, double quotes, low low surrogate": "expected behavior policy", - "name selector, double quotes, surrogate non-surrogate": "expected behavior policy", - "name selector, double quotes, non-surrogate surrogate": "expected behavior policy", - "name selector, double quotes, surrogate supplementary": "expected behavior policy", - "name selector, double quotes, supplementary surrogate": "expected behavior policy", -} - -# CTS test that will only pass if the third party `regex` package is installed. -REGEX_ONLY = { - "functions, match, filter, match function, unicode char class, uppercase", - "functions, match, filter, match function, unicode char class negated, uppercase", - "functions, search, filter, search function, unicode char class, uppercase", - "functions, search, filter, search function, unicode char class negated, uppercase", -} - -# TODO: Test compliance without strict mode. Assert expected failures. 
-# TODO: Test runner in `no-regexp` env - +with open("tests/cts/cts.json", encoding="utf8") as fd: + data = json.load(fd) -def cases() -> List[Case]: - with open("tests/cts/cts.json", encoding="utf8") as fd: - data = json.load(fd) - return [Case(**case) for case in data["tests"]] +CASES = [Case(**case) for case in data["tests"]] def valid_cases() -> List[Case]: - return [case for case in cases() if not case.invalid_selector] + return [case for case in CASES if not case.invalid_selector] def invalid_cases() -> List[Case]: - return [case for case in cases() if case.invalid_selector] + return [case for case in CASES if case.invalid_selector] @pytest.fixture() @@ -98,13 +93,10 @@ def env() -> JSONPathEnvironment: @pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name")) -def test_compliance(env: JSONPathEnvironment, case: Case) -> None: +def test_compliance_strict(env: JSONPathEnvironment, case: Case) -> None: if not env.regex_available and case.name in REGEX_ONLY: pytest.skip(reason="requires regex package") - if case.name in SKIP: - pytest.skip(reason=SKIP[case.name]) - assert case.document is not None nodes = NodeList(env.finditer(case.selector, case.document)) @@ -119,13 +111,10 @@ def test_compliance(env: JSONPathEnvironment, case: Case) -> None: @pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name")) -def test_compliance_async(env: JSONPathEnvironment, case: Case) -> None: +def test_compliance_async_strict(env: JSONPathEnvironment, case: Case) -> None: if not env.regex_available and case.name in REGEX_ONLY: pytest.skip(reason="requires regex package") - if case.name in SKIP: - pytest.skip(reason=SKIP[case.name]) - async def coro() -> NodeList: assert case.document is not None it = await env.finditer_async(case.selector, case.document) @@ -144,9 +133,47 @@ async def coro() -> NodeList: @pytest.mark.parametrize("case", invalid_cases(), ids=operator.attrgetter("name")) -def test_invalid_selectors(env: 
JSONPathEnvironment, case: Case) -> None: - if case.name in SKIP: - pytest.skip(reason=SKIP[case.name]) - +def test_invalid_selectors_strict(env: JSONPathEnvironment, case: Case) -> None: with pytest.raises(JSONPathError): env.compile(case.selector) + + +@pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name")) +def test_compliance_lax(case: Case) -> None: + env = JSONPathEnvironment(strict=False) + + if not env.regex_available and case.name in REGEX_ONLY: + pytest.skip(reason="requires regex package") + + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + + if case.results is not None: + assert case.results_paths is not None + + if case.name in XFAIL_VALID: + assert nodes.values() not in case.results + assert nodes.paths() in case.results_paths + else: + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + + if case.name in XFAIL_VALID: + assert nodes.values() != case.result + assert nodes.paths() != case.result_paths + else: + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths + + +@pytest.mark.parametrize("case", invalid_cases(), ids=operator.attrgetter("name")) +def test_invalid_selectors_lax(case: Case) -> None: + env = JSONPathEnvironment(strict=False) + + if case.name in XFAIL_INVALID: + env.compile(case.selector) + else: + with pytest.raises(JSONPathError): + env.compile(case.selector) From 6e1f3b762bede985f94bce02f0fbf5ffea5d6e15 Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 18 Aug 2025 08:33:53 +0100 Subject: [PATCH 13/29] Separate test cases for non-standard sytnax in to JSON files WIP --- jsonpath/lex.py | 1 - tests/_cts_case.py | 47 +++++++ tests/current_key_identifier.json | 61 +++++++++ tests/key_selector.json | 98 +++++++++++++++ tests/keys_filter_selector.json | 48 ++++++++ tests/keys_selector.json | 78 ++++++++++++ tests/membership_operators.json | 0 
tests/pseudo_root_identifier.json | 0 tests/query_intersection.json | 0 tests/query_union.json | 0 tests/regex_operator.json | 0 tests/singular_path_selector.json | 88 +++++++++++++ tests/test_compliance.py | 25 +--- tests/test_current_key_identifier.py | 33 +++++ tests/test_find.py | 2 + tests/test_find_extra.py | 175 -------------------------- tests/test_find_extra_examples.py | 178 --------------------------- tests/test_key_selector.py | 33 +++++ tests/test_keys_filter_selector.py | 33 +++++ tests/test_keys_selector.py | 33 +++++ tests/test_singular_path_selector.py | 33 +++++ 21 files changed, 589 insertions(+), 377 deletions(-) create mode 100644 tests/_cts_case.py create mode 100644 tests/current_key_identifier.json create mode 100644 tests/key_selector.json create mode 100644 tests/keys_filter_selector.json create mode 100644 tests/keys_selector.json create mode 100644 tests/membership_operators.json create mode 100644 tests/pseudo_root_identifier.json create mode 100644 tests/query_intersection.json create mode 100644 tests/query_union.json create mode 100644 tests/regex_operator.json create mode 100644 tests/singular_path_selector.json create mode 100644 tests/test_current_key_identifier.py delete mode 100644 tests/test_find_extra.py delete mode 100644 tests/test_find_extra_examples.py create mode 100644 tests/test_key_selector.py create mode 100644 tests/test_keys_filter_selector.py create mode 100644 tests/test_keys_selector.py create mode 100644 tests/test_singular_path_selector.py diff --git a/jsonpath/lex.py b/jsonpath/lex.py index 2d71d63..d9a470b 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -235,7 +235,6 @@ def compile_strict_rules(self) -> Pattern[str]: (TOKEN_LG, r"<>"), (TOKEN_LE, r"<="), (TOKEN_GE, r">="), - (TOKEN_RE, r"=~"), (TOKEN_LT, r"<"), (TOKEN_GT, r">"), (TOKEN_NOT, r"!"), # Must go after "!=" diff --git a/tests/_cts_case.py b/tests/_cts_case.py new file mode 100644 index 0000000..dd4a55b --- /dev/null +++ b/tests/_cts_case.py 
@@ -0,0 +1,47 @@ +"""A dataclass for a test case suitable for the CTS JSON schema.""" + +from dataclasses import dataclass +from dataclasses import field +from typing import Any +from typing import Dict +from typing import List +from typing import Mapping +from typing import Optional +from typing import Sequence +from typing import Union + + +@dataclass +class Case: + name: str + selector: str + document: Union[Mapping[str, Any], Sequence[Any], None] = None + result: Any = None + results: Optional[List[Any]] = None + result_paths: Optional[List[str]] = None + results_paths: Optional[List[List[str]]] = None + invalid_selector: Optional[bool] = None + tags: List[str] = field(default_factory=list) + + def as_dict(self) -> Dict[str, Any]: + rv: Dict[str, Any] = { + "name": self.name, + "selector": self.selector, + } + + if self.document is not None: + rv["document"] = self.document + + if self.result is not None: + rv["result"] = self.result + rv["result_paths"] = self.result_paths + else: + rv["results"] = self.results + rv["results_paths"] = self.results_paths + else: + assert self.invalid_selector + rv["invalid_selector"] = True + + rv["tags"] = self.tags + + return rv diff --git a/tests/current_key_identifier.json b/tests/current_key_identifier.json new file mode 100644 index 0000000..955e4cd --- /dev/null +++ b/tests/current_key_identifier.json @@ -0,0 +1,61 @@ +{ + "tests": [ + { + "name": "current key of an object", + "selector": "$.some[?match(#, '^b.*')]", + "document": { + "some": { "foo": "a", "bar": "b", "baz": "c", "qux": "d" } + }, + "result": ["b", "c"], + "result_paths": ["$['some']['bar']", "$['some']['baz']"] + }, + { + "name": "current key of an array", + "selector": "$.some[?# > 1]", + "document": { "some": ["other", "thing", "foo", "bar"] }, + "result": ["foo", "bar"], + "result_paths": ["$['some'][2]", "$['some'][3]"] + }, + { + "name": "current key of a string selects nothing", + "selector": "$.some[?# > 1]", + "document": { "some": "thing" }, + 
"result": [], + "result_paths": [] + }, + { + "name": "current key of an object", + "selector": "$.some[?match(#, '^b.*')]", + "document": { + "some": { "foo": "a", "bar": "b", "baz": "c", "qux": "d" } + }, + "result": ["b", "c"], + "result_paths": ["$['some']['bar']", "$['some']['baz']"], + "tags": ["extra"] + }, + { + "name": "current key of an array", + "selector": "$.some[?# > 1]", + "document": { "some": ["other", "thing", "foo", "bar"] }, + "result": ["foo", "bar"], + "result_paths": ["$['some'][2]", "$['some'][3]"], + "tags": ["extra"] + }, + { + "name": "current key identifier, match on object names", + "selector": "$[?match(#, '^ab.*') && length(@) > 0 ]", + "document": { "abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": [] }, + "result": [[1, 2, 3], [6]], + "result_paths": ["$['abc']", "$['abx']"], + "tags": ["extra"] + }, + { + "name": "current key identifier, compare current array index", + "selector": "$.abc[?(# >= 1)]", + "document": { "abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": [] }, + "result": [2, 3], + "result_paths": ["$['abc'][1]", "$['abc'][2]"], + "tags": ["extra"] + } + ] +} diff --git a/tests/key_selector.json b/tests/key_selector.json new file mode 100644 index 0000000..77ef007 --- /dev/null +++ b/tests/key_selector.json @@ -0,0 +1,98 @@ +{ + "tests": [ + { + "name": "singular key from an object", + "selector": "$.some[~'other']", + "document": { "some": { "other": "foo", "thing": "bar" } }, + "result": ["other"], + "result_paths": ["$['some'][~'other']"], + "tags": ["extra"] + }, + { + "name": "singular key from an object, does not exist", + "selector": "$.some[~'else']", + "document": { "some": { "other": "foo", "thing": "bar" } }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "singular key from an array", + "selector": "$.some[~'1']", + "document": { "some": ["foo", "bar"] }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "singular key from an object, shorthand", + 
"selector": "$.some.~other", + "document": { "some": { "other": "foo", "thing": "bar" } }, + "result": ["other"], + "result_paths": ["$['some'][~'other']"], + "tags": ["extra"] + }, + { + "name": "recursive key from an object", + "selector": "$.some..[~'other']", + "document": { + "some": { "other": "foo", "thing": "bar", "else": { "other": "baz" } } + }, + "result": ["other", "other"], + "result_paths": ["$['some'][~'other']", "$['some']['else'][~'other']"], + "tags": ["extra"] + }, + { + "name": "recursive key from an object, shorthand", + "selector": "$.some..~other", + "document": { + "some": { "other": "foo", "thing": "bar", "else": { "other": "baz" } } + }, + "result": ["other", "other"], + "result_paths": ["$['some'][~'other']", "$['some']['else'][~'other']"], + "tags": ["extra"] + }, + { + "name": "recursive key from an object, does not exist", + "selector": "$.some..[~'nosuchthing']", + "document": { + "some": { "other": "foo", "thing": "bar", "else": { "other": "baz" } } + }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "key of nested object", + "selector": "$.a[0].~c", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": ["c"], + "result_paths": ["$['a'][0][~'c']"], + "tags": ["extra"] + }, + { + "name": "key does not exist", + "selector": "$.a[1].~c", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "descendant, single quoted key", + "selector": "$..[~'b']", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": ["b", "b"], + "result_paths": ["$['a'][0][~'b']", "$['a'][1][~'b']"], + "tags": ["extra"] + }, + { + "name": "descendant, double quoted key", + "selector": "$..[~\"b\"]", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": ["b", "b"], + "result_paths": ["$['a'][0][~'b']", "$['a'][1][~'b']"], + "tags": ["extra"] + } + ] +} diff --git 
a/tests/keys_filter_selector.json b/tests/keys_filter_selector.json new file mode 100644 index 0000000..3132b22 --- /dev/null +++ b/tests/keys_filter_selector.json @@ -0,0 +1,48 @@ +{ + "tests": [ + { + "name": "filter keys from an object", + "selector": "$.some[~?match(@, '^b.*')]", + "document": { "some": { "other": "foo", "thing": "bar" } }, + "result": ["thing"], + "result_paths": ["$['some'][~'thing']"], + "tags": ["extra"] + }, + { + "name": "keys filter selector, conditionally select object keys", + "selector": "$.*[~?length(@) > 2]", + "document": [ + { "a": [1, 2, 3], "b": [4, 5] }, + { "c": { "x": [1, 2] } }, + { "d": [1, 2, 3] } + ], + "result": ["a", "d"], + "result_paths": ["$[0][~'a']", "$[2][~'d']"], + "tags": ["extra"] + }, + { + "name": "keys filter selector, existence test", + "selector": "$.*[~?@.x]", + "document": [ + { "a": [1, 2, 3], "b": [4, 5] }, + { "c": { "x": [1, 2] } }, + { "d": [1, 2, 3] } + ], + "result": ["c"], + "result_paths": ["$[1][~'c']"], + "tags": ["extra"] + }, + { + "name": "keys filter selector, keys from an array", + "selector": "$[~?(true == true)]", + "document": [ + { "a": [1, 2, 3], "b": [4, 5] }, + { "c": { "x": [1, 2] } }, + { "d": [1, 2, 3] } + ], + "result": [], + "result_paths": [], + "tags": ["extra"] + } + ] +} diff --git a/tests/keys_selector.json b/tests/keys_selector.json new file mode 100644 index 0000000..c7461e1 --- /dev/null +++ b/tests/keys_selector.json @@ -0,0 +1,78 @@ +{ + "tests": [ + { + "name": "keys from an object", + "selector": "$.some[~]", + "document": { "some": { "other": "foo", "thing": "bar" } }, + "result": ["other", "thing"], + "result_paths": ["$['some'][~'other']", "$['some'][~'thing']"], + "tags": ["extra"] + }, + { + "name": "shorthand keys from an object", + "selector": "$.some.~", + "document": { "some": { "other": "foo", "thing": "bar" } }, + "result": ["other", "thing"], + "result_paths": ["$['some'][~'other']", "$['some'][~'thing']"], + "tags": ["extra"] + }, + { + "name": "keys 
from an array", + "selector": "$.some[~]", + "document": { "some": ["other", "thing"] }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "shorthand keys from an array", + "selector": "$.some.~", + "document": { "some": ["other", "thing"] }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "recurse object keys", + "selector": "$..~", + "document": { "some": { "thing": "else", "foo": { "bar": "baz" } } }, + "result": ["some", "thing", "foo", "bar"], + "result_paths": [ + "$[~'some']", + "$['some'][~'thing']", + "$['some'][~'foo']", + "$['some']['foo'][~'bar']" + ], + "tags": ["extra"] + }, + { + "name": "keys selector, object key", + "selector": "$.a[0].~", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": ["b", "c"], + "result_paths": ["$['a'][0][~'b']", "$['a'][0][~'c']"], + "tags": ["extra"] + }, + { + "name": "keys selector, array key", + "selector": "$.a.~", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "keys selector, descendant keys", + "selector": "$..[~]", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": ["a", "b", "c", "b"], + "result_paths": [ + "$[~'a']", + "$['a'][0][~'b']", + "$['a'][0][~'c']", + "$['a'][1][~'b']" + ], + "tags": ["extra"] + } + ] +} diff --git a/tests/membership_operators.json b/tests/membership_operators.json new file mode 100644 index 0000000..e69de29 diff --git a/tests/pseudo_root_identifier.json b/tests/pseudo_root_identifier.json new file mode 100644 index 0000000..e69de29 diff --git a/tests/query_intersection.json b/tests/query_intersection.json new file mode 100644 index 0000000..e69de29 diff --git a/tests/query_union.json b/tests/query_union.json new file mode 100644 index 0000000..e69de29 diff --git a/tests/regex_operator.json b/tests/regex_operator.json new file mode 100644 index 0000000..e69de29 diff --git 
a/tests/singular_path_selector.json b/tests/singular_path_selector.json new file mode 100644 index 0000000..ba0393e --- /dev/null +++ b/tests/singular_path_selector.json @@ -0,0 +1,88 @@ +{ + "tests": [ + { + "name": "object name from embedded singular query", + "selector": "$.a[$.b[1]]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] } }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": [{ "q": [4, 5, 6] }], + "result_paths": ["$['a']['p']"], + "tags": ["extra"] + }, + { + "name": "array index from embedded singular query", + "selector": "$.a.j[$['c d'].x.y]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] } }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": [2], + "result_paths": ["$['a']['j'][1]"], + "tags": ["extra"] + }, + { + "name": "embedded singular query does not resolve to a string or int value", + "selector": "$.a[$.b]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] } }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "object name from embedded singular query resolving to nothing", + "selector": "$.a[$.foo]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] } }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "array index from embedded singular query resolving to nothing", + "selector": "$.b[$.foo]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] } }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "array index from embedded singular query is not an int", + "selector": "$.b[$.a.z]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] }, "z": "foo" }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": [], + "result_paths": [], + "tags": ["extra"] + 
}, + { + "name": "array index from embedded singular query is negative", + "selector": "$.b[$.a.z]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] }, "z": -1 }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": ["q"], + "result_paths": ["$['b'][2]"], + "tags": ["extra"] + } + ] +} diff --git a/tests/test_compliance.py b/tests/test_compliance.py index 77f81f7..7602f8b 100644 --- a/tests/test_compliance.py +++ b/tests/test_compliance.py @@ -7,14 +7,7 @@ import asyncio import json import operator -from dataclasses import dataclass -from dataclasses import field -from typing import Any from typing import List -from typing import Mapping -from typing import Optional -from typing import Sequence -from typing import Union import pytest @@ -22,6 +15,8 @@ from jsonpath import JSONPathError from jsonpath import NodeList +from ._cts_case import Case + # CTS tests that are expected to fail when JSONPathEnvironment.strict is False. XFAIL_INVALID = { "basic, no leading whitespace", @@ -57,22 +52,6 @@ "functions, search, filter, search function, unicode char class negated, uppercase", } -# TODO: Test runner in `no-regexp` env - - -@dataclass -class Case: - name: str - selector: str - document: Union[Mapping[str, Any], Sequence[Any], None] = None - result: Any = None - results: Optional[List[Any]] = None - result_paths: Optional[List[str]] = None - results_paths: Optional[List[List[str]]] = None - invalid_selector: Optional[bool] = None - tags: List[str] = field(default_factory=list) - - with open("tests/cts/cts.json", encoding="utf8") as fd: data = json.load(fd) diff --git a/tests/test_current_key_identifier.py b/tests/test_current_key_identifier.py new file mode 100644 index 0000000..112b738 --- /dev/null +++ b/tests/test_current_key_identifier.py @@ -0,0 +1,33 @@ +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def 
env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/current_key_identifier.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_current_key_identifier(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths diff --git a/tests/test_find.py b/tests/test_find.py index b564103..310d5b1 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -11,6 +11,8 @@ from jsonpath import JSONPathEnvironment +# TODO: move the rest of these test cases and delete me + @dataclasses.dataclass class Case: diff --git a/tests/test_find_extra.py b/tests/test_find_extra.py deleted file mode 100644 index 69074ce..0000000 --- a/tests/test_find_extra.py +++ /dev/null @@ -1,175 +0,0 @@ -import asyncio -import dataclasses -import operator -from typing import Any -from typing import List -from typing import Mapping -from typing import Sequence -from typing import Union - -import pytest - -from jsonpath import JSONPathEnvironment - - -@dataclasses.dataclass -class Case: - description: str - path: str - data: Union[Sequence[Any], Mapping[str, Any]] - want: Union[Sequence[Any], Mapping[str, Any]] - - -TEST_CASES = [ - Case( - description="keys from an object", - path="$.some[~]", - data={"some": {"other": "foo", "thing": "bar"}}, - want=["other", "thing"], - ), - Case( - description="shorthand keys from an object", - path="$.some.~", - data={"some": {"other": "foo", "thing": "bar"}}, - want=["other", "thing"], - ), - Case( - description="keys from an array", - 
path="$.some[~]", - data={"some": ["other", "thing"]}, - want=[], - ), - Case( - description="shorthand keys from an array", - path="$.some.~", - data={"some": ["other", "thing"]}, - want=[], - ), - Case( - description="recurse object keys", - path="$..~", - data={"some": {"thing": "else", "foo": {"bar": "baz"}}}, - want=["some", "thing", "foo", "bar"], - ), - Case( - description="current key of an object", - path="$.some[?match(#, '^b.*')]", - data={"some": {"foo": "a", "bar": "b", "baz": "c", "qux": "d"}}, - want=["b", "c"], - ), - Case( - description="current key of an array", - path="$.some[?# > 1]", - data={"some": ["other", "thing", "foo", "bar"]}, - want=["foo", "bar"], - ), - Case( - description="filter keys from an object", - path="$.some[~?match(@, '^b.*')]", - data={"some": {"other": "foo", "thing": "bar"}}, - want=["thing"], - ), - Case( - description="singular key from an object", - path="$.some[~'other']", - data={"some": {"other": "foo", "thing": "bar"}}, - want=["other"], - ), - Case( - description="singular key from an object, does not exist", - path="$.some[~'else']", - data={"some": {"other": "foo", "thing": "bar"}}, - want=[], - ), - Case( - description="singular key from an array", - path="$.some[~'1']", - data={"some": ["foo", "bar"]}, - want=[], - ), - Case( - description="singular key from an object, shorthand", - path="$.some.~other", - data={"some": {"other": "foo", "thing": "bar"}}, - want=["other"], - ), - Case( - description="recursive key from an object", - path="$.some..[~'other']", - data={"some": {"other": "foo", "thing": "bar", "else": {"other": "baz"}}}, - want=["other", "other"], - ), - Case( - description="recursive key from an object, shorthand", - path="$.some..~other", - data={"some": {"other": "foo", "thing": "bar", "else": {"other": "baz"}}}, - want=["other", "other"], - ), - Case( - description="recursive key from an object, does not exist", - path="$.some..[~'nosuchthing']", - data={"some": {"other": "foo", "thing": 
"bar", "else": {"other": "baz"}}}, - want=[], - ), - Case( - description="object name from embedded singular query resolving to nothing", - path="$.a[$.foo]", - data={ - "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, - "b": ["j", "p", "q"], - "c d": {"x": {"y": 1}}, - }, - want=[], - ), - Case( - description="array index from embedded singular query resolving to nothing", - path="$.b[$.foo]", - data={ - "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, - "b": ["j", "p", "q"], - "c d": {"x": {"y": 1}}, - }, - want=[], - ), - Case( - description="array index from embedded singular query is not an int", - path="$.b[$.a.z]", - data={ - "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}, "z": "foo"}, - "b": ["j", "p", "q"], - "c d": {"x": {"y": 1}}, - }, - want=[], - ), - Case( - description="array index from embedded singular query is negative", - path="$.b[$.a.z]", - data={ - "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}, "z": -1}, - "b": ["j", "p", "q"], - "c d": {"x": {"y": 1}}, - }, - want=["q"], - ), -] - - -@pytest.fixture() -def env() -> JSONPathEnvironment: - return JSONPathEnvironment() - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find_extra(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - assert path.findall(case.data) == case.want - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find_extra_async(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - - async def coro() -> List[object]: - return await path.findall_async(case.data) - - assert asyncio.run(coro()) == case.want diff --git a/tests/test_find_extra_examples.py b/tests/test_find_extra_examples.py deleted file mode 100644 index b43b967..0000000 --- a/tests/test_find_extra_examples.py +++ /dev/null @@ -1,178 +0,0 @@ -import asyncio -import dataclasses -import operator -from typing import Any -from typing import List -from typing import Mapping -from typing 
import Sequence -from typing import Union - -import pytest - -from jsonpath import JSONPathEnvironment - - -@dataclasses.dataclass -class Case: - description: str - path: str - data: Union[Sequence[Any], Mapping[str, Any]] - want: Union[Sequence[Any], Mapping[str, Any]] - want_paths: List[str] - - -TEST_CASES = [ - Case( - description="key selector, key of nested object", - path="$.a[0].~c", - data={ - "a": [{"b": "x", "c": "z"}, {"b": "y"}], - }, - want=["c"], - want_paths=["$['a'][0][~'c']"], - ), - Case( - description="key selector, key does not exist", - path="$.a[1].~c", - data={ - "a": [{"b": "x", "c": "z"}, {"b": "y"}], - }, - want=[], - want_paths=[], - ), - Case( - description="key selector, descendant, single quoted key", - path="$..[~'b']", - data={ - "a": [{"b": "x", "c": "z"}, {"b": "y"}], - }, - want=["b", "b"], - want_paths=["$['a'][0][~'b']", "$['a'][1][~'b']"], - ), - Case( - description="key selector, descendant, double quoted key", - path='$..[~"b"]', - data={ - "a": [{"b": "x", "c": "z"}, {"b": "y"}], - }, - want=["b", "b"], - want_paths=["$['a'][0][~'b']", "$['a'][1][~'b']"], - ), - Case( - description="keys selector, object key", - path="$.a[0].~", - data={ - "a": [{"b": "x", "c": "z"}, {"b": "y"}], - }, - want=["b", "c"], - want_paths=["$['a'][0][~'b']", "$['a'][0][~'c']"], - ), - Case( - description="keys selector, array key", - path="$.a.~", - data={ - "a": [{"b": "x", "c": "z"}, {"b": "y"}], - }, - want=[], - want_paths=[], - ), - Case( - description="keys selector, descendant keys", - path="$..[~]", - data={ - "a": [{"b": "x", "c": "z"}, {"b": "y"}], - }, - want=["a", "b", "c", "b"], - want_paths=["$[~'a']", "$['a'][0][~'b']", "$['a'][0][~'c']", "$['a'][1][~'b']"], - ), - Case( - description="keys filter selector, conditionally select object keys", - path="$.*[~?length(@) > 2]", - data=[{"a": [1, 2, 3], "b": [4, 5]}, {"c": {"x": [1, 2]}}, {"d": [1, 2, 3]}], - want=["a", "d"], - want_paths=["$[0][~'a']", "$[2][~'d']"], - ), - Case( - 
description="keys filter selector, existence test", - path="$.*[~?@.x]", - data=[{"a": [1, 2, 3], "b": [4, 5]}, {"c": {"x": [1, 2]}}, {"d": [1, 2, 3]}], - want=["c"], - want_paths=["$[1][~'c']"], - ), - Case( - description="keys filter selector, keys from an array", - path="$[~?(true == true)]", - data=[{"a": [1, 2, 3], "b": [4, 5]}, {"c": {"x": [1, 2]}}, {"d": [1, 2, 3]}], - want=[], - want_paths=[], - ), - Case( - description="current key identifier, match on object names", - path="$[?match(#, '^ab.*') && length(@) > 0 ]", - data={"abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": []}, - want=[[1, 2, 3], [6]], - want_paths=["$['abc']", "$['abx']"], - ), - Case( - description="current key identifier, compare current array index", - path="$.abc[?(# >= 1)]", - data={"abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": []}, - want=[2, 3], - want_paths=["$['abc'][1]", "$['abc'][2]"], - ), - Case( - description="object name from embedded singular query", - path="$.a[$.b[1]]", - data={ - "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, - "b": ["j", "p", "q"], - "c d": {"x": {"y": 1}}, - }, - want=[{"q": [4, 5, 6]}], - want_paths=["$['a']['p']"], - ), - Case( - description="array index from embedded singular query", - path="$.a.j[$['c d'].x.y]", - data={ - "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, - "b": ["j", "p", "q"], - "c d": {"x": {"y": 1}}, - }, - want=[2], - want_paths=["$['a']['j'][1]"], - ), - Case( - description="embedded singular query does not resolve to a string or int value", - path="$.a[$.b]", - data={ - "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, - "b": ["j", "p", "q"], - "c d": {"x": {"y": 1}}, - }, - want=[], - want_paths=[], - ), -] - - -@pytest.fixture() -def env() -> JSONPathEnvironment: - return JSONPathEnvironment() - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find_extra_examples(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - assert path.findall(case.data) 
== case.want - assert list(path.query(case.data).locations()) == case.want_paths - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find_extra_async(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - - async def coro() -> List[object]: - return await path.findall_async(case.data) - - assert asyncio.run(coro()) == case.want diff --git a/tests/test_key_selector.py b/tests/test_key_selector.py new file mode 100644 index 0000000..ab383ec --- /dev/null +++ b/tests/test_key_selector.py @@ -0,0 +1,33 @@ +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/key_selector.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_key_selector(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths diff --git a/tests/test_keys_filter_selector.py b/tests/test_keys_filter_selector.py new file mode 100644 index 0000000..a38761d --- /dev/null +++ b/tests/test_keys_filter_selector.py @@ -0,0 +1,33 @@ +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/keys_filter_selector.json", 
encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_keys_filter_selector(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths diff --git a/tests/test_keys_selector.py b/tests/test_keys_selector.py new file mode 100644 index 0000000..df5a6dc --- /dev/null +++ b/tests/test_keys_selector.py @@ -0,0 +1,33 @@ +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/keys_selector.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_keys_selector(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths diff --git a/tests/test_singular_path_selector.py b/tests/test_singular_path_selector.py new file mode 100644 index 0000000..fd07c07 --- /dev/null +++ b/tests/test_singular_path_selector.py @@ -0,0 +1,33 @@ +import json +import operator + +import pytest + +from jsonpath import 
JSONPathEnvironment +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/singular_path_selector.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_singular_path_selector(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths From 225f6864a332351ab1f37c442b02873081c062cc Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 18 Aug 2025 12:11:42 +0100 Subject: [PATCH 14/29] Assert that non-standard syntax fails in strict mode --- jsonpath/lex.py | 1 - jsonpath/parse.py | 8 ++- jsonpath/selectors.py | 4 ++ mkdocs.yml | 2 + tests/keys_selector.json | 6 +-- tests/membership_operators.json | 64 +++++++++++++++++++++++ tests/test_current_key_identifier.py | 9 ++++ tests/test_find.py | 76 ---------------------------- tests/test_key_selector.py | 9 ++++ tests/test_keys_filter_selector.py | 9 ++++ tests/test_keys_selector.py | 9 ++++ tests/test_membership_operators.py | 42 +++++++++++++++ tests/test_singular_path_selector.py | 9 ++++ 13 files changed, 167 insertions(+), 81 deletions(-) create mode 100644 tests/test_membership_operators.py diff --git a/jsonpath/lex.py b/jsonpath/lex.py index d9a470b..7737c90 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -203,7 +203,6 @@ def compile_strict_rules(self) -> Pattern[str]: rules = [ (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern), (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern), - 
(TOKEN_DOT_KEY_PROPERTY, self.dot_key_pattern), (TOKEN_DOT_PROPERTY, self.dot_property_pattern), ( TOKEN_FLOAT, diff --git a/jsonpath/parse.py b/jsonpath/parse.py index 039c6e6..3f6be61 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -298,14 +298,16 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: def parse(self, stream: TokenStream) -> Iterator[JSONPathSegment]: """Parse a JSONPath query from a stream of tokens.""" + # Leading whitespace is not allowed in strict mode. if stream.skip_whitespace() and self.env.strict: raise JSONPathSyntaxError( "unexpected leading whitespace", token=stream.current() ) + # Trailing whitespace is not allowed in strict mode. if ( self.env.strict - and len(stream.tokens) + and stream.tokens and stream.tokens[-1].kind == TOKEN_WHITESPACE ): raise JSONPathSyntaxError( @@ -319,6 +321,7 @@ def parse(self, stream: TokenStream) -> Iterator[JSONPathSegment]: ): stream.next() elif self.env.strict: + # Raises a syntax error because the current token is not TOKEN_ROOT. stream.expect(TOKEN_ROOT) yield from self.parse_query(stream) @@ -853,6 +856,9 @@ def parse_filter_expression( def _decode_string_literal(self, token: Token) -> str: if self.env.strict: + # For strict compliance with RFC 9535, we must unescape string literals + # ourselves. RFC 9535 is more strict than json.loads when it comes to + # parsing \uXXXX escape sequences.
return unescape_string( token.value, token, diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index 3eb033f..d073380 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -15,6 +15,7 @@ from typing import Union from .exceptions import JSONPathIndexError +from .exceptions import JSONPathSyntaxError from .exceptions import JSONPathTypeError from .match import NodeList from .serialize import canonical_string @@ -384,6 +385,9 @@ def __init__( super().__init__(env=env, token=token) self.query = query + if env.strict: + raise JSONPathSyntaxError("unexpected query selector", token=token) + def __str__(self) -> str: return str(self.query) diff --git a/mkdocs.yml b/mkdocs.yml index 8183760..6166f4e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -7,12 +7,14 @@ theme: palette: - scheme: "default" media: "(prefers-color-scheme: light)" + primary: "blue" toggle: icon: "material/weather-sunny" name: "Switch to dark mode" - scheme: "slate" media: "(prefers-color-scheme: dark)" primary: "blue" + accent: blue toggle: icon: "material/weather-night" name: "Switch to light mode" diff --git a/tests/keys_selector.json b/tests/keys_selector.json index c7461e1..25227d5 100644 --- a/tests/keys_selector.json +++ b/tests/keys_selector.json @@ -46,7 +46,7 @@ "tags": ["extra"] }, { - "name": "keys selector, object key", + "name": "object key", "selector": "$.a[0].~", "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, "result": ["b", "c"], @@ -54,7 +54,7 @@ "tags": ["extra"] }, { - "name": "keys selector, array key", + "name": "array key", "selector": "$.a.~", "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, "result": [], @@ -62,7 +62,7 @@ "tags": ["extra"] }, { - "name": "keys selector, descendant keys", + "name": "descendant keys", "selector": "$..[~]", "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, "result": ["a", "b", "c", "b"], diff --git a/tests/membership_operators.json b/tests/membership_operators.json index e69de29..b66c8dc 100644 
--- a/tests/membership_operators.json +++ b/tests/membership_operators.json @@ -0,0 +1,64 @@ +{ + "tests": [ + { + "name": "array contains literal string", + "selector": "$[?@.a contains 'foo']", + "document": [{ "a": ["foo", "bar"] }, { "a": ["bar"] }], + "result": [ + { + "a": ["foo", "bar"] + } + ], + "result_paths": ["$[0]"], + "tags": ["extra"] + }, + { + "name": "object contains literal string", + "selector": "$[?@.a contains 'foo']", + "document": [{ "a": { "foo": "bar" } }, { "a": { "bar": "baz" } }], + "result": [ + { + "a": { "foo": "bar" } + } + ], + "result_paths": ["$[0]"], + "tags": ["extra"] + }, + { + "name": "string literal in array", + "selector": "$[?'foo' in @.a]", + "document": [{ "a": ["foo", "bar"] }, { "a": ["bar"] }], + "result": [ + { + "a": ["foo", "bar"] + } + ], + "result_paths": ["$[0]"], + "tags": ["extra"] + }, + { + "name": "string literal in object", + "selector": "$[?'foo' in @.a]", + "document": [{ "a": { "foo": "bar" } }, { "a": { "bar": "baz" } }], + "result": [ + { + "a": { "foo": "bar" } + } + ], + "result_paths": ["$[0]"], + "tags": ["extra"] + }, + { + "name": "string from embedded query in object", + "selector": "$[?$[-1] in @.a]", + "document": [{ "a": { "foo": "bar" } }, { "a": { "bar": "baz" } }, "foo"], + "result": [ + { + "a": { "foo": "bar" } + } + ], + "result_paths": ["$[0]"], + "tags": ["extra"] + } + ] +} diff --git a/tests/test_current_key_identifier.py b/tests/test_current_key_identifier.py index 112b738..4a28b40 100644 --- a/tests/test_current_key_identifier.py +++ b/tests/test_current_key_identifier.py @@ -4,6 +4,7 @@ import pytest from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError from jsonpath import NodeList from ._cts_case import Case @@ -31,3 +32,11 @@ def test_current_key_identifier(env: JSONPathEnvironment, case: Case) -> None: assert case.result_paths is not None assert nodes.values() == case.result assert nodes.paths() == case.result_paths + + 
+@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_current_key_identifier_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_find.py b/tests/test_find.py index 310d5b1..d2bb49a 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -29,36 +29,6 @@ class Case: data={"some": {"thing": "else"}}, want=[], ), - Case( - description="keys from a mapping", - path="$.some[~]", - data={"some": {"thing": "else"}}, - want=["thing"], - ), - Case( - description="keys from a sequence", - path="$.some.~", - data={"some": ["thing", "else"]}, - want=[], - ), - Case( - description="match key pattern", - path="$.some[?match(#, 'thing[0-9]+')]", - data={ - "some": { - "thing1": {"foo": 1}, - "thing2": {"foo": 2}, - "other": {"foo": 3}, - } - }, - want=[{"foo": 1}, {"foo": 2}], - ), - Case( - description="filter current key, array data", - path="$.abc[?(# >= 1)]", - data={"abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": []}, - want=[2, 3], - ), Case( description="select root value using pseudo root", path="^[?@.some.thing > 7]", @@ -71,12 +41,6 @@ class Case: data={"some": {"thing": 42}, "num": 7}, want=[{"some": {"thing": 42}, "num": 7}], ), - Case( - description="recurse object keys", - path="$..~", - data={"some": {"thing": "else", "foo": {"bar": "baz"}}}, - want=["some", "thing", "foo", "bar"], - ), Case( description="logical expr existence tests", path="$[?@.a && @.b]", @@ -89,46 +53,6 @@ class Case: data=[{"a": True, "b": False}], want=[{"a": True, "b": False}], ), - Case( - description="array contains literal", - path="$[?@.a contains 'foo']", - data=[{"a": ["foo", "bar"]}, {"a": ["bar"]}], - want=[ - { - "a": ["foo", "bar"], - } - ], - ), - Case( - description="object contains literal", - path="$[?@.a contains 'foo']", - data=[{"a": {"foo": "bar"}}, {"a": {"bar": "baz"}}], - want=[ - { - "a": {"foo": "bar"}, - } 
- ], - ), - Case( - description="literal in array", - path="$[?'foo' in @.a]", - data=[{"a": ["foo", "bar"]}, {"a": ["bar"]}], - want=[ - { - "a": ["foo", "bar"], - } - ], - ), - Case( - description="literal in object", - path="$[?'foo' in @.a]", - data=[{"a": {"foo": "bar"}}, {"a": {"bar": "baz"}}], - want=[ - { - "a": {"foo": "bar"}, - } - ], - ), Case( description="quoted reserved word, and", path="$['and']", diff --git a/tests/test_key_selector.py b/tests/test_key_selector.py index ab383ec..fa42a45 100644 --- a/tests/test_key_selector.py +++ b/tests/test_key_selector.py @@ -4,6 +4,7 @@ import pytest from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError from jsonpath import NodeList from ._cts_case import Case @@ -31,3 +32,11 @@ def test_key_selector(env: JSONPathEnvironment, case: Case) -> None: assert case.result_paths is not None assert nodes.values() == case.result assert nodes.paths() == case.result_paths + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_key_selector_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_keys_filter_selector.py b/tests/test_keys_filter_selector.py index a38761d..53e6ecb 100644 --- a/tests/test_keys_filter_selector.py +++ b/tests/test_keys_filter_selector.py @@ -4,6 +4,7 @@ import pytest from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError from jsonpath import NodeList from ._cts_case import Case @@ -31,3 +32,11 @@ def test_keys_filter_selector(env: JSONPathEnvironment, case: Case) -> None: assert case.result_paths is not None assert nodes.values() == case.result assert nodes.paths() == case.result_paths + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_keys_filter_selector_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with 
pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_keys_selector.py b/tests/test_keys_selector.py index df5a6dc..abd64cd 100644 --- a/tests/test_keys_selector.py +++ b/tests/test_keys_selector.py @@ -4,6 +4,7 @@ import pytest from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError from jsonpath import NodeList from ._cts_case import Case @@ -31,3 +32,11 @@ def test_keys_selector(env: JSONPathEnvironment, case: Case) -> None: assert case.result_paths is not None assert nodes.values() == case.result assert nodes.paths() == case.result_paths + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_keys_selector_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_membership_operators.py b/tests/test_membership_operators.py new file mode 100644 index 0000000..b10783e --- /dev/null +++ b/tests/test_membership_operators.py @@ -0,0 +1,42 @@ +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/membership_operators.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_membership_operators(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + 
assert nodes.paths() == case.result_paths + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_membership_operators_fail_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_singular_path_selector.py b/tests/test_singular_path_selector.py index fd07c07..05042f3 100644 --- a/tests/test_singular_path_selector.py +++ b/tests/test_singular_path_selector.py @@ -4,6 +4,7 @@ import pytest from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError from jsonpath import NodeList from ._cts_case import Case @@ -31,3 +32,11 @@ def test_singular_path_selector(env: JSONPathEnvironment, case: Case) -> None: assert case.result_paths is not None assert nodes.values() == case.result assert nodes.paths() == case.result_paths + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_singular_path_selector_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) From e087f709d7c18402fa9fbcdacf4fa398fe5003ca Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 18 Aug 2025 17:46:09 +0100 Subject: [PATCH 15/29] Remember to test async path too --- jsonpath/path.py | 7 ++- tests/pseudo_root_identifier.json | 28 +++++++++ tests/test_current_key_identifier.py | 20 +++++++ tests/test_find.py | 89 ---------------------------- tests/test_issues.py | 12 ++++ tests/test_key_selector.py | 20 +++++++ tests/test_keys_filter_selector.py | 20 +++++++ tests/test_keys_selector.py | 20 +++++++ tests/test_membership_operators.py | 20 +++++++ tests/test_pseudo_root_identifier.py | 62 +++++++++++++++++++ tests/test_singular_path_selector.py | 20 +++++++ tests/test_strictness.py | 6 ++ 12 files changed, 233 insertions(+), 91 deletions(-) delete mode 100644 tests/test_find.py create mode 100644 
tests/test_pseudo_root_identifier.py diff --git a/jsonpath/path.py b/jsonpath/path.py index 3f4e950..9b394c3 100644 --- a/jsonpath/path.py +++ b/jsonpath/path.py @@ -123,12 +123,14 @@ def finditer( an incompatible way. """ _data = load_data(data) + path = self.env.pseudo_root_token if self.pseudo_root else self.env.root_token + matches: Iterable[JSONPathMatch] = [ JSONPathMatch( filter_context=filter_context or {}, obj=[_data] if self.pseudo_root else _data, parent=None, - path=self.env.root_token, + path=path, parts=(), root=_data, ) @@ -161,13 +163,14 @@ async def finditer_async( ) -> AsyncIterable[JSONPathMatch]: """An async version of `finditer()`.""" _data = load_data(data) + path = self.env.pseudo_root_token if self.pseudo_root else self.env.root_token async def root_iter() -> AsyncIterable[JSONPathMatch]: yield self.env.match_class( filter_context=filter_context or {}, obj=[_data] if self.pseudo_root else _data, parent=None, - path=self.env.root_token, + path=path, parts=(), root=_data, ) diff --git a/tests/pseudo_root_identifier.json b/tests/pseudo_root_identifier.json index e69de29..93dd194 100644 --- a/tests/pseudo_root_identifier.json +++ b/tests/pseudo_root_identifier.json @@ -0,0 +1,28 @@ +{ + "tests": [ + { + "name": "conditionally select root value", + "selector": "^[?@.some.thing > 7]", + "document": { "some": { "thing": 42 } }, + "result": [{ "some": { "thing": 42 } }], + "result_paths": ["^[0]"], + "tags": ["extra"] + }, + { + "name": "embedded pseudo root query", + "selector": "^[?@.some.thing > value(^.*.num)]", + "document": { "some": { "thing": 42 }, "num": 7 }, + "result": [{ "some": { "thing": 42 }, "num": 7 }], + "result_paths": ["^[0]"], + "tags": ["extra"] + }, + { + "name": "embedded root query", + "selector": "^[?@.some.thing > value($.num)]", + "document": { "some": { "thing": 42 }, "num": 7 }, + "result": [{ "some": { "thing": 42 }, "num": 7 }], + "result_paths": ["^[0]"], + "tags": ["extra"] + } + ] +} diff --git 
a/tests/test_current_key_identifier.py b/tests/test_current_key_identifier.py index 4a28b40..5827467 100644 --- a/tests/test_current_key_identifier.py +++ b/tests/test_current_key_identifier.py @@ -1,3 +1,4 @@ +import asyncio import json import operator @@ -34,6 +35,25 @@ def test_current_key_identifier(env: JSONPathEnvironment, case: Case) -> None: assert nodes.paths() == case.result_paths +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_current_key_identifier_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths + + @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) def test_current_key_identifier_fails_in_strict_mode(case: Case) -> None: env = JSONPathEnvironment(strict=True) diff --git a/tests/test_find.py b/tests/test_find.py deleted file mode 100644 index d2bb49a..0000000 --- a/tests/test_find.py +++ /dev/null @@ -1,89 +0,0 @@ -import asyncio -import dataclasses -import operator -from typing import Any -from typing import List -from typing import Mapping -from typing import Sequence -from typing import Union - -import pytest - -from jsonpath import JSONPathEnvironment - -# TODO: move the rest of these test cases and delete me - - -@dataclasses.dataclass -class Case: - description: str - path: str - data: Union[Sequence[Any], Mapping[str, Any]] - want: Union[Sequence[Any], Mapping[str, Any]] - - -TEST_CASES = [ - Case( - description="slice a mapping", - path="$.some[0:4]", - data={"some": {"thing": "else"}}, - want=[], - 
), - Case( - description="select root value using pseudo root", - path="^[?@.some.thing > 7]", - data={"some": {"thing": 42}}, - want=[{"some": {"thing": 42}}], - ), - Case( - description="pseudo root in a filter query", - path="^[?@.some.thing > value(^.*.num)]", - data={"some": {"thing": 42}, "num": 7}, - want=[{"some": {"thing": 42}, "num": 7}], - ), - Case( - description="logical expr existence tests", - path="$[?@.a && @.b]", - data=[{"a": True, "b": False}], - want=[{"a": True, "b": False}], - ), - Case( - description="logical expr existence tests, alternate and", - path="$[?@.a and @.b]", - data=[{"a": True, "b": False}], - want=[{"a": True, "b": False}], - ), - Case( - description="quoted reserved word, and", - path="$['and']", - data={"and": [1, 2, 3]}, - want=[[1, 2, 3]], - ), - Case( - description="quoted reserved word, or", - path="$['or']", - data={"or": [1, 2, 3]}, - want=[[1, 2, 3]], - ), -] - - -@pytest.fixture() -def env() -> JSONPathEnvironment: - return JSONPathEnvironment() - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - assert path.findall(case.data) == case.want - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find_async(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - - async def coro() -> List[object]: - return await path.findall_async(case.data) - - assert asyncio.run(coro()) == case.want diff --git a/tests/test_issues.py b/tests/test_issues.py index 3fe8e9b..3aaa879 100644 --- a/tests/test_issues.py +++ b/tests/test_issues.py @@ -67,3 +67,15 @@ def test_issue_103() -> None: ] assert findall(query, data, filter_context=filter_context) == want + + +def test_quoted_reserved_word_and() -> None: + query = "$['and']" + data = {"and": [1, 2, 3]} + assert findall(query, data) == [[1, 2, 3]] + + +def test_quoted_reserved_word_or() 
-> None: + query = "$['or']" + data = {"or": [1, 2, 3]} + assert findall(query, data) == [[1, 2, 3]] diff --git a/tests/test_key_selector.py b/tests/test_key_selector.py index fa42a45..581facc 100644 --- a/tests/test_key_selector.py +++ b/tests/test_key_selector.py @@ -1,3 +1,4 @@ +import asyncio import json import operator @@ -34,6 +35,25 @@ def test_key_selector(env: JSONPathEnvironment, case: Case) -> None: assert nodes.paths() == case.result_paths +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_key_selector_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths + + @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) def test_key_selector_fails_in_strict_mode(case: Case) -> None: env = JSONPathEnvironment(strict=True) diff --git a/tests/test_keys_filter_selector.py b/tests/test_keys_filter_selector.py index 53e6ecb..e80fbd8 100644 --- a/tests/test_keys_filter_selector.py +++ b/tests/test_keys_filter_selector.py @@ -1,3 +1,4 @@ +import asyncio import json import operator @@ -34,6 +35,25 @@ def test_keys_filter_selector(env: JSONPathEnvironment, case: Case) -> None: assert nodes.paths() == case.result_paths +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_keys_filter_selector_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node 
in it]) + + nodes = asyncio.run(coro()) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths + + @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) def test_keys_filter_selector_fails_in_strict_mode(case: Case) -> None: env = JSONPathEnvironment(strict=True) diff --git a/tests/test_keys_selector.py b/tests/test_keys_selector.py index abd64cd..2f1d5a0 100644 --- a/tests/test_keys_selector.py +++ b/tests/test_keys_selector.py @@ -1,3 +1,4 @@ +import asyncio import json import operator @@ -34,6 +35,25 @@ def test_keys_selector(env: JSONPathEnvironment, case: Case) -> None: assert nodes.paths() == case.result_paths +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_keys_selector_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths + + @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) def test_keys_selector_fails_in_strict_mode(case: Case) -> None: env = JSONPathEnvironment(strict=True) diff --git a/tests/test_membership_operators.py b/tests/test_membership_operators.py index b10783e..695cf01 100644 --- a/tests/test_membership_operators.py +++ b/tests/test_membership_operators.py @@ -1,3 +1,4 @@ +import asyncio import json import operator @@ -34,6 +35,25 @@ def 
test_membership_operators(env: JSONPathEnvironment, case: Case) -> None: assert nodes.paths() == case.result_paths +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_membership_operators_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths + + @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) def test_membership_operators_fail_in_strict_mode(case: Case) -> None: env = JSONPathEnvironment(strict=True) diff --git a/tests/test_pseudo_root_identifier.py b/tests/test_pseudo_root_identifier.py new file mode 100644 index 0000000..91d22df --- /dev/null +++ b/tests/test_pseudo_root_identifier.py @@ -0,0 +1,62 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/pseudo_root_identifier.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_pseudo_root_identifier(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() 
in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_pseudo_root_identifier_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_pseudo_root_identifier_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_singular_path_selector.py b/tests/test_singular_path_selector.py index 05042f3..ddea639 100644 --- a/tests/test_singular_path_selector.py +++ b/tests/test_singular_path_selector.py @@ -1,3 +1,4 @@ +import asyncio import json import operator @@ -34,6 +35,25 @@ def test_singular_path_selector(env: JSONPathEnvironment, case: Case) -> None: assert nodes.paths() == case.result_paths +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_singular_path_selector_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + + if case.results is not None: + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in 
case.results_paths + else: + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths + + @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) def test_singular_path_selector_fails_in_strict_mode(case: Case) -> None: env = JSONPathEnvironment(strict=True) diff --git a/tests/test_strictness.py b/tests/test_strictness.py index 6e7480b..00dba23 100644 --- a/tests/test_strictness.py +++ b/tests/test_strictness.py @@ -24,3 +24,9 @@ def test_index_as_object_name(env: JSONPathEnvironment) -> None: query = "$.a[0]" data = {"a": {"0": 1}} assert env.findall(query, data) == [1] + + +def test_alternative_and(env: JSONPathEnvironment) -> None: + query = "$[?@.a and @.b]" + data = [{"a": True, "b": False}] + assert env.findall(query, data) == [{"a": True, "b": False}] From f6909ca4a4395ca45f507c849c7a1b91089b51bb Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 18 Aug 2025 19:27:50 +0100 Subject: [PATCH 16/29] More tidy of test cases --- tests/_cts_case.py | 13 +++++ tests/consensus.py | 3 +- tests/query_intersection.json | 28 ++++++++++ tests/query_union.json | 35 ++++++++++++ tests/regex_operator.json | 36 +++++++++++++ tests/test_concrete_path.py | 65 ---------------------- tests/test_current_key_identifier.py | 20 +------ tests/test_find_compound_path.py | 80 ---------------------------- tests/test_key_selector.py | 20 +------ tests/test_keys_filter_selector.py | 20 +------ tests/test_keys_selector.py | 20 +------ tests/test_membership_operators.py | 20 +------ tests/test_pseudo_root_identifier.py | 20 +------ tests/test_query_intersection.py | 48 +++++++++++++++++ tests/test_query_union.py | 46 ++++++++++++++++ tests/test_re.py | 69 ------------------------ tests/test_regex_operator.py | 46 ++++++++++++++++ tests/test_singular_path_selector.py | 20 +------ tests/test_strictness.py | 6 +++ 19 files changed, 274 insertions(+), 341 deletions(-) delete mode 100644 tests/test_concrete_path.py 
delete mode 100644 tests/test_find_compound_path.py create mode 100644 tests/test_query_intersection.py create mode 100644 tests/test_query_union.py delete mode 100644 tests/test_re.py create mode 100644 tests/test_regex_operator.py diff --git a/tests/_cts_case.py b/tests/_cts_case.py index dd4a55b..9c652b3 100644 --- a/tests/_cts_case.py +++ b/tests/_cts_case.py @@ -10,6 +10,8 @@ from typing import Sequence from typing import Union +from jsonpath import NodeList + @dataclass class Case: @@ -45,3 +47,14 @@ def as_dict(self) -> Dict[str, Any]: rv["tags"] = self.tags return rv + + def assert_nodes(self, nodes: NodeList) -> None: + """Assert that `nodes` matches this test case.""" + if self.results is not None: + assert self.results_paths is not None + assert nodes.values() in self.results + assert nodes.paths() in self.results_paths + else: + assert self.result_paths is not None + assert nodes.values() == self.result + assert nodes.paths() == self.result_paths diff --git a/tests/consensus.py b/tests/consensus.py index a905500..ad0dd7d 100644 --- a/tests/consensus.py +++ b/tests/consensus.py @@ -8,6 +8,7 @@ We've deliberately named this file so as to exclude it when running `pytest` or `hatch run test`. Target it specifically using `pytest tests/consensus.py`. 
""" + import operator import unittest from dataclasses import dataclass @@ -44,7 +45,7 @@ class Query: } SKIP = { - "bracket_notation_with_number_on_object": "We support unquoted property names", + # "bracket_notation_with_number_on_object": "We support unquoted property names", "dot_notation_with_number_-1": "conflict with compliance", "dot_notation_with_number_on_object": "conflict with compliance", } diff --git a/tests/query_intersection.json b/tests/query_intersection.json index e69de29..465bedd 100644 --- a/tests/query_intersection.json +++ b/tests/query_intersection.json @@ -0,0 +1,28 @@ +{ + "tests": [ + { + "name": "intersection of two paths, no common items", + "selector": "$.some & $.thing", + "document": { + "some": [1, 2, 3], + "thing": [4, 5, 6], + "other": ["a", "b", "c"] + }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "intersection of two paths, with common items", + "selector": "$.some & $.thing", + "document": { + "some": [1, 2, 3], + "thing": [1, 2, 3], + "other": ["a", "b", "c"] + }, + "result": [[1, 2, 3]], + "result_paths": ["$['some']"], + "tags": ["extra"] + } + ] +} diff --git a/tests/query_union.json b/tests/query_union.json index e69de29..408c8ad 100644 --- a/tests/query_union.json +++ b/tests/query_union.json @@ -0,0 +1,35 @@ +{ + "tests": [ + { + "name": "union of two paths", + "selector": "$.some | $.thing", + "document": { + "some": [1, 2, 3], + "thing": [4, 5, 6], + "other": ["a", "b", "c"] + }, + "result": [ + [1, 2, 3], + [4, 5, 6] + ], + "result_paths": ["$['some']", "$['thing']"], + "tags": ["extra"] + }, + { + "name": "union of three paths", + "selector": "$.some | $.thing | $.other", + "document": { + "some": [1, 2, 3], + "thing": [4, 5, 6], + "other": ["a", "b", "c"] + }, + "result": [ + [1, 2, 3], + [4, 5, 6], + ["a", "b", "c"] + ], + "result_paths": ["$['some']", "$['thing']", "$['other']"], + "tags": ["extra"] + } + ] +} diff --git a/tests/regex_operator.json b/tests/regex_operator.json 
index e69de29..4816304 100644 --- a/tests/regex_operator.json +++ b/tests/regex_operator.json @@ -0,0 +1,36 @@ +{ + "tests": [ + { + "name": "regex literal, match", + "selector": "$.some[?(@.thing =~ /fo[a-z]/)]", + "document": { "some": [{ "thing": "foo" }] }, + "result": [{ "thing": "foo" }], + "result_paths": ["$['some'][0]"], + "tags": ["extra"] + }, + { + "name": "regex literal, no match", + "selector": "$.some[?(@.thing =~ /fo[a-z]/)]", + "document": { "some": [{ "thing": "foO" }] }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "regex literal, case insensitive match", + "selector": "$.some[?(@.thing =~ /fo[a-z]/i)]", + "document": { "some": [{ "thing": "foO" }] }, + "result": [{ "thing": "foO" }], + "result_paths": ["$['some'][0]"], + "tags": ["extra"] + }, + { + "name": "regex literal, escaped slash", + "selector": "$.some[?(@.thing =~ /fo\\\\[a-z]/)]", + "document": { "some": [{ "thing": "fo\\b" }] }, + "result": [{ "thing": "fo\\b" }], + "result_paths": ["$['some'][0]"], + "tags": ["extra"] + } + ] +} diff --git a/tests/test_concrete_path.py b/tests/test_concrete_path.py deleted file mode 100644 index 3ab6de1..0000000 --- a/tests/test_concrete_path.py +++ /dev/null @@ -1,65 +0,0 @@ -import asyncio -import dataclasses -import operator -from typing import Any -from typing import List -from typing import Mapping -from typing import Sequence -from typing import Union - -import pytest - -from jsonpath import JSONPathEnvironment -from jsonpath import JSONPathMatch - - -@dataclasses.dataclass -class Case: - description: str - path: str - data: Union[Sequence[Any], Mapping[str, Any]] - want: List[str] - - -TEST_CASES = [ - Case( - description="normalized negative index", - path="$.a[-2]", - data={"a": [1, 2, 3, 4, 5]}, - want=["$['a'][3]"], - ), - Case( - description="normalized reverse slice", - path="$.a[3:0:-1]", - data={"a": [1, 2, 3, 4, 5]}, - want=["$['a'][3]", "$['a'][2]", "$['a'][1]"], - ), -] - - -@pytest.fixture() -def 
env() -> JSONPathEnvironment: - return JSONPathEnvironment() - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - matches = list(path.finditer(case.data)) - assert len(matches) == len(case.want) - for match, want in zip(matches, case.want): # noqa: B905 - assert match.path == want - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find_async(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - - async def coro() -> List[JSONPathMatch]: - matches = await path.finditer_async(case.data) - return [match async for match in matches] - - matches = asyncio.run(coro()) - assert len(matches) == len(case.want) - for match, want in zip(matches, case.want): # noqa: B905 - assert match.path == want diff --git a/tests/test_current_key_identifier.py b/tests/test_current_key_identifier.py index 5827467..54a0131 100644 --- a/tests/test_current_key_identifier.py +++ b/tests/test_current_key_identifier.py @@ -24,15 +24,7 @@ def env() -> JSONPathEnvironment: def test_current_key_identifier(env: JSONPathEnvironment, case: Case) -> None: assert case.document is not None nodes = NodeList(env.finditer(case.selector, case.document)) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) @@ -43,15 +35,7 @@ async def coro() -> NodeList: return NodeList([node async for node in it]) nodes = asyncio.run(coro()) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in 
case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) diff --git a/tests/test_find_compound_path.py b/tests/test_find_compound_path.py deleted file mode 100644 index d45db5a..0000000 --- a/tests/test_find_compound_path.py +++ /dev/null @@ -1,80 +0,0 @@ -import asyncio -import dataclasses -import operator -from typing import Any -from typing import List -from typing import Mapping -from typing import Sequence -from typing import Union - -import pytest - -from jsonpath import JSONPathEnvironment - - -@dataclasses.dataclass -class Case: - description: str - path: str - data: Union[Sequence[Any], Mapping[str, Any]] - want: Union[Sequence[Any], Mapping[str, Any]] - - -TEST_CASES = [ - Case( - description="union of two paths", - path="$.some | $.thing", - data={"some": [1, 2, 3], "thing": [4, 5, 6], "other": ["a", "b", "c"]}, - want=[[1, 2, 3], [4, 5, 6]], - ), - Case( - description="union of three paths", - path="$.some | $.thing | $.other", - data={"some": [1, 2, 3], "thing": [4, 5, 6], "other": ["a", "b", "c"]}, - want=[[1, 2, 3], [4, 5, 6], ["a", "b", "c"]], - ), - Case( - description="intersection of two paths with no common items", - path="$.some & $.thing", - data={"some": [1, 2, 3], "thing": [4, 5, 6], "other": ["a", "b", "c"]}, - want=[], - ), - Case( - description="intersection of two paths with common item", - path="$.some & $.thing", - data={"some": [1, 2, 3], "thing": [1, 2, 3], "other": ["a", "b", "c"]}, - want=[[1, 2, 3]], - ), - Case( - description="intersection then union", - path="$.some & $.thing | $.other", - data={"some": [1, 2, 3], "thing": [1, 2, 3], "other": ["a", "b", "c"]}, - want=[[1, 2, 3], ["a", "b", "c"]], - ), -] - - -@pytest.fixture() -def env() -> JSONPathEnvironment: - return JSONPathEnvironment() - - -@pytest.mark.parametrize("case", TEST_CASES, 
ids=operator.attrgetter("description")) -def test_find_compound_path(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - assert path.findall(case.data) == case.want - assert [match.obj for match in path.finditer(case.data)] == case.want - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find_compound_path_async(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - - async def coro() -> List[object]: - return await path.findall_async(case.data) - - async def iter_coro() -> List[object]: - return [match.obj async for match in await path.finditer_async(case.data)] - - assert asyncio.run(coro()) == case.want - assert asyncio.run(iter_coro()) == case.want diff --git a/tests/test_key_selector.py b/tests/test_key_selector.py index 581facc..876ca3c 100644 --- a/tests/test_key_selector.py +++ b/tests/test_key_selector.py @@ -24,15 +24,7 @@ def env() -> JSONPathEnvironment: def test_key_selector(env: JSONPathEnvironment, case: Case) -> None: assert case.document is not None nodes = NodeList(env.finditer(case.selector, case.document)) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) @@ -43,15 +35,7 @@ async def coro() -> NodeList: return NodeList([node async for node in it]) nodes = asyncio.run(coro()) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) 
@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) diff --git a/tests/test_keys_filter_selector.py b/tests/test_keys_filter_selector.py index e80fbd8..1360f80 100644 --- a/tests/test_keys_filter_selector.py +++ b/tests/test_keys_filter_selector.py @@ -24,15 +24,7 @@ def env() -> JSONPathEnvironment: def test_keys_filter_selector(env: JSONPathEnvironment, case: Case) -> None: assert case.document is not None nodes = NodeList(env.finditer(case.selector, case.document)) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) @@ -43,15 +35,7 @@ async def coro() -> NodeList: return NodeList([node async for node in it]) nodes = asyncio.run(coro()) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) diff --git a/tests/test_keys_selector.py b/tests/test_keys_selector.py index 2f1d5a0..7fe99e1 100644 --- a/tests/test_keys_selector.py +++ b/tests/test_keys_selector.py @@ -24,15 +24,7 @@ def env() -> JSONPathEnvironment: def test_keys_selector(env: JSONPathEnvironment, case: Case) -> None: assert case.document is not None nodes = NodeList(env.finditer(case.selector, case.document)) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert 
nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) @@ -43,15 +35,7 @@ async def coro() -> NodeList: return NodeList([node async for node in it]) nodes = asyncio.run(coro()) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) diff --git a/tests/test_membership_operators.py b/tests/test_membership_operators.py index 695cf01..cb44ab0 100644 --- a/tests/test_membership_operators.py +++ b/tests/test_membership_operators.py @@ -24,15 +24,7 @@ def env() -> JSONPathEnvironment: def test_membership_operators(env: JSONPathEnvironment, case: Case) -> None: assert case.document is not None nodes = NodeList(env.finditer(case.selector, case.document)) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) @@ -43,15 +35,7 @@ async def coro() -> NodeList: return NodeList([node async for node in it]) nodes = asyncio.run(coro()) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) 
diff --git a/tests/test_pseudo_root_identifier.py b/tests/test_pseudo_root_identifier.py index 91d22df..fff90bb 100644 --- a/tests/test_pseudo_root_identifier.py +++ b/tests/test_pseudo_root_identifier.py @@ -24,15 +24,7 @@ def env() -> JSONPathEnvironment: def test_pseudo_root_identifier(env: JSONPathEnvironment, case: Case) -> None: assert case.document is not None nodes = NodeList(env.finditer(case.selector, case.document)) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) @@ -43,15 +35,7 @@ async def coro() -> NodeList: return NodeList([node async for node in it]) nodes = asyncio.run(coro()) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) diff --git a/tests/test_query_intersection.py b/tests/test_query_intersection.py new file mode 100644 index 0000000..d4d05c7 --- /dev/null +++ b/tests/test_query_intersection.py @@ -0,0 +1,48 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/query_intersection.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + 
+@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_query_intersection_operator(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_query_intersection_operator_async( + env: JSONPathEnvironment, case: Case +) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_query_intersection_operator_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_query_union.py b/tests/test_query_union.py new file mode 100644 index 0000000..4ec12ef --- /dev/null +++ b/tests/test_query_union.py @@ -0,0 +1,46 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/query_union.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_query_union_operator(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_query_union_operator_async(env: JSONPathEnvironment, case: 
Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_query_union_operator_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_re.py b/tests/test_re.py deleted file mode 100644 index eca6037..0000000 --- a/tests/test_re.py +++ /dev/null @@ -1,69 +0,0 @@ -import asyncio -import dataclasses -import operator -from typing import Any -from typing import List -from typing import Mapping -from typing import Sequence -from typing import Union - -import pytest - -from jsonpath import JSONPathEnvironment - - -@dataclasses.dataclass -class Case: - description: str - path: str - data: Union[Sequence[Any], Mapping[str, Any]] - want: Union[Sequence[Any], Mapping[str, Any]] - - -TEST_CASES = [ - Case( - description="match a regex", - path="$.some[?(@.thing =~ /fo[a-z]/)]", - data={"some": [{"thing": "foo"}]}, - want=[{"thing": "foo"}], - ), - Case( - description="regex with no match", - path="$.some[?(@.thing =~ /fo[a-z]/)]", - data={"some": [{"thing": "foO"}]}, - want=[], - ), - Case( - description="case insensitive match", - path="$.some[?(@.thing =~ /fo[a-z]/i)]", - data={"some": [{"thing": "foO"}]}, - want=[{"thing": "foO"}], - ), - Case( - description="escaped slash", - path="$.some[?(@.thing =~ /fo\\\\[a-z]/)]", - data={"some": [{"thing": "fo\\b"}]}, - want=[{"thing": "fo\\b"}], - ), -] - - -@pytest.fixture() -def env() -> JSONPathEnvironment: - return JSONPathEnvironment() - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_filter_regex(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - 
assert path.findall(case.data) == case.want - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_filter_regex_async(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - - async def coro() -> List[object]: - return await path.findall_async(case.data) - - assert asyncio.run(coro()) == case.want diff --git a/tests/test_regex_operator.py b/tests/test_regex_operator.py new file mode 100644 index 0000000..7849169 --- /dev/null +++ b/tests/test_regex_operator.py @@ -0,0 +1,46 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/regex_operator.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_regex_operator(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_regex_operator_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_regex_operator_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_singular_path_selector.py 
b/tests/test_singular_path_selector.py index ddea639..add9cee 100644 --- a/tests/test_singular_path_selector.py +++ b/tests/test_singular_path_selector.py @@ -24,15 +24,7 @@ def env() -> JSONPathEnvironment: def test_singular_path_selector(env: JSONPathEnvironment, case: Case) -> None: assert case.document is not None nodes = NodeList(env.finditer(case.selector, case.document)) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) @@ -43,15 +35,7 @@ async def coro() -> NodeList: return NodeList([node async for node in it]) nodes = asyncio.run(coro()) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) diff --git a/tests/test_strictness.py b/tests/test_strictness.py index 00dba23..008c760 100644 --- a/tests/test_strictness.py +++ b/tests/test_strictness.py @@ -30,3 +30,9 @@ def test_alternative_and(env: JSONPathEnvironment) -> None: query = "$[?@.a and @.b]" data = [{"a": True, "b": False}] assert env.findall(query, data) == [{"a": True, "b": False}] + + +def test_alternative_or(env: JSONPathEnvironment) -> None: + query = "$[?@.a or @.c]" + data = [{"a": True, "b": False}, {"c": 99}] + assert env.findall(query, data) == [{"a": True, "b": False}, {"c": 99}] From 9a73434e58238eed46a6588ad950dc9aa17e6227 Mon Sep 17 00:00:00 2001 From: James Prior Date: Tue, 19 Aug 2025 19:47:54 +0100 Subject: [PATCH 17/29] 
Enforce recursion limit and more tidying --- jsonpath/cli.py | 2 +- jsonpath/env.py | 9 ++++-- jsonpath/exceptions.py | 13 ++++++++ jsonpath/filter.py | 24 +++++++-------- jsonpath/parse.py | 1 - jsonpath/path.py | 3 -- jsonpath/segments.py | 5 ++- tests/test_compliance.py | 20 ++---------- tests/test_errors.py | 28 +++++++++++++++++ tests/test_match_function.py | 60 ------------------------------------ tests/test_strictness.py | 25 +++++++++++++++ 11 files changed, 91 insertions(+), 99 deletions(-) delete mode 100644 tests/test_match_function.py diff --git a/jsonpath/cli.py b/jsonpath/cli.py index e79d2fd..e840b2c 100644 --- a/jsonpath/cli.py +++ b/jsonpath/cli.py @@ -1,4 +1,5 @@ """JSONPath, JSON Pointer and JSON Patch command line interface.""" + import argparse import json import sys @@ -289,7 +290,6 @@ def handle_pointer_command(args: argparse.Namespace) -> None: if args.pointer is not None: pointer = args.pointer else: - # TODO: is a property with a trailing newline OK? pointer = args.pointer_file.read().strip() try: diff --git a/jsonpath/env.py b/jsonpath/env.py index 98ff94f..e888680 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -124,6 +124,9 @@ class attributes `root_token`, `self_token` and `filter_context_token`. index. Defaults to `(2**53) - 1`. min_int_index (int): The minimum integer allowed when selecting array items by index. Defaults to `-(2**53) + 1`. + max_recursion_depth (int): The maximum number of dict/objects and/or arrays/ + lists the recursive descent selector can visit before a + `JSONPathRecursionError` is thrown. parser_class: The parser to use when parsing tokens from the lexer. root_token (str): The pattern used to select the root node in a JSON document. Defaults to `"$"`. @@ -132,8 +135,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`. union_token (str): The pattern used as the union operator. Defaults to `"|"`. """ - # These should be unescaped strings. 
`re.escape` will be called - # on them automatically when compiling lexer rules. + # These should be unescaped strings. `re.escape` will be called on them + # automatically when compiling lexer rules. pseudo_root_token = "^" filter_context_token = "_" intersection_token = "&" @@ -146,6 +149,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`. max_int_index = (2**53) - 1 min_int_index = -(2**53) + 1 + max_recursion_depth = 100 # Override these to customize path tokenization and parsing. lexer_class: Type[Lexer] = Lexer @@ -227,7 +231,6 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 "unexpected whitespace", token=stream.tokens[stream.pos - 1] ) - # TODO: better! if stream.current().kind != TOKEN_EOF: _path = CompoundJSONPath(env=self, path=_path) while stream.current().kind != TOKEN_EOF: diff --git a/jsonpath/exceptions.py b/jsonpath/exceptions.py index c6797c5..d26e334 100644 --- a/jsonpath/exceptions.py +++ b/jsonpath/exceptions.py @@ -77,6 +77,19 @@ def __init__(self, *args: object, token: Token) -> None: self.token = token +class JSONPathRecursionError(JSONPathError): + """An exception raised when the maximum recursion depth is reached. + + Arguments: + args: Arguments passed to `Exception`. + token: The token that caused the error. 
+ """ + + def __init__(self, *args: object, token: Token) -> None: + super().__init__(*args) + self.token = token + + class JSONPointerError(Exception): """Base class for all JSON Pointer errors.""" diff --git a/jsonpath/filter.py b/jsonpath/filter.py index b8905e9..494e2bc 100644 --- a/jsonpath/filter.py +++ b/jsonpath/filter.py @@ -529,11 +529,9 @@ def __str__(self) -> str: return "@" + str(self.path)[1:] def evaluate(self, context: FilterContext) -> object: - if isinstance(context.current, str): # TODO: refactor - if self.path.empty(): - return context.current - return NodeList() - if not isinstance(context.current, (Sequence, Mapping)): + if isinstance(context.current, str) or not isinstance( + context.current, (Sequence, Mapping) + ): if self.path.empty(): return context.current return NodeList() @@ -546,11 +544,9 @@ def evaluate(self, context: FilterContext) -> object: ) async def evaluate_async(self, context: FilterContext) -> object: - if isinstance(context.current, str): # TODO: refactor - if self.path.empty(): - return context.current - return NodeList() - if not isinstance(context.current, (Sequence, Mapping)): + if isinstance(context.current, str) or not isinstance( + context.current, (Sequence, Mapping) + ): if self.path.empty(): return context.current return NodeList() @@ -660,7 +656,9 @@ def evaluate(self, context: FilterContext) -> object: try: func = context.env.function_extensions[self.name] except KeyError: - return UNDEFINED # TODO: should probably raise an exception + # This can only happen if the environment's function register has been + # changed since the query was parsed. 
+ return UNDEFINED args = [arg.evaluate(context) for arg in self.args] return func(*self._unpack_node_lists(func, args)) @@ -668,7 +666,9 @@ async def evaluate_async(self, context: FilterContext) -> object: try: func = context.env.function_extensions[self.name] except KeyError: - return UNDEFINED # TODO: should probably raise an exception + # This can only happen if the environment's function register has been + # changed since the query was parsed. + return UNDEFINED args = [await arg.evaluate_async(context) for arg in self.args] return func(*self._unpack_node_lists(func, args)) diff --git a/jsonpath/parse.py b/jsonpath/parse.py index 3f6be61..0898529 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -722,7 +722,6 @@ def parse_relative_query(self, stream: TokenStream) -> BaseExpression: def parse_singular_query_selector( self, stream: TokenStream ) -> SingularQuerySelector: - # TODO: optionally require root identifier token = ( stream.next() if stream.current().kind == TOKEN_ROOT else stream.current() ) diff --git a/jsonpath/path.py b/jsonpath/path.py index 9b394c3..eeca6f7 100644 --- a/jsonpath/path.py +++ b/jsonpath/path.py @@ -474,9 +474,6 @@ def intersection(self, path: JSONPath) -> CompoundJSONPath: paths=self.paths + ((self.env.intersection_token, path),), ) - # TODO: implement empty and singular for CompoundJSONPath - # TODO: add a `segments` property returning segments from all paths - T = TypeVar("T") diff --git a/jsonpath/segments.py b/jsonpath/segments.py index 8aeb892..51054d1 100644 --- a/jsonpath/segments.py +++ b/jsonpath/segments.py @@ -11,6 +11,8 @@ from typing import Sequence from typing import Tuple +from .exceptions import JSONPathRecursionError + if TYPE_CHECKING: from .env import JSONPathEnvironment from .match import JSONPathMatch @@ -99,7 +101,8 @@ async def resolve_async( def _visit(self, node: JSONPathMatch, depth: int = 1) -> Iterable[JSONPathMatch]: """Depth-first, pre-order node traversal.""" - # TODO: check for recursion limit 
+ if depth > self.env.max_recursion_depth: + raise JSONPathRecursionError("recursion limit exceeded", token=self.token) yield node diff --git a/tests/test_compliance.py b/tests/test_compliance.py index 7602f8b..daf4e71 100644 --- a/tests/test_compliance.py +++ b/tests/test_compliance.py @@ -78,15 +78,7 @@ def test_compliance_strict(env: JSONPathEnvironment, case: Case) -> None: assert case.document is not None nodes = NodeList(env.finditer(case.selector, case.document)) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name")) @@ -100,15 +92,7 @@ async def coro() -> NodeList: return NodeList([node async for node in it]) nodes = asyncio.run(coro()) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + case.assert_nodes(nodes) @pytest.mark.parametrize("case", invalid_cases(), ids=operator.attrgetter("name")) diff --git a/tests/test_errors.py b/tests/test_errors.py index 2c88135..029a680 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -1,10 +1,12 @@ from operator import attrgetter +from typing import Any from typing import List from typing import NamedTuple import pytest from jsonpath import JSONPathEnvironment +from jsonpath.exceptions import JSONPathRecursionError from jsonpath.exceptions import JSONPathSyntaxError from jsonpath.exceptions import JSONPathTypeError @@ -77,3 +79,29 @@ def test_filter_literals_must_be_compared( ) -> None: with pytest.raises(JSONPathSyntaxError): 
env.compile(case.query) + + +def test_recursive_data() -> None: + class MockEnv(JSONPathEnvironment): + nondeterministic = False + + env = MockEnv() + query = "$..a" + arr: List[Any] = [] + data: Any = {"foo": arr} + arr.append(data) + + with pytest.raises(JSONPathRecursionError): + env.findall(query, data) + + +def test_low_recursion_limit() -> None: + class MockEnv(JSONPathEnvironment): + max_recursion_depth = 3 + + env = MockEnv() + query = "$..a" + data = {"foo": [{"bar": [1, 2, 3]}]} + + with pytest.raises(JSONPathRecursionError): + env.findall(query, data) diff --git a/tests/test_match_function.py b/tests/test_match_function.py deleted file mode 100644 index 5a9dab7..0000000 --- a/tests/test_match_function.py +++ /dev/null @@ -1,60 +0,0 @@ -import asyncio -import dataclasses -import operator -from typing import Any -from typing import List -from typing import Mapping -from typing import Sequence -from typing import Union - -import pytest - -from jsonpath import JSONPathEnvironment - - -@dataclasses.dataclass -class Case: - description: str - path: str - data: Union[Sequence[Any], Mapping[str, Any]] - want: Union[Sequence[Any], Mapping[str, Any]] - - -TEST_CASES = [ - Case( - description="match a regex", - path="$.some[?match(@.thing, 'fo[a-z]')]", - data={"some": [{"thing": "foo"}]}, - want=[{"thing": "foo"}], - ), - Case( - description="regex with no match", - path="$.some[?match(@.thing, 'fo[a-z]')]", - data={"some": [{"thing": "foO"}]}, - want=[], - ), -] - - -@pytest.fixture() -def env() -> JSONPathEnvironment: - return JSONPathEnvironment() - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_match_function(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - assert path.findall(case.data) == case.want - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_match_function_async(env: JSONPathEnvironment, case: Case) -> None: - path = 
env.compile(case.path) - - async def coro() -> List[object]: - return await path.findall_async(case.data) - - assert asyncio.run(coro()) == case.want - - -# TODO: test error conditions diff --git a/tests/test_strictness.py b/tests/test_strictness.py index 008c760..0db8a43 100644 --- a/tests/test_strictness.py +++ b/tests/test_strictness.py @@ -36,3 +36,28 @@ def test_alternative_or(env: JSONPathEnvironment) -> None: query = "$[?@.a or @.c]" data = [{"a": True, "b": False}, {"c": 99}] assert env.findall(query, data) == [{"a": True, "b": False}, {"c": 99}] + + +def test_implicit_root_identifier( + env: JSONPathEnvironment, +) -> None: + query = "a['p']" + data = { + "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, + "b": ["j", "p", "q"], + } + + assert env.findall(query, data) == [{"q": [4, 5, 6]}] + + +def test_singular_path_selector_without_root_identifier( + env: JSONPathEnvironment, +) -> None: + query = "$.a[b[1]]" + data = { + "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, + "b": ["j", "p", "q"], + "c d": {"x": {"y": 1}}, + } + + assert env.findall(query, data) == [{"q": [4, 5, 6]}] From 71a43ba7f95feab6cc29bb115220a3f2d1d6aef5 Mon Sep 17 00:00:00 2001 From: James Prior Date: Wed, 20 Aug 2025 08:41:03 +0100 Subject: [PATCH 18/29] More tests and refactor parser.parse_query --- jsonpath/parse.py | 119 +++++++++++++++----------------- tests/membership_operators.json | 24 +++++++ tests/test_strictness.py | 12 ++++ tests/test_undefined.py | 46 ++++++++++++ tests/undefined.json | 92 ++++++++++++++++++++++++ 5 files changed, 229 insertions(+), 64 deletions(-) create mode 100644 tests/test_undefined.py create mode 100644 tests/undefined.json diff --git a/jsonpath/parse.py b/jsonpath/parse.py index 0898529..e504a43 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -338,100 +338,91 @@ def parse_query(self, stream: TokenStream) -> Iterable[JSONPathSegment]: This method assumes the root, current or pseudo root identifier has already been consumed. 
""" + if not self.env.strict and stream.current().kind in { + TOKEN_NAME, + TOKEN_WILD, + TOKEN_KEYS, + TOKEN_KEY_NAME, + }: + # A non-standard "bare" path. One that starts with a shorthand selector + # without a leading identifier (`$`, `@`, `^` or `_`). + # + # When no identifier is given, a root query (`$`) is assumed. + token = stream.current() + selector = self.parse_shorthand_selector(stream) + yield JSONPathChildSegment(env=self.env, token=token, selectors=(selector,)) + while True: stream.skip_whitespace() - _token = stream.current() - if _token.kind == TOKEN_DOT: - stream.eat(TOKEN_DOT) - # Assert that dot is followed by shorthand selector without whitespace. - stream.expect(TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS, TOKEN_KEY_NAME) - token = stream.current() - selectors = self.parse_selector(stream) + token = stream.next() + + if token.kind == TOKEN_DOT: + selector = self.parse_shorthand_selector(stream) yield JSONPathChildSegment( - env=self.env, token=token, selectors=selectors + env=self.env, token=token, selectors=(selector,) ) - elif _token.kind == TOKEN_DDOT: - token = stream.eat(TOKEN_DDOT) - selectors = self.parse_selector(stream) - if not selectors: - raise JSONPathSyntaxError( - "missing selector for recursive descent segment", - token=stream.current(), - ) + elif token.kind == TOKEN_DDOT: + if stream.current().kind == TOKEN_LBRACKET: + selectors = tuple(self.parse_bracketed_selection(stream)) + else: + selectors = (self.parse_shorthand_selector(stream),) + yield JSONPathRecursiveDescentSegment( env=self.env, token=token, selectors=selectors ) - elif _token.kind == TOKEN_LBRACKET: - selectors = self.parse_selector(stream) - yield JSONPathChildSegment( - env=self.env, token=_token, selectors=selectors - ) - elif _token.kind in {TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS, TOKEN_KEY_NAME}: - # A non-standard "bare" path. One without a leading identifier (`$`, - # `@`, `^` or `_`). 
- token = stream.current() - selectors = self.parse_selector(stream) + elif token.kind == TOKEN_LBRACKET: + stream.pos -= 1 yield JSONPathChildSegment( - env=self.env, token=token, selectors=selectors + env=self.env, + token=token, + selectors=tuple(self.parse_bracketed_selection(stream)), ) + elif token.kind == TOKEN_EOF: + break else: + # An embedded query. Put the token back on the stream. + stream.pos -= 1 break - def parse_selector(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]: # noqa: PLR0911 + def parse_shorthand_selector(self, stream: TokenStream) -> JSONPathSelector: token = stream.next() if token.kind == TOKEN_NAME: - return ( - NameSelector( - env=self.env, - token=token, - name=token.value, - ), + return NameSelector( + env=self.env, + token=token, + name=token.value, ) if token.kind == TOKEN_KEY_NAME: - return ( - KeySelector( - env=self.env, - token=token, - key=token.value, - ), + return KeySelector( + env=self.env, + token=token, + key=token.value, ) if token.kind == TOKEN_WILD: - return ( - WildcardSelector( - env=self.env, - token=token, - ), + return WildcardSelector( + env=self.env, + token=token, ) if token.kind == TOKEN_KEYS: if stream.current().kind == TOKEN_NAME: - return ( - KeySelector( - env=self.env, - token=token, - key=self._decode_string_literal(stream.next()), - ), - ) - - return ( - KeysSelector( + return KeySelector( env=self.env, token=token, - ), - ) + key=self._decode_string_literal(stream.next()), + ) - if token.kind == TOKEN_LBRACKET: - stream.pos -= 1 - return tuple(self.parse_bracketed_selection(stream)) + return KeysSelector( + env=self.env, + token=token, + ) - stream.pos -= 1 - return () + raise JSONPathSyntaxError("expected a shorthand selector", token=token) def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]: # noqa: PLR0912, PLR0915 - """Parse a comma separated list of JSONPath selectors.""" segment_token = stream.eat(TOKEN_LBRACKET) selectors: List[JSONPathSelector] = [] @@ 
-704,7 +695,7 @@ def parse_grouped_expression(self, stream: TokenStream) -> BaseExpression: return expr def parse_absolute_query(self, stream: TokenStream) -> BaseExpression: - root = stream.next() + root = stream.next() # Could be TOKEN_ROOT or TOKEN_PSEUDO_ROOT return RootFilterQuery( JSONPath( env=self.env, diff --git a/tests/membership_operators.json b/tests/membership_operators.json index b66c8dc..15242bd 100644 --- a/tests/membership_operators.json +++ b/tests/membership_operators.json @@ -59,6 +59,30 @@ ], "result_paths": ["$[0]"], "tags": ["extra"] + }, + { + "name": "embedded query in list literal", + "selector": "$[?(@.a in ['bar', 'baz'])]", + "document": [{ "a": "foo" }, { "a": "bar" }], + "result": [ + { + "a": "bar" + } + ], + "result_paths": ["$[1]"], + "tags": ["extra"] + }, + { + "name": "list literal contains embedded query", + "selector": "$[?(['bar', 'baz'] contains @.a)]", + "document": [{ "a": "foo" }, { "a": "bar" }], + "result": [ + { + "a": "bar" + } + ], + "result_paths": ["$[1]"], + "tags": ["extra"] } ] } diff --git a/tests/test_strictness.py b/tests/test_strictness.py index 0db8a43..d4fe39a 100644 --- a/tests/test_strictness.py +++ b/tests/test_strictness.py @@ -38,6 +38,18 @@ def test_alternative_or(env: JSONPathEnvironment) -> None: assert env.findall(query, data) == [{"a": True, "b": False}, {"c": 99}] +def test_alternative_null(env: JSONPathEnvironment) -> None: + query = "$[?@.a==Null]" + data = [{"a": None, "d": "e"}, {"a": "c", "d": "f"}] + assert env.findall(query, data) == [{"a": None, "d": "e"}] + + +def test_none(env: JSONPathEnvironment) -> None: + query = "$[?@.a==None]" + data = [{"a": None, "d": "e"}, {"a": "c", "d": "f"}] + assert env.findall(query, data) == [{"a": None, "d": "e"}] + + def test_implicit_root_identifier( env: JSONPathEnvironment, ) -> None: diff --git a/tests/test_undefined.py b/tests/test_undefined.py new file mode 100644 index 0000000..3d9c1ef --- /dev/null +++ b/tests/test_undefined.py @@ -0,0 +1,46 @@ 
+import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/undefined.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_undefined_keyword(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_undefined_keyword_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_comparison_to_undefined_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/undefined.json b/tests/undefined.json new file mode 100644 index 0000000..3e04c29 --- /dev/null +++ b/tests/undefined.json @@ -0,0 +1,92 @@ +{ + "tests": [ + { + "name": "explicit comparison to undefined", + "selector": "$[?@.a == undefined]", + "document": [ + { + "a": "b", + "d": "e" + }, + { + "b": "c", + "d": "f" + } + ], + "result": [ + { + "b": "c", + "d": "f" + } + ], + "result_paths": ["$[1]"], + "tags": ["extra"] + }, + { + "name": "explicit comparison to missing", + "selector": "$[?@.a == missing]", + "document": [ + { + "a": "b", + "d": "e" + }, + { + "b": "c", + "d": "f" + } + ], + 
"result": [ + { + "b": "c", + "d": "f" + } + ], + "result_paths": ["$[1]"], + "tags": ["extra"] + }, + { + "name": "explicit undefined is on the left", + "selector": "$[?undefined == @.a]", + "document": [ + { + "a": "b", + "d": "e" + }, + { + "b": "c", + "d": "f" + } + ], + "result": [ + { + "b": "c", + "d": "f" + } + ], + "result_paths": ["$[1]"], + "tags": ["extra"] + }, + { + "name": "not equal to undefined", + "selector": "$[?@.a != undefined]", + "document": [ + { + "a": "b", + "d": "e" + }, + { + "b": "c", + "d": "f" + } + ], + "result": [ + { + "a": "b", + "d": "e" + } + ], + "result_paths": ["$[0]"], + "tags": ["extra"] + } + ] +} From f384b63a78dc97db83e9a3e93b92078e951f90f2 Mon Sep 17 00:00:00 2001 From: James Prior Date: Wed, 20 Aug 2025 09:15:02 +0100 Subject: [PATCH 19/29] Pretty exception messages --- jsonpath/env.py | 3 +- jsonpath/exceptions.py | 66 +++++++++++++++++++++++++++++++++++++++--- jsonpath/parse.py | 12 ++------ tests/test_errors.py | 2 +- 4 files changed, 68 insertions(+), 15 deletions(-) diff --git a/jsonpath/env.py b/jsonpath/env.py index e888680..021d5f1 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -493,8 +493,9 @@ def check_well_typedness( """Check the well-typedness of a function's arguments at compile-time.""" # Correct number of arguments? 
if len(args) != len(func.arg_types): + plural = "" if len(func.arg_types) == 1 else "s" raise JSONPathTypeError( - f"{token.value!r}() requires {len(func.arg_types)} arguments", + f"{token.value}() requires {len(func.arg_types)} argument{plural}", token=token, ) diff --git a/jsonpath/exceptions.py b/jsonpath/exceptions.py index d26e334..62de398 100644 --- a/jsonpath/exceptions.py +++ b/jsonpath/exceptions.py @@ -5,6 +5,8 @@ from typing import TYPE_CHECKING from typing import Optional +from .token import TOKEN_EOF + if TYPE_CHECKING: from .token import Token @@ -22,13 +24,69 @@ def __init__(self, *args: object, token: Optional[Token] = None) -> None: self.token: Optional[Token] = token def __str__(self) -> str: - msg = super().__str__() + return self.detailed_message() + def detailed_message(self) -> str: + """Return an error message formatted with extra context info.""" if not self.token: - return msg + return super().__str__() - line, column = self.token.position() - return f"{msg}, line {line}, column {column}" + lineno, col, _prev, current, _next = self._error_context( + self.token.path, self.token.index + ) + + if self.token.kind == TOKEN_EOF: + col = len(current) + + pad = " " * len(str(lineno)) + length = len(self.token.value) + pointer = (" " * col) + ("^" * max(length, 1)) + + return ( + f"{self.message}\n" + f"{pad} -> {self.token.path!r} {lineno}:{col}\n" + f"{pad} |\n" + f"{lineno} | {current}\n" + f"{pad} | {pointer} {self.message}\n" + ) + + @property + def message(self) -> object: + """The exception's error message if one was given.""" + if self.args: + return self.args[0] + return None + + def _error_context(self, text: str, index: int) -> tuple[int, int, str, str, str]: + lines = text.splitlines(keepends=True) + cumulative_length = 0 + target_line_index = -1 + + for i, line in enumerate(lines): + cumulative_length += len(line) + if index < cumulative_length: + target_line_index = i + break + + if target_line_index == -1: + raise ValueError("index is 
out of bounds for the given string") + + # Line number (1-based) + line_number = target_line_index + 1 + # Column number within the line + column_number = index - (cumulative_length - len(lines[target_line_index])) + + previous_line = ( + lines[target_line_index - 1].rstrip() if target_line_index > 0 else "" + ) + current_line = lines[target_line_index].rstrip() + next_line = ( + lines[target_line_index + 1].rstrip() + if target_line_index < len(lines) - 1 + else "" + ) + + return line_number, column_number, previous_line, current_line, next_line class JSONPathSyntaxError(JSONPathError): diff --git a/jsonpath/parse.py b/jsonpath/parse.py index e504a43..6b6c684 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -675,19 +675,13 @@ def parse_infix_expression( return InfixExpression(left, operator, right) def parse_grouped_expression(self, stream: TokenStream) -> BaseExpression: - stream.eat(TOKEN_LPAREN) + _token = stream.eat(TOKEN_LPAREN) expr = self.parse_filter_expression(stream) while stream.current().kind != TOKEN_RPAREN: token = stream.current() - if token.kind == TOKEN_EOF: - raise JSONPathSyntaxError("unbalanced parentheses", token=token) - - if token.kind not in self.BINARY_OPERATORS: - raise JSONPathSyntaxError( - f"expected an expression, found '{token.value}'", - token=token, - ) + if token.kind in (TOKEN_EOF, TOKEN_RBRACKET): + raise JSONPathSyntaxError("unbalanced parentheses", token=_token) expr = self.parse_infix_expression(stream, expr) diff --git a/tests/test_errors.py b/tests/test_errors.py index 029a680..b39caab 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -22,7 +22,7 @@ def test_unclosed_selection_list(env: JSONPathEnvironment) -> None: def test_function_missing_param(env: JSONPathEnvironment) -> None: - with pytest.raises(JSONPathTypeError): + with pytest.raises(JSONPathTypeError, match=r"length\(\) requires 1 argument"): env.compile("$[?(length()==1)]") From efb0f7dc3ea8b718a03b0cb6218481c5cd8309de Mon Sep 17 00:00:00 
2001 From: James Prior Date: Wed, 20 Aug 2025 14:48:10 +0100 Subject: [PATCH 20/29] Update docs WIP [skip ci] --- docs/advanced.md | 20 ---- docs/index.md | 10 +- docs/quickstart.md | 10 +- docs/syntax.md | 248 +++++++++++++++++++++++++++++++-------------- 4 files changed, 187 insertions(+), 101 deletions(-) diff --git a/docs/advanced.md b/docs/advanced.md index 0dbd761..966dfc3 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -257,23 +257,3 @@ env = MyJSONPathEnvironment() query = env.compile("$.users[999]") # jsonpath.exceptions.JSONPathIndexError: index out of range, line 1, column 8 ``` - -### Subclassing Lexer - -TODO: - -### Subclassing Parser - -TODO: - -### Get Item - -TODO: - -### Truthiness and Existence - -TODO: - -### Filter Infix Expressions - -TODO: diff --git a/docs/index.md b/docs/index.md index 6165583..c62b891 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,7 +2,7 @@ JSONPath is a mini language for selecting values from data formatted in JavaScript Object Notation, or equivalent Python objects, like dictionaries and lists. -Python JSONPath is a non-evaluating, read-only implementation of JSONPath, suitable for situations where JSONPath query authors are untrusted. We follow most of [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535). See [Notable differences](syntax.md#notable-differences) for a list of areas where we deviate from the standard. +Python JSONPath is a non-evaluating, read-only implementation of JSONPath, suitable for situations where JSONPath query authors are untrusted. We follow [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535) and test against the [JSONPath Compliance Test Suite](https://github.com/jsonpath-standard/jsonpath-compliance-test-suite). 
We also include implementations of [JSON Pointer](pointers.md) ([RFC 6901](https://datatracker.ietf.org/doc/html/rfc6901)) and [JSON Patch](api.md#jsonpath.JSONPatch) ([RFC 6902](https://datatracker.ietf.org/doc/html/rfc6902)), plus methods for converting a [JSONPathMatch](api.md#jsonpath.JSONPathMatch) to a `JSONPointer`. @@ -32,6 +32,14 @@ Or from [conda-forge](https://anaconda.org/conda-forge/python-jsonpath): conda install -c conda-forge python-jsonpath ``` +### Optional dependencies + +By default, and without any additional dependencies, the JSONPath syntax supported by Python JSONPath is **very close** to RFC 9535. For strict compatibility with RFC 9535, install [regex](https://pypi.org/project/regex/) and [iregexp-check](https://pypi.org/project/iregexp-check/) packages too. + +With these two packages installed, the [`match()`](functions.md#match) and [`search()`](functions.md#search) filter functions will use [regex](https://pypi.org/project/regex/) instead of `re` from the standard library, and will validate regular expression patterns against [RFC 9485](https://datatracker.ietf.org/doc/html/rfc9485). + +Also see [strict mode](syntax.md#strict-mode) for more information about strict compatibility with RFC 9535. + ## Example ```python diff --git a/docs/quickstart.md b/docs/quickstart.md index bae1781..26aa486 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -4,14 +4,14 @@ This page gets you started using JSONPath, JSON Pointer and JSON Patch wih Pytho ## `findall(path, data)` -Find all values matching a JSONPath expression using [`jsonpath.findall()`](api.md#jsonpath.JSONPathEnvironment.findall). +Find all values matching a JSONPath query using [`jsonpath.findall()`](api.md#jsonpath.JSONPathEnvironment.findall).
This function takes two arguments: -- `path`: a JSONPath expression as a string (e.g., `"$.users[*].name"`) +- `path`: a JSONPath query as a string (e.g., `"$.users[*].name"`) - `data`: the JSON document to query -It always returns a **list** of matched values, even if the path resolves to a single result or nothing at all. +It **always** returns a list of matched values, even if the path resolves to a single result or nothing at all. The `data` argument can be: @@ -65,7 +65,7 @@ with open("users.json") as fd: ## `finditer(path, data)` -Use [`jsonpath.finditer()`](api.md#jsonpath.JSONPathEnvironment.finditer) to iterate over instances of [`jsonpath.JSONPathMatch`](api.md#jsonpath.JSONPathMatch) for every object in _data_ that matches _path_. It accepts the same arguments as [`findall()`](#findallpath-data), a path string and data from which to select matches. +Use [`jsonpath.finditer()`](api.md#jsonpath.JSONPathEnvironment.finditer) to iterate over instances of [`jsonpath.JSONPathMatch`](api.md#jsonpath.JSONPathMatch) for every object in _data_ that matches _path_. It accepts the same arguments as [`findall()`](#findallpath-data), a query string and data from which to select matches. ```python import jsonpath @@ -109,7 +109,7 @@ The selected object is available from a [`JSONPathMatch`](api.md#jsonpath.JSONPa ## `compile(path)` -When you have a JSONPath that needs to be matched against different data repeatedly, you can _compile_ the path ahead of time using [`jsonpath.compile()`](api.md#jsonpath.JSONPathEnvironment.compile). It takes a path as a string and returns a [`JSONPath`](api.md#jsonpath.JSONPath) instance. `JSONPath` has `findall()` and `finditer()` methods that behave similarly to package-level `findall()` and `finditer()`, just without the `path` argument. 
+When you have a JSONPath query that needs to be matched against different data repeatedly, you can compile the path ahead of time using [`jsonpath.compile()`](api.md#jsonpath.JSONPathEnvironment.compile). It takes a query as a string and returns an instance of [`JSONPath`](api.md#jsonpath.JSONPath). `JSONPath` has `findall()` and `finditer()` methods that behave similarly to package-level `findall()` and `finditer()`, just without the `path` argument. ```python import jsonpath diff --git a/docs/syntax.md b/docs/syntax.md index 2aa7e10..97dad97 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -1,167 +1,261 @@ # JSONPath Syntax -Python JSONPath's default syntax is an opinionated combination of JSONPath features from existing, popular implementations and [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535). If you're already familiar with JSONPath syntax, skip to [notable differences](#notable-differences). +By default, Python JSONPath extends the RFC 9535 specification with a few additional features and relaxed rules, making it more forgiving in everyday use. If you need strict compliance with RFC 9535, you can enable strict mode, which enforces the standard without these extensions. In this guide, we first outline the standard syntax (see the specification for the formal definition), and then describe the non-standard extensions and their semantics in detail. -Imagine a JSON document as a tree structure, where each object (mapping) and array can contain more objects, arrays and scalar values. Every object, array and scalar value is a node in the tree, and the outermost object or array is the "root" node. 
+## JSONPath Terminology -For our purposes, a JSON "document" could be a file containing valid JSON data, a Python string containing valid JSON data, or a Python `Object` made up of dictionaries (or any [Mapping](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes)), lists (or any [Sequence](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes)), strings, etc. +Think of a JSON document as a tree, objects (mappings) and arrays can contain other objects, arrays, or scalar values. Each of these (object, array, or scalar) is a _node_ in the tree. The outermost object or array is called the _root_ node. -We chain _selectors_ together to retrieve nodes from the target document. Each selector operates on the nodes matched by preceding selectors. What follows is a description of those selectors. +In this guide, a JSON "document" may refer to: -## Selectors +- A file containing valid JSON text +- A Python string containing valid JSON text +- A Python object composed of dictionaries (or any [Mapping](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes)), lists (or any [Sequence](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes)), strings, numbers, booleans, or `None` -### Root (`$`) +A JSONPath expression (aka "query") is made up of a sequence of **segments**. Each segment contains one or more **selectors**: -`$` refers to the first node in the target document, be it an object or an array. Unless referencing the root node from inside a filter expression, `$` is optional. The following two examples are equivalent. +- A _segment_ corresponds to a step in the path from one set of nodes to the next. +- A _selector_ describes how to choose nodes within that step (for example, by name, by index, or by wildcard). 
-```text +What follows is a description of these selectors, starting with the standard ones defined in [RFC 9535](https://www.rfc-editor.org/rfc/rfc9535). + +## Standard selectors and identifiers + +### Root identifier (`$`) + +The root identifier, `$`, refers to the outermost node in the target document. This can be an object, an array, or a scalar value. + +A query containing only the root identifier simply returns the entire input document. + +#### Example query + +``` $.categories.*.name ``` -```text -categories.*.name +```json title="data" +{ + "categories": [ + { "id": 1, "name": "fiction" }, + { "id": 2, "name": "non-fiction" } + ] +} ``` -An empty path or a path containing just the root (`$`) selector returns the input data in its entirety. +```text title="results" +["fiction", "non-fiction"] +``` + +### Name selector (`.thing` or `['thing']`) -### Properties (`.thing`, `[thing]` or `['thing']`) +A _name selector_ matches the value of an object member by its key. You can write it in either **dot notation** (`.thing`) or **bracket notation** (`['thing']`). -Select nodes by property/key name using dot notation (`.something`) or bracket notation (`[something]`). If a target property/key contains reserved characters, it must use bracket notation and be enclosed in quotes (`['thing']`). +Dot notation is concise and preferred when the property name is a valid identifier. Bracket notation is required when the property name contains spaces, special characters, or starts with a number. -A dot in front of bracket notation is OK, but unnecessary. The following examples are equivalent. 
+#### Example query ```text -$.categories[0].name +$.book.title ``` -```text -$.categories[0][name] +```json title="data" +{ + "book": { + "title": "Moby Dick", + "author": "Herman Melville" + } +} ``` -```text -$.categories[0]['name'] +```text title="results" +["Moby Dick"] ``` -By default, `or`, `and`, `in`, `true`, `True`, `false`, `False`, `nil`, `Nil`, `null`, `Null`, `none`, `None`, `contains`, `undefined`, and `missing` are considered _reserved words_. In some cases you will need to use quoted property/name selector syntax if you're selecting a name that matches any of these words exactly. For example, `["and"]`. +### Index selector (`[0]` or `[-1]`) -### Array indices (`[0]` or `[-1]`) +Select an item from an array by its index. Indices are zero-based and enclosed in brackets. If the index is negative, items are selected from the end of the array. -Select an item from an array by its index. Indices are zero-based and enclosed in brackets. If the index is negative, items are selected from the end of the array. Considering example data from the top of this page, the following examples are equivalent. +#### Example query ```text -$.categories[0] +$.categories[0].name ``` -```text -$.categories[-1] +```json title="data" +{ + "categories": [ + { "id": 1, "name": "fiction" }, + { "id": 2, "name": "non-fiction" } + ] +} ``` -### Wildcard (`.*` or `[*]`) +```text title="results" +["fiction"] +``` -Select all elements from an array or all values from an object using `*`. These two examples are equivalent. +### Wildcard selector (`.*` or `[*]`) + +A _wildcard selector_ matches all member values of an object or all items in an array. It can be written as `.*` (dot notation) or `[*]` (bracket notation). 
+ +#### Example query ```text -$.categories[0].products.* +$.categories[*].name ``` -```text -$.categories[0].products[*] +```json title="data" +{ + "categories": [ + { "id": 1, "name": "fiction" }, + { "id": 2, "name": "non-fiction" } + ] +} ``` -### Keys (`.~` or `[~]`) +```text title="results" +["fiction", "non-fiction"] +``` -**_New in version 0.6.0_** +### Slice selector (`[start:end:step]`) -Select keys/properties from an object using `~`. +The slice selector allows you to select a range of items from an array. You can specify a starting index, an ending index (exclusive), and an optional step to skip elements. Negative indices count from the end of the array, just like standard Python slicing. + +#### Example query ```text -$.categories.~ +$.items[1:4:2] ``` -```text -$.categories[~] +```json title="data" +{ + "items": ["a", "b", "c", "d", "e", "f"] +} +``` + +```text title="results" +["b", "d"] ``` -### Slices (`[0:-1]` or `[-1:0:-1]`) +### Filter selector (`[?expression]`) -Select a range of elements from an array using slice notation. The start index, stop index and step are all optional. These examples are equivalent. +Filters allow you to remove nodes from a selection based on a Boolean expression. A filter expression evaluates each node in the context of either the root (`$`) or the current node (`@`). -```text -$.categories[0:] -``` +When filtering a mapping-like object, `@` identifies the current member value. When filtering a sequence-like object, `@` identifies the current item. + +Comparison operators include `==`, `!=`, `<`, `>`, `<=`, and `>=`. Logical operators `&&` (and) and `||` (or) can combine terms, and parentheses can be used to group expressions. + +A filter expression on its own - without a comparison - is treated as an existence test. 
+ +#### Example query ```text -$.categories[0:-1:] +$..products[?(@.price < $.price_cap)] ``` -```text -$.categories[0:-1:1] +```json title="data" +{ + "price_cap": 10, + "products": [ + { "name": "apple", "price": 5 }, + { "name": "orange", "price": 12 }, + { "name": "banana", "price": 8 } + ] +} ``` -```text -$.categories[::] +```text title="results" +[ + {"name": "apple", "price": 5}, + {"name": "banana", "price": 8} +] ``` -### Lists (`[1, 2, 10:20]`) +Filter expressions can also call predefined [function extensions](functions.md). -Select multiple indices, slices or properties using list notation (sometimes known as a "union" or "segment", we use "union" to mean something else). +## More on segments -```text -$..products.*.[title, price] -``` +So far we've seen shorthand notation and segments with just one selector. Here we cover the descendant segment and segments with multiple selectors. -### Recursive descent (`..`) +### Segments with multiple selectors -The `..` selector visits every node beneath the current selection. If a property selector, using dot notation, follows `..`, the dot is optional. These two examples are equivalent. +A segment can include multiple selectors separated by commas and enclosed in square brackets (`[...]`). Any valid selector (names, indices, slices, filters, or wildcards) can appear in the list. + +#### Example query ```text -$..title +$.store.book[0,2] ``` -```text -$...title +```json title="data" +{ + "store": { + "book": [ + { "title": "Book A", "price": 10 }, + { "title": "Book B", "price": 12 }, + { "title": "Book C", "price": 8 } + ] + } +} ``` -### Filters (`[?EXPRESSION]`) +```text title="results" +[ + {"title": "Book A", "price": 10}, + {"title": "Book C", "price": 8} +] +``` -Filters allow you to remove nodes from a selection using a Boolean expression. A _filter query_ is a JSONPath query nested within a filter expression. 
Every filter query must start with the root identifier (`$`), the current node identifier (`@`) or the [filter context](advanced.md#filter-variables) identifier (`_`). +### Descendant segment (`..`) -```text -$..products[?(@.price < $.price_cap)] -``` +The descendant segment (`..`) visits all object member values and array elements under the current object or array, applying the selector or selectors that follow to each visited node. It can be followed by any valid shorthand selector (names, wildcards, etc.) or a bracketed list of one or more selectors, making it highly flexible for querying nested structures. + +#### Example query ```text -$..products[?@.price < $.price_cap] +$..price ``` -When filtering a mapping-like object, `#` references the current key/property and `@` references the current value associated with `#`. When filtering a sequence-like object, `@` references the current item and `#` will hold the item's index in the sequence. +```json title="data" +{ + "store": { + "book": [ + { "title": "Book A", "price": 10 }, + { "title": "Book B", "price": 12 } + ], + "bicycle": { "color": "red", "price": 19.95 } + } +} +``` -Comparison operators include `==`, `!=`, `<`, `>`, `<=` and `>=`. Plus `<>` as an alias for `!=`. +```text title="results" +[10, 12, 19.95] +``` -`in` and `contains` are membership operators. `left in right` is equivalent to `right contains left`. +## Non-standard selectors and identifiers -`&&` and `||` are logical operators and terms can be grouped with parentheses. `and` and `or` work too. +TODO: -`=~` matches the left value with a regular expression literal. Regular expressions use a syntax similar to that found in JavaScript, where the pattern to match is surrounded by slashes, optionally followed by flags. +### Keys (`.~` or `[~]`) -```text -$..products[?(@.description =~ /.*trainers/i)] -``` +**_New in version 0.6.0_** -A filter query on its own - one that is not part of a comparison expression - is an existence test. 
We also support comparing a filter query to the special `undefined` keyword. These two example are equivalent. +Select keys/properties from an object using `~`. ```text -$..products[?!@.sale_price] +$.categories.~ ``` ```text -$..products[?@.sale_price == undefined] +$.categories[~] ``` -Filter expressions can call predefined [function extensions](functions.md) too. +### Lists (`[1, 2, 10:20]`) + +Select multiple indices, slices or properties using list notation (sometimes known as a "union" or "segment", we use "union" to mean something else). ```text -$.categories[?count(@.products.*) >= 2] +$..products.*.[title, price] ``` ### Fake root (`^`) @@ -174,6 +268,10 @@ This non-standard "fake root" identifier behaves like the standard root identifi ^[?length(categories) > 0] ``` +## Non-standard operators + +TODO + ### Union (`|`) and intersection (`&`) Union (`|`) and intersection (`&`) are similar to Python's set operations, but we don't dedupe the matches (matches will often contain unhashable objects). From dd37e3da16ab17473a09ab43f2e1456b5be86ab3 Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 21 Aug 2025 08:24:37 +0100 Subject: [PATCH 21/29] Syntax docs WIP [skip ci] --- docs/syntax.md | 295 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 259 insertions(+), 36 deletions(-) diff --git a/docs/syntax.md b/docs/syntax.md index 97dad97..3931dbd 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -30,7 +30,7 @@ A query containing only the root identifier simply returns the entire input docu #### Example query ``` -$.categories.*.name +$ ``` ```json title="data" @@ -42,15 +42,22 @@ $.categories.*.name } ``` -```text title="results" -["fiction", "non-fiction"] +```json title="results" +[ + { + "categories": [ + { "id": 1, "name": "fiction" }, + { "id": 2, "name": "non-fiction" } + ] + } +] ``` ### Name selector (`.thing` or `['thing']`) -A _name selector_ matches the value of an object member by its key. 
You can write it in either **dot notation** (`.thing`) or **bracket notation** (`['thing']`). +A _name selector_ matches the value of an object member by its key. You can write it in either **shorthand notation** (`.thing`) or **bracket notation** (`['thing']`). -Dot notation is concise and preferred when the property name is a valid identifier. Bracket notation is required when the property name contains spaces, special characters, or starts with a number. +Dot notation can be used when the property name is a valid identifier. Bracket notation is required when the property name contains spaces, special characters, or starts with a number. #### Example query @@ -67,13 +74,13 @@ $.book.title } ``` -```text title="results" +```json title="results" ["Moby Dick"] ``` ### Index selector (`[0]` or `[-1]`) -Select an item from an array by its index. Indices are zero-based and enclosed in brackets. If the index is negative, items are selected from the end of the array. +Select an element from an array by its index. Indices are zero-based and enclosed in brackets. If the index is negative, items are selected from the end of the array. #### Example query @@ -90,13 +97,13 @@ $.categories[0].name } ``` -```text title="results" +```json title="results" ["fiction"] ``` ### Wildcard selector (`.*` or `[*]`) -A _wildcard selector_ matches all member values of an object or all items in an array. It can be written as `.*` (dot notation) or `[*]` (bracket notation). +The _wildcard selector_ matches all member values of an object or all elements in an array. It can be written as `.*` (shorthand notation) or `[*]` (bracket notation). #### Example query @@ -113,13 +120,13 @@ $.categories[*].name } ``` -```text title="results" +```json title="results" ["fiction", "non-fiction"] ``` ### Slice selector (`[start:end:step]`) -The slice selector allows you to select a range of items from an array. 
You can specify a starting index, an ending index (exclusive), and an optional step to skip elements. Negative indices count from the end of the array, just like standard Python slicing. +The slice selector allows you to select a range of elements from an array. You can specify a starting index, an ending index (exclusive), and an optional step to skip elements. Negative indices count from the end of the array, just like standard Python slicing. #### Example query @@ -133,7 +140,7 @@ $.items[1:4:2] } ``` -```text title="results" +```json title="results" ["b", "d"] ``` @@ -141,7 +148,7 @@ $.items[1:4:2] Filters allow you to remove nodes from a selection based on a Boolean expression. A filter expression evaluates each node in the context of either the root (`$`) or the current node (`@`). -When filtering a mapping-like object, `@` identifies the current member value. When filtering a sequence-like object, `@` identifies the current item. +When filtering a mapping-like object, `@` identifies the current member value. When filtering a sequence-like object, `@` identifies the current element. Comparison operators include `==`, `!=`, `<`, `>`, `<=`, and `>=`. Logical operators `&&` (and) and `||` (or) can combine terms, and parentheses can be used to group expressions. 
@@ -164,10 +171,10 @@ $..products[?(@.price < $.price_cap)] } ``` -```text title="results" +```json title="results" [ - {"name": "apple", "price": 5}, - {"name": "banana", "price": 8} + { "name": "apple", "price": 5 }, + { "name": "banana", "price": 8 } ] ``` @@ -199,16 +206,16 @@ $.store.book[0,2] } ``` -```text title="results" +```json title="results" [ - {"title": "Book A", "price": 10}, - {"title": "Book C", "price": 8} + { "title": "Book A", "price": 10 }, + { "title": "Book C", "price": 8 } ] ``` ### Descendant segment (`..`) -The descendant segment (`..`) visits all object member values and array elements under the current object or array, applying the selector or selectors that follow to each visited node. It can be followed by any valid shorthand selector (names, wildcards, etc.) or a bracketed list of one or more selectors, making it highly flexible for querying nested structures. +The descendant segment (`..`) visits all object member values and array elements under the current object or array, applying the selector or selectors that follow to each visited node. It must be followed by a shorthand selector (names, wildcards, etc.) or a bracketed list of one or more selectors. #### Example query @@ -228,50 +235,266 @@ $..price } ``` -```text title="results" +```json title="results" [10, 12, 19.95] ``` ## Non-standard selectors and identifiers -TODO: +The selectors and identifiers described in this section are an extension to RFC 9535. They are enabled by default. See [#strict-mode] for details on how to use JSONPath following RFC 9535 strictly. + +### Key selector + +**_New in version 2.0.0_** + +The key selector, `.~name` or `[~'name']`, selects at most one name from an object member. It is syntactically similar to the standard [name selector](https://datatracker.ietf.org/doc/html/rfc9535#name-name-selector), with the addition of a tilde (`~`) prefix. 
+ +When applied to a JSON object, the key selector selects the _name_ from an object member, if that name exists, or nothing if it does not exist. This complements the standard name selector, which select the _value_ from a name/value pair. + +When applied to an array or primitive value, the key selector selects nothing. + +Key selector strings must follow the same processing semantics as name selector strings, as described in [section 2.3.2.1](https://datatracker.ietf.org/doc/html/rfc9535#section-2.3.1.2) of RFC 9535. + +!!! info + + The key selector is introduced to facilitate valid normalized paths for nodes produced by the [keys selector](#keys-selector) and the [keys filter selector](#keys-filter-selector). I don't expect it will be of much use elsewhere. -### Keys (`.~` or `[~]`) +#### Syntax + +``` +selector = name-selector / + wildcard-selector / + slice-selector / + index-selector / + filter-selector / + key-selector / + keys-selector / + keys-filter-selector + +key-selector = "~" name-selector + +child-segment = bracketed-selection / + ("." + (wildcard-selector / + member-name-shorthand / + member-key-shorthand)) + +descendant-segment = ".." (bracketed-selection / + wildcard-selector / + member-name-shorthand / + member-key-shorthand) + +member-key-shorthand = "~" name-first *name-char +``` + +#### Examples + +```json title="Example JSON document" +{ + "a": [{ "b": "x", "c": "z" }, { "b": "y" }] +} +``` + +| Query | Result | Result Paths | Comment | +| ----------- | ----------------- | ----------------------------------------- | ----------------------------- | +| `$.a[0].~c` | `"c"` | `$['a'][0][~'c']` | Key of nested object | +| `$.a[1].~c` | | | Key does not exist | +| `$..[~'b']` | `"b"`
`"b"` | `$['a'][0][~'b']`
`$['a'][1][~'b']` | Descendant, single quoted key | +| `$..[~"b"]` | `"b"`
`"b"` | `$['a'][0][~'b']`
`$['a'][1][~'b']` | Descendant, double quoted key | + +### Keys selector **_New in version 0.6.0_** -Select keys/properties from an object using `~`. +The keys selector, `~` or `[~]`, selects all names from an object’s name/value members. This complements the standard [wildcard selector](https://datatracker.ietf.org/doc/html/rfc9535#name-wildcard-selector), which selects all values from an object’s name/value pairs. + +As with the wildcard selector, the order of nodes resulting from a keys selector is not stipulated. + +When applied to an array or primitive value, the keys selector selects nothing. + +The normalized path of a node selected using the keys selector uses [key selector](#key-selector) syntax. + +#### Syntax -```text -$.categories.~ +``` +keys-selector = "~" ``` -```text -$.categories[~] +#### Examples + +```json title="Example JSON document" +{ + "a": [{ "b": "x", "c": "z" }, { "b": "y" }] +} ``` -### Lists (`[1, 2, 10:20]`) +| Query | Result | Result Paths | Comment | +| -------------- | ----------------------------------------- | ----------------------------------------------------------------------------------------- | -------------------------- | +| `$.a[0].~` | `"b"`
`"c"` | `$['a'][0][~'b']`
`$['a'][0][~'c']` | Object keys | +| `$.a.~` | | | Array keys | +| `$.a[0][~, ~]` | `"b"`
`"c"`
`"c"`
`"b"` | `$['a'][0][~'b']`
`$['a'][0][~'c']`
`$['a'][0][~'c']`
`$['a'][0][~'b']` | Non-deterministic ordering | +| `$..[~]` | `"a"`
`"b"`
`"c"`
`"b"` | `$[~'a']`
`$['a'][0][~'b']`
`$['a'][0][~'c']`
`$['a'][1][~'b']` | Descendant keys | -Select multiple indices, slices or properties using list notation (sometimes known as a "union" or "segment", we use "union" to mean something else). +### Keys filter selector -```text -$..products.*.[title, price] +**_New in version 2.0.0_** + +The keys filter selector selects names from an object’s name/value members. It is syntactically similar to the standard [filter selector](https://datatracker.ietf.org/doc/html/rfc9535#name-filter-selector), with the addition of a tilde (`~`) prefix. + +``` +~? +``` + +Whereas the standard filter selector will produce a node for each _value_ from an object’s name/value members - when its expression evaluates to logical true - the keys filter selector produces a node for each _name_ in an object’s name/value members. + +Logical expression syntax and semantics otherwise match that of the standard filter selector. `@` still refers to the current member value. See also the [current key identifier](#current-key-identifier). + +When applied to an array or primitive value, the keys filter selector selects nothing. + +The normalized path of a node selected using the keys filter selector uses [key selector](#key-selector) syntax. + +#### Syntax + +``` +filter-selector = "~?" S logical-expr +``` + +#### Examples + +```json title="Example JSON document" +[{ "a": [1, 2, 3], "b": [4, 5] }, { "c": { "x": [1, 2] } }, { "d": [1, 2, 3] }] +``` + +| Query | Result | Result Paths | Comment | +| ---------------------- | ----------------- | ------------------------------- | -------------------------------- | +| `$.*[~?length(@) > 2]` | `"a"`
`"d"` | `$[0][~'a']`
`$[2][~'d']` | Conditionally select object keys | +| `$.*[~?@.x]` | `"c"` | `$[1][~'c']` | Existence test | +| `$[~?(true == true)]` | | | Keys from an array | + +### Singular query selector + +The singular query selector consist of an embedded absolute singular query, the result of which is used as an object member name or array element index. + +If the embedded query resolves to a string or int value, at most one object member value or array element value is selected. Otherwise the singular query selector selects nothing. + +#### Syntax + +``` +selector = name-selector / + wildcard-selector / + slice-selector / + index-selector / + filter-selector / + singular-query-selector + +singular-query-selector = abs-singular-query +``` + +#### Examples + +```json +{ + "a": { + "j": [1, 2, 3], + "p": { + "q": [4, 5, 6] + } + }, + "b": ["j", "p", "q"], + "c d": { + "x": { + "y": 1 + } + } +} +``` + +| Query | Result | Result Path | Comment | +| --------------------- | ------------------ | ---------------- | ----------------------------------------------------------------- | +| `$.a[$.b[1]]` | `{"q": [4, 5, 6]}` | `$['a']['p']` | Object name from embedded singular query | +| `$.a.j[$['c d'].x.y]` | `2` | `$['a']['j'][1]` | Array index from embedded singular query | +| `$.a[$.b]` | | | Embedded singular query does not resolve to a string or int value | + +### Current key identifier + +`#` is the _current key_ identifier. `#` will be the name of the current object member, or index of the current array element. This complements the current node identifier (`@`), which refers to a member value or array element, respectively. + +It is a syntax error to follow the current key identifier with segments, as if it were a filter query. + +When used as an argument to a function, the current key is of `ValueType`, and outside a function call it must be compared. 
+ +#### Syntax + +``` +comparable = literal / + singular-query / ; singular query value + function-expr / ; ValueType + current-key-identifier + + +function-argument = literal / + filter-query / ; (includes singular-query) + logical-expr / + function-expr / + current-key-identifier + +current-key-identifier = "#" +``` + +#### Examples + +```json title="Example JSON document" +{ "abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": [] } ``` -### Fake root (`^`) +| Query | Result | Result Path | Comment | +| ----------------------------------------- | --------------------- | --------------------------------- | --------------------------- | +| `$[?match(#, '^ab.*') && length(@) > 0 ]` | `[1,2,3]`
`[6]` | `$['abc']`
`$['abx']` | Match on object names | +| `$.abc[?(# >= 1)]` | `2`
`3` | `$['abc'][1]`
`$['abc'][2]` | Compare current array index | + +### Pseudo root identifier **_New in version 0.11.0_** -This non-standard "fake root" identifier behaves like the standard root identifier (`$`), but wraps the target JSON document in a single-element array, so as to make it selectable with a filter selector. +The pseudo root identifier (`^`) behaves like the standard root identifier (`$`), but conceptually wraps the target JSON document in a single-element array. This allows the root document itself to be addressed by selectors such as filters, which normally only apply to elements within arrays. -```text -^[?length(categories) > 0] +#### Syntax + +``` +jsonpath-query = (root-identifier / pseudo-root-identifier) segments + +root-identifier = "$" +pseudo-root-identifier = "^" ``` +#### Examples + +TODO + +### Filter context identifier + +The filter context identifier (`_`) starts an embedded query, similar to the root identifier (`$`) and current node identifier (`@`), but targets JSON-like data passed as the `filter_context` argument to [`findall()`](api.md#jsonpath.JSONPath.findall) and [`finditer()`](api.md#jsonpath.JSONPath.finditer). + +#### Syntax + +TODO + +#### Examples + +TODO + ## Non-standard operators TODO +### Lists (`[1, 2, 10:20]`) + +Select multiple indices, slices or properties using list notation (sometimes known as a "union" or "segment", we use "union" to mean something else). + +```text +$..products.*.[title, price] +``` + ### Union (`|`) and intersection (`&`) Union (`|`) and intersection (`&`) are similar to Python's set operations, but we don't dedupe the matches (matches will often contain unhashable objects). 
From cfa891c71673ccf6c62ff53aa92c06122c25094f Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 21 Aug 2025 21:10:15 +0100 Subject: [PATCH 22/29] More docs [skip ci] --- docs/singular_query_selector.md | 43 ---------- docs/syntax.md | 146 ++++++++++++++++++++++---------- 2 files changed, 100 insertions(+), 89 deletions(-) delete mode 100644 docs/singular_query_selector.md diff --git a/docs/singular_query_selector.md b/docs/singular_query_selector.md deleted file mode 100644 index 3d6ce67..0000000 --- a/docs/singular_query_selector.md +++ /dev/null @@ -1,43 +0,0 @@ -# Singular Query Selector - -The singular query selector consist of an embedded absolute singular query, the result of which is used as an object member name or array element index. - -If the embedded query resolves to a string or int value, at most one object member value or array element value is selected. Otherwise the singular query selector selects nothing. - -## Syntax - -``` -selector = name-selector / - wildcard-selector / - slice-selector / - index-selector / - filter-selector / - singular-query-selector - -singular-query-selector = abs-singular-query -``` - -## Examples - -```json -{ - "a": { - "j": [1, 2, 3], - "p": { - "q": [4, 5, 6] - } - }, - "b": ["j", "p", "q"], - "c d": { - "x": { - "y": 1 - } - } -} -``` - -| Query | Result | Result Path | Comment | -| --------------------- | ------------------ | ---------------- | ----------------------------------------------------------------- | -| `$.a[$.b[1]]` | `{"q": [4, 5, 6]}` | `$['a']['p']` | Object name from embedded singular query | -| `$.a.j[$['c d'].x.y]` | `2` | `$['a']['j'][1]` | Array index from embedded singular query | -| `$.a[$.b]` | | | Embedded singular query does not resolve to a string or int value | diff --git a/docs/syntax.md b/docs/syntax.md index 3931dbd..d350c69 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -1,6 +1,6 @@ # JSONPath Syntax -By default, Python JSONPath extends the RFC 9535 specification with a few 
additional features and relaxed rules, making it more forgiving in everyday use. If you need strict compliance with RFC 9535, you can enable strict mode, which enforces the standard without these extensions. In this guide, we first outline the standard syntax (see the specification for the formal definition), and then describe the non-standard extensions and their semantics in detail.
+By default, Python JSONPath extends the RFC 9535 specification with a few additional features and relaxed rules. If you need strict compliance with RFC 9535, you can enable strict mode, which enforces the standard without these extensions. In this guide, we first outline the standard syntax (see the specification for the formal definition), and then describe the non-standard extensions and their semantics in detail.
 
 ## JSONPath Terminology
 
@@ -241,7 +241,7 @@ $..price
 
 ## Non-standard selectors and identifiers
 
-The selectors and identifiers described in this section are an extension to RFC 9535. They are enabled by default. See [#strict-mode] for details on how to use JSONPath following RFC 9535 strictly.
+The selectors and identifiers described in this section are an extension to RFC 9535. They are enabled by default. See [strict mode](#strict-mode) for details on how to use JSONPath without these extensions.
 
 ### Key selector
 
@@ -373,6 +373,8 @@ filter-selector = "~?" S logical-expr
 
 ### Singular query selector
 
+**_New in version 2.0.0_**
+
-The singular query selector consist of an embedded absolute singular query, the result of which is used as an object member name or array element index.
+The singular query selector consists of an embedded absolute singular query, the result of which is used as an object member name or array element index.
 
 If the embedded query resolves to a string or int value, at most one object member value or array element value is selected. Otherwise the singular query selector selects nothing.
 
@@ -456,7 +458,7 @@
 
 **_New in version 0.11.0_**
 
-The pseudo root identifier (`^`) behaves like the standard root identifier (`$`), but conceptually wraps the target JSON document in a single-element array. 
This allows the root document itself to be addressed by selectors such as filters, which normally only apply to elements within arrays.
+The pseudo root identifier (`^`) behaves like the standard root identifier (`$`), but conceptually wraps the target JSON document in a single-element array. This allows the root document itself to be conditionally selected by filters.
 
 #### Syntax
 
@@ -469,7 +471,14 @@ pseudo-root-identifier = "^"
 
 #### Examples
 
-TODO
+```json title="Example JSON data"
+{ "a": { "b": 42 }, "n": 7 }
+```
+
+| Query                      | Result                         | Result Path | Comment                             |
+| -------------------------- | ------------------------------ | ----------- | ----------------------------------- |
+| `^[?@.a.b > 7]`            | `{ "a": { "b": 42 }, "n": 7 }` | `^[0]`      | Conditionally select the root value |
+| `^[?@.a.b > value(^.*.n)]` | `{ "a": { "b": 42 }, "n": 7 }` | `^[0]`      | Embedded pseudo root query          |
 
 ### Filter context identifier
 
@@ -477,23 +486,96 @@ The filter context identifier (`_`) starts an embedded query, similar to the roo
 
 #### Syntax
 
-TODO
+```
+current-node-identifier = "@"
+extra-context-identifier = "_"
+
+filter-query = rel-query / extra-context-query / jsonpath-query
+rel-query = current-node-identifier segments
+extra-context-query = extra-context-identifier segments
+
+singular-query = rel-singular-query / abs-singular-query / extra-context-singular-query
+rel-singular-query = current-node-identifier singular-query-segments
+abs-singular-query = root-identifier singular-query-segments
+
+extra-context-singular-query = extra-context-identifier singular-query-segments
+```
 
 #### Examples
 
-TODO
+```json title="Example JSON data"
+{ "a": [{ "b": 42 }, { "b": 3 }] }
+```
+
+```json title="Extra JSON data"
+{ "c": 42 }
+```
+
+| Query              | Result        | Result Path | Comment                                      |
+| ------------------ | ------------- | ----------- | -------------------------------------------- |
+| `$.a[?@.b == _.c]` | `{ "b": 42 }` | `$['a'][0]` | Comparison with extra context singular query |
 
 ## Non-standard 
operators
 
-TODO
+In addition to the operators described below, the standard _logical and_ operator (`&&`) is aliased as `and`, the standard _logical or_ operator (`||`) is aliased as `or`, and `null` is aliased as `nil` and `none`.
 
-### Lists (`[1, 2, 10:20]`)
+Also, `true`, `false`, `null` and their aliases can start with an upper case letter.
 
-Select multiple indices, slices or properties using list notation (sometimes known as a "union" or "segment", we use "union" to mean something else).
+### Membership operators
+
+The membership operators test whether one value occurs within another.
+
+An infix expression using `contains` evaluates to true if the right-hand side is a member of the left-hand side, and false otherwise.
+
+- If the left-hand side is an object and the right-hand side is a string, the result is true if the object has a member with that name.
+- If the left-hand side is an array, the result is true if any element of the array is equal to the right-hand side.
+- For scalars (strings, numbers, booleans, null), `contains` always evaluates to false.
+
+The `in` operator is equivalent to `contains` with operands reversed. This makes `contains` and `in` symmetric, so either form may be used depending on which reads more naturally in context.
+
+A list literal is a comma-separated list of JSONPath expression literals. Lists should appear on the left-hand side of `contains` or the right-hand side of `in`.
+ +#### Syntax -```text -$..products.*.[title, price] ``` +basic-expr = paren-expr / + comparison-expr / + membership-expr / + test-expr + +membership-expr = comparable S membership-op S comparable + +membership-operator = "contains" / "in" + +membership-operand = literal / + singular-query / ; singular query value + function-expr / ; ValueType + list-literal + +list-literal = "[" S literal *(S "," S literal) S "]" +``` + +#### Examples + +```json title="Example JSON data" +{ + "x": [{ "a": ["foo", "bar"] }, { "a": ["bar"] }], + "y": [{ "a": { "foo": "bar" } }, { "a": { "bar": "baz" } }], + "z": [{ "a": "foo" }, { "a": "bar" }] +} +``` + +| Query | Result | Result Path | Comment | +| ------------------------------------- | ----------------------- | ----------- | ------------------------------------ | +| `$.x[?@.a contains 'foo']` | `{"a": ["foo", "bar"]}` | `$['x'][0]` | Array contains string literal | +| `$.y[?@.a contains 'foo']` | `{"a": ["foo", "bar"]}` | `$['y'][0]` | Object contains string literal | +| `$.x[?'foo' in @.a]` | `{"a": ["foo", "bar"]}` | `$['x'][0]` | String literal in array | +| `$.y[?'foo' in @.a]` | `{"a": ["foo", "bar"]}` | `$['y'][0]` | String literal in object | +| `$.z[?(['bar', 'baz'] contains @.a)]` | `{"a": "bar"}` | `$['z'][1]` | List literal contains embedded query | + +### The regex operator + +TODO ### Union (`|`) and intersection (`&`) @@ -513,38 +595,10 @@ $.categories[?(@.name == 'footwear')].products.* & $.categories[?(@.name == 'hea Note that `|` and `&` are not allowed inside filter expressions. -## Notable differences - -This is a list of things that you might find in other JSONPath implementation that we don't support (yet). - -- We don't support extension functions of the form `selector.func()`. -- We always return a list of matches from `jsonpath.findall()`, never a scalar value. -- We do not support arithmetic in filter expression. -- We don't allow dotted array indices. 
An array index must be surrounded by square brackets. -- Python JSONPath is strictly read only. There are no update "selectors", but we do provide methods for converting `JSONPathMatch` instances to `JSONPointer`s, and a `JSONPatch` builder API for modifying JSON-like data structures using said pointers. - -And this is a list of areas where we deviate from [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535). See [jsonpath-rfc9535](https://github.com/jg-rp/python-jsonpath-rfc9535) for an alternative implementation of JSONPath that does not deviate from RFC 9535. - -- The root token (default `$`) is optional and paths starting with a dot (`.`) are OK. `.thing` is the same as `$.thing`, as is `thing`, `$[thing]` and `$["thing"]`. -- The built-in `match()` and `search()` filter functions use Python's standard library `re` module, which, at least, doesn't support Unicode properties. We might add an implementation of `match()` and `search()` using the third party [regex](https://pypi.org/project/regex/) package in the future. -- We don't check `match()` and `search()` regex arguments against RFC 9485. Any valid Python pattern is allowed. -- We don't require property names to be quoted inside a bracketed selection, unless the name contains reserved characters. -- We don't require the recursive descent segment to have a selector. `$..` is equivalent to `$..*`. -- We support explicit comparisons to `undefined` as well as implicit existence tests. -- Float literals without a fractional digit are OK or leading digit. `1.` is equivalent to `1.0`. -- We treat literals (such as `true` and `false`) as valid "basic" expressions. For example, `$[?true || false]`, without an existence test or comparison either side of logical _or_, does not raise a syntax error. -- By default, `and` is equivalent to `&&` and `or` is equivalent to `||`. -- `none` and `nil` are aliases for `null`. -- `null` (and its aliases), `true` and `false` can start with an upper or lower case letter. 
-- We don't treat some invalid `\u` escape sequences in quoted name selectors and string literals as an error. We match the behavior of the JSON decoder in Python's standard library, which is less strict than RFC 9535. - -And this is a list of features that are uncommon or unique to Python JSONPath. - -- We support membership operators `in` and `contains`, plus list/array literals. -- `|` is a union operator, where matches from two or more JSONPaths are combined. This is not part of the Python API, but built-in to the JSONPath syntax. -- `&` is an intersection operator, where we exclude matches that don't exist in both left and right paths. This is not part of the Python API, but built-in to the JSONPath syntax. -- `#` is the current key/property or index identifier when filtering a mapping or sequence. -- `_` is a filter context identifier. With usage similar to `$` and `@`, `_` exposes arbitrary data from the `filter_context` argument to `findall()` and `finditer()`. -- `~` is a "keys" or "properties" selector. -- `^` is a "fake root" identifier. It is equivalent to `$`, but wraps the target JSON document in a single-element array, so the root value can be conditionally selected with a filter selector. -- `=~` is the the regex match operator, matching a value to a JavaScript-style regex literal. +## Other differences + +This is a list of areas where Python JSONPath is more relaxed than [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535). + +- The root token (`$`) is optional and paths starting with a dot (`.`) are OK. `.thing` is the same as `$.thing`, as is `thing` and `$["thing"]`. +- Leading and trailing whitespace is OK. +- We support explicit comparisons to `undefined` (aka `missing`) as well as implicit existence tests. 
From 6ce68d2ff82efb2d90ee6b8420e2cde8e01eac43 Mon Sep 17 00:00:00 2001 From: James Prior Date: Fri, 22 Aug 2025 08:31:09 +0100 Subject: [PATCH 23/29] Add a `strict` argument to convenience functions --- jsonpath/__init__.py | 298 +++++++++++++++++++++++++++++++++- jsonpath/_types.py | 31 ++++ jsonpath/cli.py | 2 + jsonpath/env.py | 16 +- jsonpath/path.py | 68 ++------ tests/test_convenience_api.py | 1 + 6 files changed, 347 insertions(+), 69 deletions(-) create mode 100644 jsonpath/_types.py create mode 100644 tests/test_convenience_api.py diff --git a/jsonpath/__init__.py b/jsonpath/__init__.py index 2604d4a..31fd4a9 100644 --- a/jsonpath/__init__.py +++ b/jsonpath/__init__.py @@ -1,7 +1,18 @@ # SPDX-FileCopyrightText: 2023-present James Prior # # SPDX-License-Identifier: MIT +from __future__ import annotations +from typing import TYPE_CHECKING +from typing import AsyncIterable +from typing import Iterable +from typing import List +from typing import Optional +from typing import Union + +from ._types import JSON +from ._types import JSONData +from ._types import JSONScalar from .env import JSONPathEnvironment from .exceptions import JSONPatchError from .exceptions import JSONPatchTestFailure @@ -32,6 +43,10 @@ from .pointer import RelativeJSONPointer from .pointer import resolve +if TYPE_CHECKING: + from .match import FilterContextVars + + __all__ = ( "compile", "CompoundJSONPath", @@ -68,16 +83,283 @@ "RelativeJSONPointerIndexError", "RelativeJSONPointerSyntaxError", "resolve", + "JSON", + "JSONData", + "JSONScalar", "UNDEFINED", ) -# For convenience +# For convenience and to delegate to strict or non-strict environments. 
DEFAULT_ENV = JSONPathEnvironment() -compile = DEFAULT_ENV.compile # noqa: A001 -findall = DEFAULT_ENV.findall -findall_async = DEFAULT_ENV.findall_async -finditer = DEFAULT_ENV.finditer -finditer_async = DEFAULT_ENV.finditer_async -match = DEFAULT_ENV.match -query = DEFAULT_ENV.query +STRICT_ENV = JSONPathEnvironment(strict=True) + + +def compile(path: str, *, strict: bool = False) -> Union[JSONPath, CompoundJSONPath]: # noqa: A001 + """Prepare a path string ready for repeated matching against different data. + + Arguments: + path: A JSONPath as a string. + strict: When `True`, compile the path for strict compliance with RFC 9535. + + Returns: + A `JSONPath` or `CompoundJSONPath`, ready to match against some data. + Expect a `CompoundJSONPath` if the path string uses the _union_ or + _intersection_ operators. + + Raises: + JSONPathSyntaxError: If _path_ is invalid. + JSONPathTypeError: If filter functions are given arguments of an + unacceptable type. + """ + return STRICT_ENV.compile(path) if strict else DEFAULT_ENV.compile(path) + + +def findall( + path: str, + data: JSONData, + *, + filter_context: Optional[FilterContextVars] = None, + strict: bool = False, +) -> List[object]: + """Find all objects in _data_ matching the JSONPath _path_. + + If _data_ is a string or a file-like objects, it will be loaded + using `json.loads()` and the default `JSONDecoder`. + + Arguments: + path: The JSONPath as a string. + data: A JSON document or Python object implementing the `Sequence` + or `Mapping` interfaces. + filter_context: Arbitrary data made available to filters using + the _filter context_ selector. + strict: When `True`, compile and evaluate with strict compliance with + RFC 9535. + + Returns: + A list of matched objects. If there are no matches, the list will + be empty. + + Raises: + JSONPathSyntaxError: If the path is invalid. + JSONPathTypeError: If a filter expression attempts to use types in + an incompatible way. 
+ """ + return ( + STRICT_ENV.findall(path, data, filter_context=filter_context) + if strict + else DEFAULT_ENV.findall(path, data, filter_context=filter_context) + ) + + +async def findall_async( + path: str, + data: JSONData, + *, + filter_context: Optional[FilterContextVars] = None, + strict: bool = False, +) -> List[object]: + """Find all objects in _data_ matching the JSONPath _path_. + + If _data_ is a string or a file-like objects, it will be loaded + using `json.loads()` and the default `JSONDecoder`. + + Arguments: + path: The JSONPath as a string. + data: A JSON document or Python object implementing the `Sequence` + or `Mapping` interfaces. + filter_context: Arbitrary data made available to filters using + the _filter context_ selector. + strict: When `True`, compile and evaluate with strict compliance with + RFC 9535. + + Returns: + A list of matched objects. If there are no matches, the list will + be empty. + + Raises: + JSONPathSyntaxError: If the path is invalid. + JSONPathTypeError: If a filter expression attempts to use types in + an incompatible way. + """ + return ( + await STRICT_ENV.findall_async(path, data, filter_context=filter_context) + if strict + else await DEFAULT_ENV.findall_async(path, data, filter_context=filter_context) + ) + + +def finditer( + path: str, + data: JSONData, + *, + filter_context: Optional[FilterContextVars] = None, + strict: bool = False, +) -> Iterable[JSONPathMatch]: + """Generate `JSONPathMatch` objects for each match of _path_ in _data_. + + If _data_ is a string or a file-like objects, it will be loaded using + `json.loads()` and the default `JSONDecoder`. + + Arguments: + path: The JSONPath as a string. + data: A JSON document or Python object implementing the `Sequence` + or `Mapping` interfaces. + filter_context: Arbitrary data made available to filters using + the _filter context_ selector. + strict: When `True`, compile and evaluate with strict compliance with + RFC 9535. 
+ + Returns: + An iterator yielding `JSONPathMatch` objects for each match. + + Raises: + JSONPathSyntaxError: If the path is invalid. + JSONPathTypeError: If a filter expression attempts to use types in + an incompatible way. + """ + return ( + STRICT_ENV.finditer(path, data, filter_context=filter_context) + if strict + else DEFAULT_ENV.finditer(path, data, filter_context=filter_context) + ) + + +async def finditer_async( + path: str, + data: JSONData, + *, + filter_context: Optional[FilterContextVars] = None, + strict: bool = False, +) -> AsyncIterable[JSONPathMatch]: + """Find all objects in _data_ matching the JSONPath _path_. + + If _data_ is a string or a file-like objects, it will be loaded + using `json.loads()` and the default `JSONDecoder`. + + Arguments: + path: The JSONPath as a string. + data: A JSON document or Python object implementing the `Sequence` + or `Mapping` interfaces. + filter_context: Arbitrary data made available to filters using + the _filter context_ selector. + strict: When `True`, compile and evaluate with strict compliance with + RFC 9535. + + Returns: + A list of matched objects. If there are no matches, the list will + be empty. + + Raises: + JSONPathSyntaxError: If the path is invalid. + JSONPathTypeError: If a filter expression attempts to use types in + an incompatible way. + """ + return ( + await STRICT_ENV.finditer_async(path, data, filter_context=filter_context) + if strict + else await DEFAULT_ENV.finditer_async(path, data, filter_context=filter_context) + ) + + +def match( + path: str, + data: JSONData, + *, + filter_context: Optional[FilterContextVars] = None, + strict: bool = False, +) -> Union[JSONPathMatch, None]: + """Return a `JSONPathMatch` instance for the first object found in _data_. + + `None` is returned if there are no matches. + + Arguments: + path: The JSONPath as a string. + data: A JSON document or Python object implementing the `Sequence` + or `Mapping` interfaces. 
+ filter_context: Arbitrary data made available to filters using + the _filter context_ selector. + strict: When `True`, compile and evaluate with strict compliance with + RFC 9535. + + Returns: + A `JSONPathMatch` object for the first match, or `None` if there were + no matches. + + Raises: + JSONPathSyntaxError: If the path is invalid. + JSONPathTypeError: If a filter expression attempts to use types in + an incompatible way. + """ + return ( + STRICT_ENV.match(path, data, filter_context=filter_context) + if strict + else DEFAULT_ENV.match(path, data, filter_context=filter_context) + ) + + +def query( + path: str, + data: JSONData, + *, + filter_context: Optional[FilterContextVars] = None, + strict: bool = False, +) -> Query: + """Return a `Query` iterator over matches found by applying _path_ to _data_. + + `Query` objects are iterable. + + ``` + for match in jsonpath.query("$.foo..bar", data): + ... + ``` + + You can skip and limit results with `Query.skip()` and `Query.limit()`. + + ``` + matches = ( + jsonpath.query("$.foo..bar", data) + .skip(5) + .limit(10) + ) + + for match in matches + ... + ``` + + `Query.tail()` will get the last _n_ results. + + ``` + for match in jsonpath.query("$.foo..bar", data).tail(5): + ... + ``` + + Get values for each match using `Query.values()`. + + ``` + for obj in jsonpath.query("$.foo..bar", data).limit(5).values(): + ... + ``` + + Arguments: + path: The JSONPath as a string. + data: A JSON document or Python object implementing the `Sequence` + or `Mapping` interfaces. + filter_context: Arbitrary data made available to filters using + the _filter context_ selector. + strict: When `True`, compile and evaluate with strict compliance with + RFC 9535. + + Returns: + A query iterator. + + Raises: + JSONPathSyntaxError: If the path is invalid. + JSONPathTypeError: If a filter expression attempts to use types in + an incompatible way. 
+ """ + return ( + STRICT_ENV.query(path, data, filter_context=filter_context) + if strict + else DEFAULT_ENV.query(path, data, filter_context=filter_context) + ) diff --git a/jsonpath/_types.py b/jsonpath/_types.py new file mode 100644 index 0000000..3f413b2 --- /dev/null +++ b/jsonpath/_types.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from io import IOBase +from typing import Any +from typing import Mapping +from typing import Sequence +from typing import Union + +JSONScalar = Union[str, int, float, bool, None] +"""A scalar JSON-like value. + +This includes primitive types that can appear in JSON: +string, number, boolean, or null. +""" + +JSON = Union[JSONScalar, Sequence[Any], Mapping[str, Any]] +"""A JSON-like data structure. + +This covers scalars, sequences (e.g. lists, tuples), and mappings (e.g. +dictionaries with string keys). Values inside may be untyped (`Any`) rather +than recursively constrained to `JSON` for flexibility. +""" + +JSONData = Union[str, IOBase, JSON] +"""Input representing JSON content. + +Accepts: +- a JSON-like object (`JSON`), +- a raw JSON string, +- or a file-like object containing JSON data. 
+""" diff --git a/jsonpath/cli.py b/jsonpath/cli.py index e840b2c..3c0bbfb 100644 --- a/jsonpath/cli.py +++ b/jsonpath/cli.py @@ -19,6 +19,8 @@ def path_sub_command(parser: argparse.ArgumentParser) -> None: # noqa: D103 parser.set_defaults(func=handle_path_command) group = parser.add_mutually_exclusive_group(required=True) + # TODO: add "strict" argument + group.add_argument( "-q", "--query", diff --git a/jsonpath/env.py b/jsonpath/env.py index 021d5f1..1d0fa49 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -59,8 +59,7 @@ from .token import Token if TYPE_CHECKING: - from io import IOBase - + from ._types import JSONData from .match import FilterContextVars @@ -275,7 +274,7 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 def findall( self, path: str, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + data: JSONData, *, filter_context: Optional[FilterContextVars] = None, ) -> List[object]: @@ -305,7 +304,7 @@ def findall( def finditer( self, path: str, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + data: JSONData, *, filter_context: Optional[FilterContextVars] = None, ) -> Iterable[JSONPathMatch]: @@ -334,7 +333,7 @@ def finditer( def match( self, path: str, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + data: JSONData, *, filter_context: Optional[FilterContextVars] = None, ) -> Union[JSONPathMatch, None]: @@ -363,7 +362,8 @@ def match( def query( self, path: str, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + data: JSONData, + *, filter_context: Optional[FilterContextVars] = None, ) -> Query: """Return a `Query` iterator over matches found by applying _path_ to _data_. 
@@ -422,7 +422,7 @@ def query( async def findall_async( self, path: str, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + data: JSONData, *, filter_context: Optional[FilterContextVars] = None, ) -> List[object]: @@ -434,7 +434,7 @@ async def findall_async( async def finditer_async( self, path: str, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + data: JSONData, *, filter_context: Optional[FilterContextVars] = None, ) -> AsyncIterable[JSONPathMatch]: diff --git a/jsonpath/path.py b/jsonpath/path.py index eeca6f7..7e70021 100644 --- a/jsonpath/path.py +++ b/jsonpath/path.py @@ -1,15 +1,13 @@ -# noqa: D100 +"""A compiled JSONPath ready to be applied to a JSON string or Python object.""" + from __future__ import annotations import itertools from typing import TYPE_CHECKING -from typing import Any from typing import AsyncIterable from typing import Iterable from typing import List -from typing import Mapping from typing import Optional -from typing import Sequence from typing import Tuple from typing import TypeVar from typing import Union @@ -23,7 +21,7 @@ from jsonpath.selectors import NameSelector if TYPE_CHECKING: - from io import IOBase + from jsonpath._types import JSONData from .env import JSONPathEnvironment from .segments import JSONPathSegment @@ -68,10 +66,7 @@ def __hash__(self) -> int: return hash(self.segments) def findall( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> List[object]: """Find all objects in `data` matching the given JSONPath `path`. 
@@ -98,10 +93,7 @@ def findall( ] def finditer( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> Iterable[JSONPathMatch]: """Generate `JSONPathMatch` objects for each match. @@ -142,10 +134,7 @@ def finditer( return matches async def findall_async( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> List[object]: """An async version of `findall()`.""" return [ @@ -156,10 +145,7 @@ async def findall_async( ] async def finditer_async( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> AsyncIterable[JSONPathMatch]: """An async version of `finditer()`.""" _data = load_data(data) @@ -183,10 +169,7 @@ async def root_iter() -> AsyncIterable[JSONPathMatch]: return matches def match( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> Union[JSONPathMatch, None]: """Return a `JSONPathMatch` instance for the first object found in _data_. @@ -213,10 +196,7 @@ def match( return None def query( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> Query: """Return a `Query` iterator over matches found by applying this path to _data_. 
@@ -290,10 +270,7 @@ def __hash__(self) -> int: return hash((self.path, self.paths)) def findall( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> List[object]: """Find all objects in `data` matching the given JSONPath `path`. @@ -328,10 +305,7 @@ def findall( return objs def finditer( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> Iterable[JSONPathMatch]: """Generate `JSONPathMatch` objects for each match. @@ -366,10 +340,7 @@ def finditer( return matches def match( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> Union[JSONPathMatch, None]: """Return a `JSONPathMatch` instance for the first object found in _data_. 
@@ -396,10 +367,7 @@ def match( return None async def findall_async( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> List[object]: """An async version of `findall()`.""" objs = await self.path.findall_async(data, filter_context=filter_context) @@ -415,10 +383,7 @@ async def findall_async( return objs async def finditer_async( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> AsyncIterable[JSONPathMatch]: """An async version of `finditer()`.""" matches = await self.path.finditer_async(data, filter_context=filter_context) @@ -435,10 +400,7 @@ async def finditer_async( return matches def query( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> Query: """Return a `Query` iterator over matches found by applying this path to _data_. 
diff --git a/tests/test_convenience_api.py b/tests/test_convenience_api.py new file mode 100644 index 0000000..e151d4a --- /dev/null +++ b/tests/test_convenience_api.py @@ -0,0 +1 @@ +# TODO: From bdbc677f8f1cfbb82b50772b64510edbbf6f9616 Mon Sep 17 00:00:00 2001 From: James Prior Date: Fri, 22 Aug 2025 19:02:02 +0100 Subject: [PATCH 24/29] Add a `strict` argument to the JSONPath CLI --- jsonpath/cli.py | 12 +++- tests/test_cli.py | 45 +++++++++++++++ tests/test_convenience_api.py | 105 +++++++++++++++++++++++++++++++++- 3 files changed, 159 insertions(+), 3 deletions(-) diff --git a/jsonpath/cli.py b/jsonpath/cli.py index 3c0bbfb..fd52358 100644 --- a/jsonpath/cli.py +++ b/jsonpath/cli.py @@ -19,8 +19,6 @@ def path_sub_command(parser: argparse.ArgumentParser) -> None: # noqa: D103 parser.set_defaults(func=handle_path_command) group = parser.add_mutually_exclusive_group(required=True) - # TODO: add "strict" argument - group.add_argument( "-q", "--query", @@ -62,6 +60,15 @@ def path_sub_command(parser: argparse.ArgumentParser) -> None: # noqa: D103 help="Disables filter expression well-typedness checks.", ) + parser.add_argument( + "--strict", + action="store_true", + help=( + "Compile and evaluate JSONPath expressions with strict " + "compliance with RFC 9535." 
+ ), + ) + def pointer_sub_command(parser: argparse.ArgumentParser) -> None: # noqa: D103 parser.set_defaults(func=handle_pointer_command) @@ -251,6 +258,7 @@ def handle_path_command(args: argparse.Namespace) -> None: # noqa: PLR0912 path = jsonpath.JSONPathEnvironment( unicode_escape=not args.no_unicode_escape, well_typed=not args.no_type_checks, + strict=args.strict, ).compile(query) except JSONPathSyntaxError as err: if args.debug: diff --git a/tests/test_cli.py b/tests/test_cli.py index 16d7918..1b1f489 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,4 +1,5 @@ """Test cases for the command line interface.""" + import argparse import json import pathlib @@ -291,6 +292,50 @@ def test_json_path( assert len(json.load(fd)) == 4 # noqa: PLR2004 +def test_json_path_strict( + parser: argparse.ArgumentParser, + sample_target: str, + outfile: str, +) -> None: + """Test a valid JSONPath.""" + args = parser.parse_args( + [ + "--debug", + "path", + "-q", + "price_cap", # No root identifier is an error in strict mode. + "-f", + sample_target, + "-o", + outfile, + "--strict", + ] + ) + + with pytest.raises(JSONPathSyntaxError): + handle_path_command(args) + + args = parser.parse_args( + [ + "path", + "-q", + "$.price_cap", # With a root identifier is OK. + "-f", + sample_target, + "-o", + outfile, + "--strict", + ] + ) + + handle_path_command(args) + args.output.flush() + + with open(outfile, "r") as fd: + rv = json.load(fd) + assert rv == [10] + + def test_pointer_command_invalid_target( parser: argparse.ArgumentParser, invalid_target: str, diff --git a/tests/test_convenience_api.py b/tests/test_convenience_api.py index e151d4a..06a4b69 100644 --- a/tests/test_convenience_api.py +++ b/tests/test_convenience_api.py @@ -1 +1,104 @@ -# TODO: +import asyncio +from typing import List + +import pytest + +import jsonpath + + +def test_convenience_compile() -> None: + # Implicit root identifier works by default, but not when strict=True. 
+ path = jsonpath.compile("a.*") + assert isinstance(path, jsonpath.JSONPath) + assert path.findall({"a": [1, 2, 3]}) == [1, 2, 3] + + +def test_convenience_compile_strict() -> None: + with pytest.raises(jsonpath.JSONPathSyntaxError): + jsonpath.compile("a.*", strict=True) + + path = jsonpath.compile("$.a.*", strict=True) + assert isinstance(path, jsonpath.JSONPath) + assert path.findall({"a": [1, 2, 3]}) == [1, 2, 3] + + +def test_convenience_findall() -> None: + assert jsonpath.findall("a.*", {"a": [1, 2, 3]}) == [1, 2, 3] + + +def test_convenience_findall_strict() -> None: + with pytest.raises(jsonpath.JSONPathSyntaxError): + jsonpath.findall("a.*", {"a": [1, 2, 3]}, strict=True) + + assert jsonpath.findall("$.a.*", {"a": [1, 2, 3]}, strict=True) == [1, 2, 3] + + +def test_convenience_findall_async() -> None: + async def coro() -> List[object]: + return await jsonpath.findall_async("a.*", {"a": [1, 2, 3]}) + + assert asyncio.run(coro()) == [1, 2, 3] + + +def test_convenience_findall_async_strict() -> None: + async def coro() -> List[object]: + with pytest.raises(jsonpath.JSONPathSyntaxError): + await jsonpath.findall_async("a.*", {"a": [1, 2, 3]}, strict=True) + + return await jsonpath.findall_async("$.a.*", {"a": [1, 2, 3]}, strict=True) + + assert asyncio.run(coro()) == [1, 2, 3] + + +def test_convenience_finditer() -> None: + matches = list(jsonpath.finditer("a.*", {"a": [1, 2, 3]})) + assert [m.obj for m in matches] == [1, 2, 3] + + +def test_convenience_finditer_strict() -> None: + with pytest.raises(jsonpath.JSONPathSyntaxError): + list(jsonpath.finditer("a.*", {"a": [1, 2, 3]}, strict=True)) + + matches = list(jsonpath.finditer("$.a.*", {"a": [1, 2, 3]}, strict=True)) + assert [m.obj for m in matches] == [1, 2, 3] + + +def test_convenience_finditer_async_strict() -> None: + async def coro() -> List[object]: + with pytest.raises(jsonpath.JSONPathSyntaxError): + await jsonpath.finditer_async("a.*", {"a": [1, 2, 3]}, strict=True) + + it = await 
jsonpath.finditer_async("$.a.*", {"a": [1, 2, 3]}, strict=True) + return [m.obj async for m in it] + + assert asyncio.run(coro()) == [1, 2, 3] + + +def test_convenience_match() -> None: + match = jsonpath.match("a.*", {"a": [1, 2, 3]}) + assert isinstance(match, jsonpath.JSONPathMatch) + assert match.obj == 1 + + +def test_convenience_match_strict() -> None: + with pytest.raises(jsonpath.JSONPathSyntaxError): + jsonpath.match("a.*", {"a": [1, 2, 3]}, strict=True) + + match = jsonpath.match("$.a.*", {"a": [1, 2, 3]}) + assert isinstance(match, jsonpath.JSONPathMatch) + assert match.obj == 1 + + +def test_convenience_query() -> None: + query = jsonpath.query("a.*", {"a": [1, 2, 3]}) + assert isinstance(query, jsonpath.Query) + assert list(query.values()) == [1, 2, 3] + + +def test_convenience_query_strict() -> None: + with pytest.raises(jsonpath.JSONPathSyntaxError): + jsonpath.query("a.*", {"a": [1, 2, 3]}, strict=True) + + query = jsonpath.query("$.a.*", {"a": [1, 2, 3]}) + assert isinstance(query, jsonpath.Query) + assert list(query.values()) == [1, 2, 3] From a8bace43ea9d3ce9b4e594d7cf590dcf4e4231d9 Mon Sep 17 00:00:00 2001 From: James Prior Date: Fri, 22 Aug 2025 19:22:40 +0100 Subject: [PATCH 25/29] Add package level functions to docs and update CLI docs [skip ci] --- docs/cli.md | 7 +++++++ docs/convenience.md | 31 +++++++++++++++++++++++++++++++ docs/quickstart.md | 10 +++++----- mkdocs.yml | 1 + 4 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 docs/convenience.md diff --git a/docs/cli.md b/docs/cli.md index 04aa83c..70b641c 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -62,6 +62,7 @@ optional arguments: File to write resulting objects to, as a JSON array. Defaults to the standard output stream. --no-type-checks Disables filter expression well-typedness checks. + --strict Compile and evaluate JSONPath expressions with strict compliance with RFC 9535. 
``` ## Global Options @@ -191,6 +192,12 @@ _New in version 0.10.0_ Disables JSONPath filter expression well-typedness checks. The well-typedness of a filter expression is defined by RFC 9535. +#### `--strict` + +_New in version 2.0.0_ + +Compile and evaluate JSONPath expressions with strict compliance with RFC 9535. + ### `pointer` Resolve a JSON Pointer against a JSON document. One of `-p`/`--pointer` or `-r`/`--pointer-file` must be given. `-p` being a JSON Pointer given on the command line as a string, `-r` being the path to a file containing a JSON Pointer. diff --git a/docs/convenience.md b/docs/convenience.md new file mode 100644 index 0000000..4437f9e --- /dev/null +++ b/docs/convenience.md @@ -0,0 +1,31 @@ +# Convenience Functions + +These package-level functions use the default [JSONPathEnvironment](api.md#jsonpath.JSONPathEnvironment), `jsonpath.DEFAULT_ENV` when `strict=False`, or the preconfigured strict environment, `jsonpath.STRICT_ENV` when `strict=True`. + +::: jsonpath.compile + + handler: python + +::: jsonpath.findall + + handler: python + +::: jsonpath.finditer + + handler: python + +::: jsonpath.findall_async + + handler: python + +::: jsonpath.finditer_async + + handler: python + +::: jsonpath.match + + handler: python + +::: jsonpath.query + + handler: python diff --git a/docs/quickstart.md b/docs/quickstart.md index 26aa486..449ee35 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -4,18 +4,18 @@ This page gets you started using JSONPath, JSON Pointer and JSON Patch wih Pytho ## `findall(path, data)` -Find all values matching a JSONPath query using [`jsonpath.findall()`](api.md#jsonpath.JSONPathEnvironment.findall). +Find all values matching a JSONPath query using [`jsonpath.findall()`](convenience.md#jsonpath.findall). This function takes two arguments: -- `path`: a JSONPath query as a string (e.g., `"$.users[*].name"`) +- `path`: a JSONPath query as a string (e.g. 
`"$.users[*].name"`) - `data`: the JSON document to query It **always** returns a list of matched values, even if the path resolves to a single result or nothing at all. The `data` argument can be: -- A Python [`Mapping`](https://docs.python.org/3/library/collections.abc.html#collections.abc.Mapping) (e.g., `dict`) or [`Sequence`](https://docs.python.org/3/library/collections.abc.html#collections.abc.Sequence) (e.g., `list`) +- A Python [`Mapping`](https://docs.python.org/3/library/collections.abc.html#collections.abc.Mapping) (e.g. `dict`) or [`Sequence`](https://docs.python.org/3/library/collections.abc.html#collections.abc.Sequence) (e.g. `list`) - A JSON-formatted string - A file-like object containing JSON @@ -65,7 +65,7 @@ with open("users.json") as fd: ## `finditer(path, data)` -Use [`jsonpath.finditer()`](api.md#jsonpath.JSONPathEnvironment.finditer) to iterate over instances of [`jsonpath.JSONPathMatch`](api.md#jsonpath.JSONPathMatch) for every object in _data_ that matches _path_. It accepts the same arguments as [`findall()`](#findallpath-data), a query string and data from which to select matches. +Use [`jsonpath.finditer()`](convenience.md#jsonpath.finditer) to iterate over instances of [`jsonpath.JSONPathMatch`](api.md#jsonpath.JSONPathMatch) for every object in _data_ that matches _path_. It accepts the same arguments as [`findall()`](#findallpath-data), a query string and data from which to select matches. ```python import jsonpath @@ -109,7 +109,7 @@ The selected object is available from a [`JSONPathMatch`](api.md#jsonpath.JSONPa ## `compile(path)` -When you have a JSONPath query that needs to be matched against different data repeatedly, you can compile the path ahead of time using [`jsonpath.compile()`](api.md#jsonpath.JSONPathEnvironment.compile). It takes a query as a string and returns an instance of [`JSONPath`](api.md#jsonpath.JSONPath). 
`JSONPath` has `findall()` and `finditer()` methods that behave similarly to package-level `findall()` and `finditer()`, just without the `path` argument. +When you have a JSONPath query that needs to be matched against different data repeatedly, you can compile the path ahead of time using [`jsonpath.compile()`](convenience.md#jsonpath.compile). It takes a query as a string and returns an instance of [`JSONPath`](api.md#jsonpath.JSONPath). `JSONPath` has `findall()` and `finditer()` methods that behave similarly to package-level `findall()` and `finditer()`, just without the `path` argument. ```python import jsonpath diff --git a/mkdocs.yml b/mkdocs.yml index 6166f4e..6489f74 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -53,6 +53,7 @@ nav: - JSON Pointers: "pointers.md" - Async Support: "async.md" - API Reference: + - Package Level Functions: "convenience.md" - High Level API: "api.md" - Low Level API: "custom_api.md" - Exceptions: "exceptions.md" From 833a6d6b10cb8fe8bde113aa774786ec01407f7e Mon Sep 17 00:00:00 2001 From: James Prior Date: Sat, 23 Aug 2025 08:44:30 +0100 Subject: [PATCH 26/29] More docs updates [skip ci] --- docs/advanced.md | 2 +- docs/async.md | 4 -- docs/functions.md | 2 +- docs/index.md | 4 +- docs/pointers.md | 2 +- docs/syntax.md | 95 +++++++++++++++++++++++++++-------------------- 6 files changed, 60 insertions(+), 49 deletions(-) diff --git a/docs/advanced.md b/docs/advanced.md index 966dfc3..4259706 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -2,7 +2,7 @@ ## Filter Variables -Arbitrary variables can be made available to [filter expressions](syntax.md#filters-expression) using the _filter_context_ argument to [`findall()`](quickstart.md#findallpath-data) and [`finditer()`](quickstart.md#finditerpath-data). _filter_context_ should be a [mapping](https://docs.python.org/3/library/typing.html#typing.Mapping) of strings to JSON-like objects, like lists, dictionaries, strings and integers. 
Here we describe built-in filters.
For strict compatibility with RFC 9535, install [regex](https://pypi.org/project/regex/) and [iregexp-check](https://pypi.org/project/iregexp-check/) packages too. +By default, and without any additional dependencies, the syntax supported by Python JSONPath is **very close** to RFC 9535. For strict compatibility with the specification, install [regex](https://pypi.org/project/regex/) and [iregexp-check](https://pypi.org/project/iregexp-check/) packages too. With these two packages installed, the [`match()`](functions.md#match) and [`search()`](functions.md#search) filter functions will use [regex](https://pypi.org/project/regex/) instead of `re` from the standard library, and will validate regular expression patterns against [RFC 9485](https://datatracker.ietf.org/doc/html/rfc9485). -Aso see [strict mode](syntax.md#strict-mode) for more information about strict compatibility with RFC 9535. +See the [syntax guide](syntax.md) for more information about strict compatibility with RFC 9535 and extensions to the specification. ## Example diff --git a/docs/pointers.md b/docs/pointers.md index aab6934..e29f39f 100644 --- a/docs/pointers.md +++ b/docs/pointers.md @@ -10,7 +10,7 @@ JSON Pointers are a fundamental component of JSON Patch ([RFC 6902](https://data We have extended RFC 6901 to support: - - Interoperability with the JSONPath [keys selector](syntax.md#keys-or) (`~`) + - Interoperability with the JSONPath [keys selector](syntax.md#keys-selector) (`~`) - A special non-standard syntax for targeting **keys or indices themselves**, used in conjunction with [Relative JSON Pointer](#torel) **Keys Selector Compatibility** diff --git a/docs/syntax.md b/docs/syntax.md index d350c69..f8381a3 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -1,6 +1,8 @@ # JSONPath Syntax -By default, Python JSONPath extends the RFC 9535 specification with a few additional features and relaxed rules. 
If you need strict compliance with RFC 9535, you can enable strict mode, which enforces the standard without these extensions. In this guide, we first outline the standard syntax (see the specification for the formal definition), and then describe the non-standard extensions and their semantics in detail. +Python JSONPath extends the [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535) specification with additional features and relaxed rules. If you need strict compliance with RFC 9535, set `strict=True` when calling [`findall()`](convenience.md#jsonpath.findall), [`finditer()`](convenience.md#jsonpath.finditer), etc., which enforces the standard without these extensions. + +In this guide, we first outline the standard syntax (see the specification for the formal definition), and then describe the non-standard extensions and their semantics in detail. ## JSONPath Terminology @@ -21,13 +23,13 @@ What follows is a description of these selectors, starting with the standard one ## Standard selectors and identifiers -### Root identifier (`$`) +### Root identifier The root identifier, `$`, refers to the outermost node in the target document. This can be an object, an array, or a scalar value. A query containing only the root identifier simply returns the entire input document. -#### Example query +**Example query** ``` $ @@ -53,13 +55,13 @@ $ ] ``` -### Name selector (`.thing` or `['thing']`) +### Name selector -A _name selector_ matches the value of an object member by its key. You can write it in either **shorthand notation** (`.thing`) or **bracket notation** (`['thing']`). +A _name selector_ matches the value of an object member by its key. You can write it in either **shorthand notation** (`.thing`) or **bracket notation** (`['thing']` or `["thing"]`). Dot notation can be used when the property name is a valid identifier. Bracket notation is required when the property name contains spaces, special characters, or starts with a number. 
-#### Example query +**Example query** ```text $.book.title @@ -78,11 +80,11 @@ $.book.title ["Moby Dick"] ``` -### Index selector (`[0]` or `[-1]`) +### Index selector -Select an element from an array by its index. Indices are zero-based and enclosed in brackets. If the index is negative, items are selected from the end of the array. +The index selector selects an element from an array by its index. Indices are zero-based and enclosed in brackets, `[0]`. If the index is negative, items are selected from the end of the array. -#### Example query +**Example query** ```text $.categories[0].name @@ -101,11 +103,11 @@ $.categories[0].name ["fiction"] ``` -### Wildcard selector (`.*` or `[*]`) +### Wildcard selector The _wildcard selector_ matches all member values of an object or all elements in an array. It can be written as `.*` (shorthand notation) or `[*]` (bracket notation). -#### Example query +**Example query** ```text $.categories[*].name @@ -124,11 +126,11 @@ $.categories[*].name ["fiction", "non-fiction"] ``` -### Slice selector (`[start:end:step]`) +### Slice selector -The slice selector allows you to select a range of elements from an array. You can specify a starting index, an ending index (exclusive), and an optional step to skip elements. Negative indices count from the end of the array, just like standard Python slicing. +The slice selector allows you to select a range of elements from an array. A start index, ending index and step size are all optional and separated by colons, `[start:end:step]`. Negative indices count from the end of the array, just like standard Python slicing. -#### Example query +**Example query** ```text $.items[1:4:2] @@ -144,9 +146,9 @@ $.items[1:4:2] ["b", "d"] ``` -### Filter selector (`[?expression]`) +### Filter selector -Filters allow you to remove nodes from a selection based on a Boolean expression. A filter expression evaluates each node in the context of either the root (`$`) or the current node (`@`). 
+Filters allow you to remove nodes from a selection based on a Boolean expression, `[?expression]`. A filter expression evaluates each node in the context of either the root (`$`) or current (`@`) node. When filtering a mapping-like object, `@` identifies the current member value. When filtering a sequence-like object, `@` identifies the current element. @@ -154,7 +156,7 @@ Comparison operators include `==`, `!=`, `<`, `>`, `<=`, and `>=`. Logical opera A filter expression on its own - without a comparison - is treated as an existence test. -#### Example query +**Example query** ```text $..products[?(@.price < $.price_cap)] @@ -182,13 +184,13 @@ Filter expressions can also call predefined [function extensions](functions.md). ## More on segments -So far we've seen shorthand notation and segments with just one selector. Here we cover the descendant segment and segments with multiple selectors. +So far we've seen shorthand notation (`.selector`) and segments with just one selector (`[selector]`). Here we cover the descendant segment and segments with multiple selectors. ### Segments with multiple selectors -A segment can include multiple selectors separated by commas and enclosed in square brackets (`[...]`). Any valid selector (names, indices, slices, filters, or wildcards) can appear in the list. +A segment can include multiple selectors separated by commas and enclosed in square brackets (`[selector, selector, ...]`). Any valid selector (names, indices, slices, filters, or wildcards) can appear in the list. -#### Example query +**Example query** ```text $.store.book[0,2] @@ -213,11 +215,11 @@ $.store.book[0,2] ] ``` -### Descendant segment (`..`) +### Descendant segment The descendant segment (`..`) visits all object member values and array elements under the current object or array, applying the selector or selectors that follow to each visited node. It must be followed by a shorthand selector (names, wildcards, etc.) or a bracketed list of one or more selectors. 
-#### Example query +**Example query** ```text $..price @@ -241,7 +243,13 @@ $..price ## Non-standard selectors and identifiers -The selectors and identifiers described in this section are an extension to RFC 9535. They are enabled by default. See [#strict-mode] for details on how to use JSONPath without these extensions. +The selectors and identifiers described in this section are an extension to the RFC 9535 specification. They are enabled by default. Set `strict=True` when constructing a [`JSONPathEnvironment`](api.md#jsonpath.JSONPathEnvironment), calling [`findall()`](convenience.md#jsonpath.findall), [`finditer()`](convenience.md#jsonpath.finditer), etc. to disable all non-standard features. + +Also note that when `strict=False`: + +- The root identifier (`$`) is optional and paths starting with a dot (`.`) are OK. `.thing` is the same as `$.thing`, as is `thing` and `$["thing"]`. +- Leading and trailing whitespace is OK. +- Explicit comparisons to `undefined` (aka `missing`) are supported as well as implicit existence tests. ### Key selector @@ -573,32 +581,39 @@ list-literal = "[" S literal *(S "," S literal) S "]" | `$.y[?'foo' in @.a]` | `{"a": ["foo", "bar"]}` | `$['y'][0]` | String literal in object | | `$.z[?(['bar', 'baz'] contains @.a)]` | `{"a": "bar"}` | `$['z'][1]` | List literal contains embedded query | -### The regex operator +### Regex operator -TODO +`=~` is an infix operator that matches the left-hand side with a regular expression literal on the right-hand side. Regular expression literals use a syntax similar to that found in JavaScript, where the pattern to match is surrounded by slashes, `/pattern/`, optionally followed by flags, `/pattern/flags`. -### Union (`|`) and intersection (`&`) +``` +$..products[?(@.description =~ /.*trainers/i)] +``` -Union (`|`) and intersection (`&`) are similar to Python's set operations, but we don't dedupe the matches (matches will often contain unhashable objects). 
+### Union and intersection operators -The `|` operator combines matches from two or more paths. This example selects a single list of all prices, plus the price cap as the last element. +The union or concatenation operator, `|`, combines matches from two or more paths. -```text -$..products.*.price | $.price_cap -``` +The intersection operator, `&`, produces matches that are common to both left and right paths. -The `&` operator produces matches that are common to both left and right paths. This example would select the list of products that are common to both the "footwear" and "headwear" categories. +Note that compound queries are not allowed inside filter expressions. + +#### Syntax -```text -$.categories[?(@.name == 'footwear')].products.* & $.categories[?(@.name == 'headwear')].products.* ``` +jsonpath-query = root-identifier segments -Note that `|` and `&` are not allowed inside filter expressions. +compound-jsonpath-query = jsonpath-query compound-op jsonpath-query -## Other differences +compound-op = "|" / + "&" +``` -This is a list of areas where Python JSONPath is more relaxed than [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535). +#### Examples -- The root token (`$`) is optional and paths starting with a dot (`.`) are OK. `.thing` is the same as `$.thing`, as is `thing` and `$["thing"]`. -- Leading and trailing whitespace is OK. -- We support explicit comparisons to `undefined` (aka `missing`) as well as implicit existence tests. 
# TODO: validate literal patterns at compile time?
# TODO: validate literal patterns at compile time?
+ + if IREGEXP_AVAILABLE and (not isinstance(pattern, str) or not check(pattern)): + return False + if REGEX_AVAILABLE: try: pattern = map_re(pattern) diff --git a/jsonpath/parse.py b/jsonpath/parse.py index 6b6c684..107d9a0 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -860,7 +860,8 @@ def _decode_string_literal(self, token: Token) -> str: assert isinstance(rv, str) return rv except json.JSONDecodeError as err: - raise JSONPathSyntaxError(str(err).split(":")[1], token=token) from None + message = f"decode error: {str(err).split(':')[1]}" + raise JSONPathSyntaxError(message, token=token) from None return token.value diff --git a/tests/test_iregexp.py b/tests/test_iregexp.py new file mode 100644 index 0000000..f1b91e6 --- /dev/null +++ b/tests/test_iregexp.py @@ -0,0 +1,34 @@ +import pytest + +try: + import iregexp_check # noqa: F401 + + IREGEXP_AVAILABLE = True +except ImportError: + IREGEXP_AVAILABLE = False + +import jsonpath + + +@pytest.mark.skipif(IREGEXP_AVAILABLE is False, reason="requires iregexp_check") +def test_iregexp_check() -> None: + # Character classes are OK. + query = "$[?match(@, '[0-9]+')]" + data = ["123", "abc", "abc123"] + assert jsonpath.findall(query, data) == ["123"] + + # Multi character escapes are not. + query = "$[?match(@, '\\\\d+')]" + assert jsonpath.findall(query, data) == [] + + +@pytest.mark.skipif(IREGEXP_AVAILABLE, reason="iregexp_check is available") +def test_no_iregexp_check() -> None: + # Character classes are OK. + query = "$[?match(@, '[0-9]+')]" + data = ["123", "abc", "abc123"] + assert jsonpath.findall(query, data) == ["123"] + + # Multi character escapes are OK when iregexp_check is not installed. 
- In filter selector expressions, float literals now follow the specification. Previously `.1` and `1.` were allowed, now it must be `0.1` and `1.0`, with at least one digit either side of the decimal point.
- We've changed the internal representation of compiled JSONPath queries. We now model segments and selectors explicitly and use terminology that matches RFC 9535.
- In filter selector expressions, float literals now follow the specification. Previously `.1` and `1.` were allowed, now it must be `0.1` and `1.0`, with at least one digit either side of the decimal point.
Whitespace before the dot or double dot is OK.