diff --git a/.github/workflows/tests-no-regex.yaml b/.github/workflows/tests-no-regex.yaml new file mode 100644 index 0000000..8969b45 --- /dev/null +++ b/.github/workflows/tests-no-regex.yaml @@ -0,0 +1,19 @@ +name: test-no-regex +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade hatch + - run: hatch -e no-regex run test diff --git a/CHANGELOG.md b/CHANGELOG.md index 744c89e..fba0006 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,36 @@ # Python JSONPath Change Log +## Version 2.0.0 (unreleased) + +**JSONPath syntax changes** + +These breaking changes apply to Python JSONPath in its default configuration. We've also introduced a _strict mode_ where we follow the RFC 9535 specification exactly. See [optional dependencies](https://jg-rp.github.io/python-jsonpath/#optional-dependencies) and the [syntax guide](https://jg-rp.github.io/python-jsonpath/syntax/) for more information. + +- Using bracket notation, unquoted property names are no longer interpreted as quoted property names. These paths used to be equivalent, `$[foo]`, `$['foo']` and `$["foo"]`. Now, names without quotes start a _singular query selector_. With an implicit _root identifier_, `$.a[b]` is equivalent to `$.a[$.b]`. See [Singular query selector](https://jg-rp.github.io/python-jsonpath/syntax/#singular-query-selector) in the syntax guide. +- In filter selector expressions, float literals now follow the specification. Previously `.1` and `1.` were allowed, now it must be `0.1` and `1.0`, with at least one digit either side of the decimal point. +- Slice selector indexes and step now follow the specification. Previously leading zeros and negative zero were allowed, now they raise a `JSONPathSyntaxError`. 
+- Whitespace is no longer allowed between a dot (`.` or `..`) and a name when using shorthand notation for the name selector. Whitespace before the dot or double dot is OK. + +**JSONPath features** + +- Added the [Keys filter selector](https://jg-rp.github.io/python-jsonpath/syntax/#keys-filter-selector). +- Added the [Singular query selector](https://jg-rp.github.io/python-jsonpath/syntax/#singular-query-selector). +- We now use the [regex] package, if available, instead of `re` for match and search function extensions. See [optional dependencies](https://jg-rp.github.io/python-jsonpath/#optional-dependencies). +- Added the `strict` argument to all [convenience functions](https://jg-rp.github.io/python-jsonpath/convenience/), the CLI and the `JSONPathEnvironment` constructor. When `strict=True`, all extensions to RFC 9535 and any lax parsing rules will be disabled. +- Added class variable `JSONPathEnvironment.max_recursion_depth` to control the maximum recursion depth of descendant segments. +- Added pretty exception messages. + +**Python API changes** + +- Renamed class variable `JSONPathEnvironment.fake_root_token` to `JSONPathEnvironment.pseudo_root_token`. + +**Low level API changes** + +These breaking changes will only affect you if you're customizing the JSONPath lexer or parser. + +- The tokens produced by the JSONPath lexer have changed. Previously we broadly skipped some punctuation and whitespace. Now the parser can make better choices about when to accept whitespace and do a better job of enforcing dots. +- We've changed the internal representation of compiled JSONPath queries. We now model segments and selectors explicitly and use terminology that matches RFC 9535. 
+ ## Version 1.3.2 **Fixes** diff --git a/docs/advanced.md b/docs/advanced.md index 0dbd761..4259706 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -2,7 +2,7 @@ ## Filter Variables -Arbitrary variables can be made available to [filter expressions](syntax.md#filters-expression) using the _filter_context_ argument to [`findall()`](quickstart.md#findallpath-data) and [`finditer()`](quickstart.md#finditerpath-data). _filter_context_ should be a [mapping](https://docs.python.org/3/library/typing.html#typing.Mapping) of strings to JSON-like objects, like lists, dictionaries, strings and integers. +Arbitrary variables can be made available to [filter selectors](syntax.md#filter-selector) using the `filter_context` argument to [`findall()`](quickstart.md#findallpath-data) and [`finditer()`](quickstart.md#finditerpath-data). `filter_context` should be a [mapping](https://docs.python.org/3/library/typing.html#typing.Mapping) of strings to JSON-like objects, like lists, dictionaries, strings and integers. Filter context variables are selected using a filter query starting with the _filter context identifier_, which defaults to `_` and has usage similar to `$` and `@`. @@ -257,23 +257,3 @@ env = MyJSONPathEnvironment() query = env.compile("$.users[999]") # jsonpath.exceptions.JSONPathIndexError: index out of range, line 1, column 8 ``` - -### Subclassing Lexer - -TODO: - -### Subclassing Parser - -TODO: - -### Get Item - -TODO: - -### Truthiness and Existence - -TODO: - -### Filter Infix Expressions - -TODO: diff --git a/docs/async.md b/docs/async.md index 00b26ab..f00f24e 100644 --- a/docs/async.md +++ b/docs/async.md @@ -59,7 +59,3 @@ data = { best_a_team_players = jsonpath.findall_async("$.teams['A Team'][?rank >= 8]", data) ``` - -## Custom Async Item Getting - -TODO: diff --git a/docs/cli.md b/docs/cli.md index 04aa83c..70b641c 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -62,6 +62,7 @@ optional arguments: File to write resulting objects to, as a JSON array. 
Defaults to the standard output stream. --no-type-checks Disables filter expression well-typedness checks. + --strict Compile and evaluate JSONPath expressions with strict compliance with RFC 9535. ``` ## Global Options @@ -191,6 +192,12 @@ _New in version 0.10.0_ Disables JSONPath filter expression well-typedness checks. The well-typedness of a filter expression is defined by RFC 9535. +#### `--strict` + +_New in version 2.0.0_ + +Compile and evaluate JSONPath expressions with strict compliance with RFC 9535. + ### `pointer` Resolve a JSON Pointer against a JSON document. One of `-p`/`--pointer` or `-r`/`--pointer-file` must be given. `-p` being a JSON Pointer given on the command line as a string, `-r` being the path to a file containing a JSON Pointer. diff --git a/docs/convenience.md b/docs/convenience.md new file mode 100644 index 0000000..4437f9e --- /dev/null +++ b/docs/convenience.md @@ -0,0 +1,31 @@ +# Convenience Functions + +These package-level functions use the default [JSONPathEnvironment](api.md#jsonpath.JSONPathEnvironment), `jsonpath.DEFAULT_ENV` when `strict=False`, or the preconfigured strict environment, `jsonpath.STRICT_ENV` when `strict=True`. + +::: jsonpath.compile + + handler: python + +::: jsonpath.findall + + handler: python + +::: jsonpath.finditer + + handler: python + +::: jsonpath.findall_async + + handler: python + +::: jsonpath.finditer_async + + handler: python + +::: jsonpath.match + + handler: python + +::: jsonpath.query + + handler: python diff --git a/docs/functions.md b/docs/functions.md index e8f8a9f..10504cd 100644 --- a/docs/functions.md +++ b/docs/functions.md @@ -1,6 +1,6 @@ # Filter Functions -A filter function is a named function that can be called as part of a [filter selector](syntax.md#filters-expression) expression. Here we describe built-in filters. You can [define your own function extensions](advanced.md#function-extensions) too. 
+A filter function is a named function that can be called as part of a [filter selector](syntax.md#filter-selector). Here we describe built in filters. You can [define your own function extensions](advanced.md#function-extensions) too. ## `count()` diff --git a/docs/index.md b/docs/index.md index 6165583..60c5421 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,7 +2,7 @@ JSONPath is a mini language for selecting values from data formatted in JavaScript Object Notation, or equivalent Python objects, like dictionaries and lists. -Python JSONPath is a non-evaluating, read-only implementation of JSONPath, suitable for situations where JSONPath query authors are untrusted. We follow most of [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535). See [Notable differences](syntax.md#notable-differences) for a list of areas where we deviate from the standard. +Python JSONPath is a non-evaluating, read-only implementation of JSONPath, suitable for situations where JSONPath query authors are untrusted. We follow [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535) and test against the [JSONPath Compliance Test Suite](https://github.com/jsonpath-standard/jsonpath-compliance-test-suite). We also include implementations of [JSON Pointer](pointers.md) ([RFC 6901](https://datatracker.ietf.org/doc/html/rfc6901)) and [JSON Patch](api.md#jsonpath.JSONPatch) ([RFC 6902](https://datatracker.ietf.org/doc/html/rfc6902)), plus methods for converting a [JSONPathMatch](api.md#jsonpath.JSONPathMatch) to a `JSONPointer`. @@ -32,6 +32,14 @@ Or from [conda-forge](https://anaconda.org/conda-forge/python-jsonpath): conda install -c conda-forge python-jsonpath ``` +### Optional dependencies + +By default, and without any additional dependencies, the syntax supported by Python JSONPath is **very close** to RFC 9535. For strict compatibility with the specification, install [regex](https://pypi.org/project/regex/) and [iregexp-check](https://pypi.org/project/iregexp-check/) packages too. 
+ +With these two packages installed, the [`match()`](functions.md#match) and [`search()`](functions.md#search) filter functions will use [regex](https://pypi.org/project/regex/) instead of `re` from the standard library, and will validate regular expression patterns against [RFC 9485](https://datatracker.ietf.org/doc/html/rfc9485). + +See the [syntax guide](syntax.md) for more information about strict compatibility with RFC 9535 and extensions to the specification. + ## Example ```python diff --git a/docs/pointers.md b/docs/pointers.md index aab6934..e29f39f 100644 --- a/docs/pointers.md +++ b/docs/pointers.md @@ -10,7 +10,7 @@ JSON Pointers are a fundamental component of JSON Patch ([RFC 6902](https://data We have extended RFC 6901 to support: - - Interoperability with the JSONPath [keys selector](syntax.md#keys-or) (`~`) + - Interoperability with the JSONPath [keys selector](syntax.md#keys-selector) (`~`) - A special non-standard syntax for targeting **keys or indices themselves**, used in conjunction with [Relative JSON Pointer](#torel) **Keys Selector Compatibility** diff --git a/docs/quickstart.md b/docs/quickstart.md index bae1781..449ee35 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -4,18 +4,18 @@ This page gets you started using JSONPath, JSON Pointer and JSON Patch wih Pytho ## `findall(path, data)` -Find all values matching a JSONPath expression using [`jsonpath.findall()`](api.md#jsonpath.JSONPathEnvironment.findall). +Find all values matching a JSONPath query using [`jsonpath.findall()`](convenience.md#jsonpath.findall). This function takes two arguments: -- `path`: a JSONPath expression as a string (e.g., `"$.users[*].name"`) +- `path`: a JSONPath query as a string (e.g. `"$.users[*].name"`) - `data`: the JSON document to query -It always returns a **list** of matched values, even if the path resolves to a single result or nothing at all. 
+It **always** returns a list of matched values, even if the path resolves to a single result or nothing at all. The `data` argument can be: -- A Python [`Mapping`](https://docs.python.org/3/library/collections.abc.html#collections.abc.Mapping) (e.g., `dict`) or [`Sequence`](https://docs.python.org/3/library/collections.abc.html#collections.abc.Sequence) (e.g., `list`) +- A Python [`Mapping`](https://docs.python.org/3/library/collections.abc.html#collections.abc.Mapping) (e.g. `dict`) or [`Sequence`](https://docs.python.org/3/library/collections.abc.html#collections.abc.Sequence) (e.g. `list`) - A JSON-formatted string - A file-like object containing JSON @@ -65,7 +65,7 @@ with open("users.json") as fd: ## `finditer(path, data)` -Use [`jsonpath.finditer()`](api.md#jsonpath.JSONPathEnvironment.finditer) to iterate over instances of [`jsonpath.JSONPathMatch`](api.md#jsonpath.JSONPathMatch) for every object in _data_ that matches _path_. It accepts the same arguments as [`findall()`](#findallpath-data), a path string and data from which to select matches. +Use [`jsonpath.finditer()`](convenience.md#jsonpath.finditer) to iterate over instances of [`jsonpath.JSONPathMatch`](api.md#jsonpath.JSONPathMatch) for every object in _data_ that matches _path_. It accepts the same arguments as [`findall()`](#findallpath-data), a query string and data from which to select matches. ```python import jsonpath @@ -109,7 +109,7 @@ The selected object is available from a [`JSONPathMatch`](api.md#jsonpath.JSONPa ## `compile(path)` -When you have a JSONPath that needs to be matched against different data repeatedly, you can _compile_ the path ahead of time using [`jsonpath.compile()`](api.md#jsonpath.JSONPathEnvironment.compile). It takes a path as a string and returns a [`JSONPath`](api.md#jsonpath.JSONPath) instance. `JSONPath` has `findall()` and `finditer()` methods that behave similarly to package-level `findall()` and `finditer()`, just without the `path` argument. 
+When you have a JSONPath query that needs to be matched against different data repeatedly, you can compile the path ahead of time using [`jsonpath.compile()`](convenience.md#jsonpath.compile). It takes a query as a string and returns an instance of [`JSONPath`](api.md#jsonpath.JSONPath). `JSONPath` has `findall()` and `finditer()` methods that behave similarly to package-level `findall()` and `finditer()`, just without the `path` argument. ```python import jsonpath diff --git a/docs/syntax.md b/docs/syntax.md index 2aa7e10..f8381a3 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -1,229 +1,619 @@ # JSONPath Syntax -Python JSONPath's default syntax is an opinionated combination of JSONPath features from existing, popular implementations and [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535). If you're already familiar with JSONPath syntax, skip to [notable differences](#notable-differences). +Python JSONPath extends the [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535) specification with additional features and relaxed rules. If you need strict compliance with RFC 9535, set `strict=True` when calling [`findall()`](convenience.md#jsonpath.findall), [`finditer()`](convenience.md#jsonpath.finditer), etc., which enforces the standard without these extensions. -Imagine a JSON document as a tree structure, where each object (mapping) and array can contain more objects, arrays and scalar values. Every object, array and scalar value is a node in the tree, and the outermost object or array is the "root" node. +In this guide, we first outline the standard syntax (see the specification for the formal definition), and then describe the non-standard extensions and their semantics in detail. 
-For our purposes, a JSON "document" could be a file containing valid JSON data, a Python string containing valid JSON data, or a Python `Object` made up of dictionaries (or any [Mapping](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes)), lists (or any [Sequence](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes)), strings, etc. +## JSONPath Terminology -We chain _selectors_ together to retrieve nodes from the target document. Each selector operates on the nodes matched by preceding selectors. What follows is a description of those selectors. +Think of a JSON document as a tree, objects (mappings) and arrays can contain other objects, arrays, or scalar values. Each of these (object, array, or scalar) is a _node_ in the tree. The outermost object or array is called the _root_ node. -## Selectors +In this guide, a JSON "document" may refer to: -### Root (`$`) +- A file containing valid JSON text +- A Python string containing valid JSON text +- A Python object composed of dictionaries (or any [Mapping](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes)), lists (or any [Sequence](https://docs.python.org/3/library/collections.abc.html#collections-abstract-base-classes)), strings, numbers, booleans, or `None` -`$` refers to the first node in the target document, be it an object or an array. Unless referencing the root node from inside a filter expression, `$` is optional. The following two examples are equivalent. +A JSONPath expression (aka "query") is made up of a sequence of **segments**. Each segment contains one or more **selectors**: -```text -$.categories.*.name +- A _segment_ corresponds to a step in the path from one set of nodes to the next. +- A _selector_ describes how to choose nodes within that step (for example, by name, by index, or by wildcard). 
+ +What follows is a description of these selectors, starting with the standard ones defined in [RFC 9535](https://www.rfc-editor.org/rfc/rfc9535). + +## Standard selectors and identifiers + +### Root identifier + +The root identifier, `$`, refers to the outermost node in the target document. This can be an object, an array, or a scalar value. + +A query containing only the root identifier simply returns the entire input document. + +**Example query** + +``` +$ +``` + +```json title="data" +{ + "categories": [ + { "id": 1, "name": "fiction" }, + { "id": 2, "name": "non-fiction" } + ] +} ``` +```json title="results" +[ + { + "categories": [ + { "id": 1, "name": "fiction" }, + { "id": 2, "name": "non-fiction" } + ] + } +] +``` + +### Name selector + +A _name selector_ matches the value of an object member by its key. You can write it in either **shorthand notation** (`.thing`) or **bracket notation** (`['thing']` or `["thing"]`). + +Dot notation can be used when the property name is a valid identifier. Bracket notation is required when the property name contains spaces, special characters, or starts with a number. + +**Example query** + ```text -categories.*.name +$.book.title ``` -An empty path or a path containing just the root (`$`) selector returns the input data in its entirety. +```json title="data" +{ + "book": { + "title": "Moby Dick", + "author": "Herman Melville" + } +} +``` -### Properties (`.thing`, `[thing]` or `['thing']`) +```json title="results" +["Moby Dick"] +``` -Select nodes by property/key name using dot notation (`.something`) or bracket notation (`[something]`). If a target property/key contains reserved characters, it must use bracket notation and be enclosed in quotes (`['thing']`). +### Index selector -A dot in front of bracket notation is OK, but unnecessary. The following examples are equivalent. +The index selector selects an element from an array by its index. Indices are zero-based and enclosed in brackets, `[0]`. 
If the index is negative, items are selected from the end of the array. + +**Example query** ```text $.categories[0].name ``` -```text -$.categories[0][name] +```json title="data" +{ + "categories": [ + { "id": 1, "name": "fiction" }, + { "id": 2, "name": "non-fiction" } + ] +} ``` -```text -$.categories[0]['name'] +```json title="results" +["fiction"] ``` -By default, `or`, `and`, `in`, `true`, `True`, `false`, `False`, `nil`, `Nil`, `null`, `Null`, `none`, `None`, `contains`, `undefined`, and `missing` are considered _reserved words_. In some cases you will need to use quoted property/name selector syntax if you're selecting a name that matches any of these words exactly. For example, `["and"]`. +### Wildcard selector -### Array indices (`[0]` or `[-1]`) +The _wildcard selector_ matches all member values of an object or all elements in an array. It can be written as `.*` (shorthand notation) or `[*]` (bracket notation). -Select an item from an array by its index. Indices are zero-based and enclosed in brackets. If the index is negative, items are selected from the end of the array. Considering example data from the top of this page, the following examples are equivalent. +**Example query** ```text -$.categories[0] +$.categories[*].name ``` -```text -$.categories[-1] +```json title="data" +{ + "categories": [ + { "id": 1, "name": "fiction" }, + { "id": 2, "name": "non-fiction" } + ] +} +``` + +```json title="results" +["fiction", "non-fiction"] ``` -### Wildcard (`.*` or `[*]`) +### Slice selector + +The slice selector allows you to select a range of elements from an array. A start index, ending index and step size are all optional and separated by colons, `[start:end:step]`. Negative indices count from the end of the array, just like standard Python slicing. -Select all elements from an array or all values from an object using `*`. These two examples are equivalent. 
+**Example query** ```text -$.categories[0].products.* +$.items[1:4:2] ``` -```text -$.categories[0].products[*] +```json title="data" +{ + "items": ["a", "b", "c", "d", "e", "f"] +} ``` -### Keys (`.~` or `[~]`) +```json title="results" +["b", "d"] +``` -**_New in version 0.6.0_** +### Filter selector + +Filters allow you to remove nodes from a selection based on a Boolean expression, `[?expression]`. A filter expression evaluates each node in the context of either the root (`$`) or current (`@`) node. + +When filtering a mapping-like object, `@` identifies the current member value. When filtering a sequence-like object, `@` identifies the current element. + +Comparison operators include `==`, `!=`, `<`, `>`, `<=`, and `>=`. Logical operators `&&` (and) and `||` (or) can combine terms, and parentheses can be used to group expressions. -Select keys/properties from an object using `~`. +A filter expression on its own - without a comparison - is treated as an existence test. + +**Example query** ```text -$.categories.~ +$..products[?(@.price < $.price_cap)] ``` -```text -$.categories[~] +```json title="data" +{ + "price_cap": 10, + "products": [ + { "name": "apple", "price": 5 }, + { "name": "orange", "price": 12 }, + { "name": "banana", "price": 8 } + ] +} +``` + +```json title="results" +[ + { "name": "apple", "price": 5 }, + { "name": "banana", "price": 8 } +] ``` -### Slices (`[0:-1]` or `[-1:0:-1]`) +Filter expressions can also call predefined [function extensions](functions.md). -Select a range of elements from an array using slice notation. The start index, stop index and step are all optional. These examples are equivalent. +## More on segments + +So far we've seen shorthand notation (`.selector`) and segments with just one selector (`[selector]`). Here we cover the descendant segment and segments with multiple selectors. 
+ +### Segments with multiple selectors + +A segment can include multiple selectors separated by commas and enclosed in square brackets (`[selector, selector, ...]`). Any valid selector (names, indices, slices, filters, or wildcards) can appear in the list. + +**Example query** ```text -$.categories[0:] +$.store.book[0,2] ``` -```text -$.categories[0:-1:] +```json title="data" +{ + "store": { + "book": [ + { "title": "Book A", "price": 10 }, + { "title": "Book B", "price": 12 }, + { "title": "Book C", "price": 8 } + ] + } +} ``` -```text -$.categories[0:-1:1] +```json title="results" +[ + { "title": "Book A", "price": 10 }, + { "title": "Book C", "price": 8 } +] ``` +### Descendant segment + +The descendant segment (`..`) visits all object member values and array elements under the current object or array, applying the selector or selectors that follow to each visited node. It must be followed by a shorthand selector (names, wildcards, etc.) or a bracketed list of one or more selectors. + +**Example query** + ```text -$.categories[::] +$..price ``` -### Lists (`[1, 2, 10:20]`) +```json title="data" +{ + "store": { + "book": [ + { "title": "Book A", "price": 10 }, + { "title": "Book B", "price": 12 } + ], + "bicycle": { "color": "red", "price": 19.95 } + } +} +``` -Select multiple indices, slices or properties using list notation (sometimes known as a "union" or "segment", we use "union" to mean something else). +```json title="results" +[10, 12, 19.95] +``` -```text -$..products.*.[title, price] +## Non-standard selectors and identifiers + +The selectors and identifiers described in this section are an extension to the RFC 9535 specification. They are enabled by default. Set `strict=True` when constructing a [`JSONPathEnvironment`](api.md#jsonpath.JSONPathEnvironment), calling [`findall()`](convenience.md#jsonpath.findall), [`finditer()`](convenience.md#jsonpath.finditer), etc. to disable all non-standard features. 
+ +Also note that when `strict=False`: + +- The root identifier (`$`) is optional and paths starting with a dot (`.`) are OK. `.thing` is the same as `$.thing`, as is `thing` and `$["thing"]`. +- Leading and trailing whitespace is OK. +- Explicit comparisons to `undefined` (aka `missing`) are supported as well as implicit existence tests. + +### Key selector + +**_New in version 2.0.0_** + +The key selector, `.~name` or `[~'name']`, selects at most one name from an object member. It is syntactically similar to the standard [name selector](https://datatracker.ietf.org/doc/html/rfc9535#name-name-selector), with the addition of a tilde (`~`) prefix. + +When applied to a JSON object, the key selector selects the _name_ from an object member, if that name exists, or nothing if it does not exist. This complements the standard name selector, which selects the _value_ from a name/value pair. + +When applied to an array or primitive value, the key selector selects nothing. + +Key selector strings must follow the same processing semantics as name selector strings, as described in [section 2.3.1.2](https://datatracker.ietf.org/doc/html/rfc9535#section-2.3.1.2) of RFC 9535. + +!!! info + + The key selector is introduced to facilitate valid normalized paths for nodes produced by the [keys selector](#keys-selector) and the [keys filter selector](#keys-filter-selector). I don't expect it will be of much use elsewhere. + +#### Syntax + +``` +selector = name-selector / + wildcard-selector / + slice-selector / + index-selector / + filter-selector / + key-selector / + keys-selector / + keys-filter-selector + +key-selector = "~" name-selector + +child-segment = bracketed-selection / + ("." + (wildcard-selector / + member-name-shorthand / + member-key-shorthand)) + +descendant-segment = ".." 
(bracketed-selection / + wildcard-selector / + member-name-shorthand / + member-key-shorthand) + +member-key-shorthand = "~" name-first *name-char ``` -### Recursive descent (`..`) +#### Examples -The `..` selector visits every node beneath the current selection. If a property selector, using dot notation, follows `..`, the dot is optional. These two examples are equivalent. +```json title="Example JSON document" +{ + "a": [{ "b": "x", "c": "z" }, { "b": "y" }] +} +``` -```text -$..title +| Query | Result | Result Paths | Comment | +| ----------- | ----------------- | ----------------------------------------- | ----------------------------- | +| `$.a[0].~c` | `"c"` | `$['a'][0][~'c']` | Key of nested object | +| `$.a[1].~c` | | | Key does not exist | +| `$..[~'b']` | `"b"`
`"b"` | `$['a'][0][~'b']`
`$['a'][1][~'b']` | Descendant, single quoted key | +| `$..[~"b"]` | `"b"`
`"b"` | `$['a'][0][~'b']`
`$['a'][1][~'b']` | Descendant, double quoted key | + +### Keys selector + +**_New in version 0.6.0_** + +The keys selector, `~` or `[~]`, selects all names from an object’s name/value members. This complements the standard [wildcard selector](https://datatracker.ietf.org/doc/html/rfc9535#name-wildcard-selector), which selects all values from an object’s name/value pairs. + +As with the wildcard selector, the order of nodes resulting from a keys selector is not stipulated. + +When applied to an array or primitive value, the keys selector selects nothing. + +The normalized path of a node selected using the keys selector uses [key selector](#key-selector) syntax. + +#### Syntax + +``` +keys-selector = "~" ``` -```text -$...title +#### Examples + +```json title="Example JSON document" +{ + "a": [{ "b": "x", "c": "z" }, { "b": "y" }] +} ``` -### Filters (`[?EXPRESSION]`) +| Query | Result | Result Paths | Comment | +| -------------- | ----------------------------------------- | ----------------------------------------------------------------------------------------- | -------------------------- | +| `$.a[0].~` | `"b"`
`"c"` | `$['a'][0][~'b']`
`$['a'][0][~'c']` | Object keys | +| `$.a.~` | | | Array keys | +| `$.a[0][~, ~]` | `"b"`
`"c"`
`"c"`
`"b"` | `$['a'][0][~'b']`
`$['a'][0][~'c']`
`$['a'][0][~'c']`
`$['a'][0][~'b']` | Non-deterministic ordering | +| `$..[~]` | `"a"`
`"b"`
`"c"`
`"b"` | `$[~'a']`
`$['a'][0][~'b']`
`$['a'][0][~'c']`
`$['a'][1][~'b']` | Descendant keys | -Filters allow you to remove nodes from a selection using a Boolean expression. A _filter query_ is a JSONPath query nested within a filter expression. Every filter query must start with the root identifier (`$`), the current node identifier (`@`) or the [filter context](advanced.md#filter-variables) identifier (`_`). +### Keys filter selector + +**_New in version 2.0.0_** + +The keys filter selector selects names from an object’s name/value members. It is syntactically similar to the standard [filter selector](https://datatracker.ietf.org/doc/html/rfc9535#name-filter-selector), with the addition of a tilde (`~`) prefix. -```text -$..products[?(@.price < $.price_cap)] +``` +~? ``` -```text -$..products[?@.price < $.price_cap] +Whereas the standard filter selector will produce a node for each _value_ from an object’s name/value members - when its expression evaluates to logical true - the keys filter selector produces a node for each _name_ in an object’s name/value members. + +Logical expression syntax and semantics otherwise match that of the standard filter selector. `@` still refers to the current member value. See also the [current key identifier](#current-key-identifier). + +When applied to an array or primitive value, the keys filter selector selects nothing. + +The normalized path of a node selected using the keys filter selector uses [key selector](#key-selector) syntax. + +#### Syntax + +``` +filter-selector = "~?" S logical-expr ``` -When filtering a mapping-like object, `#` references the current key/property and `@` references the current value associated with `#`. When filtering a sequence-like object, `@` references the current item and `#` will hold the item's index in the sequence. +#### Examples -Comparison operators include `==`, `!=`, `<`, `>`, `<=` and `>=`. Plus `<>` as an alias for `!=`. 
+```json title="Example JSON document" +[{ "a": [1, 2, 3], "b": [4, 5] }, { "c": { "x": [1, 2] } }, { "d": [1, 2, 3] }] +``` -`in` and `contains` are membership operators. `left in right` is equivalent to `right contains left`. +| Query | Result | Result Paths | Comment | +| ---------------------- | ----------------- | ------------------------------- | -------------------------------- | +| `$.*[~?length(@) > 2]` | `"a"`
`"d"` | `$[0][~'a']`
`$[2][~'d']` | Conditionally select object keys | +| `$.*[~?@.x]` | `"c"` | `$[1][~'c']` | Existence test | +| `$[~?(true == true)]` | | | Keys from an array | -`&&` and `||` are logical operators and terms can be grouped with parentheses. `and` and `or` work too. +### Singular query selector -`=~` matches the left value with a regular expression literal. Regular expressions use a syntax similar to that found in JavaScript, where the pattern to match is surrounded by slashes, optionally followed by flags. +**_New in version 2.0.0_** -```text -$..products[?(@.description =~ /.*trainers/i)] +The singular query selector consist of an embedded absolute singular query, the result of which is used as an object member name or array element index. + +If the embedded query resolves to a string or int value, at most one object member value or array element value is selected. Otherwise the singular query selector selects nothing. + +#### Syntax + +``` +selector = name-selector / + wildcard-selector / + slice-selector / + index-selector / + filter-selector / + singular-query-selector + +singular-query-selector = abs-singular-query ``` -A filter query on its own - one that is not part of a comparison expression - is an existence test. We also support comparing a filter query to the special `undefined` keyword. These two example are equivalent. 
+#### Examples + +```json +{ + "a": { + "j": [1, 2, 3], + "p": { + "q": [4, 5, 6] + } + }, + "b": ["j", "p", "q"], + "c d": { + "x": { + "y": 1 + } + } +} +``` + +| Query | Result | Result Path | Comment | +| --------------------- | ------------------ | ---------------- | ----------------------------------------------------------------- | +| `$.a[$.b[1]]` | `{"q": [4, 5, 6]}` | `$['a']['p']` | Object name from embedded singular query | +| `$.a.j[$['c d'].x.y]` | `2` | `$['a']['j'][1]` | Array index from embedded singular query | +| `$.a[$.b]` | | | Embedded singular query does not resolve to a string or int value | + +### Current key identifier + +`#` is the _current key_ identifier. `#` will be the name of the current object member, or index of the current array element. This complements the current node identifier (`@`), which refers to a member value or array element, respectively. + +It is a syntax error to follow the current key identifier with segments, as if it were a filter query. + +When used as an argument to a function, the current key is of `ValueType`, and outside a function call it must be compared. + +#### Syntax -```text -$..products[?!@.sale_price] ``` +comparable = literal / + singular-query / ; singular query value + function-expr / ; ValueType + current-key-identifier -```text -$..products[?@.sale_price == undefined] + +function-argument = literal / + filter-query / ; (includes singular-query) + logical-expr / + function-expr / + current-key-identifier + +current-key-identifier = "#" ``` -Filter expressions can call predefined [function extensions](functions.md) too. 
+#### Examples -```text -$.categories[?count(@.products.*) >= 2] +```json title="Example JSON document" +{ "abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": [] } ``` -### Fake root (`^`) +| Query | Result | Result Path | Comment | +| ----------------------------------------- | --------------------- | --------------------------------- | --------------------------- | +| `$[?match(#, '^ab.*') && length(@) > 0 ]` | `[1,2,3]`
`[6]` | `$['abc']`
`$['abx']` | Match on object names | +| `$.abc[?(# >= 1)]` | `2`
`3` | `$['abc'][1]`
`$['abc'][2]` | Compare current array index |
+
+### Pseudo root identifier

 **_New in version 0.11.0_**

-This non-standard "fake root" identifier behaves like the standard root identifier (`$`), but wraps the target JSON document in a single-element array, so as to make it selectable with a filter selector.
+The pseudo root identifier (`^`) behaves like the standard root identifier (`$`), but conceptually wraps the target JSON document in a single-element array. This allows the root document itself to be conditionally selected by filters.
+
+#### Syntax

-```text
-^[?length(categories) > 0]
 ```
+jsonpath-query = (root-identifier / pseudo-root-identifier) segments

-### Union (`|`) and intersection (`&`)
+root-identifier = "$"
+pseudo-root-identifier = "^"
+```

-Union (`|`) and intersection (`&`) are similar to Python's set operations, but we don't dedupe the matches (matches will often contain unhashable objects).
+#### Examples

-The `|` operator combines matches from two or more paths. This example selects a single list of all prices, plus the price cap as the last element.
+```json title="Example JSON data"
+{ "a": { "b": 42 }, "n": 7 }
+```
+
+| Query | Result | Result Path | Comment |
+| -------------------------- | ------------------------------ | ----------- | ----------------------------------- |
+| `^[?@.a.b > 7]` | `{ "a": { "b": 42 } }` | `^[0]` | Conditionally select the root value |
+| `^[?@.a.b > value(^.*.n)]` | `{ "a": { "b": 42 }, "n": 7 }` | `^[0]` | Embedded pseudo root query |
+
+### Filter context identifier
+
+The filter context identifier (`_`) starts an embedded query, similar to the root identifier (`$`) and current node identifier (`@`), but targets JSON-like data passed as the `filter_context` argument to [`findall()`](api.md#jsonpath.JSONPath.findall) and [`finditer()`](api.md#jsonpath.JSONPath.finditer).
+ +#### Syntax -```text -$..products.*.price | $.price_cap ``` +current-node-identifier = "@" +extra-context-identifier = "_" -The `&` operator produces matches that are common to both left and right paths. This example would select the list of products that are common to both the "footwear" and "headwear" categories. +filter-query = rel-query / extra-context-query / jsonpath-query +rel-query = current-node-identifier segments +extra-context-query = extra-context-identifier segments -```text -$.categories[?(@.name == 'footwear')].products.* & $.categories[?(@.name == 'headwear')].products.* +singular-query = rel-singular-query / abs-singular-query / extra-context-singular-query +rel-singular-query = current-node-identifier singular-query-segments +abs-singular-query = root-identifier singular-query-segments + +extra-context-singular-query = extra-context-identifier singular-query-segments ``` -Note that `|` and `&` are not allowed inside filter expressions. +#### Examples -## Notable differences +```json title="Example JSON data" +{ "a": [{ "b": 42 }, { "b": 3 }] } +``` + +```json title="Extra JSON data" +{ "c": 42 } +``` -This is a list of things that you might find in other JSONPath implementation that we don't support (yet). +| Query | Result | Result Path | Comment | +| ------------------ | ------------- | ----------- | -------------------------------------------- | +| `$.a[?@.b == _.c]` | `{ "b": 42 }` | `$['a'][0]` | Comparison with extra context singular query | -- We don't support extension functions of the form `selector.func()`. -- We always return a list of matches from `jsonpath.findall()`, never a scalar value. -- We do not support arithmetic in filter expression. -- We don't allow dotted array indices. An array index must be surrounded by square brackets. -- Python JSONPath is strictly read only. 
There are no update "selectors", but we do provide methods for converting `JSONPathMatch` instances to `JSONPointer`s, and a `JSONPatch` builder API for modifying JSON-like data structures using said pointers. +## Non-standard operators -And this is a list of areas where we deviate from [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535). See [jsonpath-rfc9535](https://github.com/jg-rp/python-jsonpath-rfc9535) for an alternative implementation of JSONPath that does not deviate from RFC 9535. +In addition to the operators described below, the standard _logical and_ operator (`&&`) is aliased as `and`, the standard _logical or_ operator (`||`) is aliased as `or`, and `null` is aliased as `nil` and `none`. -- The root token (default `$`) is optional and paths starting with a dot (`.`) are OK. `.thing` is the same as `$.thing`, as is `thing`, `$[thing]` and `$["thing"]`. -- The built-in `match()` and `search()` filter functions use Python's standard library `re` module, which, at least, doesn't support Unicode properties. We might add an implementation of `match()` and `search()` using the third party [regex](https://pypi.org/project/regex/) package in the future. -- We don't check `match()` and `search()` regex arguments against RFC 9485. Any valid Python pattern is allowed. -- We don't require property names to be quoted inside a bracketed selection, unless the name contains reserved characters. -- We don't require the recursive descent segment to have a selector. `$..` is equivalent to `$..*`. -- We support explicit comparisons to `undefined` as well as implicit existence tests. -- Float literals without a fractional digit are OK or leading digit. `1.` is equivalent to `1.0`. -- We treat literals (such as `true` and `false`) as valid "basic" expressions. For example, `$[?true || false]`, without an existence test or comparison either side of logical _or_, does not raise a syntax error. -- By default, `and` is equivalent to `&&` and `or` is equivalent to `||`. 
-- `none` and `nil` are aliases for `null`. -- `null` (and its aliases), `true` and `false` can start with an upper or lower case letter. -- We don't treat some invalid `\u` escape sequences in quoted name selectors and string literals as an error. We match the behavior of the JSON decoder in Python's standard library, which is less strict than RFC 9535. +Also, `true`, `false`, `null` and their aliases can start with an upper case letter. -And this is a list of features that are uncommon or unique to Python JSONPath. +### Membership operators -- We support membership operators `in` and `contains`, plus list/array literals. -- `|` is a union operator, where matches from two or more JSONPaths are combined. This is not part of the Python API, but built-in to the JSONPath syntax. -- `&` is an intersection operator, where we exclude matches that don't exist in both left and right paths. This is not part of the Python API, but built-in to the JSONPath syntax. -- `#` is the current key/property or index identifier when filtering a mapping or sequence. -- `_` is a filter context identifier. With usage similar to `$` and `@`, `_` exposes arbitrary data from the `filter_context` argument to `findall()` and `finditer()`. -- `~` is a "keys" or "properties" selector. -- `^` is a "fake root" identifier. It is equivalent to `$`, but wraps the target JSON document in a single-element array, so the root value can be conditionally selected with a filter selector. -- `=~` is the the regex match operator, matching a value to a JavaScript-style regex literal. +The membership operators test whether one value occurs within another. + +An infix expression using `contains` evaluates to true if the right-hand side is a member of the left-hand side, and false otherwise. + +- If the left-hand side is an object and the right-hand side is a string, the result is true if the object has a member with that name. 
+- If the left-hand side is an array, the result is true if any element of the array is equal to the right-hand side.
+- For scalars (strings, numbers, booleans, null), `contains` always evaluates to false.
+
+The `in` operator is equivalent to `contains` with operands reversed. This makes `contains` and `in` symmetric, so either form may be used depending on which reads more naturally in context.
+
+A list literal is a comma-separated list of JSONPath expression literals. List literals should appear on the left-hand side of `contains` or the right-hand side of `in`.
+
+#### Syntax
+
+```
+basic-expr = paren-expr /
+             comparison-expr /
+             membership-expr /
+             test-expr
+
+membership-expr = membership-operand S membership-op S membership-operand
+
+membership-op = "contains" / "in"
+
+membership-operand = literal /
+                     singular-query / ; singular query value
+                     function-expr /  ; ValueType
+                     list-literal
+
+list-literal = "[" S literal *(S "," S literal) S "]"
+```
+
+#### Examples
+
+```json title="Example JSON data"
+{
+  "x": [{ "a": ["foo", "bar"] }, { "a": ["bar"] }],
+  "y": [{ "a": { "foo": "bar" } }, { "a": { "bar": "baz" } }],
+  "z": [{ "a": "foo" }, { "a": "bar" }]
+}
+```
+
+| Query | Result | Result Path | Comment |
+| ------------------------------------- | ----------------------- | ----------- | ------------------------------------ |
+| `$.x[?@.a contains 'foo']` | `{"a": ["foo", "bar"]}` | `$['x'][0]` | Array contains string literal |
+| `$.y[?@.a contains 'foo']` | `{"a": {"foo": "bar"}}` | `$['y'][0]` | Object contains string literal |
+| `$.x[?'foo' in @.a]` | `{"a": ["foo", "bar"]}` | `$['x'][0]` | String literal in array |
+| `$.y[?'foo' in @.a]` | `{"a": {"foo": "bar"}}` | `$['y'][0]` | String literal in object |
+| `$.z[?(['bar', 'baz'] contains @.a)]` | `{"a": "bar"}` | `$['z'][1]` | List literal contains embedded query |
+
+### Regex operator
+
+`=~` is an infix operator that matches the left-hand side with a regular expression literal on the right-hand side. 
Regular expression literals use a syntax similar to that found in JavaScript, where the pattern to match is surrounded by slashes, `/pattern/`, optionally followed by flags, `/pattern/flags`. + +``` +$..products[?(@.description =~ /.*trainers/i)] +``` + +### Union and intersection operators + +The union or concatenation operator, `|`, combines matches from two or more paths. + +The intersection operator, `&`, produces matches that are common to both left and right paths. + +Note that compound queries are not allowed inside filter expressions. + +#### Syntax + +``` +jsonpath-query = root-identifier segments + +compound-jsonpath-query = jsonpath-query compound-op jsonpath-query + +compound-op = "|" / + "&" +``` + +#### Examples + +```text +$..products.*.price | $.price_cap +``` + +```text +$.categories[?(@.name == 'footwear')].products.* & $.categories[?(@.name == 'headwear')].products.* +``` diff --git a/jsonpath/__init__.py b/jsonpath/__init__.py index 2604d4a..31fd4a9 100644 --- a/jsonpath/__init__.py +++ b/jsonpath/__init__.py @@ -1,7 +1,18 @@ # SPDX-FileCopyrightText: 2023-present James Prior # # SPDX-License-Identifier: MIT +from __future__ import annotations +from typing import TYPE_CHECKING +from typing import AsyncIterable +from typing import Iterable +from typing import List +from typing import Optional +from typing import Union + +from ._types import JSON +from ._types import JSONData +from ._types import JSONScalar from .env import JSONPathEnvironment from .exceptions import JSONPatchError from .exceptions import JSONPatchTestFailure @@ -32,6 +43,10 @@ from .pointer import RelativeJSONPointer from .pointer import resolve +if TYPE_CHECKING: + from .match import FilterContextVars + + __all__ = ( "compile", "CompoundJSONPath", @@ -68,16 +83,283 @@ "RelativeJSONPointerIndexError", "RelativeJSONPointerSyntaxError", "resolve", + "JSON", + "JSONData", + "JSONScalar", "UNDEFINED", ) -# For convenience +# For convenience and to delegate to strict or non-strict 
environments. DEFAULT_ENV = JSONPathEnvironment() -compile = DEFAULT_ENV.compile # noqa: A001 -findall = DEFAULT_ENV.findall -findall_async = DEFAULT_ENV.findall_async -finditer = DEFAULT_ENV.finditer -finditer_async = DEFAULT_ENV.finditer_async -match = DEFAULT_ENV.match -query = DEFAULT_ENV.query +STRICT_ENV = JSONPathEnvironment(strict=True) + + +def compile(path: str, *, strict: bool = False) -> Union[JSONPath, CompoundJSONPath]: # noqa: A001 + """Prepare a path string ready for repeated matching against different data. + + Arguments: + path: A JSONPath as a string. + strict: When `True`, compile the path for strict compliance with RFC 9535. + + Returns: + A `JSONPath` or `CompoundJSONPath`, ready to match against some data. + Expect a `CompoundJSONPath` if the path string uses the _union_ or + _intersection_ operators. + + Raises: + JSONPathSyntaxError: If _path_ is invalid. + JSONPathTypeError: If filter functions are given arguments of an + unacceptable type. + """ + return STRICT_ENV.compile(path) if strict else DEFAULT_ENV.compile(path) + + +def findall( + path: str, + data: JSONData, + *, + filter_context: Optional[FilterContextVars] = None, + strict: bool = False, +) -> List[object]: + """Find all objects in _data_ matching the JSONPath _path_. + + If _data_ is a string or a file-like objects, it will be loaded + using `json.loads()` and the default `JSONDecoder`. + + Arguments: + path: The JSONPath as a string. + data: A JSON document or Python object implementing the `Sequence` + or `Mapping` interfaces. + filter_context: Arbitrary data made available to filters using + the _filter context_ selector. + strict: When `True`, compile and evaluate with strict compliance with + RFC 9535. + + Returns: + A list of matched objects. If there are no matches, the list will + be empty. + + Raises: + JSONPathSyntaxError: If the path is invalid. + JSONPathTypeError: If a filter expression attempts to use types in + an incompatible way. 
+ """ + return ( + STRICT_ENV.findall(path, data, filter_context=filter_context) + if strict + else DEFAULT_ENV.findall(path, data, filter_context=filter_context) + ) + + +async def findall_async( + path: str, + data: JSONData, + *, + filter_context: Optional[FilterContextVars] = None, + strict: bool = False, +) -> List[object]: + """Find all objects in _data_ matching the JSONPath _path_. + + If _data_ is a string or a file-like objects, it will be loaded + using `json.loads()` and the default `JSONDecoder`. + + Arguments: + path: The JSONPath as a string. + data: A JSON document or Python object implementing the `Sequence` + or `Mapping` interfaces. + filter_context: Arbitrary data made available to filters using + the _filter context_ selector. + strict: When `True`, compile and evaluate with strict compliance with + RFC 9535. + + Returns: + A list of matched objects. If there are no matches, the list will + be empty. + + Raises: + JSONPathSyntaxError: If the path is invalid. + JSONPathTypeError: If a filter expression attempts to use types in + an incompatible way. + """ + return ( + await STRICT_ENV.findall_async(path, data, filter_context=filter_context) + if strict + else await DEFAULT_ENV.findall_async(path, data, filter_context=filter_context) + ) + + +def finditer( + path: str, + data: JSONData, + *, + filter_context: Optional[FilterContextVars] = None, + strict: bool = False, +) -> Iterable[JSONPathMatch]: + """Generate `JSONPathMatch` objects for each match of _path_ in _data_. + + If _data_ is a string or a file-like objects, it will be loaded using + `json.loads()` and the default `JSONDecoder`. + + Arguments: + path: The JSONPath as a string. + data: A JSON document or Python object implementing the `Sequence` + or `Mapping` interfaces. + filter_context: Arbitrary data made available to filters using + the _filter context_ selector. + strict: When `True`, compile and evaluate with strict compliance with + RFC 9535. 
+ + Returns: + An iterator yielding `JSONPathMatch` objects for each match. + + Raises: + JSONPathSyntaxError: If the path is invalid. + JSONPathTypeError: If a filter expression attempts to use types in + an incompatible way. + """ + return ( + STRICT_ENV.finditer(path, data, filter_context=filter_context) + if strict + else DEFAULT_ENV.finditer(path, data, filter_context=filter_context) + ) + + +async def finditer_async( + path: str, + data: JSONData, + *, + filter_context: Optional[FilterContextVars] = None, + strict: bool = False, +) -> AsyncIterable[JSONPathMatch]: + """Find all objects in _data_ matching the JSONPath _path_. + + If _data_ is a string or a file-like objects, it will be loaded + using `json.loads()` and the default `JSONDecoder`. + + Arguments: + path: The JSONPath as a string. + data: A JSON document or Python object implementing the `Sequence` + or `Mapping` interfaces. + filter_context: Arbitrary data made available to filters using + the _filter context_ selector. + strict: When `True`, compile and evaluate with strict compliance with + RFC 9535. + + Returns: + A list of matched objects. If there are no matches, the list will + be empty. + + Raises: + JSONPathSyntaxError: If the path is invalid. + JSONPathTypeError: If a filter expression attempts to use types in + an incompatible way. + """ + return ( + await STRICT_ENV.finditer_async(path, data, filter_context=filter_context) + if strict + else await DEFAULT_ENV.finditer_async(path, data, filter_context=filter_context) + ) + + +def match( + path: str, + data: JSONData, + *, + filter_context: Optional[FilterContextVars] = None, + strict: bool = False, +) -> Union[JSONPathMatch, None]: + """Return a `JSONPathMatch` instance for the first object found in _data_. + + `None` is returned if there are no matches. + + Arguments: + path: The JSONPath as a string. + data: A JSON document or Python object implementing the `Sequence` + or `Mapping` interfaces. 
+ filter_context: Arbitrary data made available to filters using + the _filter context_ selector. + strict: When `True`, compile and evaluate with strict compliance with + RFC 9535. + + Returns: + A `JSONPathMatch` object for the first match, or `None` if there were + no matches. + + Raises: + JSONPathSyntaxError: If the path is invalid. + JSONPathTypeError: If a filter expression attempts to use types in + an incompatible way. + """ + return ( + STRICT_ENV.match(path, data, filter_context=filter_context) + if strict + else DEFAULT_ENV.match(path, data, filter_context=filter_context) + ) + + +def query( + path: str, + data: JSONData, + *, + filter_context: Optional[FilterContextVars] = None, + strict: bool = False, +) -> Query: + """Return a `Query` iterator over matches found by applying _path_ to _data_. + + `Query` objects are iterable. + + ``` + for match in jsonpath.query("$.foo..bar", data): + ... + ``` + + You can skip and limit results with `Query.skip()` and `Query.limit()`. + + ``` + matches = ( + jsonpath.query("$.foo..bar", data) + .skip(5) + .limit(10) + ) + + for match in matches + ... + ``` + + `Query.tail()` will get the last _n_ results. + + ``` + for match in jsonpath.query("$.foo..bar", data).tail(5): + ... + ``` + + Get values for each match using `Query.values()`. + + ``` + for obj in jsonpath.query("$.foo..bar", data).limit(5).values(): + ... + ``` + + Arguments: + path: The JSONPath as a string. + data: A JSON document or Python object implementing the `Sequence` + or `Mapping` interfaces. + filter_context: Arbitrary data made available to filters using + the _filter context_ selector. + strict: When `True`, compile and evaluate with strict compliance with + RFC 9535. + + Returns: + A query iterator. + + Raises: + JSONPathSyntaxError: If the path is invalid. + JSONPathTypeError: If a filter expression attempts to use types in + an incompatible way. 
+ """ + return ( + STRICT_ENV.query(path, data, filter_context=filter_context) + if strict + else DEFAULT_ENV.query(path, data, filter_context=filter_context) + ) diff --git a/jsonpath/_types.py b/jsonpath/_types.py new file mode 100644 index 0000000..3f413b2 --- /dev/null +++ b/jsonpath/_types.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from io import IOBase +from typing import Any +from typing import Mapping +from typing import Sequence +from typing import Union + +JSONScalar = Union[str, int, float, bool, None] +"""A scalar JSON-like value. + +This includes primitive types that can appear in JSON: +string, number, boolean, or null. +""" + +JSON = Union[JSONScalar, Sequence[Any], Mapping[str, Any]] +"""A JSON-like data structure. + +This covers scalars, sequences (e.g. lists, tuples), and mappings (e.g. +dictionaries with string keys). Values inside may be untyped (`Any`) rather +than recursively constrained to `JSON` for flexibility. +""" + +JSONData = Union[str, IOBase, JSON] +"""Input representing JSON content. + +Accepts: +- a JSON-like object (`JSON`), +- a raw JSON string, +- or a file-like object containing JSON data. +""" diff --git a/jsonpath/cli.py b/jsonpath/cli.py index e79d2fd..fd52358 100644 --- a/jsonpath/cli.py +++ b/jsonpath/cli.py @@ -1,4 +1,5 @@ """JSONPath, JSON Pointer and JSON Patch command line interface.""" + import argparse import json import sys @@ -59,6 +60,15 @@ def path_sub_command(parser: argparse.ArgumentParser) -> None: # noqa: D103 help="Disables filter expression well-typedness checks.", ) + parser.add_argument( + "--strict", + action="store_true", + help=( + "Compile and evaluate JSONPath expressions with strict " + "compliance with RFC 9535." 
+ ), + ) + def pointer_sub_command(parser: argparse.ArgumentParser) -> None: # noqa: D103 parser.set_defaults(func=handle_pointer_command) @@ -248,6 +258,7 @@ def handle_path_command(args: argparse.Namespace) -> None: # noqa: PLR0912 path = jsonpath.JSONPathEnvironment( unicode_escape=not args.no_unicode_escape, well_typed=not args.no_type_checks, + strict=args.strict, ).compile(query) except JSONPathSyntaxError as err: if args.debug: @@ -289,7 +300,6 @@ def handle_pointer_command(args: argparse.Namespace) -> None: if args.pointer is not None: pointer = args.pointer else: - # TODO: is a property with a trailing newline OK? pointer = args.pointer_file.read().strip() try: diff --git a/jsonpath/env.py b/jsonpath/env.py index d951c90..1d0fa49 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -2,7 +2,20 @@ from __future__ import annotations -import re +try: + import regex # noqa: F401 + + REGEX_AVAILABLE = True +except ImportError: + REGEX_AVAILABLE = False + +try: + import iregexp_check # noqa: F401 + + IREGEXP_AVAILABLE = True +except ImportError: + IREGEXP_AVAILABLE = False + from decimal import Decimal from operator import getitem from typing import TYPE_CHECKING @@ -24,10 +37,10 @@ from .exceptions import JSONPathTypeError from .filter import UNDEFINED from .filter import VALUE_TYPE_EXPRESSIONS -from .filter import FilterExpression +from .filter import BaseExpression +from .filter import FilterQuery from .filter import FunctionExtension from .filter import InfixExpression -from .filter import Path from .fluent_api import Query from .function_extensions import ExpressionType from .function_extensions import FilterFunction @@ -40,14 +53,13 @@ from .path import JSONPath from .stream import TokenStream from .token import TOKEN_EOF -from .token import TOKEN_FAKE_ROOT from .token import TOKEN_INTERSECTION +from .token import TOKEN_PSEUDO_ROOT from .token import TOKEN_UNION from .token import Token if TYPE_CHECKING: - from io import IOBase - + from ._types import 
JSONData from .match import FilterContextVars @@ -88,12 +100,14 @@ class attributes `root_token`, `self_token` and `filter_context_token`. well-typedness as compile time. **New in version 0.10.0** + strict: When `True`, follow RFC 9535 strictly. + **New in version 2.0.0** ## Class attributes Attributes: - fake_root_token (str): The pattern used to select a "fake" root node, one level - above the real root node. + pseudo_root_token (str): The pattern used to select a "fake" root node, one + level above the real root node. filter_context_token (str): The pattern used to select extra filter context data. Defaults to `"_"`. intersection_token (str): The pattern used as the intersection operator. @@ -102,11 +116,16 @@ class attributes `root_token`, `self_token` and `filter_context_token`. filtering a mapping or sequence. Defaults to `"#"`. keys_selector_token (str): The pattern used as the "keys" selector. Defaults to `"~"`. + keys_filter_token (str): The pattern used as the "keys filter" selector. + Defaults to `"~?"`. lexer_class: The lexer to use when tokenizing path strings. max_int_index (int): The maximum integer allowed when selecting array items by index. Defaults to `(2**53) - 1`. min_int_index (int): The minimum integer allowed when selecting array items by index. Defaults to `-(2**53) + 1`. + max_recursion_depth (int): The maximum number of dict/objects and/or arrays/ + lists the recursive descent selector can visit before a + `JSONPathRecursionError` is thrown. parser_class: The parser to use when parsing tokens from the lexer. root_token (str): The pattern used to select the root node in a JSON document. Defaults to `"$"`. @@ -115,19 +134,21 @@ class attributes `root_token`, `self_token` and `filter_context_token`. union_token (str): The pattern used as the union operator. Defaults to `"|"`. """ - # These should be unescaped strings. `re.escape` will be called - # on them automatically when compiling lexer rules. 
- fake_root_token = "^" + # These should be unescaped strings. `re.escape` will be called on them + # automatically when compiling lexer rules. + pseudo_root_token = "^" filter_context_token = "_" intersection_token = "&" key_token = "#" keys_selector_token = "~" + keys_filter_token = "~?" root_token = "$" self_token = "@" union_token = "|" max_int_index = (2**53) - 1 min_int_index = -(2**53) + 1 + max_recursion_depth = 100 # Override these to customize path tokenization and parsing. lexer_class: Type[Lexer] = Lexer @@ -140,6 +161,7 @@ def __init__( filter_caching: bool = True, unicode_escape: bool = True, well_typed: bool = True, + strict: bool = False, ) -> None: self.filter_caching: bool = filter_caching """Enable or disable filter expression caching.""" @@ -151,6 +173,24 @@ def __init__( self.well_typed: bool = well_typed """Control well-typedness checks on filter function expressions.""" + self.strict: bool = strict + """When `True`, follow RFC 9535 strictly. + + This includes things like enforcing a leading root identifier and + ensuring there's no leading or trailing whitespace when parsing a + JSONPath query. + """ + + self.regex_available: bool = REGEX_AVAILABLE + """When `True`, the third party `regex` package is available.""" + + self.iregexp_available: bool = IREGEXP_AVAILABLE + """When `True`, the iregexp_check package is available. + + iregexp_check will be used to validate regular expressions against RFC 9485, + if available. 
+ """ + self.lexer: Lexer = self.lexer_class(env=self) """The lexer bound to this environment.""" @@ -180,46 +220,53 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 """ tokens = self.lexer.tokenize(path) stream = TokenStream(tokens) - fake_root = stream.current.kind == TOKEN_FAKE_ROOT + pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path: Union[JSONPath, CompoundJSONPath] = JSONPath( - env=self, selectors=self.parser.parse(stream), fake_root=fake_root + env=self, segments=self.parser.parse(stream), pseudo_root=pseudo_root ) - if stream.current.kind != TOKEN_EOF: + if stream.skip_whitespace() and self.strict: + raise JSONPathSyntaxError( + "unexpected whitespace", token=stream.tokens[stream.pos - 1] + ) + + if stream.current().kind != TOKEN_EOF: _path = CompoundJSONPath(env=self, path=_path) - while stream.current.kind != TOKEN_EOF: - if stream.peek.kind == TOKEN_EOF: + while stream.current().kind != TOKEN_EOF: + if stream.peek().kind == TOKEN_EOF: # trailing union or intersection raise JSONPathSyntaxError( - f"expected a path after {stream.current.value!r}", - token=stream.current, + f"expected a path after {stream.current().value!r}", + token=stream.current(), ) - if stream.current.kind == TOKEN_UNION: - stream.next_token() - fake_root = stream.current.kind == TOKEN_FAKE_ROOT + if stream.current().kind == TOKEN_UNION: + stream.next() + stream.skip_whitespace() + pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path = _path.union( JSONPath( env=self, - selectors=self.parser.parse(stream), - fake_root=fake_root, + segments=self.parser.parse(stream), + pseudo_root=pseudo_root, ) ) - elif stream.current.kind == TOKEN_INTERSECTION: - stream.next_token() - fake_root = stream.current.kind == TOKEN_FAKE_ROOT + elif stream.current().kind == TOKEN_INTERSECTION: + stream.next() + stream.skip_whitespace() + pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT _path = _path.intersection( JSONPath( env=self, - 
selectors=self.parser.parse(stream), - fake_root=fake_root, + segments=self.parser.parse(stream), + pseudo_root=pseudo_root, ) ) else: # pragma: no cover # Parser.parse catches this too raise JSONPathSyntaxError( # noqa: TRY003 - f"unexpected token {stream.current.value!r}", - token=stream.current, + f"unexpected token {stream.current().value!r}", + token=stream.current(), ) return _path @@ -227,7 +274,7 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 def findall( self, path: str, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + data: JSONData, *, filter_context: Optional[FilterContextVars] = None, ) -> List[object]: @@ -257,7 +304,7 @@ def findall( def finditer( self, path: str, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + data: JSONData, *, filter_context: Optional[FilterContextVars] = None, ) -> Iterable[JSONPathMatch]: @@ -286,7 +333,7 @@ def finditer( def match( self, path: str, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + data: JSONData, *, filter_context: Optional[FilterContextVars] = None, ) -> Union[JSONPathMatch, None]: @@ -315,7 +362,8 @@ def match( def query( self, path: str, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + data: JSONData, + *, filter_context: Optional[FilterContextVars] = None, ) -> Query: """Return a `Query` iterator over matches found by applying _path_ to _data_. 
@@ -374,7 +422,7 @@ def query( async def findall_async( self, path: str, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + data: JSONData, *, filter_context: Optional[FilterContextVars] = None, ) -> List[object]: @@ -386,7 +434,7 @@ async def findall_async( async def finditer_async( self, path: str, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + data: JSONData, *, filter_context: Optional[FilterContextVars] = None, ) -> AsyncIterable[JSONPathMatch]: @@ -440,13 +488,14 @@ def check_well_typedness( self, token: Token, func: FilterFunction, - args: List[FilterExpression], + args: List[BaseExpression], ) -> None: """Check the well-typedness of a function's arguments at compile-time.""" # Correct number of arguments? if len(args) != len(func.arg_types): + plural = "" if len(func.arg_types) == 1 else "s" raise JSONPathTypeError( - f"{token.value!r}() requires {len(func.arg_types)} arguments", + f"{token.value}() requires {len(func.arg_types)} argument{plural}", token=token, ) @@ -456,7 +505,7 @@ def check_well_typedness( if typ == ExpressionType.VALUE: if not ( isinstance(arg, VALUE_TYPE_EXPRESSIONS) - or (isinstance(arg, Path) and arg.path.singular_query()) + or (isinstance(arg, FilterQuery) and arg.path.singular_query()) or (self._function_return_type(arg) == ExpressionType.VALUE) ): raise JSONPathTypeError( @@ -464,13 +513,13 @@ def check_well_typedness( token=token, ) elif typ == ExpressionType.LOGICAL: - if not isinstance(arg, (Path, InfixExpression)): + if not isinstance(arg, (FilterQuery, InfixExpression)): raise JSONPathTypeError( f"{token.value}() argument {idx} must be of LogicalType", token=token, ) elif typ == ExpressionType.NODES and not ( - isinstance(arg, Path) + isinstance(arg, FilterQuery) or self._function_return_type(arg) == ExpressionType.NODES ): raise JSONPathTypeError( @@ -478,7 +527,7 @@ def check_well_typedness( token=token, ) - def _function_return_type(self, expr: FilterExpression) -> Optional[ExpressionType]: + 
def _function_return_type(self, expr: BaseExpression) -> Optional[ExpressionType]: """Return the type returned from a filter function. If _expr_ is not a `FunctionExtension` or the registered function definition is @@ -568,7 +617,8 @@ def compare( # noqa: PLR0911 return left in right if operator == "contains" and isinstance(left, (Mapping, Sequence)): return right in left - if operator == "=~" and isinstance(right, re.Pattern) and isinstance(left, str): + if operator == "=~" and hasattr(right, "fullmatch") and isinstance(left, str): + # Right should be a regex.Pattern or an re.Pattern. return bool(right.fullmatch(left)) return False diff --git a/jsonpath/exceptions.py b/jsonpath/exceptions.py index c6797c5..62de398 100644 --- a/jsonpath/exceptions.py +++ b/jsonpath/exceptions.py @@ -5,6 +5,8 @@ from typing import TYPE_CHECKING from typing import Optional +from .token import TOKEN_EOF + if TYPE_CHECKING: from .token import Token @@ -22,13 +24,69 @@ def __init__(self, *args: object, token: Optional[Token] = None) -> None: self.token: Optional[Token] = token def __str__(self) -> str: - msg = super().__str__() + return self.detailed_message() + def detailed_message(self) -> str: + """Return an error message formatted with extra context info.""" if not self.token: - return msg + return super().__str__() - line, column = self.token.position() - return f"{msg}, line {line}, column {column}" + lineno, col, _prev, current, _next = self._error_context( + self.token.path, self.token.index + ) + + if self.token.kind == TOKEN_EOF: + col = len(current) + + pad = " " * len(str(lineno)) + length = len(self.token.value) + pointer = (" " * col) + ("^" * max(length, 1)) + + return ( + f"{self.message}\n" + f"{pad} -> {self.token.path!r} {lineno}:{col}\n" + f"{pad} |\n" + f"{lineno} | {current}\n" + f"{pad} | {pointer} {self.message}\n" + ) + + @property + def message(self) -> object: + """The exception's error message if one was given.""" + if self.args: + return self.args[0] + 
return None + + def _error_context(self, text: str, index: int) -> tuple[int, int, str, str, str]: + lines = text.splitlines(keepends=True) + cumulative_length = 0 + target_line_index = -1 + + for i, line in enumerate(lines): + cumulative_length += len(line) + if index < cumulative_length: + target_line_index = i + break + + if target_line_index == -1: + raise ValueError("index is out of bounds for the given string") + + # Line number (1-based) + line_number = target_line_index + 1 + # Column number within the line + column_number = index - (cumulative_length - len(lines[target_line_index])) + + previous_line = ( + lines[target_line_index - 1].rstrip() if target_line_index > 0 else "" + ) + current_line = lines[target_line_index].rstrip() + next_line = ( + lines[target_line_index + 1].rstrip() + if target_line_index < len(lines) - 1 + else "" + ) + + return line_number, column_number, previous_line, current_line, next_line class JSONPathSyntaxError(JSONPathError): @@ -77,6 +135,19 @@ def __init__(self, *args: object, token: Token) -> None: self.token = token +class JSONPathRecursionError(JSONPathError): + """An exception raised when the maximum recursion depth is reached. + + Arguments: + args: Arguments passed to `Exception`. + token: The token that caused the error. 
+ """ + + def __init__(self, *args: object, token: Token) -> None: + super().__init__(*args) + self.token = token + + class JSONPointerError(Exception): """Base class for all JSON Pointer errors.""" diff --git a/jsonpath/filter.py b/jsonpath/filter.py index 0556cbf..494e2bc 100644 --- a/jsonpath/filter.py +++ b/jsonpath/filter.py @@ -23,17 +23,14 @@ from .function_extensions import FilterFunction from .match import NodeList from .selectors import Filter as FilterSelector -from .selectors import ListSelector from .serialize import canonical_string if TYPE_CHECKING: from .path import JSONPath from .selectors import FilterContext -# ruff: noqa: D102, PLW1641 - -class FilterExpression(ABC): +class BaseExpression(ABC): """Base class for all filter expression nodes.""" __slots__ = ("volatile",) @@ -60,11 +57,11 @@ async def evaluate_async(self, context: FilterContext) -> object: """An async version of `evaluate`.""" @abstractmethod - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: """Return a list of direct child expressions.""" @abstractmethod - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 """Update this expression's child expressions. _children_ is assumed to have the same number of items as is returned @@ -72,7 +69,7 @@ def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG00 """ -class Nil(FilterExpression): +class Nil(BaseExpression): """The constant `nil`. Also aliased as `null` and `None`, sometimes. 
@@ -95,10 +92,10 @@ def evaluate(self, _: FilterContext) -> None: async def evaluate_async(self, _: FilterContext) -> None: return None - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [] - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 return @@ -126,7 +123,7 @@ def __repr__(self) -> str: UNDEFINED = _Undefined() -class Undefined(FilterExpression): +class Undefined(BaseExpression): """The constant `undefined`.""" __slots__ = () @@ -147,10 +144,10 @@ def evaluate(self, _: FilterContext) -> object: async def evaluate_async(self, _: FilterContext) -> object: return UNDEFINED - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [] - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 return @@ -159,7 +156,7 @@ def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG00 LITERAL_EXPRESSION_T = TypeVar("LITERAL_EXPRESSION_T") -class Literal(FilterExpression, Generic[LITERAL_EXPRESSION_T]): +class FilterExpressionLiteral(BaseExpression, Generic[LITERAL_EXPRESSION_T]): """Base class for filter expression literals.""" __slots__ = ("value",) @@ -183,14 +180,14 @@ def evaluate(self, _: FilterContext) -> LITERAL_EXPRESSION_T: async def evaluate_async(self, _: FilterContext) -> LITERAL_EXPRESSION_T: return self.value - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [] - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 return -class BooleanLiteral(Literal[bool]): +class BooleanLiteral(FilterExpressionLiteral[bool]): """A Boolean `True` or `False`.""" __slots__ = () @@ -202,7 
+199,7 @@ class BooleanLiteral(Literal[bool]): FALSE = BooleanLiteral(value=False) -class StringLiteral(Literal[str]): +class StringLiteral(FilterExpressionLiteral[str]): """A string literal.""" __slots__ = () @@ -211,19 +208,19 @@ def __str__(self) -> str: return canonical_string(self.value) -class IntegerLiteral(Literal[int]): +class IntegerLiteral(FilterExpressionLiteral[int]): """An integer literal.""" __slots__ = () -class FloatLiteral(Literal[float]): +class FloatLiteral(FilterExpressionLiteral[float]): """A float literal.""" __slots__ = () -class RegexLiteral(Literal[Pattern[str]]): +class RegexLiteral(FilterExpressionLiteral[Pattern[str]]): """A regex literal.""" __slots__ = () @@ -246,12 +243,12 @@ def __str__(self) -> str: return f"/{self.value.pattern}/{''.join(flags)}" -class ListLiteral(FilterExpression): +class ListLiteral(BaseExpression): """A list literal.""" __slots__ = ("items",) - def __init__(self, items: List[FilterExpression]) -> None: + def __init__(self, items: List[BaseExpression]) -> None: self.items = items super().__init__() @@ -268,19 +265,19 @@ def evaluate(self, context: FilterContext) -> object: async def evaluate_async(self, context: FilterContext) -> object: return [await item.evaluate_async(context) for item in self.items] - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return self.items - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 self.items = children -class PrefixExpression(FilterExpression): +class PrefixExpression(BaseExpression): """An expression composed of a prefix operator and another expression.""" __slots__ = ("operator", "right") - def __init__(self, operator: str, right: FilterExpression): + def __init__(self, operator: str, right: BaseExpression): self.operator = operator self.right = right super().__init__() @@ -306,24 +303,24 @@ def evaluate(self, 
context: FilterContext) -> object: async def evaluate_async(self, context: FilterContext) -> object: return self._evaluate(context, await self.right.evaluate_async(context)) - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [self.right] - def set_children(self, children: List[FilterExpression]) -> None: + def set_children(self, children: List[BaseExpression]) -> None: assert len(children) == 1 self.right = children[0] -class InfixExpression(FilterExpression): +class InfixExpression(BaseExpression): """A pair of expressions and a comparison or logical operator.""" __slots__ = ("left", "operator", "right", "logical") def __init__( self, - left: FilterExpression, + left: BaseExpression, operator: str, - right: FilterExpression, + right: BaseExpression, ): self.left = left self.operator = operator @@ -366,10 +363,10 @@ async def evaluate_async(self, context: FilterContext) -> bool: return context.env.compare(left, self.operator, right) - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [self.left, self.right] - def set_children(self, children: List[FilterExpression]) -> None: + def set_children(self, children: List[BaseExpression]) -> None: assert len(children) == 2 # noqa: PLR2004 self.left = children[0] self.right = children[1] @@ -381,19 +378,19 @@ def set_children(self, children: List[FilterExpression]) -> None: PRECEDENCE_PREFIX = 7 -class BooleanExpression(FilterExpression): - """An expression that always evaluates to `True` or `False`.""" +class FilterExpression(BaseExpression): + """An expression that evaluates to `True` or `False`.""" __slots__ = ("expression",) - def __init__(self, expression: FilterExpression): + def __init__(self, expression: BaseExpression): self.expression = expression super().__init__() - def cache_tree(self) -> BooleanExpression: + def cache_tree(self) -> FilterExpression: """Return a copy of _self.expression_ augmented with caching 
nodes.""" - def _cache_tree(expr: FilterExpression) -> FilterExpression: + def _cache_tree(expr: BaseExpression) -> BaseExpression: children = expr.children() if expr.volatile: _expr = copy.copy(expr) @@ -404,7 +401,7 @@ def _cache_tree(expr: FilterExpression) -> FilterExpression: _expr.set_children([_cache_tree(child) for child in children]) return _expr - return BooleanExpression(_cache_tree(copy.copy(self.expression))) + return FilterExpression(_cache_tree(copy.copy(self.expression))) def cacheable_nodes(self) -> bool: """Return `True` if there are any cacheable nodes in this expression tree.""" @@ -418,11 +415,11 @@ def __str__(self) -> str: def __eq__(self, other: object) -> bool: return ( - isinstance(other, BooleanExpression) and self.expression == other.expression + isinstance(other, FilterExpression) and self.expression == other.expression ) def _canonical_string( - self, expression: FilterExpression, parent_precedence: int + self, expression: BaseExpression, parent_precedence: int ) -> str: if isinstance(expression, InfixExpression): if expression.operator == "&&": @@ -454,15 +451,15 @@ def evaluate(self, context: FilterContext) -> bool: async def evaluate_async(self, context: FilterContext) -> bool: return context.env.is_truthy(await self.expression.evaluate_async(context)) - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [self.expression] - def set_children(self, children: List[FilterExpression]) -> None: + def set_children(self, children: List[BaseExpression]) -> None: assert len(children) == 1 self.expression = children[0] -class CachingFilterExpression(FilterExpression): +class CachingFilterExpression(BaseExpression): """A FilterExpression wrapper that caches the result.""" __slots__ = ( @@ -472,7 +469,7 @@ class CachingFilterExpression(FilterExpression): _UNSET = object() - def __init__(self, expression: FilterExpression): + def __init__(self, expression: BaseExpression): self.volatile = False 
self._expr = expression self._cached: object = self._UNSET @@ -487,14 +484,14 @@ async def evaluate_async(self, context: FilterContext) -> object: self._cached = await self._expr.evaluate_async(context) return self._cached - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return self._expr.children() - def set_children(self, children: List[FilterExpression]) -> None: + def set_children(self, children: List[BaseExpression]) -> None: self._expr.set_children(children) -class Path(FilterExpression, ABC): +class FilterQuery(BaseExpression, ABC): """Base expression for all _sub paths_ found in filter expressions.""" __slots__ = ("path",) @@ -504,25 +501,22 @@ def __init__(self, path: JSONPath) -> None: super().__init__() def __eq__(self, other: object) -> bool: - return isinstance(other, Path) and str(self) == str(other) - - def children(self) -> List[FilterExpression]: - _children: List[FilterExpression] = [] - for segment in self.path.selectors: - if isinstance(segment, ListSelector): - _children.extend( - selector.expression - for selector in segment.items - if isinstance(selector, FilterSelector) - ) + return isinstance(other, FilterQuery) and str(self) == str(other) + + def children(self) -> List[BaseExpression]: + _children: List[BaseExpression] = [] + for segment in self.path.segments: + for selector in segment.selectors: + if isinstance(selector, FilterSelector): + _children.append(selector.expression) return _children - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 # self.path has its own cache return -class SelfPath(Path): +class RelativeFilterQuery(FilterQuery): """A JSONPath starting at the current node.""" __slots__ = () @@ -535,11 +529,9 @@ def __str__(self) -> str: return "@" + str(self.path)[1:] def evaluate(self, context: FilterContext) -> object: - if isinstance(context.current, str): # TODO: 
refactor - if self.path.empty(): - return context.current - return NodeList() - if not isinstance(context.current, (Sequence, Mapping)): + if isinstance(context.current, str) or not isinstance( + context.current, (Sequence, Mapping) + ): if self.path.empty(): return context.current return NodeList() @@ -552,11 +544,9 @@ def evaluate(self, context: FilterContext) -> object: ) async def evaluate_async(self, context: FilterContext) -> object: - if isinstance(context.current, str): # TODO: refactor - if self.path.empty(): - return context.current - return NodeList() - if not isinstance(context.current, (Sequence, Mapping)): + if isinstance(context.current, str) or not isinstance( + context.current, (Sequence, Mapping) + ): if self.path.empty(): return context.current return NodeList() @@ -572,7 +562,7 @@ async def evaluate_async(self, context: FilterContext) -> object: ) -class RootPath(Path): +class RootFilterQuery(FilterQuery): """A JSONPath starting at the root node.""" __slots__ = () @@ -606,7 +596,7 @@ async def evaluate_async(self, context: FilterContext) -> object: ) -class FilterContextPath(Path): +class FilterContextPath(FilterQuery): """A JSONPath starting at the root of any extra context data.""" __slots__ = () @@ -641,12 +631,12 @@ async def evaluate_async(self, context: FilterContext) -> object: ) -class FunctionExtension(FilterExpression): +class FunctionExtension(BaseExpression): """A filter function.""" __slots__ = ("name", "args") - def __init__(self, name: str, args: Sequence[FilterExpression]) -> None: + def __init__(self, name: str, args: Sequence[BaseExpression]) -> None: self.name = name self.args = args super().__init__() @@ -666,7 +656,9 @@ def evaluate(self, context: FilterContext) -> object: try: func = context.env.function_extensions[self.name] except KeyError: - return UNDEFINED # TODO: should probably raise an exception + # This can only happen if the environment's function register has been + # changed since the query was parsed. 
+ return UNDEFINED args = [arg.evaluate(context) for arg in self.args] return func(*self._unpack_node_lists(func, args)) @@ -674,7 +666,9 @@ async def evaluate_async(self, context: FilterContext) -> object: try: func = context.env.function_extensions[self.name] except KeyError: - return UNDEFINED # TODO: should probably raise an exception + # This can only happen if the environment's function register has been + # changed since the query was parsed. + return UNDEFINED args = [await arg.evaluate_async(context) for arg in self.args] return func(*self._unpack_node_lists(func, args)) @@ -713,15 +707,15 @@ def _unpack_node_lists( for obj in args ] - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return list(self.args) - def set_children(self, children: List[FilterExpression]) -> None: + def set_children(self, children: List[BaseExpression]) -> None: assert len(children) == len(self.args) self.args = children -class CurrentKey(FilterExpression): +class CurrentKey(BaseExpression): """The key/property or index associated with the current object.""" __slots__ = () @@ -744,17 +738,17 @@ def evaluate(self, context: FilterContext) -> object: async def evaluate_async(self, context: FilterContext) -> object: return self.evaluate(context) - def children(self) -> List[FilterExpression]: + def children(self) -> List[BaseExpression]: return [] - def set_children(self, children: List[FilterExpression]) -> None: # noqa: ARG002 + def set_children(self, children: List[BaseExpression]) -> None: # noqa: ARG002 return CURRENT_KEY = CurrentKey() -def walk(expr: FilterExpression) -> Iterable[FilterExpression]: +def walk(expr: BaseExpression) -> Iterable[BaseExpression]: """Walk the filter expression tree starting at _expr_.""" yield expr for child in expr.children(): @@ -764,7 +758,7 @@ def walk(expr: FilterExpression) -> Iterable[FilterExpression]: VALUE_TYPE_EXPRESSIONS = ( Nil, Undefined, - Literal, + FilterExpressionLiteral, ListLiteral, 
CurrentKey, ) diff --git a/jsonpath/function_extensions/_pattern.py b/jsonpath/function_extensions/_pattern.py new file mode 100644 index 0000000..a42a689 --- /dev/null +++ b/jsonpath/function_extensions/_pattern.py @@ -0,0 +1,31 @@ +from typing import List + + +def map_re(pattern: str) -> str: + escaped = False + char_class = False + parts: List[str] = [] + for ch in pattern: + if escaped: + parts.append(ch) + escaped = False + continue + + if ch == ".": + if not char_class: + parts.append(r"(?:(?![\r\n])\P{Cs}|\p{Cs}\p{Cs})") + else: + parts.append(ch) + elif ch == "\\": + escaped = True + parts.append(ch) + elif ch == "[": + char_class = True + parts.append(ch) + elif ch == "]": + char_class = False + parts.append(ch) + else: + parts.append(ch) + + return "".join(parts) diff --git a/jsonpath/function_extensions/match.py b/jsonpath/function_extensions/match.py index 7bc8749..68494b8 100644 --- a/jsonpath/function_extensions/match.py +++ b/jsonpath/function_extensions/match.py @@ -1,10 +1,26 @@ """The standard `match` function extension.""" -import re +try: + import regex as re + + REGEX_AVAILABLE = True +except ImportError: + import re # type: ignore + + REGEX_AVAILABLE = False + +try: + from iregexp_check import check + + IREGEXP_AVAILABLE = True +except ImportError: + IREGEXP_AVAILABLE = False from jsonpath.function_extensions import ExpressionType from jsonpath.function_extensions import FilterFunction +from ._pattern import map_re + class Match(FilterFunction): """A type-aware implementation of the standard `match` function.""" @@ -14,8 +30,21 @@ class Match(FilterFunction): def __call__(self, string: str, pattern: str) -> bool: """Return `True` if _string_ matches _pattern_, or `False` otherwise.""" + # TODO: re.match caches compiled patterns internally, but `map_re` and `check` + # are not cached. + + # TODO: validate literal patterns at compile time? 
+ + if IREGEXP_AVAILABLE and (not isinstance(pattern, str) or not check(pattern)): + return False + + if REGEX_AVAILABLE: + try: + pattern = map_re(pattern) + except TypeError: + return False + try: - # re.fullmatch caches compiled patterns internally return bool(re.fullmatch(pattern, string)) except (TypeError, re.error): return False diff --git a/jsonpath/function_extensions/search.py b/jsonpath/function_extensions/search.py index ed88635..e6d9086 100644 --- a/jsonpath/function_extensions/search.py +++ b/jsonpath/function_extensions/search.py @@ -1,10 +1,26 @@ """The standard `search` function extension.""" -import re +try: + import regex as re + + REGEX_AVAILABLE = True +except ImportError: + import re # type: ignore + + REGEX_AVAILABLE = False + +try: + from iregexp_check import check + + IREGEXP_AVAILABLE = True +except ImportError: + IREGEXP_AVAILABLE = False from jsonpath.function_extensions import ExpressionType from jsonpath.function_extensions import FilterFunction +from ._pattern import map_re + class Search(FilterFunction): """A type-aware implementation of the standard `search` function.""" @@ -14,8 +30,21 @@ class Search(FilterFunction): def __call__(self, string: str, pattern: str) -> bool: """Return `True` if _string_ contains _pattern_, or `False` otherwise.""" + # TODO: re.search caches compiled patterns internally, but `map_re` and `check` + # are not cached. + + # TODO: validate literal patterns at compile time? 
+ + if IREGEXP_AVAILABLE and (not isinstance(pattern, str) or not check(pattern)): + return False + + if REGEX_AVAILABLE: + try: + pattern = map_re(pattern) + except TypeError: + return False + try: - # re.search caches compiled patterns internally return bool(re.search(pattern, string)) except (TypeError, re.error): return False diff --git a/jsonpath/lex.py b/jsonpath/lex.py index 4c4422d..7737c90 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -10,14 +10,16 @@ from .exceptions import JSONPathSyntaxError from .token import TOKEN_AND -from .token import TOKEN_BARE_PROPERTY +from .token import TOKEN_COLON from .token import TOKEN_COMMA from .token import TOKEN_CONTAINS from .token import TOKEN_DDOT +from .token import TOKEN_DOT +from .token import TOKEN_DOT_KEY_PROPERTY from .token import TOKEN_DOT_PROPERTY from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EQ -from .token import TOKEN_FAKE_ROOT +from .token import TOKEN_ERROR from .token import TOKEN_FALSE from .token import TOKEN_FILTER from .token import TOKEN_FILTER_CONTEXT @@ -25,26 +27,27 @@ from .token import TOKEN_FUNCTION from .token import TOKEN_GE from .token import TOKEN_GT -from .token import TOKEN_ILLEGAL from .token import TOKEN_IN from .token import TOKEN_INT from .token import TOKEN_INTERSECTION from .token import TOKEN_KEY +from .token import TOKEN_KEY_NAME from .token import TOKEN_KEYS +from .token import TOKEN_KEYS_FILTER +from .token import TOKEN_LBRACKET from .token import TOKEN_LE from .token import TOKEN_LG -from .token import TOKEN_LIST_SLICE -from .token import TOKEN_LIST_START from .token import TOKEN_LPAREN from .token import TOKEN_LT from .token import TOKEN_MISSING +from .token import TOKEN_NAME from .token import TOKEN_NE from .token import TOKEN_NIL from .token import TOKEN_NONE from .token import TOKEN_NOT from .token import TOKEN_NULL from .token import TOKEN_OR -from .token import TOKEN_PROPERTY +from .token import TOKEN_PSEUDO_ROOT from .token import 
TOKEN_RBRACKET from .token import TOKEN_RE from .token import TOKEN_RE_FLAGS @@ -53,13 +56,10 @@ from .token import TOKEN_RPAREN from .token import TOKEN_SELF from .token import TOKEN_SINGLE_QUOTE_STRING -from .token import TOKEN_SKIP -from .token import TOKEN_SLICE_START -from .token import TOKEN_SLICE_STEP -from .token import TOKEN_SLICE_STOP from .token import TOKEN_TRUE from .token import TOKEN_UNDEFINED from .token import TOKEN_UNION +from .token import TOKEN_WHITESPACE from .token import TOKEN_WILD from .token import Token @@ -87,7 +87,7 @@ class attributes. Then setting `lexer_class` on a `JSONPathEnvironment`. key_pattern = r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*" - # `not` or ! + # ! or `not` logical_not_pattern = r"(?:not\b)|!" # && or `and` @@ -103,45 +103,50 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: self.single_quote_pattern = r"'(?P(?:(?!(?{self.key_pattern})" + self.dot_property_pattern = rf"(?P\.)(?P{self.key_pattern})" - self.slice_list_pattern = ( - r"(?P\-?\d*)\s*" - r":\s*(?P\-?\d*)\s*" - r"(?::\s*(?P\-?\d*))?" 
+ # .~thing + self.dot_key_pattern = ( + r"(?P\.)" + rf"(?P{re.escape(env.keys_selector_token)})" + rf"(?P{self.key_pattern})" ) # /pattern/ or /pattern/flags self.re_pattern = r"/(?P.+?)/(?P[aims]*)" # func( - self.function_pattern = r"(?P[a-z][a-z_0-9]+)\(\s*" + self.function_pattern = r"(?P[a-z][a-z_0-9]+)(?P\()" - self.rules = self.compile_rules() + self.rules = self.compile_strict_rules() if env.strict else self.compile_rules() def compile_rules(self) -> Pattern[str]: """Prepare regular expression rules.""" env_tokens = [ (TOKEN_ROOT, self.env.root_token), - (TOKEN_FAKE_ROOT, self.env.fake_root_token), + (TOKEN_PSEUDO_ROOT, self.env.pseudo_root_token), (TOKEN_SELF, self.env.self_token), (TOKEN_KEY, self.env.key_token), (TOKEN_UNION, self.env.union_token), (TOKEN_INTERSECTION, self.env.intersection_token), (TOKEN_FILTER_CONTEXT, self.env.filter_context_token), (TOKEN_KEYS, self.env.keys_selector_token), + (TOKEN_KEYS_FILTER, self.env.keys_filter_token), ] rules = [ (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern), (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern), (TOKEN_RE_PATTERN, self.re_pattern), - (TOKEN_LIST_SLICE, self.slice_list_pattern), - (TOKEN_FUNCTION, self.function_pattern), + (TOKEN_DOT_KEY_PROPERTY, self.dot_key_pattern), (TOKEN_DOT_PROPERTY, self.dot_property_pattern), - (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"), - (TOKEN_INT, r"-?\d+(?P[eE][+\-]?\d+)?\b"), + ( + TOKEN_FLOAT, + r"(:?-?[0-9]+\.[0-9]+(?:[eE][+-]?[0-9]+)?)|(-?[0-9]+[eE]-[0-9]+)", + ), + (TOKEN_INT, r"-?[0-9]+(?:[eE]\+?[0-9]+)?"), (TOKEN_DDOT, r"\.\."), + (TOKEN_DOT, r"\."), (TOKEN_AND, self.logical_and_pattern), (TOKEN_OR, self.logical_or_pattern), *[ @@ -162,9 +167,10 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_CONTAINS, r"contains\b"), (TOKEN_UNDEFINED, r"undefined\b"), (TOKEN_MISSING, r"missing\b"), - (TOKEN_LIST_START, r"\["), + (TOKEN_LBRACKET, r"\["), (TOKEN_RBRACKET, r"]"), (TOKEN_COMMA, r","), + (TOKEN_COLON, r":"), (TOKEN_EQ, r"=="), (TOKEN_NE, 
r"!="), (TOKEN_LG, r"<>"), @@ -173,12 +179,70 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_RE, r"=~"), (TOKEN_LT, r"<"), (TOKEN_GT, r">"), - (TOKEN_NOT, self.logical_not_pattern), - (TOKEN_BARE_PROPERTY, self.key_pattern), + (TOKEN_NOT, self.logical_not_pattern), # Must go after "!=" + (TOKEN_FUNCTION, self.function_pattern), + (TOKEN_NAME, self.key_pattern), # Must go after reserved words (TOKEN_LPAREN, r"\("), (TOKEN_RPAREN, r"\)"), - (TOKEN_SKIP, r"[ \n\t\r\.]+"), - (TOKEN_ILLEGAL, r"."), + (TOKEN_WHITESPACE, r"[ \n\t\r]+"), + (TOKEN_ERROR, r"."), + ] + + return re.compile( + "|".join(f"(?P<{token}>{pattern})" for token, pattern in rules), + re.DOTALL, + ) + + def compile_strict_rules(self) -> Pattern[str]: + """Prepare regular expression rules in strict mode.""" + env_tokens = [ + (TOKEN_ROOT, self.env.root_token), + (TOKEN_SELF, self.env.self_token), + ] + + rules = [ + (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern), + (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern), + (TOKEN_DOT_PROPERTY, self.dot_property_pattern), + ( + TOKEN_FLOAT, + r"(:?-?[0-9]+\.[0-9]+(?:[eE][+-]?[0-9]+)?)|(-?[0-9]+[eE]-[0-9]+)", + ), + (TOKEN_INT, r"-?[0-9]+(?:[eE]\+?[0-9]+)?"), + (TOKEN_DDOT, r"\.\."), + (TOKEN_DOT, r"\."), + (TOKEN_AND, r"&&"), + (TOKEN_OR, r"\|\|"), + *[ + (token, re.escape(pattern)) + for token, pattern in sorted( + env_tokens, key=lambda x: len(x[1]), reverse=True + ) + if pattern + ], + (TOKEN_WILD, r"\*"), + (TOKEN_FILTER, r"\?"), + (TOKEN_TRUE, r"true\b"), + (TOKEN_FALSE, r"false\b"), + (TOKEN_NULL, r"null\b"), + (TOKEN_LBRACKET, r"\["), + (TOKEN_RBRACKET, r"]"), + (TOKEN_COMMA, r","), + (TOKEN_COLON, r":"), + (TOKEN_EQ, r"=="), + (TOKEN_NE, r"!="), + (TOKEN_LG, r"<>"), + (TOKEN_LE, r"<="), + (TOKEN_GE, r">="), + (TOKEN_LT, r"<"), + (TOKEN_GT, r">"), + (TOKEN_NOT, r"!"), # Must go after "!=" + (TOKEN_FUNCTION, self.function_pattern), + (TOKEN_NAME, self.key_pattern), # Must go after reserved words + (TOKEN_LPAREN, r"\("), + (TOKEN_RPAREN, 
r"\)"), + (TOKEN_WHITESPACE, r"[ \n\t\r]+"), + (TOKEN_ERROR, r"."), ] return re.compile( @@ -196,31 +260,25 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 if kind == TOKEN_DOT_PROPERTY: yield _token( - kind=TOKEN_PROPERTY, - value=match.group("G_PROP"), - index=match.start("G_PROP"), + kind=TOKEN_DOT, + value=match.group("G_DOT"), + index=match.start("G_DOT"), ) - elif kind == TOKEN_BARE_PROPERTY: yield _token( - kind=TOKEN_BARE_PROPERTY, - value=match.group(), - index=match.start(), - ) - elif kind == TOKEN_LIST_SLICE: - yield _token( - kind=TOKEN_SLICE_START, - value=match.group("G_LSLICE_START"), - index=match.start("G_LSLICE_START"), + kind=TOKEN_NAME, + value=match.group("G_PROP"), + index=match.start("G_PROP"), ) + elif kind == TOKEN_DOT_KEY_PROPERTY: yield _token( - kind=TOKEN_SLICE_STOP, - value=match.group("G_LSLICE_STOP"), - index=match.start("G_LSLICE_STOP"), + kind=TOKEN_DOT, + value=match.group("G_DOT_KEY"), + index=match.start("G_DOT_KEY"), ) yield _token( - kind=TOKEN_SLICE_STEP, - value=match.group("G_LSLICE_STEP") or "", - index=match.start("G_LSLICE_STEP"), + kind=TOKEN_KEY_NAME, + value=match.group("G_PROP_KEY"), + index=match.start("G_PROP_KEY"), ) elif kind == TOKEN_DOUBLE_QUOTE_STRING: yield _token( @@ -234,19 +292,6 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 value=match.group("G_SQUOTE"), index=match.start("G_SQUOTE"), ) - elif kind == TOKEN_INT: - if match.group("G_EXP") and match.group("G_EXP")[1] == "-": - yield _token( - kind=TOKEN_FLOAT, - value=match.group(), - index=match.start(), - ) - else: - yield _token( - kind=TOKEN_INT, - value=match.group(), - index=match.start(), - ) elif kind == TOKEN_RE_PATTERN: yield _token( kind=TOKEN_RE_PATTERN, @@ -270,13 +315,17 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912 value=match.group("G_FUNC"), index=match.start("G_FUNC"), ) - elif kind == TOKEN_SKIP: - continue - elif kind == TOKEN_ILLEGAL: + + yield _token( + kind=TOKEN_LPAREN, 
+ value=match.group("G_FUNC_PAREN"), + index=match.start("G_FUNC_PAREN"), + ) + elif kind == TOKEN_ERROR: raise JSONPathSyntaxError( f"unexpected token {match.group()!r}", token=_token( - TOKEN_ILLEGAL, + TOKEN_ERROR, value=match.group(), index=match.start(), ), diff --git a/jsonpath/match.py b/jsonpath/match.py index dea2fee..964dff4 100644 --- a/jsonpath/match.py +++ b/jsonpath/match.py @@ -11,6 +11,7 @@ from typing import Union from .pointer import JSONPointer +from .serialize import canonical_string FilterContextVars = Mapping[str, Any] PathPart = Union[int, str] @@ -69,6 +70,18 @@ def add_child(self, *children: JSONPathMatch) -> None: """Append one or more children to this match.""" self.children.extend(children) + def new_child(self, obj: object, key: Union[int, str]) -> JSONPathMatch: + """Return a new JSONPathMatch instance with this instance as its parent.""" + return self.__class__( + filter_context=self.filter_context(), + obj=obj, + parent=self, + parts=self.parts + (key,), + path=self.path + + f"[{canonical_string(key) if isinstance(key, str) else key}]", + root=self.root, + ) + def filter_context(self) -> FilterContextVars: """Return filter context data for this match.""" return self._filter_context diff --git a/jsonpath/parse.py b/jsonpath/parse.py index eaef7fc..107d9a0 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -8,6 +8,7 @@ from typing import Callable from typing import Dict from typing import Iterable +from typing import Iterator from typing import List from typing import Optional from typing import Union @@ -22,41 +23,45 @@ from .filter import NIL from .filter import TRUE from .filter import UNDEFINED_LITERAL -from .filter import BooleanExpression +from .filter import BaseExpression from .filter import FilterContextPath from .filter import FilterExpression +from .filter import FilterExpressionLiteral +from .filter import FilterQuery from .filter import FloatLiteral from .filter import FunctionExtension from .filter import 
InfixExpression from .filter import IntegerLiteral from .filter import ListLiteral -from .filter import Literal from .filter import Nil -from .filter import Path from .filter import PrefixExpression from .filter import RegexLiteral -from .filter import RootPath -from .filter import SelfPath +from .filter import RelativeFilterQuery +from .filter import RootFilterQuery from .filter import StringLiteral from .path import JSONPath +from .segments import JSONPathChildSegment +from .segments import JSONPathRecursiveDescentSegment +from .segments import JSONPathSegment from .selectors import Filter from .selectors import IndexSelector from .selectors import JSONPathSelector +from .selectors import KeySelector +from .selectors import KeysFilter from .selectors import KeysSelector -from .selectors import ListSelector -from .selectors import PropertySelector -from .selectors import RecursiveDescentSelector +from .selectors import NameSelector +from .selectors import SingularQuerySelector from .selectors import SliceSelector -from .selectors import WildSelector +from .selectors import WildcardSelector from .token import TOKEN_AND -from .token import TOKEN_BARE_PROPERTY +from .token import TOKEN_COLON from .token import TOKEN_COMMA from .token import TOKEN_CONTAINS from .token import TOKEN_DDOT +from .token import TOKEN_DOT from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EOF from .token import TOKEN_EQ -from .token import TOKEN_FAKE_ROOT from .token import TOKEN_FALSE from .token import TOKEN_FILTER from .token import TOKEN_FILTER_CONTEXT @@ -68,20 +73,23 @@ from .token import TOKEN_INT from .token import TOKEN_INTERSECTION from .token import TOKEN_KEY +from .token import TOKEN_KEY_NAME from .token import TOKEN_KEYS +from .token import TOKEN_KEYS_FILTER +from .token import TOKEN_LBRACKET from .token import TOKEN_LE from .token import TOKEN_LG -from .token import TOKEN_LIST_START from .token import TOKEN_LPAREN from .token import TOKEN_LT from .token 
import TOKEN_MISSING +from .token import TOKEN_NAME from .token import TOKEN_NE from .token import TOKEN_NIL from .token import TOKEN_NONE from .token import TOKEN_NOT from .token import TOKEN_NULL from .token import TOKEN_OR -from .token import TOKEN_PROPERTY +from .token import TOKEN_PSEUDO_ROOT from .token import TOKEN_RBRACKET from .token import TOKEN_RE from .token import TOKEN_RE_FLAGS @@ -90,14 +98,13 @@ from .token import TOKEN_RPAREN from .token import TOKEN_SELF from .token import TOKEN_SINGLE_QUOTE_STRING -from .token import TOKEN_SLICE_START -from .token import TOKEN_SLICE_STEP -from .token import TOKEN_SLICE_STOP from .token import TOKEN_TRUE from .token import TOKEN_UNDEFINED from .token import TOKEN_UNION +from .token import TOKEN_WHITESPACE from .token import TOKEN_WILD from .token import Token +from .unescape import unescape_string if TYPE_CHECKING: from .env import JSONPathEnvironment @@ -145,7 +152,6 @@ class Parser: """A JSONPath parser bound to a JSONPathEnvironment.""" PRECEDENCE_LOWEST = 1 - PRECEDENCE_LOGICALRIGHT = 2 PRECEDENCE_LOGICAL_OR = 3 PRECEDENCE_LOGICAL_AND = 4 PRECEDENCE_RELATIONAL = 5 @@ -234,16 +240,16 @@ class Parser: def __init__(self, *, env: JSONPathEnvironment) -> None: self.env = env - self.token_map: Dict[str, Callable[[TokenStream], FilterExpression]] = { + self.token_map: Dict[str, Callable[[TokenStream], BaseExpression]] = { TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, - TOKEN_FAKE_ROOT: self.parse_root_path, + TOKEN_PSEUDO_ROOT: self.parse_absolute_query, TOKEN_FALSE: self.parse_boolean, TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, TOKEN_FLOAT: self.parse_float_literal, TOKEN_FUNCTION: self.parse_function_extension, TOKEN_INT: self.parse_integer_literal, TOKEN_KEY: self.parse_current_key, - TOKEN_LIST_START: self.parse_list_literal, + TOKEN_LBRACKET: self.parse_list_literal, TOKEN_LPAREN: self.parse_grouped_expression, TOKEN_MISSING: self.parse_undefined, TOKEN_NIL: self.parse_nil, @@ -251,14 +257,14 
@@ def __init__(self, *, env: JSONPathEnvironment) -> None: TOKEN_NOT: self.parse_prefix_expression, TOKEN_NULL: self.parse_nil, TOKEN_RE_PATTERN: self.parse_regex, - TOKEN_ROOT: self.parse_root_path, - TOKEN_SELF: self.parse_self_path, + TOKEN_ROOT: self.parse_absolute_query, + TOKEN_SELF: self.parse_relative_query, TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal, TOKEN_TRUE: self.parse_boolean, TOKEN_UNDEFINED: self.parse_undefined, } - self.list_item_map: Dict[str, Callable[[TokenStream], FilterExpression]] = { + self.list_item_map: Dict[str, Callable[[TokenStream], BaseExpression]] = { TOKEN_FALSE: self.parse_boolean, TOKEN_FLOAT: self.parse_float_literal, TOKEN_INT: self.parse_integer_literal, @@ -271,10 +277,10 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: } self.function_argument_map: Dict[ - str, Callable[[TokenStream], FilterExpression] + str, Callable[[TokenStream], BaseExpression] ] = { TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, - TOKEN_FAKE_ROOT: self.parse_root_path, + TOKEN_PSEUDO_ROOT: self.parse_absolute_query, TOKEN_FALSE: self.parse_boolean, TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, TOKEN_FLOAT: self.parse_float_literal, @@ -284,212 +290,291 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: TOKEN_NIL: self.parse_nil, TOKEN_NONE: self.parse_nil, TOKEN_NULL: self.parse_nil, - TOKEN_ROOT: self.parse_root_path, - TOKEN_SELF: self.parse_self_path, + TOKEN_ROOT: self.parse_absolute_query, + TOKEN_SELF: self.parse_relative_query, TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal, TOKEN_TRUE: self.parse_boolean, } - def parse(self, stream: TokenStream) -> Iterable[JSONPathSelector]: - """Parse a JSONPath from a stream of tokens.""" - if stream.current.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT}: - stream.next_token() - yield from self.parse_path(stream, in_filter=False) + def parse(self, stream: TokenStream) -> Iterator[JSONPathSegment]: + """Parse a JSONPath query from a stream of tokens.""" + # Leading 
whitespace is not allowed in strict mode. + if stream.skip_whitespace() and self.env.strict: + raise JSONPathSyntaxError( + "unexpected leading whitespace", token=stream.current() + ) + + # Trailing whitespace is not allowed in strict mode. + if ( + self.env.strict + and stream.tokens + and stream.tokens[-1].kind == TOKEN_WHITESPACE + ): + raise JSONPathSyntaxError( + "unexpected trailing whitespace", token=stream.tokens[-1] + ) + + token = stream.current() + + if token.kind == TOKEN_ROOT or ( + token.kind == TOKEN_PSEUDO_ROOT and not self.env.strict + ): + stream.next() + elif self.env.strict: + # Raises a syntax error because the current token is not TOKEN_ROOT. + stream.expect(TOKEN_ROOT) - if stream.current.kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION): + yield from self.parse_query(stream) + + if stream.current().kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION): raise JSONPathSyntaxError( - f"unexpected token {stream.current.value!r}", - token=stream.current, + f"unexpected token {stream.current().value!r}", + token=stream.current(), ) - def parse_path( - self, - stream: TokenStream, - *, - in_filter: bool = False, - ) -> Iterable[JSONPathSelector]: - """Parse a top-level JSONPath, or one that is nested in a filter.""" + def parse_query(self, stream: TokenStream) -> Iterable[JSONPathSegment]: + """Parse a JSONPath query string. + + This method assumes the root, current or pseudo root identifier has + already been consumed. + """ + if not self.env.strict and stream.current().kind in { + TOKEN_NAME, + TOKEN_WILD, + TOKEN_KEYS, + TOKEN_KEY_NAME, + }: + # A non-standard "bare" path. One that starts with a shorthand selector + # without a leading identifier (`$`, `@`, `^` or `_`). + # + # When no identifier is given, a root query (`$`) is assumed. 
+ token = stream.current() + selector = self.parse_shorthand_selector(stream) + yield JSONPathChildSegment(env=self.env, token=token, selectors=(selector,)) + while True: - if stream.current.kind in (TOKEN_PROPERTY, TOKEN_BARE_PROPERTY): - yield PropertySelector( - env=self.env, - token=stream.current, - name=stream.current.value, - shorthand=True, - ) - elif stream.current.kind == TOKEN_SLICE_START: - yield self.parse_slice(stream) - elif stream.current.kind == TOKEN_WILD: - yield WildSelector( - env=self.env, - token=stream.current, - shorthand=True, + stream.skip_whitespace() + token = stream.next() + + if token.kind == TOKEN_DOT: + selector = self.parse_shorthand_selector(stream) + yield JSONPathChildSegment( + env=self.env, token=token, selectors=(selector,) ) - elif stream.current.kind == TOKEN_KEYS: - yield KeysSelector( - env=self.env, - token=stream.current, - shorthand=True, + elif token.kind == TOKEN_DDOT: + if stream.current().kind == TOKEN_LBRACKET: + selectors = tuple(self.parse_bracketed_selection(stream)) + else: + selectors = (self.parse_shorthand_selector(stream),) + + yield JSONPathRecursiveDescentSegment( + env=self.env, token=token, selectors=selectors ) - elif stream.current.kind == TOKEN_DDOT: - yield RecursiveDescentSelector( + elif token.kind == TOKEN_LBRACKET: + stream.pos -= 1 + yield JSONPathChildSegment( env=self.env, - token=stream.current, + token=token, + selectors=tuple(self.parse_bracketed_selection(stream)), ) - elif stream.current.kind == TOKEN_LIST_START: - yield self.parse_selector_list(stream) + elif token.kind == TOKEN_EOF: + break else: - if in_filter: - stream.push(stream.current) + # An embedded query. Put the token back on the stream. 
+ stream.pos -= 1 break - stream.next_token() + def parse_shorthand_selector(self, stream: TokenStream) -> JSONPathSelector: + token = stream.next() - def parse_slice(self, stream: TokenStream) -> SliceSelector: - """Parse a slice JSONPath expression from a stream of tokens.""" - start_token = stream.next_token() - stream.expect(TOKEN_SLICE_STOP) - stop_token = stream.next_token() - stream.expect(TOKEN_SLICE_STEP) - step_token = stream.current - - if not start_token.value: - start: Optional[int] = None - else: - start = int(start_token.value) - - if not stop_token.value: - stop: Optional[int] = None - else: - stop = int(stop_token.value) - - if not step_token.value: - step: Optional[int] = None - else: - step = int(step_token.value) + if token.kind == TOKEN_NAME: + return NameSelector( + env=self.env, + token=token, + name=token.value, + ) - return SliceSelector( - env=self.env, - token=start_token, - start=start, - stop=stop, - step=step, - ) + if token.kind == TOKEN_KEY_NAME: + return KeySelector( + env=self.env, + token=token, + key=token.value, + ) - def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR0912 - """Parse a comma separated list JSONPath selectors from a stream of tokens.""" - tok = stream.next_token() - list_items: List[ - Union[ - IndexSelector, - KeysSelector, - PropertySelector, - SliceSelector, - WildSelector, - Filter, - ] - ] = [] - - while stream.current.kind != TOKEN_RBRACKET: - if stream.current.kind == TOKEN_INT: - if ( - len(stream.current.value) > 1 - and stream.current.value.startswith("0") - ) or stream.current.value.startswith("-0"): - raise JSONPathSyntaxError( - "leading zero in index selector", token=stream.current - ) - list_items.append( - IndexSelector( - env=self.env, - token=stream.current, - index=int(stream.current.value), - ) - ) - elif stream.current.kind == TOKEN_BARE_PROPERTY: - list_items.append( - PropertySelector( - env=self.env, - token=stream.current, - name=stream.current.value, - 
shorthand=False, - ), + if token.kind == TOKEN_WILD: + return WildcardSelector( + env=self.env, + token=token, + ) + + if token.kind == TOKEN_KEYS: + if stream.current().kind == TOKEN_NAME: + return KeySelector( + env=self.env, + token=token, + key=self._decode_string_literal(stream.next()), ) - elif stream.current.kind == TOKEN_KEYS: - list_items.append( - KeysSelector( - env=self.env, - token=stream.current, - shorthand=False, + + return KeysSelector( + env=self.env, + token=token, + ) + + raise JSONPathSyntaxError("expected a shorthand selector", token=token) + + def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]: # noqa: PLR0912, PLR0915 + segment_token = stream.eat(TOKEN_LBRACKET) + selectors: List[JSONPathSelector] = [] + + while True: + stream.skip_whitespace() + token = stream.current() + + if token.kind == TOKEN_RBRACKET: + break + + if token.kind == TOKEN_INT: + if ( + stream.peek().kind == TOKEN_COLON + or stream.peek(2).kind == TOKEN_COLON + ): + selectors.append(self.parse_slice(stream)) + else: + self._raise_for_leading_zero(token) + selectors.append( + IndexSelector( + env=self.env, + token=token, + index=int(token.value), + ) ) - ) - elif stream.current.kind in ( + stream.next() + elif token.kind in ( TOKEN_DOUBLE_QUOTE_STRING, TOKEN_SINGLE_QUOTE_STRING, ): - if self.RE_INVALID_NAME_SELECTOR.search(stream.current.value): - raise JSONPathSyntaxError( - f"invalid name selector {stream.current.value!r}", - token=stream.current, - ) - - list_items.append( - PropertySelector( + selectors.append( + NameSelector( env=self.env, - token=stream.current, - name=self._decode_string_literal(stream.current), - shorthand=False, + token=token, + name=self._decode_string_literal(token), ), ) - elif stream.current.kind == TOKEN_SLICE_START: - list_items.append(self.parse_slice(stream)) - elif stream.current.kind == TOKEN_WILD: - list_items.append( - WildSelector( - env=self.env, - token=stream.current, - shorthand=False, + 
stream.next() + elif token.kind == TOKEN_COLON: + selectors.append(self.parse_slice(stream)) + elif token.kind == TOKEN_WILD: + selectors.append(WildcardSelector(env=self.env, token=token)) + stream.next() + elif token.kind == TOKEN_KEYS: + stream.eat(TOKEN_KEYS) + if stream.current().kind in ( + TOKEN_DOUBLE_QUOTE_STRING, + TOKEN_SINGLE_QUOTE_STRING, + ): + selectors.append( + KeySelector( + env=self.env, + token=token, + key=self._decode_string_literal(stream.next()), + ) ) - ) - elif stream.current.kind == TOKEN_FILTER: - list_items.append(self.parse_filter(stream)) - elif stream.current.kind == TOKEN_EOF: - raise JSONPathSyntaxError( - "unexpected end of query", token=stream.current - ) + else: + selectors.append(KeysSelector(env=self.env, token=token)) + + elif token.kind == TOKEN_FILTER: + selectors.append(self.parse_filter_selector(stream)) + elif token.kind == TOKEN_KEYS_FILTER: + selectors.append(self.parse_filter_selector(stream, keys=True)) + elif token.kind in (TOKEN_ROOT, TOKEN_NAME): + selectors.append(self.parse_singular_query_selector(stream)) + elif token.kind == TOKEN_EOF: + raise JSONPathSyntaxError("unexpected end of query", token=token) else: raise JSONPathSyntaxError( - f"unexpected token in bracketed selection {stream.current.kind!r}", - token=stream.current, + f"unexpected token in bracketed selection {token.kind!r}", + token=token, ) - if stream.peek.kind == TOKEN_EOF: + stream.skip_whitespace() + + if stream.current().kind == TOKEN_EOF: raise JSONPathSyntaxError( - "unexpected end of selector list", - token=stream.current, + "unexpected end of segment", + token=stream.current(), ) - if stream.peek.kind != TOKEN_RBRACKET: - # TODO: error message .. 
expected a comma or logical operator - stream.expect_peek(TOKEN_COMMA) - stream.next_token() - - if stream.peek.kind == TOKEN_RBRACKET: + if stream.current().kind != TOKEN_RBRACKET: + stream.eat(TOKEN_COMMA) + stream.skip_whitespace() + if stream.current().kind == TOKEN_RBRACKET: raise JSONPathSyntaxError( - "unexpected trailing comma", - token=stream.peek, + "unexpected trailing comma", token=stream.current() ) - stream.next_token() + stream.eat(TOKEN_RBRACKET) + + if not selectors: + raise JSONPathSyntaxError("empty bracketed segment", token=segment_token) + + return selectors - if not list_items: - raise JSONPathSyntaxError("empty bracketed segment", token=tok) + def parse_slice(self, stream: TokenStream) -> SliceSelector: + """Parse a slice JSONPath expression from a stream of tokens.""" + token = stream.current() + start: Optional[int] = None + stop: Optional[int] = None + step: Optional[int] = None + + def _maybe_index(token: Token) -> bool: + if token.kind == TOKEN_INT: + if len(token.value) > 1 and token.value.startswith(("0", "-0")): + raise JSONPathSyntaxError( + f"invalid index {token.value!r}", token=token + ) + return True + return False + + # 1: or : + if _maybe_index(stream.current()): + start = int(stream.current().value) + stream.next() + + stream.skip_whitespace() + stream.expect(TOKEN_COLON) + stream.next() + stream.skip_whitespace() + + # 1 or 1: or : or ? + if _maybe_index(stream.current()): + stop = int(stream.current().value) + stream.next() + stream.skip_whitespace() + if stream.current().kind == TOKEN_COLON: + stream.next() + elif stream.current().kind == TOKEN_COLON: + stream.expect(TOKEN_COLON) + stream.next() + + # 1 or ? 
+ stream.skip_whitespace() + if _maybe_index(stream.current()): + step = int(stream.current().value) + stream.next() - return ListSelector(env=self.env, token=tok, items=list_items) + return SliceSelector( + env=self.env, + token=token, + start=start, + stop=stop, + step=step, + ) - def parse_filter(self, stream: TokenStream) -> Filter: - tok = stream.next_token() - expr = self.parse_filter_selector(stream) + def parse_filter_selector( + self, stream: TokenStream, *, keys: bool = False + ) -> Union[Filter, KeysFilter]: + token = stream.next() + expr = self.parse_filter_expression(stream) if self.env.well_typed and isinstance(expr, FunctionExtension): func = self.env.function_extensions.get(expr.name) @@ -499,236 +584,291 @@ def parse_filter(self, stream: TokenStream) -> Filter: and func.return_type == ExpressionType.VALUE ): raise JSONPathTypeError( - f"result of {expr.name}() must be compared", token=tok + f"result of {expr.name}() must be compared", token=token ) - if isinstance(expr, (Literal, Nil)): + if isinstance(expr, (FilterExpressionLiteral, Nil)): raise JSONPathSyntaxError( "filter expression literals outside of " "function expressions must be compared", - token=tok, + token=token, ) - return Filter(env=self.env, token=tok, expression=BooleanExpression(expr)) + if keys: + return KeysFilter( + env=self.env, token=token, expression=FilterExpression(expr) + ) + + return Filter(env=self.env, token=token, expression=FilterExpression(expr)) - def parse_boolean(self, stream: TokenStream) -> FilterExpression: - if stream.current.kind == TOKEN_TRUE: + def parse_boolean(self, stream: TokenStream) -> BaseExpression: + if stream.next().kind == TOKEN_TRUE: return TRUE return FALSE - def parse_nil(self, _: TokenStream) -> FilterExpression: + def parse_nil(self, stream: TokenStream) -> BaseExpression: + stream.next() return NIL - def parse_undefined(self, _: TokenStream) -> FilterExpression: + def parse_undefined(self, stream: TokenStream) -> BaseExpression: + 
stream.next() return UNDEFINED_LITERAL - def parse_string_literal(self, stream: TokenStream) -> FilterExpression: - return StringLiteral(value=self._decode_string_literal(stream.current)) + def parse_string_literal(self, stream: TokenStream) -> BaseExpression: + return StringLiteral(value=self._decode_string_literal(stream.next())) + + def parse_integer_literal(self, stream: TokenStream) -> BaseExpression: + token = stream.next() + value = token.value + + if self.env.strict and value.startswith("0") and len(value) > 1: + raise JSONPathSyntaxError("invalid integer literal", token=token) - def parse_integer_literal(self, stream: TokenStream) -> FilterExpression: # Convert to float first to handle scientific notation. - return IntegerLiteral(value=int(float(stream.current.value))) + return IntegerLiteral(value=int(float(value))) + + def parse_float_literal(self, stream: TokenStream) -> BaseExpression: + token = stream.next() + value = token.value - def parse_float_literal(self, stream: TokenStream) -> FilterExpression: - return FloatLiteral(value=float(stream.current.value)) + if value.startswith("0") and len(value.split(".")[0]) > 1: + raise JSONPathSyntaxError("invalid float literal", token=token) - def parse_prefix_expression(self, stream: TokenStream) -> FilterExpression: - tok = stream.next_token() - assert tok.kind == TOKEN_NOT + return FloatLiteral(value=float(value)) + + def parse_prefix_expression(self, stream: TokenStream) -> BaseExpression: + token = stream.next() + assert token.kind == TOKEN_NOT return PrefixExpression( operator="!", - right=self.parse_filter_selector(stream, precedence=self.PRECEDENCE_PREFIX), + right=self.parse_filter_expression( + stream, precedence=self.PRECEDENCE_PREFIX + ), ) def parse_infix_expression( - self, stream: TokenStream, left: FilterExpression - ) -> FilterExpression: - tok = stream.next_token() - precedence = self.PRECEDENCES.get(tok.kind, self.PRECEDENCE_LOWEST) - right = self.parse_filter_selector(stream, precedence) - 
operator = self.BINARY_OPERATORS[tok.kind] + self, stream: TokenStream, left: BaseExpression + ) -> BaseExpression: + token = stream.next() + precedence = self.PRECEDENCES.get(token.kind, self.PRECEDENCE_LOWEST) + right = self.parse_filter_expression(stream, precedence) + operator = self.BINARY_OPERATORS[token.kind] if self.env.well_typed and operator in self.COMPARISON_OPERATORS: - self._raise_for_non_comparable_function(left, tok) - self._raise_for_non_comparable_function(right, tok) + self._raise_for_non_comparable_function(left, token) + self._raise_for_non_comparable_function(right, token) if operator not in self.INFIX_LITERAL_OPERATORS: - if isinstance(left, (Literal, Nil)): + if isinstance(left, (FilterExpressionLiteral, Nil)): raise JSONPathSyntaxError( "filter expression literals outside of " "function expressions must be compared", - token=tok, + token=token, ) - if isinstance(right, (Literal, Nil)): + if isinstance(right, (FilterExpressionLiteral, Nil)): raise JSONPathSyntaxError( "filter expression literals outside of " "function expressions must be compared", - token=tok, + token=token, ) return InfixExpression(left, operator, right) - def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression: - stream.next_token() - expr = self.parse_filter_selector(stream) - stream.next_token() + def parse_grouped_expression(self, stream: TokenStream) -> BaseExpression: + _token = stream.eat(TOKEN_LPAREN) + expr = self.parse_filter_expression(stream) - while stream.current.kind != TOKEN_RPAREN: - if stream.current.kind == TOKEN_EOF: - raise JSONPathSyntaxError( - "unbalanced parentheses", token=stream.current - ) - - if stream.current.kind not in self.BINARY_OPERATORS: - raise JSONPathSyntaxError( - f"expected an expression, found '{stream.current.value}'", - token=stream.current, - ) + while stream.current().kind != TOKEN_RPAREN: + token = stream.current() + if token.kind in (TOKEN_EOF, TOKEN_RBRACKET): + raise JSONPathSyntaxError("unbalanced 
parentheses", token=_token) expr = self.parse_infix_expression(stream, expr) - stream.expect(TOKEN_RPAREN) + stream.eat(TOKEN_RPAREN) return expr - def parse_root_path(self, stream: TokenStream) -> FilterExpression: - root = stream.next_token() - return RootPath( + def parse_absolute_query(self, stream: TokenStream) -> BaseExpression: + root = stream.next() # Could be TOKEN_ROOT or TOKEN_PSEUDO_ROOT + return RootFilterQuery( JSONPath( env=self.env, - selectors=self.parse_path(stream, in_filter=True), - fake_root=root.kind == TOKEN_FAKE_ROOT, + segments=self.parse_query(stream), + pseudo_root=root.kind == TOKEN_PSEUDO_ROOT, ) ) - def parse_self_path(self, stream: TokenStream) -> FilterExpression: - stream.next_token() - return SelfPath( - JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True)) + def parse_relative_query(self, stream: TokenStream) -> BaseExpression: + stream.eat(TOKEN_SELF) + return RelativeFilterQuery( + JSONPath(env=self.env, segments=self.parse_query(stream)) + ) + + def parse_singular_query_selector( + self, stream: TokenStream + ) -> SingularQuerySelector: + token = ( + stream.next() if stream.current().kind == TOKEN_ROOT else stream.current() + ) + + query = JSONPath(env=self.env, segments=self.parse_query(stream)) + + if not query.singular_query(): + raise JSONPathSyntaxError( + "embedded query selectors must be singular queries", token=token + ) + + return SingularQuerySelector( + env=self.env, + token=token, + query=query, ) - def parse_current_key(self, _: TokenStream) -> FilterExpression: + def parse_current_key(self, stream: TokenStream) -> BaseExpression: + stream.next() return CURRENT_KEY - def parse_filter_context_path(self, stream: TokenStream) -> FilterExpression: - stream.next_token() + def parse_filter_context_path(self, stream: TokenStream) -> BaseExpression: + stream.next() return FilterContextPath( - JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True)) + JSONPath(env=self.env, 
segments=self.parse_query(stream)) ) - def parse_regex(self, stream: TokenStream) -> FilterExpression: - pattern = stream.current.value + def parse_regex(self, stream: TokenStream) -> BaseExpression: + pattern = stream.current().value flags = 0 - if stream.peek.kind == TOKEN_RE_FLAGS: - stream.next_token() - for flag in set(stream.current.value): + if stream.peek().kind == TOKEN_RE_FLAGS: + stream.next() + for flag in set(stream.next().value): flags |= self.RE_FLAG_MAP[flag] return RegexLiteral(value=re.compile(pattern, flags)) - def parse_list_literal(self, stream: TokenStream) -> FilterExpression: - stream.next_token() - list_items: List[FilterExpression] = [] + def parse_list_literal(self, stream: TokenStream) -> BaseExpression: + stream.eat(TOKEN_LBRACKET) + list_items: List[BaseExpression] = [] + + while True: + stream.skip_whitespace() + + if stream.current().kind == TOKEN_RBRACKET: + break - while stream.current.kind != TOKEN_RBRACKET: try: - list_items.append(self.list_item_map[stream.current.kind](stream)) + list_items.append(self.list_item_map[stream.current().kind](stream)) except KeyError as err: raise JSONPathSyntaxError( - f"unexpected {stream.current.value!r}", - token=stream.current, + f"unexpected {stream.current().value!r}", + token=stream.current(), ) from err - if stream.peek.kind != TOKEN_RBRACKET: - stream.expect_peek(TOKEN_COMMA) - stream.next_token() - - stream.next_token() + stream.skip_whitespace() + if stream.current().kind != TOKEN_RBRACKET: + stream.eat(TOKEN_COMMA) + stream.skip_whitespace() + stream.eat(TOKEN_RBRACKET) return ListLiteral(list_items) - def parse_function_extension(self, stream: TokenStream) -> FilterExpression: - function_arguments: List[FilterExpression] = [] - tok = stream.next_token() + def parse_function_extension(self, stream: TokenStream) -> BaseExpression: + function_arguments: List[BaseExpression] = [] + function_token = stream.next() + stream.eat(TOKEN_LPAREN) + + while True: + stream.skip_whitespace() + token 
= stream.current() + + if token.kind == TOKEN_RPAREN: + break - while stream.current.kind != TOKEN_RPAREN: try: - func = self.function_argument_map[stream.current.kind] + func = self.function_argument_map[token.kind] except KeyError as err: raise JSONPathSyntaxError( - f"unexpected {stream.current.value!r}", - token=stream.current, + f"unexpected {token.value!r}", token=token ) from err expr = func(stream) + stream.skip_whitespace() - # The argument could be a comparison or logical expression - peek_kind = stream.peek.kind - while peek_kind in self.BINARY_OPERATORS: - stream.next_token() + while stream.current().kind in self.BINARY_OPERATORS: expr = self.parse_infix_expression(stream, expr) - peek_kind = stream.peek.kind function_arguments.append(expr) + stream.skip_whitespace() - if stream.peek.kind != TOKEN_RPAREN: - stream.expect_peek(TOKEN_COMMA) - stream.next_token() + if stream.current().kind != TOKEN_RPAREN: + stream.eat(TOKEN_COMMA) - stream.next_token() + stream.eat(TOKEN_RPAREN) return FunctionExtension( - tok.value, - self.env.validate_function_extension_signature(tok, function_arguments), + function_token.value, + self.env.validate_function_extension_signature( + function_token, function_arguments + ), ) - def parse_filter_selector( + def parse_filter_expression( self, stream: TokenStream, precedence: int = PRECEDENCE_LOWEST - ) -> FilterExpression: + ) -> BaseExpression: + stream.skip_whitespace() + token = stream.current() + try: - left = self.token_map[stream.current.kind](stream) + left = self.token_map[token.kind](stream) except KeyError as err: - if stream.current.kind in (TOKEN_EOF, TOKEN_RBRACKET): + if token.kind in (TOKEN_EOF, TOKEN_RBRACKET): msg = "end of expression" else: - msg = repr(stream.current.value) - raise JSONPathSyntaxError( - f"unexpected {msg}", token=stream.current - ) from err + msg = repr(token.value) + raise JSONPathSyntaxError(f"unexpected {msg}", token=token) from err while True: - peek_kind = stream.peek.kind + 
stream.skip_whitespace() + kind = stream.current().kind + if ( - peek_kind in (TOKEN_EOF, TOKEN_RBRACKET) - or self.PRECEDENCES.get(peek_kind, self.PRECEDENCE_LOWEST) < precedence + kind not in self.BINARY_OPERATORS + or self.PRECEDENCES.get(kind, self.PRECEDENCE_LOWEST) < precedence ): break - if peek_kind not in self.BINARY_OPERATORS: - return left - - stream.next_token() left = self.parse_infix_expression(stream, left) return left def _decode_string_literal(self, token: Token) -> str: + if self.env.strict: + # For strict compliance with RFC 9535, we must unescape string literals + # ourselves. RFC 9535 is more strict than json.loads when it comes to + # parsing \uXXXX escape sequences. + return unescape_string( + token.value, + token, + "'" if token.kind == TOKEN_SINGLE_QUOTE_STRING else '"', + ) + if self.env.unicode_escape: if token.kind == TOKEN_SINGLE_QUOTE_STRING: value = token.value.replace('"', '\\"').replace("\\'", "'") else: value = token.value + try: rv = json.loads(f'"{value}"') assert isinstance(rv, str) return rv except json.JSONDecodeError as err: - raise JSONPathSyntaxError(str(err).split(":")[1], token=token) from None + message = f"decode error: {str(err).split(':')[1]}" + raise JSONPathSyntaxError(message, token=token) from None return token.value def _raise_for_non_comparable_function( - self, expr: FilterExpression, token: Token + self, expr: BaseExpression, token: Token ) -> None: - if isinstance(expr, Path) and not expr.path.singular_query(): + if isinstance(expr, FilterQuery) and not expr.path.singular_query(): raise JSONPathTypeError("non-singular query is not comparable", token=token) if isinstance(expr, FunctionExtension): @@ -740,3 +880,9 @@ def _raise_for_non_comparable_function( raise JSONPathTypeError( f"result of {expr.name}() is not comparable", token ) + + def _raise_for_leading_zero(self, token: Token) -> None: + if ( + len(token.value) > 1 and token.value.startswith("0") + ) or token.value.startswith("-0"): + raise 
JSONPathSyntaxError("leading zero in index selector", token=token) diff --git a/jsonpath/path.py b/jsonpath/path.py index 9cf3d98..7e70021 100644 --- a/jsonpath/path.py +++ b/jsonpath/path.py @@ -1,15 +1,13 @@ -# noqa: D100 +"""A compiled JSONPath ready to be applied to a JSON string or Python object.""" + from __future__ import annotations import itertools from typing import TYPE_CHECKING -from typing import Any from typing import AsyncIterable from typing import Iterable from typing import List -from typing import Mapping from typing import Optional -from typing import Sequence from typing import Tuple from typing import TypeVar from typing import Union @@ -18,15 +16,15 @@ from jsonpath.fluent_api import Query from jsonpath.match import FilterContextVars from jsonpath.match import JSONPathMatch +from jsonpath.segments import JSONPathRecursiveDescentSegment from jsonpath.selectors import IndexSelector -from jsonpath.selectors import ListSelector -from jsonpath.selectors import PropertySelector +from jsonpath.selectors import NameSelector if TYPE_CHECKING: - from io import IOBase + from jsonpath._types import JSONData from .env import JSONPathEnvironment - from .selectors import JSONPathSelector + from .segments import JSONPathSegment class JSONPath: @@ -34,9 +32,9 @@ class JSONPath: Arguments: env: The `JSONPathEnvironment` this path is bound to. - selectors: An iterable of `JSONPathSelector` objects, as generated by + segments: An iterable of `JSONPathSegment` instances, as generated by a `Parser`. - fake_root: Indicates if target JSON values should be wrapped in a single- + pseudo_root: Indicates if target JSON values should be wrapped in a single- element array, so as to make the target root value selectable. @@ -45,35 +43,30 @@ class JSONPath: selectors: The `JSONPathSelector` instances that make up this path. 
""" - __slots__ = ("env", "fake_root", "selectors") + __slots__ = ("env", "pseudo_root", "segments") def __init__( self, *, env: JSONPathEnvironment, - selectors: Iterable[JSONPathSelector], - fake_root: bool = False, + segments: Iterable[JSONPathSegment], + pseudo_root: bool = False, ) -> None: self.env = env - self.selectors = tuple(selectors) - self.fake_root = fake_root + self.segments = tuple(segments) + self.pseudo_root = pseudo_root def __str__(self) -> str: - return self.env.root_token + "".join( - str(selector) for selector in self.selectors - ) + return self.env.root_token + "".join(str(segment) for segment in self.segments) def __eq__(self, __value: object) -> bool: - return isinstance(__value, JSONPath) and self.selectors == __value.selectors + return isinstance(__value, JSONPath) and self.segments == __value.segments def __hash__(self) -> int: - return hash(self.selectors) + return hash(self.segments) def findall( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> List[object]: """Find all objects in `data` matching the given JSONPath `path`. @@ -100,10 +93,7 @@ def findall( ] def finditer( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> Iterable[JSONPathMatch]: """Generate `JSONPathMatch` objects for each match. @@ -125,27 +115,26 @@ def finditer( an incompatible way. 
""" _data = load_data(data) + path = self.env.pseudo_root_token if self.pseudo_root else self.env.root_token + matches: Iterable[JSONPathMatch] = [ JSONPathMatch( filter_context=filter_context or {}, - obj=[_data] if self.fake_root else _data, + obj=[_data] if self.pseudo_root else _data, parent=None, - path=self.env.root_token, + path=path, parts=(), root=_data, ) ] - for selector in self.selectors: - matches = selector.resolve(matches) + for segment in self.segments: + matches = segment.resolve(matches) return matches async def findall_async( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> List[object]: """An async version of `findall()`.""" return [ @@ -156,36 +145,31 @@ async def findall_async( ] async def finditer_async( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> AsyncIterable[JSONPathMatch]: """An async version of `finditer()`.""" _data = load_data(data) + path = self.env.pseudo_root_token if self.pseudo_root else self.env.root_token async def root_iter() -> AsyncIterable[JSONPathMatch]: yield self.env.match_class( filter_context=filter_context or {}, - obj=[_data] if self.fake_root else _data, + obj=[_data] if self.pseudo_root else _data, parent=None, - path=self.env.root_token, + path=path, parts=(), root=_data, ) matches: AsyncIterable[JSONPathMatch] = root_iter() - for selector in self.selectors: - matches = selector.resolve_async(matches) + for segment in self.segments: + matches = segment.resolve_async(matches) return matches def match( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: 
Optional[FilterContextVars] = None ) -> Union[JSONPathMatch, None]: """Return a `JSONPathMatch` instance for the first object found in _data_. @@ -212,10 +196,7 @@ def match( return None def query( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> Query: """Return a `Query` iterator over matches found by applying this path to _data_. @@ -237,20 +218,21 @@ def query( def empty(self) -> bool: """Return `True` if this path has no selectors.""" - return not bool(self.selectors) + return not bool(self.segments) def singular_query(self) -> bool: """Return `True` if this JSONPath query is a singular query.""" - for selector in self.selectors: - if isinstance(selector, (PropertySelector, IndexSelector)): - continue - if ( - isinstance(selector, ListSelector) - and len(selector.items) == 1 - and isinstance(selector.items[0], (PropertySelector, IndexSelector)) + for segment in self.segments: + if isinstance(segment, JSONPathRecursiveDescentSegment): + return False + + if len(segment.selectors) == 1 and isinstance( + segment.selectors[0], (NameSelector, IndexSelector) ): continue + return False + return True @@ -288,10 +270,7 @@ def __hash__(self) -> int: return hash((self.path, self.paths)) def findall( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> List[object]: """Find all objects in `data` matching the given JSONPath `path`. 
@@ -326,10 +305,7 @@ def findall( return objs def finditer( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> Iterable[JSONPathMatch]: """Generate `JSONPathMatch` objects for each match. @@ -364,10 +340,7 @@ def finditer( return matches def match( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> Union[JSONPathMatch, None]: """Return a `JSONPathMatch` instance for the first object found in _data_. @@ -394,10 +367,7 @@ def match( return None async def findall_async( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> List[object]: """An async version of `findall()`.""" objs = await self.path.findall_async(data, filter_context=filter_context) @@ -413,10 +383,7 @@ async def findall_async( return objs async def finditer_async( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> AsyncIterable[JSONPathMatch]: """An async version of `finditer()`.""" matches = await self.path.finditer_async(data, filter_context=filter_context) @@ -433,10 +400,7 @@ async def finditer_async( return matches def query( - self, - data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], - *, - filter_context: Optional[FilterContextVars] = None, + self, data: JSONData, *, filter_context: Optional[FilterContextVars] = None ) -> Query: """Return a `Query` iterator over matches found by applying this path to _data_. 
diff --git a/jsonpath/segments.py b/jsonpath/segments.py new file mode 100644 index 0000000..51054d1 --- /dev/null +++ b/jsonpath/segments.py @@ -0,0 +1,131 @@ +"""JSONPath child and descendant segment definitions.""" + +from __future__ import annotations + +from abc import ABC +from abc import abstractmethod +from typing import TYPE_CHECKING +from typing import AsyncIterable +from typing import Iterable +from typing import Mapping +from typing import Sequence +from typing import Tuple + +from .exceptions import JSONPathRecursionError + +if TYPE_CHECKING: + from .env import JSONPathEnvironment + from .match import JSONPathMatch + from .selectors import JSONPathSelector + from .token import Token + + +class JSONPathSegment(ABC): + """Base class for all JSONPath segments.""" + + __slots__ = ("env", "token", "selectors") + + def __init__( + self, + *, + env: JSONPathEnvironment, + token: Token, + selectors: Tuple[JSONPathSelector, ...], + ) -> None: + self.env = env + self.token = token + self.selectors = selectors + + @abstractmethod + def resolve(self, nodes: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: + """Apply this segment to each `JSONPathMatch` in _nodes_.""" + + @abstractmethod + def resolve_async( + self, nodes: AsyncIterable[JSONPathMatch] + ) -> AsyncIterable[JSONPathMatch]: + """An async version of `resolve`.""" + + +class JSONPathChildSegment(JSONPathSegment): + """The JSONPath child selection segment.""" + + def resolve(self, nodes: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: + """Select children of each node in _nodes_.""" + for node in nodes: + for selector in self.selectors: + yield from selector.resolve(node) + + async def resolve_async( + self, nodes: AsyncIterable[JSONPathMatch] + ) -> AsyncIterable[JSONPathMatch]: + """An async version of `resolve`.""" + async for node in nodes: + for selector in self.selectors: + async for match in selector.resolve_async(node): + yield match + + def __str__(self) -> str: + return f"[{', 
'.join(str(itm) for itm in self.selectors)}]" + + def __eq__(self, __value: object) -> bool: + return ( + isinstance(__value, JSONPathChildSegment) + and self.selectors == __value.selectors + and self.token == __value.token + ) + + def __hash__(self) -> int: + return hash((self.selectors, self.token)) + + +class JSONPathRecursiveDescentSegment(JSONPathSegment): + """The JSONPath recursive descent segment.""" + + def resolve(self, nodes: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: + """Select descendants of each node in _nodes_.""" + for node in nodes: + for _node in self._visit(node): + for selector in self.selectors: + yield from selector.resolve(_node) + + async def resolve_async( + self, nodes: AsyncIterable[JSONPathMatch] + ) -> AsyncIterable[JSONPathMatch]: + """An async version of `resolve`.""" + async for node in nodes: + for _node in self._visit(node): + for selector in self.selectors: + async for match in selector.resolve_async(_node): + yield match + + def _visit(self, node: JSONPathMatch, depth: int = 1) -> Iterable[JSONPathMatch]: + """Depth-first, pre-order node traversal.""" + if depth > self.env.max_recursion_depth: + raise JSONPathRecursionError("recursion limit exceeded", token=self.token) + + yield node + + if isinstance(node.obj, Mapping): + for name, val in node.obj.items(): + if isinstance(val, (Mapping, Sequence)): + _node = node.new_child(val, name) + yield from self._visit(_node, depth + 1) + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, item in enumerate(node.obj): + if isinstance(item, (Mapping, Sequence)): + _node = node.new_child(item, i) + yield from self._visit(_node, depth + 1) + + def __str__(self) -> str: + return f"..[{', '.join(str(itm) for itm in self.selectors)}]" + + def __eq__(self, __value: object) -> bool: + return ( + isinstance(__value, JSONPathRecursiveDescentSegment) + and self.selectors == __value.selectors + and self.token == __value.token + ) + + def __hash__(self) -> int: + 
return hash(("..", self.selectors, self.token)) diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index 44007e9..d073380 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -11,23 +11,22 @@ from typing import Any from typing import AsyncIterable from typing import Iterable -from typing import List from typing import Optional -from typing import TypeVar from typing import Union from .exceptions import JSONPathIndexError +from .exceptions import JSONPathSyntaxError from .exceptions import JSONPathTypeError +from .match import NodeList from .serialize import canonical_string if TYPE_CHECKING: from .env import JSONPathEnvironment - from .filter import BooleanExpression + from .filter import FilterExpression from .match import JSONPathMatch + from .path import JSONPath from .token import Token -# ruff: noqa: D102 - class JSONPathSelector(ABC): """Base class for all JSONPath segments and selectors.""" @@ -39,13 +38,11 @@ def __init__(self, *, env: JSONPathEnvironment, token: Token) -> None: self.token = token @abstractmethod - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: """Apply the segment/selector to each node in _matches_. Arguments: - matches: Nodes matched by preceding segments/selectors. This is like - a lazy _NodeList_, as described in RFC 9535, but each match carries - more than the node's value and location. + node: A node matched by preceding segments/selectors. 
Returns: The `JSONPathMatch` instances created by applying this selector to each @@ -53,39 +50,25 @@ def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: """ @abstractmethod - def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: + def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: """An async version of `resolve`.""" -class PropertySelector(JSONPathSelector): - """A shorthand or bracketed property selector.""" +class NameSelector(JSONPathSelector): + """Select at most one object member value given an object member name.""" - __slots__ = ("name", "shorthand") + __slots__ = ("name",) - def __init__( - self, - *, - env: JSONPathEnvironment, - token: Token, - name: str, - shorthand: bool, - ) -> None: + def __init__(self, *, env: JSONPathEnvironment, token: Token, name: str) -> None: super().__init__(env=env, token=token) self.name = name - self.shorthand = shorthand def __str__(self) -> str: - return ( - f"[{canonical_string(self.name)}]" - if self.shorthand - else f"{canonical_string(self.name)}" - ) + return canonical_string(self.name) def __eq__(self, __value: object) -> bool: return ( - isinstance(__value, PropertySelector) + isinstance(__value, NameSelector) and self.name == __value.name and self.token == __value.token ) @@ -93,50 +76,25 @@ def __eq__(self, __value: object) -> bool: def __hash__(self) -> int: return hash((self.name, self.token)) - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - if not isinstance(match.obj, Mapping): - continue - + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): with suppress(KeyError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=self.env.getitem(match.obj, self.name), - parent=match, - parts=match.parts + (self.name,), - path=match.path + f"[{canonical_string(self.name)}]", - 
root=match.root, - ) - match.add_child(_match) - yield _match - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - if not isinstance(match.obj, Mapping): - continue + match = node.new_child(self.env.getitem(node.obj, self.name), self.name) + node.add_child(match) + yield match + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): with suppress(KeyError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=await self.env.getitem_async(match.obj, self.name), - parent=match, - parts=match.parts + (self.name,), - path=match.path + f"[{canonical_string(self.name)}]", - root=match.root, + match = node.new_child( + await self.env.getitem_async(node.obj, self.name), self.name ) - match.add_child(_match) - yield _match + node.add_child(match) + yield match class IndexSelector(JSONPathSelector): - """Select an element from an array by index. - - Considering we don't require mapping (JSON object) keys/properties to - be quoted, and that we support mappings with numeric keys, we also check - to see if the "index" is a mapping key, which is non-standard. - """ + """Select at most one array element value given an index.""" __slots__ = ("index", "_as_key") @@ -172,122 +130,139 @@ def _normalized_index(self, obj: Sequence[object]) -> int: return len(obj) + self.index return self.index - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - if isinstance(match.obj, Mapping): - # Try the string representation of the index as a key. 
- with suppress(KeyError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=self.env.getitem(match.obj, self._as_key), - parent=match, - parts=match.parts + (self._as_key,), - path=f"{match.path}['{self.index}']", - root=match.root, - ) - match.add_child(_match) - yield _match - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - norm_index = self._normalized_index(match.obj) - with suppress(IndexError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=self.env.getitem(match.obj, self.index), - parent=match, - parts=match.parts + (norm_index,), - path=match.path + f"[{norm_index}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - if isinstance(match.obj, Mapping): - # Try the string representation of the index as a key. - with suppress(KeyError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=await self.env.getitem_async(match.obj, self._as_key), - parent=match, - parts=match.parts + (self._as_key,), - path=f"{match.path}['{self.index}']", - root=match.root, - ) - match.add_child(_match) - yield _match - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - norm_index = self._normalized_index(match.obj) - with suppress(IndexError): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=await self.env.getitem_async(match.obj, self.index), - parent=match, - parts=match.parts + (norm_index,), - path=match.path + f"[{norm_index}]", - root=match.root, - ) - match.add_child(_match) - yield _match + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + # Optionally try string representation of int + if not self.env.strict and isinstance(node.obj, Mapping): + # Try the string representation of the index as a key. 
+ with suppress(KeyError): + match = node.new_child( + self.env.getitem(node.obj, self._as_key), self.index + ) + node.add_child(match) + yield match + if isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + norm_index = self._normalized_index(node.obj) + with suppress(IndexError): + match = node.new_child( + self.env.getitem(node.obj, self.index), norm_index + ) + node.add_child(match) + yield match + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if not self.env.strict and isinstance(node.obj, Mapping): + # Try the string representation of the index as a key. + with suppress(KeyError): + match = node.new_child( + await self.env.getitem_async(node.obj, self._as_key), self.index + ) + node.add_child(match) + yield match + if isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + norm_index = self._normalized_index(node.obj) + with suppress(IndexError): + match = node.new_child( + await self.env.getitem_async(node.obj, self.index), norm_index + ) + node.add_child(match) + yield match -class KeysSelector(JSONPathSelector): - """Select mapping/object keys/properties. +class KeySelector(JSONPathSelector): + """Select at most one name from an object member, given the name. + + The key selector is introduced to facilitate valid normalized paths for nodes + produced by the "keys selector" and the "keys filter selector". It is not expected + to be of much use elsewhere. NOTE: This is a non-standard selector. + + See https://jg-rp.github.io/json-p3/guides/jsonpath-extra#key-selector. 
""" - __slots__ = ("shorthand",) + __slots__ = ("key",) - def __init__( - self, *, env: JSONPathEnvironment, token: Token, shorthand: bool - ) -> None: + def __init__(self, *, env: JSONPathEnvironment, token: Token, key: str) -> None: super().__init__(env=env, token=token) - self.shorthand = shorthand + self.key = key def __str__(self) -> str: + return f"{self.env.keys_selector_token}{canonical_string(self.key)}" + + def __eq__(self, __value: object) -> bool: return ( - f"[{self.env.keys_selector_token}]" - if self.shorthand - else self.env.keys_selector_token + isinstance(__value, KeySelector) + and self.token == __value.token + and self.key == __value.key ) + def __hash__(self) -> int: + return hash((self.token, self.key)) + + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping) and self.key in node.obj: + match = node.__class__( + filter_context=node.filter_context(), + obj=self.key, + parent=node, + parts=node.parts + (f"{self.env.keys_selector_token}{self.key}",), + path=f"{node.path}[{self}]", + root=node.root, + ) + node.add_child(match) + yield match + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + for _node in self.resolve(node): + yield _node + + +class KeysSelector(JSONPathSelector): + """Select all names from an object's name/value members. + + NOTE: This is a non-standard selector. 
+ + See https://jg-rp.github.io/json-p3/guides/jsonpath-extra#keys-selector + """ + + __slots__ = () + + def __init__(self, *, env: JSONPathEnvironment, token: Token) -> None: + super().__init__(env=env, token=token) + + def __str__(self) -> str: + return self.env.keys_selector_token + def __eq__(self, __value: object) -> bool: return isinstance(__value, KeysSelector) and self.token == __value.token def __hash__(self) -> int: return hash(self.token) - def _keys(self, match: JSONPathMatch) -> Iterable[JSONPathMatch]: - if isinstance(match.obj, Mapping): - for i, key in enumerate(match.obj.keys()): - _match = self.env.match_class( - filter_context=match.filter_context(), + def _keys(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + for key in node.obj: + match = node.__class__( + filter_context=node.filter_context(), obj=key, - parent=match, - parts=match.parts + (f"{self.env.keys_selector_token}{key}",), - path=f"{match.path}[{self.env.keys_selector_token}][{i}]", - root=match.root, + parent=node, + parts=node.parts + (f"{self.env.keys_selector_token}{key}",), + path=f"{node.path}[{self.env.keys_selector_token}{canonical_string(key)}]", + root=node.root, ) - match.add_child(_match) - yield _match + node.add_child(match) + yield match - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - yield from self._keys(match) + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + yield from self._keys(node) - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - for _match in self._keys(match): - yield _match + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + for match in self._keys(node): + yield match class SliceSelector(JSONPathSelector): - """Sequence slicing selector.""" + """Select array elements given a start index, a stop index and a 
step.""" __slots__ = ("slice",) @@ -327,258 +302,191 @@ def _check_range(self, *indices: Optional[int]) -> None: ): raise JSONPathIndexError("index out of range", token=self.token) - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - if not isinstance(match.obj, Sequence) or self.slice.step == 0: - continue + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if not isinstance(node.obj, Sequence) or self.slice.step == 0: + return - for norm_index, obj in zip( # noqa: B905 - range(*self.slice.indices(len(match.obj))), - self.env.getitem(match.obj, self.slice), - ): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=obj, - parent=match, - parts=match.parts + (norm_index,), - path=f"{match.path}[{norm_index}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - if not isinstance(match.obj, Sequence) or self.slice.step == 0: - continue - - for norm_index, obj in zip( # noqa: B905 - range(*self.slice.indices(len(match.obj))), - await self.env.getitem_async(match.obj, self.slice), - ): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=obj, - parent=match, - parts=match.parts + (norm_index,), - path=f"{match.path}[{norm_index}]", - root=match.root, - ) - match.add_child(_match) - yield _match + for norm_index, obj in zip( # noqa: B905 + range(*self.slice.indices(len(node.obj))), + self.env.getitem(node.obj, self.slice), + ): + match = node.new_child(obj, norm_index) + node.add_child(match) + yield match + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if not isinstance(node.obj, Sequence) or self.slice.step == 0: + return -class WildSelector(JSONPathSelector): - """Select all items from a sequence/array or values from a mapping/object.""" + for 
norm_index, obj in zip( # noqa: B905 + range(*self.slice.indices(len(node.obj))), + await self.env.getitem_async(node.obj, self.slice), + ): + match = node.new_child(obj, norm_index) + node.add_child(match) + yield match - __slots__ = ("shorthand",) - def __init__( - self, *, env: JSONPathEnvironment, token: Token, shorthand: bool - ) -> None: - super().__init__(env=env, token=token) - self.shorthand = shorthand +class WildcardSelector(JSONPathSelector): + """Select nodes of all children of an object or array.""" + + __slots__ = () def __str__(self) -> str: - return "[*]" if self.shorthand else "*" + return "*" def __eq__(self, __value: object) -> bool: - return isinstance(__value, WildSelector) and self.token == __value.token + return isinstance(__value, WildcardSelector) and self.token == __value.token def __hash__(self) -> int: return hash(self.token) - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - if isinstance(match.obj, str): - continue - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - elif isinstance(match.obj, Sequence): - for i, val in enumerate(match.obj): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - 
path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - elif isinstance(match.obj, Sequence): - for i, val in enumerate(match.obj): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match - - -class RecursiveDescentSelector(JSONPathSelector): - """A JSONPath selector that visits all nodes recursively. - - NOTE: Strictly this is a "segment", not a "selector". + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + for key, val in node.obj.items(): + match = node.new_child(val, key) + node.add_child(match) + yield match + + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, val in enumerate(node.obj): + match = node.new_child(val, i) + node.add_child(match) + yield match + + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + for key, val in node.obj.items(): + match = node.new_child(val, key) + node.add_child(match) + yield match + + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, val in enumerate(node.obj): + match = node.new_child(val, i) + node.add_child(match) + yield match + + +class SingularQuerySelector(JSONPathSelector): + """An embedded absolute query. + + The result of the embedded query is used as an object member name or array element + index. + + NOTE: This is a non-standard selector. """ + __slots__ = ("query",) + + def __init__( + self, *, env: JSONPathEnvironment, token: Token, query: JSONPath + ) -> None: + super().__init__(env=env, token=token) + self.query = query + + if env.strict: + raise JSONPathSyntaxError("unexpected query selector", token=token) + def __str__(self) -> str: - return ".." 
+ return str(self.query) def __eq__(self, __value: object) -> bool: return ( - isinstance(__value, RecursiveDescentSelector) + isinstance(__value, SingularQuerySelector) + and self.query == __value.query and self.token == __value.token ) def __hash__(self) -> int: - return hash(self.token) + return hash((self.query, self.token)) - def _expand(self, match: JSONPathMatch) -> Iterable[JSONPathMatch]: - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - if isinstance(val, str): - pass - elif isinstance(val, (Mapping, Sequence)): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - yield from self._expand(_match) - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - for i, val in enumerate(match.obj): - if isinstance(val, str): - pass - elif isinstance(val, (Mapping, Sequence)): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match - yield from self._expand(_match) - - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match in matches: - yield match - yield from self._expand(match) + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + nodes = NodeList(self.query.finditer(node.root)) - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match in matches: - yield match - for _match in self._expand(match): - yield _match + if nodes.empty(): + return + value = nodes[0].value -T = TypeVar("T") + if not isinstance(value, str): + return + with suppress(KeyError): + match = node.new_child(self.env.getitem(node.obj, value), 
value) + node.add_child(match) + yield match -async def _alist(it: List[T]) -> AsyncIterable[T]: - for item in it: - yield item + if isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + nodes = NodeList(self.query.finditer(node.root)) + if nodes.empty(): + return -class ListSelector(JSONPathSelector): - """A bracketed list of selectors, the results of which are concatenated together. + value = nodes[0].value - NOTE: Strictly this is a "segment", not a "selector". - """ + if not isinstance(value, int): + return - __slots__ = ("items",) + index = self._normalized_index(node.obj, value) - def __init__( - self, - *, - env: JSONPathEnvironment, - token: Token, - items: List[ - Union[ - SliceSelector, - KeysSelector, - IndexSelector, - PropertySelector, - WildSelector, - Filter, - ] - ], - ) -> None: - super().__init__(env=env, token=token) - self.items = tuple(items) + with suppress(IndexError): + match = node.new_child(self.env.getitem(node.obj, index), index) + node.add_child(match) + yield match - def __str__(self) -> str: - return f"[{', '.join(str(itm) for itm in self.items)}]" + async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + nodes = NodeList( + [match async for match in await self.query.finditer_async(node.root)] + ) - def __eq__(self, __value: object) -> bool: - return ( - isinstance(__value, ListSelector) - and self.items == __value.items - and self.token == __value.token - ) + if nodes.empty(): + return - def __hash__(self) -> int: - return hash((self.items, self.token)) + value = nodes[0].value + + if not isinstance(value, str): + return + + with suppress(KeyError): + match = node.new_child( + await self.env.getitem_async(node.obj, value), value + ) + node.add_child(match) + yield match + + if isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + nodes = NodeList( + [match async for match in await self.query.finditer_async(node.root)] + ) - def resolve(self, 
matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: - for match_ in matches: - for item in self.items: - yield from item.resolve([match_]) + if nodes.empty(): + return - async def resolve_async( - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: - async for match_ in matches: - for item in self.items: - async for m in item.resolve_async(_alist([match_])): - yield m + value = nodes[0].value + + if not isinstance(value, int): + return + + index = self._normalized_index(node.obj, value) + + with suppress(IndexError): + match = node.new_child( + await self.env.getitem_async(node.obj, index), index + ) + node.add_child(match) + yield match + + def _normalized_index(self, obj: Sequence[object], index: int) -> int: + if index < 0 and len(obj) >= abs(index): + return len(obj) + index + return index class Filter(JSONPathSelector): - """Filter sequence/array items or mapping/object values with a filter expression.""" + """Select array elements or object values according to a filter expression.""" __slots__ = ("expression", "cacheable_nodes") @@ -587,7 +495,7 @@ def __init__( *, env: JSONPathEnvironment, token: Token, - expression: BooleanExpression, + expression: FilterExpression, ) -> None: super().__init__(env=env, token=token) self.expression = expression @@ -607,132 +515,190 @@ def __eq__(self, __value: object) -> bool: def __hash__(self) -> int: return hash((str(self.expression), self.token)) - def resolve( # noqa: PLR0912 - self, matches: Iterable[JSONPathMatch] - ) -> Iterable[JSONPathMatch]: + def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]: if self.cacheable_nodes and self.env.filter_caching: expr = self.expression.cache_tree() else: expr = self.expression - for match in matches: - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - context = FilterContext( - env=self.env, - current=val, - root=match.root, - extra_context=match.filter_context(), - current_key=key, - ) - try: - if 
expr.evaluate(context): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, - ) - match.add_child(_match) - yield _match - except JSONPathTypeError as err: - if not err.token: - err.token = self.token - raise - - elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str): - for i, obj in enumerate(match.obj): - context = FilterContext( - env=self.env, - current=obj, - root=match.root, - extra_context=match.filter_context(), - current_key=i, - ) - try: - if expr.evaluate(context): - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=obj, - parent=match, - parts=match.parts + (i,), - path=f"{match.path}[{i}]", - root=match.root, - ) - match.add_child(_match) - yield _match - except JSONPathTypeError as err: - if not err.token: - err.token = self.token - raise - - async def resolve_async( # noqa: PLR0912 - self, matches: AsyncIterable[JSONPathMatch] - ) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.obj, Mapping): + for key, val in node.obj.items(): + context = FilterContext( + env=self.env, + current=val, + root=node.root, + extra_context=node.filter_context(), + current_key=key, + ) + try: + if expr.evaluate(context): + match = node.new_child(val, key) + node.add_child(match) + yield match + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, obj in enumerate(node.obj): + context = FilterContext( + env=self.env, + current=obj, + root=node.root, + extra_context=node.filter_context(), + current_key=i, + ) + try: + if expr.evaluate(context): + match = node.new_child(obj, i) + node.add_child(match) + yield match + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + + async def resolve_async(self, node: JSONPathMatch) -> 
AsyncIterable[JSONPathMatch]: if self.cacheable_nodes and self.env.filter_caching: expr = self.expression.cache_tree() else: expr = self.expression - async for match in matches: - if isinstance(match.obj, Mapping): - for key, val in match.obj.items(): - context = FilterContext( - env=self.env, - current=val, - root=match.root, - extra_context=match.filter_context(), - current_key=key, - ) - - try: - result = await expr.evaluate_async(context) - except JSONPathTypeError as err: - if not err.token: - err.token = self.token - raise - - if result: - _match = self.env.match_class( - filter_context=match.filter_context(), - obj=val, - parent=match, - parts=match.parts + (key,), - path=match.path + f"[{canonical_string(key)}]", - root=match.root, + if isinstance(node.obj, Mapping): + for key, val in node.obj.items(): + context = FilterContext( + env=self.env, + current=val, + root=node.root, + extra_context=node.filter_context(), + current_key=key, + ) + + try: + result = await expr.evaluate_async(context) + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + + if result: + match = node.new_child(val, key) + node.add_child(match) + yield match + + elif isinstance(node.obj, Sequence) and not isinstance(node.obj, str): + for i, obj in enumerate(node.obj): + context = FilterContext( + env=self.env, + current=obj, + root=node.root, + extra_context=node.filter_context(), + current_key=i, + ) + + try: + result = await expr.evaluate_async(context) + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise + if result: + match = node.new_child(obj, i) + node.add_child(match) + yield match + + +class KeysFilter(JSONPathSelector): + """Selects names from an object's name/value members. + + NOTE: This is a non-standard selector. 
+
+    See https://jg-rp.github.io/json-p3/guides/jsonpath-extra#keys-filter-selector
+    """
+
+    __slots__ = ("expression",)
+
+    def __init__(
+        self,
+        *,
+        env: JSONPathEnvironment,
+        token: Token,
+        expression: FilterExpression,
+    ) -> None:
+        super().__init__(env=env, token=token)
+        self.expression = expression
+
+    def __str__(self) -> str:
+        return f"~?{self.expression}"
+
+    def __eq__(self, __value: object) -> bool:
+        return (
+            isinstance(__value, KeysFilter)
+            and self.expression == __value.expression
+            and self.token == __value.token
+        )
+
+    def __hash__(self) -> int:
+        return hash(("~", str(self.expression), self.token))
+
+    def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]:
+        if isinstance(node.value, Mapping):
+            for key, val in node.value.items():
+                context = FilterContext(
+                    env=self.env,
+                    current=val,
+                    root=node.root,
+                    extra_context=node.filter_context(),
+                    current_key=key,
+                )
+
+                try:
+                    if self.expression.evaluate(context):
+                        match = node.__class__(
+                            filter_context=node.filter_context(),
+                            obj=key,
+                            parent=node,
+                            parts=node.parts
+                            + (f"{self.env.keys_selector_token}{key}",),
+                            path=f"{node.path}[{self.env.keys_selector_token}{canonical_string(key)}]",
+                            root=node.root,
                         )
-                        match.add_child(_match)
-                        yield _match
-
-            elif isinstance(match.obj, Sequence) and not isinstance(match.obj, str):
-                for i, obj in enumerate(match.obj):
-                    context = FilterContext(
-                        env=self.env,
-                        current=obj,
-                        root=match.root,
-                        extra_context=match.filter_context(),
-                        current_key=i,
-                    )
-
-                    try:
-                        result = await expr.evaluate_async(context)
-                    except JSONPathTypeError as err:
-                        if not err.token:
-                            err.token = self.token
-                        raise
-                    if result:
-                        _match = self.env.match_class(
-                            filter_context=match.filter_context(),
-                            obj=obj,
-                            parent=match,
-                            parts=match.parts + (i,),
-                            path=f"{match.path}[{i}]",
-                            root=match.root,
+                        node.add_child(match)
+                        yield match
+                except JSONPathTypeError as err:
+                    if not err.token:
+                        err.token = self.token
+                    raise
+
+    async def 
resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]: + if isinstance(node.value, Mapping): + for key, val in node.value.items(): + context = FilterContext( + env=self.env, + current=val, + root=node.root, + extra_context=node.filter_context(), + current_key=key, + ) + + try: + if await self.expression.evaluate_async(context): + match = node.__class__( + filter_context=node.filter_context(), + obj=key, + parent=node, + parts=node.parts + + (f"{self.env.keys_selector_token}{key}",), + path=f"{node.path}[{self.env.keys_selector_token}{canonical_string(key)}]", + root=node.root, ) - match.add_child(_match) - yield _match + node.add_child(match) + yield match + except JSONPathTypeError as err: + if not err.token: + err.token = self.token + raise class FilterContext: diff --git a/jsonpath/stream.py b/jsonpath/stream.py index 4a38afb..a703d9c 100644 --- a/jsonpath/stream.py +++ b/jsonpath/stream.py @@ -1,99 +1,97 @@ -# noqa: D100 +"""Step through a stream of tokens.""" + from __future__ import annotations -from collections import deque -from typing import Deque -from typing import Iterator +from typing import Iterable from .exceptions import JSONPathSyntaxError from .token import TOKEN_EOF +from .token import TOKEN_WHITESPACE from .token import Token -# ruff: noqa: D102 - class TokenStream: - """Step through or iterate a stream of tokens.""" - - def __init__(self, token_iter: Iterator[Token]): - self.iter = token_iter - self._pushed: Deque[Token] = deque() - self.current = Token("", "", -1, "") - next(self) - - class TokenStreamIterator: - """An iterable token stream.""" - - def __init__(self, stream: TokenStream): - self.stream = stream - - def __iter__(self) -> Iterator[Token]: - return self - - def __next__(self) -> Token: - tok = self.stream.current - if tok.kind is TOKEN_EOF: - self.stream.close() - raise StopIteration - next(self.stream) - return tok - - def __iter__(self) -> Iterator[Token]: - return self.TokenStreamIterator(self) - - def 
__next__(self) -> Token: - tok = self.current - if self._pushed: - self.current = self._pushed.popleft() - elif self.current.kind is not TOKEN_EOF: - try: - self.current = next(self.iter) - except StopIteration: - self.close() - return tok + """Step through a stream of tokens.""" + + def __init__(self, token_iter: Iterable[Token]): + self.tokens = list(token_iter) + self.pos = 0 + path = self.tokens[0].path if self.tokens else "" + self.eof = Token(TOKEN_EOF, "", -1, path) def __str__(self) -> str: # pragma: no cover return f"current: {self.current}\nnext: {self.peek}" - def next_token(self) -> Token: - """Return the next token from the stream.""" - return next(self) - - @property - def peek(self) -> Token: - """Look at the next token.""" - current = next(self) - result = self.current - self.push(current) - return result - - def push(self, tok: Token) -> None: - """Push a token back to the stream.""" - self._pushed.append(self.current) - self.current = tok - - def close(self) -> None: - """Close the stream.""" - self.current = Token(TOKEN_EOF, "", -1, "") + def current(self) -> Token: + """Return the token at the current position in the stream.""" + try: + return self.tokens[self.pos] + except IndexError: + return self.eof + + def next(self) -> Token: + """Return the token at the current position and advance the pointer.""" + try: + token = self.tokens[self.pos] + self.pos += 1 + return token + except IndexError: + return self.eof + + def peek(self, offset: int = 1) -> Token: + """Return the token at current position plus the offset. + + Does not advance the pointer. 
+ """ + try: + return self.tokens[self.pos + offset] + except IndexError: + return self.eof + + def eat(self, kind: str, message: str | None = None) -> Token: + """Assert tge type if the current token and advance the pointer.""" + token = self.next() + if token.kind != kind: + raise JSONPathSyntaxError( + message or f"expected {kind}, found {token.kind!r}", + token=token, + ) + return token def expect(self, *typ: str) -> None: - if self.current.kind not in typ: + """Raise an exception of the current token is not in `typ`.""" + token = self.current() + if token.kind not in typ: if len(typ) == 1: _typ = repr(typ[0]) else: _typ = f"one of {typ!r}" raise JSONPathSyntaxError( - f"expected {_typ}, found {self.current.kind!r}", - token=self.current, + f"expected {_typ}, found {token.kind!r}", + token=token, ) def expect_peek(self, *typ: str) -> None: - if self.peek.kind not in typ: + """Raise an exception of the current token is not in `typ`.""" + token = self.peek() + if token.kind not in typ: if len(typ) == 1: _typ = repr(typ[0]) else: _typ = f"one of {typ!r}" raise JSONPathSyntaxError( - f"expected {_typ}, found {self.peek.kind!r}", - token=self.peek, + f"expected {_typ}, found {token.kind!r}", + token=token, ) + + def expect_peek_not(self, typ: str, message: str) -> None: + """Raise an exception if the next token kind of _typ_.""" + if self.peek().kind == typ: + raise JSONPathSyntaxError(message, token=self.peek()) + + def skip_whitespace(self) -> bool: + """Skip whitespace.""" + if self.current().kind == TOKEN_WHITESPACE: + self.pos += 1 + return True + return False diff --git a/jsonpath/token.py b/jsonpath/token.py index a2392e3..e9d39e0 100644 --- a/jsonpath/token.py +++ b/jsonpath/token.py @@ -1,74 +1,72 @@ """JSONPath tokens.""" + import sys from typing import Tuple # Utility tokens -TOKEN_EOF = sys.intern("EOF") -TOKEN_ILLEGAL = sys.intern("ILLEGAL") -TOKEN_SKIP = sys.intern("SKIP") +TOKEN_EOF = sys.intern("TOKEN_EOF") +TOKEN_WHITESPACE = 
sys.intern("TOKEN_WHITESPACE") +TOKEN_ERROR = sys.intern("TOKEN_ERROR") # JSONPath expression tokens -TOKEN_COLON = sys.intern("COLON") -TOKEN_COMMA = sys.intern("COMMA") -TOKEN_DDOT = sys.intern("DDOT") -TOKEN_DOT = sys.intern("DOT") -TOKEN_DOT_INDEX = sys.intern("DINDEX") -TOKEN_DOT_PROPERTY = sys.intern("DOT_PROPERTY") -TOKEN_FILTER = sys.intern("FILTER") -TOKEN_FAKE_ROOT = sys.intern("FAKE_ROOT") -TOKEN_KEY = sys.intern("KEY") -TOKEN_KEYS = sys.intern("KEYS") -TOKEN_RBRACKET = sys.intern("RBRACKET") -TOKEN_BARE_PROPERTY = sys.intern("BARE_PROPERTY") -TOKEN_LIST_SLICE = sys.intern("LSLICE") -TOKEN_LIST_START = sys.intern("LBRACKET") -TOKEN_PROPERTY = sys.intern("PROP") -TOKEN_ROOT = sys.intern("ROOT") -TOKEN_SLICE_START = sys.intern("SLICE_START") -TOKEN_SLICE_STEP = sys.intern("SLICE_STEP") -TOKEN_SLICE_STOP = sys.intern("SLICE_STOP") -TOKEN_WILD = sys.intern("WILD") +TOKEN_COLON = sys.intern("TOKEN_COLON") +TOKEN_COMMA = sys.intern("TOKEN_COMMA") +TOKEN_DDOT = sys.intern("TOKEN_DDOT") +TOKEN_DOT = sys.intern("TOKEN_DOT") +TOKEN_FILTER = sys.intern("TOKEN_FILTER") +TOKEN_KEY = sys.intern("TOKEN_KEY") +TOKEN_KEYS = sys.intern("TOKEN_KEYS") +TOKEN_KEYS_FILTER = sys.intern("TOKEN_KEYS_FILTER") +TOKEN_LBRACKET = sys.intern("TOKEN_LBRACKET") +TOKEN_PSEUDO_ROOT = sys.intern("TOKEN_PSEUDO_ROOT") +TOKEN_RBRACKET = sys.intern("TOKEN_RBRACKET") +TOKEN_ROOT = sys.intern("TOKEN_ROOT") +TOKEN_WILD = sys.intern("TOKEN_WILD") +TOKEN_NAME = sys.intern("TOKEN_NAME") +TOKEN_DOT_PROPERTY = sys.intern("TOKEN_DOT_PROPERTY") +TOKEN_DOT_KEY_PROPERTY = sys.intern("TOKEN_DOT_KEY_PROPERTY") +TOKEN_KEY_NAME = sys.intern("TOKEN_KEY_NAME") # Filter expression tokens -TOKEN_AND = sys.intern("AND") -TOKEN_BLANK = sys.intern("BLANK") -TOKEN_CONTAINS = sys.intern("CONTAINS") -TOKEN_FILTER_CONTEXT = sys.intern("FILTER_CONTEXT") -TOKEN_FUNCTION = sys.intern("FUNCTION") -TOKEN_EMPTY = sys.intern("EMPTY") -TOKEN_EQ = sys.intern("EQ") -TOKEN_FALSE = sys.intern("FALSE") -TOKEN_FLOAT = 
sys.intern("FLOAT") -TOKEN_GE = sys.intern("GE") -TOKEN_GT = sys.intern("GT") -TOKEN_IN = sys.intern("IN") -TOKEN_INT = sys.intern("INT") -TOKEN_LE = sys.intern("LE") -TOKEN_LG = sys.intern("LG") -TOKEN_LPAREN = sys.intern("LPAREN") -TOKEN_LT = sys.intern("LT") -TOKEN_NE = sys.intern("NE") -TOKEN_NIL = sys.intern("NIL") -TOKEN_NONE = sys.intern("NONE") -TOKEN_NOT = sys.intern("NOT") -TOKEN_NULL = sys.intern("NULL") -TOKEN_OP = sys.intern("OP") -TOKEN_OR = sys.intern("OR") -TOKEN_RE = sys.intern("RE") -TOKEN_RE_FLAGS = sys.intern("RE_FLAGS") -TOKEN_RE_PATTERN = sys.intern("RE_PATTERN") -TOKEN_RPAREN = sys.intern("RPAREN") -TOKEN_SELF = sys.intern("SELF") -TOKEN_STRING = sys.intern("STRING") -TOKEN_DOUBLE_QUOTE_STRING = sys.intern("DOUBLE_QUOTE_STRING") -TOKEN_SINGLE_QUOTE_STRING = sys.intern("SINGLE_QUOTE_STRING") -TOKEN_TRUE = sys.intern("TRUE") -TOKEN_UNDEFINED = sys.intern("UNDEFINED") -TOKEN_MISSING = sys.intern("MISSING") +TOKEN_AND = sys.intern("TOKEN_AND") +TOKEN_BLANK = sys.intern("TOKEN_BLANK") +TOKEN_CONTAINS = sys.intern("TOKEN_CONTAINS") +TOKEN_DOUBLE_QUOTE_STRING = sys.intern("TOKEN_DOUBLE_QUOTE_STRING") +TOKEN_EMPTY = sys.intern("TOKEN_EMPTY") +TOKEN_EQ = sys.intern("TOKEN_EQ") +TOKEN_FALSE = sys.intern("TOKEN_FALSE") +TOKEN_FILTER_CONTEXT = sys.intern("TOKEN_FILTER_CONTEXT") +TOKEN_FLOAT = sys.intern("TOKEN_FLOAT") +TOKEN_FUNCTION = sys.intern("TOKEN_FUNCTION") +TOKEN_GE = sys.intern("TOKEN_GE") +TOKEN_GT = sys.intern("TOKEN_GT") +TOKEN_IN = sys.intern("TOKEN_IN") +TOKEN_INT = sys.intern("TOKEN_INT") +TOKEN_LE = sys.intern("TOKEN_LE") +TOKEN_LG = sys.intern("TOKEN_LG") +TOKEN_LPAREN = sys.intern("TOKEN_LPAREN") +TOKEN_LT = sys.intern("TOKEN_LT") +TOKEN_MISSING = sys.intern("TOKEN_MISSING") +TOKEN_NE = sys.intern("TOKEN_NE") +TOKEN_NIL = sys.intern("TOKEN_NIL") +TOKEN_NONE = sys.intern("TOKEN_NONE") +TOKEN_NOT = sys.intern("TOKEN_NOT") +TOKEN_NULL = sys.intern("TOKEN_NULL") +TOKEN_OP = sys.intern("TOKEN_OP") +TOKEN_OR = sys.intern("TOKEN_OR") +TOKEN_RE 
= sys.intern("TOKEN_RE") +TOKEN_RE_FLAGS = sys.intern("TOKEN_RE_FLAGS") +TOKEN_RE_PATTERN = sys.intern("TOKEN_RE_PATTERN") +TOKEN_RPAREN = sys.intern("TOKEN_RPAREN") +TOKEN_SELF = sys.intern("TOKEN_SELF") +TOKEN_SINGLE_QUOTE_STRING = sys.intern("TOKEN_SINGLE_QUOTE_STRING") +TOKEN_STRING = sys.intern("TOKEN_STRING") +TOKEN_TRUE = sys.intern("TOKEN_TRUE") +TOKEN_UNDEFINED = sys.intern("TOKEN_UNDEFINED") # Extension tokens -TOKEN_UNION = sys.intern("UNION") -TOKEN_INTERSECTION = sys.intern("INTERSECT") +TOKEN_INTERSECTION = sys.intern("TOKEN_INTERSECTION") +TOKEN_UNION = sys.intern("TOKEN_UNION") class Token: @@ -99,7 +97,7 @@ def __init__( def __repr__(self) -> str: # pragma: no cover return ( - f"Token(kind={self.kind!r}, value={self.value!r}, " + f"Token(kind={self.kind}, value={self.value!r}, " f"index={self.index}, path={self.path!r})" ) diff --git a/jsonpath/unescape.py b/jsonpath/unescape.py new file mode 100644 index 0000000..584e3c2 --- /dev/null +++ b/jsonpath/unescape.py @@ -0,0 +1,134 @@ +r"""Replace `\uXXXX` escape sequences with Unicode code points.""" + +from typing import List +from typing import Tuple + +from .exceptions import JSONPathSyntaxError +from .token import Token + + +def unescape_string(value: str, token: Token, quote: str) -> str: + """Return `value` with escape sequences replaced with Unicode code points.""" + unescaped: List[str] = [] + index = 0 + + while index < len(value): + ch = value[index] + if ch == "\\": + index += 1 + _ch, index = _decode_escape_sequence(value, index, token, quote) + unescaped.append(_ch) + else: + _string_from_codepoint(ord(ch), token) + unescaped.append(ch) + index += 1 + return "".join(unescaped) + + +def _decode_escape_sequence( # noqa: PLR0911 + value: str, index: int, token: Token, quote: str +) -> Tuple[str, int]: + try: + ch = value[index] + except IndexError as err: + raise JSONPathSyntaxError("incomplete escape sequence", token=token) from err + + if ch == quote: + return quote, index + if ch == "\\": 
+ return "\\", index + if ch == "/": + return "/", index + if ch == "b": + return "\x08", index + if ch == "f": + return "\x0c", index + if ch == "n": + return "\n", index + if ch == "r": + return "\r", index + if ch == "t": + return "\t", index + if ch == "u": + codepoint, index = _decode_hex_char(value, index, token) + return _string_from_codepoint(codepoint, token), index + + raise JSONPathSyntaxError( + f"unknown escape sequence at index {token.index + index - 1}", + token=token, + ) + + +def _decode_hex_char(value: str, index: int, token: Token) -> Tuple[int, int]: + length = len(value) + + if index + 4 >= length: + raise JSONPathSyntaxError( + f"incomplete escape sequence at index {token.index + index - 1}", + token=token, + ) + + index += 1 # move past 'u' + codepoint = _parse_hex_digits(value[index : index + 4], token) + + if _is_low_surrogate(codepoint): + raise JSONPathSyntaxError( + f"unexpected low surrogate at index {token.index + index - 1}", + token=token, + ) + + if _is_high_surrogate(codepoint): + # expect a surrogate pair + if not ( + index + 9 < length and value[index + 4] == "\\" and value[index + 5] == "u" + ): + raise JSONPathSyntaxError( + f"incomplete escape sequence at index {token.index + index - 2}", + token=token, + ) + + low_surrogate = _parse_hex_digits(value[index + 6 : index + 10], token) + + if not _is_low_surrogate(low_surrogate): + raise JSONPathSyntaxError( + f"unexpected codepoint at index {token.index + index + 4}", + token=token, + ) + + codepoint = 0x10000 + (((codepoint & 0x03FF) << 10) | (low_surrogate & 0x03FF)) + + return (codepoint, index + 9) + + return (codepoint, index + 3) + + +def _parse_hex_digits(digits: str, token: Token) -> int: + codepoint = 0 + for digit in digits.encode(): + codepoint <<= 4 + if digit >= 48 and digit <= 57: + codepoint |= digit - 48 + elif digit >= 65 and digit <= 70: + codepoint |= digit - 65 + 10 + elif digit >= 97 and digit <= 102: + codepoint |= digit - 97 + 10 + else: + raise 
JSONPathSyntaxError( + "invalid \\uXXXX escape sequence", + token=token, + ) + return codepoint + + +def _string_from_codepoint(codepoint: int, token: Token) -> str: + if codepoint <= 0x1F: + raise JSONPathSyntaxError("invalid character", token=token) + return chr(codepoint) + + +def _is_high_surrogate(codepoint: int) -> bool: + return codepoint >= 0xD800 and codepoint <= 0xDBFF + + +def _is_low_surrogate(codepoint: int) -> bool: + return codepoint >= 0xDC00 and codepoint <= 0xDFFF diff --git a/mkdocs.yml b/mkdocs.yml index 8183760..6489f74 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -7,12 +7,14 @@ theme: palette: - scheme: "default" media: "(prefers-color-scheme: light)" + primary: "blue" toggle: icon: "material/weather-sunny" name: "Switch to dark mode" - scheme: "slate" media: "(prefers-color-scheme: dark)" primary: "blue" + accent: blue toggle: icon: "material/weather-night" name: "Switch to light mode" @@ -51,6 +53,7 @@ nav: - JSON Pointers: "pointers.md" - Async Support: "async.md" - API Reference: + - Package Level Functions: "convenience.md" - High Level API: "api.md" - Low Level API: "custom_api.md" - Exceptions: "exceptions.md" diff --git a/pyproject.toml b/pyproject.toml index a801434..ca89351 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,11 +48,12 @@ include = ["/jsonpath"] dependencies = [ "pytest", "pytest-cov", - "black", "mypy", - "ipython", + "regex", + "iregexp-check", "pyyaml", "types-pyyaml", + "types-regex", "twine", "ruff", ] @@ -78,6 +79,9 @@ dependencies = ["black", "mkdocs", "mkdocstrings[python]", "mkdocs-material"] build = "mkdocs build --clean --strict" serve = "mkdocs serve --dev-addr localhost:8000" +[tool.hatch.envs.no-regex] +dependencies = ["pytest"] + [tool.coverage.run] branch = true parallel = true @@ -87,7 +91,8 @@ omit = ["jsonpath/__about__.py", "tests/compliance.py", "tests/consensus.py"] exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] [tool.mypy] -files = "jsonpath" +files = 
["jsonpath", "tests"] +exclude = ["tests/nts", "tests/cts"] python_version = "3.11" disallow_subclassing_any = true disallow_untyped_calls = true @@ -185,4 +190,7 @@ convention = "google" [tool.ruff.lint.per-file-ignores] "jsonpath/__about__.py" = ["D100"] "jsonpath/__init__.py" = ["D104"] +"jsonpath/selectors.py" = ["D102"] +"jsonpath/filter.py" = ["D102", "PLW1641"] +"jsonpath/unescape.py" = ["PLR2004"] "tests/*" = ["D100", "D101", "D104", "D103"] diff --git a/tests/_cts_case.py b/tests/_cts_case.py new file mode 100644 index 0000000..9c652b3 --- /dev/null +++ b/tests/_cts_case.py @@ -0,0 +1,60 @@ +"""A dataclass for a test case suitable for the CTS JSON schema.""" + +from dataclasses import dataclass +from dataclasses import field +from typing import Any +from typing import Dict +from typing import List +from typing import Mapping +from typing import Optional +from typing import Sequence +from typing import Union + +from jsonpath import NodeList + + +@dataclass +class Case: + name: str + selector: str + document: Union[Mapping[str, Any], Sequence[Any], None] = None + result: Any = None + results: Optional[List[Any]] = None + result_paths: Optional[List[str]] = None + results_paths: Optional[List[List[str]]] = None + invalid_selector: Optional[bool] = None + tags: List[str] = field(default_factory=list) + + def as_dict(self) -> Dict[str, Any]: + rv: Dict[str, Any] = { + "name": self.name, + "selector": self.selector, + } + + if self.document is not None: + rv["document"] = self.document + + if self.result is not None: + rv["result"] = self.result + rv["result_paths"] = self.result_paths + else: + rv["results"] = self.results + rv["results_paths"] = self.results_paths + else: + assert self.invalid_selector + rv["invalid_selector"] = True + + rv["tags"] = self.tags + + return rv + + def assert_nodes(self, nodes: NodeList) -> None: + """Assert that `nodes` matches this test case.""" + if self.results is not None: + assert self.results_paths is not None + assert 
nodes.values() in self.results + assert nodes.paths() in self.results_paths + else: + assert self.result_paths is not None + assert nodes.values() == self.result + assert nodes.paths() == self.result_paths diff --git a/tests/consensus.py b/tests/consensus.py index a905500..ad0dd7d 100644 --- a/tests/consensus.py +++ b/tests/consensus.py @@ -8,6 +8,7 @@ We've deliberately named this file so as to exclude it when running `pytest` or `hatch run test`. Target it specifically using `pytest tests/consensus.py`. """ + import operator import unittest from dataclasses import dataclass @@ -44,7 +45,7 @@ class Query: } SKIP = { - "bracket_notation_with_number_on_object": "We support unquoted property names", + # "bracket_notation_with_number_on_object": "We support unquoted property names", "dot_notation_with_number_-1": "conflict with compliance", "dot_notation_with_number_on_object": "conflict with compliance", } diff --git a/tests/current_key_identifier.json b/tests/current_key_identifier.json new file mode 100644 index 0000000..955e4cd --- /dev/null +++ b/tests/current_key_identifier.json @@ -0,0 +1,61 @@ +{ + "tests": [ + { + "name": "current key of an object", + "selector": "$.some[?match(#, '^b.*')]", + "document": { + "some": { "foo": "a", "bar": "b", "baz": "c", "qux": "d" } + }, + "result": ["b", "c"], + "result_paths": ["$['some']['bar']", "$['some']['baz']"] + }, + { + "name": "current key of an array", + "selector": "$.some[?# > 1]", + "document": { "some": ["other", "thing", "foo", "bar"] }, + "result": ["foo", "bar"], + "result_paths": ["$['some'][2]", "$['some'][3]"] + }, + { + "name": "current key of a string selects nothing", + "selector": "$.some[?# > 1]", + "document": { "some": "thing" }, + "result": [], + "result_paths": [] + }, + { + "name": "current key of an object", + "selector": "$.some[?match(#, '^b.*')]", + "document": { + "some": { "foo": "a", "bar": "b", "baz": "c", "qux": "d" } + }, + "result": ["b", "c"], + "result_paths": ["$['some']['bar']", 
"$['some']['baz']"], + "tags": ["extra"] + }, + { + "name": "current key of an array", + "selector": "$.some[?# > 1]", + "document": { "some": ["other", "thing", "foo", "bar"] }, + "result": ["foo", "bar"], + "result_paths": ["$['some'][2]", "$['some'][3]"], + "tags": ["extra"] + }, + { + "name": "current key identifier, match on object names", + "selector": "$[?match(#, '^ab.*') && length(@) > 0 ]", + "document": { "abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": [] }, + "result": [[1, 2, 3], [6]], + "result_paths": ["$['abc']", "$['abx']"], + "tags": ["extra"] + }, + { + "name": "current key identifier, compare current array index", + "selector": "$.abc[?(# >= 1)]", + "document": { "abc": [1, 2, 3], "def": [4, 5], "abx": [6], "aby": [] }, + "result": [2, 3], + "result_paths": ["$['abc'][1]", "$['abc'][2]"], + "tags": ["extra"] + } + ] +} diff --git a/tests/key_selector.json b/tests/key_selector.json new file mode 100644 index 0000000..77ef007 --- /dev/null +++ b/tests/key_selector.json @@ -0,0 +1,98 @@ +{ + "tests": [ + { + "name": "singular key from an object", + "selector": "$.some[~'other']", + "document": { "some": { "other": "foo", "thing": "bar" } }, + "result": ["other"], + "result_paths": ["$['some'][~'other']"], + "tags": ["extra"] + }, + { + "name": "singular key from an object, does not exist", + "selector": "$.some[~'else']", + "document": { "some": { "other": "foo", "thing": "bar" } }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "singular key from an array", + "selector": "$.some[~'1']", + "document": { "some": ["foo", "bar"] }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "singular key from an object, shorthand", + "selector": "$.some.~other", + "document": { "some": { "other": "foo", "thing": "bar" } }, + "result": ["other"], + "result_paths": ["$['some'][~'other']"], + "tags": ["extra"] + }, + { + "name": "recursive key from an object", + "selector": "$.some..[~'other']", + 
"document": { + "some": { "other": "foo", "thing": "bar", "else": { "other": "baz" } } + }, + "result": ["other", "other"], + "result_paths": ["$['some'][~'other']", "$['some']['else'][~'other']"], + "tags": ["extra"] + }, + { + "name": "recursive key from an object, shorthand", + "selector": "$.some..~other", + "document": { + "some": { "other": "foo", "thing": "bar", "else": { "other": "baz" } } + }, + "result": ["other", "other"], + "result_paths": ["$['some'][~'other']", "$['some']['else'][~'other']"], + "tags": ["extra"] + }, + { + "name": "recursive key from an object, does not exist", + "selector": "$.some..[~'nosuchthing']", + "document": { + "some": { "other": "foo", "thing": "bar", "else": { "other": "baz" } } + }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "key of nested object", + "selector": "$.a[0].~c", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": ["c"], + "result_paths": ["$['a'][0][~'c']"], + "tags": ["extra"] + }, + { + "name": "key does not exist", + "selector": "$.a[1].~c", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "descendant, single quoted key", + "selector": "$..[~'b']", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": ["b", "b"], + "result_paths": ["$['a'][0][~'b']", "$['a'][1][~'b']"], + "tags": ["extra"] + }, + { + "name": "descendant, double quoted key", + "selector": "$..[~\"b\"]", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": ["b", "b"], + "result_paths": ["$['a'][0][~'b']", "$['a'][1][~'b']"], + "tags": ["extra"] + } + ] +} diff --git a/tests/keys_filter_selector.json b/tests/keys_filter_selector.json new file mode 100644 index 0000000..3132b22 --- /dev/null +++ b/tests/keys_filter_selector.json @@ -0,0 +1,48 @@ +{ + "tests": [ + { + "name": "filter keys from an object", + "selector": "$.some[~?match(@, '^b.*')]", + 
"document": { "some": { "other": "foo", "thing": "bar" } }, + "result": ["thing"], + "result_paths": ["$['some'][~'thing']"], + "tags": ["extra"] + }, + { + "name": "keys filter selector, conditionally select object keys", + "selector": "$.*[~?length(@) > 2]", + "document": [ + { "a": [1, 2, 3], "b": [4, 5] }, + { "c": { "x": [1, 2] } }, + { "d": [1, 2, 3] } + ], + "result": ["a", "d"], + "result_paths": ["$[0][~'a']", "$[2][~'d']"], + "tags": ["extra"] + }, + { + "name": "keys filter selector, existence test", + "selector": "$.*[~?@.x]", + "document": [ + { "a": [1, 2, 3], "b": [4, 5] }, + { "c": { "x": [1, 2] } }, + { "d": [1, 2, 3] } + ], + "result": ["c"], + "result_paths": ["$[1][~'c']"], + "tags": ["extra"] + }, + { + "name": "keys filter selector, keys from an array", + "selector": "$[~?(true == true)]", + "document": [ + { "a": [1, 2, 3], "b": [4, 5] }, + { "c": { "x": [1, 2] } }, + { "d": [1, 2, 3] } + ], + "result": [], + "result_paths": [], + "tags": ["extra"] + } + ] +} diff --git a/tests/keys_selector.json b/tests/keys_selector.json new file mode 100644 index 0000000..25227d5 --- /dev/null +++ b/tests/keys_selector.json @@ -0,0 +1,78 @@ +{ + "tests": [ + { + "name": "keys from an object", + "selector": "$.some[~]", + "document": { "some": { "other": "foo", "thing": "bar" } }, + "result": ["other", "thing"], + "result_paths": ["$['some'][~'other']", "$['some'][~'thing']"], + "tags": ["extra"] + }, + { + "name": "shorthand keys from an object", + "selector": "$.some.~", + "document": { "some": { "other": "foo", "thing": "bar" } }, + "result": ["other", "thing"], + "result_paths": ["$['some'][~'other']", "$['some'][~'thing']"], + "tags": ["extra"] + }, + { + "name": "keys from an array", + "selector": "$.some[~]", + "document": { "some": ["other", "thing"] }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "shorthand keys from an array", + "selector": "$.some.~", + "document": { "some": ["other", "thing"] }, + "result": [], + 
"result_paths": [], + "tags": ["extra"] + }, + { + "name": "recurse object keys", + "selector": "$..~", + "document": { "some": { "thing": "else", "foo": { "bar": "baz" } } }, + "result": ["some", "thing", "foo", "bar"], + "result_paths": [ + "$[~'some']", + "$['some'][~'thing']", + "$['some'][~'foo']", + "$['some']['foo'][~'bar']" + ], + "tags": ["extra"] + }, + { + "name": "object key", + "selector": "$.a[0].~", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": ["b", "c"], + "result_paths": ["$['a'][0][~'b']", "$['a'][0][~'c']"], + "tags": ["extra"] + }, + { + "name": "array key", + "selector": "$.a.~", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "descendant keys", + "selector": "$..[~]", + "document": { "a": [{ "b": "x", "c": "z" }, { "b": "y" }] }, + "result": ["a", "b", "c", "b"], + "result_paths": [ + "$[~'a']", + "$['a'][0][~'b']", + "$['a'][0][~'c']", + "$['a'][1][~'b']" + ], + "tags": ["extra"] + } + ] +} diff --git a/tests/membership_operators.json b/tests/membership_operators.json new file mode 100644 index 0000000..15242bd --- /dev/null +++ b/tests/membership_operators.json @@ -0,0 +1,88 @@ +{ + "tests": [ + { + "name": "array contains literal string", + "selector": "$[?@.a contains 'foo']", + "document": [{ "a": ["foo", "bar"] }, { "a": ["bar"] }], + "result": [ + { + "a": ["foo", "bar"] + } + ], + "result_paths": ["$[0]"], + "tags": ["extra"] + }, + { + "name": "object contains literal string", + "selector": "$[?@.a contains 'foo']", + "document": [{ "a": { "foo": "bar" } }, { "a": { "bar": "baz" } }], + "result": [ + { + "a": { "foo": "bar" } + } + ], + "result_paths": ["$[0]"], + "tags": ["extra"] + }, + { + "name": "string literal in array", + "selector": "$[?'foo' in @.a]", + "document": [{ "a": ["foo", "bar"] }, { "a": ["bar"] }], + "result": [ + { + "a": ["foo", "bar"] + } + ], + "result_paths": ["$[0]"], + "tags": ["extra"] + 
}, + { + "name": "string literal in object", + "selector": "$[?'foo' in @.a]", + "document": [{ "a": { "foo": "bar" } }, { "a": { "bar": "baz" } }], + "result": [ + { + "a": { "foo": "bar" } + } + ], + "result_paths": ["$[0]"], + "tags": ["extra"] + }, + { + "name": "string from embedded query in object", + "selector": "$[?$[-1] in @.a]", + "document": [{ "a": { "foo": "bar" } }, { "a": { "bar": "baz" } }, "foo"], + "result": [ + { + "a": { "foo": "bar" } + } + ], + "result_paths": ["$[0]"], + "tags": ["extra"] + }, + { + "name": "embedded query in list literal", + "selector": "$[?(@.a in ['bar', 'baz'])]", + "document": [{ "a": "foo" }, { "a": "bar" }], + "result": [ + { + "a": "bar" + } + ], + "result_paths": ["$[1]"], + "tags": ["extra"] + }, + { + "name": "list literal contains embedded query", + "selector": "$[?(['bar', 'baz'] contains @.a)]", + "document": [{ "a": "foo" }, { "a": "bar" }], + "result": [ + { + "a": "bar" + } + ], + "result_paths": ["$[1]"], + "tags": ["extra"] + } + ] +} diff --git a/tests/pseudo_root_identifier.json b/tests/pseudo_root_identifier.json new file mode 100644 index 0000000..93dd194 --- /dev/null +++ b/tests/pseudo_root_identifier.json @@ -0,0 +1,28 @@ +{ + "tests": [ + { + "name": "conditionally select root value", + "selector": "^[?@.some.thing > 7]", + "document": { "some": { "thing": 42 } }, + "result": [{ "some": { "thing": 42 } }], + "result_paths": ["^[0]"], + "tags": ["extra"] + }, + { + "name": "embedded pseudo root query", + "selector": "^[?@.some.thing > value(^.*.num)]", + "document": { "some": { "thing": 42 }, "num": 7 }, + "result": [{ "some": { "thing": 42 }, "num": 7 }], + "result_paths": ["^[0]"], + "tags": ["extra"] + }, + { + "name": "embedded root query", + "selector": "^[?@.some.thing > value($.num)]", + "document": { "some": { "thing": 42 }, "num": 7 }, + "result": [{ "some": { "thing": 42 }, "num": 7 }], + "result_paths": ["^[0]"], + "tags": ["extra"] + } + ] +} diff --git a/tests/query_intersection.json 
b/tests/query_intersection.json new file mode 100644 index 0000000..465bedd --- /dev/null +++ b/tests/query_intersection.json @@ -0,0 +1,28 @@ +{ + "tests": [ + { + "name": "intersection of two paths, no common items", + "selector": "$.some & $.thing", + "document": { + "some": [1, 2, 3], + "thing": [4, 5, 6], + "other": ["a", "b", "c"] + }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "intersection of two paths, with common items", + "selector": "$.some & $.thing", + "document": { + "some": [1, 2, 3], + "thing": [1, 2, 3], + "other": ["a", "b", "c"] + }, + "result": [[1, 2, 3]], + "result_paths": ["$['some']"], + "tags": ["extra"] + } + ] +} diff --git a/tests/query_union.json b/tests/query_union.json new file mode 100644 index 0000000..408c8ad --- /dev/null +++ b/tests/query_union.json @@ -0,0 +1,35 @@ +{ + "tests": [ + { + "name": "union of two paths", + "selector": "$.some | $.thing", + "document": { + "some": [1, 2, 3], + "thing": [4, 5, 6], + "other": ["a", "b", "c"] + }, + "result": [ + [1, 2, 3], + [4, 5, 6] + ], + "result_paths": ["$['some']", "$['thing']"], + "tags": ["extra"] + }, + { + "name": "union of three paths", + "selector": "$.some | $.thing | $.other", + "document": { + "some": [1, 2, 3], + "thing": [4, 5, 6], + "other": ["a", "b", "c"] + }, + "result": [ + [1, 2, 3], + [4, 5, 6], + ["a", "b", "c"] + ], + "result_paths": ["$['some']", "$['thing']", "$['other']"], + "tags": ["extra"] + } + ] +} diff --git a/tests/regex_operator.json b/tests/regex_operator.json new file mode 100644 index 0000000..4816304 --- /dev/null +++ b/tests/regex_operator.json @@ -0,0 +1,36 @@ +{ + "tests": [ + { + "name": "regex literal, match", + "selector": "$.some[?(@.thing =~ /fo[a-z]/)]", + "document": { "some": [{ "thing": "foo" }] }, + "result": [{ "thing": "foo" }], + "result_paths": ["$['some'][0]"], + "tags": ["extra"] + }, + { + "name": "regex literal, no match", + "selector": "$.some[?(@.thing =~ /fo[a-z]/)]", + "document": { 
"some": [{ "thing": "foO" }] }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "regex literal, case insensitive match", + "selector": "$.some[?(@.thing =~ /fo[a-z]/i)]", + "document": { "some": [{ "thing": "foO" }] }, + "result": [{ "thing": "foO" }], + "result_paths": ["$['some'][0]"], + "tags": ["extra"] + }, + { + "name": "regex literal, escaped slash", + "selector": "$.some[?(@.thing =~ /fo\\\\[a-z]/)]", + "document": { "some": [{ "thing": "fo\\b" }] }, + "result": [{ "thing": "fo\\b" }], + "result_paths": ["$['some'][0]"], + "tags": ["extra"] + } + ] +} diff --git a/tests/singular_path_selector.json b/tests/singular_path_selector.json new file mode 100644 index 0000000..ba0393e --- /dev/null +++ b/tests/singular_path_selector.json @@ -0,0 +1,88 @@ +{ + "tests": [ + { + "name": "object name from embedded singular query", + "selector": "$.a[$.b[1]]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] } }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": [{ "q": [4, 5, 6] }], + "result_paths": ["$['a']['p']"], + "tags": ["extra"] + }, + { + "name": "array index from embedded singular query", + "selector": "$.a.j[$['c d'].x.y]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] } }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": [2], + "result_paths": ["$['a']['j'][1]"], + "tags": ["extra"] + }, + { + "name": "embedded singular query does not resolve to a string or int value", + "selector": "$.a[$.b]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] } }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "object name from embedded singular query resolving to nothing", + "selector": "$.a[$.foo]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] } }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": [], + "result_paths": [], + "tags": 
["extra"] + }, + { + "name": "array index from embedded singular query resolving to nothing", + "selector": "$.b[$.foo]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] } }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "array index from embedded singular query is not an int", + "selector": "$.b[$.a.z]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] }, "z": "foo" }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": [], + "result_paths": [], + "tags": ["extra"] + }, + { + "name": "array index from embedded singular query is negative", + "selector": "$.b[$.a.z]", + "document": { + "a": { "j": [1, 2, 3], "p": { "q": [4, 5, 6] }, "z": -1 }, + "b": ["j", "p", "q"], + "c d": { "x": { "y": 1 } } + }, + "result": ["q"], + "result_paths": ["$['b'][2]"], + "tags": ["extra"] + } + ] +} diff --git a/tests/test_cli.py b/tests/test_cli.py index 16d7918..1b1f489 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,4 +1,5 @@ """Test cases for the command line interface.""" + import argparse import json import pathlib @@ -291,6 +292,50 @@ def test_json_path( assert len(json.load(fd)) == 4 # noqa: PLR2004 +def test_json_path_strict( + parser: argparse.ArgumentParser, + sample_target: str, + outfile: str, +) -> None: + """Test a valid JSONPath.""" + args = parser.parse_args( + [ + "--debug", + "path", + "-q", + "price_cap", # No root identifier is an error in strict mode. + "-f", + sample_target, + "-o", + outfile, + "--strict", + ] + ) + + with pytest.raises(JSONPathSyntaxError): + handle_path_command(args) + + args = parser.parse_args( + [ + "path", + "-q", + "$.price_cap", # With a root identifier is OK. 
+ "-f", + sample_target, + "-o", + outfile, + "--strict", + ] + ) + + handle_path_command(args) + args.output.flush() + + with open(outfile, "r") as fd: + rv = json.load(fd) + assert rv == [10] + + def test_pointer_command_invalid_target( parser: argparse.ArgumentParser, invalid_target: str, diff --git a/tests/test_compliance.py b/tests/test_compliance.py index 38592cb..daf4e71 100644 --- a/tests/test_compliance.py +++ b/tests/test_compliance.py @@ -7,144 +7,136 @@ import asyncio import json import operator -from dataclasses import dataclass -from dataclasses import field -from typing import Any from typing import List -from typing import Mapping -from typing import Optional -from typing import Sequence -from typing import Union import pytest -import jsonpath - - -@dataclass -class Case: - name: str - selector: str - document: Union[Mapping[str, Any], Sequence[Any], None] = None - result: Any = None - results: Optional[List[Any]] = None - result_paths: Optional[List[str]] = None - results_paths: Optional[List[List[str]]] = None - invalid_selector: Optional[bool] = None - tags: List[str] = field(default_factory=list) - - -SKIP = { - "basic, no leading whitespace": "flexible whitespace policy", - "basic, no trailing whitespace": "flexible whitespace policy", - "basic, bald descendant segment": "almost has a consensus", - "filter, index segment on object, selects nothing": "flexible selector policy", - "functions, match, dot matcher on \\u2028": "standard library re policy", - "functions, match, dot matcher on \\u2029": "standard library re policy", - "functions, search, dot matcher on \\u2028": "standard library re policy", - "functions, search, dot matcher on \\u2029": "standard library re policy", - "functions, match, filter, match function, unicode char class, uppercase": "\\p not supported", # noqa: E501 - "functions, match, filter, match function, unicode char class negated, uppercase": "\\P not supported", # noqa: E501 - "functions, search, filter, search 
function, unicode char class, uppercase": "\\p not supported", # noqa: E501 - "functions, search, filter, search function, unicode char class negated, uppercase": "\\P not supported", # noqa: E501 - "filter, equals number, decimal fraction, no fractional digit": "expected behavior policy", # noqa: E501 - "filter, equals number, decimal fraction, no int digit": "expected behavior policy", - "filter, equals number, invalid no int digit": "expected behavior policy", - "filter, equals number, invalid 00": "expected behavior policy", - "filter, equals number, invalid leading 0": "expected behavior policy", - "filter, equals number, invalid no fractional digit": "expected behavior policy", - "filter, equals number, invalid no fractional digit e": "expected behavior policy", - "slice selector, start, leading 0": "expected behavior policy", - "slice selector, start, -0": "expected behavior policy", - "slice selector, start, leading -0": "expected behavior policy", - "slice selector, end, leading 0": "expected behavior policy", - "slice selector, end, minus space": "expected behavior policy", - "slice selector, end, -0": "expected behavior policy", - "slice selector, end, leading -0": "expected behavior policy", - "slice selector, step, leading 0": "expected behavior policy", - "slice selector, step, minus space": "expected behavior policy", - "slice selector, step, -0": "expected behavior policy", - "slice selector, step, leading -0": "expected behavior policy", - "filter, true, incorrectly capitalized": "flexible literal policy", - "filter, false, incorrectly capitalized": "flexible literal policy", - "filter, null, incorrectly capitalized": "flexible literal policy", - "name selector, double quotes, single high surrogate": "expected behavior policy", - "name selector, double quotes, single low surrogate": "expected behavior policy", - "name selector, double quotes, high high surrogate": "expected behavior policy", - "name selector, double quotes, low low surrogate": 
"expected behavior policy", - "name selector, double quotes, surrogate non-surrogate": "expected behavior policy", - "name selector, double quotes, non-surrogate surrogate": "expected behavior policy", - "name selector, double quotes, surrogate supplementary": "expected behavior policy", - "name selector, double quotes, supplementary surrogate": "expected behavior policy", - "whitespace, selectors, space between dot and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, newline between dot and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, tab between dot and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, return between dot and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, space between recursive descent and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, newline between recursive descent and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, tab between recursive descent and name": "flexible whitespace policy", # noqa: E501 - "whitespace, selectors, return between recursive descent and name": "flexible whitespace policy", # noqa: E501 +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathError +from jsonpath import NodeList + +from ._cts_case import Case + +# CTS tests that are expected to fail when JSONPathEnvironment.strict is False. 
+XFAIL_INVALID = {
+    "basic, no leading whitespace",
+    "basic, no trailing whitespace",
+    "filter, equals number, invalid 00",
+    "filter, equals number, invalid leading 0",
+    "filter, true, incorrectly capitalized",
+    "filter, false, incorrectly capitalized",
+    "filter, null, incorrectly capitalized",
+    "name selector, double quotes, single high surrogate",
+    "name selector, double quotes, single low surrogate",
+    "name selector, double quotes, high high surrogate",
+    "name selector, double quotes, low low surrogate",
+    "name selector, double quotes, surrogate non-surrogate",
+    "name selector, double quotes, non-surrogate surrogate",
+    "name selector, double quotes, surrogate supplementary",
+    "name selector, double quotes, supplementary surrogate",
 }
+XFAIL_VALID = {
+    "filter, index segment on object, selects nothing",
+}
+
+# CTS tests that will only pass if the third party `regex` package is installed.
+REGEX_ONLY = {
+    "functions, match, dot matcher on \\u2028",
+    "functions, match, dot matcher on \\u2029",
+    "functions, search, dot matcher on \\u2028",
+    "functions, search, dot matcher on \\u2029",
+    "functions, match, filter, match function, unicode char class, uppercase",
+    "functions, match, filter, match function, unicode char class negated, uppercase",
+    "functions, search, filter, search function, unicode char class, uppercase",
+    "functions, search, filter, search function, unicode char class negated, uppercase",
+}
 
-def cases() -> List[Case]:
-    with open("tests/cts/cts.json", encoding="utf8") as fd:
-        data = json.load(fd)
-    return [Case(**case) for case in data["tests"]]
+with open("tests/cts/cts.json", encoding="utf8") as fd:
+    data = json.load(fd)
+
+CASES = [Case(**case) for case in data["tests"]]
 
 
 def valid_cases() -> List[Case]:
-    return [case for case in cases() if not case.invalid_selector]
+    return [case for case in CASES if not case.invalid_selector]
 
 
 def invalid_cases() -> List[Case]:
-    return [case for case in cases() if case.invalid_selector]
+ return [case for case in CASES if case.invalid_selector] + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=True) @pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name")) -def test_compliance(case: Case) -> None: - if case.name in SKIP: - pytest.skip(reason=SKIP[case.name]) +def test_compliance_strict(env: JSONPathEnvironment, case: Case) -> None: + if not env.regex_available and case.name in REGEX_ONLY: + pytest.skip(reason="requires regex package") assert case.document is not None - nodes = jsonpath.NodeList(jsonpath.finditer(case.selector, case.document)) - - if case.results is not None: - assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths - else: - assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) @pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name")) -def test_compliance_async(case: Case) -> None: - if case.name in SKIP: - pytest.skip(reason=SKIP[case.name]) +def test_compliance_async_strict(env: JSONPathEnvironment, case: Case) -> None: + if not env.regex_available and case.name in REGEX_ONLY: + pytest.skip(reason="requires regex package") - async def coro() -> jsonpath.NodeList: + async def coro() -> NodeList: assert case.document is not None - it = await jsonpath.finditer_async(case.selector, case.document) - return jsonpath.NodeList([node async for node in it]) + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", invalid_cases(), ids=operator.attrgetter("name")) +def test_invalid_selectors_strict(env: JSONPathEnvironment, case: Case) -> None: + with pytest.raises(JSONPathError): + 
env.compile(case.selector) + + +@pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name")) +def test_compliance_lax(case: Case) -> None: + env = JSONPathEnvironment(strict=False) + + if not env.regex_available and case.name in REGEX_ONLY: + pytest.skip(reason="requires regex package") + + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) if case.results is not None: assert case.results_paths is not None - assert nodes.values() in case.results - assert nodes.paths() in case.results_paths + + if case.name in XFAIL_VALID: + assert nodes.values() not in case.results + assert nodes.paths() in case.results_paths + else: + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths else: assert case.result_paths is not None - assert nodes.values() == case.result - assert nodes.paths() == case.result_paths + + if case.name in XFAIL_VALID: + assert nodes.values() != case.result + assert nodes.paths() != case.result_paths + else: + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths @pytest.mark.parametrize("case", invalid_cases(), ids=operator.attrgetter("name")) -def test_invalid_selectors(case: Case) -> None: - if case.name in SKIP: - pytest.skip(reason=SKIP[case.name]) +def test_invalid_selectors_lax(case: Case) -> None: + env = JSONPathEnvironment(strict=False) - with pytest.raises(jsonpath.JSONPathError): - jsonpath.compile(case.selector) + if case.name in XFAIL_INVALID: + env.compile(case.selector) + else: + with pytest.raises(JSONPathError): + env.compile(case.selector) diff --git a/tests/test_concrete_path.py b/tests/test_concrete_path.py deleted file mode 100644 index 3ab6de1..0000000 --- a/tests/test_concrete_path.py +++ /dev/null @@ -1,65 +0,0 @@ -import asyncio -import dataclasses -import operator -from typing import Any -from typing import List -from typing import Mapping -from typing import Sequence -from typing import Union - -import pytest 
- -from jsonpath import JSONPathEnvironment -from jsonpath import JSONPathMatch - - -@dataclasses.dataclass -class Case: - description: str - path: str - data: Union[Sequence[Any], Mapping[str, Any]] - want: List[str] - - -TEST_CASES = [ - Case( - description="normalized negative index", - path="$.a[-2]", - data={"a": [1, 2, 3, 4, 5]}, - want=["$['a'][3]"], - ), - Case( - description="normalized reverse slice", - path="$.a[3:0:-1]", - data={"a": [1, 2, 3, 4, 5]}, - want=["$['a'][3]", "$['a'][2]", "$['a'][1]"], - ), -] - - -@pytest.fixture() -def env() -> JSONPathEnvironment: - return JSONPathEnvironment() - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - matches = list(path.finditer(case.data)) - assert len(matches) == len(case.want) - for match, want in zip(matches, case.want): # noqa: B905 - assert match.path == want - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find_async(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - - async def coro() -> List[JSONPathMatch]: - matches = await path.finditer_async(case.data) - return [match async for match in matches] - - matches = asyncio.run(coro()) - assert len(matches) == len(case.want) - for match, want in zip(matches, case.want): # noqa: B905 - assert match.path == want diff --git a/tests/test_convenience_api.py b/tests/test_convenience_api.py new file mode 100644 index 0000000..06a4b69 --- /dev/null +++ b/tests/test_convenience_api.py @@ -0,0 +1,104 @@ +import asyncio +from typing import List + +import pytest + +import jsonpath + + +def test_convenience_compile() -> None: + # Implicit root identifier works by default, but not when strict=True. 
+ path = jsonpath.compile("a.*") + assert isinstance(path, jsonpath.JSONPath) + assert path.findall({"a": [1, 2, 3]}) == [1, 2, 3] + + +def test_convenience_compile_strict() -> None: + with pytest.raises(jsonpath.JSONPathSyntaxError): + jsonpath.compile("a.*", strict=True) + + path = jsonpath.compile("$.a.*", strict=True) + assert isinstance(path, jsonpath.JSONPath) + assert path.findall({"a": [1, 2, 3]}) == [1, 2, 3] + + +def test_convenience_findall() -> None: + assert jsonpath.findall("a.*", {"a": [1, 2, 3]}) == [1, 2, 3] + + +def test_convenience_findall_strict() -> None: + with pytest.raises(jsonpath.JSONPathSyntaxError): + jsonpath.findall("a.*", {"a": [1, 2, 3]}, strict=True) + + assert jsonpath.findall("$.a.*", {"a": [1, 2, 3]}, strict=True) == [1, 2, 3] + + +def test_convenience_findall_async() -> None: + async def coro() -> List[object]: + return await jsonpath.findall_async("a.*", {"a": [1, 2, 3]}) + + assert asyncio.run(coro()) == [1, 2, 3] + + +def test_convenience_findall_async_strict() -> None: + async def coro() -> List[object]: + with pytest.raises(jsonpath.JSONPathSyntaxError): + await jsonpath.findall_async("a.*", {"a": [1, 2, 3]}, strict=True) + + return await jsonpath.findall_async("$.a.*", {"a": [1, 2, 3]}, strict=True) + + assert asyncio.run(coro()) == [1, 2, 3] + + +def test_convenience_finditer() -> None: + matches = list(jsonpath.finditer("a.*", {"a": [1, 2, 3]})) + assert [m.obj for m in matches] == [1, 2, 3] + + +def test_convenience_finditer_strict() -> None: + with pytest.raises(jsonpath.JSONPathSyntaxError): + list(jsonpath.finditer("a.*", {"a": [1, 2, 3]}, strict=True)) + + matches = list(jsonpath.finditer("$.a.*", {"a": [1, 2, 3]}, strict=True)) + assert [m.obj for m in matches] == [1, 2, 3] + + +def test_convenience_finditer_async_strict() -> None: + async def coro() -> List[object]: + with pytest.raises(jsonpath.JSONPathSyntaxError): + await jsonpath.finditer_async("a.*", {"a": [1, 2, 3]}, strict=True) + + it = await 
jsonpath.finditer_async("$.a.*", {"a": [1, 2, 3]}, strict=True) + return [m.obj async for m in it] + + assert asyncio.run(coro()) == [1, 2, 3] + + +def test_convenience_match() -> None: + match = jsonpath.match("a.*", {"a": [1, 2, 3]}) + assert isinstance(match, jsonpath.JSONPathMatch) + assert match.obj == 1 + + +def test_convenience_match_strict() -> None: + with pytest.raises(jsonpath.JSONPathSyntaxError): + jsonpath.match("a.*", {"a": [1, 2, 3]}, strict=True) + + match = jsonpath.match("$.a.*", {"a": [1, 2, 3]}) + assert isinstance(match, jsonpath.JSONPathMatch) + assert match.obj == 1 + + +def test_convenience_query() -> None: + query = jsonpath.query("a.*", {"a": [1, 2, 3]}) + assert isinstance(query, jsonpath.Query) + assert list(query.values()) == [1, 2, 3] + + +def test_convenience_query_strict() -> None: + with pytest.raises(jsonpath.JSONPathSyntaxError): + jsonpath.query("a.*", {"a": [1, 2, 3]}, strict=True) + + query = jsonpath.query("$.a.*", {"a": [1, 2, 3]}) + assert isinstance(query, jsonpath.Query) + assert list(query.values()) == [1, 2, 3] diff --git a/tests/test_current_key_identifier.py b/tests/test_current_key_identifier.py new file mode 100644 index 0000000..54a0131 --- /dev/null +++ b/tests/test_current_key_identifier.py @@ -0,0 +1,46 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/current_key_identifier.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_current_key_identifier(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + 
case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_current_key_identifier_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_current_key_identifier_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_env.py b/tests/test_env.py index 5908baa..51b91d8 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -1,4 +1,5 @@ """JSONPathEnvironment API test cases.""" + import asyncio from typing import List @@ -178,7 +179,7 @@ def test_custom_fake_root_identifier_token() -> None: """Test that we can change the non-standard fake root identifier.""" class MyJSONPathEnvironment(JSONPathEnvironment): - fake_root_token = "$$" + pseudo_root_token = "$$" env = MyJSONPathEnvironment() data = {"foo": {"a": 1, "b": 2, "c": 3}} @@ -191,7 +192,7 @@ def test_disable_fake_root_identifier() -> None: """Test that we can disable the non-standard fake root identifier.""" class MyJSONPathEnvironment(JSONPathEnvironment): - fake_root_token = "" + pseudo_root_token = "" env = MyJSONPathEnvironment() with pytest.raises(JSONPathSyntaxError): diff --git a/tests/test_errors.py b/tests/test_errors.py index 1d1f46a..b39caab 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -1,10 +1,12 @@ from operator import attrgetter +from typing import Any from typing import List from typing import NamedTuple import pytest from jsonpath import JSONPathEnvironment +from jsonpath.exceptions import JSONPathRecursionError from jsonpath.exceptions import JSONPathSyntaxError from jsonpath.exceptions 
import JSONPathTypeError @@ -15,12 +17,12 @@ def env() -> JSONPathEnvironment: def test_unclosed_selection_list(env: JSONPathEnvironment) -> None: - with pytest.raises(JSONPathSyntaxError, match=r"unexpected end of selector list"): + with pytest.raises(JSONPathSyntaxError, match=r"unexpected end of segment"): env.compile("$[1,2") def test_function_missing_param(env: JSONPathEnvironment) -> None: - with pytest.raises(JSONPathTypeError): + with pytest.raises(JSONPathTypeError, match=r"length\(\) requires 1 argument"): env.compile("$[?(length()==1)]") @@ -39,6 +41,16 @@ def test_unbalanced_parens(env: JSONPathEnvironment) -> None: env.compile("$[?((@.foo)]") +def test_root_dot(env: JSONPathEnvironment) -> None: + with pytest.raises(JSONPathSyntaxError): + env.compile("$.") + + +def test_embedded_query_is_not_singular(env: JSONPathEnvironment) -> None: + with pytest.raises(JSONPathSyntaxError): + env.compile("$.a[$.*]") + + class FilterLiteralTestCase(NamedTuple): description: str query: str @@ -67,3 +79,29 @@ def test_filter_literals_must_be_compared( ) -> None: with pytest.raises(JSONPathSyntaxError): env.compile(case.query) + + +def test_recursive_data() -> None: + class MockEnv(JSONPathEnvironment): + nondeterministic = False + + env = MockEnv() + query = "$..a" + arr: List[Any] = [] + data: Any = {"foo": arr} + arr.append(data) + + with pytest.raises(JSONPathRecursionError): + env.findall(query, data) + + +def test_low_recursion_limit() -> None: + class MockEnv(JSONPathEnvironment): + max_recursion_depth = 3 + + env = MockEnv() + query = "$..a" + data = {"foo": [{"bar": [1, 2, 3]}]} + + with pytest.raises(JSONPathRecursionError): + env.findall(query, data) diff --git a/tests/test_filter_expression_caching.py b/tests/test_filter_expression_caching.py index 9d1b3cf..b91f7a3 100644 --- a/tests/test_filter_expression_caching.py +++ b/tests/test_filter_expression_caching.py @@ -1,18 +1,19 @@ """Filter expression caching test cases.""" + from unittest import mock from 
jsonpath import JSONPath from jsonpath import JSONPathEnvironment -from jsonpath.filter import BooleanExpression +from jsonpath.filter import BaseExpression from jsonpath.filter import CachingFilterExpression from jsonpath.filter import FilterContextPath from jsonpath.filter import FilterExpression from jsonpath.filter import InfixExpression from jsonpath.filter import IntegerLiteral -from jsonpath.filter import RootPath -from jsonpath.filter import SelfPath +from jsonpath.filter import RelativeFilterQuery +from jsonpath.filter import RootFilterQuery +from jsonpath.segments import JSONPathChildSegment from jsonpath.selectors import Filter as FilterSelector -from jsonpath.selectors import ListSelector def test_cache_root_path() -> None: @@ -20,28 +21,28 @@ def test_cache_root_path() -> None: env = JSONPathEnvironment() path = env.compile("$.some[?@.a < $.thing].a") assert isinstance(path, JSONPath) - selection_list = path.selectors[1] - assert isinstance(selection_list, ListSelector) - filter_selector = selection_list.items[0] + segment = path.segments[1] + assert isinstance(segment, JSONPathChildSegment) + filter_selector = segment.selectors[0] assert isinstance(filter_selector, FilterSelector) assert filter_selector.cacheable_nodes is True # The original expression tree without caching nodes. - expr: FilterExpression = filter_selector.expression - assert isinstance(expr, BooleanExpression) + expr: BaseExpression = filter_selector.expression + assert isinstance(expr, FilterExpression) expr = expr.expression assert isinstance(expr, InfixExpression) - assert isinstance(expr.left, SelfPath) - assert isinstance(expr.right, RootPath) + assert isinstance(expr.left, RelativeFilterQuery) + assert isinstance(expr.right, RootFilterQuery) # A caching copy of the original expression tree. 
expr = filter_selector.expression.cache_tree() - assert isinstance(expr, BooleanExpression) + assert isinstance(expr, FilterExpression) expr = expr.expression assert isinstance(expr, InfixExpression) - assert isinstance(expr.left, SelfPath) + assert isinstance(expr.left, RelativeFilterQuery) assert isinstance(expr.right, CachingFilterExpression) - assert isinstance(expr.right._expr, RootPath) # noqa: SLF001 + assert isinstance(expr.right._expr, RootFilterQuery) # noqa: SLF001 def test_root_path_cache() -> None: @@ -49,7 +50,7 @@ def test_root_path_cache() -> None: env = JSONPathEnvironment(filter_caching=True) data = {"some": [{"a": 1}, {"a": 99}, {"a": 2}, {"a": 3}]} with mock.patch( - "jsonpath.filter.RootPath.evaluate", return_value=10 + "jsonpath.filter.RootFilterQuery.evaluate", return_value=10 ) as mock_root_path: path = env.compile("$.some[?@.a < $.thing].a") rv = path.findall(data) @@ -62,7 +63,7 @@ def test_root_path_no_cache() -> None: env = JSONPathEnvironment(filter_caching=False) data = {"some": [{"a": 1}, {"a": 99}, {"a": 2}, {"a": 3}]} with mock.patch( - "jsonpath.filter.RootPath.evaluate", return_value=10 + "jsonpath.filter.RootFilterQuery.evaluate", return_value=10 ) as mock_root_path: path = env.compile("$.some[?@.a < $.thing].a") rv = path.findall(data) @@ -75,28 +76,28 @@ def test_cache_context_path() -> None: env = JSONPathEnvironment() path = env.compile("$.some[?_.thing > @.a].a") assert isinstance(path, JSONPath) - selection_list = path.selectors[1] - assert isinstance(selection_list, ListSelector) - filter_selector = selection_list.items[0] + segment = path.segments[1] + assert isinstance(segment, JSONPathChildSegment) + filter_selector = segment.selectors[0] assert isinstance(filter_selector, FilterSelector) assert filter_selector.cacheable_nodes is True # The original expression tree without caching nodes. 
- expr: FilterExpression = filter_selector.expression - assert isinstance(expr, BooleanExpression) + expr: BaseExpression = filter_selector.expression + assert isinstance(expr, FilterExpression) expr = expr.expression assert isinstance(expr, InfixExpression) assert isinstance(expr.left, FilterContextPath) - assert isinstance(expr.right, SelfPath) + assert isinstance(expr.right, RelativeFilterQuery) # A caching copy of the original expression tree. expr = filter_selector.expression.cache_tree() - assert isinstance(expr, BooleanExpression) + assert isinstance(expr, FilterExpression) expr = expr.expression assert isinstance(expr, InfixExpression) assert isinstance(expr.left, CachingFilterExpression) assert isinstance(expr.left._expr, FilterContextPath) # noqa: SLF001 - assert isinstance(expr.right, SelfPath) + assert isinstance(expr.right, RelativeFilterQuery) def test_context_path_cache() -> None: @@ -146,20 +147,20 @@ def test_uncacheable_filter() -> None: env = JSONPathEnvironment(filter_caching=True) path = env.compile("$.some[?@.a > 2 and @.b < 4].a") assert isinstance(path, JSONPath) - selection_list = path.selectors[1] - assert isinstance(selection_list, ListSelector) - filter_selector = selection_list.items[0] + segment = path.segments[1] + assert isinstance(segment, JSONPathChildSegment) + filter_selector = segment.selectors[0] assert isinstance(filter_selector, FilterSelector) assert filter_selector.cacheable_nodes is False # The original expression tree without caching nodes. 
- expr: FilterExpression = filter_selector.expression - assert isinstance(expr, BooleanExpression) + expr: BaseExpression = filter_selector.expression + assert isinstance(expr, FilterExpression) expr = expr.expression assert isinstance(expr, InfixExpression) assert isinstance(expr.left, InfixExpression) assert isinstance(expr.right, InfixExpression) - assert isinstance(expr.left.left, SelfPath) + assert isinstance(expr.left.left, RelativeFilterQuery) assert isinstance(expr.left.right, IntegerLiteral) - assert isinstance(expr.right.left, SelfPath) + assert isinstance(expr.right.left, RelativeFilterQuery) assert isinstance(expr.right.right, IntegerLiteral) diff --git a/tests/test_find.py b/tests/test_find.py deleted file mode 100644 index 140a291..0000000 --- a/tests/test_find.py +++ /dev/null @@ -1,163 +0,0 @@ -import asyncio -import dataclasses -import operator -from typing import Any -from typing import List -from typing import Mapping -from typing import Sequence -from typing import Union - -import pytest - -from jsonpath import JSONPathEnvironment - - -@dataclasses.dataclass -class Case: - description: str - path: str - data: Union[Sequence[Any], Mapping[str, Any]] - want: Union[Sequence[Any], Mapping[str, Any]] - - -TEST_CASES = [ - Case( - description="property key that looks like an index", - path="$[some][0]", - data={"some": {"0": "thing"}}, - want=["thing"], - ), - Case( - description="slice a mapping", - path="$.some[0:4]", - data={"some": {"thing": "else"}}, - want=[], - ), - Case( - description="keys from a mapping", - path="$.some[~]", - data={"some": {"thing": "else"}}, - want=["thing"], - ), - Case( - description="keys from a sequence", - path="$.some.~", - data={"some": ["thing", "else"]}, - want=[], - ), - Case( - description="match key pattern", - path="$.some[?match(#, 'thing[0-9]+')]", - data={ - "some": { - "thing1": {"foo": 1}, - "thing2": {"foo": 2}, - "other": {"foo": 3}, - } - }, - want=[{"foo": 1}, {"foo": 2}], - ), - Case( - 
description="select root value using fake root", - path="^[?@some.thing > 7]", - data={"some": {"thing": 42}}, - want=[{"some": {"thing": 42}}], - ), - Case( - description="fake root in a filter query", - path="^[?@some.thing > value(^.*.num)]", - data={"some": {"thing": 42}, "num": 7}, - want=[{"some": {"thing": 42}, "num": 7}], - ), - Case( - description="recurse object keys", - path="$..~", - data={"some": {"thing": "else", "foo": {"bar": "baz"}}}, - want=["some", "thing", "foo", "bar"], - ), - Case( - description="logical expr existence tests", - path="$[?@.a && @.b]", - data=[{"a": True, "b": False}], - want=[{"a": True, "b": False}], - ), - Case( - description="logical expr existence tests, alternate and", - path="$[?@.a and @.b]", - data=[{"a": True, "b": False}], - want=[{"a": True, "b": False}], - ), - Case( - description="array contains literal", - path="$[?@.a contains 'foo']", - data=[{"a": ["foo", "bar"]}, {"a": ["bar"]}], - want=[ - { - "a": ["foo", "bar"], - } - ], - ), - Case( - description="object contains literal", - path="$[?@.a contains 'foo']", - data=[{"a": {"foo": "bar"}}, {"a": {"bar": "baz"}}], - want=[ - { - "a": {"foo": "bar"}, - } - ], - ), - Case( - description="literal in array", - path="$[?'foo' in @.a]", - data=[{"a": ["foo", "bar"]}, {"a": ["bar"]}], - want=[ - { - "a": ["foo", "bar"], - } - ], - ), - Case( - description="literal in object", - path="$[?'foo' in @.a]", - data=[{"a": {"foo": "bar"}}, {"a": {"bar": "baz"}}], - want=[ - { - "a": {"foo": "bar"}, - } - ], - ), - Case( - description="quoted reserved word, and", - path="['and']", - data={"and": [1, 2, 3]}, - want=[[1, 2, 3]], - ), - Case( - description="quoted reserved word, or", - path="['or']", - data={"or": [1, 2, 3]}, - want=[[1, 2, 3]], - ), -] - - -@pytest.fixture() -def env() -> JSONPathEnvironment: - return JSONPathEnvironment() - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find(env: JSONPathEnvironment, case: 
Case) -> None: - path = env.compile(case.path) - assert path.findall(case.data) == case.want - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find_async(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - - async def coro() -> List[object]: - return await path.findall_async(case.data) - - assert asyncio.run(coro()) == case.want diff --git a/tests/test_find_compound_path.py b/tests/test_find_compound_path.py deleted file mode 100644 index d45db5a..0000000 --- a/tests/test_find_compound_path.py +++ /dev/null @@ -1,80 +0,0 @@ -import asyncio -import dataclasses -import operator -from typing import Any -from typing import List -from typing import Mapping -from typing import Sequence -from typing import Union - -import pytest - -from jsonpath import JSONPathEnvironment - - -@dataclasses.dataclass -class Case: - description: str - path: str - data: Union[Sequence[Any], Mapping[str, Any]] - want: Union[Sequence[Any], Mapping[str, Any]] - - -TEST_CASES = [ - Case( - description="union of two paths", - path="$.some | $.thing", - data={"some": [1, 2, 3], "thing": [4, 5, 6], "other": ["a", "b", "c"]}, - want=[[1, 2, 3], [4, 5, 6]], - ), - Case( - description="union of three paths", - path="$.some | $.thing | $.other", - data={"some": [1, 2, 3], "thing": [4, 5, 6], "other": ["a", "b", "c"]}, - want=[[1, 2, 3], [4, 5, 6], ["a", "b", "c"]], - ), - Case( - description="intersection of two paths with no common items", - path="$.some & $.thing", - data={"some": [1, 2, 3], "thing": [4, 5, 6], "other": ["a", "b", "c"]}, - want=[], - ), - Case( - description="intersection of two paths with common item", - path="$.some & $.thing", - data={"some": [1, 2, 3], "thing": [1, 2, 3], "other": ["a", "b", "c"]}, - want=[[1, 2, 3]], - ), - Case( - description="intersection then union", - path="$.some & $.thing | $.other", - data={"some": [1, 2, 3], "thing": [1, 2, 3], "other": ["a", "b", "c"]}, - want=[[1, 2, 3], 
["a", "b", "c"]], - ), -] - - -@pytest.fixture() -def env() -> JSONPathEnvironment: - return JSONPathEnvironment() - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find_compound_path(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - assert path.findall(case.data) == case.want - assert [match.obj for match in path.finditer(case.data)] == case.want - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_find_compound_path_async(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - - async def coro() -> List[object]: - return await path.findall_async(case.data) - - async def iter_coro() -> List[object]: - return [match.obj async for match in await path.finditer_async(case.data)] - - assert asyncio.run(coro()) == case.want - assert asyncio.run(iter_coro()) == case.want diff --git a/tests/test_find_reference.py b/tests/test_find_reference.py index cbc7bf0..83a050d 100644 --- a/tests/test_find_reference.py +++ b/tests/test_find_reference.py @@ -2,6 +2,7 @@ See https://goessner.net/articles/JsonPath/ """ + import asyncio import dataclasses import operator @@ -220,132 +221,132 @@ class Case: }, ], ), - Case( - description="root descent", - path="$..", - data=REFERENCE_DATA, - want=[ - { - "store": { - "book": [ - { - "category": "reference", - "author": "Nigel Rees", - "title": "Sayings of the Century", - "price": 8.95, - }, - { - "category": "fiction", - "author": "Evelyn Waugh", - "title": "Sword of Honour", - "price": 12.99, - }, - { - "category": "fiction", - "author": "Herman Melville", - "title": "Moby Dick", - "isbn": "0-553-21311-3", - "price": 8.99, - }, - { - "category": "fiction", - "author": "J. R. R. 
Tolkien", - "title": "The Lord of the Rings", - "isbn": "0-395-19395-8", - "price": 22.99, - }, - ], - "bicycle": {"color": "red", "price": 19.95}, - } - }, - { - "book": [ - { - "category": "reference", - "author": "Nigel Rees", - "title": "Sayings of the Century", - "price": 8.95, - }, - { - "category": "fiction", - "author": "Evelyn Waugh", - "title": "Sword of Honour", - "price": 12.99, - }, - { - "category": "fiction", - "author": "Herman Melville", - "title": "Moby Dick", - "isbn": "0-553-21311-3", - "price": 8.99, - }, - { - "category": "fiction", - "author": "J. R. R. Tolkien", - "title": "The Lord of the Rings", - "isbn": "0-395-19395-8", - "price": 22.99, - }, - ], - "bicycle": {"color": "red", "price": 19.95}, - }, - [ - { - "category": "reference", - "author": "Nigel Rees", - "title": "Sayings of the Century", - "price": 8.95, - }, - { - "category": "fiction", - "author": "Evelyn Waugh", - "title": "Sword of Honour", - "price": 12.99, - }, - { - "category": "fiction", - "author": "Herman Melville", - "title": "Moby Dick", - "isbn": "0-553-21311-3", - "price": 8.99, - }, - { - "category": "fiction", - "author": "J. R. R. Tolkien", - "title": "The Lord of the Rings", - "isbn": "0-395-19395-8", - "price": 22.99, - }, - ], - { - "category": "reference", - "author": "Nigel Rees", - "title": "Sayings of the Century", - "price": 8.95, - }, - { - "category": "fiction", - "author": "Evelyn Waugh", - "title": "Sword of Honour", - "price": 12.99, - }, - { - "category": "fiction", - "author": "Herman Melville", - "title": "Moby Dick", - "isbn": "0-553-21311-3", - "price": 8.99, - }, - { - "category": "fiction", - "author": "J. R. R. 
Tolkien", - "title": "The Lord of the Rings", - "isbn": "0-395-19395-8", - "price": 22.99, - }, - {"color": "red", "price": 19.95}, - ], - ), + # Case( + # description="root descent", + # path="$..", + # data=REFERENCE_DATA, + # want=[ + # { + # "store": { + # "book": [ + # { + # "category": "reference", + # "author": "Nigel Rees", + # "title": "Sayings of the Century", + # "price": 8.95, + # }, + # { + # "category": "fiction", + # "author": "Evelyn Waugh", + # "title": "Sword of Honour", + # "price": 12.99, + # }, + # { + # "category": "fiction", + # "author": "Herman Melville", + # "title": "Moby Dick", + # "isbn": "0-553-21311-3", + # "price": 8.99, + # }, + # { + # "category": "fiction", + # "author": "J. R. R. Tolkien", + # "title": "The Lord of the Rings", + # "isbn": "0-395-19395-8", + # "price": 22.99, + # }, + # ], + # "bicycle": {"color": "red", "price": 19.95}, + # } + # }, + # { + # "book": [ + # { + # "category": "reference", + # "author": "Nigel Rees", + # "title": "Sayings of the Century", + # "price": 8.95, + # }, + # { + # "category": "fiction", + # "author": "Evelyn Waugh", + # "title": "Sword of Honour", + # "price": 12.99, + # }, + # { + # "category": "fiction", + # "author": "Herman Melville", + # "title": "Moby Dick", + # "isbn": "0-553-21311-3", + # "price": 8.99, + # }, + # { + # "category": "fiction", + # "author": "J. R. R. Tolkien", + # "title": "The Lord of the Rings", + # "isbn": "0-395-19395-8", + # "price": 22.99, + # }, + # ], + # "bicycle": {"color": "red", "price": 19.95}, + # }, + # [ + # { + # "category": "reference", + # "author": "Nigel Rees", + # "title": "Sayings of the Century", + # "price": 8.95, + # }, + # { + # "category": "fiction", + # "author": "Evelyn Waugh", + # "title": "Sword of Honour", + # "price": 12.99, + # }, + # { + # "category": "fiction", + # "author": "Herman Melville", + # "title": "Moby Dick", + # "isbn": "0-553-21311-3", + # "price": 8.99, + # }, + # { + # "category": "fiction", + # "author": "J. R. R. 
Tolkien", + # "title": "The Lord of the Rings", + # "isbn": "0-395-19395-8", + # "price": 22.99, + # }, + # ], + # { + # "category": "reference", + # "author": "Nigel Rees", + # "title": "Sayings of the Century", + # "price": 8.95, + # }, + # { + # "category": "fiction", + # "author": "Evelyn Waugh", + # "title": "Sword of Honour", + # "price": 12.99, + # }, + # { + # "category": "fiction", + # "author": "Herman Melville", + # "title": "Moby Dick", + # "isbn": "0-553-21311-3", + # "price": 8.99, + # }, + # { + # "category": "fiction", + # "author": "J. R. R. Tolkien", + # "title": "The Lord of the Rings", + # "isbn": "0-395-19395-8", + # "price": 22.99, + # }, + # {"color": "red", "price": 19.95}, + # ], + # ), Case( description="(reference) all elements", path="$..*", diff --git a/tests/test_iregexp.py b/tests/test_iregexp.py new file mode 100644 index 0000000..f1b91e6 --- /dev/null +++ b/tests/test_iregexp.py @@ -0,0 +1,34 @@ +import pytest + +try: + import iregexp_check # noqa: F401 + + IREGEXP_AVAILABLE = True +except ImportError: + IREGEXP_AVAILABLE = False + +import jsonpath + + +@pytest.mark.skipif(IREGEXP_AVAILABLE is False, reason="requires iregexp_check") +def test_iregexp_check() -> None: + # Character classes are OK. + query = "$[?match(@, '[0-9]+')]" + data = ["123", "abc", "abc123"] + assert jsonpath.findall(query, data) == ["123"] + + # Multi character escapes are not. + query = "$[?match(@, '\\\\d+')]" + assert jsonpath.findall(query, data) == [] + + +@pytest.mark.skipif(IREGEXP_AVAILABLE, reason="iregexp_check is available") +def test_no_iregexp_check() -> None: + # Character classes are OK. + query = "$[?match(@, '[0-9]+')]" + data = ["123", "abc", "abc123"] + assert jsonpath.findall(query, data) == ["123"] + + # Multi character escapes are OK when iregexp_check is not installed. 
+ query = "$[?match(@, '\\\\d+')]" + assert jsonpath.findall(query, data) == ["123"] diff --git a/tests/test_issues.py b/tests/test_issues.py index 3fe8e9b..3aaa879 100644 --- a/tests/test_issues.py +++ b/tests/test_issues.py @@ -67,3 +67,15 @@ def test_issue_103() -> None: ] assert findall(query, data, filter_context=filter_context) == want + + +def test_quoted_reserved_word_and() -> None: + query = "$['and']" + data = {"and": [1, 2, 3]} + assert findall(query, data) == [[1, 2, 3]] + + +def test_quoted_reserved_word_or() -> None: + query = "$['or']" + data = {"or": [1, 2, 3]} + assert findall(query, data) == [[1, 2, 3]] diff --git a/tests/test_key_selector.py b/tests/test_key_selector.py new file mode 100644 index 0000000..876ca3c --- /dev/null +++ b/tests/test_key_selector.py @@ -0,0 +1,46 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/key_selector.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_key_selector(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_key_selector_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def 
test_key_selector_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_keys_filter_selector.py b/tests/test_keys_filter_selector.py new file mode 100644 index 0000000..1360f80 --- /dev/null +++ b/tests/test_keys_filter_selector.py @@ -0,0 +1,46 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/keys_filter_selector.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_keys_filter_selector(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_keys_filter_selector_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_keys_filter_selector_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_keys_selector.py b/tests/test_keys_selector.py new file mode 100644 index 0000000..7fe99e1 --- /dev/null +++ b/tests/test_keys_selector.py @@ -0,0 +1,46 @@ +import asyncio +import json +import operator + +import 
pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/keys_selector.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_keys_selector(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_keys_selector_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_keys_selector_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_lex.py b/tests/test_lex.py index 14727ac..8241a04 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -7,12 +7,12 @@ from jsonpath import JSONPathEnvironment from jsonpath.exceptions import JSONPathSyntaxError from jsonpath.token import TOKEN_AND -from jsonpath.token import TOKEN_BARE_PROPERTY +from jsonpath.token import TOKEN_COLON from jsonpath.token import TOKEN_COMMA from jsonpath.token import TOKEN_DDOT +from jsonpath.token import TOKEN_DOT from jsonpath.token import TOKEN_DOUBLE_QUOTE_STRING from jsonpath.token import TOKEN_EQ -from jsonpath.token import TOKEN_FAKE_ROOT from jsonpath.token import TOKEN_FALSE from jsonpath.token import TOKEN_FILTER from 
jsonpath.token import TOKEN_FLOAT @@ -22,13 +22,14 @@ from jsonpath.token import TOKEN_INT from jsonpath.token import TOKEN_INTERSECTION from jsonpath.token import TOKEN_KEYS -from jsonpath.token import TOKEN_LIST_START +from jsonpath.token import TOKEN_LBRACKET from jsonpath.token import TOKEN_LPAREN from jsonpath.token import TOKEN_LT +from jsonpath.token import TOKEN_NAME from jsonpath.token import TOKEN_NIL from jsonpath.token import TOKEN_NOT from jsonpath.token import TOKEN_OR -from jsonpath.token import TOKEN_PROPERTY +from jsonpath.token import TOKEN_PSEUDO_ROOT from jsonpath.token import TOKEN_RBRACKET from jsonpath.token import TOKEN_RE from jsonpath.token import TOKEN_RE_FLAGS @@ -37,11 +38,9 @@ from jsonpath.token import TOKEN_RPAREN from jsonpath.token import TOKEN_SELF from jsonpath.token import TOKEN_SINGLE_QUOTE_STRING -from jsonpath.token import TOKEN_SLICE_START -from jsonpath.token import TOKEN_SLICE_STEP -from jsonpath.token import TOKEN_SLICE_STOP from jsonpath.token import TOKEN_TRUE from jsonpath.token import TOKEN_UNION +from jsonpath.token import TOKEN_WHITESPACE from jsonpath.token import TOKEN_WILD from jsonpath.token import Token @@ -57,33 +56,33 @@ class Case: Case( description="just root", path="$", - want=[ - Token(kind=TOKEN_ROOT, value="$", index=0, path="$"), - ], + want=[Token(kind=TOKEN_ROOT, value="$", index=0, path="$")], ), Case( - description="just fake root", + description="just pseudo-root", path="^", - want=[ - Token(kind=TOKEN_FAKE_ROOT, value="^", index=0, path="^"), - ], + want=[Token(kind=TOKEN_PSEUDO_ROOT, value="^", index=0, path="^")], ), Case( description="root dot property", path="$.some.thing", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.some.thing"), - Token(kind=TOKEN_PROPERTY, value="some", index=2, path="$.some.thing"), - Token(kind=TOKEN_PROPERTY, value="thing", index=7, path="$.some.thing"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.some.thing"), + Token(kind=TOKEN_NAME, 
value="some", index=2, path="$.some.thing"), + Token(kind=TOKEN_DOT, value=".", index=6, path="$.some.thing"), + Token(kind=TOKEN_NAME, value="thing", index=7, path="$.some.thing"), ], ), Case( - description="fake root dot property", + description="pseudo root dot property", path="^.some.thing", want=[ - Token(kind=TOKEN_FAKE_ROOT, value="^", index=0, path="^.some.thing"), - Token(kind=TOKEN_PROPERTY, value="some", index=2, path="^.some.thing"), - Token(kind=TOKEN_PROPERTY, value="thing", index=7, path="^.some.thing"), + Token(kind=TOKEN_PSEUDO_ROOT, value="^", index=0, path="^.some.thing"), + Token(kind=TOKEN_DOT, value=".", index=1, path="^.some.thing"), + Token(kind=TOKEN_NAME, value="some", index=2, path="^.some.thing"), + Token(kind=TOKEN_DOT, value=".", index=6, path="^.some.thing"), + Token(kind=TOKEN_NAME, value="thing", index=7, path="^.some.thing"), ], ), Case( @@ -91,15 +90,11 @@ class Case: path="$[some][thing]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[some][thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[some][thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="some", index=2, path="$[some][thing]" - ), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[some][thing]"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$[some][thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=6, path="$[some][thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=7, path="$[some][thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="thing", index=8, path="$[some][thing]" - ), + Token(kind=TOKEN_LBRACKET, value="[", index=7, path="$[some][thing]"), + Token(kind=TOKEN_NAME, value="thing", index=8, path="$[some][thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=13, path="$[some][thing]"), ], ), @@ -108,7 +103,7 @@ class Case: path='$["some"]', want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path='$["some"]'), - Token(kind=TOKEN_LIST_START, value="[", index=1, path='$["some"]'), + Token(kind=TOKEN_LBRACKET, 
value="[", index=1, path='$["some"]'), Token( kind=TOKEN_DOUBLE_QUOTE_STRING, value="some", index=3, path='$["some"]' ), @@ -120,7 +115,7 @@ class Case: path="$['some']", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$['some']"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$['some']"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$['some']"), Token( kind=TOKEN_SINGLE_QUOTE_STRING, value="some", index=3, path="$['some']" ), @@ -132,15 +127,12 @@ class Case: path="$.[some][thing]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[some][thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[some][thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="some", index=3, path="$.[some][thing]" - ), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[some][thing]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[some][thing]"), + Token(kind=TOKEN_NAME, value="some", index=3, path="$.[some][thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=7, path="$.[some][thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=8, path="$.[some][thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="thing", index=9, path="$.[some][thing]" - ), + Token(kind=TOKEN_LBRACKET, value="[", index=8, path="$.[some][thing]"), + Token(kind=TOKEN_NAME, value="thing", index=9, path="$.[some][thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=14, path="$.[some][thing]"), ], ), @@ -149,7 +141,7 @@ class Case: path="$[1]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[1]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[1]"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[1]"), Token(kind=TOKEN_INT, value="1", index=2, path="$[1]"), Token(kind=TOKEN_RBRACKET, value="]", index=3, path="$[1]"), ], @@ -159,7 +151,8 @@ class Case: path="$.[1]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[1]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[1]"), + Token(kind=TOKEN_DOT, 
value=".", index=1, path="$.[1]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[1]"), Token(kind=TOKEN_INT, value="1", index=3, path="$.[1]"), Token(kind=TOKEN_RBRACKET, value="]", index=4, path="$.[1]"), ], @@ -168,10 +161,8 @@ class Case: description="empty slice", path="[:]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[:]"), - Token(kind=TOKEN_SLICE_START, value="", index=1, path="[:]"), - Token(kind=TOKEN_SLICE_STOP, value="", index=2, path="[:]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="[:]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[:]"), + Token(kind=TOKEN_COLON, value=":", index=1, path="[:]"), Token(kind=TOKEN_RBRACKET, value="]", index=2, path="[:]"), ], ), @@ -179,10 +170,9 @@ class Case: description="empty slice empty step", path="[::]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[::]"), - Token(kind=TOKEN_SLICE_START, value="", index=1, path="[::]"), - Token(kind=TOKEN_SLICE_STOP, value="", index=2, path="[::]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=3, path="[::]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[::]"), + Token(kind=TOKEN_COLON, value=":", index=1, path="[::]"), + Token(kind=TOKEN_COLON, value=":", index=2, path="[::]"), Token(kind=TOKEN_RBRACKET, value="]", index=3, path="[::]"), ], ), @@ -190,10 +180,9 @@ class Case: description="slice empty stop", path="[1:]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[1:]"), - Token(kind=TOKEN_SLICE_START, value="1", index=1, path="[1:]"), - Token(kind=TOKEN_SLICE_STOP, value="", index=3, path="[1:]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="[1:]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[1:]"), + Token(kind=TOKEN_INT, value="1", index=1, path="[1:]"), + Token(kind=TOKEN_COLON, value=":", index=2, path="[1:]"), Token(kind=TOKEN_RBRACKET, value="]", index=3, path="[1:]"), ], ), @@ -201,10 +190,9 @@ class Case: description="slice empty start", 
path="[:-1]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[:-1]"), - Token(kind=TOKEN_SLICE_START, value="", index=1, path="[:-1]"), - Token(kind=TOKEN_SLICE_STOP, value="-1", index=2, path="[:-1]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="[:-1]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[:-1]"), + Token(kind=TOKEN_COLON, value=":", index=1, path="[:-1]"), + Token(kind=TOKEN_INT, value="-1", index=2, path="[:-1]"), Token(kind=TOKEN_RBRACKET, value="]", index=4, path="[:-1]"), ], ), @@ -212,10 +200,10 @@ class Case: description="slice start and stop", path="[1:7]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[1:7]"), - Token(kind=TOKEN_SLICE_START, value="1", index=1, path="[1:7]"), - Token(kind=TOKEN_SLICE_STOP, value="7", index=3, path="[1:7]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="[1:7]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[1:7]"), + Token(kind=TOKEN_INT, value="1", index=1, path="[1:7]"), + Token(kind=TOKEN_COLON, value=":", index=2, path="[1:7]"), + Token(kind=TOKEN_INT, value="7", index=3, path="[1:7]"), Token(kind=TOKEN_RBRACKET, value="]", index=4, path="[1:7]"), ], ), @@ -223,10 +211,12 @@ class Case: description="slice start, stop and step", path="[1:7:2]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[1:7:2]"), - Token(kind=TOKEN_SLICE_START, value="1", index=1, path="[1:7:2]"), - Token(kind=TOKEN_SLICE_STOP, value="7", index=3, path="[1:7:2]"), - Token(kind=TOKEN_SLICE_STEP, value="2", index=5, path="[1:7:2]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[1:7:2]"), + Token(kind=TOKEN_INT, value="1", index=1, path="[1:7:2]"), + Token(kind=TOKEN_COLON, value=":", index=2, path="[1:7:2]"), + Token(kind=TOKEN_INT, value="7", index=3, path="[1:7:2]"), + Token(kind=TOKEN_COLON, value=":", index=4, path="[1:7:2]"), + Token(kind=TOKEN_INT, value="2", index=5, path="[1:7:2]"), Token(kind=TOKEN_RBRACKET, value="]", index=6, 
path="[1:7:2]"), ], ), @@ -235,6 +225,7 @@ class Case: path="$.*", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.*"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.*"), Token(kind=TOKEN_WILD, value="*", index=2, path="$.*"), ], ), @@ -243,7 +234,7 @@ class Case: path="$[*]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[*]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[*]"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[*]"), Token(kind=TOKEN_WILD, value="*", index=2, path="$[*]"), Token(kind=TOKEN_RBRACKET, value="]", index=3, path="$[*]"), ], @@ -253,7 +244,8 @@ class Case: path="$.[*]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[*]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[*]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[*]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[*]"), Token(kind=TOKEN_WILD, value="*", index=3, path="$.[*]"), Token(kind=TOKEN_RBRACKET, value="]", index=4, path="$.[*]"), ], @@ -272,7 +264,7 @@ class Case: want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$..thing"), Token(kind=TOKEN_DDOT, value="..", index=1, path="$..thing"), - Token(kind=TOKEN_BARE_PROPERTY, value="thing", index=3, path="$..thing"), + Token(kind=TOKEN_NAME, value="thing", index=3, path="$..thing"), ], ), Case( @@ -281,7 +273,8 @@ class Case: want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$...thing"), Token(kind=TOKEN_DDOT, value="..", index=1, path="$...thing"), - Token(kind=TOKEN_PROPERTY, value="thing", index=4, path="$...thing"), + Token(kind=TOKEN_DOT, value=".", index=3, path="$...thing"), + Token(kind=TOKEN_NAME, value="thing", index=4, path="$...thing"), ], ), Case( @@ -289,7 +282,7 @@ class Case: path="$[1,4,5]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[1,4,5]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[1,4,5]"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[1,4,5]"), 
Token(kind=TOKEN_INT, value="1", index=2, path="$[1,4,5]"), Token(kind=TOKEN_COMMA, value=",", index=3, path="$[1,4,5]"), Token(kind=TOKEN_INT, value="4", index=4, path="$[1,4,5]"), @@ -303,12 +296,12 @@ class Case: path="$[1,4:9]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[1,4:9]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[1,4:9]"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[1,4:9]"), Token(kind=TOKEN_INT, value="1", index=2, path="$[1,4:9]"), Token(kind=TOKEN_COMMA, value=",", index=3, path="$[1,4:9]"), - Token(kind=TOKEN_SLICE_START, value="4", index=4, path="$[1,4:9]"), - Token(kind=TOKEN_SLICE_STOP, value="9", index=6, path="$[1,4:9]"), - Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="$[1,4:9]"), + Token(kind=TOKEN_INT, value="4", index=4, path="$[1,4:9]"), + Token(kind=TOKEN_COLON, value=":", index=5, path="$[1,4:9]"), + Token(kind=TOKEN_INT, value="9", index=6, path="$[1,4:9]"), Token(kind=TOKEN_RBRACKET, value="]", index=7, path="$[1,4:9]"), ], ), @@ -317,14 +310,10 @@ class Case: path="$[some,thing]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$[some,thing]"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$[some,thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="some", index=2, path="$[some,thing]" - ), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$[some,thing]"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$[some,thing]"), Token(kind=TOKEN_COMMA, value=",", index=6, path="$[some,thing]"), - Token( - kind=TOKEN_BARE_PROPERTY, value="thing", index=7, path="$[some,thing]" - ), + Token(kind=TOKEN_NAME, value="thing", index=7, path="$[some,thing]"), Token(kind=TOKEN_RBRACKET, value="]", index=12, path="$[some,thing]"), ], ), @@ -333,11 +322,13 @@ class Case: path="$.[?(@.some)]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[?(@.some)]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[?(@.some)]"), + Token(kind=TOKEN_DOT, value=".", 
index=1, path="$.[?(@.some)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[?(@.some)]"), Token(kind=TOKEN_FILTER, value="?", index=3, path="$.[?(@.some)]"), Token(kind=TOKEN_LPAREN, value="(", index=4, path="$.[?(@.some)]"), Token(kind=TOKEN_SELF, value="@", index=5, path="$.[?(@.some)]"), - Token(kind=TOKEN_PROPERTY, value="some", index=7, path="$.[?(@.some)]"), + Token(kind=TOKEN_DOT, value=".", index=6, path="$.[?(@.some)]"), + Token(kind=TOKEN_NAME, value="some", index=7, path="$.[?(@.some)]"), Token(kind=TOKEN_RPAREN, value=")", index=11, path="$.[?(@.some)]"), Token(kind=TOKEN_RBRACKET, value="]", index=12, path="$.[?(@.some)]"), ], @@ -347,11 +338,13 @@ class Case: path="$.[?($.some)]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[?($.some)]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[?($.some)]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[?($.some)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[?($.some)]"), Token(kind=TOKEN_FILTER, value="?", index=3, path="$.[?($.some)]"), Token(kind=TOKEN_LPAREN, value="(", index=4, path="$.[?($.some)]"), Token(kind=TOKEN_ROOT, value="$", index=5, path="$.[?($.some)]"), - Token(kind=TOKEN_PROPERTY, value="some", index=7, path="$.[?($.some)]"), + Token(kind=TOKEN_DOT, value=".", index=6, path="$.[?($.some)]"), + Token(kind=TOKEN_NAME, value="some", index=7, path="$.[?($.some)]"), Token(kind=TOKEN_RPAREN, value=")", index=11, path="$.[?($.some)]"), Token(kind=TOKEN_RBRACKET, value="]", index=12, path="$.[?($.some)]"), ], @@ -361,11 +354,12 @@ class Case: path="$.[?(@[1])]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.[?(@[1])]"), - Token(kind=TOKEN_LIST_START, value="[", index=2, path="$.[?(@[1])]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.[?(@[1])]"), + Token(kind=TOKEN_LBRACKET, value="[", index=2, path="$.[?(@[1])]"), Token(kind=TOKEN_FILTER, value="?", index=3, path="$.[?(@[1])]"), Token(kind=TOKEN_LPAREN, value="(", 
index=4, path="$.[?(@[1])]"), Token(kind=TOKEN_SELF, value="@", index=5, path="$.[?(@[1])]"), - Token(kind=TOKEN_LIST_START, value="[", index=6, path="$.[?(@[1])]"), + Token(kind=TOKEN_LBRACKET, value="[", index=6, path="$.[?(@[1])]"), Token(kind=TOKEN_INT, value="1", index=7, path="$.[?(@[1])]"), Token(kind=TOKEN_RBRACKET, value="]", index=8, path="$.[?(@[1])]"), Token(kind=TOKEN_RPAREN, value=")", index=9, path="$.[?(@[1])]"), @@ -376,43 +370,41 @@ class Case: description="filter self dot property equality with float", path="[?(@.some == 1.1)]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=9, path="[?(@.some == 1.1)]"), + Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1.1)]"), Token( - kind=TOKEN_PROPERTY, value="some", index=5, path="[?(@.some == 1.1)]" + kind=TOKEN_WHITESPACE, value=" ", index=12, path="[?(@.some == 1.1)]" ), - Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_FLOAT, value="1.1", index=13, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_RPAREN, value=")", index=16, path="[?(@.some == 1.1)]"), Token(kind=TOKEN_RBRACKET, value="]", index=17, path="[?(@.some == 1.1)]"), ], ), Case( - description=( - "filter self dot property equality with float in scientific notation" - ), + description="filter self dot property equality float in scientific notation", path="[?(@.some == 1.1e10)]", want=[ Token( - kind=TOKEN_LIST_START, - value="[", - index=0, - path="[?(@.some == 1.1e10)]", + 
kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1.1e10)]" ), Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some == 1.1e10)]"), - Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(@.some == 1.1e10)]", - ), + Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some == 1.1e10)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.some == 1.1e10)]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some == 1.1e10)]"), + Token(kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1.1e10)]"), Token( - kind=TOKEN_PROPERTY, value="some", index=5, path="[?(@.some == 1.1e10)]" + kind=TOKEN_WHITESPACE, value=" ", index=9, path="[?(@.some == 1.1e10)]" ), Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1.1e10)]"), + Token( + kind=TOKEN_WHITESPACE, value=" ", index=12, path="[?(@.some == 1.1e10)]" + ), Token( kind=TOKEN_FLOAT, value="1.1e10", index=13, path="[?(@.some == 1.1e10)]" ), @@ -426,14 +418,16 @@ class Case: description="filter self index equality with float", path="[?(@[1] == 1.1)]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[?(@[1] == 1.1)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@[1] == 1.1)]"), - Token(kind=TOKEN_LIST_START, value="[", index=4, path="[?(@[1] == 1.1)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=4, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_INT, value="1", index=5, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_RBRACKET, value="]", index=6, path="[?(@[1] == 1.1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=7, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_EQ, value="==", index=8, path="[?(@[1] == 1.1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=10, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_FLOAT, value="1.1", index=11, 
path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_RPAREN, value=")", index=14, path="[?(@[1] == 1.1)]"), Token(kind=TOKEN_RBRACKET, value="]", index=15, path="[?(@[1] == 1.1)]"), @@ -443,12 +437,15 @@ class Case: description="filter self dot property equality with int", path="[?(@.some == 1)]", want=[ - Token(kind=TOKEN_LIST_START, value="[", index=0, path="[?(@.some == 1)]"), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1)]"), Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some == 1)]"), Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some == 1)]"), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.some == 1)]"), - Token(kind=TOKEN_PROPERTY, value="some", index=5, path="[?(@.some == 1)]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some == 1)]"), + Token(kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=9, path="[?(@.some == 1)]"), Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=12, path="[?(@.some == 1)]"), Token(kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1)]"), Token(kind=TOKEN_RPAREN, value=")", index=14, path="[?(@.some == 1)]"), Token(kind=TOKEN_RBRACKET, value="]", index=15, path="[?(@.some == 1)]"), @@ -458,29 +455,19 @@ class Case: description="filter self dot property equality with int in scientific notation", path="[?(@.some == 1e10)]", want=[ - Token( - kind=TOKEN_LIST_START, - value="[", - index=0, - path="[?(@.some == 1e10)]", - ), - Token( - kind=TOKEN_FILTER, - value="?", - index=1, - path="[?(@.some == 1e10)]", - ), - Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(@.some == 1e10)]", - ), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1e10)]"), + Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some == 1e10)]"), + Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some == 1e10)]"), Token(kind=TOKEN_SELF, 
value="@", index=3, path="[?(@.some == 1e10)]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some == 1e10)]"), + Token(kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1e10)]"), Token( - kind=TOKEN_PROPERTY, value="some", index=5, path="[?(@.some == 1e10)]" + kind=TOKEN_WHITESPACE, value=" ", index=9, path="[?(@.some == 1e10)]" ), Token(kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1e10)]"), + Token( + kind=TOKEN_WHITESPACE, value=" ", index=12, path="[?(@.some == 1e10)]" + ), Token(kind=TOKEN_INT, value="1e10", index=13, path="[?(@.some == 1e10)]"), Token(kind=TOKEN_RPAREN, value=")", index=17, path="[?(@.some == 1e10)]"), Token(kind=TOKEN_RBRACKET, value="]", index=18, path="[?(@.some == 1e10)]"), @@ -491,36 +478,37 @@ class Case: path="[?(@.some =~ /foo|bar/i)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some =~ /foo|bar/i)]", ), Token( - kind=TOKEN_FILTER, - value="?", - index=1, - path="[?(@.some =~ /foo|bar/i)]", + kind=TOKEN_FILTER, value="?", index=1, path="[?(@.some =~ /foo|bar/i)]" ), Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(@.some =~ /foo|bar/i)]", + kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.some =~ /foo|bar/i)]" ), Token( kind=TOKEN_SELF, value="@", index=3, path="[?(@.some =~ /foo|bar/i)]" ), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.some =~ /foo|bar/i)]"), Token( - kind=TOKEN_PROPERTY, - value="some", - index=5, + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some =~ /foo|bar/i)]" + ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, path="[?(@.some =~ /foo|bar/i)]", ), Token( - kind=TOKEN_RE, - value="=~", - index=10, + kind=TOKEN_RE, value="=~", index=10, path="[?(@.some =~ /foo|bar/i)]" + ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, path="[?(@.some =~ /foo|bar/i)]", ), Token( @@ -536,10 +524,7 @@ class Case: path="[?(@.some =~ /foo|bar/i)]", ), Token( - kind=TOKEN_RPAREN, - value=")", - 
index=23, - path="[?(@.some =~ /foo|bar/i)]", + kind=TOKEN_RPAREN, value=")", index=23, path="[?(@.some =~ /foo|bar/i)]" ), Token( kind=TOKEN_RBRACKET, @@ -554,12 +539,14 @@ class Case: path="$.some | $.thing", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.some | $.thing"), - Token(kind=TOKEN_PROPERTY, value="some", index=2, path="$.some | $.thing"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.some | $.thing"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$.some | $.thing"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=6, path="$.some | $.thing"), Token(kind=TOKEN_UNION, value="|", index=7, path="$.some | $.thing"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=8, path="$.some | $.thing"), Token(kind=TOKEN_ROOT, value="$", index=9, path="$.some | $.thing"), - Token( - kind=TOKEN_PROPERTY, value="thing", index=11, path="$.some | $.thing" - ), + Token(kind=TOKEN_DOT, value=".", index=10, path="$.some | $.thing"), + Token(kind=TOKEN_NAME, value="thing", index=11, path="$.some | $.thing"), ], ), Case( @@ -570,31 +557,64 @@ class Case: kind=TOKEN_ROOT, value="$", index=0, path="$.some | $.thing | $.other" ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, value=".", index=1, path="$.some | $.thing | $.other" + ), + Token( + kind=TOKEN_NAME, value="some", index=2, path="$.some | $.thing | $.other", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=6, + path="$.some | $.thing | $.other", + ), Token( kind=TOKEN_UNION, value="|", index=7, path="$.some | $.thing | $.other" ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=8, + path="$.some | $.thing | $.other", + ), Token( kind=TOKEN_ROOT, value="$", index=9, path="$.some | $.thing | $.other" ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, value=".", index=10, path="$.some | $.thing | $.other" + ), + Token( + kind=TOKEN_NAME, value="thing", index=11, path="$.some | $.thing | $.other", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=16, + path="$.some | $.thing | 
$.other", + ), Token( kind=TOKEN_UNION, value="|", index=17, path="$.some | $.thing | $.other" ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=18, + path="$.some | $.thing | $.other", + ), Token( kind=TOKEN_ROOT, value="$", index=19, path="$.some | $.thing | $.other" ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, value=".", index=20, path="$.some | $.thing | $.other" + ), + Token( + kind=TOKEN_NAME, value="other", index=21, path="$.some | $.thing | $.other", @@ -606,12 +626,14 @@ class Case: path="$.some & $.thing", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.some & $.thing"), - Token(kind=TOKEN_PROPERTY, value="some", index=2, path="$.some & $.thing"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.some & $.thing"), + Token(kind=TOKEN_NAME, value="some", index=2, path="$.some & $.thing"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=6, path="$.some & $.thing"), Token(kind=TOKEN_INTERSECTION, value="&", index=7, path="$.some & $.thing"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=8, path="$.some & $.thing"), Token(kind=TOKEN_ROOT, value="$", index=9, path="$.some & $.thing"), - Token( - kind=TOKEN_PROPERTY, value="thing", index=11, path="$.some & $.thing" - ), + Token(kind=TOKEN_DOT, value=".", index=10, path="$.some & $.thing"), + Token(kind=TOKEN_NAME, value="thing", index=11, path="$.some & $.thing"), ], ), Case( @@ -619,7 +641,7 @@ class Case: path="[?(@.some > 1 and @.some < 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some > 1 and @.some < 5)]", @@ -643,29 +665,59 @@ class Case: path="[?(@.some > 1 and @.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some > 1 and @.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_GT, value=">", 
index=10, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=11, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_INT, value="1", index=12, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=13, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_AND, value="and", index=14, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -673,17 +725,35 @@ class Case: path="[?(@.some > 1 and @.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=19, + path="[?(@.some > 1 and @.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=20, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_LT, value="<", index=25, path="[?(@.some > 1 and @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=26, + path="[?(@.some > 1 and @.some < 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -709,7 +779,7 @@ class Case: path="[?(@.some == 1 or @.some == 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1 or @.some == 5)]", @@ -733,29 +803,59 @@ class Case: path="[?(@.some == 1 or @.some == 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some == 1 or @.some == 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_INT, 
value="1", index=13, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_OR, value="or", index=15, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -763,17 +863,35 @@ class Case: path="[?(@.some == 1 or @.some == 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=19, + path="[?(@.some == 1 or @.some == 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=20, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=25, path="[?(@.some == 1 or @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=27, + path="[?(@.some == 1 or @.some == 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -799,7 +917,7 @@ class Case: path="[?(@.some == 1 || @.some == 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1 || @.some == 5)]", @@ -823,29 +941,59 @@ class Case: path="[?(@.some == 1 || @.some == 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some == 1 || @.some == 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(@.some == 1 || @.some == 5)]", + ), 
Token( kind=TOKEN_OR, value="||", index=15, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -853,17 +1001,35 @@ class Case: path="[?(@.some == 1 || @.some == 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=19, + path="[?(@.some == 1 || @.some == 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=20, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=25, path="[?(@.some == 1 || @.some == 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=27, + path="[?(@.some == 1 || @.some == 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -889,33 +1055,34 @@ class Case: path="[?(@.thing in [1, '1'])]", want=[ Token( - kind=TOKEN_LIST_START, - value="[", - index=0, - path="[?(@.thing in [1, '1'])]", + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.thing in [1, '1'])]" ), Token( - kind=TOKEN_FILTER, - value="?", - index=1, - path="[?(@.thing in [1, '1'])]", + kind=TOKEN_FILTER, value="?", index=1, path="[?(@.thing in [1, '1'])]" ), Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(@.thing in [1, '1'])]", + kind=TOKEN_LPAREN, value="(", index=2, path="[?(@.thing in [1, '1'])]" ), Token(kind=TOKEN_SELF, value="@", index=3, path="[?(@.thing in [1, '1'])]"), + Token(kind=TOKEN_DOT, value=".", index=4, path="[?(@.thing in [1, '1'])]"), Token( - kind=TOKEN_PROPERTY, - value="thing", - index=5, + kind=TOKEN_NAME, value="thing", index=5, path="[?(@.thing in [1, '1'])]" + ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=10, path="[?(@.thing in [1, '1'])]", ), Token(kind=TOKEN_IN, value="in", index=11, path="[?(@.thing in [1, '1'])]"), Token( - kind=TOKEN_LIST_START, + kind=TOKEN_WHITESPACE, + value=" ", + index=13, + path="[?(@.thing in [1, 
'1'])]", + ), + Token( + kind=TOKEN_LBRACKET, value="[", index=14, path="[?(@.thing in [1, '1'])]", @@ -924,6 +1091,12 @@ class Case: Token( kind=TOKEN_COMMA, value=",", index=16, path="[?(@.thing in [1, '1'])]" ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.thing in [1, '1'])]", + ), Token( kind=TOKEN_SINGLE_QUOTE_STRING, value="1", @@ -937,10 +1110,7 @@ class Case: path="[?(@.thing in [1, '1'])]", ), Token( - kind=TOKEN_RPAREN, - value=")", - index=22, - path="[?(@.thing in [1, '1'])]", + kind=TOKEN_RPAREN, value=")", index=22, path="[?(@.thing in [1, '1'])]" ), Token( kind=TOKEN_RBRACKET, @@ -955,7 +1125,7 @@ class Case: path="[?(@.some == 1 or not @.some < 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1 or not @.some < 5)]", @@ -979,35 +1149,71 @@ class Case: path="[?(@.some == 1 or not @.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some == 1 or not @.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_OR, value="or", index=15, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_NOT, value="not", index=18, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=21, + 
path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -1015,17 +1221,35 @@ class Case: path="[?(@.some == 1 or not @.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=23, + path="[?(@.some == 1 or not @.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=24, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=28, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_LT, value="<", index=29, path="[?(@.some == 1 or not @.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=30, + path="[?(@.some == 1 or not @.some < 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -1051,7 +1275,7 @@ class Case: path="[?(@.some == 1 or !@.some < 5)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(@.some == 1 or !@.some < 5)]", @@ -1075,29 +1299,59 @@ class Case: path="[?(@.some == 1 or !@.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=4, + path="[?(@.some == 1 or !@.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=5, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_EQ, value="==", index=10, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=12, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_INT, value="1", index=13, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_OR, value="or", index=15, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_NOT, value="!", @@ -1111,17 +1365,35 @@ class Case: path="[?(@.some == 1 or 
!@.some < 5)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=20, + path="[?(@.some == 1 or !@.some < 5)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=21, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=25, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_LT, value="<", index=26, path="[?(@.some == 1 or !@.some < 5)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=27, + path="[?(@.some == 1 or !@.some < 5)]", + ), Token( kind=TOKEN_INT, value="5", @@ -1146,26 +1418,15 @@ class Case: description="filter true and false", path="[?(true == false)]", want=[ - Token( - kind=TOKEN_LIST_START, - value="[", - index=0, - path="[?(true == false)]", - ), - Token( - kind=TOKEN_FILTER, - value="?", - index=1, - path="[?(true == false)]", - ), - Token( - kind=TOKEN_LPAREN, - value="(", - index=2, - path="[?(true == false)]", - ), + Token(kind=TOKEN_LBRACKET, value="[", index=0, path="[?(true == false)]"), + Token(kind=TOKEN_FILTER, value="?", index=1, path="[?(true == false)]"), + Token(kind=TOKEN_LPAREN, value="(", index=2, path="[?(true == false)]"), Token(kind=TOKEN_TRUE, value="true", index=3, path="[?(true == false)]"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=7, path="[?(true == false)]"), Token(kind=TOKEN_EQ, value="==", index=8, path="[?(true == false)]"), + Token( + kind=TOKEN_WHITESPACE, value=" ", index=10, path="[?(true == false)]" + ), Token(kind=TOKEN_FALSE, value="false", index=11, path="[?(true == false)]"), Token(kind=TOKEN_RPAREN, value=")", index=16, path="[?(true == false)]"), Token(kind=TOKEN_RBRACKET, value="]", index=17, path="[?(true == false)]"), @@ -1176,7 +1437,7 @@ class Case: path="[?(nil == none && nil == null)]", want=[ Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=0, path="[?(nil == none && nil == null)]", @@ -1199,36 +1460,72 @@ class Case: index=3, path="[?(nil == none && nil == null)]", ), + Token( + 
kind=TOKEN_WHITESPACE, + value=" ", + index=6, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_EQ, value="==", index=7, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=9, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_NIL, value="none", index=10, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=14, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_AND, value="&&", index=15, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=17, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_NIL, value="nil", index=18, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=21, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_EQ, value="==", index=22, path="[?(nil == none && nil == null)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="[?(nil == none && nil == null)]", + ), Token( kind=TOKEN_NIL, value="null", @@ -1254,7 +1551,7 @@ class Case: path="$['some', 'thing']", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$['some', 'thing']"), - Token(kind=TOKEN_LIST_START, value="[", index=1, path="$['some', 'thing']"), + Token(kind=TOKEN_LBRACKET, value="[", index=1, path="$['some', 'thing']"), Token( kind=TOKEN_SINGLE_QUOTE_STRING, value="some", @@ -1262,6 +1559,7 @@ class Case: path="$['some', 'thing']", ), Token(kind=TOKEN_COMMA, value=",", index=8, path="$['some', 'thing']"), + Token(kind=TOKEN_WHITESPACE, value=" ", index=9, path="$['some', 'thing']"), Token( kind=TOKEN_SINGLE_QUOTE_STRING, value="thing", @@ -1282,13 +1580,19 @@ class Case: path="$.some[?(length(@.thing) < 2)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=1, + path="$.some[?(length(@.thing) < 2)]", + ), + Token( + kind=TOKEN_NAME, value="some", index=2, path="$.some[?(length(@.thing) < 
2)]", ), Token( - kind=TOKEN_LIST_START, + kind=TOKEN_LBRACKET, value="[", index=6, path="$.some[?(length(@.thing) < 2)]", @@ -1311,6 +1615,12 @@ class Case: index=9, path="$.some[?(length(@.thing) < 2)]", ), + Token( + kind=TOKEN_LPAREN, + value="(", + index=15, + path="$.some[?(length(@.thing) < 2)]", + ), Token( kind=TOKEN_SELF, value="@", @@ -1318,7 +1628,13 @@ class Case: path="$.some[?(length(@.thing) < 2)]", ), Token( - kind=TOKEN_PROPERTY, + kind=TOKEN_DOT, + value=".", + index=17, + path="$.some[?(length(@.thing) < 2)]", + ), + Token( + kind=TOKEN_NAME, value="thing", index=18, path="$.some[?(length(@.thing) < 2)]", @@ -1329,12 +1645,24 @@ class Case: index=23, path="$.some[?(length(@.thing) < 2)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=24, + path="$.some[?(length(@.thing) < 2)]", + ), Token( kind=TOKEN_LT, value="<", index=25, path="$.some[?(length(@.thing) < 2)]", ), + Token( + kind=TOKEN_WHITESPACE, + value=" ", + index=26, + path="$.some[?(length(@.thing) < 2)]", + ), Token( kind=TOKEN_INT, value="2", @@ -1360,7 +1688,9 @@ class Case: path="$.thing.~", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.thing.~"), - Token(kind=TOKEN_PROPERTY, value="thing", index=2, path="$.thing.~"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.thing.~"), + Token(kind=TOKEN_NAME, value="thing", index=2, path="$.thing.~"), + Token(kind=TOKEN_DOT, value=".", index=7, path="$.thing.~"), Token(kind=TOKEN_KEYS, value="~", index=8, path="$.thing.~"), ], ), @@ -1369,8 +1699,9 @@ class Case: path="$.thing[~]", want=[ Token(kind=TOKEN_ROOT, value="$", index=0, path="$.thing[~]"), - Token(kind=TOKEN_PROPERTY, value="thing", index=2, path="$.thing[~]"), - Token(kind=TOKEN_LIST_START, value="[", index=7, path="$.thing[~]"), + Token(kind=TOKEN_DOT, value=".", index=1, path="$.thing[~]"), + Token(kind=TOKEN_NAME, value="thing", index=2, path="$.thing[~]"), + Token(kind=TOKEN_LBRACKET, value="[", index=7, path="$.thing[~]"), Token(kind=TOKEN_KEYS, 
value="~", index=8, path="$.thing[~]"), Token(kind=TOKEN_RBRACKET, value="]", index=9, path="$.thing[~]"), ], @@ -1378,81 +1709,49 @@ class Case: Case( description="implicit root selector, name selector starts with `and`", path="anderson", - want=[ - Token(kind=TOKEN_BARE_PROPERTY, value="anderson", index=0, path="anderson"), - ], + want=[Token(kind=TOKEN_NAME, value="anderson", index=0, path="anderson")], ), Case( description="implicit root selector, name selector starts with `or`", path="order", - want=[ - Token(kind=TOKEN_BARE_PROPERTY, value="order", index=0, path="order"), - ], + want=[Token(kind=TOKEN_NAME, value="order", index=0, path="order")], ), Case( description="implicit root selector, name selector starts with `true`", path="trueblue", - want=[ - Token(kind=TOKEN_BARE_PROPERTY, value="trueblue", index=0, path="trueblue"), - ], + want=[Token(kind=TOKEN_NAME, value="trueblue", index=0, path="trueblue")], ), Case( description="implicit root selector, name selector starts with `false`", path="falsehood", - want=[ - Token( - kind=TOKEN_BARE_PROPERTY, value="falsehood", index=0, path="falsehood" - ), - ], + want=[Token(kind=TOKEN_NAME, value="falsehood", index=0, path="falsehood")], ), Case( description="implicit root selector, name selector starts with `not`", path="nottingham", - want=[ - Token( - kind=TOKEN_BARE_PROPERTY, value="nottingham", index=0, path="nottingham" - ), - ], + want=[Token(kind=TOKEN_NAME, value="nottingham", index=0, path="nottingham")], ), Case( description="implicit root selector, name selector starts with `null`", path="nullable", - want=[ - Token(kind=TOKEN_BARE_PROPERTY, value="nullable", index=0, path="nullable"), - ], + want=[Token(kind=TOKEN_NAME, value="nullable", index=0, path="nullable")], ), Case( description="implicit root selector, name selector starts with `none`", path="nonexpert", - want=[ - Token( - kind=TOKEN_BARE_PROPERTY, value="nonexpert", index=0, path="nonexpert" - ), - ], + want=[Token(kind=TOKEN_NAME, 
value="nonexpert", index=0, path="nonexpert")], ), Case( description="implicit root selector, name selector starts with `undefined`", path="undefinedness", want=[ - Token( - kind=TOKEN_BARE_PROPERTY, - value="undefinedness", - index=0, - path="undefinedness", - ), + Token(kind=TOKEN_NAME, value="undefinedness", index=0, path="undefinedness") ], ), Case( description="implicit root selector, name selector starts with `missing`", path="missingly", - want=[ - Token( - kind=TOKEN_BARE_PROPERTY, - value="missingly", - index=0, - path="missingly", - ), - ], + want=[Token(kind=TOKEN_NAME, value="missingly", index=0, path="missingly")], ), ] diff --git a/tests/test_match_function.py b/tests/test_match_function.py deleted file mode 100644 index 5a9dab7..0000000 --- a/tests/test_match_function.py +++ /dev/null @@ -1,60 +0,0 @@ -import asyncio -import dataclasses -import operator -from typing import Any -from typing import List -from typing import Mapping -from typing import Sequence -from typing import Union - -import pytest - -from jsonpath import JSONPathEnvironment - - -@dataclasses.dataclass -class Case: - description: str - path: str - data: Union[Sequence[Any], Mapping[str, Any]] - want: Union[Sequence[Any], Mapping[str, Any]] - - -TEST_CASES = [ - Case( - description="match a regex", - path="$.some[?match(@.thing, 'fo[a-z]')]", - data={"some": [{"thing": "foo"}]}, - want=[{"thing": "foo"}], - ), - Case( - description="regex with no match", - path="$.some[?match(@.thing, 'fo[a-z]')]", - data={"some": [{"thing": "foO"}]}, - want=[], - ), -] - - -@pytest.fixture() -def env() -> JSONPathEnvironment: - return JSONPathEnvironment() - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_match_function(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - assert path.findall(case.data) == case.want - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def 
test_match_function_async(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - - async def coro() -> List[object]: - return await path.findall_async(case.data) - - assert asyncio.run(coro()) == case.want - - -# TODO: test error conditions diff --git a/tests/test_membership_operators.py b/tests/test_membership_operators.py new file mode 100644 index 0000000..cb44ab0 --- /dev/null +++ b/tests/test_membership_operators.py @@ -0,0 +1,46 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/membership_operators.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_membership_operators(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_membership_operators_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_membership_operators_fail_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_parse.py b/tests/test_parse.py index 96949a1..8415b74 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -16,10 
+16,8 @@ class Case: TEST_CASES = [ Case(description="empty", path="", want="$"), Case(description="just root", path="$", want="$"), - Case(description="root dot", path="$.", want="$"), Case(description="implicit root dot property", path=".thing", want="$['thing']"), Case(description="root dot property", path="$.thing", want="$['thing']"), - Case(description="root bracket property", path="$[thing]", want="$['thing']"), Case( description="root double quoted property", path='$["thing"]', want="$['thing']" ), @@ -31,40 +29,24 @@ class Case: path="$['anything{!%']", want="$['anything{!%']", ), - Case(description="root dot bracket property", path="$.[thing]", want="$['thing']"), Case(description="root bracket index", path="$[1]", want="$[1]"), Case(description="root slice", path="$[1:-1]", want="$[1:-1:1]"), - Case(description="root dot slice", path="$.[1:-1]", want="$[1:-1:1]"), Case(description="root slice with step", path="$[1:-1:2]", want="$[1:-1:2]"), Case(description="root slice with empty start", path="$[:-1]", want="$[:-1:1]"), Case(description="root slice with empty stop", path="$[1:]", want="$[1::1]"), Case(description="root dot wild", path="$.*", want="$[*]"), Case(description="root bracket wild", path="$[*]", want="$[*]"), - Case(description="root dot bracket wild", path="$.[*]", want="$[*]"), - Case(description="root descend", path="$..", want="$.."), - Case(description="root dot descend", path="$...", want="$.."), Case(description="root selector list", path="$[1,2]", want="$[1, 2]"), - Case(description="root dot selector list", path="$.[1,2]", want="$[1, 2]"), Case( description="root selector list with slice", path="$[1,5:-1:1]", want="$[1, 5:-1:1]", ), - Case( - description="root selector list with properties", - path="$[some,thing]", - want="$['some', 'thing']", - ), Case( description="root selector list with quoted properties", path="$[\"some\",'thing']", want="$['some', 'thing']", ), - Case( - description="implicit root selector list with mixed 
selectors", - path='$["some",thing, 1, 2:-2:2]', - want="$['some', 'thing', 1, 2:-2:2]", - ), Case( description="filter self dot property", path="[?(@.thing)]", diff --git a/tests/test_pseudo_root_identifier.py b/tests/test_pseudo_root_identifier.py new file mode 100644 index 0000000..fff90bb --- /dev/null +++ b/tests/test_pseudo_root_identifier.py @@ -0,0 +1,46 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/pseudo_root_identifier.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_pseudo_root_identifier(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_pseudo_root_identifier_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_pseudo_root_identifier_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_query_intersection.py b/tests/test_query_intersection.py new file mode 100644 index 0000000..d4d05c7 --- /dev/null +++ b/tests/test_query_intersection.py @@ -0,0 +1,48 @@ +import asyncio +import json +import 
operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/query_intersection.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_query_intersection_operator(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_query_intersection_operator_async( + env: JSONPathEnvironment, case: Case +) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_query_intersection_operator_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_query_union.py b/tests/test_query_union.py new file mode 100644 index 0000000..4ec12ef --- /dev/null +++ b/tests/test_query_union.py @@ -0,0 +1,46 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/query_union.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + 
+@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_query_union_operator(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_query_union_operator_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_query_union_operator_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_re.py b/tests/test_re.py deleted file mode 100644 index eca6037..0000000 --- a/tests/test_re.py +++ /dev/null @@ -1,69 +0,0 @@ -import asyncio -import dataclasses -import operator -from typing import Any -from typing import List -from typing import Mapping -from typing import Sequence -from typing import Union - -import pytest - -from jsonpath import JSONPathEnvironment - - -@dataclasses.dataclass -class Case: - description: str - path: str - data: Union[Sequence[Any], Mapping[str, Any]] - want: Union[Sequence[Any], Mapping[str, Any]] - - -TEST_CASES = [ - Case( - description="match a regex", - path="$.some[?(@.thing =~ /fo[a-z]/)]", - data={"some": [{"thing": "foo"}]}, - want=[{"thing": "foo"}], - ), - Case( - description="regex with no match", - path="$.some[?(@.thing =~ /fo[a-z]/)]", - data={"some": [{"thing": "foO"}]}, - want=[], - ), - Case( - description="case insensitive match", - path="$.some[?(@.thing =~ /fo[a-z]/i)]", - data={"some": [{"thing": "foO"}]}, - want=[{"thing": "foO"}], - ), - Case( - 
description="escaped slash", - path="$.some[?(@.thing =~ /fo\\\\[a-z]/)]", - data={"some": [{"thing": "fo\\b"}]}, - want=[{"thing": "fo\\b"}], - ), -] - - -@pytest.fixture() -def env() -> JSONPathEnvironment: - return JSONPathEnvironment() - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_filter_regex(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - assert path.findall(case.data) == case.want - - -@pytest.mark.parametrize("case", TEST_CASES, ids=operator.attrgetter("description")) -def test_filter_regex_async(env: JSONPathEnvironment, case: Case) -> None: - path = env.compile(case.path) - - async def coro() -> List[object]: - return await path.findall_async(case.data) - - assert asyncio.run(coro()) == case.want diff --git a/tests/test_regex_operator.py b/tests/test_regex_operator.py new file mode 100644 index 0000000..7849169 --- /dev/null +++ b/tests/test_regex_operator.py @@ -0,0 +1,46 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/regex_operator.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_regex_operator(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_regex_operator_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return 
NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_regex_operator_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_singular_path_selector.py b/tests/test_singular_path_selector.py new file mode 100644 index 0000000..add9cee --- /dev/null +++ b/tests/test_singular_path_selector.py @@ -0,0 +1,46 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/singular_path_selector.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_singular_path_selector(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_singular_path_selector_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_singular_path_selector_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_strictness.py 
b/tests/test_strictness.py new file mode 100644 index 0000000..d4fe39a --- /dev/null +++ b/tests/test_strictness.py @@ -0,0 +1,75 @@ +import pytest + +from jsonpath import JSONPathEnvironment + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +def test_leading_whitespace(env: JSONPathEnvironment) -> None: + query = " $.a" + data = {"a": 1} + assert env.findall(query, data) == [1] + + +def test_trailing_whitespace(env: JSONPathEnvironment) -> None: + query = "$.a " + data = {"a": 1} + assert env.findall(query, data) == [1] + + +def test_index_as_object_name(env: JSONPathEnvironment) -> None: + query = "$.a[0]" + data = {"a": {"0": 1}} + assert env.findall(query, data) == [1] + + +def test_alternative_and(env: JSONPathEnvironment) -> None: + query = "$[?@.a and @.b]" + data = [{"a": True, "b": False}] + assert env.findall(query, data) == [{"a": True, "b": False}] + + +def test_alternative_or(env: JSONPathEnvironment) -> None: + query = "$[?@.a or @.c]" + data = [{"a": True, "b": False}, {"c": 99}] + assert env.findall(query, data) == [{"a": True, "b": False}, {"c": 99}] + + +def test_alternative_null(env: JSONPathEnvironment) -> None: + query = "$[?@.a==Null]" + data = [{"a": None, "d": "e"}, {"a": "c", "d": "f"}] + assert env.findall(query, data) == [{"a": None, "d": "e"}] + + +def test_none(env: JSONPathEnvironment) -> None: + query = "$[?@.a==None]" + data = [{"a": None, "d": "e"}, {"a": "c", "d": "f"}] + assert env.findall(query, data) == [{"a": None, "d": "e"}] + + +def test_implicit_root_identifier( + env: JSONPathEnvironment, +) -> None: + query = "a['p']" + data = { + "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, + "b": ["j", "p", "q"], + } + + assert env.findall(query, data) == [{"q": [4, 5, 6]}] + + +def test_singular_path_selector_without_root_identifier( + env: JSONPathEnvironment, +) -> None: + query = "$.a[b[1]]" + data = { + "a": {"j": [1, 2, 3], "p": {"q": [4, 5, 6]}}, + "b": ["j", "p", "q"], + "c 
d": {"x": {"y": 1}}, + } + + assert env.findall(query, data) == [{"q": [4, 5, 6]}] diff --git a/tests/test_undefined.py b/tests/test_undefined.py new file mode 100644 index 0000000..3d9c1ef --- /dev/null +++ b/tests/test_undefined.py @@ -0,0 +1,46 @@ +import asyncio +import json +import operator + +import pytest + +from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError +from jsonpath import NodeList + +from ._cts_case import Case + + +@pytest.fixture() +def env() -> JSONPathEnvironment: + return JSONPathEnvironment(strict=False) + + +with open("tests/undefined.json", encoding="utf8") as fd: + data = [Case(**case) for case in json.load(fd)["tests"]] + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_undefined_keyword(env: JSONPathEnvironment, case: Case) -> None: + assert case.document is not None + nodes = NodeList(env.finditer(case.selector, case.document)) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_undefined_keyword_async(env: JSONPathEnvironment, case: Case) -> None: + async def coro() -> NodeList: + assert case.document is not None + it = await env.finditer_async(case.selector, case.document) + return NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) + case.assert_nodes(nodes) + + +@pytest.mark.parametrize("case", data, ids=operator.attrgetter("name")) +def test_comparison_to_undefined_fails_in_strict_mode(case: Case) -> None: + env = JSONPathEnvironment(strict=True) + + with pytest.raises(JSONPathSyntaxError): + env.compile(case.selector) diff --git a/tests/test_walk_filter_expression_tree.py b/tests/test_walk_filter_expression_tree.py index b8059ec..90acb6d 100644 --- a/tests/test_walk_filter_expression_tree.py +++ b/tests/test_walk_filter_expression_tree.py @@ -1,4 +1,5 @@ """Test that we can traverse filter expression trees.""" + import dataclasses import operator from typing import List @@ -6,10 +7,9 @@ 
import pytest import jsonpath -from jsonpath.filter import FilterExpression +from jsonpath.filter import BaseExpression from jsonpath.filter import walk from jsonpath.selectors import Filter as FilterSelector -from jsonpath.selectors import ListSelector @dataclasses.dataclass @@ -53,7 +53,7 @@ class Case: ] -def is_volatile(expr: FilterExpression) -> bool: +def is_volatile(expr: BaseExpression) -> bool: return any(expr.volatile for expr in walk(expr)) @@ -63,13 +63,11 @@ def test_is_volatile(case: Case) -> None: assert isinstance(path, jsonpath.JSONPath) filter_selectors: List[FilterSelector] = [] - for segment in path.selectors: - if isinstance(segment, ListSelector): - filter_selectors.extend( - selector - for selector in segment.items - if isinstance(selector, FilterSelector) - ) + + for segment in path.segments: + for selector in segment.selectors: + if isinstance(selector, FilterSelector): + filter_selectors.append(selector) assert len(filter_selectors) == 1 assert is_volatile(filter_selectors[0].expression) is case.want diff --git a/tests/undefined.json b/tests/undefined.json new file mode 100644 index 0000000..3e04c29 --- /dev/null +++ b/tests/undefined.json @@ -0,0 +1,92 @@ +{ + "tests": [ + { + "name": "explicit comparison to undefined", + "selector": "$[?@.a == undefined]", + "document": [ + { + "a": "b", + "d": "e" + }, + { + "b": "c", + "d": "f" + } + ], + "result": [ + { + "b": "c", + "d": "f" + } + ], + "result_paths": ["$[1]"], + "tags": ["extra"] + }, + { + "name": "explicit comparison to missing", + "selector": "$[?@.a == missing]", + "document": [ + { + "a": "b", + "d": "e" + }, + { + "b": "c", + "d": "f" + } + ], + "result": [ + { + "b": "c", + "d": "f" + } + ], + "result_paths": ["$[1]"], + "tags": ["extra"] + }, + { + "name": "explicit undefined is on the left", + "selector": "$[?undefined == @.a]", + "document": [ + { + "a": "b", + "d": "e" + }, + { + "b": "c", + "d": "f" + } + ], + "result": [ + { + "b": "c", + "d": "f" + } + ], + 
"result_paths": ["$[1]"], + "tags": ["extra"] + }, + { + "name": "not equal to undefined", + "selector": "$[?@.a != undefined]", + "document": [ + { + "a": "b", + "d": "e" + }, + { + "b": "c", + "d": "f" + } + ], + "result": [ + { + "a": "b", + "d": "e" + } + ], + "result_paths": ["$[0]"], + "tags": ["extra"] + } + ] +}