From 46409122f12b000d8f30d78b550389629bf24470 Mon Sep 17 00:00:00 2001 From: John Murner Date: Mon, 22 Dec 2025 09:28:37 -0500 Subject: [PATCH] Add wildcard (*) support to filter_fields --- CHANGELOG.md | 4 +- .../core/stac_fastapi/core/utilities.py | 168 ++++++++++++------ .../core/stac_fastapi/core/version.py | 2 +- stac_fastapi/elasticsearch/pyproject.toml | 8 +- .../stac_fastapi/elasticsearch/app.py | 2 +- .../stac_fastapi/elasticsearch/version.py | 2 +- stac_fastapi/opensearch/pyproject.toml | 8 +- .../opensearch/stac_fastapi/opensearch/app.py | 2 +- .../stac_fastapi/opensearch/version.py | 2 +- stac_fastapi/sfeos_helpers/pyproject.toml | 2 +- .../stac_fastapi/sfeos_helpers/version.py | 2 +- stac_fastapi/tests/api/test_api.py | 71 ++++++++ stac_fastapi/tests/resources/test_item.py | 4 +- 13 files changed, 207 insertions(+), 70 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e016fa980..a937c32ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,12 +17,14 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Updated -## [v6.8.1] - 2025-12-15 +## [v6.8.2] - 2025-12-19 ### Changed - Implemented a safety-first deletion policy for the catalogs endpoint to prevent accidental data loss. Collections are now never deleted through the catalogs route; they are only unlinked and automatically adopted by the root catalog if they become orphans. Collection data can only be permanently deleted via the explicit `/collections/{collection_id}` DELETE endpoint. This ensures a clear separation between container (catalog) deletion and content (collection/item) deletion, with data always being preserved through the catalogs API. +- `filter_fields` now supports wildcard patterns (`*`) in both the include and exclude field lists. For example, an include of `properties.*.lat` includes the `lat` attribute of any nested properties object, regardless of which extension provides it; the same patterns work for excludes. A field that is explicitly included cannot also be excluded. + ### Removed - Removed `cascade` parameter from `DELETE /catalogs/{catalog_id}` endpoint. Collections are no longer deleted when a catalog is deleted; they are unlinked and adopted by root if orphaned. 
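To make the new behaviour concrete before the implementation diff below, here is a minimal sketch of calling the rewritten `filter_fields` helper; the item structure is hypothetical, but the include/exclude semantics follow the implementation in `utilities.py`.

```python
from stac_fastapi.core.utilities import filter_fields

# Hypothetical item used only for illustration.
item = {
    "id": "item-1",
    "collection": "test-collection",
    "properties": {
        "datetime": "2020-01-01T00:00:00Z",
        "eo:cloud_cover": 10,
        "proj:centroid": {"lat": 38.9, "lon": -77.0},
    },
    "assets": {
        "visual": {"href": "https://example.com/visual.tif", "type": "image/tiff"},
        "thumbnail": {"href": "https://example.com/thumb.png", "type": "image/png"},
    },
}

# `properties.*.lat` keeps the `lat` key of any nested properties object,
# and `assets.*.href` keeps only the `href` of every asset.
filter_fields(item, include={"properties.*.lat", "assets.*.href"})
# -> {"properties": {"proj:centroid": {"lat": 38.9}},
#     "assets": {"visual": {"href": "https://example.com/visual.tif"},
#                "thumbnail": {"href": "https://example.com/thumb.png"}}}

# An explicit include takes precedence over a wildcard exclude.
filter_fields(
    item,
    include={"properties.proj:centroid.lat"},
    exclude={"properties.*.lat"},
)
# -> {"properties": {"proj:centroid": {"lat": 38.9}}}
```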
diff --git a/stac_fastapi/core/stac_fastapi/core/utilities.py b/stac_fastapi/core/stac_fastapi/core/utilities.py index 24a588856..d39dbf443 100644 --- a/stac_fastapi/core/stac_fastapi/core/utilities.py +++ b/stac_fastapi/core/stac_fastapi/core/utilities.py @@ -6,6 +6,7 @@ import logging import os +import re from typing import Any, Dict, List, Optional, Set, Union from stac_fastapi.types.stac import Item @@ -70,8 +71,6 @@ def bbox2polygon(b0: float, b1: float, b2: float, b3: float) -> List[List[List[f return [[[b0, b1], [b2, b1], [b2, b3], [b0, b3], [b0, b1]]] -# copied from stac-fastapi-pgstac -# https://github.com/stac-utils/stac-fastapi-pgstac/blob/26f6d918eb933a90833f30e69e21ba3b4e8a7151/stac_fastapi/pgstac/utils.py#L10-L116 def filter_fields( # noqa: C901 item: Union[Item, Dict[str, Any]], include: Optional[Set[str]] = None, @@ -87,78 +86,141 @@ def filter_fields( # noqa: C901 if not include and not exclude: return item - # Build a shallow copy of included fields on an item, or a sub-tree of an item + def match_pattern(pattern: str, key: str) -> bool: + """Check if a key matches a wildcard pattern.""" + regex_pattern = "^" + re.escape(pattern).replace(r"\*", ".*") + "$" + return bool(re.match(regex_pattern, key)) + + def get_matching_keys(source: Dict[str, Any], pattern: str) -> List[str]: + """Get all keys that match the pattern.""" + if not isinstance(source, dict): + return [] + return [key for key in source.keys() if match_pattern(pattern, key)] + def include_fields( source: Dict[str, Any], fields: Optional[Set[str]] ) -> Dict[str, Any]: + """Include only the specified fields from the source dictionary.""" if not fields: return source + def recursive_include( + source: Dict[str, Any], path_parts: List[str] + ) -> Dict[str, Any]: + """Recursively include fields matching the pattern path.""" + if not path_parts: + return source + + if not isinstance(source, dict): + return {} + + current_pattern = path_parts[0] + remaining_parts = path_parts[1:] + + matching_keys = get_matching_keys(source, current_pattern) + + if not matching_keys: + return {} + + result: Dict[str, Any] = {} + for key in matching_keys: + if remaining_parts: + if isinstance(source[key], dict): + value = recursive_include(source[key], remaining_parts) + if value: + result[key] = value + else: + result[key] = source[key] + + return result + clean_item: Dict[str, Any] = {} for key_path in fields or []: key_path_parts = key_path.split(".") - key_root = key_path_parts[0] - if key_root in source: - if isinstance(source[key_root], dict) and len(key_path_parts) > 1: - # The root of this key path on the item is a dict, and the - # key path indicates a sub-key to be included. Walk the dict - # from the root key and get the full nested value to include. - value = include_fields( - source[key_root], fields={".".join(key_path_parts[1:])} - ) - - if isinstance(clean_item.get(key_root), dict): - # A previously specified key and sub-keys may have been included - # already, so do a deep merge update if the root key already exists. - dict_deep_update(clean_item[key_root], value) - else: - # The root key does not exist, so add it. Fields - # extension only allows nested referencing on dicts, so - # this won't overwrite anything. 
- clean_item[key_root] = value + included_values = recursive_include(source, key_path_parts) + + for key, value in included_values.items(): + if isinstance(clean_item.get(key), dict) and isinstance(value, dict): + dict_deep_update(clean_item[key], value) else: - # The item value to include is not a dict, or, it is a dict but the - # key path is for the whole value, not a sub-key. Include the entire - # value in the cleaned item. - clean_item[key_root] = source[key_root] - else: - # The key, or root key of a multi-part key, is not present in the item, - # so it is ignored - pass + clean_item[key] = value + return clean_item - # For an item built up for included fields, remove excluded fields. This - # modifies `source` in place. - def exclude_fields(source: Dict[str, Any], fields: Optional[Set[str]]) -> None: - for key_path in fields or []: - key_path_part = key_path.split(".") - key_root = key_path_part[0] - if key_root in source: - if isinstance(source[key_root], dict) and len(key_path_part) > 1: - # Walk the nested path of this key to remove the leaf-key - exclude_fields( - source[key_root], fields={".".join(key_path_part[1:])} - ) - # If, after removing the leaf-key, the root is now an empty - # dict, remove it entirely - if not source[key_root]: - del source[key_root] + def exclude_fields( + source: Dict[str, Any], + fields: Optional[Set[str]], + included_fields: Optional[Set[str]] = None, + ) -> None: + """Exclude fields from source, but preserve any fields that were explicitly included.""" + + def is_path_included(current_path: str) -> bool: + """Check if a path matches any of the included field patterns.""" + if not included_fields: + return False + + for include_pattern in included_fields: + include_parts = include_pattern.split(".") + current_parts = current_path.split(".") + + # Check if current path matches the include pattern + if len(include_parts) != len(current_parts): + continue + + match = True + for include_part, current_part in zip(include_parts, current_parts): + if not match_pattern(include_part, current_part): + match = False + break + + if match: + return True + + return False + + def recursive_exclude( + source: Dict[str, Any], path_parts: List[str], current_path: str = "" + ) -> None: + """Recursively exclude fields matching the pattern path.""" + if not path_parts or not isinstance(source, dict): + return + + current_pattern = path_parts[0] + remaining_parts = path_parts[1:] + + matching_keys = get_matching_keys(source, current_pattern) + + for key in list(matching_keys): + if key not in source: + continue + + # Build the full path for this key + full_path = f"{current_path}.{key}" if current_path else key + + # Skip exclusion if this path was explicitly included + if is_path_included(full_path): + continue + + if remaining_parts: + if isinstance(source[key], dict): + recursive_exclude(source[key], remaining_parts, full_path) + if not source[key]: + del source[key] else: - # The key's value is not a dict, or there is no sub-key to remove. The - # entire key can be removed from the source. - source.pop(key_root, None) + source.pop(key, None) + + for key_path in fields or []: + key_path_parts = key_path.split(".") + recursive_exclude(source, key_path_parts) - # Coalesce incoming type to a dict item = dict(item) clean_item = include_fields(item, include) - # If, after including all the specified fields, there are no included properties, - # return just id and collection. 
if not clean_item: return Item({"id": item["id"], "collection": item["collection"]}) - exclude_fields(clean_item, exclude) + exclude_fields(clean_item, exclude, include) return Item(**clean_item) diff --git a/stac_fastapi/core/stac_fastapi/core/version.py b/stac_fastapi/core/stac_fastapi/core/version.py index 8a68b5cba..ace8dec1b 100644 --- a/stac_fastapi/core/stac_fastapi/core/version.py +++ b/stac_fastapi/core/stac_fastapi/core/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "6.8.1" +__version__ = "6.8.2" diff --git a/stac_fastapi/elasticsearch/pyproject.toml b/stac_fastapi/elasticsearch/pyproject.toml index 03ef3d80c..352ff5a6d 100644 --- a/stac_fastapi/elasticsearch/pyproject.toml +++ b/stac_fastapi/elasticsearch/pyproject.toml @@ -28,8 +28,8 @@ keywords = [ ] dynamic = ["version"] dependencies = [ - "stac-fastapi-core==6.8.1", - "sfeos-helpers==6.8.1", + "stac-fastapi-core==6.8.2", + "sfeos-helpers==6.8.2", "elasticsearch[async]~=8.19.1", "uvicorn~=0.23.0", "starlette>=0.35.0,<0.36.0", @@ -48,7 +48,7 @@ dev = [ "httpx>=0.24.0,<0.28.0", "redis~=6.4.0", "retry~=0.9.2", - "stac-fastapi-core[redis]==6.8.1", + "stac-fastapi-core[redis]==6.8.2", ] docs = [ "mkdocs~=1.4.0", @@ -58,7 +58,7 @@ docs = [ "retry~=0.9.2", ] redis = [ - "stac-fastapi-core[redis]==6.8.1", + "stac-fastapi-core[redis]==6.8.2", ] server = [ "uvicorn[standard]~=0.23.0", diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index 915775f4d..e51e42cd8 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -244,7 +244,7 @@ app_config = { "title": os.getenv("STAC_FASTAPI_TITLE", "stac-fastapi-elasticsearch"), "description": os.getenv("STAC_FASTAPI_DESCRIPTION", "stac-fastapi-elasticsearch"), - "api_version": os.getenv("STAC_FASTAPI_VERSION", "6.8.1"), + "api_version": os.getenv("STAC_FASTAPI_VERSION", "6.8.2"), "settings": settings, "extensions": extensions, "client": CoreClient( diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py index 8a68b5cba..ace8dec1b 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "6.8.1" +__version__ = "6.8.2" diff --git a/stac_fastapi/opensearch/pyproject.toml b/stac_fastapi/opensearch/pyproject.toml index 7dc9349b6..bd2586692 100644 --- a/stac_fastapi/opensearch/pyproject.toml +++ b/stac_fastapi/opensearch/pyproject.toml @@ -28,8 +28,8 @@ keywords = [ ] dynamic = ["version"] dependencies = [ - "stac-fastapi-core==6.8.1", - "sfeos-helpers==6.8.1", + "stac-fastapi-core==6.8.2", + "sfeos-helpers==6.8.2", "opensearch-py~=2.8.0", "opensearch-py[async]~=2.8.0", "uvicorn~=0.23.0", @@ -49,7 +49,7 @@ dev = [ "httpx>=0.24.0,<0.28.0", "redis~=6.4.0", "retry~=0.9.2", - "stac-fastapi-core[redis]==6.8.1", + "stac-fastapi-core[redis]==6.8.2", ] docs = [ "mkdocs~=1.4.0", @@ -57,7 +57,7 @@ docs = [ "pdocs~=1.2.0", ] redis = [ - "stac-fastapi-core[redis]==6.8.1", + "stac-fastapi-core[redis]==6.8.2", ] server = [ "uvicorn[standard]~=0.23.0", diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py index ee5524469..9722bb117 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py +++ 
b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py @@ -243,7 +243,7 @@ app_config = { "title": os.getenv("STAC_FASTAPI_TITLE", "stac-fastapi-opensearch"), "description": os.getenv("STAC_FASTAPI_DESCRIPTION", "stac-fastapi-opensearch"), - "api_version": os.getenv("STAC_FASTAPI_VERSION", "6.8.1"), + "api_version": os.getenv("STAC_FASTAPI_VERSION", "6.8.2"), "settings": settings, "extensions": extensions, "client": CoreClient( diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py index 8a68b5cba..ace8dec1b 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "6.8.1" +__version__ = "6.8.2" diff --git a/stac_fastapi/sfeos_helpers/pyproject.toml b/stac_fastapi/sfeos_helpers/pyproject.toml index c893211e8..2e52202d7 100644 --- a/stac_fastapi/sfeos_helpers/pyproject.toml +++ b/stac_fastapi/sfeos_helpers/pyproject.toml @@ -29,7 +29,7 @@ keywords = [ ] dynamic = ["version"] dependencies = [ - "stac-fastapi.core==6.8.1", + "stac-fastapi.core==6.8.2", ] [project.urls] diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/version.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/version.py index 8a68b5cba..ace8dec1b 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/version.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "6.8.1" +__version__ = "6.8.2" diff --git a/stac_fastapi/tests/api/test_api.py b/stac_fastapi/tests/api/test_api.py index 587fcb854..3a4b8c79a 100644 --- a/stac_fastapi/tests/api/test_api.py +++ b/stac_fastapi/tests/api/test_api.py @@ -176,6 +176,77 @@ async def test_app_fields_extension_query(app_client, ctx, txn_client): assert set(resp_json["features"][0]["properties"]) == set(["datetime", "proj:epsg"]) +@pytest.mark.asyncio +async def test_app_fields_extension_wildcard_query(app_client, ctx, txn_client): + item = ctx.item + include = {"include": ["properties.*.lat", "assets.*.href"]} + resp = await app_client.post( + "/search", + json={ + "query": {"proj:epsg": {"gte": item["properties"]["proj:epsg"]}}, + "collections": ["test-collection"], + "fields": include, + }, + ) + assert resp.status_code == 200 + include_resp_json = resp.json() + for feature in include_resp_json["features"]: + assert len(feature["properties"]) == 1 + assert feature["properties"]["proj:centroid"].get("lat", None) + for assets_values in feature["assets"].values(): + assert len(assets_values) == 1 + assert "href" in assets_values + + exclude = {"exclude": ["properties.eo:bands", "properties.*.lat", "assets.*.href"]} + resp = await app_client.post( + "/search", + json={ + "query": {"proj:epsg": {"gte": item["properties"]["proj:epsg"]}}, + "collections": ["test-collection"], + "fields": exclude, + }, + ) + + assert resp.status_code == 200 + exclude_resp_json = resp.json() + for feature in exclude_resp_json["features"]: + assert "eo:bands" not in feature["properties"] + assert not feature["properties"]["proj:centroid"].get("lat", None) + for assets_values in feature["assets"].values(): + assert "href" not in assets_values + + fields = {"include": ["properties.*.lat"], "exclude": ["properties.*.lat"]} + + resp = await app_client.post( + "/search", + json={ + "query": {"proj:epsg": {"gte": item["properties"]["proj:epsg"]}}, + "collections": ["test-collection"], + "fields": fields, 
+ }, + ) + exclude_resp_json = resp.json() + for feature in exclude_resp_json["features"]: + assert feature["properties"]["proj:centroid"].get("lat", None) + + fields = { + "include": ["properties.proj:centroid.lat"], + "exclude": ["properties.*.lat"], + } + + resp = await app_client.post( + "/search", + json={ + "query": {"proj:epsg": {"gte": item["properties"]["proj:epsg"]}}, + "collections": ["test-collection"], + "fields": fields, + }, + ) + exclude_resp_json = resp.json() + for feature in exclude_resp_json["features"]: + assert feature["properties"]["proj:centroid"].get("lat", None) + + @pytest.mark.asyncio async def test_app_fields_extension_no_properties_get(app_client, ctx, txn_client): resp = await app_client.get( diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index 4231f1029..2d25fb5a3 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -855,7 +855,9 @@ async def test_field_extension_exclude_and_include(app_client, ctx): resp = await app_client.post("/search", json=body) resp_json = resp.json() - assert "properties" not in resp_json["features"][0] + assert ( + resp_json["features"][0]["properties"].get("eo:cloud_cover", None) is not None + ) @pytest.mark.asyncio
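For reference, the wildcard matching that the tests above rely on reduces to a small regex translation. This standalone sketch mirrors the `match_pattern` helper added in `utilities.py`; the example keys are made up.

```python
import re


def match_pattern(pattern: str, key: str) -> bool:
    """Mirror of the helper added in utilities.py: escape the pattern, then
    turn the escaped "*" back into ".*" so it matches any run of characters
    within a single key (field paths are split on "." before this is called,
    so a "*" never crosses a nesting level)."""
    regex_pattern = "^" + re.escape(pattern).replace(r"\*", ".*") + "$"
    return bool(re.match(regex_pattern, key))


assert match_pattern("*", "proj:centroid")
assert match_pattern("proj:*", "proj:epsg")
assert match_pattern("*:bands", "eo:bands")
assert not match_pattern("eo:*", "proj:epsg")
```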