From fc0c3ba341dd47dec217944f45309830b2734a36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexandre=20Bult=C3=A9?= Date: Thu, 23 Apr 2026 14:58:06 +0200 Subject: [PATCH 1/3] feat(search): refactor facets declaration --- udata/core/dataservices/search.py | 1 - udata/core/dataset/search.py | 1 - udata/core/reuse/search.py | 1 - udata/core/topic/search.py | 1 - udata/search/adapter.py | 19 +- udata/search/query.py | 4 +- udata/tests/search/test_adapter.py | 16 +- udata/tests/search/test_services.py | 7 + udata_search_service/search_clients.py | 852 +++++++------------------ udata_search_service/services.py | 63 +- 10 files changed, 337 insertions(+), 628 deletions(-) diff --git a/udata/core/dataservices/search.py b/udata/core/dataservices/search.py index b9995da0bc..4abe7e039c 100644 --- a/udata/core/dataservices/search.py +++ b/udata/core/dataservices/search.py @@ -83,7 +83,6 @@ class DataserviceSearch(ModelSearchAdapter): model = Dataservice service_class = DataserviceService consumer_class = DataserviceConsumer - configurable_size_facets = ["organization_id_with_name"] sorts = {"created": "created_at", "views": "views", "followers": "followers"} diff --git a/udata/core/dataset/search.py b/udata/core/dataset/search.py index fa73af9746..947d211103 100644 --- a/udata/core/dataset/search.py +++ b/udata/core/dataset/search.py @@ -34,7 +34,6 @@ class DatasetSearch(ModelSearchAdapter): model = Dataset service_class = DatasetService consumer_class = DatasetConsumer - configurable_size_facets = ["organization_id_with_name"] sorts = { "created": "created_at_internal", diff --git a/udata/core/reuse/search.py b/udata/core/reuse/search.py index fac57f81de..b10e7ec3c8 100644 --- a/udata/core/reuse/search.py +++ b/udata/core/reuse/search.py @@ -25,7 +25,6 @@ class ReuseSearch(ModelSearchAdapter): model = Reuse service_class = ReuseService consumer_class = ReuseConsumer - configurable_size_facets = ["organization_id_with_name"] sorts = { "created": "created_at", diff --git a/udata/core/topic/search.py b/udata/core/topic/search.py index 550a9344b8..316c2809e8 100644 --- a/udata/core/topic/search.py +++ b/udata/core/topic/search.py @@ -22,7 +22,6 @@ class TopicSearch(ModelSearchAdapter): model = Topic service_class = TopicService consumer_class = TopicConsumer - configurable_size_facets = ["organization_id_with_name"] sorts = { "name": "name", diff --git a/udata/search/adapter.py b/udata/search/adapter.py index cca5fe844a..b65ee58804 100644 --- a/udata/search/adapter.py +++ b/udata/search/adapter.py @@ -3,6 +3,7 @@ from flask_restx.reqparse import RequestParser from udata.search.query import SearchQuery +from udata_search_service.search_clients import TermsFacet log = logging.getLogger(__name__) @@ -13,7 +14,6 @@ class ModelSearchAdapter: model = None sorts = None filters = {} - configurable_size_facets = [] service_class = None consumer_class = None @@ -58,13 +58,16 @@ def as_request_parser(cls, paginate=True, store_missing: bool = True): parser.add_argument( "page_size", type=int, location="args", default=20, help="The page size" ) - for facet_name in cls.configurable_size_facets: - parser.add_argument( - f"facet_size__{facet_name}", - type=int, - location="args", - help=f"Number of {facet_name} facet values to return", - ) + if cls.service_class: + for facet in cls.service_class.facets: + if isinstance(facet, TermsFacet): + parser.add_argument( + f"facet_size__{facet.name}", + type=int, + location="args", + store_missing=store_missing, + help=f"Number of {facet.name} facet values to return", + ) return parser @classmethod diff --git a/udata/search/query.py b/udata/search/query.py index 48c5407c4b..a7767883ff 100644 --- a/udata/search/query.py +++ b/udata/search/query.py @@ -49,7 +49,9 @@ def __init__(self, params): self._facet_sizes = {} for key in [k for k in list(params.keys()) if k.startswith("facet_size__")]: facet_name = key[len("facet_size__") :] - self._facet_sizes[facet_name] = parse_facet_size(key, params.pop(key), max_facet_size) + raw = params.pop(key) + if raw is not None: + self._facet_sizes[facet_name] = parse_facet_size(key, raw, max_facet_size) self._filters = {} self.extract_filters(params) diff --git a/udata/tests/search/test_adapter.py b/udata/tests/search/test_adapter.py index 412bdf6fe4..5013de6afa 100644 --- a/udata/tests/search/test_adapter.py +++ b/udata/tests/search/test_adapter.py @@ -562,13 +562,21 @@ def test_serialize_includes_producer_type_user(self): class ConfigurableSizeFacetsTest(APITestCase): def test_facet_size_params_in_request_parser(self): + from udata_search_service.search_clients import TermsFacet + for adapter in [DatasetSearch, ReuseSearch, DataserviceSearch]: parser = adapter.as_request_parser() arg_names = [arg.name for arg in parser.args] - assert "facet_size__organization_id_with_name" in arg_names, ( - f"{adapter.__name__} parser is missing facet_size__organization_id_with_name — " - f"it would be silently dropped from API requests" - ) + expected = [ + f"facet_size__{f.name}" + for f in adapter.service_class.facets + if isinstance(f, TermsFacet) + ] + for expected_arg in expected: + assert expected_arg in arg_names, ( + f"{adapter.__name__} parser is missing {expected_arg} — " + f"it would be silently dropped from API requests" + ) def test_facet_size_param_is_int(self): parser = DatasetSearch.as_request_parser() diff --git a/udata/tests/search/test_services.py b/udata/tests/search/test_services.py index 6a8d1be3ed..af33fd0ab6 100644 --- a/udata/tests/search/test_services.py +++ b/udata/tests/search/test_services.py @@ -28,6 +28,13 @@ def test_empty_facet_sizes_when_not_provided(): assert kwargs["facet_sizes"] == {} +def test_facets_list_passed_to_client_query(): + service, mock_client = make_service() + service.search(base_filters()) + _, kwargs = mock_client.query_datasets.call_args + assert kwargs["facets"] == DatasetService.facets + + def test_facet_sizes_not_passed_as_filter(): service, mock_client = make_service() filters = {**base_filters(), "facet_sizes": {"tag": 100}} diff --git a/udata_search_service/search_clients.py b/udata_search_service/search_clients.py index 0445192469..109dd1954a 100644 --- a/udata_search_service/search_clients.py +++ b/udata_search_service/search_clients.py @@ -1,4 +1,5 @@ import logging +from dataclasses import dataclass from datetime import datetime, timezone from typing import List, Optional, Tuple @@ -30,6 +31,72 @@ log = logging.getLogger(__name__) + +@dataclass +class TermsFacet: + name: str + es_field: str + + +@dataclass +class DateRangeFacet: + name: str + es_field: str + + +DATE_RANGES = [ + {"key": "last_30_days", "from": "now-30d/d"}, + {"key": "last_12_months", "from": "now-12M/d"}, + {"key": "last_3_years", "from": "now-3y/d"}, +] + + +def _parse_filtered_facets(aggregations, facets: list) -> dict: + """Parse ES aggregations built with the filter-wrapper pattern into a facets dict.""" + result = {} + for facet in facets: + if isinstance(facet, TermsFacet): + filtered_name = f"{facet.name}_filtered" + total_name = f"{facet.name}_total" + if hasattr(aggregations, filtered_name): + fa = getattr(aggregations, filtered_name) + if hasattr(fa, facet.name): + buckets = [ + {"name": b.key, "count": b.doc_count} + for b in getattr(fa, facet.name).buckets + ] + total = int(fa.total.value) if hasattr(fa, "total") else 0 + result[facet.name] = [{"name": "all", "count": total}] + buckets + elif hasattr(aggregations, facet.name): + buckets = [ + {"name": b.key, "count": b.doc_count} + for b in getattr(aggregations, facet.name).buckets + ] + total = ( + int(getattr(aggregations, total_name).value) + if hasattr(aggregations, total_name) + else 0 + ) + result[facet.name] = [{"name": "all", "count": total}] + buckets + elif isinstance(facet, DateRangeFacet): + if hasattr(aggregations, "last_update_filtered"): + fa = aggregations.last_update_filtered + buckets = [{"name": b.key, "count": b.doc_count} for b in fa.last_update.buckets] + total = int(fa.total.value) if hasattr(fa, "total") else 0 + result["last_update"] = [{"name": "all", "count": total}] + buckets + elif hasattr(aggregations, "last_update"): + buckets = [ + {"name": b.key, "count": b.doc_count} for b in aggregations.last_update.buckets + ] + total = ( + int(aggregations.last_update_total.value) + if hasattr(aggregations, "last_update_total") + else 0 + ) + result["last_update"] = [{"name": "all", "count": total}] + buckets + return result + + SEARCH_SYNONYMS = [ "AMD, administrateur ministériel des données, AMDAC", "lolf, loi de finance", @@ -327,6 +394,7 @@ def query_organizations( filters: dict, sort: Optional[str] = None, facet_sizes: dict = {}, + facets: list = [], ) -> Tuple[int, List[dict], dict]: search = SearchableOrganization.search() @@ -383,12 +451,11 @@ def query_organizations( ) ) - search.aggs.bucket( - "producer_type", - "terms", - field="producer_type", - size=facet_sizes.get("producer_type", 50), - ) + for facet in facets: + if isinstance(facet, TermsFacet): + search.aggs.bucket( + facet.name, "terms", field=facet.es_field, size=facet_sizes.get(facet.name, 50) + ) search.aggs.metric("total_count", "cardinality", field="_id") if post_filters: @@ -408,23 +475,22 @@ def query_organizations( ) res = [hit.to_dict(skip_empty=False) for hit in response.hits] - facets = {} + facets_result = {} if hasattr(response, "aggregations"): total_count = ( int(response.aggregations.total_count.value) if hasattr(response.aggregations, "total_count") else 0 ) - - for agg_name in ["producer_type"]: - if hasattr(response.aggregations, agg_name): + for facet in facets: + if isinstance(facet, TermsFacet) and hasattr(response.aggregations, facet.name): buckets = [ - {"name": bucket.key, "count": bucket.doc_count} - for bucket in response.aggregations[agg_name].buckets + {"name": b.key, "count": b.doc_count} + for b in response.aggregations[facet.name].buckets ] - facets[agg_name] = [{"name": "all", "count": total_count}] + buckets + facets_result[facet.name] = [{"name": "all", "count": total_count}] + buckets - return results_number, res, facets + return results_number, res, facets_result def query_topics( self, @@ -434,6 +500,7 @@ def query_topics( filters: dict, sort: Optional[str] = None, facet_sizes: dict = {}, + facets: list = [], ) -> Tuple[int, List[dict], dict]: search = SearchableTopic.search() @@ -505,87 +572,34 @@ def get_filters_except(exclude_key): filters_list.append(filter_dict[key]) return filters_list - tag_filters = get_filters_except("tag") - if tag_filters: - tag_agg = search.aggs.bucket( - "tag_filtered", "filter", filter=query.Bool(must=tag_filters) - ) - tag_agg.bucket("tag", "terms", field="tags", size=facet_sizes.get("tag", 50)) - tag_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket("tag", "terms", field="tags", size=facet_sizes.get("tag", 50)) - search.aggs.metric("tag_total", "cardinality", field="_id") - - org_filters = get_filters_except("organization_id_with_name") - if org_filters: - org_agg = search.aggs.bucket( - "organization_id_with_name_filtered", "filter", filter=query.Bool(must=org_filters) - ) - org_agg.bucket( - "organization_id_with_name", - "terms", - field="organization_with_id", - size=facet_sizes.get("organization_id_with_name", 50), - ) - org_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "organization_id_with_name", - "terms", - field="organization_with_id", - size=facet_sizes.get("organization_id_with_name", 50), - ) - search.aggs.metric("organization_id_with_name_total", "cardinality", field="_id") - - producer_filters = get_filters_except("producer_type") - if producer_filters: - producer_agg = search.aggs.bucket( - "producer_type_filtered", "filter", filter=query.Bool(must=producer_filters) - ) - producer_agg.bucket( - "producer_type", - "terms", - field="producer_type", - size=facet_sizes.get("producer_type", 50), - ) - producer_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "producer_type", - "terms", - field="producer_type", - size=facet_sizes.get("producer_type", 50), - ) - search.aggs.metric("producer_type_total", "cardinality", field="_id") - - last_update_filters = get_filters_except("last_update_range") - if last_update_filters: - last_update_agg = search.aggs.bucket( - "last_update_filtered", "filter", filter=query.Bool(must=last_update_filters) - ) - last_update_agg.bucket( - "last_update", - "date_range", - field="last_modified", - ranges=[ - {"key": "last_30_days", "from": "now-30d/d"}, - {"key": "last_12_months", "from": "now-12M/d"}, - {"key": "last_3_years", "from": "now-3y/d"}, - ], - ) - last_update_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "last_update", - "date_range", - field="last_modified", - ranges=[ - {"key": "last_30_days", "from": "now-30d/d"}, - {"key": "last_12_months", "from": "now-12M/d"}, - {"key": "last_3_years", "from": "now-3y/d"}, - ], - ) - search.aggs.metric("last_update_total", "cardinality", field="_id") + for facet in facets: + if isinstance(facet, TermsFacet): + size = facet_sizes.get(facet.name, 50) + f = get_filters_except(facet.name) + if f: + agg = search.aggs.bucket( + f"{facet.name}_filtered", "filter", filter=query.Bool(must=f) + ) + agg.bucket(facet.name, "terms", field=facet.es_field, size=size) + agg.metric("total", "cardinality", field="_id") + else: + search.aggs.bucket(facet.name, "terms", field=facet.es_field, size=size) + search.aggs.metric(f"{facet.name}_total", "cardinality", field="_id") + elif isinstance(facet, DateRangeFacet): + f = get_filters_except("last_update_range") + if f: + agg = search.aggs.bucket( + "last_update_filtered", "filter", filter=query.Bool(must=f) + ) + agg.bucket( + "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES + ) + agg.metric("total", "cardinality", field="_id") + else: + search.aggs.bucket( + "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES + ) + search.aggs.metric("last_update_total", "cardinality", field="_id") post_filters = [] for key, value in filter_dict.items(): @@ -619,44 +633,11 @@ def get_filters_except(exclude_key): ) res = [hit.to_dict(skip_empty=False) for hit in response.hits] - facets = {} + facets_result = {} if hasattr(response, "aggregations"): - facet_configs = [ - ("tag", "tag_filtered", "tag_total"), - ( - "organization_id_with_name", - "organization_id_with_name_filtered", - "organization_id_with_name_total", - ), - ("producer_type", "producer_type_filtered", "producer_type_total"), - ("last_update", "last_update_filtered", "last_update_total"), - ] - - for facet_name, filtered_name, total_name in facet_configs: - if hasattr(response.aggregations, filtered_name): - filtered_agg = getattr(response.aggregations, filtered_name) - if hasattr(filtered_agg, facet_name): - buckets = [ - {"name": bucket.key, "count": bucket.doc_count} - for bucket in getattr(filtered_agg, facet_name).buckets - ] - total_count = ( - int(filtered_agg.total.value) if hasattr(filtered_agg, "total") else 0 - ) - facets[facet_name] = [{"name": "all", "count": total_count}] + buckets - elif hasattr(response.aggregations, facet_name): - buckets = [ - {"name": bucket.key, "count": bucket.doc_count} - for bucket in getattr(response.aggregations, facet_name).buckets - ] - total_count = ( - int(getattr(response.aggregations, total_name).value) - if hasattr(response.aggregations, total_name) - else 0 - ) - facets[facet_name] = [{"name": "all", "count": total_count}] + buckets + facets_result = _parse_filtered_facets(response.aggregations, facets) - return results_number, res, facets + return results_number, res, facets_result def query_datasets( self, @@ -666,6 +647,7 @@ def query_datasets( filters: dict, sort: Optional[str] = None, facet_sizes: dict = {}, + facets: list = [], ) -> Tuple[int, List[dict], dict]: search = SearchableDataset.search() @@ -841,220 +823,34 @@ def get_filters_except(exclude_key): filters_list.append(filter_dict[key]) return filters_list - format_filters = get_filters_except("format_family") - if format_filters: - format_agg = search.aggs.bucket( - "format_family_filtered", "filter", filter=query.Bool(must=format_filters) - ) - format_agg.bucket( - "format_family", - "terms", - field="format_family", - size=facet_sizes.get("format_family", 50), - ) - format_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "format_family", - "terms", - field="format_family", - size=facet_sizes.get("format_family", 50), - ) - search.aggs.metric("format_family_total", "cardinality", field="_id") - - access_filters = get_filters_except("access_type") - if access_filters: - access_agg = search.aggs.bucket( - "access_type_filtered", "filter", filter=query.Bool(must=access_filters) - ) - access_agg.bucket( - "access_type", "terms", field="access_type", size=facet_sizes.get("access_type", 50) - ) - access_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "access_type", "terms", field="access_type", size=facet_sizes.get("access_type", 50) - ) - search.aggs.metric("access_type_total", "cardinality", field="_id") - - producer_filters = get_filters_except("producer_type") - if producer_filters: - producer_agg = search.aggs.bucket( - "producer_type_filtered", "filter", filter=query.Bool(must=producer_filters) - ) - producer_agg.bucket( - "producer_type", - "terms", - field="producer_type", - size=facet_sizes.get("producer_type", 50), - ) - producer_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "producer_type", - "terms", - field="producer_type", - size=facet_sizes.get("producer_type", 50), - ) - search.aggs.metric("producer_type_total", "cardinality", field="_id") - - org_name_filters = get_filters_except("organization_id_with_name") - if org_name_filters: - org_name_agg = search.aggs.bucket( - "organization_id_with_name_filtered", - "filter", - filter=query.Bool(must=org_name_filters), - ) - org_name_agg.bucket( - "organization_id_with_name", - "terms", - field="organization_with_id", - size=facet_sizes.get("organization_id_with_name", 50), - ) - org_name_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "organization_id_with_name", - "terms", - field="organization_with_id", - size=facet_sizes.get("organization_id_with_name", 50), - ) - search.aggs.metric("organization_id_with_name_total", "cardinality", field="_id") - - last_update_filters = get_filters_except("last_update_range") - if last_update_filters: - last_update_agg = search.aggs.bucket( - "last_update_filtered", "filter", filter=query.Bool(must=last_update_filters) - ) - last_update_agg.bucket( - "last_update", - "date_range", - field="last_update", - ranges=[ - {"key": "last_30_days", "from": "now-30d/d"}, - {"key": "last_12_months", "from": "now-12M/d"}, - {"key": "last_3_years", "from": "now-3y/d"}, - ], - ) - last_update_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "last_update", - "date_range", - field="last_update", - ranges=[ - {"key": "last_30_days", "from": "now-30d/d"}, - {"key": "last_12_months", "from": "now-12M/d"}, - {"key": "last_3_years", "from": "now-3y/d"}, - ], - ) - search.aggs.metric("last_update_total", "cardinality", field="_id") - - tag_filters = get_filters_except("tag") - if tag_filters: - tag_agg = search.aggs.bucket( - "tag_filtered", "filter", filter=query.Bool(must=tag_filters) - ) - tag_agg.bucket("tag", "terms", field="tags", size=facet_sizes.get("tag", 50)) - tag_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket("tag", "terms", field="tags", size=facet_sizes.get("tag", 50)) - search.aggs.metric("tag_total", "cardinality", field="_id") - - license_filters = get_filters_except("license") - if license_filters: - license_agg = search.aggs.bucket( - "license_filtered", "filter", filter=query.Bool(must=license_filters) - ) - license_agg.bucket( - "license", "terms", field="license", size=facet_sizes.get("license", 50) - ) - license_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "license", "terms", field="license", size=facet_sizes.get("license", 50) - ) - search.aggs.metric("license_total", "cardinality", field="_id") - - format_filters = get_filters_except("format") - if format_filters: - format_agg = search.aggs.bucket( - "format_filtered", "filter", filter=query.Bool(must=format_filters) - ) - format_agg.bucket("format", "terms", field="format", size=facet_sizes.get("format", 50)) - format_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "format", "terms", field="format", size=facet_sizes.get("format", 50) - ) - search.aggs.metric("format_total", "cardinality", field="_id") - - schema_filters = get_filters_except("schema") - if schema_filters: - schema_agg = search.aggs.bucket( - "schema_filtered", "filter", filter=query.Bool(must=schema_filters) - ) - schema_agg.bucket("schema", "terms", field="schema", size=facet_sizes.get("schema", 50)) - schema_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "schema", "terms", field="schema", size=facet_sizes.get("schema", 50) - ) - search.aggs.metric("schema_total", "cardinality", field="_id") - - geozone_filters = get_filters_except("geozone") - if geozone_filters: - geozone_agg = search.aggs.bucket( - "geozone_filtered", "filter", filter=query.Bool(must=geozone_filters) - ) - geozone_agg.bucket( - "geozone", "terms", field="geozones", size=facet_sizes.get("geozone", 50) - ) - geozone_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "geozone", "terms", field="geozones", size=facet_sizes.get("geozone", 50) - ) - search.aggs.metric("geozone_total", "cardinality", field="_id") - - granularity_filters = get_filters_except("granularity") - if granularity_filters: - granularity_agg = search.aggs.bucket( - "granularity_filtered", "filter", filter=query.Bool(must=granularity_filters) - ) - granularity_agg.bucket( - "granularity", "terms", field="granularity", size=facet_sizes.get("granularity", 50) - ) - granularity_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "granularity", "terms", field="granularity", size=facet_sizes.get("granularity", 50) - ) - search.aggs.metric("granularity_total", "cardinality", field="_id") - - badge_filters = get_filters_except("badge") - if badge_filters: - badge_agg = search.aggs.bucket( - "badge_filtered", "filter", filter=query.Bool(must=badge_filters) - ) - badge_agg.bucket("badge", "terms", field="badges", size=facet_sizes.get("badge", 50)) - badge_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket("badge", "terms", field="badges", size=facet_sizes.get("badge", 50)) - search.aggs.metric("badge_total", "cardinality", field="_id") - - topics_filters = get_filters_except("topics") - if topics_filters: - topics_agg = search.aggs.bucket( - "topics_filtered", "filter", filter=query.Bool(must=topics_filters) - ) - topics_agg.bucket("topics", "terms", field="topics", size=facet_sizes.get("topics", 50)) - topics_agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "topics", "terms", field="topics", size=facet_sizes.get("topics", 50) - ) - search.aggs.metric("topics_total", "cardinality", field="_id") + for facet in facets: + if isinstance(facet, TermsFacet): + size = facet_sizes.get(facet.name, 50) + f = get_filters_except(facet.name) + if f: + agg = search.aggs.bucket( + f"{facet.name}_filtered", "filter", filter=query.Bool(must=f) + ) + agg.bucket(facet.name, "terms", field=facet.es_field, size=size) + agg.metric("total", "cardinality", field="_id") + else: + search.aggs.bucket(facet.name, "terms", field=facet.es_field, size=size) + search.aggs.metric(f"{facet.name}_total", "cardinality", field="_id") + elif isinstance(facet, DateRangeFacet): + f = get_filters_except("last_update_range") + if f: + agg = search.aggs.bucket( + "last_update_filtered", "filter", filter=query.Bool(must=f) + ) + agg.bucket( + "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES + ) + agg.metric("total", "cardinality", field="_id") + else: + search.aggs.bucket( + "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES + ) + search.aggs.metric("last_update_total", "cardinality", field="_id") post_filters = [] for key, value in filter_dict.items(): @@ -1080,53 +876,11 @@ def get_filters_except(exclude_key): ) res = [hit.to_dict(skip_empty=False) for hit in response.hits] - facets = {} + facets_result = {} if hasattr(response, "aggregations"): - facet_configs = [ - ("format_family", "format_family_filtered", "format_family_total"), - ("access_type", "access_type_filtered", "access_type_total"), - ("producer_type", "producer_type_filtered", "producer_type_total"), - ( - "organization_id_with_name", - "organization_id_with_name_filtered", - "organization_id_with_name_total", - ), - ("last_update", "last_update_filtered", "last_update_total"), - ("tag", "tag_filtered", "tag_total"), - ("license", "license_filtered", "license_total"), - ("format", "format_filtered", "format_total"), - ("schema", "schema_filtered", "schema_total"), - ("geozone", "geozone_filtered", "geozone_total"), - ("granularity", "granularity_filtered", "granularity_total"), - ("badge", "badge_filtered", "badge_total"), - ("topics", "topics_filtered", "topics_total"), - ] - - for facet_name, filtered_name, total_name in facet_configs: - if hasattr(response.aggregations, filtered_name): - filtered_agg = getattr(response.aggregations, filtered_name) - if hasattr(filtered_agg, facet_name): - buckets = [ - {"name": bucket.key, "count": bucket.doc_count} - for bucket in getattr(filtered_agg, facet_name).buckets - ] - total_count = ( - int(filtered_agg.total.value) if hasattr(filtered_agg, "total") else 0 - ) - facets[facet_name] = [{"name": "all", "count": total_count}] + buckets - elif hasattr(response.aggregations, facet_name): - buckets = [ - {"name": bucket.key, "count": bucket.doc_count} - for bucket in getattr(response.aggregations, facet_name).buckets - ] - total_count = ( - int(getattr(response.aggregations, total_name).value) - if hasattr(response.aggregations, total_name) - else 0 - ) - facets[facet_name] = [{"name": "all", "count": total_count}] + buckets + facets_result = _parse_filtered_facets(response.aggregations, facets) - return results_number, res, facets + return results_number, res, facets_result def query_reuses( self, @@ -1136,6 +890,7 @@ def query_reuses( filters: dict, sort: Optional[str] = None, facet_sizes: dict = {}, + facets: list = [], ) -> Tuple[int, List[dict], dict]: search = SearchableReuse.search() @@ -1296,55 +1051,34 @@ def get_filters_except(exclude_key: str): flt.append(filter_dict[k]) return flt - facet_fields = { - "producer_type": ("producer_type", "producer_type"), - "organization_id_with_name": ("organization_with_id", "organization_id_with_name"), - "topic": ("topic", "topic"), - "type": ("type", "type"), - "tag": ("tags", "tag"), - "badge": ("badges", "badge"), - } - - for facet_key, (es_field, agg_name) in facet_fields.items(): - f = get_filters_except(facet_key) - if f: - agg = search.aggs.bucket( - f"{agg_name}_filtered", "filter", filter=query.Bool(must=f) - ) - agg.bucket(agg_name, "terms", field=es_field, size=facet_sizes.get(agg_name, 50)) - agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - agg_name, "terms", field=es_field, size=facet_sizes.get(agg_name, 50) - ) - search.aggs.metric(f"{agg_name}_total", "cardinality", field="_id") - - f = get_filters_except("last_update_range") - if f: - agg = search.aggs.bucket("last_update_filtered", "filter", filter=query.Bool(must=f)) - agg.bucket( - "last_update", - "date_range", - field="last_modified", - ranges=[ - {"key": "last_30_days", "from": "now-30d/d"}, - {"key": "last_12_months", "from": "now-12M/d"}, - {"key": "last_3_years", "from": "now-3y/d"}, - ], - ) - agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "last_update", - "date_range", - field="last_modified", - ranges=[ - {"key": "last_30_days", "from": "now-30d/d"}, - {"key": "last_12_months", "from": "now-12M/d"}, - {"key": "last_3_years", "from": "now-3y/d"}, - ], - ) - search.aggs.metric("last_update_total", "cardinality", field="_id") + for facet in facets: + if isinstance(facet, TermsFacet): + size = facet_sizes.get(facet.name, 50) + f = get_filters_except(facet.name) + if f: + agg = search.aggs.bucket( + f"{facet.name}_filtered", "filter", filter=query.Bool(must=f) + ) + agg.bucket(facet.name, "terms", field=facet.es_field, size=size) + agg.metric("total", "cardinality", field="_id") + else: + search.aggs.bucket(facet.name, "terms", field=facet.es_field, size=size) + search.aggs.metric(f"{facet.name}_total", "cardinality", field="_id") + elif isinstance(facet, DateRangeFacet): + f = get_filters_except("last_update_range") + if f: + agg = search.aggs.bucket( + "last_update_filtered", "filter", filter=query.Bool(must=f) + ) + agg.bucket( + "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES + ) + agg.metric("total", "cardinality", field="_id") + else: + search.aggs.bucket( + "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES + ) + search.aggs.metric("last_update_total", "cardinality", field="_id") post_filters = [] for k in [ @@ -1378,50 +1112,11 @@ def get_filters_except(exclude_key: str): res = [hit.to_dict(skip_empty=False) for hit in response.hits] - facets = {} - - for facet_key, (_, agg_name) in facet_fields.items(): - filtered_name = f"{agg_name}_filtered" - total_name = f"{agg_name}_total" - - if hasattr(response.aggregations, filtered_name): - fa = getattr(response.aggregations, filtered_name) - buckets = [ - {"name": b.key, "count": b.doc_count} for b in getattr(fa, agg_name).buckets - ] - total = int(fa.total.value) if hasattr(fa, "total") else 0 - facets[agg_name] = [{"name": "all", "count": total}] + buckets - - elif hasattr(response.aggregations, agg_name): - buckets = [ - {"name": b.key, "count": b.doc_count} - for b in getattr(response.aggregations, agg_name).buckets - ] - total = ( - int(getattr(response.aggregations, total_name).value) - if hasattr(response.aggregations, total_name) - else 0 - ) - facets[agg_name] = [{"name": "all", "count": total}] + buckets - - if hasattr(response.aggregations, "last_update_filtered"): - fa = response.aggregations.last_update_filtered - buckets = [{"name": b.key, "count": b.doc_count} for b in fa.last_update.buckets] - total = int(fa.total.value) if hasattr(fa, "total") else 0 - facets["last_update"] = [{"name": "all", "count": total}] + buckets - elif hasattr(response.aggregations, "last_update"): - buckets = [ - {"name": b.key, "count": b.doc_count} - for b in response.aggregations.last_update.buckets - ] - total = ( - int(response.aggregations.last_update_total.value) - if hasattr(response.aggregations, "last_update_total") - else 0 - ) - facets["last_update"] = [{"name": "all", "count": total}] + buckets + facets_result = {} + if hasattr(response, "aggregations"): + facets_result = _parse_filtered_facets(response.aggregations, facets) - return results_number, res, facets + return results_number, res, facets_result def query_dataservices( self, @@ -1431,6 +1126,7 @@ def query_dataservices( filters: dict, sort: Optional[str] = None, facet_sizes: dict = {}, + facets: list = [], ): search = SearchableDataservice.search() @@ -1586,55 +1282,34 @@ def get_filters_except(exclude_key: str): filters_list.append(filter_dict[k]) return filters_list - facet_fields = { - "access_type": ("access_type", "access_type"), - "producer_type": ("producer_type", "producer_type"), - "organization_id_with_name": ("organization_with_id", "organization_id_with_name"), - "tag": ("tags", "tag"), - "badge": ("badges", "badge"), - } - - for facet_name, (es_field, agg_name) in facet_fields.items(): - f = get_filters_except(facet_name) - if f: - agg = search.aggs.bucket( - f"{agg_name}_filtered", "filter", filter=query.Bool(must=f) - ) - agg.bucket(agg_name, "terms", field=es_field, size=facet_sizes.get(agg_name, 50)) - agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - agg_name, "terms", field=es_field, size=facet_sizes.get(agg_name, 50) - ) - search.aggs.metric(f"{agg_name}_total", "cardinality", field="_id") - - # last_update facet - f = get_filters_except("last_update_range") - if f: - agg = search.aggs.bucket("last_update_filtered", "filter", filter=query.Bool(must=f)) - agg.bucket( - "last_update", - "date_range", - field="metadata_modified_at", - ranges=[ - {"key": "last_30_days", "from": "now-30d/d"}, - {"key": "last_12_months", "from": "now-12M/d"}, - {"key": "last_3_years", "from": "now-3y/d"}, - ], - ) - agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "last_update", - "date_range", - field="metadata_modified_at", - ranges=[ - {"key": "last_30_days", "from": "now-30d/d"}, - {"key": "last_12_months", "from": "now-12M/d"}, - {"key": "last_3_years", "from": "now-3y/d"}, - ], - ) - search.aggs.metric("last_update_total", "cardinality", field="_id") + for facet in facets: + if isinstance(facet, TermsFacet): + size = facet_sizes.get(facet.name, 50) + f = get_filters_except(facet.name) + if f: + agg = search.aggs.bucket( + f"{facet.name}_filtered", "filter", filter=query.Bool(must=f) + ) + agg.bucket(facet.name, "terms", field=facet.es_field, size=size) + agg.metric("total", "cardinality", field="_id") + else: + search.aggs.bucket(facet.name, "terms", field=facet.es_field, size=size) + search.aggs.metric(f"{facet.name}_total", "cardinality", field="_id") + elif isinstance(facet, DateRangeFacet): + f = get_filters_except("last_update_range") + if f: + agg = search.aggs.bucket( + "last_update_filtered", "filter", filter=query.Bool(must=f) + ) + agg.bucket( + "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES + ) + agg.metric("total", "cardinality", field="_id") + else: + search.aggs.bucket( + "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES + ) + search.aggs.metric("last_update_total", "cardinality", field="_id") post_filters = [] for k in [ @@ -1661,47 +1336,11 @@ def get_filters_except(exclude_key: str): results_number = response.hits.total.value res = [hit.to_dict(skip_empty=False) for hit in response.hits] - facets = {} - for facet_name, (_, agg_name) in facet_fields.items(): - filtered_name = f"{agg_name}_filtered" - total_name = f"{agg_name}_total" - if hasattr(response.aggregations, filtered_name): - fa = getattr(response.aggregations, filtered_name) - buckets = [ - {"name": b.key, "count": b.doc_count} for b in getattr(fa, agg_name).buckets - ] - total = int(fa.total.value) if hasattr(fa, "total") else 0 - facets[agg_name] = [{"name": "all", "count": total}] + buckets - elif hasattr(response.aggregations, agg_name): - buckets = [ - {"name": b.key, "count": b.doc_count} - for b in getattr(response.aggregations, agg_name).buckets - ] - total = ( - int(getattr(response.aggregations, total_name).value) - if hasattr(response.aggregations, total_name) - else 0 - ) - facets[agg_name] = [{"name": "all", "count": total}] + buckets - - if hasattr(response.aggregations, "last_update_filtered"): - fa = response.aggregations.last_update_filtered - buckets = [{"name": b.key, "count": b.doc_count} for b in fa.last_update.buckets] - total = int(fa.total.value) if hasattr(fa, "total") else 0 - facets["last_update"] = [{"name": "all", "count": total}] + buckets - elif hasattr(response.aggregations, "last_update"): - buckets = [ - {"name": b.key, "count": b.doc_count} - for b in response.aggregations.last_update.buckets - ] - total = ( - int(response.aggregations.last_update_total.value) - if hasattr(response.aggregations, "last_update_total") - else 0 - ) - facets["last_update"] = [{"name": "all", "count": total}] + buckets + facets_result = {} + if hasattr(response, "aggregations"): + facets_result = _parse_filtered_facets(response.aggregations, facets) - return results_number, res, facets + return results_number, res, facets_result def find_one_organization(self, organization_id: str) -> Optional[dict]: try: @@ -1780,6 +1419,8 @@ def query_discussions( page_size: int, filters: dict, sort: Optional[str] = None, + facet_sizes: dict = {}, + facets: list = [], ) -> Tuple[int, List[dict], dict]: search = SearchableDiscussion.search() @@ -1818,17 +1459,15 @@ def query_discussions( else: search = search.query(query.MatchAll()) - search.aggs.bucket("object_type", "terms", field="subject_class", size=50) - search.aggs.bucket( - "last_update", - "date_range", - field="created_at", - ranges=[ - {"key": "last_30_days", "from": "now-30d/d"}, - {"key": "last_12_months", "from": "now-12M/d"}, - {"key": "last_3_years", "from": "now-3y/d"}, - ], - ) + for facet in facets: + if isinstance(facet, TermsFacet): + search.aggs.bucket( + facet.name, "terms", field=facet.es_field, size=facet_sizes.get(facet.name, 50) + ) + elif isinstance(facet, DateRangeFacet): + search.aggs.bucket( + "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES + ) search.aggs.metric("total_count", "cardinality", field="_id") if post_filters: @@ -1848,23 +1487,23 @@ def query_discussions( ) res = [hit.to_dict(skip_empty=False) for hit in response.hits] - facets = {} + facets_result = {} if hasattr(response, "aggregations"): total_count = ( int(response.aggregations.total_count.value) if hasattr(response.aggregations, "total_count") else 0 ) - - for agg_name in ["object_type", "last_update"]: + for facet in facets: + agg_name = facet.name if isinstance(facet, TermsFacet) else "last_update" if hasattr(response.aggregations, agg_name): buckets = [ - {"name": bucket.key, "count": bucket.doc_count} - for bucket in response.aggregations[agg_name].buckets + {"name": b.key, "count": b.doc_count} + for b in response.aggregations[agg_name].buckets ] - facets[agg_name] = [{"name": "all", "count": total_count}] + buckets + facets_result[agg_name] = [{"name": "all", "count": total_count}] + buckets - return results_number, res, facets + return results_number, res, facets_result def find_one_discussion(self, discussion_id: str) -> Optional[dict]: try: @@ -1891,6 +1530,8 @@ def query_posts( page_size: int, filters: dict, sort: Optional[str] = None, + facet_sizes: dict = {}, + facets: list = [], ) -> Tuple[int, List[dict], dict]: search = SearchablePost.search() @@ -1933,16 +1574,11 @@ def query_posts( else: search = search.query(query.MatchAll()) - search.aggs.bucket( - "last_update", - "date_range", - field="last_modified", - ranges=[ - {"key": "last_30_days", "from": "now-30d/d"}, - {"key": "last_12_months", "from": "now-12M/d"}, - {"key": "last_3_years", "from": "now-3y/d"}, - ], - ) + for facet in facets: + if isinstance(facet, DateRangeFacet): + search.aggs.bucket( + "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES + ) search.aggs.metric("total_count", "cardinality", field="_id") if post_filters: @@ -1962,23 +1598,21 @@ def query_posts( ) res = [hit.to_dict(skip_empty=False) for hit in response.hits] - facets = {} + facets_result = {} if hasattr(response, "aggregations"): total_count = ( int(response.aggregations.total_count.value) if hasattr(response.aggregations, "total_count") else 0 ) + if hasattr(response.aggregations, "last_update"): + buckets = [ + {"name": b.key, "count": b.doc_count} + for b in response.aggregations.last_update.buckets + ] + facets_result["last_update"] = [{"name": "all", "count": total_count}] + buckets - for agg_name in ["last_update"]: - if hasattr(response.aggregations, agg_name): - buckets = [ - {"name": bucket.key, "count": bucket.doc_count} - for bucket in response.aggregations[agg_name].buckets - ] - facets[agg_name] = [{"name": "all", "count": total_count}] + buckets - - return results_number, res, facets + return results_number, res, facets_result def find_one_post(self, post_id: str) -> Optional[dict]: try: diff --git a/udata_search_service/services.py b/udata_search_service/services.py index ac2f36f407..0bee6ecc81 100644 --- a/udata_search_service/services.py +++ b/udata_search_service/services.py @@ -11,12 +11,17 @@ Reuse, Topic, ) -from udata_search_service.search_clients import ElasticClient +from udata_search_service.search_clients import ( + DateRangeFacet, + ElasticClient, + TermsFacet, +) class BaseService: entity_class: type[EntityBase] entity_name: str + facets: list = [] # {filter_param_name: elasticsearch_field_name} filter_renames: dict[str, str] = {} @@ -45,7 +50,13 @@ def search(self, filters: dict) -> Tuple[List[EntityBase], int, int, dict]: self.format_filters(filters) results_number, search_results, facets = self._client_query( - search_text, offset, page_size, filters, sort, facet_sizes=facet_sizes + search_text, + offset, + page_size, + filters, + sort, + facet_sizes=facet_sizes, + facets=self.__class__.facets, ) results = [self.entity_class.load_from_dict(hit) for hit in search_results] total_pages = ceil(results_number / page_size) or 1 @@ -84,6 +95,9 @@ class OrganizationService(BaseService): filter_renames = { "badge": "badges", } + facets = [ + TermsFacet("producer_type", "producer_type"), + ] class DatasetService(BaseService): @@ -98,6 +112,21 @@ class DatasetService(BaseService): "organization_badge": "organization_badges", "organization": "organization_id_with_name", } + facets = [ + TermsFacet("format_family", "format_family"), + TermsFacet("access_type", "access_type"), + TermsFacet("producer_type", "producer_type"), + TermsFacet("organization_id_with_name", "organization_with_id"), + TermsFacet("tag", "tags"), + TermsFacet("license", "license"), + TermsFacet("format", "format"), + TermsFacet("schema", "schema"), + TermsFacet("geozone", "geozones"), + TermsFacet("granularity", "granularity"), + TermsFacet("badge", "badges"), + TermsFacet("topics", "topics"), + DateRangeFacet("last_update", "last_update"), + ] @classmethod def format_filters(cls, filters): @@ -117,6 +146,15 @@ class ReuseService(BaseService): "organization_badge": "organization_badges", "organization": "organization_id_with_name", } + facets = [ + TermsFacet("producer_type", "producer_type"), + TermsFacet("organization_id_with_name", "organization_with_id"), + TermsFacet("topic", "topic"), + TermsFacet("type", "type"), + TermsFacet("tag", "tags"), + TermsFacet("badge", "badges"), + DateRangeFacet("last_update", "last_modified"), + ] class DataserviceService(BaseService): @@ -128,11 +166,25 @@ class DataserviceService(BaseService): "topic": "topics", "organization": "organization_id_with_name", } + facets = [ + TermsFacet("access_type", "access_type"), + TermsFacet("producer_type", "producer_type"), + TermsFacet("organization_id_with_name", "organization_with_id"), + TermsFacet("tag", "tags"), + TermsFacet("badge", "badges"), + DateRangeFacet("last_update", "metadata_modified_at"), + ] class TopicService(BaseService): entity_class = Topic entity_name = "topic" + facets = [ + TermsFacet("tag", "tags"), + TermsFacet("organization_id_with_name", "organization_with_id"), + TermsFacet("producer_type", "producer_type"), + DateRangeFacet("last_update", "last_modified"), + ] class DiscussionService(BaseService): @@ -142,8 +194,15 @@ class DiscussionService(BaseService): "created": "created_at", "closed": "closed_at", } + facets = [ + TermsFacet("object_type", "subject_class"), + DateRangeFacet("last_update", "created_at"), + ] class PostService(BaseService): entity_class = Post entity_name = "post" + facets = [ + DateRangeFacet("last_update", "last_modified"), + ] From 53220d9c9d8023607eb11104b8f376b93fe7e0f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexandre=20Bult=C3=A9?= Date: Fri, 24 Apr 2026 09:59:26 +0200 Subject: [PATCH 2/3] imports on top --- udata/tests/search/test_adapter.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/udata/tests/search/test_adapter.py b/udata/tests/search/test_adapter.py index 5013de6afa..9103bb5da9 100644 --- a/udata/tests/search/test_adapter.py +++ b/udata/tests/search/test_adapter.py @@ -6,6 +6,7 @@ from flask_restx.reqparse import RequestParser from udata import search +from udata.core.access_type.constants import AccessType from udata.core.dataservices.factories import DataserviceFactory from udata.core.dataservices.search import DataserviceSearch from udata.core.dataset.factories import ( @@ -17,6 +18,7 @@ from udata.core.dataset.search import DatasetSearch from udata.core.organization.constants import ( ASSOCIATION, + CERTIFIED, COMPANY, LOCAL_AUTHORITY, NOT_SPECIFIED, @@ -33,6 +35,7 @@ from udata.search.commands import finalize_reindex, index_model from udata.tests.api import APITestCase from udata.utils import clean_string +from udata_search_service.search_clients import TermsFacet from . import FakeSearch @@ -365,8 +368,6 @@ def test_serialize_deduplicates_topic_ids(self): def test_serialize_includes_access_type(self): """Test that DatasetSearch.serialize includes access_type in the serialized document""" - from udata.core.access_type.constants import AccessType - dataset = DatasetFactory(access_type=AccessType.OPEN) serialized = DatasetSearch.serialize(dataset) @@ -501,8 +502,6 @@ def test_serialize_includes_producer_type_user(self): def test_serialize_excludes_certified_from_producer_type(self): """Test that certified badge is excluded from producer_type""" - from udata.core.organization.constants import CERTIFIED - org = OrganizationFactory() org.add_badge(PUBLIC_SERVICE) org.add_badge(CERTIFIED) @@ -562,8 +561,6 @@ def test_serialize_includes_producer_type_user(self): class ConfigurableSizeFacetsTest(APITestCase): def test_facet_size_params_in_request_parser(self): - from udata_search_service.search_clients import TermsFacet - for adapter in [DatasetSearch, ReuseSearch, DataserviceSearch]: parser = adapter.as_request_parser() arg_names = [arg.name for arg in parser.args] @@ -587,8 +584,6 @@ def test_facet_size_param_is_int(self): class DataserviceSearchAdapterTest(APITestCase): def test_serialize_includes_access_type(self): """Test that DataserviceSearch.serialize includes access_type in the serialized document""" - from udata.core.access_type.constants import AccessType - dataservice = DataserviceFactory(access_type=AccessType.OPEN) serialized = DataserviceSearch.serialize(dataservice) From e536bd2837477324cef0cbe4f558e3d1fea14386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexandre=20Bult=C3=A9?= Date: Fri, 24 Apr 2026 10:28:46 +0200 Subject: [PATCH 3/3] add _build_facet_aggregations helper --- udata_search_service/search_clients.py | 145 ++++++------------------- 1 file changed, 33 insertions(+), 112 deletions(-) diff --git a/udata_search_service/search_clients.py b/udata_search_service/search_clients.py index 109dd1954a..91735b5043 100644 --- a/udata_search_service/search_clients.py +++ b/udata_search_service/search_clients.py @@ -51,6 +51,35 @@ class DateRangeFacet: ] +def _build_facet_aggregations(search, facets: list, facet_sizes: dict, get_filters_except) -> None: + for facet in facets: + if isinstance(facet, TermsFacet): + size = facet_sizes.get(facet.name, 50) + f = get_filters_except(facet.name) + if f: + agg = search.aggs.bucket( + f"{facet.name}_filtered", "filter", filter=query.Bool(must=f) + ) + agg.bucket(facet.name, "terms", field=facet.es_field, size=size) + agg.metric("total", "cardinality", field="_id") + else: + search.aggs.bucket(facet.name, "terms", field=facet.es_field, size=size) + search.aggs.metric(f"{facet.name}_total", "cardinality", field="_id") + elif isinstance(facet, DateRangeFacet): + f = get_filters_except("last_update_range") + if f: + agg = search.aggs.bucket( + "last_update_filtered", "filter", filter=query.Bool(must=f) + ) + agg.bucket("last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES) + agg.metric("total", "cardinality", field="_id") + else: + search.aggs.bucket( + "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES + ) + search.aggs.metric("last_update_total", "cardinality", field="_id") + + def _parse_filtered_facets(aggregations, facets: list) -> dict: """Parse ES aggregations built with the filter-wrapper pattern into a facets dict.""" result = {} @@ -572,34 +601,7 @@ def get_filters_except(exclude_key): filters_list.append(filter_dict[key]) return filters_list - for facet in facets: - if isinstance(facet, TermsFacet): - size = facet_sizes.get(facet.name, 50) - f = get_filters_except(facet.name) - if f: - agg = search.aggs.bucket( - f"{facet.name}_filtered", "filter", filter=query.Bool(must=f) - ) - agg.bucket(facet.name, "terms", field=facet.es_field, size=size) - agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket(facet.name, "terms", field=facet.es_field, size=size) - search.aggs.metric(f"{facet.name}_total", "cardinality", field="_id") - elif isinstance(facet, DateRangeFacet): - f = get_filters_except("last_update_range") - if f: - agg = search.aggs.bucket( - "last_update_filtered", "filter", filter=query.Bool(must=f) - ) - agg.bucket( - "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES - ) - agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES - ) - search.aggs.metric("last_update_total", "cardinality", field="_id") + _build_facet_aggregations(search, facets, facet_sizes, get_filters_except) post_filters = [] for key, value in filter_dict.items(): @@ -823,34 +825,7 @@ def get_filters_except(exclude_key): filters_list.append(filter_dict[key]) return filters_list - for facet in facets: - if isinstance(facet, TermsFacet): - size = facet_sizes.get(facet.name, 50) - f = get_filters_except(facet.name) - if f: - agg = search.aggs.bucket( - f"{facet.name}_filtered", "filter", filter=query.Bool(must=f) - ) - agg.bucket(facet.name, "terms", field=facet.es_field, size=size) - agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket(facet.name, "terms", field=facet.es_field, size=size) - search.aggs.metric(f"{facet.name}_total", "cardinality", field="_id") - elif isinstance(facet, DateRangeFacet): - f = get_filters_except("last_update_range") - if f: - agg = search.aggs.bucket( - "last_update_filtered", "filter", filter=query.Bool(must=f) - ) - agg.bucket( - "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES - ) - agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES - ) - search.aggs.metric("last_update_total", "cardinality", field="_id") + _build_facet_aggregations(search, facets, facet_sizes, get_filters_except) post_filters = [] for key, value in filter_dict.items(): @@ -1051,34 +1026,7 @@ def get_filters_except(exclude_key: str): flt.append(filter_dict[k]) return flt - for facet in facets: - if isinstance(facet, TermsFacet): - size = facet_sizes.get(facet.name, 50) - f = get_filters_except(facet.name) - if f: - agg = search.aggs.bucket( - f"{facet.name}_filtered", "filter", filter=query.Bool(must=f) - ) - agg.bucket(facet.name, "terms", field=facet.es_field, size=size) - agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket(facet.name, "terms", field=facet.es_field, size=size) - search.aggs.metric(f"{facet.name}_total", "cardinality", field="_id") - elif isinstance(facet, DateRangeFacet): - f = get_filters_except("last_update_range") - if f: - agg = search.aggs.bucket( - "last_update_filtered", "filter", filter=query.Bool(must=f) - ) - agg.bucket( - "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES - ) - agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES - ) - search.aggs.metric("last_update_total", "cardinality", field="_id") + _build_facet_aggregations(search, facets, facet_sizes, get_filters_except) post_filters = [] for k in [ @@ -1282,34 +1230,7 @@ def get_filters_except(exclude_key: str): filters_list.append(filter_dict[k]) return filters_list - for facet in facets: - if isinstance(facet, TermsFacet): - size = facet_sizes.get(facet.name, 50) - f = get_filters_except(facet.name) - if f: - agg = search.aggs.bucket( - f"{facet.name}_filtered", "filter", filter=query.Bool(must=f) - ) - agg.bucket(facet.name, "terms", field=facet.es_field, size=size) - agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket(facet.name, "terms", field=facet.es_field, size=size) - search.aggs.metric(f"{facet.name}_total", "cardinality", field="_id") - elif isinstance(facet, DateRangeFacet): - f = get_filters_except("last_update_range") - if f: - agg = search.aggs.bucket( - "last_update_filtered", "filter", filter=query.Bool(must=f) - ) - agg.bucket( - "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES - ) - agg.metric("total", "cardinality", field="_id") - else: - search.aggs.bucket( - "last_update", "date_range", field=facet.es_field, ranges=DATE_RANGES - ) - search.aggs.metric("last_update_total", "cardinality", field="_id") + _build_facet_aggregations(search, facets, facet_sizes, get_filters_except) post_filters = [] for k in [