From caed373cb252d61a05ef94280278ea569193dfe9 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Thu, 20 Nov 2025 01:28:23 -0300 Subject: [PATCH 1/9] Remove $facet in top level group stages --- django_mongodb_backend/aggregates.py | 1 + django_mongodb_backend/compiler.py | 18 +++--------------- django_mongodb_backend/query.py | 17 +++++++++++++++++ 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/django_mongodb_backend/aggregates.py b/django_mongodb_backend/aggregates.py index fb41ce4fc..9921e34ab 100644 --- a/django_mongodb_backend/aggregates.py +++ b/django_mongodb_backend/aggregates.py @@ -52,6 +52,7 @@ def count(self, compiler, connection, resolve_inner_expression=False): # If distinct=True or resolve_inner_expression=False, sum the size of the # set. lhs_mql = process_lhs(self, compiler, connection, as_expr=True) + lhs_mql = {"$ifNull": [lhs_mql, []]} # None shouldn't be counted, so subtract 1 if it's present. exits_null = {"$cond": {"if": {"$in": [{"$literal": None}, lhs_mql]}, "then": -1, "else": 0}} return {"$add": [{"$size": lhs_mql}, exits_null]} diff --git a/django_mongodb_backend/compiler.py b/django_mongodb_backend/compiler.py index 2145fbf68..4f02924a6 100644 --- a/django_mongodb_backend/compiler.py +++ b/django_mongodb_backend/compiler.py @@ -38,6 +38,7 @@ def __init__(self, *args, **kwargs): self.subqueries = [] # Atlas search stage. 
self.search_pipeline = [] + self.wrap_for_global_aggregation = False def _get_group_alias_column(self, expr, annotation_group_idx): """Generate a dummy field for use in the ids fields in $group.""" @@ -234,21 +235,8 @@ def _build_aggregation_pipeline(self, ids, group): """Build the aggregation pipeline for grouping.""" pipeline = [] if not ids: - group["_id"] = None - pipeline.append({"$facet": {"group": [{"$group": group}]}}) - pipeline.append( - { - "$addFields": { - key: { - "$getField": { - "input": {"$arrayElemAt": ["$group", 0]}, - "field": key, - } - } - for key in group - } - } - ) + pipeline.append({"$group": {"_id": None, **group}}) + self.wrap_for_global_aggregation = True else: group["_id"] = ids pipeline.append({"$group": group}) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index 5b4f0ec51..85cf0c774 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -56,6 +56,7 @@ def __init__(self, compiler): # $lookup stage that encapsulates the pipeline for performing a nested # subquery. 
self.subquery_lookup = None + self.wrap_for_global_aggregation = compiler.wrap_for_global_aggregation def __repr__(self): return f"" @@ -91,6 +92,22 @@ def get_pipeline(self): pipeline.append({"$match": self.match_mql}) if self.aggregation_pipeline: pipeline.extend(self.aggregation_pipeline) + if self.wrap_for_global_aggregation: + pipeline = [ + {"$collStats": {}}, + { + "$lookup": { + "from": self.compiler.collection_name, + "as": "wrapped", + "pipeline": pipeline, + } + }, + { + "$replaceWith": { + "$cond": [{"$eq": ["$wrapped", []]}, {}, {"$first": "$wrapped"}] + } + }, + ] if self.project_fields: pipeline.append({"$project": self.project_fields}) if self.combinator_pipeline: From 69aaf01e6d4845da5ce0b743418ed3a190e49fe1 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Wed, 26 Nov 2025 01:21:11 -0300 Subject: [PATCH 2/9] Fix: rename wrapped to __wrapped to avoid collisions --- django_mongodb_backend/query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index 85cf0c774..898b02640 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -98,13 +98,13 @@ def get_pipeline(self): { "$lookup": { "from": self.compiler.collection_name, - "as": "wrapped", + "as": "__wrapped", "pipeline": pipeline, } }, { "$replaceWith": { - "$cond": [{"$eq": ["$wrapped", []]}, {}, {"$first": "$wrapped"}] + "$cond": [{"$eq": ["$__wrapped", []]}, {}, {"$first": "$__wrapped"}] } }, ] From f3d1a758a19ecd0fd97dc2ec78cbd5ab81c86b8f Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Tue, 2 Dec 2025 01:03:52 -0300 Subject: [PATCH 3/9] Comments and docstring --- django_mongodb_backend/compiler.py | 2 ++ django_mongodb_backend/query.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/django_mongodb_backend/compiler.py b/django_mongodb_backend/compiler.py index 4f02924a6..026d0c825 100644 --- a/django_mongodb_backend/compiler.py +++ b/django_mongodb_backend/compiler.py @@ 
-38,6 +38,7 @@ def __init__(self, *args, **kwargs): self.subqueries = [] # Atlas search stage. self.search_pipeline = [] + # The aggregation has no group-by fields and needs wrapping. self.wrap_for_global_aggregation = False def _get_group_alias_column(self, expr, annotation_group_idx): @@ -236,6 +237,7 @@ def _build_aggregation_pipeline(self, ids, group): pipeline = [] if not ids: pipeline.append({"$group": {"_id": None, **group}}) + # If ids is empty, a global group-by is applied self.wrap_for_global_aggregation = True else: group["_id"] = ids diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index 898b02640..3b349c6a3 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -93,8 +93,11 @@ def get_pipeline(self): if self.aggregation_pipeline: pipeline.extend(self.aggregation_pipeline) if self.wrap_for_global_aggregation: + # Use $collStats as a pivot to guarantee a single input document pipeline = [ {"$collStats": {}}, + # Wrap the actual aggregation inside a lookup so its result + # always appears as a one document array { "$lookup": { "from": self.compiler.collection_name, @@ -102,6 +105,7 @@ def get_pipeline(self): "pipeline": pipeline, } }, + # Use {} If the inner aggregation returns nothing, otherwise unwrap { "$replaceWith": { "$cond": [{"$eq": ["$__wrapped", []]}, {}, {"$first": "$__wrapped"}] From eb12ffcd155c3ce831e83abf3b0003e5a3ca38c0 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Thu, 4 Dec 2025 19:25:55 -0300 Subject: [PATCH 4/9] Replace collstats to handle empty results in global aggregations --- django_mongodb_backend/query.py | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index 3b349c6a3..502770b99 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -93,25 +93,8 @@ def get_pipeline(self): if self.aggregation_pipeline: 
pipeline.extend(self.aggregation_pipeline) if self.wrap_for_global_aggregation: - # Use $collStats as a pivot to guarantee a single input document - pipeline = [ - {"$collStats": {}}, - # Wrap the actual aggregation inside a lookup so its result - # always appears as a one document array - { - "$lookup": { - "from": self.compiler.collection_name, - "as": "__wrapped", - "pipeline": pipeline, - } - }, - # Use {} If the inner aggregation returns nothing, otherwise unwrap - { - "$replaceWith": { - "$cond": [{"$eq": ["$__wrapped", []]}, {}, {"$first": "$__wrapped"}] - } - }, - ] + # Add an empty extra document to handle default values on empty results + pipeline.append({"$unionWith": {"pipeline": [{"$documents": [{}]}]}}) if self.project_fields: pipeline.append({"$project": self.project_fields}) if self.combinator_pipeline: From 038bcf2be33342cbe169539ef32ec317c32b4154 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Thu, 4 Dec 2025 23:56:03 -0300 Subject: [PATCH 5/9] Fix pipeline position of unionWith. --- django_mongodb_backend/compiler.py | 4 +++- django_mongodb_backend/query.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/django_mongodb_backend/compiler.py b/django_mongodb_backend/compiler.py index 026d0c825..5ed097675 100644 --- a/django_mongodb_backend/compiler.py +++ b/django_mongodb_backend/compiler.py @@ -40,6 +40,8 @@ def __init__(self, *args, **kwargs): self.search_pipeline = [] # The aggregation has no group-by fields and needs wrapping. self.wrap_for_global_aggregation = False + # HAVING stage match (MongoDB equivalent) + self.having_match_mql = None def _get_group_alias_column(self, expr, annotation_group_idx): """Generate a dummy field for use in the ids fields in $group.""" @@ -324,7 +326,7 @@ def pre_sql_setup(self, with_col_aliases=False): pipeline.extend(query.get_pipeline()) # Remove the added subqueries. 
self.subqueries = [] - pipeline.append({"$match": having}) + self.having_match_mql = having self.aggregation_pipeline = pipeline self.annotations = { target: expr.replace_expressions(all_replacements) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index 502770b99..e507537c6 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -50,6 +50,7 @@ def __init__(self, compiler): self.lookup_pipeline = None self.project_fields = None self.aggregation_pipeline = compiler.aggregation_pipeline + self.having = compiler.having_match_mql self.search_pipeline = compiler.search_pipeline self.extra_fields = None self.combinator_pipeline = None @@ -95,6 +96,8 @@ def get_pipeline(self): if self.wrap_for_global_aggregation: # Add an empty extra document to handle default values on empty results pipeline.append({"$unionWith": {"pipeline": [{"$documents": [{}]}]}}) + if self.having: + pipeline.append({"$match": self.having}) if self.project_fields: pipeline.append({"$project": self.project_fields}) if self.combinator_pipeline: From 282324c9ab5cdda65d67f587fcee9ba1832919bb Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Fri, 5 Dec 2025 01:23:49 -0300 Subject: [PATCH 6/9] Polish the wrapping logic --- django_mongodb_backend/compiler.py | 4 ++-- django_mongodb_backend/query.py | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/django_mongodb_backend/compiler.py b/django_mongodb_backend/compiler.py index 5ed097675..b1f5e79a4 100644 --- a/django_mongodb_backend/compiler.py +++ b/django_mongodb_backend/compiler.py @@ -240,7 +240,7 @@ def _build_aggregation_pipeline(self, ids, group): if not ids: pipeline.append({"$group": {"_id": None, **group}}) # If ids is empty, a global group-by is applied - self.wrap_for_global_aggregation = True + self.wrap_for_global_aggregation = not bool(self.having) else: group["_id"] = ids pipeline.append({"$group": group}) @@ -326,7 +326,7 @@ def pre_sql_setup(self, 
with_col_aliases=False): pipeline.extend(query.get_pipeline()) # Remove the added subqueries. self.subqueries = [] - self.having_match_mql = having + pipeline.append({"$match": having}) self.aggregation_pipeline = pipeline self.annotations = { target: expr.replace_expressions(all_replacements) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index e507537c6..502770b99 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -50,7 +50,6 @@ def __init__(self, compiler): self.lookup_pipeline = None self.project_fields = None self.aggregation_pipeline = compiler.aggregation_pipeline - self.having = compiler.having_match_mql self.search_pipeline = compiler.search_pipeline self.extra_fields = None self.combinator_pipeline = None @@ -96,8 +95,6 @@ def get_pipeline(self): if self.wrap_for_global_aggregation: # Add an empty extra document to handle default values on empty results pipeline.append({"$unionWith": {"pipeline": [{"$documents": [{}]}]}}) - if self.having: - pipeline.append({"$match": self.having}) if self.project_fields: pipeline.append({"$project": self.project_fields}) if self.combinator_pipeline: From cc0b716e3543b3024639c21666fa0e3fe220d52b Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Fri, 5 Dec 2025 01:29:01 -0300 Subject: [PATCH 7/9] update comment --- django_mongodb_backend/compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/django_mongodb_backend/compiler.py b/django_mongodb_backend/compiler.py index b1f5e79a4..409032675 100644 --- a/django_mongodb_backend/compiler.py +++ b/django_mongodb_backend/compiler.py @@ -239,7 +239,7 @@ def _build_aggregation_pipeline(self, ids, group): pipeline = [] if not ids: pipeline.append({"$group": {"_id": None, **group}}) - # If ids is empty, a global group-by is applied + # If there are no ids and no having clause, apply a global aggregation self.wrap_for_global_aggregation = not bool(self.having) else: group["_id"] = ids From 
b8900afb4ca116dfb22b933771d91d2f883143fc Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Fri, 5 Dec 2025 02:35:40 -0300 Subject: [PATCH 8/9] Remove $facet when an ArrayField is used as rhs. --- django_mongodb_backend/fields/array.py | 34 +++---------- .../fields/embedded_model_array.py | 49 ++++++------------- django_mongodb_backend/lookups.py | 33 +++---------- tests/model_fields_/test_arrayfield.py | 15 ++++++ .../test_embedded_model_array.py | 5 ++ 5 files changed, 51 insertions(+), 85 deletions(-) diff --git a/django_mongodb_backend/fields/array.py b/django_mongodb_backend/fields/array.py index 84164c4d1..cd8f07f68 100644 --- a/django_mongodb_backend/fields/array.py +++ b/django_mongodb_backend/fields/array.py @@ -310,37 +310,19 @@ class ArrayOverlap(ArrayRHSMixin, FieldGetDbPrepValueMixin, Lookup): def get_subquery_wrapping_pipeline(self, compiler, connection, field_name, expr): return [ + {"$project": {"tmp_name": expr.as_mql(compiler, connection, as_expr=True)}}, { - "$facet": { - "group": [ - {"$project": {"tmp_name": expr.as_mql(compiler, connection, as_expr=True)}}, - { - "$unwind": "$tmp_name", - }, - { - "$group": { - "_id": None, - "tmp_name": {"$addToSet": "$tmp_name"}, - } - }, - ] - } + "$unwind": "$tmp_name", }, { - "$project": { - field_name: { - "$ifNull": [ - { - "$getField": { - "input": {"$arrayElemAt": ["$group", 0]}, - "field": "tmp_name", - } - }, - [], - ] - } + "$group": { + "_id": None, + "tmp_name": {"$addToSet": "$tmp_name"}, } }, + {"$unionWith": {"pipeline": [{"$documents": [{"tmp_name": []}]}]}}, + {"$limit": 1}, + {"$project": {field_name: "$tmp_name"}}, ] def as_mql_expr(self, compiler, connection): diff --git a/django_mongodb_backend/fields/embedded_model_array.py b/django_mongodb_backend/fields/embedded_model_array.py index 501b78428..0e525f68e 100644 --- a/django_mongodb_backend/fields/embedded_model_array.py +++ b/django_mongodb_backend/fields/embedded_model_array.py @@ -150,44 +150,27 @@ def 
get_subquery_wrapping_pipeline(self, compiler, connection, field_name, expr) # structure of EmbeddedModelArrayField on the RHS behaves similar to # ArrayField. return [ + {"$project": {"tmp_name": expr.as_mql(compiler, connection, as_expr=True)}}, + # To concatenate all the values from the RHS subquery, + # use an $unwind followed by a $group. { - "$facet": { - "gathered_data": [ - {"$project": {"tmp_name": expr.as_mql(compiler, connection, as_expr=True)}}, - # To concatenate all the values from the RHS subquery, - # use an $unwind followed by a $group. - { - "$unwind": "$tmp_name", - }, - # The $group stage collects values into an array using - # $addToSet. The use of {_id: null} results in a - # single grouped array. However, because arrays from - # multiple documents are aggregated, the result is a - # list of lists. - { - "$group": { - "_id": None, - "tmp_name": {"$addToSet": "$tmp_name"}, - } - }, - ] - } + "$unwind": "$tmp_name", }, + # The $group stage collects values into an array using + # $addToSet. The use of {_id: null} results in a + # single grouped array. However, because arrays from + # multiple documents are aggregated, the result is a + # list of lists. { - "$project": { - field_name: { - "$ifNull": [ - { - "$getField": { - "input": {"$arrayElemAt": ["$gathered_data", 0]}, - "field": "tmp_name", - } - }, - [], - ] - } + "$group": { + "_id": None, + "tmp_name": {"$addToSet": "$tmp_name"}, } }, + # Add a dummy document in case of empty result. 
+ {"$unionWith": {"pipeline": [{"$documents": [{"tmp_name": []}]}]}}, + {"$limit": 1}, + {"$project": {field_name: "$tmp_name"}}, ] diff --git a/django_mongodb_backend/lookups.py b/django_mongodb_backend/lookups.py index 6b59fb961..f63e4a0b6 100644 --- a/django_mongodb_backend/lookups.py +++ b/django_mongodb_backend/lookups.py @@ -56,34 +56,15 @@ def inner(self, compiler, connection): def get_subquery_wrapping_pipeline(self, compiler, connection, field_name, expr): # noqa: ARG001 return [ { - "$facet": { - "group": [ - { - "$group": { - "_id": None, - "tmp_name": { - "$addToSet": expr.as_mql(compiler, connection, as_expr=True) - }, - } - } - ] - } - }, - { - "$project": { - field_name: { - "$ifNull": [ - { - "$getField": { - "input": {"$arrayElemAt": ["$group", 0]}, - "field": "tmp_name", - } - }, - [], - ] - } + "$group": { + "_id": None, + # use a temporal name in order to support field_name="_id" + "tmp_name": {"$addToSet": expr.as_mql(compiler, connection, as_expr=True)}, } }, + {"$unionWith": {"pipeline": [{"$documents": [{"tmp_name": []}]}]}}, + {"$limit": 1}, + {"$project": {field_name: "$tmp_name"}}, ] diff --git a/tests/model_fields_/test_arrayfield.py b/tests/model_fields_/test_arrayfield.py index e334b21dc..8d40214da 100644 --- a/tests/model_fields_/test_arrayfield.py +++ b/tests/model_fields_/test_arrayfield.py @@ -634,6 +634,21 @@ def test_overlap_values(self): self.objs[:3], ) + def test_overlap_empty_values(self): + qs = NullableIntegerArrayModel.objects.filter(order__lt=-30) + self.assertCountEqual( + NullableIntegerArrayModel.objects.filter( + field__overlap=qs.values_list("field"), + ), + [], + ) + self.assertCountEqual( + NullableIntegerArrayModel.objects.filter( + field__overlap=qs.values("field"), + ), + [], + ) + def test_index(self): self.assertSequenceEqual( NullableIntegerArrayModel.objects.filter(field__0=2), self.objs[1:3] diff --git a/tests/model_fields_/test_embedded_model_array.py b/tests/model_fields_/test_embedded_model_array.py 
index 8453f6379..499205e9b 100644 --- a/tests/model_fields_/test_embedded_model_array.py +++ b/tests/model_fields_/test_embedded_model_array.py @@ -520,6 +520,11 @@ def test_subquery_in_lookup(self): result = Exhibit.objects.filter(sections__number__in=subquery) self.assertCountEqual(result, [self.wonders, self.new_discoveries, self.egypt]) + def test_subquery_empty_in_lookup(self): + subquery = Audit.objects.filter(section_number=10).values_list("section_number", flat=True) + result = Exhibit.objects.filter(sections__number__in=subquery) + self.assertCountEqual(result, []) + def test_array_as_rhs(self): result = Exhibit.objects.filter(main_section__number__in=models.F("sections__number")) self.assertCountEqual(result, [self.new_discoveries]) From ec0ab3764f6d17486eff24325ef7ef651bd0c35b Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Fri, 5 Dec 2025 02:51:28 -0300 Subject: [PATCH 9/9] Fix unit test --- tests/lookup_/tests.py | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/tests/lookup_/tests.py b/tests/lookup_/tests.py index b6ac8a322..115501b08 100644 --- a/tests/lookup_/tests.py +++ b/tests/lookup_/tests.py @@ -137,28 +137,10 @@ def test_subquery_filter_constant(self): "let": {}, "pipeline": [ {"$match": {"num": {"$gt": 2}}}, - { - "$facet": { - "group": [ - {"$group": {"_id": None, "tmp_name": {"$addToSet": "$num"}}} - ] - } - }, - { - "$project": { - "num": { - "$ifNull": [ - { - "$getField": { - "input": {"$arrayElemAt": ["$group", 0]}, - "field": "tmp_name", - } - }, - [], - ] - } - } - }, + {"$group": {"_id": None, "tmp_name": {"$addToSet": "$num"}}}, + {"$unionWith": {"pipeline": [{"$documents": [{"tmp_name": []}]}]}}, + {"$limit": 1}, + {"$project": {"num": "$tmp_name"}}, ], } },