Skip to content
15 changes: 12 additions & 3 deletions django_mongodb_backend/query_conversion/expression_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,12 @@ def convert(cls, args):
):
field_name = field_expr[1:] # Remove the $ prefix.
if cls.operator == "$eq":
return {field_name: value}
return {field_name: {cls.operator: value}}
query = {field_name: value}
else:
query = {field_name: {cls.operator: value}}
if value is None:
query = {"$and": [{field_name: {"$exists": True}}, query]}
return query
return None


Expand Down Expand Up @@ -102,7 +106,12 @@ def convert(cls, in_args):
if isinstance(values, (list, tuple, set)) and all(
cls.is_simple_value(v) for v in values
):
return {field_name: {"$in": values}}
core_check = {field_name: {"$in": values}}
return (
{"$and": [{field_name: {"$exists": True}}, core_check]}
if None in values
else core_check
)
return None


Expand Down
36 changes: 29 additions & 7 deletions tests/expression_converter_/test_op_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
from django_mongodb_backend.query_conversion.expression_converters import convert_expression


def _wrap_condition_if_null(_type, condition, path):
if _type is None:
return {"$and": [{path: {"$exists": True}}, condition]}
return condition


class ConversionTestCase(SimpleTestCase):
CONVERTIBLE_TYPES = {
"int": 42,
Expand Down Expand Up @@ -53,10 +59,14 @@ def test_no_conversion_dict_value(self):
self.assertNotOptimizable({"$eq": ["$status", {"$gt": 5}]})

def _test_conversion_valid_type(self, _type):
self.assertConversionEqual({"$eq": ["$age", _type]}, {"age": _type})
self.assertConversionEqual(
{"$eq": ["$age", _type]}, _wrap_condition_if_null(_type, {"age": _type}, "age")
)

def _test_conversion_valid_array_type(self, _type):
self.assertConversionEqual({"$eq": ["$age", _type]}, {"age": _type})
self.assertConversionEqual(
{"$eq": ["$age", _type]}, _wrap_condition_if_null(_type, {"age": _type}, "age")
)

def test_conversion_various_types(self):
self._test_conversion_various_types(self._test_conversion_valid_type)
Expand All @@ -78,7 +88,10 @@ def test_no_conversion_dict_value(self):
self.assertNotOptimizable({"$in": ["$status", [{"bad": "val"}]]})

def _test_conversion_valid_type(self, _type):
self.assertConversionEqual({"$in": ["$age", [_type]]}, {"age": {"$in": [_type]}})
self.assertConversionEqual(
{"$in": ["$age", [_type]]},
_wrap_condition_if_null(_type, {"age": {"$in": [_type]}}, "age"),
)

def test_conversion_various_types(self):
for _type, val in self.CONVERTIBLE_TYPES.items():
Expand Down Expand Up @@ -170,7 +183,10 @@ def test_no_conversion_dict_value(self):
self.assertNotOptimizable({"$gt": ["$price", {}]})

def _test_conversion_valid_type(self, _type):
self.assertConversionEqual({"$gt": ["$price", _type]}, {"price": {"$gt": _type}})
self.assertConversionEqual(
{"$gt": ["$price", _type]},
_wrap_condition_if_null(_type, {"price": {"$gt": _type}}, "price"),
)

def test_conversion_various_types(self):
self._test_conversion_various_types(self._test_conversion_valid_type)
Expand All @@ -193,7 +209,7 @@ def test_no_conversion_dict_value(self):
def _test_conversion_valid_type(self, _type):
expr = {"$gte": ["$price", _type]}
expected = {"price": {"$gte": _type}}
self.assertConversionEqual(expr, expected)
self.assertConversionEqual(expr, _wrap_condition_if_null(_type, expected, "price"))

def test_conversion_various_types(self):
self._test_conversion_various_types(self._test_conversion_valid_type)
Expand All @@ -210,7 +226,10 @@ def test_no_conversion_dict_value(self):
self.assertNotOptimizable({"$lt": ["$price", {}]})

def _test_conversion_valid_type(self, _type):
self.assertConversionEqual({"$lt": ["$price", _type]}, {"price": {"$lt": _type}})
self.assertConversionEqual(
{"$lt": ["$price", _type]},
_wrap_condition_if_null(_type, {"price": {"$lt": _type}}, "price"),
)

def test_conversion_various_types(self):
self._test_conversion_various_types(self._test_conversion_valid_type)
Expand All @@ -227,7 +246,10 @@ def test_no_conversion_dict_value(self):
self.assertNotOptimizable({"$lte": ["$price", {}]})

def _test_conversion_valid_type(self, _type):
self.assertConversionEqual({"$lte": ["$price", _type]}, {"price": {"$lte": _type}})
self.assertConversionEqual(
{"$lte": ["$price", _type]},
_wrap_condition_if_null(_type, {"price": {"$lte": _type}}, "price"),
)

def test_conversion_various_types(self):
self._test_conversion_various_types(self._test_conversion_valid_type)
7 changes: 7 additions & 0 deletions tests/lookup_/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,10 @@ class Meta:

def __str__(self):
return str(self.num)


# Model with a single optional JSON column; the lookup tests use it to
# exercise filters that compare a JSON field against None.
class NullableJSONModel(models.Model):
    # Both null and blank are allowed, so instances can be created without a value.
    value = models.JSONField(null=True, blank=True)

    class Meta:
        # Declares the database feature this model's tests depend on.
        required_db_features = {"supports_json_field"}
73 changes: 72 additions & 1 deletion tests/lookup_/tests.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from bson import ObjectId
from django.db import connection
from django.test import TestCase

from django_mongodb_backend.test import MongoTestCaseMixin

from .models import Book, Number
from .models import Book, NullableJSONModel, Number


class NumericLookupTests(TestCase):
Expand Down Expand Up @@ -66,3 +68,72 @@ def test_eq_and_in(self):
"lookup__book",
[{"$match": {"$and": [{"isbn": {"$in": ("12345", "56789")}}, {"title": "Moby Dick"}]}}],
)


class NullValueLookupTests(MongoTestCaseMixin, TestCase):
# Maps an ORM lookup name to a callable that, given a field name, builds the
# MongoDB predicate the tests expect for a None comparison with that lookup.
_OPERATOR_PREDICATE_MAP = {
# "exact" compares the field directly with None.
"exact": lambda field: {field: None},
# "in" places the single None value inside an $in list.
"in": lambda field: {field: {"$in": [None]}},
}

@classmethod
def setUpTestData(cls):
    """Create the shared fixtures for the None-lookup tests.

    Stores five books, five ``NullableJSONModel`` rows created without a
    value plus one row whose JSON value holds a nested None, and a fresh
    ``ObjectId`` kept on the class as ``unique_id``.
    """
    books = (Book(title=f"Book {number}", isbn=str(number)) for number in range(5))
    cls.book_objs = Book.objects.bulk_create(books)

    valueless_rows = (NullableJSONModel() for _ in range(5))
    cls.null_objs = NullableJSONModel.objects.bulk_create(valueless_rows)
    nested_null_row = NullableJSONModel.objects.create(value={"name": None})
    cls.null_objs.append(nested_null_row)
    cls.unique_id = ObjectId()

def _test_none_filter_nullable_json(self, op, predicate, field):
    """Check a None filter on ``NullableJSONModel`` for one lookup.

    Asserts that filtering ``field`` with lookup ``op`` runs exactly one
    query and returns no rows, then compares the captured query with the
    expected ``$match`` stage built from ``predicate(field)``.
    """
    # The "in" lookup takes a list; every other lookup takes a bare None.
    lookup_value = [None] if op == "in" else None
    with self.assertNumQueries(1) as ctx:
        matches = NullableJSONModel.objects.filter(**{f"{field}__{op}": lookup_value})
        self.assertQuerySetEqual(matches, [])
    captured_sql = ctx.captured_queries[0]["sql"]
    # NOTE(review): the first $and clause has no field key, unlike the
    # {field: {"$exists": True}} guard expected for Book -- confirm this is
    # the intended pipeline.
    expected_match = {"$and": [{"$exists": False}, predicate(field)]}
    self.assertAggregateQuery(
        captured_sql,
        "lookup__nullablejsonmodel",
        [{"$match": expected_match}],
    )

def _test_none_filter_binary_operator(self, op, predicate, field):
    """Check a None filter on ``Book`` for one lookup.

    Asserts that filtering ``field`` with lookup ``op`` runs exactly one
    query and returns no rows, then compares the captured query with the
    expected ``$or`` of "field exists and matches the predicate" and
    "field type is missing".
    """
    # The "in" lookup takes a list; every other lookup takes a bare None.
    lookup_value = [None] if op == "in" else None
    with self.assertNumQueries(1) as ctx:
        matches = Book.objects.filter(**{f"{field}__{op}": lookup_value})
        self.assertQuerySetEqual(matches, [])
    captured_sql = ctx.captured_queries[0]["sql"]
    present_and_matching = {"$and": [{field: {"$exists": True}}, predicate(field)]}
    field_is_missing = {"$expr": {"$eq": [{"$type": f"${field}"}, "missing"]}}
    self.assertAggregateQuery(
        captured_sql,
        "lookup__book",
        [{"$match": {"$or": [present_and_matching, field_is_missing]}}],
    )

def _test_with_raw_data(self, model, test_function, field):
collection = connection.database.get_collection(model._meta.db_table)
try:
collection.insert_one({"_id": self.unique_id})
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to decide to what extent we'll support "sparse" data with missing columns (another example: #275). We have only a few tests with this sort of data, so the current status is basically "ad-hoc, best effort, react to bug reports." I feel we should decide on an official policy and document it so that users have clear expectations. If we move forward with supporting this, we should at least have these tests omit only optional fields, so that a failing test assertion can still generate the usual error message (in this case, Book.__str__() fails because title is None).

======================================================================
ERROR: test_none_filter_binary_operator (lookup_.tests.NullValueLookupTests.test_none_filter_binary_operator) (op='exact')
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/tim/code/django-mongodb/tests/lookup_/tests.py", line 105, in _test_with_raw_data
    test_function(op, predicate, field)
  File "/home/tim/code/django-mongodb/tests/lookup_/tests.py", line 80, in _test_none_filter_binary_operator
    self.assertQuerySetEqual(
  File "/home/tim/code/django/django/test/testcases.py", line 1290, in assertQuerySetEqual
    return self.assertEqual(list(items), values, msg=msg)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/unittest/case.py", line 885, in assertEqual
    assertion_func(first, second, msg=msg)
  File "/usr/lib/python3.12/unittest/case.py", line 1091, in assertListEqual
    self.assertSequenceEqual(list1, list2, msg, seq_type=list)
  File "/usr/lib/python3.12/unittest/case.py", line 1068, in assertSequenceEqual
    difflib.ndiff(pprint.pformat(seq1).splitlines(),
                  ^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/pprint.py", line 62, in pformat
    underscore_numbers=underscore_numbers).pformat(object)
                                           ^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/pprint.py", line 161, in pformat
    self._format(object, sio, 0, 0, {}, 0)
  File "/usr/lib/python3.12/pprint.py", line 178, in _format
    rep = self._repr(object, context, level)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/pprint.py", line 458, in _repr
    repr, readable, recursive = self.format(object, context.copy(),
                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/pprint.py", line 471, in format
    return self._safe_repr(object, context, maxlevels, level)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/pprint.py", line 622, in _safe_repr
    orepr, oreadable, orecur = self.format(
                               ^^^^^^^^^^^^
  File "/usr/lib/python3.12/pprint.py", line 471, in format
    return self._safe_repr(object, context, maxlevels, level)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/pprint.py", line 632, in _safe_repr
    rep = repr(object)
          ^^^^^^^^^^^^
  File "/home/tim/code/django/django/db/models/base.py", line 590, in __repr__
    return "<%s: %s>" % (self.__class__.__name__, self)
                                                  ^^^^
TypeError: __str__ returned non-string (type NoneType)

Copy link
Contributor Author

@Jibola Jibola Sep 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm actually of the mind -- for this case specifically -- that we should not include this test. In order to reach this case, you have to engage in ORM-breaking behavior that we don't have a firm policy on. The real "bugs" for this case stem from optimizing embedded documents in a flow that only leverages the Django ORM.

As you've also just stated, we don't have a clear "limit" on what is a valid experience. Let's loop in product and get their take.


for op, predicate in self._OPERATOR_PREDICATE_MAP.items():
with self.subTest(op=op):
test_function(op, predicate, field)

finally:
collection.delete_one({"_id": self.unique_id})

def test_none_filter_nullable_json(self):
    """Run the nullable-JSON None-filter check on the ``value`` field."""
    check = self._test_none_filter_nullable_json
    self._test_with_raw_data(NullableJSONModel, check, "value")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This style of writing tests with generic methods calling other generic methods doesn't make my heart sing. It seems extremely tedious to understand and confirm that the test actually does what it's designed to do. I saw some of this in the converter patch and let it slide there, but I think it's best to avoid going forward. There can be a role for helper functions and assertions, but when there's special casing like **{f"{field}__{op}": [None] if op == "in" else None}, I think it's time to have separate tests, even if it's a bit more repetitive. I find that even subTest failures are sometimes very convoluted to debug.

Copy link
Contributor Author

@Jibola Jibola Sep 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can agree I don't need to shoe-horn "in" into the test, so I don't mind separating them. I will say, though, it is a matter of what makes the most sense. If readability is the issue then that's more of a lack of explicit documentation on authorship.

For context on this: I originally had more operators, but now that it has gone back down to 2, it's less valuable. In cases where the same rote operation becomes needlessly repetitious, I do think it still holds value.


def test_none_filter_binary_operator(self):
    """Run the Book None-filter check on the ``title`` field."""
    check = self._test_none_filter_binary_operator
    self._test_with_raw_data(Book, check, "title")
Loading