From 36d54b701c8789329d7354ec94435b6e1d523e42 Mon Sep 17 00:00:00 2001
From: Qinren Zhou
Date: Wed, 25 Mar 2026 13:52:40 +0800
Subject: [PATCH] test(test_collection_dql): add complex filter combinations
test case
---
README.md | 11 +-
python/tests/detail/test_collection_dql.py | 126 +++++++++++++++++++--
2 files changed, 124 insertions(+), 13 deletions(-)
diff --git a/README.md b/README.md
index 01c91e1c1..129ab6491 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,8 @@
📚 Docs |
📊 Benchmarks |
🔎 DeepWiki |
- 🎮 Discord
+ 🎮 Discord |
+ 🐦 X (Twitter)
**Zvec** is an open-source, in-process vector database — lightweight, lightning-fast, and designed to embed directly into applications. Built on **Proxima** (Alibaba's battle-tested vector search engine), it delivers production-grade, low-latency, scalable similarity search with minimal setup.
@@ -108,10 +109,10 @@ Stay updated and get support — scan or click:
-| 💬 DingTalk | 📱 WeChat | 🎮 Discord |
-|:---:|:---:|:---:|
-|

|

| [](https://discord.gg/rKddFBBu9z) |
-| Scan to join | Scan to join | Click to join |
+| 💬 DingTalk | 📱 WeChat | 🎮 Discord | X (Twitter) |
+| :---: | :---: | :---: | :---: |
+|

|

| [](https://discord.gg/rKddFBBu9z) | [](
) |
+| Scan to join | Scan to join | Click to join | Click to follow |
diff --git a/python/tests/detail/test_collection_dql.py b/python/tests/detail/test_collection_dql.py
index f4804f26d..cb3f310d0 100644
--- a/python/tests/detail/test_collection_dql.py
+++ b/python/tests/detail/test_collection_dql.py
@@ -29,9 +29,6 @@
from zvec.model.schema import FieldSchema, VectorSchema
from zvec.extension import RrfReRanker, WeightedReRanker, QwenReRanker
from distance_helper import *
-
-from zvec import StatusCode
-from distance_helper import *
from fixture_helper import *
from doc_helper import *
from params_helper import *
@@ -305,7 +302,7 @@ def test_query_with_filter_empty(self, full_collection: Collection, doc_num):
ids2 = set(doc.id for doc in result2)
assert ids1 == ids2
- @pytest.mark.parametrize("field_name", ["int32_field"])
+ @pytest.mark.parametrize("field_name", DEFAULT_SCALAR_FIELD_NAME.values())
@pytest.mark.parametrize("doc_num", [10])
def test_query_with_filter_single_condition(
self, full_collection: Collection, doc_num, field_name
@@ -314,19 +311,81 @@ def test_query_with_filter_single_condition(
generate_doc(i, full_collection.schema) for i in range(doc_num)
]
batchdoc_and_check(full_collection, multiple_docs, doc_num, operator="insert")
- filter = field_name + " > 5"
+
+ # Construct different filter conditions based on field type
+ if field_name == "bool_field":
+ filter = field_name + " = true"
+ expected_doc_indices = [
+ i
+ for i in range(doc_num)
+ if generate_doc(i, full_collection.schema).field(field_name) == True
+ ]
+ elif field_name in ["float_field", "double_field"]:
+ filter = field_name + " > 5.0"
+ expected_doc_indices = [
+ i
+ for i in range(doc_num)
+ if generate_doc(i, full_collection.schema).field(field_name) > 5.0
+ ]
+ elif field_name.startswith("array_"):
+        # Array fields: filter on array_length(...) > 0, i.e. the array is non-empty
+ filter = (
+ "array_length(" + field_name + ") > 0"
+ ) # Use array_length function to check array length
+ expected_doc_indices = [
+ i
+ for i in range(doc_num)
+ if len(generate_doc(i, full_collection.schema).field(field_name)) > 0
+ ]
+ elif field_name in ["string_field"]:
+ filter = field_name + " != 'lcy'"
+ expected_doc_indices = [
+ i
+ for i in range(doc_num)
+ if generate_doc(i, full_collection.schema).field(field_name) != "lcy"
+ ]
+ elif field_name in [
+ "int32_field",
+ "int64_field",
+ "uint32_field",
+ "uint64_field",
+ ]: # Integer type
+ filter = field_name + " > 5"
+ expected_doc_indices = [i for i in range(6, doc_num)]
+ else:
+ raise ValueError(f"Unsupported field type for filtering: {field_name}")
+
query_result = full_collection.query(filter=filter)
- assert len(query_result) == doc_num - 6
+
+ assert len(query_result) == len(expected_doc_indices)
returned_doc_ids = set()
for doc in query_result:
returned_doc_ids.add(doc.id)
- expected_doc_ids = set(str(i) for i in range(6, doc_num))
+ expected_doc_ids = set(str(i) for i in expected_doc_indices)
for doc in query_result:
assert doc.id in expected_doc_ids
- assert int(doc.field(field_name)) > 5
+ if field_name == "bool_field":
+ assert doc.field(field_name) == True
+ elif field_name in ["float_field", "double_field"]:
+ assert doc.field(field_name) > 5.0
+ elif field_name.startswith("array_"):
+ # For array types, validate that the array length is greater than 0
+ field_val = doc.field(field_name)
+ assert len(field_val) > 0
+ elif field_name in [
+ "int32_field",
+ "int64_field",
+ "uint32_field",
+ "uint64_field",
+ ]: # Integer type
+ assert int(doc.field(field_name)) > 5
+ elif field_name in ["string_field"]:
+ assert doc.field(field_name) != "lcy"
+ else:
+ raise ValueError(f"Unsupported field type for validation: {field_name}")
single_querydoc_check(multiple_docs, query_result, full_collection)
@@ -450,6 +509,57 @@ def test_query_with_filter_parentheses(
)
single_querydoc_check(multiple_docs, query_result, full_collection)
+ @pytest.mark.parametrize(
+ "filter",
+ [
+ # Test combinations with different scalar field types using AND, OR, parentheses
+ "(int32_field > 2 AND int32_field < 8) OR bool_field = true",
+ "(float_field > 3.0 AND float_field < 7.0) AND string_field != 'exclude'",
+ "(double_field >= 1.5 OR double_field <= 8.5) AND uint32_field > 2",
+ "bool_field = false OR (int64_field > 3 AND int64_field < 9)",
+ "(string_field = 'special' OR string_field = 'test') AND int32_field > 1",
+ "(array_length(array_int32_field) > 0 AND int32_field > 5) OR bool_field = true",
+ "uint64_field > 1 AND uint64_field < 9 AND (float_field > 2.0 OR double_field < 9.0)",
+ "(bool_field = true OR string_field = 'special') AND (int32_field > 4 OR int64_field < 6)",
+ # More complex combinations covering more field types
+ "((int32_field > 1 AND int32_field < 5) OR (int64_field > 6 AND int64_field < 9)) AND bool_field = true",
+ "(uint32_field > 2 OR uint64_field < 8) AND (float_field > 1.0 AND double_field < 10.0)",
+ "(string_field != 'skip' AND int32_field >= 2) OR (bool_field = false AND double_field <= 7.5)",
+ # Additional combinations with array_length and other supported operations on array fields
+ "(array_length(array_string_field) > 0 OR int32_field > 5) AND bool_field = true",
+ "(array_length(array_int32_field) >= 1 AND float_field > 2.0) OR (array_length(array_float_field) > 0)",
+ "(array_length(array_bool_field) > 0 OR array_length(array_double_field) > 0) AND string_field != ''",
+ # Additional combinations with other supported scalar operations using range comparisons
+ "(int32_field > 1 AND int32_field < 10 OR string_field != 'exclude1') AND bool_field = true",
+ "(float_field > 1.0 AND float_field < 10.0 AND double_field > 0.5) OR (uint32_field > 5 AND uint32_field < 50)",
+ "(int64_field > 50 OR uint64_field < 1000) AND string_field != ''",
+ ],
+ )
+ @pytest.mark.parametrize(
+ "doc_num", [20]
+ ) # Increased doc number for better coverage
+ def test_query_with_filter_complex_combinations(
+ self, full_collection: Collection, doc_num, filter
+ ):
+ multiple_docs = [
+ generate_doc(i, full_collection.schema) for i in range(doc_num)
+ ]
+ batchdoc_and_check(full_collection, multiple_docs, doc_num, operator="insert")
+
+ # Execute the query with the complex filter
+ query_result = full_collection.query(filter=filter)
+
+        # Basic validation that the query executes without error and returns a (possibly empty) result
+ assert query_result is not None
+
+ # Validate that all returned documents exist in the collection
+ for doc in query_result:
+ assert hasattr(doc, "id")
+ assert doc.id in [d.id for d in multiple_docs]
+
+ # Perform basic check on the query results
+ single_querydoc_check(multiple_docs, query_result, full_collection)
+
@pytest.mark.parametrize(
"filter",
[