From 36d54b701c8789329d7354ec94435b6e1d523e42 Mon Sep 17 00:00:00 2001 From: Qinren Zhou Date: Wed, 25 Mar 2026 13:52:40 +0800 Subject: [PATCH] test(test_collection_dql): add complex filter combinations test case --- README.md | 11 +- python/tests/detail/test_collection_dql.py | 126 +++++++++++++++++++-- 2 files changed, 124 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 01c91e1c1..129ab6491 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,8 @@ 📚 Docs | 📊 Benchmarks | 🔎 DeepWiki | - 🎮 Discord + 🎮 Discord | + 🐦 X (Twitter)

**Zvec** is an open-source, in-process vector database — lightweight, lightning-fast, and designed to embed directly into applications. Built on **Proxima** (Alibaba's battle-tested vector search engine), it delivers production-grade, low-latency, scalable similarity search with minimal setup. @@ -108,10 +109,10 @@ Stay updated and get support — scan or click:
-| 💬 DingTalk | 📱 WeChat | 🎮 Discord | -|:---:|:---:|:---:| -| | | [![Discord](https://img.shields.io/badge/Discord-Join%20Server-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/rKddFBBu9z) | -| Scan to join | Scan to join | Click to join | +| 💬 DingTalk | 📱 WeChat | 🎮 Discord | X (Twitter) | +| :---: | :---: | :---: | :---: | +| DingTalk QR Code | WeChat QR Code | [![Discord](https://img.shields.io/badge/Discord-Join%20Server-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/rKddFBBu9z) | [![X (formerly Twitter) Follow](https://img.shields.io/twitter/follow/ZvecAI)](https://x.com/ZvecAI) | +| Scan to join | Scan to join | Click to join | Click to follow |
diff --git a/python/tests/detail/test_collection_dql.py b/python/tests/detail/test_collection_dql.py index f4804f26d..cb3f310d0 100644 --- a/python/tests/detail/test_collection_dql.py +++ b/python/tests/detail/test_collection_dql.py @@ -29,9 +29,6 @@ from zvec.model.schema import FieldSchema, VectorSchema from zvec.extension import RrfReRanker, WeightedReRanker, QwenReRanker from distance_helper import * - -from zvec import StatusCode -from distance_helper import * from fixture_helper import * from doc_helper import * from params_helper import * @@ -305,7 +302,7 @@ def test_query_with_filter_empty(self, full_collection: Collection, doc_num): ids2 = set(doc.id for doc in result2) assert ids1 == ids2 - @pytest.mark.parametrize("field_name", ["int32_field"]) + @pytest.mark.parametrize("field_name", DEFAULT_SCALAR_FIELD_NAME.values()) @pytest.mark.parametrize("doc_num", [10]) def test_query_with_filter_single_condition( self, full_collection: Collection, doc_num, field_name @@ -314,19 +311,81 @@ def test_query_with_filter_single_condition( generate_doc(i, full_collection.schema) for i in range(doc_num) ] batchdoc_and_check(full_collection, multiple_docs, doc_num, operator="insert") - filter = field_name + " > 5" + + # Construct different filter conditions based on field type + if field_name == "bool_field": + filter = field_name + " = true" + expected_doc_indices = [ + i + for i in range(doc_num) + if generate_doc(i, full_collection.schema).field(field_name) == True + ] + elif field_name in ["float_field", "double_field"]: + filter = field_name + " > 5.0" + expected_doc_indices = [ + i + for i in range(doc_num) + if generate_doc(i, full_collection.schema).field(field_name) > 5.0 + ] + elif field_name.startswith("array_"): + # For array types, we check that the array length is greater than 0 (indicating the array is not empty) + filter = ( + "array_length(" + field_name + ") > 0" + ) # Use array_length function to check array length + expected_doc_indices = [ + i + for 
i in range(doc_num) + if len(generate_doc(i, full_collection.schema).field(field_name)) > 0 + ] + elif field_name in ["string_field"]: + filter = field_name + " != 'lcy'" + expected_doc_indices = [ + i + for i in range(doc_num) + if generate_doc(i, full_collection.schema).field(field_name) != "lcy" + ] + elif field_name in [ + "int32_field", + "int64_field", + "uint32_field", + "uint64_field", + ]: # Integer type + filter = field_name + " > 5" + expected_doc_indices = [i for i in range(6, doc_num)] + else: + raise ValueError(f"Unsupported field type for filtering: {field_name}") + query_result = full_collection.query(filter=filter) - assert len(query_result) == doc_num - 6 + + assert len(query_result) == len(expected_doc_indices) returned_doc_ids = set() for doc in query_result: returned_doc_ids.add(doc.id) - expected_doc_ids = set(str(i) for i in range(6, doc_num)) + expected_doc_ids = set(str(i) for i in expected_doc_indices) for doc in query_result: assert doc.id in expected_doc_ids - assert int(doc.field(field_name)) > 5 + if field_name == "bool_field": + assert doc.field(field_name) == True + elif field_name in ["float_field", "double_field"]: + assert doc.field(field_name) > 5.0 + elif field_name.startswith("array_"): + # For array types, validate that the array length is greater than 0 + field_val = doc.field(field_name) + assert len(field_val) > 0 + elif field_name in [ + "int32_field", + "int64_field", + "uint32_field", + "uint64_field", + ]: # Integer type + assert int(doc.field(field_name)) > 5 + elif field_name in ["string_field"]: + assert doc.field(field_name) != "lcy" + else: + raise ValueError(f"Unsupported field type for validation: {field_name}") single_querydoc_check(multiple_docs, query_result, full_collection) @@ -450,6 +509,57 @@ def test_query_with_filter_parentheses( ) single_querydoc_check(multiple_docs, query_result, full_collection) + @pytest.mark.parametrize( + "filter", + [ + # Test combinations with different scalar field types using 
AND, OR, parentheses + "(int32_field > 2 AND int32_field < 8) OR bool_field = true", + "(float_field > 3.0 AND float_field < 7.0) AND string_field != 'exclude'", + "(double_field >= 1.5 OR double_field <= 8.5) AND uint32_field > 2", + "bool_field = false OR (int64_field > 3 AND int64_field < 9)", + "(string_field = 'special' OR string_field = 'test') AND int32_field > 1", + "(array_length(array_int32_field) > 0 AND int32_field > 5) OR bool_field = true", + "uint64_field > 1 AND uint64_field < 9 AND (float_field > 2.0 OR double_field < 9.0)", + "(bool_field = true OR string_field = 'special') AND (int32_field > 4 OR int64_field < 6)", + # More complex combinations covering more field types + "((int32_field > 1 AND int32_field < 5) OR (int64_field > 6 AND int64_field < 9)) AND bool_field = true", + "(uint32_field > 2 OR uint64_field < 8) AND (float_field > 1.0 AND double_field < 10.0)", + "(string_field != 'skip' AND int32_field >= 2) OR (bool_field = false AND double_field <= 7.5)", + # Additional combinations with array_length and other supported operations on array fields + "(array_length(array_string_field) > 0 OR int32_field > 5) AND bool_field = true", + "(array_length(array_int32_field) >= 1 AND float_field > 2.0) OR (array_length(array_float_field) > 0)", + "(array_length(array_bool_field) > 0 OR array_length(array_double_field) > 0) AND string_field != ''", + # Additional combinations with other supported scalar operations using range comparisons + "(int32_field > 1 AND int32_field < 10 OR string_field != 'exclude1') AND bool_field = true", + "(float_field > 1.0 AND float_field < 10.0 AND double_field > 0.5) OR (uint32_field > 5 AND uint32_field < 50)", + "(int64_field > 50 OR uint64_field < 1000) AND string_field != ''", + ], + ) + @pytest.mark.parametrize( + "doc_num", [20] + ) # Increased doc number for better coverage + def test_query_with_filter_complex_combinations( + self, full_collection: Collection, doc_num, filter + ): + multiple_docs = [ + 
generate_doc(i, full_collection.schema) for i in range(doc_num) + ] + batchdoc_and_check(full_collection, multiple_docs, doc_num, operator="insert") + + # Execute the query with the complex filter + query_result = full_collection.query(filter=filter) + + # Basic validation that query executes without error and returns results + assert query_result is not None + + # Validate that all returned documents exist in the collection + for doc in query_result: + assert hasattr(doc, "id") + assert doc.id in [d.id for d in multiple_docs] + + # Perform basic check on the query results + single_querydoc_check(multiple_docs, query_result, full_collection) + @pytest.mark.parametrize( "filter", [