From b7df7e752e24c48cc143c07d245e3e8df550efcb Mon Sep 17 00:00:00 2001 From: iaojnh Date: Mon, 23 Mar 2026 10:12:02 +0000 Subject: [PATCH 1/4] test(test_collection_recall.py): optimize test cases --- python/tests/detail/test_collection_recall.py | 589 +++++++++++++++--- 1 file changed, 493 insertions(+), 96 deletions(-) diff --git a/python/tests/detail/test_collection_recall.py b/python/tests/detail/test_collection_recall.py index 25dad12dd..f41320b89 100644 --- a/python/tests/detail/test_collection_recall.py +++ b/python/tests/detail/test_collection_recall.py @@ -59,9 +59,6 @@ def batchdoc_and_check(collection: Collection, multiple_docs, operator="insert") stats = collection.stats assert stats is not None, "Collection stats should not be None" - """assert stats.doc_count == len(multiple_docs), ( - f"Document count should be {len(multiple_docs)} after insert, but got {stats.doc_count}" - )""" doc_ids = [doc.id for doc in multiple_docs] fetched_docs = collection.fetch(doc_ids) @@ -152,8 +149,7 @@ def get_ground_truth_for_vector_query( ] ground_truth_ids_scores = similarities[:k] - print("Get the most similar k document IDs k:,ground_truth_ids_scores") - print(k, ground_truth_ids_scores) + return ground_truth_ids_scores else: @@ -186,8 +182,6 @@ def get_ground_truth_map(collection, test_docs, query_vectors_map, metric_type, ) ground_truth_map[field_name][i] = relevant_doc_ids_scores - print("ground_truth_map:\n") - print(ground_truth_map) return ground_truth_map @@ -226,26 +220,11 @@ def calculate_recall_at_k( recall_stats[field_name]["retrieved_count"] += retrieved_count - print("expected_doc_ids_scores_map:\n") - print(expected_doc_ids_scores_map) if i in (expected_doc_ids_scores_map[field_name]): expected_relevant_ids_scores = expected_doc_ids_scores_map[field_name][ i ] - print( - "field_name,i,expected_relevant_ids_scores, query_result_ids_scores:\n" - ) - print( - field_name, - i, - "\n", - expected_relevant_ids_scores, - "\n", - len(query_result_ids_scores), - 
query_result_ids_scores, - ) - # Update total relevant documents count recall_stats[field_name]["total_relevant_count"] += len( expected_relevant_ids_scores ) @@ -286,13 +265,159 @@ def calculate_recall_at_k( return recall_stats +def calculate_recall_at_k_multi_rrf( + collection: Collection, + test_docs, + query_vectors_list, + schema, + top_k=1, + expected_doc_ids_scores_map=None, + tolerance=0.01, +): + result_doc_ids_scores_map = [] + + for doc_vectors in query_vectors_list: + multi_query_vectors = [] + for k, v in DEFAULT_VECTOR_FIELD_NAME.items(): + multi_query_vectors.append(VectorQuery(field_name=v, vector=doc_vectors[v])) + + rrf_reranker = RrfReRanker(topn=10) + multi_query_result = collection.query( + vectors=multi_query_vectors, + reranker=rrf_reranker, + ) + result_dict = {} + + for doc in multi_query_result[:top_k]: + result_dict[doc.id] = doc.score + result_doc_ids_scores_map.append(result_dict) + + recall_stats = { + "relevant_retrieved_count": 0, + "total_relevant_count": 0, + "retrieved_count": 0, + "recall_at_k": 0.0, + } + + for result_dict in result_doc_ids_scores_map: + recall_stats["retrieved_count"] = recall_stats["retrieved_count"] + len( + result_dict + ) + + for expected_dict in result_doc_ids_scores_map: + recall_stats["total_relevant_count"] = recall_stats[ + "total_relevant_count" + ] + len(expected_dict) + + for i in range(0, len(result_doc_ids_scores_map)): + relevant_found_count = 0 + for k, v in result_doc_ids_scores_map[i].items(): + for k1, v1 in expected_doc_ids_scores_map[i].items(): + if k == k1: + relevant_found_count += 1 + break + elif k != k1 and abs(v - v1) <= tolerance: + print("IDs are not equal, but the error is small, tolerance") + print(k, k1, v, v1, tolerance) + relevant_found_count += 1 + break + else: + continue + + recall_stats["relevant_retrieved_count"] += relevant_found_count + + if recall_stats["total_relevant_count"] > 0: + recall_stats["recall_at_k"] = ( + recall_stats["relevant_retrieved_count"] + / 
recall_stats["total_relevant_count"] + ) + + return recall_stats + + +def calculate_recall_at_k_multi_weight( + collection: Collection, + test_docs, + query_vectors_list, + schema, + weights, + metric_type, + top_k=1, + expected_doc_ids_scores_map=None, + tolerance=0.01, +): + result_doc_ids_scores_map = [] + + for doc_vectors in query_vectors_list: + weighted_reranker = WeightedReRanker( + topn=10, weights=weights, metric=metric_type + ) + + multi_query_vectors = [] + for k, v in DEFAULT_VECTOR_FIELD_NAME.items(): + multi_query_vectors.append(VectorQuery(field_name=v, vector=doc_vectors[v])) + + multi_query_result = collection.query( + vectors=multi_query_vectors, + reranker=weighted_reranker, + ) + + result_dict = {} + + for doc in multi_query_result[:top_k]: + result_dict[doc.id] = doc.score + result_doc_ids_scores_map.append(result_dict) + + recall_stats = { + "relevant_retrieved_count": 0, + "total_relevant_count": 0, + "retrieved_count": 0, + "recall_at_k": 0.0, + } + + for result_dict in result_doc_ids_scores_map: + recall_stats["retrieved_count"] = recall_stats["retrieved_count"] + len( + result_dict + ) + + for expected_dict in result_doc_ids_scores_map: + recall_stats["total_relevant_count"] = recall_stats[ + "total_relevant_count" + ] + len(expected_dict) + + for i in range(0, len(result_doc_ids_scores_map)): + relevant_found_count = 0 + for k, v in result_doc_ids_scores_map[i].items(): + for k1, v1 in expected_doc_ids_scores_map[i].items(): + if k == k1: + relevant_found_count += 1 + break + elif k != k1 and abs(v - v1) <= tolerance: + print("IDs are not equal, but the error is small, tolerance") + print(k, k1, v, v1, tolerance) + relevant_found_count += 1 + break + else: + continue + + recall_stats["relevant_retrieved_count"] += relevant_found_count + + if recall_stats["total_relevant_count"] > 0: + recall_stats["recall_at_k"] = ( + recall_stats["relevant_retrieved_count"] + / recall_stats["total_relevant_count"] + ) + + return recall_stats + + class 
TestRecall: @pytest.mark.parametrize( "full_schema_new", [ (True, True, HnswIndexParam()), (False, True, IVFIndexParam()), - (False, True, FlatIndexParam()), # ——ok + (False, True, FlatIndexParam()), ( True, True, @@ -396,14 +521,8 @@ def test_recall_with_single_vector_valid_500( multiple_docs = [ generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num) ] - print("len(multiple_docs):\n") - print(len(multiple_docs)) - # print(multiple_docs) for i in range(10): - if i != 0: - pass - # print(multiple_docs[i * 1000:1000 * (i + 1)]) batchdoc_and_check( full_collection_new, multiple_docs[i * 1000 : 1000 * (i + 1)], @@ -413,21 +532,6 @@ def test_recall_with_single_vector_valid_500( stats = full_collection_new.stats assert stats.doc_count == len(multiple_docs) - doc_ids = ["0", "1"] - fetched_docs = full_collection_new.fetch(doc_ids) - print("fetched_docs,multiple_docs") - print( - fetched_docs[doc_ids[0]].vectors["sparse_vector_fp32_field"], - fetched_docs[doc_ids[0]].vectors["sparse_vector_fp16_field"], - fetched_docs[doc_ids[1]].vectors["sparse_vector_fp32_field"], - fetched_docs[doc_ids[1]].vectors["sparse_vector_fp16_field"], - "\n", - multiple_docs[0].vectors["sparse_vector_fp32_field"], - multiple_docs[0].vectors["sparse_vector_fp32_field"], - multiple_docs[1].vectors["sparse_vector_fp32_field"], - multiple_docs[1].vectors["sparse_vector_fp16_field"], - ) - full_collection_new.optimize(option=OptimizeOption()) time.sleep(2) @@ -438,12 +542,10 @@ def test_recall_with_single_vector_valid_500( multiple_docs[i].vectors[field_name] for i in range(query_num) ] - # Get ground truth mapping ground_truth_map = get_ground_truth_map( full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k ) - # Validate ground truth mapping structure for field_name in DEFAULT_VECTOR_FIELD_NAME.values(): assert field_name in ground_truth_map field_gt = ground_truth_map[field_name] @@ -455,16 +557,6 @@ def test_recall_with_single_vector_valid_500( assert 
isinstance(relevant_ids, list) assert len(relevant_ids) <= top_k - # Print ground truth statistics - print(f"Ground Truth for Top-{top_k} Retrieval:") - for field_name, field_gt in ground_truth_map.items(): - print(f" {field_name}:") - for query_idx, relevant_ids in field_gt.items(): - print( - f" Query {query_idx}: {len(relevant_ids)} relevant docs - {relevant_ids[:5]}{'...' if len(relevant_ids) > 5 else ''}" - ) - - # Calculate Recall@K using ground truth recall_at_k_stats = calculate_recall_at_k( full_collection_new, multiple_docs, @@ -474,14 +566,6 @@ def test_recall_with_single_vector_valid_500( expected_doc_ids_scores_map=ground_truth_map, tolerance=0.01, ) - print("ground_truth_map:\n") - print(ground_truth_map) - - print("(recall_at_k_stats:\n") - print(recall_at_k_stats) - print("metric_type:") - print(metric_type) - # Print Recall@K statistics print(f"Recall@{top_k} using Ground Truth:") for field_name, stats in recall_at_k_stats.items(): print(f" {field_name}:") @@ -579,38 +663,16 @@ def test_recall_with_single_vector_valid_2000( multiple_docs = [ generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num) ] - print("len(multiple_docs):\n") - print(len(multiple_docs)) - # print(multiple_docs) for i in range(10): - if i != 0: - pass - # print(multiple_docs[i * 1000:1000 * (i + 1)]) batchdoc_and_check( full_collection_new, multiple_docs[i * 1000 : 1000 * (i + 1)], operator="insert", ) - stats = full_collection_new.stats assert stats.doc_count == len(multiple_docs) - doc_ids = ["0", "1"] - fetched_docs = full_collection_new.fetch(doc_ids) - print("fetched_docs,multiple_docs") - print( - fetched_docs[doc_ids[0]].vectors["sparse_vector_fp32_field"], - fetched_docs[doc_ids[0]].vectors["sparse_vector_fp16_field"], - fetched_docs[doc_ids[1]].vectors["sparse_vector_fp32_field"], - fetched_docs[doc_ids[1]].vectors["sparse_vector_fp16_field"], - "\n", - multiple_docs[0].vectors["sparse_vector_fp32_field"], - 
multiple_docs[0].vectors["sparse_vector_fp32_field"], - multiple_docs[1].vectors["sparse_vector_fp32_field"], - multiple_docs[1].vectors["sparse_vector_fp16_field"], - ) - full_collection_new.optimize(option=OptimizeOption()) time.sleep(2) @@ -621,12 +683,10 @@ def test_recall_with_single_vector_valid_2000( multiple_docs[i].vectors[field_name] for i in range(query_num) ] - # Get ground truth mapping ground_truth_map = get_ground_truth_map( full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k ) - # Validate ground truth mapping structure for field_name in DEFAULT_VECTOR_FIELD_NAME.values(): assert field_name in ground_truth_map field_gt = ground_truth_map[field_name] @@ -638,7 +698,6 @@ def test_recall_with_single_vector_valid_2000( assert isinstance(relevant_ids, list) assert len(relevant_ids) <= top_k - # Print ground truth statistics print(f"Ground Truth for Top-{top_k} Retrieval:") for field_name, field_gt in ground_truth_map.items(): print(f" {field_name}:") @@ -657,14 +716,7 @@ def test_recall_with_single_vector_valid_2000( expected_doc_ids_scores_map=ground_truth_map, tolerance=0.01, ) - print("ground_truth_map:\n") - print(ground_truth_map) - print("(recall_at_k_stats:\n") - print(recall_at_k_stats) - print("metric_type:") - print(metric_type) - # Print Recall@K statistics print(f"Recall@{top_k} using Ground Truth:") for field_name, stats in recall_at_k_stats.items(): print(f" {field_name}:") @@ -674,3 +726,348 @@ def test_recall_with_single_vector_valid_2000( print(f" Recall@{top_k}: {stats['recall_at_k']:.4f}") for k, v in recall_at_k_stats.items(): assert v["recall_at_k"] == 1.0 + + @pytest.mark.parametrize( + "full_schema_new", + [ + (True, True, HnswIndexParam()), + (False, True, IVFIndexParam()), + (False, True, FlatIndexParam()), + ( + True, + True, + HnswIndexParam( + metric_type=MetricType.IP, + m=16, + ef_construction=100, + ), + ), + ( + True, + True, + HnswIndexParam( + metric_type=MetricType.COSINE, + m=24, + 
ef_construction=150, + ), + ), + ( + True, + True, + HnswIndexParam( + metric_type=MetricType.L2, + m=32, + ef_construction=200, + ), + ), + ( + False, + True, + FlatIndexParam( + metric_type=MetricType.IP, + ), + ), + ( + True, + True, + FlatIndexParam( + metric_type=MetricType.COSINE, + ), + ), + ( + True, + True, + FlatIndexParam( + metric_type=MetricType.L2, + ), + ), + ( + True, + True, + IVFIndexParam( + metric_type=MetricType.IP, + n_list=100, + n_iters=10, + use_soar=False, + ), + ), + ( + True, + True, + IVFIndexParam( + metric_type=MetricType.L2, + n_list=200, + n_iters=20, + use_soar=True, + ), + ), + ( + True, + True, + IVFIndexParam( + metric_type=MetricType.COSINE, + n_list=150, + n_iters=15, + use_soar=False, + ), + ), + ], + indirect=True, + ) + @pytest.mark.parametrize("doc_num", [500]) + @pytest.mark.parametrize("query_num", [10]) + @pytest.mark.parametrize("top_k", [1]) + def test_recall_with_multi_vector_rrf( + self, + full_collection_new: Collection, + doc_num, + query_num, + top_k, + full_schema_new, + request, + ): + full_schema_params = request.getfixturevalue("full_schema_new") + + for vector_para in full_schema_params.vectors: + if vector_para.name == "vector_fp32_field": + metric_type = vector_para.index_param.metric_type + break + + multiple_docs = [ + generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num) + ] + + for i in range(10): + batchdoc_and_check( + full_collection_new, + multiple_docs[i * 1000 : 1000 * (i + 1)], + operator="insert", + ) + + stats = full_collection_new.stats + assert stats.doc_count == len(multiple_docs) + + full_collection_new.optimize(option=OptimizeOption()) + + time.sleep(2) + + query_vectors_list = [multiple_docs[i].vectors for i in range(query_num)] + + expected_result_map = [] + + for doc_vectors in query_vectors_list: + single_query_results = {} + for k, v in DEFAULT_VECTOR_FIELD_NAME.items(): + single_query_results[v] = full_collection_new.query( + VectorQuery(field_name=v, 
vector=doc_vectors[v]) + ) + expected_rrf_scores_dict = calculate_multi_vector_rrf_scores( + single_query_results + ) + + sorted_dict_desc = dict( + sorted( + expected_rrf_scores_dict.items(), key=lambda x: x[1], reverse=True + )[:top_k] + ) + + expected_result_map.append(sorted_dict_desc) + + recall_at_k_stats = calculate_recall_at_k_multi_rrf( + full_collection_new, + multiple_docs, + query_vectors_list, + full_schema_new, + top_k=top_k, + expected_doc_ids_scores_map=expected_result_map, + tolerance=0.01, + ) + + # Print Recall@K statistics + print(f"Recall@{top_k} using Ground Truth:") + + print( + f"Relevant Retrieved: {recall_at_k_stats['relevant_retrieved_count']}/{recall_at_k_stats['total_relevant_count']}" + ) + print(f" Recall@{top_k}: {recall_at_k_stats['recall_at_k']:.4f}") + assert recall_at_k_stats["recall_at_k"] == 1.0 + + @pytest.mark.parametrize( + "weights", + [ + { + "vector_fp32_field": 0.49, + "vector_fp16_field": 0.01, + "vector_int8_field": 0.3, + "sparse_vector_fp32_field": 0.1, + "sparse_vector_fp16_field": 0.1, + } + ], + ) + @pytest.mark.parametrize( + "metrictype", + [MetricType.COSINE, MetricType.IP, MetricType.L2], + ) + @pytest.mark.parametrize( + "full_schema_new", + [ + (True, True, HnswIndexParam()), + (False, True, IVFIndexParam()), + (False, True, FlatIndexParam()), + ( + True, + True, + HnswIndexParam( + metric_type=MetricType.IP, + m=16, + ef_construction=100, + ), + ), + ( + True, + True, + HnswIndexParam( + metric_type=MetricType.COSINE, + m=24, + ef_construction=150, + ), + ), + ( + True, + True, + HnswIndexParam( + metric_type=MetricType.L2, + m=32, + ef_construction=200, + ), + ), + ( + False, + True, + FlatIndexParam( + metric_type=MetricType.IP, + ), + ), + ( + True, + True, + FlatIndexParam( + metric_type=MetricType.COSINE, + ), + ), + ( + True, + True, + FlatIndexParam( + metric_type=MetricType.L2, + ), + ), + ( + True, + True, + IVFIndexParam( + metric_type=MetricType.IP, + n_list=100, + n_iters=10, + use_soar=False, + 
), + ), + ( + True, + True, + IVFIndexParam( + metric_type=MetricType.L2, + n_list=200, + n_iters=20, + use_soar=True, + ), + ), + ( + True, + True, + IVFIndexParam( + metric_type=MetricType.COSINE, + n_list=150, + n_iters=15, + use_soar=False, + ), + ), + ], + indirect=True, + ) + @pytest.mark.parametrize("doc_num", [500]) + @pytest.mark.parametrize("query_num", [10]) + @pytest.mark.parametrize("top_k", [1]) + def test_recall_with_multi_vector_weight( + self, + full_collection_new: Collection, + doc_num, + query_num, + top_k, + full_schema_new, + request, + weights, + metrictype, + ): + multiple_docs = [ + generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num) + ] + + for i in range(10): + batchdoc_and_check( + full_collection_new, + multiple_docs[i * 1000 : 1000 * (i + 1)], + operator="insert", + ) + + stats = full_collection_new.stats + assert stats.doc_count == len(multiple_docs) + + full_collection_new.optimize(option=OptimizeOption()) + + time.sleep(2) + + query_vectors_list = [multiple_docs[i].vectors for i in range(query_num)] + + print("query_vectors_list:\n") + print(query_vectors_list) + + expected_result_map = [] + + for doc_vectors in query_vectors_list: + single_query_results = {} + for k, v in DEFAULT_VECTOR_FIELD_NAME.items(): + single_query_results[v] = full_collection_new.query( + VectorQuery(field_name=v, vector=doc_vectors[v]) + ) + + expected_weighted_scores = calculate_multi_vector_weighted_scores( + single_query_results, weights, metrictype + ) + + sorted_dict_desc = dict( + sorted( + expected_weighted_scores.items(), key=lambda x: x[1], reverse=True + )[:top_k] + ) + + expected_result_map.append(sorted_dict_desc) + + recall_at_k_stats = calculate_recall_at_k_multi_weight( + full_collection_new, + multiple_docs, + query_vectors_list, + full_schema_new, + weights, + metrictype, + top_k=top_k, + expected_doc_ids_scores_map=expected_result_map, + tolerance=0.01, + ) + print(f"Recall@{top_k} using Ground Truth:") + + print( + 
f"Relevant Retrieved: {recall_at_k_stats['relevant_retrieved_count']}/{recall_at_k_stats['total_relevant_count']}" + ) + print(f" Recall@{top_k}: {recall_at_k_stats['recall_at_k']:.4f}") + assert recall_at_k_stats["recall_at_k"] == 1.0 From db1227b6742d30822760285e0e182b575a5a0179 Mon Sep 17 00:00:00 2001 From: iaojnh Date: Tue, 24 Mar 2026 06:41:25 +0000 Subject: [PATCH 2/4] test(test_collection_crash_recovery_*.py): add test cases --- ...est_collection_crash_recovery_addcolumn.py | 434 +++++++++++++++ ...t_collection_crash_recovery_altercolumn.py | 471 ++++++++++++++++ ...t_collection_crash_recovery_createindex.py | 464 ++++++++++++++++ ...est_collection_crash_recovery_deletedoc.py | 445 +++++++++++++++ ...st_collection_crash_recovery_dropcolumn.py | 431 +++++++++++++++ ...est_collection_crash_recovery_insertdoc.py | 444 +++++++++++++++ ...est_collection_crash_recovery_updatedoc.py | 514 ++++++++++++++++++ ...est_collection_crash_recovery_upsertdoc.py | 514 ++++++++++++++++++ 8 files changed, 3717 insertions(+) create mode 100644 python/tests/detail/test_collection_crash_recovery_addcolumn.py create mode 100644 python/tests/detail/test_collection_crash_recovery_altercolumn.py create mode 100644 python/tests/detail/test_collection_crash_recovery_createindex.py create mode 100644 python/tests/detail/test_collection_crash_recovery_deletedoc.py create mode 100644 python/tests/detail/test_collection_crash_recovery_dropcolumn.py create mode 100644 python/tests/detail/test_collection_crash_recovery_insertdoc.py create mode 100644 python/tests/detail/test_collection_crash_recovery_updatedoc.py create mode 100644 python/tests/detail/test_collection_crash_recovery_upsertdoc.py diff --git a/python/tests/detail/test_collection_crash_recovery_addcolumn.py b/python/tests/detail/test_collection_crash_recovery_addcolumn.py new file mode 100644 index 000000000..8ae7b3658 --- /dev/null +++ b/python/tests/detail/test_collection_crash_recovery_addcolumn.py @@ -0,0 +1,434 @@ +# -*- 
coding: utf-8 -*- +""" +test_collection_crash_recovery_addcolumn.py + +This script is used to test Zvec's recovery capability after simulating a "power failure" (forced process termination) during column addition. +It first successfully creates a collection in the main process and inserts some documents, then starts a subprocess to open the collection and perform column addition operations. +During the column addition operation, the subprocess is forcibly terminated to simulate a scenario where the Zvec process crashes during column building. +Finally, the main process attempts to reopen the collection and verify its state and functionality. + +Note: This script assumes that Zvec is a Python extension library. Directly killing the Python subprocess running Zvec operations +may not perfectly simulate the impact of system-level power failure on the C++ layer, but it can test the file state of the Zvec Python extension after abnormal process termination. +""" + +import zvec +import time +import tempfile +import subprocess +import signal +import sys +import os +import pytest +import json # Used to pass operation parameters and results +import threading + +try: + import psutil # Used for more reliable process management +except ImportError: + psutil = None # If psutil is not installed, set it to None +from distance_helper import * +from fixture_helper import * +from doc_helper import * + + +def singledoc_and_check( + collection: Collection, insert_doc, operator="insert", is_delete=1 +): + if operator == "insert": + result = collection.insert(insert_doc) + elif operator == "upsert": + result = collection.upsert(insert_doc) + elif operator == "update": + result = collection.update(insert_doc) + else: + logging.error("operator value is error!") + + assert bool(result) + assert result.ok() + + stats = collection.stats + assert stats is not None + #assert stats.doc_count == 1 + + fetched_docs = collection.fetch([insert_doc.id]) + assert len(fetched_docs) == 1 + assert 
insert_doc.id in fetched_docs + + fetched_doc = fetched_docs[insert_doc.id] + + assert is_doc_equal(fetched_doc, insert_doc, collection.schema) + assert hasattr(fetched_doc, "score"), "Document should have a score attribute" + assert fetched_doc.score == 0.0, ( + "Fetch operation should return default score of 0.0" + ) + + for k, v in DEFAULT_VECTOR_FIELD_NAME.items(): + if v != {}: + query_result = collection.query( + VectorQuery(field_name=v, vector=insert_doc.vectors[v]), + topk=1024, + ) + assert len(query_result) > 0, ( + f"Expected at least 1 query result, but got {len(query_result)}" + ) + + found_doc = None + for doc in query_result: + if doc.id == insert_doc.id: + found_doc = doc + break + assert found_doc is not None, ( + f"deleted document {insert_doc.id} not found in query results" + ) + assert is_doc_equal(found_doc, insert_doc, collection.schema, True, False) + if is_delete == 1: + collection.delete(insert_doc.id) + assert collection.stats.doc_count == 0, "Document should be deleted" + + +class TestCollectionCrashRecoveryAddColumn: + """ + Test Zvec collection recovery capability after simulating power failure/process crash during column addition. + Focus on verifying whether the file remains consistent after interruption of column addition operations, + and whether it can be reopened and used normally. + """ + + # Script content for subprocess to execute Zvec column addition operations + # Write this script content to a temporary file and execute it in the subprocess. 
+ ZVEC_SUBPROCESS_SCRIPT_ADDCOLUMN = ''' +import zvec +import time +import json +import sys +import os + + +def run_zvec_addcolumn_operations(args_json_str): + args = json.loads(args_json_str) + collection_path = args["collection_path"] + column_field_name = args.get("column_field_name", "new_column") # Field name for the new column + column_data_type = args.get("column_data_type", "INT32") # Data type of the new column + add_column_iterations = args.get("add_column_iterations", 10) # Number of column addition iterations + delay_between_additions = args.get("delay_between_additions", 0.5) # Delay between column additions + + print("[Subprocess] Starting Zvec add column operations on " + collection_path + " at: " + time.strftime('%Y-%m-%d %H:%M:%S')) + print("[Subprocess] Will add column '" + column_field_name + "' of type '" + column_data_type + "', " + str(add_column_iterations) + " times") + + try: + # Open existing collection + collection = zvec.open(collection_path) + print("[Subprocess] Successfully opened collection.") + + print("[Subprocess] Starting " + str(add_column_iterations) + " column addition operations...") + + # Loop to add columns multiple times - this increases the chance of interruption during the operation + for i in range(add_column_iterations): + column_name = column_field_name + "_" + str(i) + print("[Subprocess] Iteration " + str(i+1) + "/" + str(add_column_iterations) + ": Adding column '" + column_name + "'...") + + # Add column - this operation can take time and be interrupted + # Import the required data type + from zvec import FieldSchema, DataType, AddColumnOption + + # Map string data type to actual DataType (only supported types) + if column_data_type == "INT32": + data_type = DataType.INT32 + elif column_data_type == "INT64": + data_type = DataType.INT64 + elif column_data_type == "UINT32": + data_type = DataType.UINT32 + elif column_data_type == "UINT64": + data_type = DataType.UINT64 + elif column_data_type == "FLOAT": + 
data_type = DataType.FLOAT + elif column_data_type == "DOUBLE": + data_type = DataType.DOUBLE + else: + data_type = DataType.INT32 # Default fallback (supported type) + + # Create the new field schema + new_field = FieldSchema(column_name, data_type, nullable=True) + + # Add the column with a simple expression + collection.add_column( + field_schema=new_field, + expression="", # Empty expression means fill with default/null values + option=AddColumnOption() + ) + + print("[Subprocess] Iteration " + str(i+1) + ": Column '" + column_name + "' addition completed successfully.") + + # Add delay between iterations to allow interruption opportunity + if i < add_column_iterations - 1: # Don't sleep after the last iteration + print("[Subprocess] Waiting " + str(delay_between_additions) + "s before next column addition...") + time.sleep(delay_between_additions) + + if hasattr(collection, "close"): + collection.close() + else: + del collection # Use del as fallback + print("[Subprocess] Closed collection after column addition operations.") + + except Exception as e: + print("[Subprocess] Error during column addition operations: " + str(e)) + import traceback + traceback.print_exc() + # Optionally re-raise or handle differently + raise # Re-raising may be useful depending on how parent process responds + + print("[Subprocess] Column addition operations completed at: " + time.strftime('%Y-%m-%d %H:%M:%S')) + + +if __name__ == "__main__": + args_json_str = sys.argv[1] + run_zvec_addcolumn_operations(args_json_str) +''' + + def test_addcolumn_simulate_crash_during_column_addition_int32(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform INT32 column addition operations. + During the column addition operation, forcibly terminate the subprocess (simulate power failure or process crash). 
+ Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_addcolumn_with_crash_recovery(full_schema_1024, collection_option, "INT32") + + def test_addcolumn_simulate_crash_during_column_addition_int64(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform INT64 column addition operations. + During the column addition operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_addcolumn_with_crash_recovery(full_schema_1024, collection_option, "INT64") + + def test_addcolumn_simulate_crash_during_column_addition_uint32(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform UINT32 column addition operations. + During the column addition operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_addcolumn_with_crash_recovery(full_schema_1024, collection_option, "UINT32") + + def test_addcolumn_simulate_crash_during_column_addition_uint64(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform UINT64 column addition operations. + During the column addition operation, forcibly terminate the subprocess (simulate power failure or process crash). 
+ Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_addcolumn_with_crash_recovery(full_schema_1024, collection_option, "UINT64") + + def test_addcolumn_simulate_crash_during_column_addition_float(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform FLOAT column addition operations. + During the column addition operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_addcolumn_with_crash_recovery(full_schema_1024, collection_option, "FLOAT") + + def test_addcolumn_simulate_crash_during_column_addition_double(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform DOUBLE column addition operations. + During the column addition operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_addcolumn_with_crash_recovery(full_schema_1024, collection_option, "DOUBLE") + + def _test_addcolumn_with_crash_recovery(self, schema, collection_option, column_data_type): + """ + Common method to test column addition with crash recovery for different column types. 
+ """ + with tempfile.TemporaryDirectory() as temp_dir: + collection_path = f"{temp_dir}/test_collection_addcolumn_crash_recovery_{column_data_type.lower()}" + + # Step 1: Successfully create collection in main process and insert some documents + print(f"[Test] Step 1: Creating collection in main process, path: {collection_path}...") + coll = zvec.create_and_open(path=collection_path, schema=schema, option=collection_option) + assert coll is not None + print(f"[Test] Step 1.1: Collection created successfully.") + exp_doc_dict = {} + # Insert some documents to have data for column operations + for i in range(100): + exp_doc_dict[i] = {} + doc = generate_doc(i, coll.schema) + result = coll.insert([doc]) + assert result is not None and len(result) > 0, f"Failed to insert document {i}" + exp_doc_dict[i] = doc + + print(f"[Test] Step 1.2: Inserted 100 documents for column operations.") + + # Verify collection state before crash + initial_doc_count = coll.stats.doc_count + print(f"[Test] Step 1.3: Collection has {initial_doc_count} documents before crash simulation.") + + del coll + print(f"[Test] Step 1.4: Closed collection.") + + # Step 2: Prepare and run subprocess for column addition operations + # Write subprocess script to temporary file + subprocess_script_path = f"{temp_dir}/zvec_subprocess_addcolumn.py" + with open(subprocess_script_path, 'w', encoding='utf-8') as f: + f.write(self.ZVEC_SUBPROCESS_SCRIPT_ADDCOLUMN) + + # Prepare subprocess parameters + subprocess_args = { + "collection_path": collection_path, + "column_field_name": "test_new_column", # Use appropriate field name for this test + "column_data_type": column_data_type, # Type of column to add + "add_column_iterations": 20, # Number of column addition iterations to increase interruption chance + "delay_between_additions": 0.3 # Delay between column additions to allow interruption opportunity + } + args_json_str = json.dumps(subprocess_args) + + print( + f"[Test] Step 2: Starting {column_data_type} 
column addition operations in subprocess, path: {collection_path}") + # Start subprocess to execute column addition operations + proc = subprocess.Popen([ + sys.executable, subprocess_script_path, args_json_str + ]) + + # Wait briefly to allow subprocess to begin column addition operations + time.sleep(3) # Wait 3 seconds to allow column addition process to start + + print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + # Suddenly kill subprocess (simulate power failure or crash during column addition operations) + if psutil: + try: + # Use psutil to reliably terminate process and all its children + parent = psutil.Process(proc.pid) + children = parent.children(recursive=True) + for child in children: + child.kill() + parent.kill() + proc.wait(timeout=5) + except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + # If psutil is unavailable or process has been terminated, fall back to original method + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + else: + # If no psutil, use standard method to terminate process + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + print(f"[Test] Subprocess {proc.pid} has been terminated.") + + # Clean up temporary script file + os.remove(subprocess_script_path) + + # Step 3: Verify recovery situation in main process + print( + f"[Test] Step 3: Attempting to open collection after simulating crash during column addition operations...") + # Verification 3.1: Check if collection can be successfully opened after crash + recovered_collection = zvec.open(collection_path) + assert recovered_collection is not None, "Cannot 
open collection after crash" + print(f"[Test] Step 3.1: Verified collection can be opened after crash...") + + # Verification 3.2: Check data integrity (document count and content) + print(f"[Test] Step 3.2: Verifying data integrity...") + query_result = recovered_collection.query(topk=1024) + # We expect some documents to have been successfully inserted before crash + # The exact number depends on when the crash occurred during the bulk insertion process + print( + f"[Test] Step 3.2: Found {len(query_result)} documents after crash") + + current_count = recovered_collection.stats.doc_count + assert recovered_collection.stats.doc_count >= 1 + assert len(query_result) <= current_count, ( + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + + # Verify existing documents have correct structure + if len(query_result) > 0: + for doc in query_result[:1024]: + fetched_docs = recovered_collection.fetch([doc.id]) + '''print("doc.id,fetched_docs:\n") + print(doc.id, fetched_docs)''' + exp_doc = exp_doc_dict[int(doc.id)] + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema), ( + f"result doc={fetched_docs},doc_exp={exp_doc}") + + # 3.4: Check if query function works properly + print(f"[Test] Step 3.4: Verifying query function after crash...") + filtered_query = recovered_collection.query(filter=f"int32_field >=-100") + print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + assert len(filtered_query) > 0 + for doc in query_result: + fetched_docs = recovered_collection.fetch([doc.id]) + exp_doc = exp_doc_dict[int(doc.id)] + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema), ( + f"result doc={fetched_docs},doc_exp={exp_doc}") + + # Verification 3.5: Test insertion functionality after recovery + 
print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") + test_insert_doc = generate_doc(9999, schema) # Use original schema from fixture + singledoc_and_check(recovered_collection, test_insert_doc, operator="insert", is_delete=0) + + # Verification 3.6: Test update functionality after recovery + print(f"[Test] Step 3.6: Testing update functionality after recovery...") + updated_doc = generate_update_doc(9999, recovered_collection.schema) + singledoc_and_check(recovered_collection, updated_doc, operator="update", is_delete=0) + + # 3.7: Test deletion after recovery + print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") + doc_ids = ["9999"] + result = recovered_collection.delete(doc_ids) + assert len(result) == len(doc_ids) + for item in result: + assert item.ok() + + # Verification 3.8: Test adding a column after crash recovery + print(f"[Test] Step 3.8: Testing column addition after crash recovery...") + + # Now try to add a column after the crash recovery + from zvec import FieldSchema, DataType, AddColumnOption + + # Map string data type to actual DataType (only supported types) + if column_data_type == "INT32": + data_type = DataType.INT32 + elif column_data_type == "INT64": + data_type = DataType.INT64 + elif column_data_type == "UINT32": + data_type = DataType.UINT32 + elif column_data_type == "UINT64": + data_type = DataType.UINT64 + elif column_data_type == "FLOAT": + data_type = DataType.FLOAT + elif column_data_type == "DOUBLE": + data_type = DataType.DOUBLE + else: + data_type = DataType.INT32 # Default fallback (supported type) + + # This should succeed if the collection is properly recovered + recovered_collection.add_column( + field_schema=FieldSchema("post_crash_column", data_type, nullable=True), + expression="", + option=AddColumnOption() + ) + print(f"[Test] Step 3.8: {column_data_type} Column addition succeeded after crash recovery") + + # Only do a simple verification after column addition + 
stats_after_add_column = recovered_collection.stats + print(f"[Test] Step 3.8.1: Stats after column addition - doc_count: {stats_after_add_column.doc_count}") + + # 3.9: Check if query function works properly after column addition + print(f"[Test] Step 3.9: Verifying query function after column addition...") + # Use a simpler query that matches the field type + filtered_query = recovered_collection.query(filter=f"int32_field >= 0", topk=10) + print(f"[Test] Step 3.9.1: Field-filtered query returned {len(filtered_query)} documents") + assert len(filtered_query) > 0 \ No newline at end of file diff --git a/python/tests/detail/test_collection_crash_recovery_altercolumn.py b/python/tests/detail/test_collection_crash_recovery_altercolumn.py new file mode 100644 index 000000000..d6360c51e --- /dev/null +++ b/python/tests/detail/test_collection_crash_recovery_altercolumn.py @@ -0,0 +1,471 @@ +# -*- coding: utf-8 -*- +""" +test_collection_crash_recovery_altercolumn.py + +This script is used to test Zvec's recovery capability after simulating a "power failure" (forced process termination) during column update operations. +It first successfully creates a collection in the main process and inserts some documents, then starts a subprocess to open the collection and perform column update operations. +During the column update operation, the subprocess is forcibly terminated to simulate a scenario where the Zvec process crashes during column building. +Finally, the main process attempts to reopen the collection and verify its state and functionality. + +Note: This script assumes that Zvec is a Python extension library. Directly killing the Python subprocess running Zvec operations +may not perfectly simulate the impact of system-level power failure on the C++ layer, but it can test the file state of the Zvec Python extension after abnormal process termination. 
+""" + +import zvec +import time +import tempfile +import subprocess +import signal +import sys +import os +import pytest +import json # Used to pass operation parameters and results +import threading + +try: + import psutil # Used for more reliable process management +except ImportError: + psutil = None # If psutil is not installed, set it to None +from distance_helper import * +from fixture_helper import * +from doc_helper import * + + +def singledoc_and_check( + collection: Collection, insert_doc, operator="insert", is_delete=1 +): + if operator == "insert": + result = collection.insert(insert_doc) + elif operator == "upsert": + result = collection.upsert(insert_doc) + elif operator == "update": + result = collection.update(insert_doc) + else: + logging.error("operator value is error!") + + assert bool(result) + assert result.ok() + + stats = collection.stats + assert stats is not None + # assert stats.doc_count == 1 + + fetched_docs = collection.fetch([insert_doc.id]) + assert len(fetched_docs) == 1 + assert insert_doc.id in fetched_docs + + fetched_doc = fetched_docs[insert_doc.id] + + assert is_doc_equal(fetched_doc, insert_doc, collection.schema) + assert hasattr(fetched_doc, "score"), "Document should have a score attribute" + assert fetched_doc.score == 0.0, ( + "Fetch operation should return default score of 0.0" + ) + + for k, v in DEFAULT_VECTOR_FIELD_NAME.items(): + if v != {}: + query_result = collection.query( + VectorQuery(field_name=v, vector=insert_doc.vectors[v]), + topk=1024, + ) + assert len(query_result) > 0, ( + f"Expected at least 1 query result, but got {len(query_result)}" + ) + + found_doc = None + for doc in query_result: + if doc.id == insert_doc.id: + found_doc = doc + break + assert found_doc is not None, ( + f"deleted document {insert_doc.id} not found in query results" + ) + assert is_doc_equal(found_doc, insert_doc, collection.schema, True, False) + if is_delete == 1: + collection.delete(insert_doc.id) + assert 
collection.stats.doc_count == 0, "Document should be deleted" + + +class TestCollectionCrashRecoveryaltercolumn: + """ + Test Zvec collection recovery capability after simulating power failure/process crash during column update operations. + Focus on verifying whether the file remains consistent after interruption of column update operations, + and whether it can be reopened and used normally. + """ + + # Script content for subprocess to execute Zvec column update operations + # Write this script content to a temporary file and execute it in the subprocess. + ZVEC_SUBPROCESS_SCRIPT_altercolumn = ''' +import zvec +import time +import json +import sys +import os + + +def run_zvec_altercolumn_operations(args_json_str): + args = json.loads(args_json_str) + collection_path = args["collection_path"] + update_field_name = args.get("update_field_name", "int32_field") # Field name for the update + update_data_type = args.get("update_data_type", "INT32") # Data type of the field to update + update_iterations = args.get("update_iterations", 10) # Number of update operations iterations + delay_between_updates = args.get("delay_between_updates", 0.5) # Delay between update operations + + print("[Subprocess] Starting Zvec update column operations on " + collection_path + " at: " + time.strftime('%Y-%m-%d %H:%M:%S')) + print("[Subprocess] Will update field '" + update_field_name + "' of type '" + update_data_type + "', " + str(update_iterations) + " times") + + try: + # Open existing collection + collection = zvec.open(collection_path) + print("[Subprocess] Successfully opened collection.") + + print("[Subprocess] Starting " + str(update_iterations) + " column update operations...") + + # Loop to update columns multiple times - this increases the chance of interruption during the operation + for i in range(update_iterations): + print("[Subprocess] Iteration " + str(i+1) + "/" + str(update_iterations) + ": Updating field '" + update_field_name + "' schema...") + + # Update column 
schema - this operation can take time and be interrupted + # Import the required data type + from zvec import FieldSchema, DataType, AlterColumnOption + + # Map string data type to actual DataType (only supported types) + if update_data_type == "INT32": + data_type = DataType.INT32 + elif update_data_type == "INT64": + data_type = DataType.INT64 + elif update_data_type == "UINT32": + data_type = DataType.UINT32 + elif update_data_type == "UINT64": + data_type = DataType.UINT64 + elif update_data_type == "FLOAT": + data_type = DataType.FLOAT + elif update_data_type == "DOUBLE": + data_type = DataType.DOUBLE + else: + data_type = DataType.INT32 # Default fallback (supported type) + + # Create the new field schema + new_field = FieldSchema(update_field_name, data_type, nullable=True) + + # Update the column with new schema - this is the operation we want to interrupt + collection.alter_column( + old_name=update_field_name, + field_schema=new_field, + option=AlterColumnOption() + ) + + print("[Subprocess] Iteration " + str(i+1) + ": Column '" + update_field_name + "' schema update completed successfully.") + + # Add delay between iterations to allow interruption opportunity + if i < update_iterations - 1: # Don't sleep after the last iteration + print("[Subprocess] Waiting " + str(delay_between_updates) + "s before next column update...") + time.sleep(delay_between_updates) + + if hasattr(collection, "close"): + collection.close() + else: + del collection # Use del as fallback + print("[Subprocess] Closed collection after column update operations.") + + except Exception as e: + print("[Subprocess] Error during column update operations: " + str(e)) + import traceback + traceback.print_exc() + # Optionally re-raise or handle differently + raise # Re-raising may be useful depending on how parent process responds + + print("[Subprocess] Column update operations completed at: " + time.strftime('%Y-%m-%d %H:%M:%S')) + + +if __name__ == "__main__": + args_json_str = 
sys.argv[1] + run_zvec_altercolumn_operations(args_json_str) +''' + + def test_altercolumn_simulate_crash_during_column_update_int32(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform INT32 column update operations. + During the column update operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_altercolumn_with_crash_recovery(full_schema_1024, collection_option, "INT32", "int32_field1") + + def test_altercolumn_simulate_crash_during_column_update_int64(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform INT64 column update operations. + During the column update operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_altercolumn_with_crash_recovery(full_schema_1024, collection_option, "INT64", "int64_field1") + + def test_altercolumn_simulate_crash_during_column_update_uint32(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform UINT32 column update operations. + During the column update operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
+ """ + self._test_altercolumn_with_crash_recovery(full_schema_1024, collection_option, "UINT32", "uint32_field1") + + def test_altercolumn_simulate_crash_during_column_update_uint64(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform UINT64 column update operations. + During the column update operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_altercolumn_with_crash_recovery(full_schema_1024, collection_option, "UINT64", "uint64_field1") + + def test_altercolumn_simulate_crash_during_column_update_float(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform FLOAT column update operations. + During the column update operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_altercolumn_with_crash_recovery(full_schema_1024, collection_option, "FLOAT", "float_field1") + + def test_altercolumn_simulate_crash_during_column_update_double(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform DOUBLE column update operations. + During the column update operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
+ """ + self._test_altercolumn_with_crash_recovery(full_schema_1024, collection_option, "DOUBLE", "double_field1") + + def _test_altercolumn_with_crash_recovery(self, schema, collection_option, update_data_type, update_field_name): + """ + Common method to test column update with crash recovery for different column types. + """ + with tempfile.TemporaryDirectory() as temp_dir: + collection_path = f"{temp_dir}/test_collection_altercolumn_crash_recovery_{update_data_type.lower()}" + + # Step 1: Successfully create collection in main process and insert some documents + print(f"[Test] Step 1: Creating collection in main process, path: {collection_path}...") + coll = zvec.create_and_open(path=collection_path, schema=schema, option=collection_option) + assert coll is not None + print(f"[Test] Step 1.1: Collection created successfully.") + + # First, add the column we'll be updating later, so alter_column can modify it + from zvec import FieldSchema, DataType, AddColumnOption + if update_data_type == "INT32": + data_type = DataType.INT32 + elif update_data_type == "INT64": + data_type = DataType.INT64 + elif update_data_type == "UINT32": + data_type = DataType.UINT32 + elif update_data_type == "UINT64": + data_type = DataType.UINT64 + elif update_data_type == "FLOAT": + data_type = DataType.FLOAT + elif update_data_type == "DOUBLE": + data_type = DataType.DOUBLE + else: + data_type = DataType.INT32 # Default fallback (supported type) + + # Add the column with initial schema + initial_field = FieldSchema(update_field_name, data_type, nullable=True) + coll.add_column( + field_schema=initial_field, + expression="", # Empty expression means fill with default/null values + option=AddColumnOption() + ) + print(f"[Test] Step 1.1.1: Added column '{update_field_name}' to collection.") + + exp_doc_dict = {} + # Insert some documents to have data for column operations + for i in range(50): # Reduced for faster testing + exp_doc_dict[i] = {} + doc = generate_doc(i, coll.schema) + 
result = coll.insert([doc]) + assert result is not None and len(result) > 0, f"Failed to insert document {i}" + exp_doc_dict[i] = doc + + print(f"[Test] Step 1.2: Inserted 50 documents for column operations.") + + # Verify collection state before crash + initial_doc_count = coll.stats.doc_count + print(f"[Test] Step 1.3: Collection has {initial_doc_count} documents before crash simulation.") + + del coll + print(f"[Test] Step 1.4: Closed collection.") + + # Step 2: Prepare and run subprocess for column update operations + # Write subprocess script to temporary file + subprocess_script_path = f"{temp_dir}/zvec_subprocess_altercolumn.py" + with open(subprocess_script_path, 'w', encoding='utf-8') as f: + f.write(self.ZVEC_SUBPROCESS_SCRIPT_altercolumn) + + # Prepare subprocess parameters + subprocess_args = { + "collection_path": collection_path, + "update_field_name": update_field_name, # Use appropriate field name for this test + "update_data_type": update_data_type, # Type of field to update + "update_iterations": 20, # Number of update iterations to increase interruption chance + "delay_between_updates": 0.3 # Delay between updates to allow interruption opportunity + } + args_json_str = json.dumps(subprocess_args) + + print( + f"[Test] Step 2: Starting {update_data_type} column update operations in subprocess, path: {collection_path}") + # Start subprocess to execute column update operations + proc = subprocess.Popen([ + sys.executable, subprocess_script_path, args_json_str + ]) + + # Wait briefly to allow subprocess to begin column update operations + time.sleep(3) # Wait 3 seconds to allow column update process to start + + print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + # Suddenly kill subprocess (simulate power failure or crash during column update operations) + if psutil: + try: + # Use psutil to reliably terminate process and all its children + parent = psutil.Process(proc.pid) + children = 
parent.children(recursive=True) + for child in children: + child.kill() + parent.kill() + proc.wait(timeout=5) + except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + # If psutil is unavailable or process has been terminated, fall back to original method + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + else: + # If no psutil, use standard method to terminate process + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + print(f"[Test] Subprocess {proc.pid} has been terminated.") + + # Clean up temporary script file + os.remove(subprocess_script_path) + + # Step 3: Verify recovery situation in main process + print( + f"[Test] Step 3: Attempting to open collection after simulating crash during column update operations...") + # Verification 3.1: Check if collection can be successfully opened after crash + recovered_collection = zvec.open(collection_path) + assert recovered_collection is not None, "Cannot open collection after crash" + print(f"[Test] Step 3.1: Verified collection can be opened after crash...") + + # Verification 3.2: Check data integrity (document count and content) + print(f"[Test] Step 3.2: Verifying data integrity...") + query_result = recovered_collection.query(topk=1024) + # We expect some documents to have been successfully inserted before crash + # The exact number depends on when the crash occurred during the bulk insertion process + print( + f"[Test] Step 3.2: Found {len(query_result)} documents after crash") + + current_count = recovered_collection.stats.doc_count + assert recovered_collection.stats.doc_count >= 1 + assert len(query_result) <= current_count, ( + 
f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + + # Verify existing documents have correct structure + if len(query_result) > 0: + for doc in query_result[:50]: # Limit to first 50 for efficiency + fetched_docs = recovered_collection.fetch([doc.id]) + '''print("doc.id,fetched_docs:\n") + print(doc.id, fetched_docs)''' + exp_doc = exp_doc_dict[int(doc.id)] + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + # Note: The doc content may have been partially updated before the crash + # So we only verify the schema structure and basic fields + assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema, + True, True), ( + f"result doc={fetched_docs},doc_exp={exp_doc}") + + # 3.4: Check if query function works properly + print(f"[Test] Step 3.4: Verifying query function after crash...") + filtered_query = recovered_collection.query(filter=f"int32_field >=-100") + print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + assert len(filtered_query) > 0 + for doc in query_result[:10]: # Check first 10 docs + fetched_docs = recovered_collection.fetch([doc.id]) + exp_doc = exp_doc_dict[int(doc.id)] + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema, + True, True), ( + f"result doc={fetched_docs},doc_exp={exp_doc}") + + # Verification 3.5: Test insertion functionality after recovery + print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") + test_insert_doc = generate_doc(9999, schema) # Use original schema from fixture + singledoc_and_check(recovered_collection, test_insert_doc, operator="insert", is_delete=0) + + # Verification 3.6: Test update functionality after recovery + print(f"[Test] Step 3.6: Testing update functionality after recovery...") + updated_doc = generate_update_doc(9999, recovered_collection.schema) + 
singledoc_and_check(recovered_collection, updated_doc, operator="update", is_delete=0) + + # 3.7: Test deletion after recovery + print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") + doc_ids = ["9999"] + result = recovered_collection.delete(doc_ids) + assert len(result) == len(doc_ids) + for item in result: + assert item.ok() + + # Verification 3.8: Test updating a column after crash recovery + print(f"[Test] Step 3.8: Testing column update after crash recovery...") + + # Now try to update a column after the crash recovery + from zvec import FieldSchema, DataType, AlterColumnOption + + # Map string data type to actual update value + if update_data_type == "INT32": + data_type = DataType.INT32 + elif update_data_type == "INT64": + data_type = DataType.INT64 + elif update_data_type == "UINT32": + data_type = DataType.UINT32 + elif update_data_type == "UINT64": + data_type = DataType.UINT64 + elif update_data_type == "FLOAT": + data_type = DataType.FLOAT + elif update_data_type == "DOUBLE": + data_type = DataType.DOUBLE + else: + data_type = DataType.INT32 # Default fallback (supported type) + + # Create the new field schema + new_field = FieldSchema(update_field_name, data_type, nullable=True) + + # This should succeed if the collection is properly recovered + try: + recovered_collection.alter_column( + old_name=update_field_name, + field_schema=new_field, + option=AlterColumnOption() + ) + print(f"[Test] Step 3.8: {update_data_type} Column update succeeded after crash recovery") + except Exception as e: + print(f"[Test] Step 3.8: {update_data_type} Column update failed after crash recovery: {str(e)}") + # This might happen if the column was already altered during the interrupted operation + + # Only do a simple verification after column update + stats_after_update_column = recovered_collection.stats + print(f"[Test] Step 3.8.1: Stats after column update - doc_count: {stats_after_update_column.doc_count}") + + # 3.9: Check if query function 
works properly after column update + print(f"[Test] Step 3.9: Verifying query function after column update...") + # Use a simpler query that matches the field type + filtered_query = recovered_collection.query(filter=f"{update_field_name} >= 0", topk=10) + print(f"[Test] Step 3.9.1: Field-filtered query returned {len(filtered_query)} documents") + # Note: After column operations, query results may vary diff --git a/python/tests/detail/test_collection_crash_recovery_createindex.py b/python/tests/detail/test_collection_crash_recovery_createindex.py new file mode 100644 index 000000000..b480f40a2 --- /dev/null +++ b/python/tests/detail/test_collection_crash_recovery_createindex.py @@ -0,0 +1,464 @@ +# -*- coding: utf-8 -*- +""" +test_collection_crash_recovery_createindex.py + +This script is used to test Zvec's recovery capability after simulating a "power failure" (forced process termination) during index creation. +It first successfully creates a collection in the main process and inserts some documents, then starts a subprocess to open the collection and perform index creation operations. +During the index creation operation, the subprocess is forcibly terminated to simulate a scenario where the Zvec process crashes during index building. +Finally, the main process attempts to reopen the collection and verify its state and functionality. + +Note: This script assumes that Zvec is a Python extension library. Directly killing the Python subprocess running Zvec operations +may not perfectly simulate the impact of system-level power failure on the C++ layer, but it can test the file state of the Zvec Python extension after abnormal process termination. 
+""" + +import zvec +import time +import tempfile +import subprocess +import signal +import sys +import os +import pytest +import json # Used to pass operation parameters and results +import threading + +try: + import psutil # Used for more reliable process management +except ImportError: + psutil = None # If psutil is not installed, set it to None +from distance_helper import * +from fixture_helper import * +from doc_helper import * + + + + +def singledoc_and_check( + collection: Collection, insert_doc, operator="insert", is_delete=1 +): + if operator == "insert": + result = collection.insert(insert_doc) + elif operator == "upsert": + result = collection.upsert(insert_doc) + elif operator == "update": + result = collection.update(insert_doc) + else: + logging.error("operator value is error!") + + assert bool(result) + assert result.ok() + + stats = collection.stats + assert stats is not None + #assert stats.doc_count == 1 + + fetched_docs = collection.fetch([insert_doc.id]) + assert len(fetched_docs) == 1 + assert insert_doc.id in fetched_docs + + fetched_doc = fetched_docs[insert_doc.id] + + assert is_doc_equal(fetched_doc, insert_doc, collection.schema) + assert hasattr(fetched_doc, "score"), "Document should have a score attribute" + assert fetched_doc.score == 0.0, ( + "Fetch operation should return default score of 0.0" + ) + + for k, v in DEFAULT_VECTOR_FIELD_NAME.items(): + if v != {}: + query_result = collection.query( + VectorQuery(field_name=v, vector=insert_doc.vectors[v]), + topk=1024, + ) + assert len(query_result) > 0, ( + f"Expected at least 1 query result, but got {len(query_result)}" + ) + + found_doc = None + for doc in query_result: + if doc.id == insert_doc.id: + found_doc = doc + break + assert found_doc is not None, ( + f"deleted document {insert_doc.id} not found in query results" + ) + assert is_doc_equal(found_doc, insert_doc, collection.schema, True, False) + if is_delete == 1: + collection.delete(insert_doc.id) + assert 
collection.stats.doc_count == 0, "Document should be deleted" + + +#@pytest.mark.skip("Known issue") +class TestCollectionCrashRecoveryCreateIndex: + """ + Test Zvec collection recovery capability after simulating power failure/process crash during index creation. + Focus on verifying whether the file remains consistent after interruption of index creation operations, + and whether it can be reopened and used normally. + """ + + # Script content for subprocess to execute Zvec index creation operations + # Write this script content to a temporary file and execute it in the subprocess. + ZVEC_SUBPROCESS_SCRIPT_CREATEINDEX = ''' +import zvec +import time +import json +import sys +import os + + +def run_zvec_createindex_operations(args_json_str): + args = json.loads(args_json_str) + collection_path = args["collection_path"] + index_field = args.get("index_field", "int32_field") # Field to create index on + index_type = args.get("index_type", "INVERT") # Type of index to create + index_creation_iterations = args.get("index_creation_iterations", 10) # Number of index creation iterations + delay_between_creations = args.get("delay_between_creations", 0.5) # Delay between index creations + + print(f"[Subprocess] Starting Zvec create index operations on {collection_path} at: {time.strftime('%Y-%m-%d %H:%M:%S')}") + print(f"[Subprocess] Will create {index_type} index on field '{index_field}', {index_creation_iterations} times") + + try: + # Open existing collection + collection = zvec.open(collection_path) + print(f"[Subprocess] Successfully opened collection.") + + print(f"[Subprocess] Starting {index_creation_iterations} {index_type} index creation operations...") + + # Loop to create indexes multiple times - this increases the chance of interruption during the operation + for i in range(index_creation_iterations): + print(f"[Subprocess] Iteration {i+1}/{index_creation_iterations}: Creating {index_type} index on field '{index_field}'...") + + # Create index - this 
operation can take time and be interrupted + # Import the required index parameter classes + if index_type == "INVERT": + from zvec import InvertIndexParam, IndexOption + collection.create_index( + field_name=index_field, + index_param=InvertIndexParam(), + option=IndexOption() + ) + elif index_type == "HNSW": + from zvec import HnswIndexParam, IndexOption + collection.create_index( + field_name=index_field, + index_param=HnswIndexParam(), + option=IndexOption() + ) + elif index_type == "FLAT": + from zvec import FlatIndexParam, IndexOption + collection.create_index( + field_name=index_field, + index_param=FlatIndexParam(), + option=IndexOption() + ) + elif index_type == "IVF": + from zvec import IVFIndexParam, IndexOption + collection.create_index( + field_name=index_field, + index_param=IVFIndexParam(), + option=IndexOption() + ) + else: + print(f"[Subprocess] Unknown index type: {index_type}") + raise ValueError(f"Unknown index type: {index_type}") + + print(f"[Subprocess] Iteration {i+1}: {index_type} Index creation completed successfully on field '{index_field}'.") + + # Add delay between iterations to allow interruption opportunity + if i < index_creation_iterations - 1: # Don't sleep after the last iteration + print(f"[Subprocess] Waiting {delay_between_creations}s before next index creation...") + time.sleep(delay_between_creations) + + if hasattr(collection, "close"): + collection.close() + else: + del collection # Use del as fallback + print(f"[Subprocess] Closed collection after index creation operations.") + + except Exception as e: + print(f"[Subprocess] Error during index creation operations: {e}") + import traceback + traceback.print_exc() + # Optionally re-raise or handle differently + raise # Re-raising may be useful depending on how parent process responds + + print(f"[Subprocess] Index creation operations completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}") + + +if __name__ == "__main__": + args_json_str = sys.argv[1] + 
run_zvec_createindex_operations(args_json_str) +''' + + def test_createindex_simulate_crash_during_index_creation_invert(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform INVERT index creation operations. + During the index creation operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_createindex_with_crash_recovery(full_schema_1024, collection_option, "INVERT") + + def test_createindex_simulate_crash_during_index_creation_hnsw(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform HNSW index creation operations. + During the index creation operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_createindex_with_crash_recovery(full_schema_1024, collection_option, "HNSW") + + def test_createindex_simulate_crash_during_index_creation_flat(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform FLAT index creation operations. + During the index creation operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
+ """ + self._test_createindex_with_crash_recovery(full_schema_1024, collection_option, "FLAT") + + def test_createindex_simulate_crash_during_index_creation_ivf(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform IVF index creation operations. + During the index creation operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_createindex_with_crash_recovery(full_schema_1024, collection_option, "IVF") + + def _test_createindex_with_crash_recovery(self, schema, collection_option, index_type): + """ + Common method to test index creation with crash recovery for different index types. + """ + with tempfile.TemporaryDirectory() as temp_dir: + collection_path = f"{temp_dir}/test_collection_createindex_crash_recovery_{index_type.lower()}" + + # Step 1: Successfully create collection in main process and insert some documents + print(f"[Test] Step 1: Creating collection in main process, path: {collection_path}...") + coll = zvec.create_and_open(path=collection_path, schema=schema, option=collection_option) + assert coll is not None + print(f"[Test] Step 1.1: Collection created successfully.") + + # Insert some documents to have data for indexing + for i in range(100): + doc = generate_doc(i, coll.schema) + result = coll.insert([doc]) + assert result is not None and len(result) > 0, f"Failed to insert document {i}" + + print(f"[Test] Step 1.2: Inserted 100 documents for indexing.") + + # Verify collection state before crash + initial_doc_count = coll.stats.doc_count + print(f"[Test] Step 1.3: Collection has {initial_doc_count} documents before crash simulation.") + + del coll + print(f"[Test] Step 1.4: Closed collection.") + + # Step 2: Prepare and run 
subprocess for index creation operations + # Write subprocess script to temporary file + subprocess_script_path = f"{temp_dir}/zvec_subprocess_createindex.py" + with open(subprocess_script_path, 'w', encoding='utf-8') as f: + f.write(self.ZVEC_SUBPROCESS_SCRIPT_CREATEINDEX) + + # Determine the appropriate field for each index type + if index_type == "INVERT": + field_for_index = "int32_field" # Scalar fields support INVERT index + elif index_type == "HNSW": + from zvec import DataType + field_for_index = DEFAULT_VECTOR_FIELD_NAME[DataType.VECTOR_FP32] # Use vector field for HNSW + elif index_type == "FLAT": + from zvec import DataType + field_for_index = DEFAULT_VECTOR_FIELD_NAME[DataType.VECTOR_FP32] # Use vector field for FLAT + elif index_type == "IVF": + from zvec import DataType + field_for_index = DEFAULT_VECTOR_FIELD_NAME[DataType.VECTOR_FP32] # Use vector field for IVF + else: + print("index_type is error!") + + # Prepare subprocess parameters + subprocess_args = { + "collection_path": collection_path, + "index_field": field_for_index, # Use appropriate field for this index type + "index_type": index_type, # Type of index to create + "index_creation_iterations": 20, # Number of index creation iterations to increase interruption chance + "delay_between_creations": 0.3 # Delay between index creations to allow interruption opportunity + } + args_json_str = json.dumps(subprocess_args) + + print( + f"[Test] Step 2: Starting {index_type} index creation operations in subprocess, path: {collection_path}") + # Start subprocess to execute index creation operations + proc = subprocess.Popen([ + sys.executable, subprocess_script_path, args_json_str + ]) + + # Wait briefly to allow subprocess to begin index creation operations + time.sleep(3) # Wait 3 seconds to allow indexing process to start + + print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + # Suddenly kill subprocess (simulate power failure or crash during index 
creation operations) + if psutil: + try: + # Use psutil to reliably terminate process and all its children + parent = psutil.Process(proc.pid) + children = parent.children(recursive=True) + for child in children: + child.kill() + parent.kill() + proc.wait(timeout=5) + except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + # If psutil is unavailable or process has been terminated, fall back to original method + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + else: + # If no psutil, use standard method to terminate process + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + print(f"[Test] Subprocess {proc.pid} has been terminated.") + + # Clean up temporary script file + os.remove(subprocess_script_path) + + # Step 3: Verify recovery situation in main process + print( + f"[Test] Step 3: Attempting to open collection after simulating crash during document insertion operations...") + # Verification 3.1: Check if collection can be successfully opened after crash + recovered_collection = zvec.open(collection_path) + assert recovered_collection is not None, "Cannot open collection after crash" + print(f"[Test] Step 3.1: Verified collection can be opened after crash...") + + # Verification 3.2: Check data integrity (document count and content) + print(f"[Test] Step 3.2: Verifying data integrity...") + query_result = recovered_collection.query(topk=1024) + # We expect some documents to have been successfully inserted before crash + # The exact number depends on when the crash occurred during the bulk insertion process + print( + f"[Test] Step 3.2: Found {len(query_result)} documents after crash") + + 
current_count = recovered_collection.stats.doc_count + assert recovered_collection.stats.doc_count >= 1 + assert len(query_result) <= current_count, ( + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + + # Verify existing documents have correct structure + if len(query_result) > 0: + for doc in query_result[:1024]: + fetched_docs = recovered_collection.fetch([doc.id]) + #print("doc.id,fetched_docs:\n") + #print(doc.id, fetched_docs) + exp_doc = generate_doc(int(doc.id), recovered_collection.schema) + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema), ( + f"result doc={fetched_docs},doc_exp={exp_doc}") + + # 3.4: Check if index is complete and query function works properly + print(f"[Test] Step 3.4: Verifying index integrity and query function...") + filtered_query = recovered_collection.query(filter=f"int32_field >=-100") + print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + assert len(filtered_query) > 0 + for doc in query_result: + fetched_docs = recovered_collection.fetch([doc.id]) + #print("doc.id,fetched_docs:\n") + #print(doc.id, fetched_docs) + exp_doc = generate_doc(int(doc.id), recovered_collection.schema) + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema), ( + f"result doc={fetched_docs},doc_exp={exp_doc}") + + # Verification 3.5: Test insertion functionality after recovery + print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") + test_insert_doc = generate_doc(9999, schema) # Use original schema from fixture + singledoc_and_check(recovered_collection, test_insert_doc, operator="insert", is_delete=0) + + # Verification 3.6: Test update functionality after recovery + print(f"[Test] Step 3.6: Testing update functionality after recovery...") + updated_doc = 
generate_update_doc(9999, recovered_collection.schema) + singledoc_and_check(recovered_collection, updated_doc, operator="update", is_delete=0) + + # 3.7: Test deletion after recovery + print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") + doc_ids = ["9999"] + result = recovered_collection.delete(doc_ids) + assert len(result) == len(doc_ids) + for item in result: + assert item.ok() + + # Verification 3.8: Test creating index after crash recovery + print(f"[Test] Step 3.8: Testing index creation after crash recovery...") + + # Now try to create an index after the crash recovery + if index_type == "INVERT": + from zvec import InvertIndexParam, IndexOption + index_param = InvertIndexParam() + elif index_type == "HNSW": + from zvec import HnswIndexParam, IndexOption + index_param = HnswIndexParam() + elif index_type == "FLAT": + from zvec import FlatIndexParam, IndexOption + index_param = FlatIndexParam() + elif index_type == "IVF": + from zvec import IVFIndexParam, IndexOption + index_param = IVFIndexParam() + else: + from zvec import InvertIndexParam, IndexOption + index_param = InvertIndexParam() + + # Determine the appropriate field for each index type + if index_type == "INVERT": + field_to_recreate = "int32_field" # Scalar fields support INVERT index + elif index_type == "HNSW": + from zvec import DataType + field_to_recreate = DEFAULT_VECTOR_FIELD_NAME[DataType.VECTOR_FP32] # Use vector field for HNSW + elif index_type == "FLAT": + from zvec import DataType + field_to_recreate = DEFAULT_VECTOR_FIELD_NAME[DataType.VECTOR_FP32] # Use vector field for FLAT + elif index_type == "IVF": + from zvec import DataType + field_to_recreate = DEFAULT_VECTOR_FIELD_NAME[DataType.VECTOR_FP32] # Use vector field for IVF + else: + field_to_recreate = "int32_field" # Default to scalar field + + # This should succeed if the collection is properly recovered + recovered_collection.create_index( + field_name=field_to_recreate, + index_param=index_param, + 
option=IndexOption() + ) + print(f"[Test] Step 3.8: {index_type} Index creation succeeded after crash recovery on field {field_to_recreate}") + + # Only do a simple verification after index creation + stats_after_index = recovered_collection.stats + print(f"[Test] Step 3.8.1: Stats after index creation - doc_count: {stats_after_index.doc_count}") + + # 3.9: Check if index is complete and query function works properly + print(f"[Test] Step 3.9: Verifying index integrity and query function...") + # Use a simpler query that matches the field type + if index_type == "INVERT": + # Query on scalar field + filtered_query = recovered_collection.query(filter=f"int32_field >= 0", topk=10) + print(f"[Test] Step 3.9.1: Field-filtered query returned {len(filtered_query)} documents") + assert len(filtered_query) > 0 + elif index_type in ["HNSW", "FLAT", "IVF"]: + # Query on vector field using vector search + import random + test_vector = [random.random() for _ in range(VECTOR_DIMENSION_1024)] # Assuming 1024-dim vector + vector_query_result = recovered_collection.query( + VectorQuery(field_name=field_to_recreate, vector=test_vector), + topk=5 + ) + print(f"[Test] Step 3.9.1: Vector query returned {len(vector_query_result)} documents") + assert len(vector_query_result) > 0 + + diff --git a/python/tests/detail/test_collection_crash_recovery_deletedoc.py b/python/tests/detail/test_collection_crash_recovery_deletedoc.py new file mode 100644 index 000000000..644749479 --- /dev/null +++ b/python/tests/detail/test_collection_crash_recovery_deletedoc.py @@ -0,0 +1,445 @@ +# -*- coding: utf-8 -*- +""" +test_collection_crash_recovery_deletedoc.py + +This script is used to test Zvec's recovery capability after simulating a "power failure" (forced process termination) during bulk document deletion (insertdoc). +It first successfully creates a collection in the main process, then starts a subprocess to open the collection and perform bulk document deletion operations. 
+During the deletion operation, the subprocess is forcibly terminated to simulate a scenario where the Zvec process crashes during document deletion. +Finally, the main process attempts to reopen the collection and verify its state and functionality. + +Note: This script assumes that Zvec is a Python extension library. Directly killing the Python subprocess running Zvec operations +may not perfectly simulate the impact of system-level power failure on the C++ layer, but it can test the file state of the Zvec Python extension after abnormal process termination. +""" + +import zvec +import time +import tempfile +import subprocess +import signal +import sys +import os +import pytest +import json # Used to pass operation parameters and results +import threading + +try: + import psutil # Used for more reliable process management +except ImportError: + psutil = None # If psutil is not installed, set it to None +from fixture_helper import * +from doc_helper import generate_doc + +from distance_helper import * +from fixture_helper import * +from doc_helper import * + + +def singledoc_and_check( + collection: Collection, insert_doc, operator="insert", is_delete=1 +): + if operator == "insert": + result = collection.insert(insert_doc) + elif operator == "upsert": + result = collection.upsert(insert_doc) + elif operator == "update": + result = collection.update(insert_doc) + else: + logging.error("operator value is error!") + + assert bool(result) + assert result.ok() + + stats = collection.stats + assert stats is not None + #assert stats.doc_count == 1 + + fetched_docs = collection.fetch([insert_doc.id]) + assert len(fetched_docs) == 1 + assert insert_doc.id in fetched_docs + + fetched_doc = fetched_docs[insert_doc.id] + + assert is_doc_equal(fetched_doc, insert_doc, collection.schema) + assert hasattr(fetched_doc, "score"), "Document should have a score attribute" + assert fetched_doc.score == 0.0, ( + "Fetch operation should return default score of 0.0" + ) + + for k, v in 
DEFAULT_VECTOR_FIELD_NAME.items(): + if v != {}: + query_result = collection.query( + VectorQuery(field_name=v, vector=insert_doc.vectors[v]), + topk=1024, + ) + assert len(query_result) > 0, ( + f"Expected at least 1 query result, but got {len(query_result)}" + ) + + found_doc = None + for doc in query_result: + if doc.id == insert_doc.id: + found_doc = doc + break + assert found_doc is not None, ( + f"deleted document {insert_doc.id} not found in query results" + ) + assert is_doc_equal(found_doc, insert_doc, collection.schema, True, False) + if is_delete == 1: + collection.delete(insert_doc.id) + assert collection.stats.doc_count == 0, "Document should be deleted" + + +class TestCollectionCrashRecoveryDeleteDoc: + """ + Test Zvec collection recovery capability after simulating power failure/process crash during document deletion. + Focus on verifying whether the file remains consistent after interruption of document deletion operations, + and whether it can be reopened and used normally. + """ + + # Script content for subprocess to execute Zvec document deletion operations + # Write this script content to a temporary file and execute it in the subprocess. 
+ ZVEC_SUBPROCESS_SCRIPT_DELETEDOC = ''' +import zvec +import time +import json +import sys +import os +import math +import random +import string +from typing import Literal +import pytest + + +def generate_constant_vector( + i: int, dimension: int, dtype: Literal["int8", "float16", "float32"] = "float32" +): + if dtype == "int8": + vec = [(i % 127)] * dimension + vec[i % dimension] = ((i + 1) % 127) + else: + base_val = (i % 1000) / 256.0 + special_val = ((i + 1) % 1000) / 256.0 + vec = [base_val] * dimension + vec[i % dimension] = special_val + + return vec + + +def generate_sparse_vector(i: int): + return {i: i + 0.1} + + +def generate_vectordict(i: int, schema: zvec.CollectionSchema): + doc_fields = {} + doc_vectors = {} + for field in schema.fields: + if field.data_type == zvec.DataType.BOOL: + doc_fields[field.name] = i % 2 == 0 + elif field.data_type == zvec.DataType.INT32: + doc_fields[field.name] = i + elif field.data_type == zvec.DataType.UINT32: + doc_fields[field.name] = i + elif field.data_type == zvec.DataType.INT64: + doc_fields[field.name] = i + elif field.data_type == zvec.DataType.UINT64: + doc_fields[field.name] = i + elif field.data_type == zvec.DataType.FLOAT: + doc_fields[field.name] = float(i) + 0.1 + elif field.data_type == zvec.DataType.DOUBLE: + doc_fields[field.name] = float(i) + 0.11 + elif field.data_type == zvec.DataType.STRING: + doc_fields[field.name] = f"test_{i}" + elif field.data_type == zvec.DataType.ARRAY_BOOL: + doc_fields[field.name] = [i % 2 == 0, i % 3 == 0] + elif field.data_type == zvec.DataType.ARRAY_INT32: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_UINT32: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_INT64: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_UINT64: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_FLOAT: + doc_fields[field.name] = 
[float(i + 0.1), float(i + 1.1), float(i + 2.1)] + elif field.data_type == zvec.DataType.ARRAY_DOUBLE: + doc_fields[field.name] = [float(i + 0.11), float(i + 1.11), float(i + 2.11)] + elif field.data_type == zvec.DataType.ARRAY_STRING: + doc_fields[field.name] = [f"test_{i}", f"test_{i + 1}", f"test_{i + 2}"] + else: + raise ValueError(f"Unsupported field type: {field.data_type}") + + for vector in schema.vectors: + if vector.data_type == zvec.DataType.VECTOR_FP16: + doc_vectors[vector.name] = generate_constant_vector( + i, vector.dimension, "float16" + ) + elif vector.data_type == zvec.DataType.VECTOR_FP32: + doc_vectors[vector.name] = generate_constant_vector( + i, vector.dimension, "float32" + ) + elif vector.data_type == zvec.DataType.VECTOR_INT8: + doc_vectors[vector.name] = generate_constant_vector( + i, + vector.dimension, + "int8", + ) + elif vector.data_type == zvec.DataType.SPARSE_VECTOR_FP32: + doc_vectors[vector.name] = generate_sparse_vector(i) + elif vector.data_type == zvec.DataType.SPARSE_VECTOR_FP16: + doc_vectors[vector.name] = generate_sparse_vector(i) + else: + raise ValueError(f"Unsupported vector type: {vector.data_type}") + + return doc_fields, doc_vectors + + +def generate_doc(i: int, schema: zvec.CollectionSchema) -> zvec.Doc: + doc_fields = {} + doc_vectors = {} + doc_fields, doc_vectors = generate_vectordict(i, schema) + doc = zvec.Doc(id=str(i), fields=doc_fields, vectors=doc_vectors) + return doc + + +def run_zvec_deletedoc_operations(args_json_str): + args = json.loads(args_json_str) + collection_path = args["collection_path"] + num_docs_to_delete = args.get("num_docs_to_delete", 100) # Number of documents to insert + batch_size = args.get("batch_size", 10) # Batch size for each deletion + delay_between_batches = args.get("delay_between_batches", 0.1) # Delay between batches + + print(f"[Subprocess] Starting Zvec insert document operations on {collection_path} at: {time.strftime('%Y-%m-%d %H:%M:%S')}") + print(f"[Subprocess] Will 
insert {num_docs_to_delete} documents in batches of {batch_size}") + + try: + # Open existing collection + collection = zvec.open(collection_path) + print(f"[Subprocess] Successfully opened collection.") + + deleted_count = 0 + for i in range(0, num_docs_to_delete, batch_size): + # Calculate the number of documents in the current batch + current_batch_size = min(batch_size, num_docs_to_delete - i) + + if current_batch_size==batch_size: + + doc_ids= [str(j) for j in range(i, i+batch_size)] + else: + doc_ids= [str(j) for j in range(i, i + current_batch_size)] + + result = collection.delete(doc_ids) + + # Check return value - insert returns a list of document IDs + assert len(result) == len(doc_ids) + for i in range(len(result)): + if i < len(doc_ids): + assert result[i].ok() + deleted_count += len(doc_ids) + print(f"[Subprocess] Batch deletion successful, deleted {len(doc_ids)} documents, total deleted: {deleted_count}") + + + + # Add small delay to allow interruption opportunity + time.sleep(delay_between_batches) + + print(f"[Subprocess] Completed inserting {deleted_count} documents.") + + if hasattr(collection, "close"): + collection.close() + else: + del collection # Use del as fallback + print(f"[Subprocess] Closed collection after deletion operations.") + + except Exception as e: + print(f"[Subprocess] Error during document deletion operations: {e}") + import traceback + traceback.print_exc() + # Optionally re-raise or handle differently + raise # Re-raising may be useful depending on how parent process responds + + print(f"[Subprocess] Document deletion operations completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}") + + +if __name__ == "__main__": + args_json_str = sys.argv[1] + run_zvec_deletedoc_operations(args_json_str) +''' + + def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, collection_option, basic_schema): + """ + Scenario: First successfully create a Zvec collection in the main process. 
+        Then start a subprocess to open the collection and perform bulk document deletion operations.
+        During the bulk deletion operation, forcibly terminate the subprocess (simulate power failure or process crash).
+        Finally, in the main process, reopen the collection and verify whether its state and functionality are normal.
+        """
+        with tempfile.TemporaryDirectory() as temp_dir:
+            collection_path = f"{temp_dir}/test_collection_deletedoc_crash_recovery"
+
+            # Step 1: Successfully create collection in main process
+            print(f"[Test] Step 1: Creating collection in main process, path: {collection_path}...")
+            coll = zvec.create_and_open(path=collection_path, schema=full_schema_1024, option=collection_option)
+            assert coll is not None
+            print(f"[Test] Step 1.1: Collection created successfully.")
+            single_doc = generate_doc(2001, coll.schema)
+            singledoc_and_check(coll, single_doc, is_delete=0)
+            print(f"[Test] Step 1.2: Verified collection data write successful.")
+
+            # Insert initial documents that the subprocess will later delete
+            initial_docs = []
+            for i in range(0, 1000):  # Insert 1000 documents so the subprocess has data to delete
+                doc = generate_doc(i, coll.schema)
+                initial_docs.append(doc)
+
+            insert_results = coll.insert(initial_docs)
+            print(f"[Test] Step 1.3: Inserted {len(initial_docs)} initial documents for deletion.")
+
+            del coll
+            print(f"[Test] Step 1.4: Closed collection.")
+
+            # Step 2: Prepare and run subprocess for bulk deletion operations
+            # Write subprocess script to temporary file
+            subprocess_script_path = f"{temp_dir}/zvec_subprocess_deletedoc.py"
+            with open(subprocess_script_path, 'w', encoding='utf-8') as f:
+                f.write(self.ZVEC_SUBPROCESS_SCRIPT_DELETEDOC)
+
+            # Prepare subprocess parameters
+            subprocess_args = {
+                "collection_path": collection_path,
+                "num_docs_to_delete": 200,  # Delete 200 documents to allow for interruption
+                "batch_size": 10,  # Delete 10 documents per batch
+                "delay_between_batches": 0.2  # 0.2 second delay between batches to increase interruption timing
+            }
+ args_json_str = json.dumps(subprocess_args) + + print(f"[Test] Step 2: Starting bulk deletion operations in subprocess, path: {collection_path}") + # Start subprocess to execute bulk deletion operations + proc = subprocess.Popen([ + sys.executable, subprocess_script_path, args_json_str + ]) + + # Wait briefly to allow subprocess to begin deletion operations + time.sleep(2) # Wait 2 seconds to allow deletion loop to start + + print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + # Suddenly kill subprocess (simulate power failure or crash during deletion operations) + if psutil: + try: + # Use psutil to reliably terminate process and all its children + parent = psutil.Process(proc.pid) + children = parent.children(recursive=True) + for child in children: + child.kill() + parent.kill() + proc.wait(timeout=5) + except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + # If psutil is unavailable or process has been terminated, fall back to original method + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + else: + # If no psutil, use standard method to terminate process + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + print(f"[Test] Subprocess {proc.pid} has been terminated.") + + # Clean up temporary script file + os.remove(subprocess_script_path) + + # Step 3: Verify recovery situation in main process + print(f"[Test] Step 3: Attempting to open collection after simulating crash during document deletion operations...") + # Verification 3.1: Check if collection can be successfully opened after crash + recovered_collection = zvec.open(collection_path) + 
assert recovered_collection is not None, "Cannot open collection after crash" + print(f"[Test] Step 3.1: Verified collection can be opened after crash...") + + # Verification 3.2: Check data integrity (document count and content) + print(f"[Test] Step 3.2: Verifying data integrity...") + query_result = recovered_collection.query(topk=1024) + # We expect some documents to have been successfully deleted before crash + # The exact number depends on when the crash occurred during the bulk deletion process + print( + f"[Test] Step 3.2: Found {len(query_result)} documents after crash (expected 0-{subprocess_args['num_docs_to_delete']})") + + + current_count = recovered_collection.stats.doc_count + assert recovered_collection.stats.doc_count >= 1 + assert len(query_result)<=recovered_collection.stats.doc_count,(f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + + # Verify existing documents have correct structure + if len(query_result) > 0: + + for doc in query_result[:1024]: + if doc.id=="2001": + print("Found 2001 data!") + fetched_docs = recovered_collection.fetch([doc.id]) + print("doc.id:\n") + print(doc.id) + print("fetched_docs:\n") + print(fetched_docs) + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs["2001"],single_doc, recovered_collection.schema),(f"result doc={fetched_docs},doc_exp={single_doc}") + break + else: + fetched_docs = recovered_collection.fetch([doc.id]) + print("doc.id,fetched_docs:\n") + print(doc.id,fetched_docs) + exp_doc = generate_doc(int(doc.id), recovered_collection.schema) + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs["1"], exp_doc, recovered_collection.schema), (f"result doc={fetched_docs},doc_exp={exp_doc}") + + #3.4: Check if index is complete and query function works properly + print(f"[Test] Step 3.4: Verifying index integrity and query function...") + filtered_query = 
recovered_collection.query(filter=f"int32_field >=-100") + print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + assert len(filtered_query) > 0 + for doc in query_result: + if doc.id=="2001": + print("Found 2001 data!") + fetched_docs = recovered_collection.fetch([doc.id]) + print("doc.id:\n") + print(doc.id) + print("fetched_docs:\n") + print(fetched_docs) + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs["2001"],single_doc, recovered_collection.schema),(f"result doc={fetched_docs},doc_exp={single_doc}") + break + else: + fetched_docs = recovered_collection.fetch([doc.id]) + print("doc.id,fetched_docs:\n") + print(doc.id,fetched_docs) + exp_doc = generate_doc(int(doc.id), recovered_collection.schema) + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema), (f"result doc={fetched_docs},doc_exp={exp_doc}") + + # Verification 3.5: Test insertion functionality after recovery + print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") + test_insert_doc = generate_doc(9999, full_schema_1024) # Use original schema from fixture + singledoc_and_check(recovered_collection, test_insert_doc, operator="insert",is_delete=0) + + # Verification 3.6: Test update functionality after recovery + print(f"[Test] Step 3.6: Testing update functionality after recovery...") + updated_doc = generate_update_doc(2001, recovered_collection.schema) + singledoc_and_check(recovered_collection, updated_doc, operator="update",is_delete=0) + + + #3.7: Test deletion after recovery + print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") + doc_ids = ["9999"] + result = recovered_collection.delete(doc_ids) + assert len(result) == len(doc_ids) + for item in result: + assert item.ok() \ No newline at end of file diff --git a/python/tests/detail/test_collection_crash_recovery_dropcolumn.py 
b/python/tests/detail/test_collection_crash_recovery_dropcolumn.py new file mode 100644 index 000000000..eb3202233 --- /dev/null +++ b/python/tests/detail/test_collection_crash_recovery_dropcolumn.py @@ -0,0 +1,431 @@ +# -*- coding: utf-8 -*- +""" +test_collection_crash_recovery_dropcolumn.py + +This script is used to test Zvec's recovery capability after simulating a "power failure" (forced process termination) during column drop operations. +It first successfully creates a collection in the main process and inserts some documents, then starts a subprocess to open the collection and perform column drop operations. +During the column drop operation, the subprocess is forcibly terminated to simulate a scenario where the Zvec process crashes during column removal. +Finally, the main process attempts to reopen the collection and verify its state and functionality. + +Note: This script assumes that Zvec is a Python extension library. Directly killing the Python subprocess running Zvec operations +may not perfectly simulate the impact of system-level power failure on the C++ layer, but it can test the file state of the Zvec Python extension after abnormal process termination. 
+""" + +import zvec +import time +import tempfile +import subprocess +import signal +import sys +import os +import pytest +import json # Used to pass operation parameters and results +import threading + +try: + import psutil # Used for more reliable process management +except ImportError: + psutil = None # If psutil is not installed, set it to None +from distance_helper import * +from fixture_helper import * +from doc_helper import * + + +def singledoc_and_check( + collection: Collection, insert_doc, operator="insert", is_delete=1 +): + if operator == "insert": + result = collection.insert(insert_doc) + elif operator == "upsert": + result = collection.upsert(insert_doc) + elif operator == "update": + result = collection.update(insert_doc) + else: + logging.error("operator value is error!") + + assert bool(result) + assert result.ok() + + stats = collection.stats + assert stats is not None + #assert stats.doc_count == 1 + + fetched_docs = collection.fetch([insert_doc.id]) + assert len(fetched_docs) == 1 + assert insert_doc.id in fetched_docs + + fetched_doc = fetched_docs[insert_doc.id] + + assert is_doc_equal(fetched_doc, insert_doc, collection.schema) + assert hasattr(fetched_doc, "score"), "Document should have a score attribute" + assert fetched_doc.score == 0.0, ( + "Fetch operation should return default score of 0.0" + ) + + for k, v in DEFAULT_VECTOR_FIELD_NAME.items(): + if v != {}: + query_result = collection.query( + VectorQuery(field_name=v, vector=insert_doc.vectors[v]), + topk=1024, + ) + assert len(query_result) > 0, ( + f"Expected at least 1 query result, but got {len(query_result)}" + ) + + found_doc = None + for doc in query_result: + if doc.id == insert_doc.id: + found_doc = doc + break + assert found_doc is not None, ( + f"deleted document {insert_doc.id} not found in query results" + ) + assert is_doc_equal(found_doc, insert_doc, collection.schema, True, False) + if is_delete == 1: + collection.delete(insert_doc.id) + assert 
collection.stats.doc_count == 0, "Document should be deleted" + + +class TestCollectionCrashRecoveryDropColumn: + """ + Test Zvec collection recovery capability after simulating power failure/process crash during column drop. + Focus on verifying whether the file remains consistent after interruption of column drop operations, + and whether it can be reopened and used normally. + """ + + # Script content for subprocess to execute Zvec column drop operations + # Write this script content to a temporary file and execute it in the subprocess. + ZVEC_SUBPROCESS_SCRIPT_DROPCOLUMN = ''' +import zvec +import time +import json +import sys +import os + + +def run_zvec_dropcolumn_operations(args_json_str): + args = json.loads(args_json_str) + collection_path = args["collection_path"] + drop_field_name = args.get("drop_field_name", "int32_field") # Field name for the drop + drop_column_iterations = args.get("drop_column_iterations", 10) # Number of column drop iterations + delay_between_drops = args.get("delay_between_drops", 0.5) # Delay between column drops + + print("[Subprocess] Starting Zvec drop column operations on " + collection_path + " at: " + time.strftime('%Y-%m-%d %H:%M:%S')) + print("[Subprocess] Will drop column '" + drop_field_name + "', " + str(drop_column_iterations) + " times") + + try: + # Open existing collection + collection = zvec.open(collection_path) + print("[Subprocess] Successfully opened collection.") + + print("[Subprocess] Starting " + str(drop_column_iterations) + " column operations (add then drop)...") + + # First, add the column to ensure it exists before attempting to drop it + from zvec import FieldSchema, DataType, AddColumnOption + if args.get("drop_data_type") == "INT32": + data_type = DataType.INT32 + elif args.get("drop_data_type") == "INT64": + data_type = DataType.INT64 + elif args.get("drop_data_type") == "UINT32": + data_type = DataType.UINT32 + elif args.get("drop_data_type") == "UINT64": + data_type = DataType.UINT64 + elif 
args.get("drop_data_type") == "FLOAT": + data_type = DataType.FLOAT + elif args.get("drop_data_type") == "DOUBLE": + data_type = DataType.DOUBLE + else: + data_type = DataType.INT32 # Default fallback (supported type) + + + # Loop to drop columns multiple times - this increases the chance of interruption during the operation + for i in range(drop_column_iterations): + print("[Subprocess] Iteration " + str(i+1) + "/" + str(drop_column_iterations) + ": Dropping column '" + drop_field_name + "'...") + + # Add the column that will be dropped later + drop_field = FieldSchema(drop_field_name, data_type, nullable=True) + collection.add_column( + field_schema=drop_field, + expression="", # Empty expression means fill with default/null values + option=AddColumnOption() + ) + print("[Subprocess] Added column '" + drop_field_name + "' to collection for later deletion.") + + # Drop the column - this is the operation we want to interrupt + # Note: drop_column may not need options or may use a different parameter + collection.drop_column( + field_name=drop_field_name + ) + + print("[Subprocess] Iteration " + str(i+1) + ": Column '" + drop_field_name + "' drop completed successfully.") + + # Add delay between iterations to allow interruption opportunity + if i < drop_column_iterations - 1: # Don't sleep after the last iteration + print("[Subprocess] Waiting " + str(delay_between_drops) + "s before next column drop...") + time.sleep(delay_between_drops) + + if hasattr(collection, "close"): + collection.close() + else: + del collection # Use del as fallback + print("[Subprocess] Closed collection after column drop operations.") + + except Exception as e: + print("[Subprocess] Error during column drop operations: " + str(e)) + import traceback + traceback.print_exc() + # Optionally re-raise or handle differently + raise # Re-raising may be useful depending on how parent process responds + + print("[Subprocess] Column drop operations completed at: " + time.strftime('%Y-%m-%d 
%H:%M:%S')) + + +if __name__ == "__main__": + args_json_str = sys.argv[1] + run_zvec_dropcolumn_operations(args_json_str) +''' + + def test_dropcolumn_simulate_crash_during_column_drop_int32(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform INT32 column drop operations. + During the column drop operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_dropcolumn_with_crash_recovery(full_schema_1024, collection_option, "INT32", "int32_field1") + + def test_dropcolumn_simulate_crash_during_column_drop_int64(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform INT64 column drop operations. + During the column drop operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_dropcolumn_with_crash_recovery(full_schema_1024, collection_option, "INT64", "int64_field1") + + def test_dropcolumn_simulate_crash_during_column_drop_uint32(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform UINT32 column drop operations. + During the column drop operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
+ """ + self._test_dropcolumn_with_crash_recovery(full_schema_1024, collection_option, "UINT32", "uint32_field1") + + def test_dropcolumn_simulate_crash_during_column_drop_uint64(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform UINT64 column drop operations. + During the column drop operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_dropcolumn_with_crash_recovery(full_schema_1024, collection_option, "UINT64", "uint64_field1") + + def test_dropcolumn_simulate_crash_during_column_drop_float(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform FLOAT column drop operations. + During the column drop operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + self._test_dropcolumn_with_crash_recovery(full_schema_1024, collection_option, "FLOAT", "float_field1") + + def test_dropcolumn_simulate_crash_during_column_drop_double(self, full_schema_1024, collection_option): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform DOUBLE column drop operations. + During the column drop operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
+ """ + self._test_dropcolumn_with_crash_recovery(full_schema_1024, collection_option, "DOUBLE", "double_field1") + + def _test_dropcolumn_with_crash_recovery(self, schema, collection_option, drop_data_type, drop_field_name): + """ + Common method to test column drop with crash recovery for different column types. + """ + with tempfile.TemporaryDirectory() as temp_dir: + collection_path = f"{temp_dir}/test_collection_dropcolumn_crash_recovery_{drop_data_type.lower()}" + + # Step 1: Successfully create collection in main process and insert some documents + print(f"[Test] Step 1: Creating collection in main process, path: {collection_path}...") + coll = zvec.create_and_open(path=collection_path, schema=schema, option=collection_option) + assert coll is not None + print(f"[Test] Step 1.1: Collection created successfully.") + + exp_doc_dict = {} + # Insert some documents to have data for column operations + for i in range(50): # Reduced for faster testing + exp_doc_dict[i] = {} + doc = generate_doc(i, coll.schema) + result = coll.insert([doc]) + assert result is not None and len(result) > 0, f"Failed to insert document {i}" + exp_doc_dict[i] = doc + + print(f"[Test] Step 1.2: Inserted 50 documents for column operations.") + + # Verify collection state before crash + initial_doc_count = coll.stats.doc_count + print(f"[Test] Step 1.3: Collection has {initial_doc_count} documents before crash simulation.") + + del coll + print(f"[Test] Step 1.4: Closed collection.") + + # Step 2: Prepare and run subprocess for column drop operations + # Write subprocess script to temporary file + subprocess_script_path = f"{temp_dir}/zvec_subprocess_dropcolumn.py" + with open(subprocess_script_path, 'w', encoding='utf-8') as f: + f.write(self.ZVEC_SUBPROCESS_SCRIPT_DROPCOLUMN) + + # Prepare subprocess parameters + subprocess_args = { + "collection_path": collection_path, + "drop_field_name": drop_field_name, # Use appropriate field name for this test + "drop_data_type": drop_data_type, # 
Type of field to drop + "drop_column_iterations": 20, # Number of drop iterations to increase interruption chance + "delay_between_drops": 0.3 # Delay between drops to allow interruption opportunity + } + args_json_str = json.dumps(subprocess_args) + + print( + f"[Test] Step 2: Starting {drop_data_type} column drop operations in subprocess, path: {collection_path}") + # Start subprocess to execute column drop operations + proc = subprocess.Popen([ + sys.executable, subprocess_script_path, args_json_str + ]) + + # Wait briefly to allow subprocess to begin column drop operations + time.sleep(3) # Wait 3 seconds to allow column drop process to start + + print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + # Suddenly kill subprocess (simulate power failure or crash during column drop operations) + if psutil: + try: + # Use psutil to reliably terminate process and all its children + parent = psutil.Process(proc.pid) + children = parent.children(recursive=True) + for child in children: + child.kill() + parent.kill() + proc.wait(timeout=5) + except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + # If psutil is unavailable or process has been terminated, fall back to original method + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + else: + # If no psutil, use standard method to terminate process + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + print(f"[Test] Subprocess {proc.pid} has been terminated.") + + # Clean up temporary script file + os.remove(subprocess_script_path) + + # Step 3: Verify recovery situation in main process + print( + f"[Test] 
Step 3: Attempting to open collection after simulating crash during column drop operations...") + # Verification 3.1: Check if collection can be successfully opened after crash + recovered_collection = zvec.open(collection_path) + assert recovered_collection is not None, "Cannot open collection after crash" + print(f"[Test] Step 3.1: Verified collection can be opened after crash...") + + # Verification 3.2: Check data integrity (document count and content) + print(f"[Test] Step 3.2: Verifying data integrity...") + query_result = recovered_collection.query(topk=1024) + # We expect some documents to have been successfully inserted before crash + # The exact number depends on when the crash occurred during the bulk insertion process + print( + f"[Test] Step 3.2: Found {len(query_result)} documents after crash") + + current_count = recovered_collection.stats.doc_count + assert recovered_collection.stats.doc_count >= 1 + assert len(query_result) <= current_count, ( + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + + # Verify existing documents have correct structure + if len(query_result) > 0: + for doc in query_result[:50]: # Limit to first 50 for efficiency + fetched_docs = recovered_collection.fetch([doc.id]) + '''print("doc.id,fetched_docs:\n") + print(doc.id, fetched_docs)''' + exp_doc = exp_doc_dict[int(doc.id)] + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + # Note: The doc content may have been partially updated before the crash + # So we only verify the schema structure and basic fields + assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema, + True, True), ( + f"result doc={fetched_docs},doc_exp={exp_doc}") + + # 3.4: Check if query function works properly + print(f"[Test] Step 3.4: Verifying query function after crash...") + filtered_query = recovered_collection.query(filter=f"int32_field >=-100") + print(f"[Test] Step 3.4.2: Field-filtered query returned 
{len(filtered_query)} documents") + assert len(filtered_query) > 0 + for doc in query_result[:10]: # Check first 10 docs + fetched_docs = recovered_collection.fetch([doc.id]) + exp_doc = exp_doc_dict[int(doc.id)] + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema, + True, True), ( + f"result doc={fetched_docs},doc_exp={exp_doc}") + + # Verification 3.5: Test insertion functionality after recovery + print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") + test_insert_doc = generate_doc(9999, schema) # Use original schema from fixture + singledoc_and_check(recovered_collection, test_insert_doc, operator="insert", is_delete=0) + + # Verification 3.6: Test update functionality after recovery + print(f"[Test] Step 3.6: Testing update functionality after recovery...") + updated_doc = generate_update_doc(9999, recovered_collection.schema) + singledoc_and_check(recovered_collection, updated_doc, operator="update", is_delete=0) + + # 3.7: Test deletion after recovery + print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") + doc_ids = ["9999"] + result = recovered_collection.delete(doc_ids) + assert len(result) == len(doc_ids) + for item in result: + assert item.ok() + + # Verification 3.8: Test dropping a column after crash recovery + print(f"[Test] Step 3.8: Testing column drop after crash recovery...") + + # Now try to drop a column after the crash recovery + # This should succeed if the collection is properly recovered + try: + recovered_collection.drop_column( + field_name=drop_field_name + ) + print(f"[Test] Step 3.8: {drop_data_type} Column drop succeeded after crash recovery") + except Exception as e: + print(f"[Test] Step 3.8: {drop_data_type} Column drop failed after crash recovery: {str(e)}") + # This is expected if the column was already dropped during the interrupted operation + + # Only do a simple verification after column 
drop + stats_after_drop_column = recovered_collection.stats + print(f"[Test] Step 3.8.1: Stats after column drop - doc_count: {stats_after_drop_column.doc_count}") + + # 3.9: Check if query function works properly after column drop + print(f"[Test] Step 3.9: Verifying query function after column drop...") + # Use a simpler query that matches the field type + filtered_query = recovered_collection.query(filter=f"int32_field >= 0", topk=10) + print(f"[Test] Step 3.9.1: Field-filtered query returned {len(filtered_query)} documents") + # Note: After column drop, this query might return 0 results + + # Close the recovered collection + if hasattr(recovered_collection, "close"): + recovered_collection.close() + else: + del recovered_collection + print(f"[Test] Step 3.10: Closed recovered collection.") diff --git a/python/tests/detail/test_collection_crash_recovery_insertdoc.py b/python/tests/detail/test_collection_crash_recovery_insertdoc.py new file mode 100644 index 000000000..8780f16cb --- /dev/null +++ b/python/tests/detail/test_collection_crash_recovery_insertdoc.py @@ -0,0 +1,444 @@ +# -*- coding: utf-8 -*- +""" +test_collection_crash_recovery_insertdoc.py + +This script is used to test Zvec's recovery capability after simulating a "power failure" (forced process termination) during bulk document insertion (insertdoc). +It first successfully creates a collection in the main process, then starts a subprocess to open the collection and perform bulk document insertion operations. +During the insertion operation, the subprocess is forcibly terminated to simulate a scenario where the Zvec process crashes during document insertion. +Finally, the main process attempts to reopen the collection and verify its state and functionality. + +Note: This script assumes that Zvec is a Python extension library. 
def singledoc_and_check(
    collection: Collection, insert_doc, operator: str = "insert", is_delete: int = 1
):
    """Write a single document via *operator*, then verify it end to end.

    Verifies the write through three independent paths: collection stats,
    ``fetch`` by id, and a vector ``query`` on every configured vector field.

    Args:
        collection: An already-opened collection to write into.
        insert_doc: Document to write; its ``id`` drives fetch/delete checks.
        operator: One of ``"insert"``, ``"upsert"`` or ``"update"``.
        is_delete: When 1, delete the document at the end and assert the
            collection is empty afterwards.

    Raises:
        ValueError: If *operator* is not one of the supported values.
        AssertionError: If any verification step fails.
    """
    # Dispatch table instead of if/elif: guarantees `result` is always bound.
    # The original logged "operator value is error!" and fell through, which
    # produced an UnboundLocalError on the next line and masked the mistake.
    operations = {
        "insert": collection.insert,
        "upsert": collection.upsert,
        "update": collection.update,
    }
    try:
        result = operations[operator](insert_doc)
    except KeyError:
        raise ValueError(f"unsupported operator: {operator!r}") from None

    assert bool(result)
    assert result.ok()

    stats = collection.stats
    assert stats is not None
    # NOTE(review): doc_count is deliberately not asserted here — callers run
    # this against collections that already hold other documents.

    fetched_docs = collection.fetch([insert_doc.id])
    assert len(fetched_docs) == 1
    assert insert_doc.id in fetched_docs

    fetched_doc = fetched_docs[insert_doc.id]
    assert is_doc_equal(fetched_doc, insert_doc, collection.schema)
    assert hasattr(fetched_doc, "score"), "Document should have a score attribute"
    assert fetched_doc.score == 0.0, (
        "Fetch operation should return default score of 0.0"
    )

    # Query by every configured vector field and make sure the freshly
    # written document is retrievable and intact. Keys of the mapping are
    # unused; iterate values directly.
    for field_name in DEFAULT_VECTOR_FIELD_NAME.values():
        if field_name == {}:
            continue
        query_result = collection.query(
            VectorQuery(field_name=field_name, vector=insert_doc.vectors[field_name]),
            topk=1024,
        )
        assert len(query_result) > 0, (
            f"Expected at least 1 query result, but got {len(query_result)}"
        )
        found_doc = next(
            (doc for doc in query_result if doc.id == insert_doc.id), None
        )
        # Message fixed: this checks a just-written document, not a deleted one.
        assert found_doc is not None, (
            f"written document {insert_doc.id} not found in query results"
        )
        assert is_doc_equal(found_doc, insert_doc, collection.schema, True, False)

    if is_delete == 1:
        collection.delete(insert_doc.id)
        assert collection.stats.doc_count == 0, "Document should be deleted"
doc_fields[field.name] = i + elif field.data_type == zvec.DataType.FLOAT: + doc_fields[field.name] = float(i) + 0.1 + elif field.data_type == zvec.DataType.DOUBLE: + doc_fields[field.name] = float(i) + 0.11 + elif field.data_type == zvec.DataType.STRING: + doc_fields[field.name] = f"test_{i}" + elif field.data_type == zvec.DataType.ARRAY_BOOL: + doc_fields[field.name] = [i % 2 == 0, i % 3 == 0] + elif field.data_type == zvec.DataType.ARRAY_INT32: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_UINT32: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_INT64: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_UINT64: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_FLOAT: + doc_fields[field.name] = [float(i + 0.1), float(i + 1.1), float(i + 2.1)] + elif field.data_type == zvec.DataType.ARRAY_DOUBLE: + doc_fields[field.name] = [float(i + 0.11), float(i + 1.11), float(i + 2.11)] + elif field.data_type == zvec.DataType.ARRAY_STRING: + doc_fields[field.name] = [f"test_{i}", f"test_{i + 1}", f"test_{i + 2}"] + else: + raise ValueError(f"Unsupported field type: {field.data_type}") + + for vector in schema.vectors: + if vector.data_type == zvec.DataType.VECTOR_FP16: + doc_vectors[vector.name] = generate_constant_vector( + i, vector.dimension, "float16" + ) + elif vector.data_type == zvec.DataType.VECTOR_FP32: + doc_vectors[vector.name] = generate_constant_vector( + i, vector.dimension, "float32" + ) + elif vector.data_type == zvec.DataType.VECTOR_INT8: + doc_vectors[vector.name] = generate_constant_vector( + i, + vector.dimension, + "int8", + ) + elif vector.data_type == zvec.DataType.SPARSE_VECTOR_FP32: + doc_vectors[vector.name] = generate_sparse_vector(i) + elif vector.data_type == zvec.DataType.SPARSE_VECTOR_FP16: + doc_vectors[vector.name] = generate_sparse_vector(i) + else: + raise 
ValueError(f"Unsupported vector type: {vector.data_type}") + + return doc_fields, doc_vectors + + +def generate_doc(i: int, schema: zvec.CollectionSchema) -> zvec.Doc: + doc_fields = {} + doc_vectors = {} + doc_fields, doc_vectors = generate_vectordict(i, schema) + doc = zvec.Doc(id=str(i), fields=doc_fields, vectors=doc_vectors) + return doc + + +def run_zvec_insertdoc_operations(args_json_str): + args = json.loads(args_json_str) + collection_path = args["collection_path"] + num_docs_to_insert = args.get("num_docs_to_insert", 100) # Number of documents to insert + batch_size = args.get("batch_size", 10) # Batch size for each insertion + delay_between_batches = args.get("delay_between_batches", 0.1) # Delay between batches + + print(f"[Subprocess] Starting Zvec insert document operations on {collection_path} at: {time.strftime('%Y-%m-%d %H:%M:%S')}") + print(f"[Subprocess] Will insert {num_docs_to_insert} documents in batches of {batch_size}") + + try: + # Open existing collection + collection = zvec.open(collection_path) + print(f"[Subprocess] Successfully opened collection.") + + inserted_count = 0 + for i in range(0, num_docs_to_insert, batch_size): + # Calculate the number of documents in the current batch + current_batch_size = min(batch_size, num_docs_to_insert - i) + + # Generate list of documents to insert + docs = [] + for j in range(current_batch_size): + doc_id = i + j + # Generate document using schema obtained from collection + doc = generate_doc(doc_id, collection.schema) + docs.append(doc) + + print(f"[Subprocess] Inserting batch {i//batch_size + 1}, documents {i} to {i + current_batch_size - 1}") + + # Perform insertion operation + res = collection.insert(docs) + + # Check return value - insert returns a list of document IDs + if res and len(res) > 0: + inserted_count += len(docs) + print(f"[Subprocess] Batch insertion successful, inserted {len(docs)} documents, total inserted: {inserted_count}") + else: + print(f"[Subprocess] Batch insertion may 
have failed, return value: {res}") + + # Add small delay to allow interruption opportunity + time.sleep(delay_between_batches) + + print(f"[Subprocess] Completed inserting {inserted_count} documents.") + + if hasattr(collection, "close"): + collection.close() + else: + del collection # Use del as fallback + print(f"[Subprocess] Closed collection after insertion operations.") + + except Exception as e: + print(f"[Subprocess] Error during document insertion operations: {e}") + import traceback + traceback.print_exc() + # Optionally re-raise or handle differently + raise # Re-raising may be useful depending on how parent process responds + + print(f"[Subprocess] Document insertion operations completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}") + + +if __name__ == "__main__": + args_json_str = sys.argv[1] + run_zvec_insertdoc_operations(args_json_str) +''' + + def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, collection_option, basic_schema): + """ + Scenario: First successfully create a Zvec collection in the main process. + Then start a subprocess to open the collection and perform bulk document insertion operations. + During the bulk insertion operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
+ """ + with tempfile.TemporaryDirectory() as temp_dir: + collection_path = f"{temp_dir}/test_collection_insertdoc_crash_recovery" + + # Step 1: Successfully create collection in main process + print(f"[Test] Step 1: Creating collection in main process, path: {collection_path}...") + coll = zvec.create_and_open(path=collection_path, schema=full_schema_1024, option=collection_option) + assert coll is not None + print(f"[Test] Step 1.1: Collection created successfully.") + single_doc = generate_doc(2001, coll.schema) + singledoc_and_check(coll, single_doc, is_delete=0) + print(f"[Test] Step 1.2: Verified collection data write successful.") + + del coll + print(f"[Test] Step 1.3: Closed collection.") + + # Step 2: Prepare and run subprocess for bulk insertion operations + # Write subprocess script to temporary file + subprocess_script_path = f"{temp_dir}/zvec_subprocess_insertdoc.py" + with open(subprocess_script_path, 'w', encoding='utf-8') as f: + f.write(self.ZVEC_SUBPROCESS_SCRIPT_INSERTDOC) + + # Prepare subprocess parameters + subprocess_args = { + "collection_path": collection_path, + "num_docs_to_insert": 200, # Insert 200 documents to allow for interruption + "batch_size": 10, # Insert 10 documents per batch + "delay_between_batches": 0.2 # 0.2 second delay between batches to increase interruption timing + } + args_json_str = json.dumps(subprocess_args) + + print(f"[Test] Step 2: Starting bulk insertion operations in subprocess, path: {collection_path}") + # Start subprocess to execute bulk insertion operations + proc = subprocess.Popen([ + sys.executable, subprocess_script_path, args_json_str + ]) + + # Wait briefly to allow subprocess to begin insertion operations + time.sleep(2) # Wait 2 seconds to allow insertion loop to start + + print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + # Suddenly kill subprocess (simulate power failure or crash during insertion operations) + if psutil: + try: + # Use psutil to 
reliably terminate process and all its children + parent = psutil.Process(proc.pid) + children = parent.children(recursive=True) + for child in children: + child.kill() + parent.kill() + proc.wait(timeout=5) + except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + # If psutil is unavailable or process has been terminated, fall back to original method + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + else: + # If no psutil, use standard method to terminate process + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + print(f"[Test] Subprocess {proc.pid} has been terminated.") + + # Clean up temporary script file + os.remove(subprocess_script_path) + + # Step 3: Verify recovery situation in main process + print( + f"[Test] Step 3: Attempting to open collection after simulating crash during document insertion operations...") + # Verification 3.1: Check if collection can be successfully opened after crash + recovered_collection = zvec.open(collection_path) + assert recovered_collection is not None, "Cannot open collection after crash" + print(f"[Test] Step 3.1: Verified collection can be opened after crash...") + + # Verification 3.2: Check data integrity (document count and content) + print(f"[Test] Step 3.2: Verifying data integrity...") + query_result = recovered_collection.query(topk=1024) + # We expect some documents to have been successfully inserted before crash + # The exact number depends on when the crash occurred during the bulk insertion process + print( + f"[Test] Step 3.2: Found {len(query_result)} documents after crash (expected 0-{subprocess_args['num_docs_to_insert']})") + + 
current_count = recovered_collection.stats.doc_count + assert recovered_collection.stats.doc_count >= 1 + assert len(query_result) <= current_count, ( + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + + # Verify existing documents have correct structure + if len(query_result) > 0: + + for doc in query_result[:1024]: + if doc.id == "2001": + print("Found 2001 data!") + fetched_docs = recovered_collection.fetch([doc.id]) + print("doc.id:\n") + print(doc.id) + print("fetched_docs:\n") + print(fetched_docs) + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs["2001"], single_doc, recovered_collection.schema), ( + f"result doc={fetched_docs},doc_exp={single_doc}") + break + else: + fetched_docs = recovered_collection.fetch([doc.id]) + print("doc.id,fetched_docs:\n") + print(doc.id, fetched_docs) + exp_doc = generate_doc(int(doc.id), recovered_collection.schema) + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs["1"], exp_doc, recovered_collection.schema), ( + f"result doc={fetched_docs},doc_exp={exp_doc}") + + # 3.4: Check if index is complete and query function works properly + print(f"[Test] Step 3.4: Verifying index integrity and query function...") + filtered_query = recovered_collection.query(filter=f"int32_field >=-100") + print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + assert len(filtered_query) > 0 + for doc in query_result: + if doc.id == "2001": + print("Found 2001 data!") + fetched_docs = recovered_collection.fetch([doc.id]) + print("doc.id:\n") + print(doc.id) + print("fetched_docs:\n") + print(fetched_docs) + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs["2001"], single_doc, recovered_collection.schema), ( + f"result doc={fetched_docs},doc_exp={single_doc}") + break + else: + fetched_docs = 
recovered_collection.fetch([doc.id]) + print("doc.id,fetched_docs:\n") + print(doc.id, fetched_docs) + exp_doc = generate_doc(int(doc.id), recovered_collection.schema) + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs["1"], exp_doc, recovered_collection.schema), ( + f"result doc={fetched_docs},doc_exp={exp_doc}") + + # Verification 3.5: Test insertion functionality after recovery + print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") + test_insert_doc = generate_doc(9999, full_schema_1024) # Use original schema from fixture + singledoc_and_check(recovered_collection, test_insert_doc, operator="insert", is_delete=0) + + # Verification 3.6: Test update functionality after recovery + print(f"[Test] Step 3.6: Testing update functionality after recovery...") + updated_doc = generate_update_doc(2001, recovered_collection.schema) + singledoc_and_check(recovered_collection, updated_doc, operator="update", is_delete=0) + + # 3.7: Test deletion after recovery + print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") + doc_ids = ["9999"] + result = recovered_collection.delete(doc_ids) + assert len(result) == len(doc_ids) + for item in result: + assert item.ok() \ No newline at end of file diff --git a/python/tests/detail/test_collection_crash_recovery_updatedoc.py b/python/tests/detail/test_collection_crash_recovery_updatedoc.py new file mode 100644 index 000000000..7856d4475 --- /dev/null +++ b/python/tests/detail/test_collection_crash_recovery_updatedoc.py @@ -0,0 +1,514 @@ +# -*- coding: utf-8 -*- +""" +test_collection_crash_recovery_updatedoc.py + +This script is used to test Zvec's recovery capability after simulating a "power failure" (forced process termination) during bulk document update (updatedoc). 
+It first successfully creates a collection in the main process and inserts some documents, then starts a subprocess to open the collection and perform bulk document update operations. +During the update operation, the subprocess is forcibly terminated to simulate a scenario where the Zvec process crashes during document update. +Finally, the main process attempts to reopen the collection and verify its state and functionality. + +Note: This script assumes that Zvec is a Python extension library. Directly killing the Python subprocess running Zvec operations +may not perfectly simulate the impact of system-level power failure on the C++ layer, but it can test the file state of the Zvec Python extension after abnormal process termination. +""" + +import zvec +import time +import tempfile +import subprocess +import signal +import sys +import os +import pytest +import json # Used to pass operation parameters and results +import threading + +try: + import psutil # Used for more reliable process management +except ImportError: + psutil = None # If psutil is not installed, set it to None +from fixture_helper import * +from doc_helper import generate_doc, generate_update_doc + +from distance_helper import * +from fixture_helper import * +from doc_helper import * + + + + +def singledoc_and_check( + collection: Collection, insert_doc, operator="insert", is_delete=1 +): + if operator == "insert": + result = collection.insert(insert_doc) + elif operator == "upsert": + result = collection.upsert(insert_doc) + elif operator == "update": + result = collection.update(insert_doc) + else: + logging.error("operator value is error!") + + assert bool(result) + assert result.ok() + + stats = collection.stats + assert stats is not None + #assert stats.doc_count == 1 + + fetched_docs = collection.fetch([insert_doc.id]) + assert len(fetched_docs) == 1 + assert insert_doc.id in fetched_docs + + fetched_doc = fetched_docs[insert_doc.id] + + assert is_doc_equal(fetched_doc, insert_doc, 
collection.schema) + assert hasattr(fetched_doc, "score"), "Document should have a score attribute" + assert fetched_doc.score == 0.0, ( + "Fetch operation should return default score of 0.0" + ) + + for k, v in DEFAULT_VECTOR_FIELD_NAME.items(): + if v != {}: + query_result = collection.query( + VectorQuery(field_name=v, vector=insert_doc.vectors[v]), + topk=1024, + ) + assert len(query_result) > 0, ( + f"Expected at least 1 query result, but got {len(query_result)}" + ) + + found_doc = None + for doc in query_result: + if doc.id == insert_doc.id: + found_doc = doc + break + assert found_doc is not None, ( + f"deleted document {insert_doc.id} not found in query results" + ) + assert is_doc_equal(found_doc, insert_doc, collection.schema, True, False) + if is_delete == 1: + collection.delete(insert_doc.id) + assert collection.stats.doc_count == 0, "Document should be deleted" + + + +class TestCollectionCrashRecoveryUpdateDoc: + """ + Test Zvec collection recovery capability after simulating power failure/process crash during document update. + Focus on verifying whether the file remains consistent after interruption of document update operations, + and whether it can be reopened and used normally. + """ + + # Script content for subprocess to execute Zvec document update operations + # Write this script content to a temporary file and execute it in the subprocess. 
+ ZVEC_SUBPROCESS_SCRIPT_UPDATEDOC = ''' +import zvec +import time +import json +import sys +import os +import math +import random +import string +from typing import Literal +from zvec.typing import DataType, StatusCode, MetricType, QuantizeType +from zvec import ( + CollectionOption, + InvertIndexParam, + HnswIndexParam, + FlatIndexParam, + IVFIndexParam, + FieldSchema, + VectorSchema, + CollectionSchema, + Collection, + Doc, + VectorQuery, +) + +def generate_constant_vector( + i: int, dimension: int, dtype: Literal["int8", "float16", "float32"] = "float32" +): + if dtype == "int8": + vec = [(i % 127)] * dimension + vec[i % dimension] = ((i + 1) % 127) + else: + base_val = (i % 1000) / 256.0 + special_val = ((i + 1) % 1000) / 256.0 + vec = [base_val] * dimension + vec[i % dimension] = special_val + + return vec + + +def generate_sparse_vector(i: int): + return {i: i + 0.1} + + +def generate_vectordict(i: int, schema: zvec.CollectionSchema): + doc_fields = {} + doc_vectors = {} + for field in schema.fields: + if field.data_type == zvec.DataType.BOOL: + doc_fields[field.name] = i % 2 == 0 + elif field.data_type == zvec.DataType.INT32: + doc_fields[field.name] = i + elif field.data_type == zvec.DataType.UINT32: + doc_fields[field.name] = i + elif field.data_type == zvec.DataType.INT64: + doc_fields[field.name] = i + elif field.data_type == zvec.DataType.UINT64: + doc_fields[field.name] = i + elif field.data_type == zvec.DataType.FLOAT: + doc_fields[field.name] = float(i) + 0.1 + elif field.data_type == zvec.DataType.DOUBLE: + doc_fields[field.name] = float(i) + 0.11 + elif field.data_type == zvec.DataType.STRING: + doc_fields[field.name] = f"test_{i}" + elif field.data_type == zvec.DataType.ARRAY_BOOL: + doc_fields[field.name] = [i % 2 == 0, i % 3 == 0] + elif field.data_type == zvec.DataType.ARRAY_INT32: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_UINT32: + doc_fields[field.name] = [i, i + 1, i + 2] + elif 
field.data_type == zvec.DataType.ARRAY_INT64: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_UINT64: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_FLOAT: + doc_fields[field.name] = [float(i + 0.1), float(i + 1.1), float(i + 2.1)] + elif field.data_type == zvec.DataType.ARRAY_DOUBLE: + doc_fields[field.name] = [float(i + 0.11), float(i + 1.11), float(i + 2.11)] + elif field.data_type == zvec.DataType.ARRAY_STRING: + doc_fields[field.name] = [f"test_{i}", f"test_{i + 1}", f"test_{i + 2}"] + else: + raise ValueError(f"Unsupported field type: {field.data_type}") + + for vector in schema.vectors: + if vector.data_type == zvec.DataType.VECTOR_FP16: + doc_vectors[vector.name] = generate_constant_vector( + i, vector.dimension, "float16" + ) + elif vector.data_type == zvec.DataType.VECTOR_FP32: + doc_vectors[vector.name] = generate_constant_vector( + i, vector.dimension, "float32" + ) + elif vector.data_type == zvec.DataType.VECTOR_INT8: + doc_vectors[vector.name] = generate_constant_vector( + i, + vector.dimension, + "int8", + ) + elif vector.data_type == zvec.DataType.SPARSE_VECTOR_FP32: + doc_vectors[vector.name] = generate_sparse_vector(i) + elif vector.data_type == zvec.DataType.SPARSE_VECTOR_FP16: + doc_vectors[vector.name] = generate_sparse_vector(i) + else: + raise ValueError(f"Unsupported vector type: {vector.data_type}") + + return doc_fields, doc_vectors + + +def generate_vectordict_update(i: int, schema: zvec.CollectionSchema) -> zvec.Doc: + doc_fields = {} + doc_vectors = {} + doc_fields = {} + doc_vectors = {} + for field in schema.fields: + if field.data_type == DataType.BOOL: + doc_fields[field.name] = (i+1) % 2 == 0 + elif field.data_type == DataType.INT32: + doc_fields[field.name] = i+1 + elif field.data_type == DataType.UINT32: + doc_fields[field.name] = i+1 + elif field.data_type == DataType.INT64: + doc_fields[field.name] = i+1 + elif field.data_type == 
DataType.UINT64: + doc_fields[field.name] = i+1 + elif field.data_type == DataType.FLOAT: + doc_fields[field.name] = float(i+1) + 0.1 + elif field.data_type == DataType.DOUBLE: + doc_fields[field.name] = float(i+1) + 0.11 + elif field.data_type == DataType.STRING: + doc_fields[field.name] = f"test_{i+1}" + elif field.data_type == DataType.ARRAY_BOOL: + doc_fields[field.name] = [(i+1) % 2 == 0, (i+1) % 3 == 0] + elif field.data_type == DataType.ARRAY_INT32: + doc_fields[field.name] = [i + 1, i + 1, i + 2] + elif field.data_type == DataType.ARRAY_UINT32: + doc_fields[field.name] = [i + 1, i + 1, i + 2] + elif field.data_type == DataType.ARRAY_INT64: + doc_fields[field.name] = [i + 1, i + 1, i + 2] + elif field.data_type == DataType.ARRAY_UINT64: + doc_fields[field.name] = [i + 1, i + 1, i + 2] + elif field.data_type == DataType.ARRAY_FLOAT: + doc_fields[field.name] = [float(i + 1.1), float(i + 2.1), float(i + 3.1)] + elif field.data_type == DataType.ARRAY_DOUBLE: + doc_fields[field.name] = [float(i + 1.11), float(i + 2.11), float(i + 3.11)] + elif field.data_type == DataType.ARRAY_STRING: + doc_fields[field.name] = [f"test_{i+1}", f"test_{i + 2}", f"test_{i + 3}"] + else: + raise ValueError(f"Unsupported field type: {field.data_type}") + for vector in schema.vectors: + if vector.data_type == DataType.VECTOR_FP16: + doc_vectors[vector.name] = generate_constant_vector( + i+1, vector.dimension, "float16" + ) + elif vector.data_type == DataType.VECTOR_FP32: + doc_vectors[vector.name] = generate_constant_vector( + i+1, vector.dimension, "float32" + ) + elif vector.data_type == DataType.VECTOR_INT8: + doc_vectors[vector.name] = generate_constant_vector( + i+1, + vector.dimension, + "int8", + ) + elif vector.data_type == DataType.SPARSE_VECTOR_FP32: + doc_vectors[vector.name] = generate_sparse_vector(i+1) + elif vector.data_type == DataType.SPARSE_VECTOR_FP16: + doc_vectors[vector.name] = generate_sparse_vector(i+1) + else: + raise ValueError(f"Unsupported vector type: 
{vector.data_type}") + return doc_fields, doc_vectors + + + +def generate_doc(i: int, schema: zvec.CollectionSchema) -> zvec.Doc: + doc_fields = {} + doc_vectors = {} + doc_fields, doc_vectors = generate_vectordict(i, schema) + doc = zvec.Doc(id=str(i), fields=doc_fields, vectors=doc_vectors) + return doc + + +def generate_update_doc(i: int, schema: zvec.CollectionSchema) -> zvec.Doc: + doc_fields = {} + doc_vectors = {} + doc_fields, doc_vectors = generate_vectordict_update(i, schema) + doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors) + return doc + +def run_zvec_updatedoc_operations(args_json_str): + args = json.loads(args_json_str) + collection_path = args["collection_path"] + num_docs_to_update = args.get("num_docs_to_update", 100) # Number of documents to update + batch_size = args.get("batch_size", 10) # Batch size for each update + delay_between_batches = args.get("delay_between_batches", 0.1) # Delay between batches + + print(f"[Subprocess] Starting Zvec update document operations on {collection_path} at: {time.strftime('%Y-%m-%d %H:%M:%S')}") + print(f"[Subprocess] Will update {num_docs_to_update} documents in batches of {batch_size}") + + try: + # Open existing collection + collection = zvec.open(collection_path) + print(f"[Subprocess] Successfully opened collection.") + + updated_count = 0 + for i in range(0, num_docs_to_update, batch_size): + # Calculate the number of documents in the current batch + current_batch_size = min(batch_size, num_docs_to_update - i) + + # Generate list of documents to update + docs = [] + for j in range(current_batch_size): + doc_id = i + j + # Use the existing document ID and update it + doc = generate_update_doc(doc_id, collection.schema) + docs.append(doc) + + print(f"[Subprocess] Updating batch {i//batch_size + 1}, documents {i} to {i + current_batch_size - 1}") + + # Perform update operation + res = collection.update(docs) + + # Check return value - update returns a list of operation results + if res and 
len(res) > 0: + updated_count += len(docs) + print(f"[Subprocess] Batch update successful, updated {len(docs)} documents, total updated: {updated_count}") + else: + print(f"[Subprocess] Batch update may have failed, return value: {res}") + + # Add small delay to allow interruption opportunity + time.sleep(delay_between_batches) + + print(f"[Subprocess] Completed updating {updated_count} documents.") + + if hasattr(collection, "close"): + collection.close() + else: + del collection # Use del as fallback + print(f"[Subprocess] Closed collection after update operations.") + + except Exception as e: + print(f"[Subprocess] Error during document update operations: {e}") + import traceback + traceback.print_exc() + # Optionally re-raise or handle differently + raise # Re-raising may be useful depending on how parent process responds + + print(f"[Subprocess] Document update operations completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}") + + +if __name__ == "__main__": + args_json_str = sys.argv[1] + run_zvec_updatedoc_operations(args_json_str) +''' + + def test_updatedoc_simulate_crash_during_bulk_update(self, full_schema_1024, collection_option, basic_schema): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform bulk document update operations. + During the bulk update operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
+ """ + with tempfile.TemporaryDirectory() as temp_dir: + collection_path = f"{temp_dir}/test_collection_updatedoc_crash_recovery" + + # Step 1: Successfully create collection in main process and insert some documents + print( + f"[Test] Step 1: Creating collection in main process and inserting initial documents, path: {collection_path}...") + coll = zvec.create_and_open(path=collection_path, schema=full_schema_1024, option=collection_option) + assert coll is not None + print(f"[Test] Step 1.1: Collection created successfully.") + + # Verify initial data + single_doc = generate_doc(2001, coll.schema) + singledoc_and_check(coll, single_doc, is_delete=0) + print(f"[Test] Step 1.2: Verified collection data write successful.") + + # Insert initial documents that will be updated later + initial_docs = [] + for i in range(0, 200): # Insert 200 documents for updating + doc = generate_doc(i, coll.schema) + initial_docs.append(doc) + + insert_results = coll.insert(initial_docs) + print(f"[Test] Step 1.3: Inserted {len(initial_docs)} initial documents for updating.") + + del coll + print(f"[Test] Step 1.4: Closed collection.") + + # Step 2: Prepare and run subprocess for bulk update operations + # Write subprocess script to temporary file + subprocess_script_path = f"{temp_dir}/zvec_subprocess_updatedoc.py" + with open(subprocess_script_path, 'w', encoding='utf-8') as f: + f.write(self.ZVEC_SUBPROCESS_SCRIPT_UPDATEDOC) + + # Prepare subprocess parameters + subprocess_args = { + "collection_path": collection_path, + "num_docs_to_update": 100, # Update 100 documents to allow for interruption + "batch_size": 10, # Update 10 documents per batch + "delay_between_batches": 0.2 # 0.2 second delay between batches to increase interruption timing + } + args_json_str = json.dumps(subprocess_args) + + print(f"[Test] Step 2: Starting bulk update operations in subprocess, path: {collection_path}") + # Start subprocess to execute bulk update operations + proc = subprocess.Popen([ + 
sys.executable, subprocess_script_path, args_json_str + ]) + + # Wait briefly to allow subprocess to begin update operations + time.sleep(2) # Wait 2 seconds to allow update loop to start + + print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + # Suddenly kill subprocess (simulate power failure or crash during update operations) + if psutil: + try: + # Use psutil to reliably terminate process and all its children + parent = psutil.Process(proc.pid) + children = parent.children(recursive=True) + for child in children: + child.kill() + parent.kill() + proc.wait(timeout=5) + except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + # If psutil is unavailable or process has been terminated, fall back to original method + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + else: + # If no psutil, use standard method to terminate process + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + print(f"[Test] Subprocess {proc.pid} has been terminated.") + + # Clean up temporary script file + os.remove(subprocess_script_path) + + # Step 3: Verify recovery situation in main process + print( + f"[Test] Step 3: Attempting to open collection after simulating crash during document update operations...") + + # Verification 3.1: Check if collection can be successfully opened after crash + recovered_collection = zvec.open(collection_path) + assert recovered_collection is not None, "Cannot open collection after crash" + print(f"[Test] Step 3.1: Verified collection can be opened after crash...") + + # Verification 3.2: Check data integrity (document count and content) + 
print(f"[Test] Step 3.2: Verifying data integrity...") + query_result = recovered_collection.query(topk=1024) + # We expect some documents to have been successfully updated before crash + # The exact number depends on when the crash occurred during the bulk update process + print( + f"[Test] Step 3.2: Found {len(query_result)} documents after crash (expected 0-{subprocess_args['num_docs_to_update']})") + + # Verify quantity consistency + initial_doc_count = 200 # Initial docs inserted + extra_doc_count = 1 # Extra doc with ID 2001 + expected_doc_count = initial_doc_count + extra_doc_count + assert recovered_collection.stats.doc_count == expected_doc_count, f"Expected {expected_doc_count} docs, got {recovered_collection.stats.doc_count}" + assert len(query_result) <= recovered_collection.stats.doc_count, ( + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + + # Verify existing documents have correct structure + if len(query_result) > 0: + for doc in query_result[:100]: # Limit to first 100 for efficiency + fetched_docs = recovered_collection.fetch([doc.id]) + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + + # Generate expected doc to compare + assert is_doc_equal(fetched_docs[doc.id], doc, recovered_collection.schema,include_vector=False), ( + f"result doc={fetched_docs[doc.id]},doc_exp={doc}") + + # Verification 3.4: Check if index is complete and query function works properly + print(f"[Test] Step 3.4: Verifying index integrity and query function...") + filtered_query = recovered_collection.query(filter=f"int32_field >= -100") + print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + assert len(filtered_query) > 0 + + for doc in query_result[:50]: # Check first 50 for efficiency + fetched_docs = recovered_collection.fetch([doc.id]) + assert is_doc_equal(fetched_docs[doc.id], doc, recovered_collection.schema,include_vector=False), ( + f"result 
doc={fetched_docs[doc.id]},doc_exp={doc}")
+
+            # Verification 3.5: Test insertion functionality after recovery
+            print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery")
+            test_insert_doc = generate_doc(9999, full_schema_1024)  # Use original schema from fixture
+            singledoc_and_check(recovered_collection, test_insert_doc, operator="insert",is_delete=0)
+
+            # Verification 3.6: Test update functionality after recovery
+            print(f"[Test] Step 3.6: Testing update functionality after recovery...")
+            updated_doc = generate_update_doc(2001, recovered_collection.schema)
+            singledoc_and_check(recovered_collection, updated_doc, operator="update",is_delete=0)
+
+
+            # Verification 3.7: Test deletion functionality after recovery (if supported)
+            print(f"[Test] Step 3.7: Testing deletion functionality after recovery...")
+            doc_ids = ["9999"]
+            result = recovered_collection.delete(doc_ids)
+            assert len(result) == len(doc_ids)
+            for item in result:
+                assert item.ok()
diff --git a/python/tests/detail/test_collection_crash_recovery_upsertdoc.py b/python/tests/detail/test_collection_crash_recovery_upsertdoc.py
new file mode 100644
index 000000000..680da9106
--- /dev/null
+++ b/python/tests/detail/test_collection_crash_recovery_upsertdoc.py
@@ -0,0 +1,514 @@
+# -*- coding: utf-8 -*-
+"""
+test_collection_crash_recovery_upsertdoc.py
+
+This script is used to test Zvec's recovery capability after simulating a "power failure" (forced process termination) during bulk document upsert (upsertdoc).
+It first successfully creates a collection in the main process and inserts some documents, then starts a subprocess to open the collection and perform bulk document upsert operations.
+During the upsert operation, the subprocess is forcibly terminated to simulate a scenario where the Zvec process crashes during document upsert.
+Finally, the main process attempts to reopen the collection and verify its state and functionality.
+ +Note: This script assumes that Zvec is a Python extension library. Directly killing the Python subprocess running Zvec operations +may not perfectly simulate the impact of system-level power failure on the C++ layer, but it can test the file state of the Zvec Python extension after abnormal process termination. +""" + +import zvec +import time +import tempfile +import subprocess +import signal +import sys +import os +import pytest +import json # Used to pass operation parameters and results +import threading + +try: + import psutil # Used for more reliable process management +except ImportError: + psutil = None # If psutil is not installed, set it to None +from fixture_helper import * +from doc_helper import generate_doc, generate_update_doc + +from distance_helper import * +from fixture_helper import * +from doc_helper import * + + + + +def singledoc_and_check( + collection: Collection, insert_doc, operator="insert", is_delete=1 +): + if operator == "insert": + result = collection.insert(insert_doc) + elif operator == "upsert": + result = collection.upsert(insert_doc) + elif operator == "update": + result = collection.update(insert_doc) + else: + logging.error("operator value is error!") + + assert bool(result) + assert result.ok() + + stats = collection.stats + assert stats is not None + #assert stats.doc_count == 1 + + fetched_docs = collection.fetch([insert_doc.id]) + assert len(fetched_docs) == 1 + assert insert_doc.id in fetched_docs + + fetched_doc = fetched_docs[insert_doc.id] + + assert is_doc_equal(fetched_doc, insert_doc, collection.schema) + assert hasattr(fetched_doc, "score"), "Document should have a score attribute" + assert fetched_doc.score == 0.0, ( + "Fetch operation should return default score of 0.0" + ) + + for k, v in DEFAULT_VECTOR_FIELD_NAME.items(): + if v != {}: + query_result = collection.query( + VectorQuery(field_name=v, vector=insert_doc.vectors[v]), + topk=1024, + ) + assert len(query_result) > 0, ( + f"Expected at least 1 query 
result, but got {len(query_result)}" + ) + + found_doc = None + for doc in query_result: + if doc.id == insert_doc.id: + found_doc = doc + break + assert found_doc is not None, ( + f"deleted document {insert_doc.id} not found in query results" + ) + assert is_doc_equal(found_doc, insert_doc, collection.schema, True, False) + if is_delete == 1: + collection.delete(insert_doc.id) + assert collection.stats.doc_count == 0, "Document should be deleted" + + +class TestCollectionCrashRecoveryUpsertDoc: + """ + Test Zvec collection recovery capability after simulating power failure/process crash during document update. + Focus on verifying whether the file remains consistent after interruption of document update operations, + and whether it can be reopened and used normally. + """ + + # Script content for subprocess to execute Zvec document update operations + # Write this script content to a temporary file and execute it in the subprocess. + ZVEC_SUBPROCESS_SCRIPT_UPSERTDOC = ''' +import zvec +import time +import json +import sys +import os +import math +import random +import string +from typing import Literal +from zvec.typing import DataType, StatusCode, MetricType, QuantizeType +from zvec import ( + CollectionOption, + InvertIndexParam, + HnswIndexParam, + FlatIndexParam, + IVFIndexParam, + FieldSchema, + VectorSchema, + CollectionSchema, + Collection, + Doc, + VectorQuery, +) + +def generate_constant_vector( + i: int, dimension: int, dtype: Literal["int8", "float16", "float32"] = "float32" +): + if dtype == "int8": + vec = [(i % 127)] * dimension + vec[i % dimension] = ((i + 1) % 127) + else: + base_val = (i % 1000) / 256.0 + special_val = ((i + 1) % 1000) / 256.0 + vec = [base_val] * dimension + vec[i % dimension] = special_val + + return vec + + +def generate_sparse_vector(i: int): + return {i: i + 0.1} + + +def generate_vectordict(i: int, schema: zvec.CollectionSchema): + doc_fields = {} + doc_vectors = {} + for field in schema.fields: + if field.data_type == 
zvec.DataType.BOOL: + doc_fields[field.name] = i % 2 == 0 + elif field.data_type == zvec.DataType.INT32: + doc_fields[field.name] = i + elif field.data_type == zvec.DataType.UINT32: + doc_fields[field.name] = i + elif field.data_type == zvec.DataType.INT64: + doc_fields[field.name] = i + elif field.data_type == zvec.DataType.UINT64: + doc_fields[field.name] = i + elif field.data_type == zvec.DataType.FLOAT: + doc_fields[field.name] = float(i) + 0.1 + elif field.data_type == zvec.DataType.DOUBLE: + doc_fields[field.name] = float(i) + 0.11 + elif field.data_type == zvec.DataType.STRING: + doc_fields[field.name] = f"test_{i}" + elif field.data_type == zvec.DataType.ARRAY_BOOL: + doc_fields[field.name] = [i % 2 == 0, i % 3 == 0] + elif field.data_type == zvec.DataType.ARRAY_INT32: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_UINT32: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_INT64: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_UINT64: + doc_fields[field.name] = [i, i + 1, i + 2] + elif field.data_type == zvec.DataType.ARRAY_FLOAT: + doc_fields[field.name] = [float(i + 0.1), float(i + 1.1), float(i + 2.1)] + elif field.data_type == zvec.DataType.ARRAY_DOUBLE: + doc_fields[field.name] = [float(i + 0.11), float(i + 1.11), float(i + 2.11)] + elif field.data_type == zvec.DataType.ARRAY_STRING: + doc_fields[field.name] = [f"test_{i}", f"test_{i + 1}", f"test_{i + 2}"] + else: + raise ValueError(f"Unsupported field type: {field.data_type}") + + for vector in schema.vectors: + if vector.data_type == zvec.DataType.VECTOR_FP16: + doc_vectors[vector.name] = generate_constant_vector( + i, vector.dimension, "float16" + ) + elif vector.data_type == zvec.DataType.VECTOR_FP32: + doc_vectors[vector.name] = generate_constant_vector( + i, vector.dimension, "float32" + ) + elif vector.data_type == zvec.DataType.VECTOR_INT8: + doc_vectors[vector.name] = 
generate_constant_vector( + i, + vector.dimension, + "int8", + ) + elif vector.data_type == zvec.DataType.SPARSE_VECTOR_FP32: + doc_vectors[vector.name] = generate_sparse_vector(i) + elif vector.data_type == zvec.DataType.SPARSE_VECTOR_FP16: + doc_vectors[vector.name] = generate_sparse_vector(i) + else: + raise ValueError(f"Unsupported vector type: {vector.data_type}") + + return doc_fields, doc_vectors + + +def generate_vectordict_update(i: int, schema: zvec.CollectionSchema) -> zvec.Doc: + doc_fields = {} + doc_vectors = {} + doc_fields = {} + doc_vectors = {} + for field in schema.fields: + if field.data_type == DataType.BOOL: + doc_fields[field.name] = (i+1) % 2 == 0 + elif field.data_type == DataType.INT32: + doc_fields[field.name] = i+1 + elif field.data_type == DataType.UINT32: + doc_fields[field.name] = i+1 + elif field.data_type == DataType.INT64: + doc_fields[field.name] = i+1 + elif field.data_type == DataType.UINT64: + doc_fields[field.name] = i+1 + elif field.data_type == DataType.FLOAT: + doc_fields[field.name] = float(i+1) + 0.1 + elif field.data_type == DataType.DOUBLE: + doc_fields[field.name] = float(i+1) + 0.11 + elif field.data_type == DataType.STRING: + doc_fields[field.name] = f"test_{i+1}" + elif field.data_type == DataType.ARRAY_BOOL: + doc_fields[field.name] = [(i+1) % 2 == 0, (i+1) % 3 == 0] + elif field.data_type == DataType.ARRAY_INT32: + doc_fields[field.name] = [i + 1, i + 1, i + 2] + elif field.data_type == DataType.ARRAY_UINT32: + doc_fields[field.name] = [i + 1, i + 1, i + 2] + elif field.data_type == DataType.ARRAY_INT64: + doc_fields[field.name] = [i + 1, i + 1, i + 2] + elif field.data_type == DataType.ARRAY_UINT64: + doc_fields[field.name] = [i + 1, i + 1, i + 2] + elif field.data_type == DataType.ARRAY_FLOAT: + doc_fields[field.name] = [float(i + 1.1), float(i + 2.1), float(i + 3.1)] + elif field.data_type == DataType.ARRAY_DOUBLE: + doc_fields[field.name] = [float(i + 1.11), float(i + 2.11), float(i + 3.11)] + elif 
field.data_type == DataType.ARRAY_STRING: + doc_fields[field.name] = [f"test_{i+1}", f"test_{i + 2}", f"test_{i + 3}"] + else: + raise ValueError(f"Unsupported field type: {field.data_type}") + for vector in schema.vectors: + if vector.data_type == DataType.VECTOR_FP16: + doc_vectors[vector.name] = generate_constant_vector( + i+1, vector.dimension, "float16" + ) + elif vector.data_type == DataType.VECTOR_FP32: + doc_vectors[vector.name] = generate_constant_vector( + i+1, vector.dimension, "float32" + ) + elif vector.data_type == DataType.VECTOR_INT8: + doc_vectors[vector.name] = generate_constant_vector( + i+1, + vector.dimension, + "int8", + ) + elif vector.data_type == DataType.SPARSE_VECTOR_FP32: + doc_vectors[vector.name] = generate_sparse_vector(i+1) + elif vector.data_type == DataType.SPARSE_VECTOR_FP16: + doc_vectors[vector.name] = generate_sparse_vector(i+1) + else: + raise ValueError(f"Unsupported vector type: {vector.data_type}") + return doc_fields, doc_vectors + + + +def generate_doc(i: int, schema: zvec.CollectionSchema) -> zvec.Doc: + doc_fields = {} + doc_vectors = {} + doc_fields, doc_vectors = generate_vectordict(i, schema) + doc = zvec.Doc(id=str(i), fields=doc_fields, vectors=doc_vectors) + return doc + + +def generate_update_doc(i: int, schema: zvec.CollectionSchema) -> zvec.Doc: + doc_fields = {} + doc_vectors = {} + doc_fields, doc_vectors = generate_vectordict_update(i, schema) + doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors) + return doc + + +def run_zvec_upsertdoc_operations(args_json_str): + args = json.loads(args_json_str) + collection_path = args["collection_path"] + num_docs_to_update = args.get("num_docs_to_update", 100) # Number of documents to update + batch_size = args.get("batch_size", 10) # Batch size for each update + delay_between_batches = args.get("delay_between_batches", 0.1) # Delay between batches + + print(f"[Subprocess] Starting Zvec update document operations on {collection_path} at: 
{time.strftime('%Y-%m-%d %H:%M:%S')}") + print(f"[Subprocess] Will update {num_docs_to_update} documents in batches of {batch_size}") + + try: + # Open existing collection + collection = zvec.open(collection_path) + print(f"[Subprocess] Successfully opened collection.") + + upserted_count = 0 + for i in range(0, num_docs_to_update, batch_size): + # Calculate the number of documents in the current batch + current_batch_size = min(batch_size, num_docs_to_update - i) + + # Generate list of documents to update + docs = [] + for j in range(current_batch_size): + doc_id = i + j + # Use the existing document ID and update it + doc = generate_update_doc(doc_id, collection.schema) + docs.append(doc) + + print(f"[Subprocess] Updating batch {i//batch_size + 1}, documents {i} to {i + current_batch_size - 1}") + + # Perform update operation + res = collection.upsert(docs) + + # Check return value - update returns a list of operation results + if res and len(res) > 0: + upserted_count += len(docs) + print(f"[Subprocess] Batch upsert successful, upserted {len(docs)} documents, total upserted: {upserted_count}") + else: + print(f"[Subprocess] Batch update may have failed, return value: {res}") + + # Add small delay to allow interruption opportunity + time.sleep(delay_between_batches) + + print(f"[Subprocess] Completed upserting {upserted_count} documents.") + + if hasattr(collection, "close"): + collection.close() + else: + del collection # Use del as fallback + print(f"[Subprocess] Closed collection after update operations.") + + except Exception as e: + print(f"[Subprocess] Error during document update operations: {e}") + import traceback + traceback.print_exc() + # Optionally re-raise or handle differently + raise # Re-raising may be useful depending on how parent process responds + + print(f"[Subprocess] Document upsert operations completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}") + + +if __name__ == "__main__": + args_json_str = sys.argv[1] + 
run_zvec_upsertdoc_operations(args_json_str) +''' + + def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, collection_option, basic_schema): + """ + Scenario: First successfully create a Zvec collection in the main process and insert some documents. + Then start a subprocess to open the collection and perform bulk document update operations. + During the bulk update operation, forcibly terminate the subprocess (simulate power failure or process crash). + Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. + """ + with tempfile.TemporaryDirectory() as temp_dir: + collection_path = f"{temp_dir}/test_collection_upsertdoc_crash_recovery" + + # Step 1: Successfully create collection in main process and insert some documents + print( + f"[Test] Step 1: Creating collection in main process and inserting initial documents, path: {collection_path}...") + coll = zvec.create_and_open(path=collection_path, schema=full_schema_1024, option=collection_option) + assert coll is not None + print(f"[Test] Step 1.1: Collection created successfully.") + + # Verify initial data + single_doc = generate_doc(2001, coll.schema) + singledoc_and_check(coll, single_doc, is_delete=0) + print(f"[Test] Step 1.2: Verified collection data write successful.") + + # Insert initial documents that will be updated later + initial_docs = [] + for i in range(0, 50): # Insert 200 documents for updating + doc = generate_doc(i, coll.schema) + initial_docs.append(doc) + + insert_results = coll.insert(initial_docs) + print(f"[Test] Step 1.3: Inserted {len(initial_docs)} initial documents for upserting.") + + del coll + print(f"[Test] Step 1.4: Closed collection.") + + # Step 2: Prepare and run subprocess for bulk update operations + # Write subprocess script to temporary file + subprocess_script_path = f"{temp_dir}/zvec_subprocess_usertdoc.py" + with open(subprocess_script_path, 'w', encoding='utf-8') as f: + 
f.write(self.ZVEC_SUBPROCESS_SCRIPT_UPSERTDOC) + + # Prepare subprocess parameters + subprocess_args = { + "collection_path": collection_path, + "num_docs_to_upsert": 100, # Update 100 documents to allow for interruption + "batch_size": 10, # Update 10 documents per batch + "delay_between_batches": 0.2 # 0.2 second delay between batches to increase interruption timing + } + args_json_str = json.dumps(subprocess_args) + + print(f"[Test] Step 2: Starting bulk update operations in subprocess, path: {collection_path}") + # Start subprocess to execute bulk update operations + proc = subprocess.Popen([ + sys.executable, subprocess_script_path, args_json_str + ]) + + # Wait briefly to allow subprocess to begin update operations + time.sleep(2) # Wait 2 seconds to allow update loop to start + + print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + # Suddenly kill subprocess (simulate power failure or crash during update operations) + if psutil: + try: + # Use psutil to reliably terminate process and all its children + parent = psutil.Process(proc.pid) + children = parent.children(recursive=True) + for child in children: + child.kill() + parent.kill() + proc.wait(timeout=5) + except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + # If psutil is unavailable or process has been terminated, fall back to original method + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + else: + # If no psutil, use standard method to terminate process + proc.send_signal(signal.SIGKILL) + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + proc.kill() + proc.wait() + print(f"[Test] Subprocess {proc.pid} has been terminated.") + + # Clean up 
temporary script file + os.remove(subprocess_script_path) + + # Step 3: Verify recovery situation in main process + print( + f"[Test] Step 3: Attempting to open collection after simulating crash during document update operations...") + + # Verification 3.1: Check if collection can be successfully opened after crash + recovered_collection = zvec.open(collection_path) + assert recovered_collection is not None, "Cannot open collection after crash" + print(f"[Test] Step 3.1: Verified collection can be opened after crash...") + + # Verification 3.2: Check data integrity (document count and content) + print(f"[Test] Step 3.2: Verifying data integrity...") + query_result = recovered_collection.query(topk=1024) + # We expect some documents to have been successfully updated before crash + # The exact number depends on when the crash occurred during the bulk update process + print( + f"[Test] Step 3.2: Found {len(query_result)} documents after crash (expected 0-{subprocess_args['num_docs_to_upsert']})") + + # Verify quantity consistency + current_count = recovered_collection.stats.doc_count + assert recovered_collection.stats.doc_count >= 51 + assert len(query_result) <= recovered_collection.stats.doc_count, ( + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + + # Verify existing documents have correct structure + if len(query_result) > 0: + for doc in query_result[:100]: # Limit to first 100 for efficiency + fetched_docs = recovered_collection.fetch([doc.id]) + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + + # Generate expected doc to compare + assert is_doc_equal(fetched_docs[doc.id], doc, recovered_collection.schema,include_vector=False), ( + f"result doc={fetched_docs[doc.id]},doc_exp={doc}") + + # Verification 3.4: Check if index is complete and query function works properly + print(f"[Test] Step 3.4: Verifying index integrity and query function...") + filtered_query = 
recovered_collection.query(filter=f"int32_field >= -100") + print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + assert len(filtered_query) > 0 + + for doc in query_result[:50]: # Check first 50 for efficiency + fetched_docs = recovered_collection.fetch([doc.id]) + + assert len(fetched_docs) == 1 + assert doc.id in fetched_docs + assert is_doc_equal(fetched_docs[doc.id], doc, recovered_collection.schema,include_vector=False), ( + f"result doc={fetched_docs[doc.id]},doc_exp={doc}") + + # Verification 3.5: Test insertion functionality after recovery + print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") + test_insert_doc = generate_doc(9999, full_schema_1024) # Use original schema from fixture + singledoc_and_check(recovered_collection, test_insert_doc, operator="insert", is_delete=0) + + # Verification 3.6: Test update functionality after recovery + print(f"[Test] Step 3.6: Testing update functionality after recovery...") + updated_doc = generate_update_doc(2001, recovered_collection.schema) + singledoc_and_check(recovered_collection, updated_doc, operator="update", is_delete=0) + + # Verification 3.7: Test deletion functionality after recovery (if supported) + print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") + doc_ids = ["9999"] + result = recovered_collection.delete(doc_ids) + assert len(result) == len(doc_ids) + for item in result: + assert item.ok() \ No newline at end of file From a0e84657afa2bb6391fa1d2e66d5580b0bd70ab3 Mon Sep 17 00:00:00 2001 From: iaojnh Date: Tue, 24 Mar 2026 07:06:49 +0000 Subject: [PATCH 3/4] test: optimize crash recovery and open,dml test cases --- ...est_collection_crash_recovery_addcolumn.py | 170 +++++++++----- ...t_collection_crash_recovery_altercolumn.py | 193 +++++++++++----- ...t_collection_crash_recovery_createindex.py | 212 ++++++++++++------ ...est_collection_crash_recovery_deletedoc.py | 157 ++++++++----- 
...st_collection_crash_recovery_dropcolumn.py | 182 ++++++++++----- ...est_collection_crash_recovery_insertdoc.py | 109 +++++---- ...est_collection_crash_recovery_updatedoc.py | 117 ++++++---- ...est_collection_crash_recovery_upsertdoc.py | 104 ++++++--- python/tests/detail/test_collection_dml.py | 100 ++------- python/tests/detail/test_collection_open.py | 9 +- 10 files changed, 867 insertions(+), 486 deletions(-) diff --git a/python/tests/detail/test_collection_crash_recovery_addcolumn.py b/python/tests/detail/test_collection_crash_recovery_addcolumn.py index 8ae7b3658..e4231b2bd 100644 --- a/python/tests/detail/test_collection_crash_recovery_addcolumn.py +++ b/python/tests/detail/test_collection_crash_recovery_addcolumn.py @@ -32,7 +32,7 @@ def singledoc_and_check( - collection: Collection, insert_doc, operator="insert", is_delete=1 + collection: Collection, insert_doc, operator="insert", is_delete=1 ): if operator == "insert": result = collection.insert(insert_doc) @@ -48,7 +48,7 @@ def singledoc_and_check( stats = collection.stats assert stats is not None - #assert stats.doc_count == 1 + # assert stats.doc_count == 1 fetched_docs = collection.fetch([insert_doc.id]) assert len(fetched_docs) == 1 @@ -95,7 +95,7 @@ class TestCollectionCrashRecoveryAddColumn: # Script content for subprocess to execute Zvec column addition operations # Write this script content to a temporary file and execute it in the subprocess. 
- ZVEC_SUBPROCESS_SCRIPT_ADDCOLUMN = ''' + ZVEC_SUBPROCESS_SCRIPT_ADDCOLUMN = """ import zvec import time import json @@ -182,63 +182,89 @@ def run_zvec_addcolumn_operations(args_json_str): if __name__ == "__main__": args_json_str = sys.argv[1] run_zvec_addcolumn_operations(args_json_str) -''' +""" - def test_addcolumn_simulate_crash_during_column_addition_int32(self, full_schema_1024, collection_option): + def test_addcolumn_simulate_crash_during_column_addition_int32( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform INT32 column addition operations. During the column addition operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. """ - self._test_addcolumn_with_crash_recovery(full_schema_1024, collection_option, "INT32") + self._test_addcolumn_with_crash_recovery( + full_schema_1024, collection_option, "INT32" + ) - def test_addcolumn_simulate_crash_during_column_addition_int64(self, full_schema_1024, collection_option): + def test_addcolumn_simulate_crash_during_column_addition_int64( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform INT64 column addition operations. During the column addition operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
""" - self._test_addcolumn_with_crash_recovery(full_schema_1024, collection_option, "INT64") + self._test_addcolumn_with_crash_recovery( + full_schema_1024, collection_option, "INT64" + ) - def test_addcolumn_simulate_crash_during_column_addition_uint32(self, full_schema_1024, collection_option): + def test_addcolumn_simulate_crash_during_column_addition_uint32( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform UINT32 column addition operations. During the column addition operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. """ - self._test_addcolumn_with_crash_recovery(full_schema_1024, collection_option, "UINT32") + self._test_addcolumn_with_crash_recovery( + full_schema_1024, collection_option, "UINT32" + ) - def test_addcolumn_simulate_crash_during_column_addition_uint64(self, full_schema_1024, collection_option): + def test_addcolumn_simulate_crash_during_column_addition_uint64( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform UINT64 column addition operations. During the column addition operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
""" - self._test_addcolumn_with_crash_recovery(full_schema_1024, collection_option, "UINT64") + self._test_addcolumn_with_crash_recovery( + full_schema_1024, collection_option, "UINT64" + ) - def test_addcolumn_simulate_crash_during_column_addition_float(self, full_schema_1024, collection_option): + def test_addcolumn_simulate_crash_during_column_addition_float( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform FLOAT column addition operations. During the column addition operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. """ - self._test_addcolumn_with_crash_recovery(full_schema_1024, collection_option, "FLOAT") + self._test_addcolumn_with_crash_recovery( + full_schema_1024, collection_option, "FLOAT" + ) - def test_addcolumn_simulate_crash_during_column_addition_double(self, full_schema_1024, collection_option): + def test_addcolumn_simulate_crash_during_column_addition_double( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform DOUBLE column addition operations. During the column addition operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
""" - self._test_addcolumn_with_crash_recovery(full_schema_1024, collection_option, "DOUBLE") + self._test_addcolumn_with_crash_recovery( + full_schema_1024, collection_option, "DOUBLE" + ) - def _test_addcolumn_with_crash_recovery(self, schema, collection_option, column_data_type): + def _test_addcolumn_with_crash_recovery( + self, schema, collection_option, column_data_type + ): """ Common method to test column addition with crash recovery for different column types. """ @@ -246,8 +272,12 @@ def _test_addcolumn_with_crash_recovery(self, schema, collection_option, column_ collection_path = f"{temp_dir}/test_collection_addcolumn_crash_recovery_{column_data_type.lower()}" # Step 1: Successfully create collection in main process and insert some documents - print(f"[Test] Step 1: Creating collection in main process, path: {collection_path}...") - coll = zvec.create_and_open(path=collection_path, schema=schema, option=collection_option) + print( + f"[Test] Step 1: Creating collection in main process, path: {collection_path}..." + ) + coll = zvec.create_and_open( + path=collection_path, schema=schema, option=collection_option + ) assert coll is not None print(f"[Test] Step 1.1: Collection created successfully.") exp_doc_dict = {} @@ -256,14 +286,18 @@ def _test_addcolumn_with_crash_recovery(self, schema, collection_option, column_ exp_doc_dict[i] = {} doc = generate_doc(i, coll.schema) result = coll.insert([doc]) - assert result is not None and len(result) > 0, f"Failed to insert document {i}" + assert result is not None and len(result) > 0, ( + f"Failed to insert document {i}" + ) exp_doc_dict[i] = doc print(f"[Test] Step 1.2: Inserted 100 documents for column operations.") # Verify collection state before crash initial_doc_count = coll.stats.doc_count - print(f"[Test] Step 1.3: Collection has {initial_doc_count} documents before crash simulation.") + print( + f"[Test] Step 1.3: Collection has {initial_doc_count} documents before crash simulation." 
+ ) del coll print(f"[Test] Step 1.4: Closed collection.") @@ -271,7 +305,7 @@ def _test_addcolumn_with_crash_recovery(self, schema, collection_option, column_ # Step 2: Prepare and run subprocess for column addition operations # Write subprocess script to temporary file subprocess_script_path = f"{temp_dir}/zvec_subprocess_addcolumn.py" - with open(subprocess_script_path, 'w', encoding='utf-8') as f: + with open(subprocess_script_path, "w", encoding="utf-8") as f: f.write(self.ZVEC_SUBPROCESS_SCRIPT_ADDCOLUMN) # Prepare subprocess parameters @@ -280,21 +314,24 @@ def _test_addcolumn_with_crash_recovery(self, schema, collection_option, column_ "column_field_name": "test_new_column", # Use appropriate field name for this test "column_data_type": column_data_type, # Type of column to add "add_column_iterations": 20, # Number of column addition iterations to increase interruption chance - "delay_between_additions": 0.3 # Delay between column additions to allow interruption opportunity + "delay_between_additions": 0.3, # Delay between column additions to allow interruption opportunity } args_json_str = json.dumps(subprocess_args) print( - f"[Test] Step 2: Starting {column_data_type} column addition operations in subprocess, path: {collection_path}") + f"[Test] Step 2: Starting {column_data_type} column addition operations in subprocess, path: {collection_path}" + ) # Start subprocess to execute column addition operations - proc = subprocess.Popen([ - sys.executable, subprocess_script_path, args_json_str - ]) + proc = subprocess.Popen( + [sys.executable, subprocess_script_path, args_json_str] + ) # Wait briefly to allow subprocess to begin column addition operations time.sleep(3) # Wait 3 seconds to allow column addition process to start - print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + print( + f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}..." 
+ ) # Suddenly kill subprocess (simulate power failure or crash during column addition operations) if psutil: try: @@ -305,13 +342,19 @@ def _test_addcolumn_with_crash_recovery(self, schema, collection_option, column_ child.kill() parent.kill() proc.wait(timeout=5) - except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + except ( + psutil.NoSuchProcess, + psutil.AccessDenied, + subprocess.TimeoutExpired, + ): # If psutil is unavailable or process has been terminated, fall back to original method proc.send_signal(signal.SIGKILL) try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() else: @@ -320,7 +363,9 @@ def _test_addcolumn_with_crash_recovery(self, schema, collection_option, column_ try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() print(f"[Test] Subprocess {proc.pid} has been terminated.") @@ -330,10 +375,13 @@ def _test_addcolumn_with_crash_recovery(self, schema, collection_option, column_ # Step 3: Verify recovery situation in main process print( - f"[Test] Step 3: Attempting to open collection after simulating crash during column addition operations...") + f"[Test] Step 3: Attempting to open collection after simulating crash during column addition operations..." 
+ ) # Verification 3.1: Check if collection can be successfully opened after crash recovered_collection = zvec.open(collection_path) - assert recovered_collection is not None, "Cannot open collection after crash" + assert recovered_collection is not None, ( + "Cannot open collection after crash" + ) print(f"[Test] Step 3.1: Verified collection can be opened after crash...") # Verification 3.2: Check data integrity (document count and content) @@ -341,48 +389,58 @@ def _test_addcolumn_with_crash_recovery(self, schema, collection_option, column_ query_result = recovered_collection.query(topk=1024) # We expect some documents to have been successfully inserted before crash # The exact number depends on when the crash occurred during the bulk insertion process - print( - f"[Test] Step 3.2: Found {len(query_result)} documents after crash") + print(f"[Test] Step 3.2: Found {len(query_result)} documents after crash") current_count = recovered_collection.stats.doc_count assert recovered_collection.stats.doc_count >= 1 assert len(query_result) <= current_count, ( - f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}" + ) # Verify existing documents have correct structure if len(query_result) > 0: for doc in query_result[:1024]: fetched_docs = recovered_collection.fetch([doc.id]) - '''print("doc.id,fetched_docs:\n") - print(doc.id, fetched_docs)''' + """print("doc.id,fetched_docs:\n") + print(doc.id, fetched_docs)""" exp_doc = exp_doc_dict[int(doc.id)] assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema), ( - f"result doc={fetched_docs},doc_exp={exp_doc}") + assert is_doc_equal( + fetched_docs[doc.id], exp_doc, recovered_collection.schema + ), f"result doc={fetched_docs},doc_exp={exp_doc}" # 3.4: Check if query function works properly 
print(f"[Test] Step 3.4: Verifying query function after crash...") filtered_query = recovered_collection.query(filter=f"int32_field >=-100") - print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + print( + f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents" + ) assert len(filtered_query) > 0 for doc in query_result: fetched_docs = recovered_collection.fetch([doc.id]) exp_doc = exp_doc_dict[int(doc.id)] assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema), ( - f"result doc={fetched_docs},doc_exp={exp_doc}") + assert is_doc_equal( + fetched_docs[doc.id], exp_doc, recovered_collection.schema + ), f"result doc={fetched_docs},doc_exp={exp_doc}" # Verification 3.5: Test insertion functionality after recovery print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") - test_insert_doc = generate_doc(9999, schema) # Use original schema from fixture - singledoc_and_check(recovered_collection, test_insert_doc, operator="insert", is_delete=0) + test_insert_doc = generate_doc( + 9999, schema + ) # Use original schema from fixture + singledoc_and_check( + recovered_collection, test_insert_doc, operator="insert", is_delete=0 + ) # Verification 3.6: Test update functionality after recovery print(f"[Test] Step 3.6: Testing update functionality after recovery...") updated_doc = generate_update_doc(9999, recovered_collection.schema) - singledoc_and_check(recovered_collection, updated_doc, operator="update", is_delete=0) + singledoc_and_check( + recovered_collection, updated_doc, operator="update", is_delete=0 + ) # 3.7: Test deletion after recovery print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") @@ -418,17 +476,25 @@ def _test_addcolumn_with_crash_recovery(self, schema, collection_option, column_ recovered_collection.add_column( field_schema=FieldSchema("post_crash_column", 
data_type, nullable=True), expression="", - option=AddColumnOption() + option=AddColumnOption(), + ) + print( + f"[Test] Step 3.8: {column_data_type} Column addition succeeded after crash recovery" ) - print(f"[Test] Step 3.8: {column_data_type} Column addition succeeded after crash recovery") # Only do a simple verification after column addition stats_after_add_column = recovered_collection.stats - print(f"[Test] Step 3.8.1: Stats after column addition - doc_count: {stats_after_add_column.doc_count}") + print( + f"[Test] Step 3.8.1: Stats after column addition - doc_count: {stats_after_add_column.doc_count}" + ) # 3.9: Check if query function works properly after column addition print(f"[Test] Step 3.9: Verifying query function after column addition...") # Use a simpler query that matches the field type - filtered_query = recovered_collection.query(filter=f"int32_field >= 0", topk=10) - print(f"[Test] Step 3.9.1: Field-filtered query returned {len(filtered_query)} documents") - assert len(filtered_query) > 0 \ No newline at end of file + filtered_query = recovered_collection.query( + filter=f"int32_field >= 0", topk=10 + ) + print( + f"[Test] Step 3.9.1: Field-filtered query returned {len(filtered_query)} documents" + ) + assert len(filtered_query) > 0 diff --git a/python/tests/detail/test_collection_crash_recovery_altercolumn.py b/python/tests/detail/test_collection_crash_recovery_altercolumn.py index d6360c51e..0a8a37683 100644 --- a/python/tests/detail/test_collection_crash_recovery_altercolumn.py +++ b/python/tests/detail/test_collection_crash_recovery_altercolumn.py @@ -32,7 +32,7 @@ def singledoc_and_check( - collection: Collection, insert_doc, operator="insert", is_delete=1 + collection: Collection, insert_doc, operator="insert", is_delete=1 ): if operator == "insert": result = collection.insert(insert_doc) @@ -95,7 +95,7 @@ class TestCollectionCrashRecoveryaltercolumn: # Script content for subprocess to execute Zvec column update operations # Write this 
script content to a temporary file and execute it in the subprocess. - ZVEC_SUBPROCESS_SCRIPT_altercolumn = ''' + ZVEC_SUBPROCESS_SCRIPT_altercolumn = """ import zvec import time import json @@ -181,63 +181,89 @@ def run_zvec_altercolumn_operations(args_json_str): if __name__ == "__main__": args_json_str = sys.argv[1] run_zvec_altercolumn_operations(args_json_str) -''' +""" - def test_altercolumn_simulate_crash_during_column_update_int32(self, full_schema_1024, collection_option): + def test_altercolumn_simulate_crash_during_column_update_int32( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform INT32 column update operations. During the column update operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. """ - self._test_altercolumn_with_crash_recovery(full_schema_1024, collection_option, "INT32", "int32_field1") + self._test_altercolumn_with_crash_recovery( + full_schema_1024, collection_option, "INT32", "int32_field1" + ) - def test_altercolumn_simulate_crash_during_column_update_int64(self, full_schema_1024, collection_option): + def test_altercolumn_simulate_crash_during_column_update_int64( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform INT64 column update operations. During the column update operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
""" - self._test_altercolumn_with_crash_recovery(full_schema_1024, collection_option, "INT64", "int64_field1") + self._test_altercolumn_with_crash_recovery( + full_schema_1024, collection_option, "INT64", "int64_field1" + ) - def test_altercolumn_simulate_crash_during_column_update_uint32(self, full_schema_1024, collection_option): + def test_altercolumn_simulate_crash_during_column_update_uint32( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform UINT32 column update operations. During the column update operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. """ - self._test_altercolumn_with_crash_recovery(full_schema_1024, collection_option, "UINT32", "uint32_field1") + self._test_altercolumn_with_crash_recovery( + full_schema_1024, collection_option, "UINT32", "uint32_field1" + ) - def test_altercolumn_simulate_crash_during_column_update_uint64(self, full_schema_1024, collection_option): + def test_altercolumn_simulate_crash_during_column_update_uint64( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform UINT64 column update operations. During the column update operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
""" - self._test_altercolumn_with_crash_recovery(full_schema_1024, collection_option, "UINT64", "uint64_field1") + self._test_altercolumn_with_crash_recovery( + full_schema_1024, collection_option, "UINT64", "uint64_field1" + ) - def test_altercolumn_simulate_crash_during_column_update_float(self, full_schema_1024, collection_option): + def test_altercolumn_simulate_crash_during_column_update_float( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform FLOAT column update operations. During the column update operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. """ - self._test_altercolumn_with_crash_recovery(full_schema_1024, collection_option, "FLOAT", "float_field1") + self._test_altercolumn_with_crash_recovery( + full_schema_1024, collection_option, "FLOAT", "float_field1" + ) - def test_altercolumn_simulate_crash_during_column_update_double(self, full_schema_1024, collection_option): + def test_altercolumn_simulate_crash_during_column_update_double( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform DOUBLE column update operations. During the column update operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
""" - self._test_altercolumn_with_crash_recovery(full_schema_1024, collection_option, "DOUBLE", "double_field1") + self._test_altercolumn_with_crash_recovery( + full_schema_1024, collection_option, "DOUBLE", "double_field1" + ) - def _test_altercolumn_with_crash_recovery(self, schema, collection_option, update_data_type, update_field_name): + def _test_altercolumn_with_crash_recovery( + self, schema, collection_option, update_data_type, update_field_name + ): """ Common method to test column update with crash recovery for different column types. """ @@ -245,13 +271,18 @@ def _test_altercolumn_with_crash_recovery(self, schema, collection_option, updat collection_path = f"{temp_dir}/test_collection_altercolumn_crash_recovery_{update_data_type.lower()}" # Step 1: Successfully create collection in main process and insert some documents - print(f"[Test] Step 1: Creating collection in main process, path: {collection_path}...") - coll = zvec.create_and_open(path=collection_path, schema=schema, option=collection_option) + print( + f"[Test] Step 1: Creating collection in main process, path: {collection_path}..." 
+ ) + coll = zvec.create_and_open( + path=collection_path, schema=schema, option=collection_option + ) assert coll is not None print(f"[Test] Step 1.1: Collection created successfully.") - + # First, add the column we'll be updating later, so alter_column can modify it from zvec import FieldSchema, DataType, AddColumnOption + if update_data_type == "INT32": data_type = DataType.INT32 elif update_data_type == "INT64": @@ -266,30 +297,36 @@ def _test_altercolumn_with_crash_recovery(self, schema, collection_option, updat data_type = DataType.DOUBLE else: data_type = DataType.INT32 # Default fallback (supported type) - + # Add the column with initial schema initial_field = FieldSchema(update_field_name, data_type, nullable=True) coll.add_column( field_schema=initial_field, expression="", # Empty expression means fill with default/null values - option=AddColumnOption() + option=AddColumnOption(), + ) + print( + f"[Test] Step 1.1.1: Added column '{update_field_name}' to collection." ) - print(f"[Test] Step 1.1.1: Added column '{update_field_name}' to collection.") - + exp_doc_dict = {} # Insert some documents to have data for column operations for i in range(50): # Reduced for faster testing exp_doc_dict[i] = {} doc = generate_doc(i, coll.schema) result = coll.insert([doc]) - assert result is not None and len(result) > 0, f"Failed to insert document {i}" + assert result is not None and len(result) > 0, ( + f"Failed to insert document {i}" + ) exp_doc_dict[i] = doc print(f"[Test] Step 1.2: Inserted 50 documents for column operations.") # Verify collection state before crash initial_doc_count = coll.stats.doc_count - print(f"[Test] Step 1.3: Collection has {initial_doc_count} documents before crash simulation.") + print( + f"[Test] Step 1.3: Collection has {initial_doc_count} documents before crash simulation." 
+ ) del coll print(f"[Test] Step 1.4: Closed collection.") @@ -297,7 +334,7 @@ def _test_altercolumn_with_crash_recovery(self, schema, collection_option, updat # Step 2: Prepare and run subprocess for column update operations # Write subprocess script to temporary file subprocess_script_path = f"{temp_dir}/zvec_subprocess_altercolumn.py" - with open(subprocess_script_path, 'w', encoding='utf-8') as f: + with open(subprocess_script_path, "w", encoding="utf-8") as f: f.write(self.ZVEC_SUBPROCESS_SCRIPT_altercolumn) # Prepare subprocess parameters @@ -306,21 +343,24 @@ def _test_altercolumn_with_crash_recovery(self, schema, collection_option, updat "update_field_name": update_field_name, # Use appropriate field name for this test "update_data_type": update_data_type, # Type of field to update "update_iterations": 20, # Number of update iterations to increase interruption chance - "delay_between_updates": 0.3 # Delay between updates to allow interruption opportunity + "delay_between_updates": 0.3, # Delay between updates to allow interruption opportunity } args_json_str = json.dumps(subprocess_args) print( - f"[Test] Step 2: Starting {update_data_type} column update operations in subprocess, path: {collection_path}") + f"[Test] Step 2: Starting {update_data_type} column update operations in subprocess, path: {collection_path}" + ) # Start subprocess to execute column update operations - proc = subprocess.Popen([ - sys.executable, subprocess_script_path, args_json_str - ]) + proc = subprocess.Popen( + [sys.executable, subprocess_script_path, args_json_str] + ) # Wait briefly to allow subprocess to begin column update operations time.sleep(3) # Wait 3 seconds to allow column update process to start - print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + print( + f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}..." 
+ ) # Suddenly kill subprocess (simulate power failure or crash during column update operations) if psutil: try: @@ -331,13 +371,19 @@ def _test_altercolumn_with_crash_recovery(self, schema, collection_option, updat child.kill() parent.kill() proc.wait(timeout=5) - except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + except ( + psutil.NoSuchProcess, + psutil.AccessDenied, + subprocess.TimeoutExpired, + ): # If psutil is unavailable or process has been terminated, fall back to original method proc.send_signal(signal.SIGKILL) try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() else: @@ -346,7 +392,9 @@ def _test_altercolumn_with_crash_recovery(self, schema, collection_option, updat try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() print(f"[Test] Subprocess {proc.pid} has been terminated.") @@ -356,10 +404,13 @@ def _test_altercolumn_with_crash_recovery(self, schema, collection_option, updat # Step 3: Verify recovery situation in main process print( - f"[Test] Step 3: Attempting to open collection after simulating crash during column update operations...") + f"[Test] Step 3: Attempting to open collection after simulating crash during column update operations..." 
+ ) # Verification 3.1: Check if collection can be successfully opened after crash recovered_collection = zvec.open(collection_path) - assert recovered_collection is not None, "Cannot open collection after crash" + assert recovered_collection is not None, ( + "Cannot open collection after crash" + ) print(f"[Test] Step 3.1: Verified collection can be opened after crash...") # Verification 3.2: Check data integrity (document count and content) @@ -367,52 +418,68 @@ def _test_altercolumn_with_crash_recovery(self, schema, collection_option, updat query_result = recovered_collection.query(topk=1024) # We expect some documents to have been successfully inserted before crash # The exact number depends on when the crash occurred during the bulk insertion process - print( - f"[Test] Step 3.2: Found {len(query_result)} documents after crash") + print(f"[Test] Step 3.2: Found {len(query_result)} documents after crash") current_count = recovered_collection.stats.doc_count assert recovered_collection.stats.doc_count >= 1 assert len(query_result) <= current_count, ( - f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}" + ) # Verify existing documents have correct structure if len(query_result) > 0: for doc in query_result[:50]: # Limit to first 50 for efficiency fetched_docs = recovered_collection.fetch([doc.id]) - '''print("doc.id,fetched_docs:\n") - print(doc.id, fetched_docs)''' + """print("doc.id,fetched_docs:\n") + print(doc.id, fetched_docs)""" exp_doc = exp_doc_dict[int(doc.id)] assert len(fetched_docs) == 1 assert doc.id in fetched_docs # Note: The doc content may have been partially updated before the crash # So we only verify the schema structure and basic fields - assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema, - True, True), ( - f"result doc={fetched_docs},doc_exp={exp_doc}") + assert 
is_doc_equal( + fetched_docs[doc.id], + exp_doc, + recovered_collection.schema, + True, + True, + ), f"result doc={fetched_docs},doc_exp={exp_doc}" # 3.4: Check if query function works properly print(f"[Test] Step 3.4: Verifying query function after crash...") filtered_query = recovered_collection.query(filter=f"int32_field >=-100") - print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + print( + f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents" + ) assert len(filtered_query) > 0 for doc in query_result[:10]: # Check first 10 docs fetched_docs = recovered_collection.fetch([doc.id]) exp_doc = exp_doc_dict[int(doc.id)] assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema, - True, True), ( - f"result doc={fetched_docs},doc_exp={exp_doc}") + assert is_doc_equal( + fetched_docs[doc.id], + exp_doc, + recovered_collection.schema, + True, + True, + ), f"result doc={fetched_docs},doc_exp={exp_doc}" # Verification 3.5: Test insertion functionality after recovery print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") - test_insert_doc = generate_doc(9999, schema) # Use original schema from fixture - singledoc_and_check(recovered_collection, test_insert_doc, operator="insert", is_delete=0) + test_insert_doc = generate_doc( + 9999, schema + ) # Use original schema from fixture + singledoc_and_check( + recovered_collection, test_insert_doc, operator="insert", is_delete=0 + ) # Verification 3.6: Test update functionality after recovery print(f"[Test] Step 3.6: Testing update functionality after recovery...") updated_doc = generate_update_doc(9999, recovered_collection.schema) - singledoc_and_check(recovered_collection, updated_doc, operator="update", is_delete=0) + singledoc_and_check( + recovered_collection, updated_doc, operator="update", is_delete=0 + ) # 3.7: Test deletion after recovery 
print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") @@ -452,20 +519,30 @@ def _test_altercolumn_with_crash_recovery(self, schema, collection_option, updat recovered_collection.alter_column( old_name=update_field_name, field_schema=new_field, - option=AlterColumnOption() + option=AlterColumnOption(), + ) + print( + f"[Test] Step 3.8: {update_data_type} Column update succeeded after crash recovery" ) - print(f"[Test] Step 3.8: {update_data_type} Column update succeeded after crash recovery") except Exception as e: - print(f"[Test] Step 3.8: {update_data_type} Column update failed after crash recovery: {str(e)}") + print( + f"[Test] Step 3.8: {update_data_type} Column update failed after crash recovery: {str(e)}" + ) # This might happen if the column was already altered during the interrupted operation # Only do a simple verification after column update stats_after_update_column = recovered_collection.stats - print(f"[Test] Step 3.8.1: Stats after column update - doc_count: {stats_after_update_column.doc_count}") + print( + f"[Test] Step 3.8.1: Stats after column update - doc_count: {stats_after_update_column.doc_count}" + ) # 3.9: Check if query function works properly after column update print(f"[Test] Step 3.9: Verifying query function after column update...") # Use a simpler query that matches the field type - filtered_query = recovered_collection.query(filter=f"{update_field_name} >= 0", topk=10) - print(f"[Test] Step 3.9.1: Field-filtered query returned {len(filtered_query)} documents") + filtered_query = recovered_collection.query( + filter=f"{update_field_name} >= 0", topk=10 + ) + print( + f"[Test] Step 3.9.1: Field-filtered query returned {len(filtered_query)} documents" + ) # Note: After column operations, query results may vary diff --git a/python/tests/detail/test_collection_crash_recovery_createindex.py b/python/tests/detail/test_collection_crash_recovery_createindex.py index b480f40a2..cdf462c88 100644 --- 
a/python/tests/detail/test_collection_crash_recovery_createindex.py +++ b/python/tests/detail/test_collection_crash_recovery_createindex.py @@ -31,10 +31,8 @@ from doc_helper import * - - def singledoc_and_check( - collection: Collection, insert_doc, operator="insert", is_delete=1 + collection: Collection, insert_doc, operator="insert", is_delete=1 ): if operator == "insert": result = collection.insert(insert_doc) @@ -50,7 +48,7 @@ def singledoc_and_check( stats = collection.stats assert stats is not None - #assert stats.doc_count == 1 + # assert stats.doc_count == 1 fetched_docs = collection.fetch([insert_doc.id]) assert len(fetched_docs) == 1 @@ -88,7 +86,7 @@ def singledoc_and_check( assert collection.stats.doc_count == 0, "Document should be deleted" -#@pytest.mark.skip("Known issue") +# @pytest.mark.skip("Known issue") class TestCollectionCrashRecoveryCreateIndex: """ Test Zvec collection recovery capability after simulating power failure/process crash during index creation. @@ -98,7 +96,7 @@ class TestCollectionCrashRecoveryCreateIndex: # Script content for subprocess to execute Zvec index creation operations # Write this script content to a temporary file and execute it in the subprocess. - ZVEC_SUBPROCESS_SCRIPT_CREATEINDEX = ''' + ZVEC_SUBPROCESS_SCRIPT_CREATEINDEX = """ import zvec import time import json @@ -188,45 +186,63 @@ def run_zvec_createindex_operations(args_json_str): if __name__ == "__main__": args_json_str = sys.argv[1] run_zvec_createindex_operations(args_json_str) -''' +""" - def test_createindex_simulate_crash_during_index_creation_invert(self, full_schema_1024, collection_option): + def test_createindex_simulate_crash_during_index_creation_invert( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform INVERT index creation operations. 
During the index creation operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. """ - self._test_createindex_with_crash_recovery(full_schema_1024, collection_option, "INVERT") + self._test_createindex_with_crash_recovery( + full_schema_1024, collection_option, "INVERT" + ) - def test_createindex_simulate_crash_during_index_creation_hnsw(self, full_schema_1024, collection_option): + def test_createindex_simulate_crash_during_index_creation_hnsw( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform HNSW index creation operations. During the index creation operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. """ - self._test_createindex_with_crash_recovery(full_schema_1024, collection_option, "HNSW") + self._test_createindex_with_crash_recovery( + full_schema_1024, collection_option, "HNSW" + ) - def test_createindex_simulate_crash_during_index_creation_flat(self, full_schema_1024, collection_option): + def test_createindex_simulate_crash_during_index_creation_flat( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform FLAT index creation operations. During the index creation operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
""" - self._test_createindex_with_crash_recovery(full_schema_1024, collection_option, "FLAT") + self._test_createindex_with_crash_recovery( + full_schema_1024, collection_option, "FLAT" + ) - def test_createindex_simulate_crash_during_index_creation_ivf(self, full_schema_1024, collection_option): + def test_createindex_simulate_crash_during_index_creation_ivf( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform IVF index creation operations. During the index creation operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. """ - self._test_createindex_with_crash_recovery(full_schema_1024, collection_option, "IVF") + self._test_createindex_with_crash_recovery( + full_schema_1024, collection_option, "IVF" + ) - def _test_createindex_with_crash_recovery(self, schema, collection_option, index_type): + def _test_createindex_with_crash_recovery( + self, schema, collection_option, index_type + ): """ Common method to test index creation with crash recovery for different index types. """ @@ -234,8 +250,12 @@ def _test_createindex_with_crash_recovery(self, schema, collection_option, index collection_path = f"{temp_dir}/test_collection_createindex_crash_recovery_{index_type.lower()}" # Step 1: Successfully create collection in main process and insert some documents - print(f"[Test] Step 1: Creating collection in main process, path: {collection_path}...") - coll = zvec.create_and_open(path=collection_path, schema=schema, option=collection_option) + print( + f"[Test] Step 1: Creating collection in main process, path: {collection_path}..." 
+ ) + coll = zvec.create_and_open( + path=collection_path, schema=schema, option=collection_option + ) assert coll is not None print(f"[Test] Step 1.1: Collection created successfully.") @@ -243,13 +263,17 @@ def _test_createindex_with_crash_recovery(self, schema, collection_option, index for i in range(100): doc = generate_doc(i, coll.schema) result = coll.insert([doc]) - assert result is not None and len(result) > 0, f"Failed to insert document {i}" + assert result is not None and len(result) > 0, ( + f"Failed to insert document {i}" + ) print(f"[Test] Step 1.2: Inserted 100 documents for indexing.") # Verify collection state before crash initial_doc_count = coll.stats.doc_count - print(f"[Test] Step 1.3: Collection has {initial_doc_count} documents before crash simulation.") + print( + f"[Test] Step 1.3: Collection has {initial_doc_count} documents before crash simulation." + ) del coll print(f"[Test] Step 1.4: Closed collection.") @@ -257,7 +281,7 @@ def _test_createindex_with_crash_recovery(self, schema, collection_option, index # Step 2: Prepare and run subprocess for index creation operations # Write subprocess script to temporary file subprocess_script_path = f"{temp_dir}/zvec_subprocess_createindex.py" - with open(subprocess_script_path, 'w', encoding='utf-8') as f: + with open(subprocess_script_path, "w", encoding="utf-8") as f: f.write(self.ZVEC_SUBPROCESS_SCRIPT_CREATEINDEX) # Determine the appropriate field for each index type @@ -265,13 +289,22 @@ def _test_createindex_with_crash_recovery(self, schema, collection_option, index field_for_index = "int32_field" # Scalar fields support INVERT index elif index_type == "HNSW": from zvec import DataType - field_for_index = DEFAULT_VECTOR_FIELD_NAME[DataType.VECTOR_FP32] # Use vector field for HNSW + + field_for_index = DEFAULT_VECTOR_FIELD_NAME[ + DataType.VECTOR_FP32 + ] # Use vector field for HNSW elif index_type == "FLAT": from zvec import DataType - field_for_index = 
DEFAULT_VECTOR_FIELD_NAME[DataType.VECTOR_FP32] # Use vector field for FLAT + + field_for_index = DEFAULT_VECTOR_FIELD_NAME[ + DataType.VECTOR_FP32 + ] # Use vector field for FLAT elif index_type == "IVF": from zvec import DataType - field_for_index = DEFAULT_VECTOR_FIELD_NAME[DataType.VECTOR_FP32] # Use vector field for IVF + + field_for_index = DEFAULT_VECTOR_FIELD_NAME[ + DataType.VECTOR_FP32 + ] # Use vector field for IVF else: print("index_type is error!") @@ -281,21 +314,24 @@ def _test_createindex_with_crash_recovery(self, schema, collection_option, index "index_field": field_for_index, # Use appropriate field for this index type "index_type": index_type, # Type of index to create "index_creation_iterations": 20, # Number of index creation iterations to increase interruption chance - "delay_between_creations": 0.3 # Delay between index creations to allow interruption opportunity + "delay_between_creations": 0.3, # Delay between index creations to allow interruption opportunity } args_json_str = json.dumps(subprocess_args) print( - f"[Test] Step 2: Starting {index_type} index creation operations in subprocess, path: {collection_path}") + f"[Test] Step 2: Starting {index_type} index creation operations in subprocess, path: {collection_path}" + ) # Start subprocess to execute index creation operations - proc = subprocess.Popen([ - sys.executable, subprocess_script_path, args_json_str - ]) + proc = subprocess.Popen( + [sys.executable, subprocess_script_path, args_json_str] + ) # Wait briefly to allow subprocess to begin index creation operations time.sleep(3) # Wait 3 seconds to allow indexing process to start - print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + print( + f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}..." 
+ ) # Suddenly kill subprocess (simulate power failure or crash during index creation operations) if psutil: try: @@ -306,13 +342,19 @@ def _test_createindex_with_crash_recovery(self, schema, collection_option, index child.kill() parent.kill() proc.wait(timeout=5) - except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + except ( + psutil.NoSuchProcess, + psutil.AccessDenied, + subprocess.TimeoutExpired, + ): # If psutil is unavailable or process has been terminated, fall back to original method proc.send_signal(signal.SIGKILL) try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() else: @@ -321,7 +363,9 @@ def _test_createindex_with_crash_recovery(self, schema, collection_option, index try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() print(f"[Test] Subprocess {proc.pid} has been terminated.") @@ -331,10 +375,13 @@ def _test_createindex_with_crash_recovery(self, schema, collection_option, index # Step 3: Verify recovery situation in main process print( - f"[Test] Step 3: Attempting to open collection after simulating crash during document insertion operations...") + f"[Test] Step 3: Attempting to open collection after simulating crash during document insertion operations..." 
+ ) # Verification 3.1: Check if collection can be successfully opened after crash recovered_collection = zvec.open(collection_path) - assert recovered_collection is not None, "Cannot open collection after crash" + assert recovered_collection is not None, ( + "Cannot open collection after crash" + ) print(f"[Test] Step 3.1: Verified collection can be opened after crash...") # Verification 3.2: Check data integrity (document count and content) @@ -342,50 +389,60 @@ def _test_createindex_with_crash_recovery(self, schema, collection_option, index query_result = recovered_collection.query(topk=1024) # We expect some documents to have been successfully inserted before crash # The exact number depends on when the crash occurred during the bulk insertion process - print( - f"[Test] Step 3.2: Found {len(query_result)} documents after crash") + print(f"[Test] Step 3.2: Found {len(query_result)} documents after crash") current_count = recovered_collection.stats.doc_count assert recovered_collection.stats.doc_count >= 1 assert len(query_result) <= current_count, ( - f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}" + ) # Verify existing documents have correct structure if len(query_result) > 0: for doc in query_result[:1024]: fetched_docs = recovered_collection.fetch([doc.id]) - #print("doc.id,fetched_docs:\n") - #print(doc.id, fetched_docs) + # print("doc.id,fetched_docs:\n") + # print(doc.id, fetched_docs) exp_doc = generate_doc(int(doc.id), recovered_collection.schema) assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema), ( - f"result doc={fetched_docs},doc_exp={exp_doc}") + assert is_doc_equal( + fetched_docs[doc.id], exp_doc, recovered_collection.schema + ), f"result doc={fetched_docs},doc_exp={exp_doc}" # 3.4: Check if index is 
complete and query function works properly print(f"[Test] Step 3.4: Verifying index integrity and query function...") filtered_query = recovered_collection.query(filter=f"int32_field >=-100") - print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + print( + f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents" + ) assert len(filtered_query) > 0 for doc in query_result: fetched_docs = recovered_collection.fetch([doc.id]) - #print("doc.id,fetched_docs:\n") - #print(doc.id, fetched_docs) + # print("doc.id,fetched_docs:\n") + # print(doc.id, fetched_docs) exp_doc = generate_doc(int(doc.id), recovered_collection.schema) assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema), ( - f"result doc={fetched_docs},doc_exp={exp_doc}") + assert is_doc_equal( + fetched_docs[doc.id], exp_doc, recovered_collection.schema + ), f"result doc={fetched_docs},doc_exp={exp_doc}" # Verification 3.5: Test insertion functionality after recovery print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") - test_insert_doc = generate_doc(9999, schema) # Use original schema from fixture - singledoc_and_check(recovered_collection, test_insert_doc, operator="insert", is_delete=0) + test_insert_doc = generate_doc( + 9999, schema + ) # Use original schema from fixture + singledoc_and_check( + recovered_collection, test_insert_doc, operator="insert", is_delete=0 + ) # Verification 3.6: Test update functionality after recovery print(f"[Test] Step 3.6: Testing update functionality after recovery...") updated_doc = generate_update_doc(9999, recovered_collection.schema) - singledoc_and_check(recovered_collection, updated_doc, operator="update", is_delete=0) + singledoc_and_check( + recovered_collection, updated_doc, operator="update", is_delete=0 + ) # 3.7: Test deletion after recovery print(f"[Test] Step 3.7: Testing deletion 
functionality after recovery...") @@ -401,18 +458,23 @@ def _test_createindex_with_crash_recovery(self, schema, collection_option, index # Now try to create an index after the crash recovery if index_type == "INVERT": from zvec import InvertIndexParam, IndexOption + index_param = InvertIndexParam() elif index_type == "HNSW": from zvec import HnswIndexParam, IndexOption + index_param = HnswIndexParam() elif index_type == "FLAT": from zvec import FlatIndexParam, IndexOption + index_param = FlatIndexParam() elif index_type == "IVF": from zvec import IVFIndexParam, IndexOption + index_param = IVFIndexParam() else: from zvec import InvertIndexParam, IndexOption + index_param = InvertIndexParam() # Determine the appropriate field for each index type @@ -420,45 +482,65 @@ def _test_createindex_with_crash_recovery(self, schema, collection_option, index field_to_recreate = "int32_field" # Scalar fields support INVERT index elif index_type == "HNSW": from zvec import DataType - field_to_recreate = DEFAULT_VECTOR_FIELD_NAME[DataType.VECTOR_FP32] # Use vector field for HNSW + + field_to_recreate = DEFAULT_VECTOR_FIELD_NAME[ + DataType.VECTOR_FP32 + ] # Use vector field for HNSW elif index_type == "FLAT": from zvec import DataType - field_to_recreate = DEFAULT_VECTOR_FIELD_NAME[DataType.VECTOR_FP32] # Use vector field for FLAT + + field_to_recreate = DEFAULT_VECTOR_FIELD_NAME[ + DataType.VECTOR_FP32 + ] # Use vector field for FLAT elif index_type == "IVF": from zvec import DataType - field_to_recreate = DEFAULT_VECTOR_FIELD_NAME[DataType.VECTOR_FP32] # Use vector field for IVF + + field_to_recreate = DEFAULT_VECTOR_FIELD_NAME[ + DataType.VECTOR_FP32 + ] # Use vector field for IVF else: field_to_recreate = "int32_field" # Default to scalar field # This should succeed if the collection is properly recovered recovered_collection.create_index( - field_name=field_to_recreate, - index_param=index_param, - option=IndexOption() + field_name=field_to_recreate, + index_param=index_param, 
+ option=IndexOption(), + ) + print( + f"[Test] Step 3.8: {index_type} Index creation succeeded after crash recovery on field {field_to_recreate}" ) - print(f"[Test] Step 3.8: {index_type} Index creation succeeded after crash recovery on field {field_to_recreate}") # Only do a simple verification after index creation stats_after_index = recovered_collection.stats - print(f"[Test] Step 3.8.1: Stats after index creation - doc_count: {stats_after_index.doc_count}") + print( + f"[Test] Step 3.8.1: Stats after index creation - doc_count: {stats_after_index.doc_count}" + ) # 3.9: Check if index is complete and query function works properly print(f"[Test] Step 3.9: Verifying index integrity and query function...") # Use a simpler query that matches the field type if index_type == "INVERT": # Query on scalar field - filtered_query = recovered_collection.query(filter=f"int32_field >= 0", topk=10) - print(f"[Test] Step 3.9.1: Field-filtered query returned {len(filtered_query)} documents") + filtered_query = recovered_collection.query( + filter=f"int32_field >= 0", topk=10 + ) + print( + f"[Test] Step 3.9.1: Field-filtered query returned {len(filtered_query)} documents" + ) assert len(filtered_query) > 0 elif index_type in ["HNSW", "FLAT", "IVF"]: # Query on vector field using vector search import random - test_vector = [random.random() for _ in range(VECTOR_DIMENSION_1024)] # Assuming 1024-dim vector + + test_vector = [ + random.random() for _ in range(VECTOR_DIMENSION_1024) + ] # Assuming 1024-dim vector vector_query_result = recovered_collection.query( VectorQuery(field_name=field_to_recreate, vector=test_vector), - topk=5 + topk=5, + ) + print( + f"[Test] Step 3.9.1: Vector query returned {len(vector_query_result)} documents" ) - print(f"[Test] Step 3.9.1: Vector query returned {len(vector_query_result)} documents") assert len(vector_query_result) > 0 - - diff --git a/python/tests/detail/test_collection_crash_recovery_deletedoc.py 
b/python/tests/detail/test_collection_crash_recovery_deletedoc.py index 644749479..b69023a51 100644 --- a/python/tests/detail/test_collection_crash_recovery_deletedoc.py +++ b/python/tests/detail/test_collection_crash_recovery_deletedoc.py @@ -27,7 +27,7 @@ except ImportError: psutil = None # If psutil is not installed, set it to None from fixture_helper import * -from doc_helper import generate_doc +from doc_helper import generate_doc, generate_update_doc from distance_helper import * from fixture_helper import * @@ -35,7 +35,7 @@ def singledoc_and_check( - collection: Collection, insert_doc, operator="insert", is_delete=1 + collection: Collection, insert_doc, operator="insert", is_delete=1 ): if operator == "insert": result = collection.insert(insert_doc) @@ -51,7 +51,7 @@ def singledoc_and_check( stats = collection.stats assert stats is not None - #assert stats.doc_count == 1 + # assert stats.doc_count == 1 fetched_docs = collection.fetch([insert_doc.id]) assert len(fetched_docs) == 1 @@ -98,7 +98,7 @@ class TestCollectionCrashRecoveryDeleteDoc: # Script content for subprocess to execute Zvec document deletion operations # Write this script content to a temporary file and execute it in the subprocess. 
- ZVEC_SUBPROCESS_SCRIPT_DELETEDOC = ''' + ZVEC_SUBPROCESS_SCRIPT_DELETEDOC = """ import zvec import time import json @@ -205,12 +205,12 @@ def generate_doc(i: int, schema: zvec.CollectionSchema) -> zvec.Doc: def run_zvec_deletedoc_operations(args_json_str): args = json.loads(args_json_str) collection_path = args["collection_path"] - num_docs_to_delete = args.get("num_docs_to_delete", 100) # Number of documents to insert + num_docs_to_delete = args.get("num_docs_to_delete", 100) # Number of documents to delete batch_size = args.get("batch_size", 10) # Batch size for each deletion delay_between_batches = args.get("delay_between_batches", 0.1) # Delay between batches - print(f"[Subprocess] Starting Zvec insert document operations on {collection_path} at: {time.strftime('%Y-%m-%d %H:%M:%S')}") - print(f"[Subprocess] Will insert {num_docs_to_delete} documents in batches of {batch_size}") + print(f"[Subprocess] Starting Zvec delete document operations on {collection_path} at: {time.strftime('%Y-%m-%d %H:%M:%S')}") + print(f"[Subprocess] Will delete {num_docs_to_delete} documents in batches of {batch_size}") try: # Open existing collection @@ -222,28 +222,23 @@ def run_zvec_deletedoc_operations(args_json_str): # Calculate the number of documents in the current batch current_batch_size = min(batch_size, num_docs_to_delete - i) - if current_batch_size==batch_size: + # Generate document IDs for the current batch - fix the ID range issue + doc_ids = [str(j) for j in range(i, i + current_batch_size)] - doc_ids= [str(j) for j in range(i, i+batch_size)] - else: - doc_ids= [str(j) for j in range(i, i + current_batch_size)] - result = collection.delete(doc_ids) - - # Check return value - insert returns a list of document IDs + + # Check return value - delete returns a list of operation results assert len(result) == len(doc_ids) - for i in range(len(result)): - if i < len(doc_ids): - assert result[i].ok() + for idx in range(len(result)): + if idx < len(doc_ids): + assert 
result[idx].ok() deleted_count += len(doc_ids) print(f"[Subprocess] Batch deletion successful, deleted {len(doc_ids)} documents, total deleted: {deleted_count}") - - # Add small delay to allow interruption opportunity time.sleep(delay_between_batches) - print(f"[Subprocess] Completed inserting {deleted_count} documents.") + print(f"[Subprocess] Completed deleting {deleted_count} documents.") if hasattr(collection, "close"): collection.close() @@ -264,9 +259,11 @@ def run_zvec_deletedoc_operations(args_json_str): if __name__ == "__main__": args_json_str = sys.argv[1] run_zvec_deletedoc_operations(args_json_str) -''' +""" - def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, collection_option, basic_schema): + def test_insertdoc_simulate_crash_during_bulk_insert( + self, full_schema_1024, collection_option, basic_schema + ): """ Scenario: First successfully create a Zvec collection in the main process. Then start a subprocess to open the collection and perform bulk document deletion operations. @@ -277,8 +274,12 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col collection_path = f"{temp_dir}/test_collection_deletedoc_crash_recovery" # Step 1: Successfully create collection in main process - print(f"[Test] Step 1: Creating collection in main process, path: {collection_path}...") - coll = zvec.create_and_open(path=collection_path, schema=full_schema_1024, option=collection_option) + print( + f"[Test] Step 1: Creating collection in main process, path: {collection_path}..." 
+ ) + coll = zvec.create_and_open( + path=collection_path, schema=full_schema_1024, option=collection_option + ) assert coll is not None print(f"[Test] Step 1.1: Collection created successfully.") single_doc = generate_doc(2001, coll.schema) @@ -292,7 +293,9 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col initial_docs.append(doc) insert_results = coll.insert(initial_docs) - print(f"[Test] Step 1.3: deleted {len(initial_docs)} initial documents for updating.") + print( + f"[Test] Step 1.3: deleted {len(initial_docs)} initial documents for updating." + ) del coll print(f"[Test] Step 1.3: Closed collection.") @@ -300,7 +303,7 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col # Step 2: Prepare and run subprocess for bulk deletion operations # Write subprocess script to temporary file subprocess_script_path = f"{temp_dir}/zvec_subprocess_deletedoc.py" - with open(subprocess_script_path, 'w', encoding='utf-8') as f: + with open(subprocess_script_path, "w", encoding="utf-8") as f: f.write(self.ZVEC_SUBPROCESS_SCRIPT_DELETEDOC) # Prepare subprocess parameters @@ -308,20 +311,24 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col "collection_path": collection_path, "num_docs_to_delete": 200, # Insert 200 documents to allow for interruption "batch_size": 10, # Insert 10 documents per batch - "delay_between_batches": 0.2 # 0.2 second delay between batches to increase interruption timing + "delay_between_batches": 0.2, # 0.2 second delay between batches to increase interruption timing } args_json_str = json.dumps(subprocess_args) - print(f"[Test] Step 2: Starting bulk deletion operations in subprocess, path: {collection_path}") + print( + f"[Test] Step 2: Starting bulk deletion operations in subprocess, path: {collection_path}" + ) # Start subprocess to execute bulk deletion operations - proc = subprocess.Popen([ - sys.executable, subprocess_script_path, args_json_str - ]) + 
proc = subprocess.Popen( + [sys.executable, subprocess_script_path, args_json_str] + ) # Wait briefly to allow subprocess to begin deletion operations time.sleep(2) # Wait 2 seconds to allow deletion loop to start - print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + print( + f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}..." + ) # Suddenly kill subprocess (simulate power failure or crash during deletion operations) if psutil: try: @@ -332,13 +339,19 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col child.kill() parent.kill() proc.wait(timeout=5) - except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + except ( + psutil.NoSuchProcess, + psutil.AccessDenied, + subprocess.TimeoutExpired, + ): # If psutil is unavailable or process has been terminated, fall back to original method proc.send_signal(signal.SIGKILL) try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() else: @@ -347,7 +360,9 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." 
+ ) proc.kill() proc.wait() print(f"[Test] Subprocess {proc.pid} has been terminated.") @@ -356,10 +371,14 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col os.remove(subprocess_script_path) # Step 3: Verify recovery situation in main process - print(f"[Test] Step 3: Attempting to open collection after simulating crash during document deletion operations...") + print( + f"[Test] Step 3: Attempting to open collection after simulating crash during document deletion operations..." + ) # Verification 3.1: Check if collection can be successfully opened after crash recovered_collection = zvec.open(collection_path) - assert recovered_collection is not None, "Cannot open collection after crash" + assert recovered_collection is not None, ( + "Cannot open collection after crash" + ) print(f"[Test] Step 3.1: Verified collection can be opened after crash...") # Verification 3.2: Check data integrity (document count and content) @@ -368,18 +387,19 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col # We expect some documents to have been successfully deleted before crash # The exact number depends on when the crash occurred during the bulk deletion process print( - f"[Test] Step 3.2: Found {len(query_result)} documents after crash (expected 0-{subprocess_args['num_docs_to_delete']})") - + f"[Test] Step 3.2: Found {len(query_result)} documents after crash (expected 0-{subprocess_args['num_docs_to_delete']})" + ) current_count = recovered_collection.stats.doc_count assert recovered_collection.stats.doc_count >= 1 - assert len(query_result)<=recovered_collection.stats.doc_count,(f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + assert len(query_result) <= recovered_collection.stats.doc_count, ( + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}" + ) # Verify existing documents have correct structure if 
len(query_result) > 0: - for doc in query_result[:1024]: - if doc.id=="2001": + if doc.id == "2001": print("Found 2001 data!") fetched_docs = recovered_collection.fetch([doc.id]) print("doc.id:\n") @@ -388,24 +408,32 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col print(fetched_docs) assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs["2001"],single_doc, recovered_collection.schema),(f"result doc={fetched_docs},doc_exp={single_doc}") + assert is_doc_equal( + fetched_docs["2001"], + single_doc, + recovered_collection.schema, + ), f"result doc={fetched_docs},doc_exp={single_doc}" break else: fetched_docs = recovered_collection.fetch([doc.id]) print("doc.id,fetched_docs:\n") - print(doc.id,fetched_docs) - exp_doc = generate_doc(int(doc.id), recovered_collection.schema) + print(doc.id, fetched_docs) + exp_doc = generate_doc(int(doc.id), recovered_collection.schema) assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs["1"], exp_doc, recovered_collection.schema), (f"result doc={fetched_docs},doc_exp={exp_doc}") + assert is_doc_equal( + fetched_docs[doc.id], exp_doc, recovered_collection.schema + ), f"result doc={fetched_docs},doc_exp={exp_doc}" - #3.4: Check if index is complete and query function works properly + # 3.4: Check if index is complete and query function works properly print(f"[Test] Step 3.4: Verifying index integrity and query function...") filtered_query = recovered_collection.query(filter=f"int32_field >=-100") - print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + print( + f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents" + ) assert len(filtered_query) > 0 for doc in query_result: - if doc.id=="2001": + if doc.id == "2001": print("Found 2001 data!") fetched_docs = recovered_collection.fetch([doc.id]) print("doc.id:\n") @@ -414,32 +442,41 @@ def 
test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col print(fetched_docs) assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs["2001"],single_doc, recovered_collection.schema),(f"result doc={fetched_docs},doc_exp={single_doc}") + assert is_doc_equal( + fetched_docs["2001"], single_doc, recovered_collection.schema + ), f"result doc={fetched_docs},doc_exp={single_doc}" break else: fetched_docs = recovered_collection.fetch([doc.id]) print("doc.id,fetched_docs:\n") - print(doc.id,fetched_docs) - exp_doc = generate_doc(int(doc.id), recovered_collection.schema) + print(doc.id, fetched_docs) + exp_doc = generate_doc(int(doc.id), recovered_collection.schema) assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema), (f"result doc={fetched_docs},doc_exp={exp_doc}") + assert is_doc_equal( + fetched_docs[doc.id], exp_doc, recovered_collection.schema + ), f"result doc={fetched_docs},doc_exp={exp_doc}" - # Verification 3.5: Test insertion functionality after recovery + # Verification 3.5: Test insertion functionality after recovery print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") - test_insert_doc = generate_doc(9999, full_schema_1024) # Use original schema from fixture - singledoc_and_check(recovered_collection, test_insert_doc, operator="insert",is_delete=0) + test_insert_doc = generate_doc( + 9999, full_schema_1024 + ) # Use original schema from fixture + singledoc_and_check( + recovered_collection, test_insert_doc, operator="insert", is_delete=0 + ) # Verification 3.6: Test update functionality after recovery print(f"[Test] Step 3.6: Testing update functionality after recovery...") updated_doc = generate_update_doc(2001, recovered_collection.schema) - singledoc_and_check(recovered_collection, updated_doc, operator="update",is_delete=0) - + singledoc_and_check( + recovered_collection, updated_doc, 
operator="update", is_delete=0 + ) - #3.7: Test deletion after recovery + # 3.7: Test deletion after recovery print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") doc_ids = ["9999"] result = recovered_collection.delete(doc_ids) assert len(result) == len(doc_ids) for item in result: - assert item.ok() \ No newline at end of file + assert item.ok() diff --git a/python/tests/detail/test_collection_crash_recovery_dropcolumn.py b/python/tests/detail/test_collection_crash_recovery_dropcolumn.py index eb3202233..f6f608ad9 100644 --- a/python/tests/detail/test_collection_crash_recovery_dropcolumn.py +++ b/python/tests/detail/test_collection_crash_recovery_dropcolumn.py @@ -32,7 +32,7 @@ def singledoc_and_check( - collection: Collection, insert_doc, operator="insert", is_delete=1 + collection: Collection, insert_doc, operator="insert", is_delete=1 ): if operator == "insert": result = collection.insert(insert_doc) @@ -48,7 +48,7 @@ def singledoc_and_check( stats = collection.stats assert stats is not None - #assert stats.doc_count == 1 + # assert stats.doc_count == 1 fetched_docs = collection.fetch([insert_doc.id]) assert len(fetched_docs) == 1 @@ -95,7 +95,7 @@ class TestCollectionCrashRecoveryDropColumn: # Script content for subprocess to execute Zvec column drop operations # Write this script content to a temporary file and execute it in the subprocess. 
- ZVEC_SUBPROCESS_SCRIPT_DROPCOLUMN = ''' + ZVEC_SUBPROCESS_SCRIPT_DROPCOLUMN = """ import zvec import time import json @@ -183,63 +183,89 @@ def run_zvec_dropcolumn_operations(args_json_str): if __name__ == "__main__": args_json_str = sys.argv[1] run_zvec_dropcolumn_operations(args_json_str) -''' +""" - def test_dropcolumn_simulate_crash_during_column_drop_int32(self, full_schema_1024, collection_option): + def test_dropcolumn_simulate_crash_during_column_drop_int32( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform INT32 column drop operations. During the column drop operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. """ - self._test_dropcolumn_with_crash_recovery(full_schema_1024, collection_option, "INT32", "int32_field1") + self._test_dropcolumn_with_crash_recovery( + full_schema_1024, collection_option, "INT32", "int32_field1" + ) - def test_dropcolumn_simulate_crash_during_column_drop_int64(self, full_schema_1024, collection_option): + def test_dropcolumn_simulate_crash_during_column_drop_int64( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform INT64 column drop operations. During the column drop operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
""" - self._test_dropcolumn_with_crash_recovery(full_schema_1024, collection_option, "INT64", "int64_field1") + self._test_dropcolumn_with_crash_recovery( + full_schema_1024, collection_option, "INT64", "int64_field1" + ) - def test_dropcolumn_simulate_crash_during_column_drop_uint32(self, full_schema_1024, collection_option): + def test_dropcolumn_simulate_crash_during_column_drop_uint32( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform UINT32 column drop operations. During the column drop operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. """ - self._test_dropcolumn_with_crash_recovery(full_schema_1024, collection_option, "UINT32", "uint32_field1") + self._test_dropcolumn_with_crash_recovery( + full_schema_1024, collection_option, "UINT32", "uint32_field1" + ) - def test_dropcolumn_simulate_crash_during_column_drop_uint64(self, full_schema_1024, collection_option): + def test_dropcolumn_simulate_crash_during_column_drop_uint64( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform UINT64 column drop operations. During the column drop operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
""" - self._test_dropcolumn_with_crash_recovery(full_schema_1024, collection_option, "UINT64", "uint64_field1") + self._test_dropcolumn_with_crash_recovery( + full_schema_1024, collection_option, "UINT64", "uint64_field1" + ) - def test_dropcolumn_simulate_crash_during_column_drop_float(self, full_schema_1024, collection_option): + def test_dropcolumn_simulate_crash_during_column_drop_float( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform FLOAT column drop operations. During the column drop operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. """ - self._test_dropcolumn_with_crash_recovery(full_schema_1024, collection_option, "FLOAT", "float_field1") + self._test_dropcolumn_with_crash_recovery( + full_schema_1024, collection_option, "FLOAT", "float_field1" + ) - def test_dropcolumn_simulate_crash_during_column_drop_double(self, full_schema_1024, collection_option): + def test_dropcolumn_simulate_crash_during_column_drop_double( + self, full_schema_1024, collection_option + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform DOUBLE column drop operations. During the column drop operation, forcibly terminate the subprocess (simulate power failure or process crash). Finally, in the main process, reopen the collection and verify whether its state and functionality are normal. 
""" - self._test_dropcolumn_with_crash_recovery(full_schema_1024, collection_option, "DOUBLE", "double_field1") + self._test_dropcolumn_with_crash_recovery( + full_schema_1024, collection_option, "DOUBLE", "double_field1" + ) - def _test_dropcolumn_with_crash_recovery(self, schema, collection_option, drop_data_type, drop_field_name): + def _test_dropcolumn_with_crash_recovery( + self, schema, collection_option, drop_data_type, drop_field_name + ): """ Common method to test column drop with crash recovery for different column types. """ @@ -247,8 +273,12 @@ def _test_dropcolumn_with_crash_recovery(self, schema, collection_option, drop_d collection_path = f"{temp_dir}/test_collection_dropcolumn_crash_recovery_{drop_data_type.lower()}" # Step 1: Successfully create collection in main process and insert some documents - print(f"[Test] Step 1: Creating collection in main process, path: {collection_path}...") - coll = zvec.create_and_open(path=collection_path, schema=schema, option=collection_option) + print( + f"[Test] Step 1: Creating collection in main process, path: {collection_path}..." 
+ ) + coll = zvec.create_and_open( + path=collection_path, schema=schema, option=collection_option + ) assert coll is not None print(f"[Test] Step 1.1: Collection created successfully.") @@ -258,14 +288,18 @@ def _test_dropcolumn_with_crash_recovery(self, schema, collection_option, drop_d exp_doc_dict[i] = {} doc = generate_doc(i, coll.schema) result = coll.insert([doc]) - assert result is not None and len(result) > 0, f"Failed to insert document {i}" + assert result is not None and len(result) > 0, ( + f"Failed to insert document {i}" + ) exp_doc_dict[i] = doc print(f"[Test] Step 1.2: Inserted 50 documents for column operations.") # Verify collection state before crash initial_doc_count = coll.stats.doc_count - print(f"[Test] Step 1.3: Collection has {initial_doc_count} documents before crash simulation.") + print( + f"[Test] Step 1.3: Collection has {initial_doc_count} documents before crash simulation." + ) del coll print(f"[Test] Step 1.4: Closed collection.") @@ -273,7 +307,7 @@ def _test_dropcolumn_with_crash_recovery(self, schema, collection_option, drop_d # Step 2: Prepare and run subprocess for column drop operations # Write subprocess script to temporary file subprocess_script_path = f"{temp_dir}/zvec_subprocess_dropcolumn.py" - with open(subprocess_script_path, 'w', encoding='utf-8') as f: + with open(subprocess_script_path, "w", encoding="utf-8") as f: f.write(self.ZVEC_SUBPROCESS_SCRIPT_DROPCOLUMN) # Prepare subprocess parameters @@ -282,21 +316,24 @@ def _test_dropcolumn_with_crash_recovery(self, schema, collection_option, drop_d "drop_field_name": drop_field_name, # Use appropriate field name for this test "drop_data_type": drop_data_type, # Type of field to drop "drop_column_iterations": 20, # Number of drop iterations to increase interruption chance - "delay_between_drops": 0.3 # Delay between drops to allow interruption opportunity + "delay_between_drops": 0.3, # Delay between drops to allow interruption opportunity } args_json_str = 
json.dumps(subprocess_args) print( - f"[Test] Step 2: Starting {drop_data_type} column drop operations in subprocess, path: {collection_path}") + f"[Test] Step 2: Starting {drop_data_type} column drop operations in subprocess, path: {collection_path}" + ) # Start subprocess to execute column drop operations - proc = subprocess.Popen([ - sys.executable, subprocess_script_path, args_json_str - ]) + proc = subprocess.Popen( + [sys.executable, subprocess_script_path, args_json_str] + ) # Wait briefly to allow subprocess to begin column drop operations time.sleep(3) # Wait 3 seconds to allow column drop process to start - print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + print( + f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}..." + ) # Suddenly kill subprocess (simulate power failure or crash during column drop operations) if psutil: try: @@ -307,13 +344,19 @@ def _test_dropcolumn_with_crash_recovery(self, schema, collection_option, drop_d child.kill() parent.kill() proc.wait(timeout=5) - except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + except ( + psutil.NoSuchProcess, + psutil.AccessDenied, + subprocess.TimeoutExpired, + ): # If psutil is unavailable or process has been terminated, fall back to original method proc.send_signal(signal.SIGKILL) try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." 
+ ) proc.kill() proc.wait() else: @@ -322,7 +365,9 @@ def _test_dropcolumn_with_crash_recovery(self, schema, collection_option, drop_d try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() print(f"[Test] Subprocess {proc.pid} has been terminated.") @@ -332,10 +377,13 @@ def _test_dropcolumn_with_crash_recovery(self, schema, collection_option, drop_d # Step 3: Verify recovery situation in main process print( - f"[Test] Step 3: Attempting to open collection after simulating crash during column drop operations...") + f"[Test] Step 3: Attempting to open collection after simulating crash during column drop operations..." + ) # Verification 3.1: Check if collection can be successfully opened after crash recovered_collection = zvec.open(collection_path) - assert recovered_collection is not None, "Cannot open collection after crash" + assert recovered_collection is not None, ( + "Cannot open collection after crash" + ) print(f"[Test] Step 3.1: Verified collection can be opened after crash...") # Verification 3.2: Check data integrity (document count and content) @@ -343,52 +391,68 @@ def _test_dropcolumn_with_crash_recovery(self, schema, collection_option, drop_d query_result = recovered_collection.query(topk=1024) # We expect some documents to have been successfully inserted before crash # The exact number depends on when the crash occurred during the bulk insertion process - print( - f"[Test] Step 3.2: Found {len(query_result)} documents after crash") + print(f"[Test] Step 3.2: Found {len(query_result)} documents after crash") current_count = recovered_collection.stats.doc_count assert recovered_collection.stats.doc_count >= 1 assert len(query_result) <= current_count, ( - f"query_result count = {len(query_result)},stats.doc_count = 
{recovered_collection.stats.doc_count}") + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}" + ) # Verify existing documents have correct structure if len(query_result) > 0: for doc in query_result[:50]: # Limit to first 50 for efficiency fetched_docs = recovered_collection.fetch([doc.id]) - '''print("doc.id,fetched_docs:\n") - print(doc.id, fetched_docs)''' + """print("doc.id,fetched_docs:\n") + print(doc.id, fetched_docs)""" exp_doc = exp_doc_dict[int(doc.id)] assert len(fetched_docs) == 1 assert doc.id in fetched_docs # Note: The doc content may have been partially updated before the crash # So we only verify the schema structure and basic fields - assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema, - True, True), ( - f"result doc={fetched_docs},doc_exp={exp_doc}") + assert is_doc_equal( + fetched_docs[doc.id], + exp_doc, + recovered_collection.schema, + True, + True, + ), f"result doc={fetched_docs},doc_exp={exp_doc}" # 3.4: Check if query function works properly print(f"[Test] Step 3.4: Verifying query function after crash...") filtered_query = recovered_collection.query(filter=f"int32_field >=-100") - print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + print( + f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents" + ) assert len(filtered_query) > 0 for doc in query_result[:10]: # Check first 10 docs fetched_docs = recovered_collection.fetch([doc.id]) exp_doc = exp_doc_dict[int(doc.id)] assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs[doc.id], exp_doc, recovered_collection.schema, - True, True), ( - f"result doc={fetched_docs},doc_exp={exp_doc}") + assert is_doc_equal( + fetched_docs[doc.id], + exp_doc, + recovered_collection.schema, + True, + True, + ), f"result doc={fetched_docs},doc_exp={exp_doc}" # Verification 3.5: Test insertion functionality after recovery 
print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") - test_insert_doc = generate_doc(9999, schema) # Use original schema from fixture - singledoc_and_check(recovered_collection, test_insert_doc, operator="insert", is_delete=0) + test_insert_doc = generate_doc( + 9999, schema + ) # Use original schema from fixture + singledoc_and_check( + recovered_collection, test_insert_doc, operator="insert", is_delete=0 + ) # Verification 3.6: Test update functionality after recovery print(f"[Test] Step 3.6: Testing update functionality after recovery...") updated_doc = generate_update_doc(9999, recovered_collection.schema) - singledoc_and_check(recovered_collection, updated_doc, operator="update", is_delete=0) + singledoc_and_check( + recovered_collection, updated_doc, operator="update", is_delete=0 + ) # 3.7: Test deletion after recovery print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") @@ -404,23 +468,31 @@ def _test_dropcolumn_with_crash_recovery(self, schema, collection_option, drop_d # Now try to drop a column after the crash recovery # This should succeed if the collection is properly recovered try: - recovered_collection.drop_column( - field_name=drop_field_name + recovered_collection.drop_column(field_name=drop_field_name) + print( + f"[Test] Step 3.8: {drop_data_type} Column drop succeeded after crash recovery" ) - print(f"[Test] Step 3.8: {drop_data_type} Column drop succeeded after crash recovery") except Exception as e: - print(f"[Test] Step 3.8: {drop_data_type} Column drop failed after crash recovery: {str(e)}") + print( + f"[Test] Step 3.8: {drop_data_type} Column drop failed after crash recovery: {str(e)}" + ) # This is expected if the column was already dropped during the interrupted operation # Only do a simple verification after column drop stats_after_drop_column = recovered_collection.stats - print(f"[Test] Step 3.8.1: Stats after column drop - doc_count: {stats_after_drop_column.doc_count}") + print( + 
f"[Test] Step 3.8.1: Stats after column drop - doc_count: {stats_after_drop_column.doc_count}" + ) # 3.9: Check if query function works properly after column drop print(f"[Test] Step 3.9: Verifying query function after column drop...") # Use a simpler query that matches the field type - filtered_query = recovered_collection.query(filter=f"int32_field >= 0", topk=10) - print(f"[Test] Step 3.9.1: Field-filtered query returned {len(filtered_query)} documents") + filtered_query = recovered_collection.query( + filter=f"int32_field >= 0", topk=10 + ) + print( + f"[Test] Step 3.9.1: Field-filtered query returned {len(filtered_query)} documents" + ) # Note: After column drop, this query might return 0 results # Close the recovered collection diff --git a/python/tests/detail/test_collection_crash_recovery_insertdoc.py b/python/tests/detail/test_collection_crash_recovery_insertdoc.py index 8780f16cb..757c35346 100644 --- a/python/tests/detail/test_collection_crash_recovery_insertdoc.py +++ b/python/tests/detail/test_collection_crash_recovery_insertdoc.py @@ -34,10 +34,8 @@ from doc_helper import * - - def singledoc_and_check( - collection: Collection, insert_doc, operator="insert", is_delete=1 + collection: Collection, insert_doc, operator="insert", is_delete=1 ): if operator == "insert": result = collection.insert(insert_doc) @@ -53,7 +51,7 @@ def singledoc_and_check( stats = collection.stats assert stats is not None - #assert stats.doc_count == 1 + # assert stats.doc_count == 1 fetched_docs = collection.fetch([insert_doc.id]) assert len(fetched_docs) == 1 @@ -91,7 +89,6 @@ def singledoc_and_check( assert collection.stats.doc_count == 0, "Document should be deleted" - class TestCollectionCrashRecoveryInsertDoc: """ Test Zvec collection recovery capability after simulating power failure/process crash during document insertion. 
@@ -101,7 +98,7 @@ class TestCollectionCrashRecoveryInsertDoc: # Script content for subprocess to execute Zvec document insertion operations # Write this script content to a temporary file and execute it in the subprocess. - ZVEC_SUBPROCESS_SCRIPT_INSERTDOC = ''' + ZVEC_SUBPROCESS_SCRIPT_INSERTDOC = """ import zvec import time import json @@ -268,9 +265,11 @@ def run_zvec_insertdoc_operations(args_json_str): if __name__ == "__main__": args_json_str = sys.argv[1] run_zvec_insertdoc_operations(args_json_str) -''' +""" - def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, collection_option, basic_schema): + def test_insertdoc_simulate_crash_during_bulk_insert( + self, full_schema_1024, collection_option, basic_schema + ): """ Scenario: First successfully create a Zvec collection in the main process. Then start a subprocess to open the collection and perform bulk document insertion operations. @@ -281,8 +280,12 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col collection_path = f"{temp_dir}/test_collection_insertdoc_crash_recovery" # Step 1: Successfully create collection in main process - print(f"[Test] Step 1: Creating collection in main process, path: {collection_path}...") - coll = zvec.create_and_open(path=collection_path, schema=full_schema_1024, option=collection_option) + print( + f"[Test] Step 1: Creating collection in main process, path: {collection_path}..." 
+ ) + coll = zvec.create_and_open( + path=collection_path, schema=full_schema_1024, option=collection_option + ) assert coll is not None print(f"[Test] Step 1.1: Collection created successfully.") single_doc = generate_doc(2001, coll.schema) @@ -295,7 +298,7 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col # Step 2: Prepare and run subprocess for bulk insertion operations # Write subprocess script to temporary file subprocess_script_path = f"{temp_dir}/zvec_subprocess_insertdoc.py" - with open(subprocess_script_path, 'w', encoding='utf-8') as f: + with open(subprocess_script_path, "w", encoding="utf-8") as f: f.write(self.ZVEC_SUBPROCESS_SCRIPT_INSERTDOC) # Prepare subprocess parameters @@ -303,20 +306,24 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col "collection_path": collection_path, "num_docs_to_insert": 200, # Insert 200 documents to allow for interruption "batch_size": 10, # Insert 10 documents per batch - "delay_between_batches": 0.2 # 0.2 second delay between batches to increase interruption timing + "delay_between_batches": 0.2, # 0.2 second delay between batches to increase interruption timing } args_json_str = json.dumps(subprocess_args) - print(f"[Test] Step 2: Starting bulk insertion operations in subprocess, path: {collection_path}") + print( + f"[Test] Step 2: Starting bulk insertion operations in subprocess, path: {collection_path}" + ) # Start subprocess to execute bulk insertion operations - proc = subprocess.Popen([ - sys.executable, subprocess_script_path, args_json_str - ]) + proc = subprocess.Popen( + [sys.executable, subprocess_script_path, args_json_str] + ) # Wait briefly to allow subprocess to begin insertion operations time.sleep(2) # Wait 2 seconds to allow insertion loop to start - print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + print( + f"[Test] Step 2: Simulating crash/power failure by terminating subprocess 
PID {proc.pid}..." + ) # Suddenly kill subprocess (simulate power failure or crash during insertion operations) if psutil: try: @@ -327,13 +334,19 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col child.kill() parent.kill() proc.wait(timeout=5) - except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + except ( + psutil.NoSuchProcess, + psutil.AccessDenied, + subprocess.TimeoutExpired, + ): # If psutil is unavailable or process has been terminated, fall back to original method proc.send_signal(signal.SIGKILL) try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() else: @@ -342,7 +355,9 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() print(f"[Test] Subprocess {proc.pid} has been terminated.") @@ -352,10 +367,13 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col # Step 3: Verify recovery situation in main process print( - f"[Test] Step 3: Attempting to open collection after simulating crash during document insertion operations...") + f"[Test] Step 3: Attempting to open collection after simulating crash during document insertion operations..." 
+ ) # Verification 3.1: Check if collection can be successfully opened after crash recovered_collection = zvec.open(collection_path) - assert recovered_collection is not None, "Cannot open collection after crash" + assert recovered_collection is not None, ( + "Cannot open collection after crash" + ) print(f"[Test] Step 3.1: Verified collection can be opened after crash...") # Verification 3.2: Check data integrity (document count and content) @@ -364,16 +382,17 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col # We expect some documents to have been successfully inserted before crash # The exact number depends on when the crash occurred during the bulk insertion process print( - f"[Test] Step 3.2: Found {len(query_result)} documents after crash (expected 0-{subprocess_args['num_docs_to_insert']})") + f"[Test] Step 3.2: Found {len(query_result)} documents after crash (expected 0-{subprocess_args['num_docs_to_insert']})" + ) current_count = recovered_collection.stats.doc_count assert recovered_collection.stats.doc_count >= 1 assert len(query_result) <= current_count, ( - f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}" + ) # Verify existing documents have correct structure if len(query_result) > 0: - for doc in query_result[:1024]: if doc.id == "2001": print("Found 2001 data!") @@ -384,8 +403,11 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col print(fetched_docs) assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs["2001"], single_doc, recovered_collection.schema), ( - f"result doc={fetched_docs},doc_exp={single_doc}") + assert is_doc_equal( + fetched_docs["2001"], + single_doc, + recovered_collection.schema, + ), f"result doc={fetched_docs},doc_exp={single_doc}" break else: fetched_docs = 
recovered_collection.fetch([doc.id]) @@ -394,13 +416,16 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col exp_doc = generate_doc(int(doc.id), recovered_collection.schema) assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs["1"], exp_doc, recovered_collection.schema), ( - f"result doc={fetched_docs},doc_exp={exp_doc}") + assert is_doc_equal( + fetched_docs["1"], exp_doc, recovered_collection.schema + ), f"result doc={fetched_docs},doc_exp={exp_doc}" # 3.4: Check if index is complete and query function works properly print(f"[Test] Step 3.4: Verifying index integrity and query function...") filtered_query = recovered_collection.query(filter=f"int32_field >=-100") - print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + print( + f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents" + ) assert len(filtered_query) > 0 for doc in query_result: if doc.id == "2001": @@ -412,8 +437,9 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col print(fetched_docs) assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs["2001"], single_doc, recovered_collection.schema), ( - f"result doc={fetched_docs},doc_exp={single_doc}") + assert is_doc_equal( + fetched_docs["2001"], single_doc, recovered_collection.schema + ), f"result doc={fetched_docs},doc_exp={single_doc}" break else: fetched_docs = recovered_collection.fetch([doc.id]) @@ -422,18 +448,25 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col exp_doc = generate_doc(int(doc.id), recovered_collection.schema) assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs["1"], exp_doc, recovered_collection.schema), ( - f"result doc={fetched_docs},doc_exp={exp_doc}") + assert is_doc_equal( + fetched_docs["1"], exp_doc, recovered_collection.schema + ), f"result 
doc={fetched_docs},doc_exp={exp_doc}" # Verification 3.5: Test insertion functionality after recovery print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") - test_insert_doc = generate_doc(9999, full_schema_1024) # Use original schema from fixture - singledoc_and_check(recovered_collection, test_insert_doc, operator="insert", is_delete=0) + test_insert_doc = generate_doc( + 9999, full_schema_1024 + ) # Use original schema from fixture + singledoc_and_check( + recovered_collection, test_insert_doc, operator="insert", is_delete=0 + ) # Verification 3.6: Test update functionality after recovery print(f"[Test] Step 3.6: Testing update functionality after recovery...") updated_doc = generate_update_doc(2001, recovered_collection.schema) - singledoc_and_check(recovered_collection, updated_doc, operator="update", is_delete=0) + singledoc_and_check( + recovered_collection, updated_doc, operator="update", is_delete=0 + ) # 3.7: Test deletion after recovery print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") @@ -441,4 +474,4 @@ def test_insertdoc_simulate_crash_during_bulk_insert(self, full_schema_1024, col result = recovered_collection.delete(doc_ids) assert len(result) == len(doc_ids) for item in result: - assert item.ok() \ No newline at end of file + assert item.ok() diff --git a/python/tests/detail/test_collection_crash_recovery_updatedoc.py b/python/tests/detail/test_collection_crash_recovery_updatedoc.py index 7856d4475..e94c1c0e8 100644 --- a/python/tests/detail/test_collection_crash_recovery_updatedoc.py +++ b/python/tests/detail/test_collection_crash_recovery_updatedoc.py @@ -34,10 +34,8 @@ from doc_helper import * - - def singledoc_and_check( - collection: Collection, insert_doc, operator="insert", is_delete=1 + collection: Collection, insert_doc, operator="insert", is_delete=1 ): if operator == "insert": result = collection.insert(insert_doc) @@ -53,7 +51,7 @@ def singledoc_and_check( stats = collection.stats assert 
stats is not None - #assert stats.doc_count == 1 + # assert stats.doc_count == 1 fetched_docs = collection.fetch([insert_doc.id]) assert len(fetched_docs) == 1 @@ -91,7 +89,6 @@ def singledoc_and_check( assert collection.stats.doc_count == 0, "Document should be deleted" - class TestCollectionCrashRecoveryUpdateDoc: """ Test Zvec collection recovery capability after simulating power failure/process crash during document update. @@ -101,7 +98,7 @@ class TestCollectionCrashRecoveryUpdateDoc: # Script content for subprocess to execute Zvec document update operations # Write this script content to a temporary file and execute it in the subprocess. - ZVEC_SUBPROCESS_SCRIPT_UPDATEDOC = ''' + ZVEC_SUBPROCESS_SCRIPT_UPDATEDOC = """ import zvec import time import json @@ -353,9 +350,11 @@ def run_zvec_updatedoc_operations(args_json_str): if __name__ == "__main__": args_json_str = sys.argv[1] run_zvec_updatedoc_operations(args_json_str) -''' +""" - def test_updatedoc_simulate_crash_during_bulk_update(self, full_schema_1024, collection_option, basic_schema): + def test_updatedoc_simulate_crash_during_bulk_update( + self, full_schema_1024, collection_option, basic_schema + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform bulk document update operations. @@ -367,8 +366,11 @@ def test_updatedoc_simulate_crash_during_bulk_update(self, full_schema_1024, col # Step 1: Successfully create collection in main process and insert some documents print( - f"[Test] Step 1: Creating collection in main process and inserting initial documents, path: {collection_path}...") - coll = zvec.create_and_open(path=collection_path, schema=full_schema_1024, option=collection_option) + f"[Test] Step 1: Creating collection in main process and inserting initial documents, path: {collection_path}..." 
+ ) + coll = zvec.create_and_open( + path=collection_path, schema=full_schema_1024, option=collection_option + ) assert coll is not None print(f"[Test] Step 1.1: Collection created successfully.") @@ -384,7 +386,9 @@ def test_updatedoc_simulate_crash_during_bulk_update(self, full_schema_1024, col initial_docs.append(doc) insert_results = coll.insert(initial_docs) - print(f"[Test] Step 1.3: Inserted {len(initial_docs)} initial documents for updating.") + print( + f"[Test] Step 1.3: Inserted {len(initial_docs)} initial documents for updating." + ) del coll print(f"[Test] Step 1.4: Closed collection.") @@ -392,7 +396,7 @@ def test_updatedoc_simulate_crash_during_bulk_update(self, full_schema_1024, col # Step 2: Prepare and run subprocess for bulk update operations # Write subprocess script to temporary file subprocess_script_path = f"{temp_dir}/zvec_subprocess_updatedoc.py" - with open(subprocess_script_path, 'w', encoding='utf-8') as f: + with open(subprocess_script_path, "w", encoding="utf-8") as f: f.write(self.ZVEC_SUBPROCESS_SCRIPT_UPDATEDOC) # Prepare subprocess parameters @@ -400,20 +404,24 @@ def test_updatedoc_simulate_crash_during_bulk_update(self, full_schema_1024, col "collection_path": collection_path, "num_docs_to_update": 100, # Update 100 documents to allow for interruption "batch_size": 10, # Update 10 documents per batch - "delay_between_batches": 0.2 # 0.2 second delay between batches to increase interruption timing + "delay_between_batches": 0.2, # 0.2 second delay between batches to increase interruption timing } args_json_str = json.dumps(subprocess_args) - print(f"[Test] Step 2: Starting bulk update operations in subprocess, path: {collection_path}") + print( + f"[Test] Step 2: Starting bulk update operations in subprocess, path: {collection_path}" + ) # Start subprocess to execute bulk update operations - proc = subprocess.Popen([ - sys.executable, subprocess_script_path, args_json_str - ]) + proc = subprocess.Popen( + [sys.executable, 
subprocess_script_path, args_json_str] + ) # Wait briefly to allow subprocess to begin update operations time.sleep(2) # Wait 2 seconds to allow update loop to start - print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + print( + f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}..." + ) # Suddenly kill subprocess (simulate power failure or crash during update operations) if psutil: try: @@ -424,13 +432,19 @@ def test_updatedoc_simulate_crash_during_bulk_update(self, full_schema_1024, col child.kill() parent.kill() proc.wait(timeout=5) - except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + except ( + psutil.NoSuchProcess, + psutil.AccessDenied, + subprocess.TimeoutExpired, + ): # If psutil is unavailable or process has been terminated, fall back to original method proc.send_signal(signal.SIGKILL) try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() else: @@ -439,7 +453,9 @@ def test_updatedoc_simulate_crash_during_bulk_update(self, full_schema_1024, col try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." 
+ ) proc.kill() proc.wait() print(f"[Test] Subprocess {proc.pid} has been terminated.") @@ -449,11 +465,14 @@ def test_updatedoc_simulate_crash_during_bulk_update(self, full_schema_1024, col # Step 3: Verify recovery situation in main process print( - f"[Test] Step 3: Attempting to open collection after simulating crash during document update operations...") + f"[Test] Step 3: Attempting to open collection after simulating crash during document update operations..." + ) # Verification 3.1: Check if collection can be successfully opened after crash recovered_collection = zvec.open(collection_path) - assert recovered_collection is not None, "Cannot open collection after crash" + assert recovered_collection is not None, ( + "Cannot open collection after crash" + ) print(f"[Test] Step 3.1: Verified collection can be opened after crash...") # Verification 3.2: Check data integrity (document count and content) @@ -462,15 +481,20 @@ def test_updatedoc_simulate_crash_during_bulk_update(self, full_schema_1024, col # We expect some documents to have been successfully updated before crash # The exact number depends on when the crash occurred during the bulk update process print( - f"[Test] Step 3.2: Found {len(query_result)} documents after crash (expected 0-{subprocess_args['num_docs_to_update']})") + f"[Test] Step 3.2: Found {len(query_result)} documents after crash (expected 0-{subprocess_args['num_docs_to_update']})" + ) - # Verify quantity consistency - initial_doc_count = 200 # Initial docs inserted - extra_doc_count = 1 # Extra doc with ID 2001 + # Capture initial doc count dynamically from the collection before crash + # The initial document count was set when creating the collection + initial_doc_count = 200 # From the loop: for i in range(0, 200) + extra_doc_count = 1 # Extra doc with ID 2001 added separately expected_doc_count = initial_doc_count + extra_doc_count - assert recovered_collection.stats.doc_count == expected_doc_count, f"Expected {expected_doc_count} 
docs, got {recovered_collection.stats.doc_count}" + assert recovered_collection.stats.doc_count == expected_doc_count, ( + f"Expected {expected_doc_count} docs, got {recovered_collection.stats.doc_count}" + ) assert len(query_result) <= recovered_collection.stats.doc_count, ( - f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}" + ) # Verify existing documents have correct structure if len(query_result) > 0: @@ -478,32 +502,47 @@ def test_updatedoc_simulate_crash_during_bulk_update(self, full_schema_1024, col fetched_docs = recovered_collection.fetch([doc.id]) assert len(fetched_docs) == 1 assert doc.id in fetched_docs - + # Generate expected doc to compare - assert is_doc_equal(fetched_docs[doc.id], doc, recovered_collection.schema,include_vector=False), ( - f"result doc={fetched_docs[doc.id]},doc_exp={doc}") + assert is_doc_equal( + fetched_docs[doc.id], + doc, + recovered_collection.schema, + include_vector=False, + ), f"result doc={fetched_docs[doc.id]},doc_exp={doc}" # Verification 3.4: Check if index is complete and query function works properly print(f"[Test] Step 3.4: Verifying index integrity and query function...") filtered_query = recovered_collection.query(filter=f"int32_field >= -100") - print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + print( + f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents" + ) assert len(filtered_query) > 0 for doc in query_result[:50]: # Check first 50 for efficiency fetched_docs = recovered_collection.fetch([doc.id]) - assert is_doc_equal(fetched_docs[doc.id], doc, recovered_collection.schema,include_vector=False), ( - f"result doc={fetched_docs[doc.id]},doc_exp={doc}") + assert is_doc_equal( + fetched_docs[doc.id], + doc, + recovered_collection.schema, + include_vector=False, + ), f"result 
doc={fetched_docs[doc.id]},doc_exp={doc}" # Verification 3.5: Test insertion functionality after recovery print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") - test_insert_doc = generate_doc(9999, full_schema_1024) # Use original schema from fixture - singledoc_and_check(recovered_collection, test_insert_doc, operator="insert",is_delete=0) + test_insert_doc = generate_doc( + 9999, full_schema_1024 + ) # Use original schema from fixture + singledoc_and_check( + recovered_collection, test_insert_doc, operator="insert", is_delete=0 + ) # Verification 3.6: Test update functionality after recovery print(f"[Test] Step 3.6: Testing update functionality after recovery...") updated_doc = generate_update_doc(2001, recovered_collection.schema) - singledoc_and_check(recovered_collection, updated_doc, operator="update",is_delete=0) - + singledoc_and_check( + recovered_collection, updated_doc, operator="update", is_delete=0 + ) # Verification 3.7: Test deletion functionality after recovery (if supported) print(f"[Test] Step 3.7: Testing deletion functionality after recovery...") diff --git a/python/tests/detail/test_collection_crash_recovery_upsertdoc.py b/python/tests/detail/test_collection_crash_recovery_upsertdoc.py index 680da9106..47da11af9 100644 --- a/python/tests/detail/test_collection_crash_recovery_upsertdoc.py +++ b/python/tests/detail/test_collection_crash_recovery_upsertdoc.py @@ -34,10 +34,8 @@ from doc_helper import * - - def singledoc_and_check( - collection: Collection, insert_doc, operator="insert", is_delete=1 + collection: Collection, insert_doc, operator="insert", is_delete=1 ): if operator == "insert": result = collection.insert(insert_doc) @@ -53,7 +51,7 @@ def singledoc_and_check( stats = collection.stats assert stats is not None - #assert stats.doc_count == 1 + # assert stats.doc_count == 1 fetched_docs = collection.fetch([insert_doc.id]) assert len(fetched_docs) == 1 @@ -100,7 +98,7 @@ class TestCollectionCrashRecoveryUpsertDoc: 
# Script content for subprocess to execute Zvec document update operations # Write this script content to a temporary file and execute it in the subprocess. - ZVEC_SUBPROCESS_SCRIPT_UPSERTDOC = ''' + ZVEC_SUBPROCESS_SCRIPT_UPSERTDOC = """ import zvec import time import json @@ -353,9 +351,11 @@ def run_zvec_upsertdoc_operations(args_json_str): if __name__ == "__main__": args_json_str = sys.argv[1] run_zvec_upsertdoc_operations(args_json_str) -''' +""" - def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, collection_option, basic_schema): + def test_upsertdoc_simulate_crash_during_bulk_upsert( + self, full_schema_1024, collection_option, basic_schema + ): """ Scenario: First successfully create a Zvec collection in the main process and insert some documents. Then start a subprocess to open the collection and perform bulk document update operations. @@ -367,8 +367,11 @@ def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, col # Step 1: Successfully create collection in main process and insert some documents print( - f"[Test] Step 1: Creating collection in main process and inserting initial documents, path: {collection_path}...") - coll = zvec.create_and_open(path=collection_path, schema=full_schema_1024, option=collection_option) + f"[Test] Step 1: Creating collection in main process and inserting initial documents, path: {collection_path}..." + ) + coll = zvec.create_and_open( + path=collection_path, schema=full_schema_1024, option=collection_option + ) assert coll is not None print(f"[Test] Step 1.1: Collection created successfully.") @@ -384,7 +387,9 @@ def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, col initial_docs.append(doc) insert_results = coll.insert(initial_docs) - print(f"[Test] Step 1.3: Inserted {len(initial_docs)} initial documents for upserting.") + print( + f"[Test] Step 1.3: Inserted {len(initial_docs)} initial documents for upserting." 
+ ) del coll print(f"[Test] Step 1.4: Closed collection.") @@ -392,7 +397,7 @@ def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, col # Step 2: Prepare and run subprocess for bulk update operations # Write subprocess script to temporary file subprocess_script_path = f"{temp_dir}/zvec_subprocess_usertdoc.py" - with open(subprocess_script_path, 'w', encoding='utf-8') as f: + with open(subprocess_script_path, "w", encoding="utf-8") as f: f.write(self.ZVEC_SUBPROCESS_SCRIPT_UPSERTDOC) # Prepare subprocess parameters @@ -400,20 +405,24 @@ def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, col "collection_path": collection_path, "num_docs_to_upsert": 100, # Update 100 documents to allow for interruption "batch_size": 10, # Update 10 documents per batch - "delay_between_batches": 0.2 # 0.2 second delay between batches to increase interruption timing + "delay_between_batches": 0.2, # 0.2 second delay between batches to increase interruption timing } args_json_str = json.dumps(subprocess_args) - print(f"[Test] Step 2: Starting bulk update operations in subprocess, path: {collection_path}") + print( + f"[Test] Step 2: Starting bulk update operations in subprocess, path: {collection_path}" + ) # Start subprocess to execute bulk update operations - proc = subprocess.Popen([ - sys.executable, subprocess_script_path, args_json_str - ]) + proc = subprocess.Popen( + [sys.executable, subprocess_script_path, args_json_str] + ) # Wait briefly to allow subprocess to begin update operations time.sleep(2) # Wait 2 seconds to allow update loop to start - print(f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}...") + print( + f"[Test] Step 2: Simulating crash/power failure by terminating subprocess PID {proc.pid}..." 
+ ) # Suddenly kill subprocess (simulate power failure or crash during update operations) if psutil: try: @@ -424,13 +433,19 @@ def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, col child.kill() parent.kill() proc.wait(timeout=5) - except (psutil.NoSuchProcess, psutil.AccessDenied, subprocess.TimeoutExpired): + except ( + psutil.NoSuchProcess, + psutil.AccessDenied, + subprocess.TimeoutExpired, + ): # If psutil is unavailable or process has been terminated, fall back to original method proc.send_signal(signal.SIGKILL) try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() else: @@ -439,7 +454,9 @@ def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, col try: proc.wait(timeout=5) except subprocess.TimeoutExpired: - print(f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing...") + print( + f"[Test] Subprocess {proc.pid} could not be terminated with SIGKILL, force killing..." + ) proc.kill() proc.wait() print(f"[Test] Subprocess {proc.pid} has been terminated.") @@ -449,11 +466,14 @@ def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, col # Step 3: Verify recovery situation in main process print( - f"[Test] Step 3: Attempting to open collection after simulating crash during document update operations...") + f"[Test] Step 3: Attempting to open collection after simulating crash during document update operations..." 
+ ) # Verification 3.1: Check if collection can be successfully opened after crash recovered_collection = zvec.open(collection_path) - assert recovered_collection is not None, "Cannot open collection after crash" + assert recovered_collection is not None, ( + "Cannot open collection after crash" + ) print(f"[Test] Step 3.1: Verified collection can be opened after crash...") # Verification 3.2: Check data integrity (document count and content) @@ -462,13 +482,15 @@ def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, col # We expect some documents to have been successfully updated before crash # The exact number depends on when the crash occurred during the bulk update process print( - f"[Test] Step 3.2: Found {len(query_result)} documents after crash (expected 0-{subprocess_args['num_docs_to_upsert']})") + f"[Test] Step 3.2: Found {len(query_result)} documents after crash (expected 0-{subprocess_args['num_docs_to_upsert']})" + ) # Verify quantity consistency current_count = recovered_collection.stats.doc_count assert recovered_collection.stats.doc_count >= 51 assert len(query_result) <= recovered_collection.stats.doc_count, ( - f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}") + f"query_result count = {len(query_result)},stats.doc_count = {recovered_collection.stats.doc_count}" + ) # Verify existing documents have correct structure if len(query_result) > 0: @@ -478,13 +500,19 @@ def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, col assert doc.id in fetched_docs # Generate expected doc to compare - assert is_doc_equal(fetched_docs[doc.id], doc, recovered_collection.schema,include_vector=False), ( - f"result doc={fetched_docs[doc.id]},doc_exp={doc}") + assert is_doc_equal( + fetched_docs[doc.id], + doc, + recovered_collection.schema, + include_vector=False, + ), f"result doc={fetched_docs[doc.id]},doc_exp={doc}" # Verification 3.4: Check if index is complete and 
query function works properly print(f"[Test] Step 3.4: Verifying index integrity and query function...") filtered_query = recovered_collection.query(filter=f"int32_field >= -100") - print(f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents") + print( + f"[Test] Step 3.4.2: Field-filtered query returned {len(filtered_query)} documents" + ) assert len(filtered_query) > 0 for doc in query_result[:50]: # Check first 50 for efficiency @@ -492,18 +520,28 @@ def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, col assert len(fetched_docs) == 1 assert doc.id in fetched_docs - assert is_doc_equal(fetched_docs[doc.id], doc, recovered_collection.schema,include_vector=False), ( - f"result doc={fetched_docs[doc.id]},doc_exp={doc}") + assert is_doc_equal( + fetched_docs[doc.id], + doc, + recovered_collection.schema, + include_vector=False, + ), f"result doc={fetched_docs[doc.id]},doc_exp={doc}" # Verification 3.5: Test insertion functionality after recovery print(f"[Test] Step 3.5.1: Testing insertion functionality after recovery") - test_insert_doc = generate_doc(9999, full_schema_1024) # Use original schema from fixture - singledoc_and_check(recovered_collection, test_insert_doc, operator="insert", is_delete=0) + test_insert_doc = generate_doc( + 9999, full_schema_1024 + ) # Use original schema from fixture + singledoc_and_check( + recovered_collection, test_insert_doc, operator="insert", is_delete=0 + ) # Verification 3.6: Test update functionality after recovery print(f"[Test] Step 3.6: Testing update functionality after recovery...") updated_doc = generate_update_doc(2001, recovered_collection.schema) - singledoc_and_check(recovered_collection, updated_doc, operator="update", is_delete=0) + singledoc_and_check( + recovered_collection, updated_doc, operator="update", is_delete=0 + ) # Verification 3.7: Test deletion functionality after recovery (if supported) print(f"[Test] Step 3.7: Testing deletion functionality after 
recovery...") @@ -511,4 +549,4 @@ def test_upsertdoc_simulate_crash_during_bulk_upsert(self, full_schema_1024, col result = recovered_collection.delete(doc_ids) assert len(result) == len(doc_ids) for item in result: - assert item.ok() \ No newline at end of file + assert item.ok() diff --git a/python/tests/detail/test_collection_dml.py b/python/tests/detail/test_collection_dml.py index 08f233465..f35eb06f0 100644 --- a/python/tests/detail/test_collection_dml.py +++ b/python/tests/detail/test_collection_dml.py @@ -211,23 +211,13 @@ FIELD_VALUE_INVALID_LIST = [ ( "bool_field", - [ - "True", - "False", - "", - ], + ["True", "False", "", "测试"], ), - ("float_field", ["invalid", [1.0], {"value": 1.0}]), - ("double_field", ["invalid", [1.0], {"value": 1.0}]), + ("float_field", ["invalid", [1.0], {"value": 1.0}, "测试"]), + ("double_field", ["invalid", [1.0], {"value": 1.0}, "测试"]), ( "int32_field", - [ - "invalid", - [1], - {"value": 1}, - 2147483648, - -2147483649, - ], + ["invalid", [1], {"value": 1}, 2147483648, -2147483649, "测试"], ), ( "int64_field", @@ -237,27 +227,16 @@ {"value": 1}, 9223372036854775808, -9223372036854775809, + "测试", ], ), ( "uint32_field", - [ - "invalid", - [1], - {"value": 1}, - 4294967296, - -1, - ], + ["invalid", [1], {"value": 1}, 4294967296, -1, "测试"], ), ( "uint64_field", - [ - "invalid", - [1], - {"value": 1}, - 18446744073709551616, - -1, - ], + ["invalid", [1], {"value": 1}, 18446744073709551616, -1, "测试"], ), ( "string_field", @@ -271,84 +250,35 @@ ), ( "array_bool_field", - [ - True, - False, - [True, "invalid"], - {"key": True}, - ], + [True, False, [True, "invalid"], {"key": True}, "测试"], ), ( "array_float_field", - [ - [1.0, "invalid"], - [1.0, None], - "invalid", - [1.0, [2.0]], - 1.0, - ], + [[1.0, "invalid"], [1.0, None], "invalid", [1.0, [2.0]], 1.0, "测试"], ), ( "array_double_field", - [ - [1.0, "invalid"], - [1.0, None], - "invalid", - [1.0, [2.0]], - 1.0, - ], + [[1.0, "invalid"], [1.0, None], "invalid", [1.0, [2.0]], 1.0, "测试"], 
), ( "array_int32_field", - [ - [1, "invalid"], - [1, None], - "invalid", - [1, [2]], - 1, - ], + [[1, "invalid"], [1, None], "invalid", [1, [2]], 1, "测试"], ), ( "array_int64_field", - [ - [1, "invalid"], - [1, None], - "invalid", - [1, [2]], - 1, - ], + [[1, "invalid"], [1, None], "invalid", [1, [2]], 1, "测试"], ), ( "array_uint32_field", - [ - [1, "invalid"], - [1, None], - [1, -1], - "invalid", - [1, [2]], - 1, - ], + [[1, "invalid"], [1, None], [1, -1], "invalid", [1, [2]], 1, "测试"], ), ( "array_uint64_field", - [ - [1, "invalid"], - [1, None], - [1, -1], - "invalid", - [1, [2]], - 1, - ], + [[1, "invalid"], [1, None], [1, -1], "invalid", [1, [2]], 1, "测试"], ), ( "array_string_field", - [ - ["valid", 123], - ["valid", None], - "invalid", - [["nested"]], - 123, - ], + [["valid", 123], ["valid", None], "invalid", [["nested"]], 123, "测试"], ), ] diff --git a/python/tests/detail/test_collection_open.py b/python/tests/detail/test_collection_open.py index a275c9279..0ae1eefcb 100644 --- a/python/tests/detail/test_collection_open.py +++ b/python/tests/detail/test_collection_open.py @@ -691,6 +691,12 @@ def test_reopen_collection(self, tmp_path_factory): nullable=False, index_param=InvertIndexParam(), ), + FieldSchema( + "description", + DataType.STRING, + nullable=True, + index_param=InvertIndexParam(), + ), ], vectors=[ VectorSchema( @@ -720,7 +726,7 @@ def test_reopen_collection(self, tmp_path_factory): # Insert some data doc = Doc( id="1", - fields={"id": 1, "name": "test"}, + fields={"id": 1, "name": "test", "description": "这是一个中文描述。"}, vectors={"dense": np.random.random(128).tolist()}, ) @@ -743,6 +749,7 @@ def test_reopen_collection(self, tmp_path_factory): fetched_doc = fetched_docs["1"] assert fetched_doc.id == "1" assert fetched_doc.field("name") == "test" + assert fetched_doc.field("description") == "这是一个中文描述。" # Clean up if hasattr(coll2, "destroy") and coll2 is not None: From a0a9044a4bc0db523a5835b112063bf5e90f09e5 Mon Sep 17 00:00:00 2001 From: iaojnh 
Date: Wed, 25 Mar 2026 10:39:57 +0000 Subject: [PATCH 4/4] test(test_collection_ddl.py): optimize ddl test case --- python/tests/detail/distance_helper.py | 39 ++++++++++------- python/tests/detail/params_helper.py | 27 +++++++++--- python/tests/detail/test_collection_ddl.py | 49 ++++++++++++++++++++-- 3 files changed, 92 insertions(+), 23 deletions(-) diff --git a/python/tests/detail/distance_helper.py b/python/tests/detail/distance_helper.py index cf2815cfd..cda79cfb8 100644 --- a/python/tests/detail/distance_helper.py +++ b/python/tests/detail/distance_helper.py @@ -15,7 +15,7 @@ from typing import Dict -def is_float_equal(actual, expected, rel_tol=1e-5, abs_tol=1e-8): +def is_float_equal(actual, expected, rel_tol=1e-3, abs_tol=1e-5): if actual is None and expected is None: return True return math.isclose(actual, expected, rel_tol=rel_tol, abs_tol=abs_tol) @@ -63,6 +63,7 @@ def cosine_distance_dense( ): if dtype == DataType.VECTOR_FP16 or quantize_type == QuantizeType.FP16: # More stable conversion to float16 to avoid numerical issues + # Convert to numpy float16 and back to float for consistent handling vec1 = [float(np.float16(a)) for a in vec1] vec2 = [float(np.float16(b)) for b in vec2] elif dtype == DataType.VECTOR_INT8: @@ -74,10 +75,16 @@ def cosine_distance_dense( int(round(min(max(val, -128), 127))) for val in vec2 ] # Clamp to valid INT8 range - dot_product = sum(a * b for a, b in zip(vec1, vec2)) - - magnitude1 = math.sqrt(sum(a * a for a in vec1)) - magnitude2 = math.sqrt(sum(b * b for b in vec2)) + # Calculate dot product and magnitudes with higher precision for FP16 + if dtype == DataType.VECTOR_FP16 or quantize_type == QuantizeType.FP16: + # Use more precise calculation for FP16 to handle precision issues + dot_product = sum(np.float32(a) * np.float32(b) for a, b in zip(vec1, vec2)) + magnitude1 = math.sqrt(sum(np.float32(a) * np.float32(a) for a in vec1)) + magnitude2 = math.sqrt(sum(np.float32(b) * np.float32(b) for b in vec2)) + else: + 
dot_product = sum(a * b for a, b in zip(vec1, vec2)) + magnitude1 = math.sqrt(sum(a * a for a in vec1)) + magnitude2 = math.sqrt(sum(b * b for b in vec2)) if magnitude1 == 0 or magnitude2 == 0: return 1.0 # Zero vector case - maximum distance @@ -112,6 +119,7 @@ def dp_distance_dense( ): if dtype == DataType.VECTOR_FP16 or quantize_type == QuantizeType.FP16: # More stable computation to avoid numerical issues + # Convert to numpy float16 and back to float for consistent handling products = [ float(np.float16(a)) * float(np.float16(b)) for a, b in zip(vec1, vec2) ] @@ -319,22 +327,25 @@ def is_field_equal(field1, field2, schema: FieldSchema) -> bool: def is_vector_equal(vec1, vec2, schema: VectorSchema) -> bool: - if ( - schema.data_type == DataType.SPARSE_VECTOR_FP16 - or schema.data_type == DataType.VECTOR_FP16 - ): - # skip fp16 vector equal - return True - is_sparse = ( schema.data_type == DataType.SPARSE_VECTOR_FP32 or schema.data_type == DataType.SPARSE_VECTOR_FP16 ) if is_sparse: - return is_sparse_vector_equal(vec1, vec2) + # For SPARSE_VECTOR_FP16, use higher tolerance + if schema.data_type == DataType.SPARSE_VECTOR_FP16: + return is_sparse_vector_equal(vec1, vec2, rtol=1e-2, atol=1e-2) + else: + return is_sparse_vector_equal(vec1, vec2) else: - return is_dense_vector_equal(vec1, vec2) + # For FP16 and INT8 vectors, use appropriate tolerance for comparison + if schema.data_type == DataType.VECTOR_FP16: + return is_dense_vector_equal(vec1, vec2, rtol=1e-2, atol=1e-2) + elif schema.data_type == DataType.VECTOR_INT8: + return is_dense_vector_equal(vec1, vec2, rtol=1e-1, atol=1e-1) + else: + return is_dense_vector_equal(vec1, vec2) def is_doc_equal( diff --git a/python/tests/detail/params_helper.py b/python/tests/detail/params_helper.py index e373005e0..998878674 100644 --- a/python/tests/detail/params_helper.py +++ b/python/tests/detail/params_helper.py @@ -36,9 +36,9 @@ quantize_type=QuantizeType.FP16, ), FlatIndexParam(), - 
FlatIndexParam(metric_type=MetricType.IP, quantize_type=QuantizeType.INT4), - FlatIndexParam(metric_type=MetricType.L2, quantize_type=QuantizeType.INT8), - FlatIndexParam(metric_type=MetricType.COSINE, quantize_type=QuantizeType.FP16), + FlatIndexParam(metric_type=MetricType.IP), + FlatIndexParam(metric_type=MetricType.L2), + FlatIndexParam(metric_type=MetricType.COSINE), IVFIndexParam(), IVFIndexParam( metric_type=MetricType.IP, @@ -64,13 +64,26 @@ ], DataType.VECTOR_FP16: [ HnswIndexParam(), + HnswIndexParam(metric_type=MetricType.IP, m=16, ef_construction=100), + HnswIndexParam(metric_type=MetricType.COSINE, m=24, ef_construction=150), + HnswIndexParam(metric_type=MetricType.L2, m=32, ef_construction=200), FlatIndexParam(), - # IVFIndexParam(), + FlatIndexParam(metric_type=MetricType.IP), + FlatIndexParam(metric_type=MetricType.L2), + FlatIndexParam(metric_type=MetricType.COSINE), + IVFIndexParam(), + IVFIndexParam(metric_type=MetricType.IP), + IVFIndexParam(metric_type=MetricType.L2), + IVFIndexParam(metric_type=MetricType.COSINE), ], DataType.VECTOR_INT8: [ HnswIndexParam(), FlatIndexParam(), - # IVFIndexParam(), + HnswIndexParam(metric_type=MetricType.IP, m=16, ef_construction=100), + FlatIndexParam(metric_type=MetricType.IP), + IVFIndexParam(metric_type=MetricType.L2), + HnswIndexParam(metric_type=MetricType.L2), + FlatIndexParam(metric_type=MetricType.L2), ], DataType.SPARSE_VECTOR_FP32: [ HnswIndexParam(), @@ -108,6 +121,10 @@ ], DataType.VECTOR_INT8: [ InvertIndexParam(), + IVFIndexParam(metric_type=MetricType.IP), + FlatIndexParam(metric_type=MetricType.COSINE), + IVFIndexParam(metric_type=MetricType.COSINE), + HnswIndexParam(metric_type=MetricType.COSINE), ], DataType.SPARSE_VECTOR_FP32: [ HnswIndexParam(metric_type=MetricType.L2), diff --git a/python/tests/detail/test_collection_ddl.py b/python/tests/detail/test_collection_ddl.py index 6fba8cb2e..ceb9ee88e 100644 --- a/python/tests/detail/test_collection_ddl.py +++ 
b/python/tests/detail/test_collection_ddl.py @@ -448,9 +448,17 @@ def check_error_message(exc_info, invalid_name): "Error message is unreasonable: e=" + str(exc_info.value) ) else: - assert INCOMPATIBLE_FUNCTION_ERROR_MSG in str(exc_info.value), ( - "Error message is unreasonable: e=" + str(exc_info.value) - ) + # For non-string values like None, int, float, etc., we may get either + # INCOMPATIBLE_FUNCTION_ERROR_MSG, SCHEMA_VALIDATE_ERROR_MSG, INCOMPATIBLE_CONSTRUCTOR_ERROR_MSG + error_str = str(exc_info.value) + # Check if the error contains expected patterns + expected_patterns = [ + INCOMPATIBLE_FUNCTION_ERROR_MSG, + SCHEMA_VALIDATE_ERROR_MSG, + INCOMPATIBLE_CONSTRUCTOR_ERROR_MSG, + ] + if not any(pattern in error_str for pattern in expected_patterns): + assert False, "Error message is unreasonable: e=" + error_str @pytest.mark.parametrize( "invalid_field_name,invalid_vector_name", @@ -781,7 +789,7 @@ def test_compicated_workflow( @pytest.mark.parametrize( "data_type, index_param", VALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP_PARAMS ) - def test_vector_index_params( + def test_valid_vector_index_params( self, collection_temp_dir, collection_option: CollectionOption, @@ -911,6 +919,39 @@ def check_result( ) coll.destroy() + @pytest.mark.parametrize( + "data_type, index_param", INVALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP_PARAMS + ) + def test_invalid_vector_index_params( + self, + collection_temp_dir, + collection_option: CollectionOption, + data_type: DataType, + index_param, + single_vector_schema, + ): + vector_name = DEFAULT_VECTOR_FIELD_NAME[data_type] + dimension = DEFAULT_VECTOR_DIMENSION + + coll = zvec.create_and_open( + path=collection_temp_dir, + schema=single_vector_schema, + option=collection_option, + ) + + assert coll is not None, ( + f"Failed to create and open collection, {data_type}, {index_param}" + ) + + with pytest.raises(Exception) as exc_info: + # create index + coll.create_index( + field_name=vector_name, + index_param=index_param, + 
option=IndexOption(), + ) + self.check_error_message(exc_info, index_param) + class TestColumnDDL: def test_add_column(self, basic_collection: Collection):