diff --git a/.github/workflows/nightly-coverage.yml b/.github/workflows/nightly-coverage.yml index e936f36..35c2884 100644 --- a/.github/workflows/nightly-coverage.yml +++ b/.github/workflows/nightly-coverage.yml @@ -52,14 +52,10 @@ jobs: git clone --depth 1 --recurse-submodules --shallow-submodules https://github.com/alibaba/zvec.git ../zvec cd ../zvec NPROC=$(nproc 2>/dev/null || echo 2) + mkdir -p build && cd build - python -m pip install --upgrade pip pybind11==3.0 cmake==3.30.0 ninja==1.11.1 scikit-build-core setuptools_scm - - CMAKE_GENERATOR="Unix Makefiles" \ - CMAKE_BUILD_PARALLEL_LEVEL="$NPROC" \ - python -m pip install -v . \ - --no-build-isolation \ - --config-settings='cmake.define.BUILD_TOOLS="ON"' + cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DBUILD_C_BINDINGS=ON -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_TOOLS=OFF + cmake --build . --config Release --parallel "$NPROC" shell: bash - name: Set library path diff --git a/fuzz/fuzz_targets/fuzz_doc.rs b/fuzz/fuzz_targets/fuzz_doc.rs index 3806fa8..1ce3332 100644 --- a/fuzz/fuzz_targets/fuzz_doc.rs +++ b/fuzz/fuzz_targets/fuzz_doc.rs @@ -45,43 +45,47 @@ fuzz_target!(|input: FuzzInput| { Err(_) => return, }; - doc.set_pk(&pk); + // Sanitize pk and field_name by replacing null bytes with underscores + let safe_pk = pk.replace('\0', "_"); + let safe_field_name = field_name.replace('\0', "_"); + + doc.set_pk(&safe_pk); let _ = doc.get_pk(); - let _ = doc.add_string(&field_name, &string_value); - let _ = doc.get_string(&field_name); + let _ = doc.add_string(&safe_field_name, &string_value); + let _ = doc.get_string(&safe_field_name); - let _ = doc.add_bool(&field_name, bool_value); - let _ = doc.get_bool(&field_name); + let _ = doc.add_bool(&safe_field_name, bool_value); + let _ = doc.get_bool(&safe_field_name); - let _ = doc.add_i32(&field_name, i32_value); - let _ = doc.get_i32(&field_name); + let _ = doc.add_i32(&safe_field_name, i32_value); + let _ = doc.get_i32(&safe_field_name); - let _ = 
doc.add_i64(&field_name, i64_value); - let _ = doc.get_i64(&field_name); + let _ = doc.add_i64(&safe_field_name, i64_value); + let _ = doc.get_i64(&safe_field_name); - let _ = doc.add_u32(&field_name, u32_value); + let _ = doc.add_u32(&safe_field_name, u32_value); - let _ = doc.add_u64(&field_name, u64_value); + let _ = doc.add_u64(&safe_field_name, u64_value); - let _ = doc.add_f32(&field_name, f32_value); - let _ = doc.get_f32(&field_name); + let _ = doc.add_f32(&safe_field_name, f32_value); + let _ = doc.get_f32(&safe_field_name); - let _ = doc.add_f64(&field_name, f64_value); - let _ = doc.get_f64(&field_name); + let _ = doc.add_f64(&safe_field_name, f64_value); + let _ = doc.get_f64(&safe_field_name); if !vector_data.is_empty() { - let _ = doc.add_vector_f32(&field_name, &vector_data); - let _ = doc.get_vector_f32(&field_name); + let _ = doc.add_vector_f32(&safe_field_name, &vector_data); + let _ = doc.get_vector_f32(&safe_field_name); } - let _ = doc.has_field(&field_name); + let _ = doc.has_field(&safe_field_name); let _ = doc.is_empty(); let _ = doc.field_count(); - let _ = doc.is_field_null(&field_name); + let _ = doc.is_field_null(&safe_field_name); - let _ = doc.set_field_null(&field_name); - let _ = doc.remove_field(&field_name); + let _ = doc.set_field_null(&safe_field_name); + let _ = doc.remove_field(&safe_field_name); doc.clear(); }); diff --git a/fuzz/fuzz_targets/fuzz_query.rs b/fuzz/fuzz_targets/fuzz_query.rs index e2fb45c..89f4403 100644 --- a/fuzz/fuzz_targets/fuzz_query.rs +++ b/fuzz/fuzz_targets/fuzz_query.rs @@ -48,12 +48,18 @@ fuzz_target!(|input: FuzzInput| { return; } - let _ = VectorQuery::new(&field_name, &vector_data, topk); + // Sanitize field_name by replacing null bytes with underscores + let safe_field_name = field_name.replace('\0', "_"); + + // Clamp topk to the range 1..=10000 to prevent memory issues (reasonable upper bound) + let safe_topk = topk.clamp(1, 10000); + + let _ = VectorQuery::new(&safe_field_name, &vector_data, safe_topk); let mut builder = 
VectorQuery::builder() - .field_name(&field_name) + .field_name(&safe_field_name) .vector(&vector_data) - .topk(topk); + .topk(safe_topk); if !filter.is_empty() { builder = builder.filter(&filter); diff --git a/fuzz/fuzz_targets/fuzz_schema.rs b/fuzz/fuzz_targets/fuzz_schema.rs index 0fb5231..73bc39f 100644 --- a/fuzz/fuzz_targets/fuzz_schema.rs +++ b/fuzz/fuzz_targets/fuzz_schema.rs @@ -42,10 +42,11 @@ fuzz_target!(|input: FuzzInput| { let metric = MetricType::from(metric_type); - let _ = FieldSchema::new(&field_name, DataType::String, false, 0); - let _ = FieldSchema::new(&field_name, DataType::Int64, false, 0); - let _ = FieldSchema::new(&field_name, DataType::Float, false, 0); - let _ = FieldSchema::new(&field_name, DataType::VectorFp32, false, dimension); + // Use try_new to handle invalid field names (e.g., containing null bytes) + let _ = FieldSchema::try_new(&field_name, DataType::String, false, 0); + let _ = FieldSchema::try_new(&field_name, DataType::Int64, false, 0); + let _ = FieldSchema::try_new(&field_name, DataType::Float, false, 0); + let _ = FieldSchema::try_new(&field_name, DataType::VectorFp32, false, dimension); let _ = IndexParams::hnsw(metric, m, ef_construction); let _ = IndexParams::hnsw_with_quantize(metric, m, ef_construction, zvec::QuantizeType::Int8); @@ -53,10 +54,13 @@ fuzz_target!(|input: FuzzInput| { let _ = IndexParams::flat(metric); let _ = IndexParams::invert(enable_range_opt, enable_wildcard); + // Use try_new to handle invalid field names let _ = zvec::CollectionSchema::builder(&collection_name) - .add_field(FieldSchema::new(&field_name, DataType::String, false, 0)) + .add_field(FieldSchema::try_new(&field_name, DataType::String, false, 0).unwrap_or_else(|_| { + FieldSchema::new("default_field", DataType::String, false, 0) + })) .add_vector_field( - &format!("{}_vec", field_name), + &format!("{}_vec", field_name.replace('\0', "_")), DataType::VectorFp32, dimension, IndexParams::hnsw(metric, m, ef_construction),