diff --git a/.gitmodules b/.gitmodules index 49ed1920b..8479ef81e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -26,9 +26,6 @@ path = thirdparty/glog/glog-0.5.0 url = https://github.com/google/glog.git ignore = all -[submodule "thirdparty/protobuf/protobuf-3.21.12"] - path = thirdparty/protobuf/protobuf-3.21.12 - url = https://github.com/protocolbuffers/protobuf.git [submodule "thirdparty/lz4/lz4-1.9.4"] path = thirdparty/lz4/lz4-1.9.4 url = https://github.com/lz4/lz4.git diff --git a/examples/c++/CMakeLists.txt b/examples/c++/CMakeLists.txt index 3c8a15efd..e22b83132 100644 --- a/examples/c++/CMakeLists.txt +++ b/examples/c++/CMakeLists.txt @@ -37,7 +37,6 @@ if(CMAKE_BUILD_TYPE STREQUAL "Debug") else () set(GFLAGS_LIB gflags_nothreads_debug) endif () - set(PROTOBUF_LIB protobufd) else() set(GLOG_LIB glog) if (WIN32) @@ -45,7 +44,6 @@ else() else () set(GFLAGS_LIB gflags_nothreads) endif () - set(PROTOBUF_LIB protobuf) endif() # --- Dependency groups --- @@ -77,12 +75,10 @@ if (NOT WIN32) antlr4-runtime ${GLOG_LIB} ${GFLAGS_LIB} - ${PROTOBUF_LIB} lz4 ) else () # Windows static libraries use different naming conventions - set(PROTOBUF_LIB libprotobuf) set(zvec_ailego_deps arrow_static parquet_static @@ -102,7 +98,6 @@ else () antlr4-runtime-static ${GLOG_LIB} ${GFLAGS_LIB} - ${PROTOBUF_LIB} lz4 rpcrt4 shlwapi diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index 5edb881a6..9a565d09c 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -50,11 +50,9 @@ find_package(Threads REQUIRED) if(CMAKE_BUILD_TYPE STREQUAL "Debug") set(GLOG_LIB glogd) set(GFLAGS_LIB gflags_nothreads_debug) - set(PROTOBUF_LIB protobufd) else() set(GLOG_LIB glog) set(GFLAGS_LIB gflags_nothreads) - set(PROTOBUF_LIB protobuf) endif() # --- Dependency groups --- @@ -71,14 +69,12 @@ if(NOT WIN32) antlr4-runtime ${GLOG_LIB} ${GFLAGS_LIB} - ${PROTOBUF_LIB} lz4 ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS} ) else() # Windows static libraries use different naming 
conventions - set(PROTOBUF_LIB libprotobuf) set(zvec_c_api_deps roaring rocksdb @@ -91,7 +87,6 @@ else() antlr4-runtime-static ${GLOG_LIB} ${GFLAGS_LIB} - ${PROTOBUF_LIB} lz4 ${CMAKE_THREAD_LIBS_INIT} rpcrt4 diff --git a/src/binding/c/CMakeLists.txt b/src/binding/c/CMakeLists.txt index 3007478ea..dbddfc702 100644 --- a/src/binding/c/CMakeLists.txt +++ b/src/binding/c/CMakeLists.txt @@ -136,7 +136,6 @@ elseif(APPLE) Arrow::arrow_acero rocksdb glog - libprotobuf antlr4 ) diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt index b2689278a..3570b87e3 100644 --- a/src/db/CMakeLists.txt +++ b/src/db/CMakeLists.txt @@ -1,12 +1,6 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) -cc_proto_library( - NAME zvec_proto STATIC - SRCS proto/*.proto - PROTOROOT ./ -) - cc_directory(common) cc_directory(index) cc_directory(sqlengine) @@ -15,8 +9,8 @@ file(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h) cc_library( NAME zvec_db STATIC STRICT SRCS_NO_GLOB PACKED - SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc - INCS . ${CMAKE_CURRENT_BINARY_DIR} + SRCS ${ALL_DB_SRCS} + INCS . PUBINCS ${PROJECT_ROOT_DIR}/src/include LIBS zvec_ailego @@ -25,11 +19,9 @@ cc_library( roaring rocksdb antlr4 - libprotobuf Arrow::arrow_static Arrow::arrow_compute Arrow::arrow_dataset Arrow::arrow_acero - DEPS zvec_proto VERSION "${PROXIMA_ZVEC_VERSION}" ) \ No newline at end of file diff --git a/src/db/common/binary_codec.h b/src/db/common/binary_codec.h new file mode 100644 index 000000000..d290001c3 --- /dev/null +++ b/src/db/common/binary_codec.h @@ -0,0 +1,155 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +namespace zvec { + +// CRC32 implementation (IEEE polynomial, same as zlib) +class CRC32 { + public: + static uint32_t Compute(const void *data, size_t length) { + const uint8_t *bytes = static_cast(data); + uint32_t crc = 0xFFFFFFFF; + for (size_t i = 0; i < length; ++i) { + crc ^= bytes[i]; + for (int j = 0; j < 8; ++j) { + crc = (crc >> 1) ^ (0xEDB88320 & (-(crc & 1))); + } + } + return crc ^ 0xFFFFFFFF; + } +}; + +// Binary writer: appends data to an internal buffer +class BinaryWriter { + public: + void PutUint8(uint8_t value) { + buffer_.push_back(value); + } + + void PutUint32(uint32_t value) { + const size_t offset = buffer_.size(); + buffer_.resize(offset + sizeof(uint32_t)); + std::memcpy(buffer_.data() + offset, &value, sizeof(uint32_t)); + } + + void PutUint64(uint64_t value) { + const size_t offset = buffer_.size(); + buffer_.resize(offset + sizeof(uint64_t)); + std::memcpy(buffer_.data() + offset, &value, sizeof(uint64_t)); + } + + void PutInt32(int32_t value) { + PutUint32(static_cast(value)); + } + + void PutBool(bool value) { + PutUint8(value ? 
1 : 0); + } + + void PutString(const std::string &value) { + PutUint32(static_cast(value.size())); + if (!value.empty()) { + const size_t offset = buffer_.size(); + buffer_.resize(offset + value.size()); + std::memcpy(buffer_.data() + offset, value.data(), value.size()); + } + } + + const std::vector &buffer() const { + return buffer_; + } + + size_t size() const { + return buffer_.size(); + } + + const uint8_t *data() const { + return buffer_.data(); + } + + private: + std::vector buffer_; +}; + +// Binary reader: reads data from a byte buffer +class BinaryReader { + public: + BinaryReader(const uint8_t *data, size_t size) + : data_(data), size_(size), offset_(0) {} + + bool GetUint8(uint8_t *value) { + if (offset_ + sizeof(uint8_t) > size_) return false; + *value = data_[offset_]; + offset_ += sizeof(uint8_t); + return true; + } + + bool GetUint32(uint32_t *value) { + if (offset_ + sizeof(uint32_t) > size_) return false; + std::memcpy(value, data_ + offset_, sizeof(uint32_t)); + offset_ += sizeof(uint32_t); + return true; + } + + bool GetUint64(uint64_t *value) { + if (offset_ + sizeof(uint64_t) > size_) return false; + std::memcpy(value, data_ + offset_, sizeof(uint64_t)); + offset_ += sizeof(uint64_t); + return true; + } + + bool GetInt32(int32_t *value) { + uint32_t raw; + if (!GetUint32(&raw)) return false; + *value = static_cast(raw); + return true; + } + + bool GetBool(bool *value) { + uint8_t raw; + if (!GetUint8(&raw)) return false; + *value = (raw != 0); + return true; + } + + bool GetString(std::string *value) { + uint32_t length; + if (!GetUint32(&length)) return false; + if (offset_ + length > size_) return false; + value->assign(reinterpret_cast(data_ + offset_), length); + offset_ += length; + return true; + } + + size_t offset() const { + return offset_; + } + + size_t remaining() const { + return size_ - offset_; + } + + private: + const uint8_t *data_; + size_t size_; + size_t offset_; +}; + +} // namespace zvec diff --git 
a/src/db/index/CMakeLists.txt b/src/db/index/CMakeLists.txt index 4420050e6..89eed96c4 100644 --- a/src/db/index/CMakeLists.txt +++ b/src/db/index/CMakeLists.txt @@ -5,7 +5,6 @@ cc_library( NAME zvec_index STATIC STRICT SRCS *.cc segment/*.cc column/vector_column/*.cc column/inverted_column/*.cc storage/*.cc storage/wal/*.cc common/*.cc LIBS zvec_common - zvec_proto rocksdb core_interface Arrow::arrow_static diff --git a/src/db/index/common/manifest_serializer.cc b/src/db/index/common/manifest_serializer.cc new file mode 100644 index 000000000..d972d3890 --- /dev/null +++ b/src/db/index/common/manifest_serializer.cc @@ -0,0 +1,514 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "manifest_serializer.h" +#include + +namespace zvec { + +// --- IndexParams --- + +void ManifestSerializer::WriteIndexParams(BinaryWriter *writer, + const IndexParams *params) { + if (!params) { + writer->PutUint32(static_cast(IndexType::UNDEFINED)); + return; + } + + writer->PutUint32(static_cast(params->type())); + + switch (params->type()) { + case IndexType::INVERT: { + auto *invert = dynamic_cast(params); + writer->PutBool(invert->enable_range_optimization()); + break; + } + case IndexType::HNSW: { + auto *hnsw = dynamic_cast(params); + writer->PutUint32(static_cast(hnsw->metric_type())); + writer->PutUint32(static_cast(hnsw->quantize_type())); + writer->PutInt32(hnsw->m()); + writer->PutInt32(hnsw->ef_construction()); + break; + } + case IndexType::HNSW_RABITQ: { + auto *rabitq = dynamic_cast(params); + writer->PutUint32(static_cast(rabitq->metric_type())); + writer->PutUint32(static_cast(rabitq->quantize_type())); + writer->PutInt32(rabitq->m()); + writer->PutInt32(rabitq->ef_construction()); + writer->PutInt32(rabitq->total_bits()); + writer->PutInt32(rabitq->num_clusters()); + writer->PutInt32(rabitq->sample_count()); + break; + } + case IndexType::FLAT: { + auto *flat = dynamic_cast(params); + writer->PutUint32(static_cast(flat->metric_type())); + writer->PutUint32(static_cast(flat->quantize_type())); + break; + } + case IndexType::IVF: { + auto *ivf = dynamic_cast(params); + writer->PutUint32(static_cast(ivf->metric_type())); + writer->PutUint32(static_cast(ivf->quantize_type())); + writer->PutInt32(ivf->n_list()); + writer->PutInt32(ivf->n_iters()); + writer->PutBool(ivf->use_soar()); + break; + } + default: + break; + } +} + +IndexParams::Ptr ManifestSerializer::ReadIndexParams(BinaryReader *reader, + bool *ok) { + uint32_t type_raw; + if (!reader->GetUint32(&type_raw)) { + *ok = false; + return nullptr; + } + + auto index_type = static_cast(type_raw); + if (index_type == IndexType::UNDEFINED) { + *ok = true; + return nullptr; + } + + 
switch (index_type) { + case IndexType::INVERT: { + bool enable_range_opt; + if (!reader->GetBool(&enable_range_opt)) { + *ok = false; + return nullptr; + } + *ok = true; + return std::make_shared(enable_range_opt); + } + case IndexType::HNSW: { + uint32_t metric_raw, quantize_raw; + int32_t m_val, ef_val; + if (!reader->GetUint32(&metric_raw) || + !reader->GetUint32(&quantize_raw) || !reader->GetInt32(&m_val) || + !reader->GetInt32(&ef_val)) { + *ok = false; + return nullptr; + } + *ok = true; + return std::make_shared( + static_cast(metric_raw), m_val, ef_val, + static_cast(quantize_raw)); + } + case IndexType::HNSW_RABITQ: { + uint32_t metric_raw, quantize_raw; + int32_t m_val, ef_val, total_bits, num_clusters, sample_count; + if (!reader->GetUint32(&metric_raw) || + !reader->GetUint32(&quantize_raw) || !reader->GetInt32(&m_val) || + !reader->GetInt32(&ef_val) || !reader->GetInt32(&total_bits) || + !reader->GetInt32(&num_clusters) || + !reader->GetInt32(&sample_count)) { + *ok = false; + return nullptr; + } + *ok = true; + return std::make_shared( + static_cast(metric_raw), total_bits, num_clusters, m_val, + ef_val, sample_count); + } + case IndexType::FLAT: { + uint32_t metric_raw, quantize_raw; + if (!reader->GetUint32(&metric_raw) || + !reader->GetUint32(&quantize_raw)) { + *ok = false; + return nullptr; + } + *ok = true; + return std::make_shared( + static_cast(metric_raw), + static_cast(quantize_raw)); + } + case IndexType::IVF: { + uint32_t metric_raw, quantize_raw; + int32_t n_list, n_iters; + bool use_soar; + if (!reader->GetUint32(&metric_raw) || + !reader->GetUint32(&quantize_raw) || !reader->GetInt32(&n_list) || + !reader->GetInt32(&n_iters) || !reader->GetBool(&use_soar)) { + *ok = false; + return nullptr; + } + *ok = true; + return std::make_shared( + static_cast(metric_raw), n_list, n_iters, use_soar, + static_cast(quantize_raw)); + } + default: + *ok = true; + return nullptr; + } +} + +// --- FieldSchema --- + +void 
ManifestSerializer::WriteFieldSchema(BinaryWriter *writer, + const FieldSchema &field) { + writer->PutString(field.name()); + writer->PutUint32(static_cast(field.data_type())); + writer->PutUint32(field.dimension()); + writer->PutBool(field.nullable()); + + bool has_index = (field.index_params() != nullptr); + writer->PutBool(has_index); + if (has_index) { + WriteIndexParams(writer, field.index_params().get()); + } +} + +FieldSchema::Ptr ManifestSerializer::ReadFieldSchema(BinaryReader *reader, + bool *ok) { + std::string name; + uint32_t data_type_raw, dimension; + bool nullable, has_index; + + if (!reader->GetString(&name) || !reader->GetUint32(&data_type_raw) || + !reader->GetUint32(&dimension) || !reader->GetBool(&nullable) || + !reader->GetBool(&has_index)) { + *ok = false; + return nullptr; + } + + auto field = std::make_shared(); + field->set_name(name); + field->set_data_type(static_cast(data_type_raw)); + field->set_dimension(dimension); + field->set_nullable(nullable); + + if (has_index) { + auto index_params = ReadIndexParams(reader, ok); + if (!*ok) return nullptr; + if (index_params) { + field->set_index_params(index_params); + } + } + + *ok = true; + return field; +} + +// --- CollectionSchema --- + +void ManifestSerializer::WriteCollectionSchema(BinaryWriter *writer, + const CollectionSchema &schema) { + writer->PutString(schema.name()); + writer->PutUint64(schema.max_doc_count_per_segment()); + + auto fields = schema.fields(); + writer->PutUint32(static_cast(fields.size())); + for (const auto &field : fields) { + WriteFieldSchema(writer, *field); + } +} + +bool ManifestSerializer::ReadCollectionSchema(BinaryReader *reader, + CollectionSchema *schema) { + std::string name; + uint64_t max_doc_count; + uint32_t field_count; + + if (!reader->GetString(&name) || !reader->GetUint64(&max_doc_count) || + !reader->GetUint32(&field_count)) { + return false; + } + + schema->set_name(name); + schema->set_max_doc_count_per_segment(max_doc_count); + + for 
(uint32_t i = 0; i < field_count; ++i) { + bool field_ok = true; + auto field = ReadFieldSchema(reader, &field_ok); + if (!field_ok || !field) return false; + schema->add_field(field); + } + + return true; +} + +// --- BlockMeta --- + +void ManifestSerializer::WriteBlockMeta(BinaryWriter *writer, + const BlockMeta &meta) { + writer->PutUint32(meta.id()); + writer->PutUint32(static_cast(meta.type())); + writer->PutUint64(meta.min_doc_id()); + writer->PutUint64(meta.max_doc_id()); + writer->PutUint32(meta.doc_count()); + + const auto &columns = meta.columns(); + writer->PutUint32(static_cast(columns.size())); + for (const auto &col : columns) { + writer->PutString(col); + } +} + +bool ManifestSerializer::ReadBlockMeta(BinaryReader *reader, BlockMeta *meta) { + uint32_t block_id, block_type_raw, doc_count, column_count; + uint64_t min_doc_id, max_doc_id; + + if (!reader->GetUint32(&block_id) || !reader->GetUint32(&block_type_raw) || + !reader->GetUint64(&min_doc_id) || !reader->GetUint64(&max_doc_id) || + !reader->GetUint32(&doc_count) || !reader->GetUint32(&column_count)) { + return false; + } + + meta->set_id(block_id); + meta->set_type(static_cast(block_type_raw)); + meta->set_min_doc_id(min_doc_id); + meta->set_max_doc_id(max_doc_id); + meta->set_doc_count(doc_count); + + for (uint32_t i = 0; i < column_count; ++i) { + std::string col; + if (!reader->GetString(&col)) return false; + meta->add_column(col); + } + + return true; +} + +// --- SegmentMeta --- + +void ManifestSerializer::WriteSegmentMeta(BinaryWriter *writer, + const SegmentMeta &meta) { + writer->PutUint32(meta.id()); + + // persisted blocks + const auto &blocks = meta.persisted_blocks(); + writer->PutUint32(static_cast(blocks.size())); + for (const auto &block : blocks) { + WriteBlockMeta(writer, block); + } + + // writing forward block + bool has_writing = meta.has_writing_forward_block(); + writer->PutBool(has_writing); + if (has_writing) { + WriteBlockMeta(writer, 
meta.writing_forward_block().value()); + } + + // indexed vector fields + auto indexed_fields = meta.indexed_vector_fields(); + writer->PutUint32(static_cast(indexed_fields.size())); + for (const auto &field : indexed_fields) { + writer->PutString(field); + } +} + +SegmentMeta::Ptr ManifestSerializer::ReadSegmentMeta(BinaryReader *reader, + bool *ok) { + uint32_t segment_id; + if (!reader->GetUint32(&segment_id)) { + *ok = false; + return nullptr; + } + + auto meta = std::make_shared(segment_id); + + // persisted blocks + uint32_t block_count; + if (!reader->GetUint32(&block_count)) { + *ok = false; + return nullptr; + } + for (uint32_t i = 0; i < block_count; ++i) { + BlockMeta block; + if (!ReadBlockMeta(reader, &block)) { + *ok = false; + return nullptr; + } + meta->add_persisted_block(block); + } + + // writing forward block + bool has_writing; + if (!reader->GetBool(&has_writing)) { + *ok = false; + return nullptr; + } + if (has_writing) { + BlockMeta writing_block; + if (!ReadBlockMeta(reader, &writing_block)) { + *ok = false; + return nullptr; + } + meta->set_writing_forward_block(writing_block); + } + + // indexed vector fields + uint32_t field_count; + if (!reader->GetUint32(&field_count)) { + *ok = false; + return nullptr; + } + for (uint32_t i = 0; i < field_count; ++i) { + std::string field_name; + if (!reader->GetString(&field_name)) { + *ok = false; + return nullptr; + } + meta->add_indexed_vector_field(field_name); + } + + *ok = true; + return meta; +} + +// --- Top-level Serialize / Deserialize --- + +Status ManifestSerializer::Serialize( + const CollectionSchema &schema, bool enable_mmap, + uint32_t id_map_path_suffix, uint32_t delete_snapshot_path_suffix, + uint32_t next_segment_id, + const std::vector &persisted_segments, + const SegmentMeta::Ptr &writing_segment, std::vector *output) { + BinaryWriter payload_writer; + + // Scalar fields + payload_writer.PutBool(enable_mmap); + payload_writer.PutUint32(id_map_path_suffix); + 
payload_writer.PutUint32(delete_snapshot_path_suffix); + payload_writer.PutUint32(next_segment_id); + + // Schema + WriteCollectionSchema(&payload_writer, schema); + + // Persisted segments + payload_writer.PutUint32(static_cast(persisted_segments.size())); + for (const auto &seg : persisted_segments) { + WriteSegmentMeta(&payload_writer, *seg); + } + + // Writing segment + bool has_writing = (writing_segment != nullptr); + payload_writer.PutBool(has_writing); + if (has_writing) { + WriteSegmentMeta(&payload_writer, *writing_segment); + } + + // Build final output: header + payload + uint32_t payload_length = static_cast(payload_writer.size()); + uint32_t payload_crc = + CRC32::Compute(payload_writer.data(), payload_writer.size()); + + BinaryWriter header_writer; + header_writer.PutUint32(MAGIC); + header_writer.PutUint32(FORMAT_VERSION); + header_writer.PutUint32(payload_length); + header_writer.PutUint32(payload_crc); + + output->clear(); + output->reserve(header_writer.size() + payload_writer.size()); + output->insert(output->end(), header_writer.buffer().begin(), + header_writer.buffer().end()); + output->insert(output->end(), payload_writer.buffer().begin(), + payload_writer.buffer().end()); + + return Status::OK(); +} + +Status ManifestSerializer::Deserialize( + const uint8_t *data, size_t size, CollectionSchema *schema, + bool *enable_mmap, uint32_t *id_map_path_suffix, + uint32_t *delete_snapshot_path_suffix, uint32_t *next_segment_id, + std::vector *persisted_segments, + SegmentMeta::Ptr *writing_segment) { + if (size < HEADER_SIZE) { + return Status::InternalError("Manifest file too small"); + } + + BinaryReader header_reader(data, HEADER_SIZE); + + uint32_t magic, version, payload_length, expected_crc; + header_reader.GetUint32(&magic); + header_reader.GetUint32(&version); + header_reader.GetUint32(&payload_length); + header_reader.GetUint32(&expected_crc); + + if (magic != MAGIC) { + return Status::InternalError("Invalid manifest magic number"); + } + + 
if (version != FORMAT_VERSION) { + return Status::InternalError("Unsupported manifest format version: ", + std::to_string(version)); + } + + if (HEADER_SIZE + payload_length > size) { + return Status::InternalError("Manifest payload truncated"); + } + + const uint8_t *payload_data = data + HEADER_SIZE; + uint32_t actual_crc = CRC32::Compute(payload_data, payload_length); + if (actual_crc != expected_crc) { + return Status::InternalError("Manifest CRC32 checksum mismatch"); + } + + BinaryReader reader(payload_data, payload_length); + + // Scalar fields + if (!reader.GetBool(enable_mmap) || !reader.GetUint32(id_map_path_suffix) || + !reader.GetUint32(delete_snapshot_path_suffix) || + !reader.GetUint32(next_segment_id)) { + return Status::InternalError("Failed to read manifest scalar fields"); + } + + // Schema + if (!ReadCollectionSchema(&reader, schema)) { + return Status::InternalError("Failed to read manifest schema"); + } + + // Persisted segments + uint32_t segment_count; + if (!reader.GetUint32(&segment_count)) { + return Status::InternalError("Failed to read persisted segment count"); + } + persisted_segments->clear(); + for (uint32_t i = 0; i < segment_count; ++i) { + bool seg_ok = true; + auto seg = ReadSegmentMeta(&reader, &seg_ok); + if (!seg_ok) { + return Status::InternalError("Failed to read persisted segment meta"); + } + persisted_segments->push_back(seg); + } + + // Writing segment + bool has_writing; + if (!reader.GetBool(&has_writing)) { + return Status::InternalError("Failed to read writing segment flag"); + } + if (has_writing) { + bool seg_ok = true; + *writing_segment = ReadSegmentMeta(&reader, &seg_ok); + if (!seg_ok) { + return Status::InternalError("Failed to read writing segment meta"); + } + } else { + *writing_segment = nullptr; + } + + return Status::OK(); +} + +} // namespace zvec diff --git a/src/db/index/common/manifest_serializer.h b/src/db/index/common/manifest_serializer.h new file mode 100644 index 000000000..26df841fd --- 
/dev/null +++ b/src/db/index/common/manifest_serializer.h @@ -0,0 +1,92 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include +#include +#include "db/common/binary_codec.h" +#include "db/index/common/meta.h" + +namespace zvec { + +// File format: +// [Magic: 4 bytes "ZVEC"] +// [Format Version: uint32] +// [Payload Length: uint32] +// [CRC32 of Payload: uint32] +// [Payload: variable length] +// +// Payload layout (integers memcpy'd in host byte order; LE on LE hosts): +// enable_mmap: uint8 +// id_map_path_suffix: uint32 +// delete_snapshot_path_suffix: uint32 +// next_segment_id: uint32 +// schema: CollectionSchema (see WriteCollectionSchema) +// persisted_segment_count: uint32 +// persisted_segments: SegmentMeta[] (see WriteSegmentMeta) +// has_writing_segment: uint8 +// writing_segment: SegmentMeta (if has_writing_segment) + +class ManifestSerializer { + public: + static constexpr uint32_t MAGIC = 0x4345565A; // "ZVEC" in little-endian + static constexpr uint32_t FORMAT_VERSION = 1; + static constexpr size_t HEADER_SIZE = 16; // magic + version + length + crc + + // Serialize a manifest to binary data + static Status Serialize( + const CollectionSchema &schema, bool enable_mmap, + uint32_t id_map_path_suffix, uint32_t delete_snapshot_path_suffix, + uint32_t next_segment_id, + const std::vector &persisted_segments, + const SegmentMeta::Ptr &writing_segment, std::vector *output); + + // Deserialize binary data 
to manifest fields + static Status Deserialize(const uint8_t *data, size_t size, + CollectionSchema *schema, bool *enable_mmap, + uint32_t *id_map_path_suffix, + uint32_t *delete_snapshot_path_suffix, + uint32_t *next_segment_id, + std::vector *persisted_segments, + SegmentMeta::Ptr *writing_segment); + + private: + // IndexParams serialization + static void WriteIndexParams(BinaryWriter *writer, const IndexParams *params); + static IndexParams::Ptr ReadIndexParams(BinaryReader *reader, bool *ok); + + // FieldSchema serialization + static void WriteFieldSchema(BinaryWriter *writer, const FieldSchema &field); + static FieldSchema::Ptr ReadFieldSchema(BinaryReader *reader, bool *ok); + + // CollectionSchema serialization + static void WriteCollectionSchema(BinaryWriter *writer, + const CollectionSchema &schema); + static bool ReadCollectionSchema(BinaryReader *reader, + CollectionSchema *schema); + + // BlockMeta serialization + static void WriteBlockMeta(BinaryWriter *writer, const BlockMeta &meta); + static bool ReadBlockMeta(BinaryReader *reader, BlockMeta *meta); + + // SegmentMeta serialization + static void WriteSegmentMeta(BinaryWriter *writer, const SegmentMeta &meta); + static SegmentMeta::Ptr ReadSegmentMeta(BinaryReader *reader, bool *ok); +}; + +} // namespace zvec diff --git a/src/db/index/common/meta.h b/src/db/index/common/meta.h index ae0ba3df8..51471f15e 100644 --- a/src/db/index/common/meta.h +++ b/src/db/index/common/meta.h @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/src/db/index/common/proto_converter.cc b/src/db/index/common/proto_converter.cc deleted file mode 100644 index 46eb93f5a..000000000 --- a/src/db/index/common/proto_converter.cc +++ /dev/null @@ -1,318 +0,0 @@ -// Copyright 2025-present the zvec project -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "proto_converter.h" - -namespace zvec { - -HnswIndexParams::OPtr ProtoConverter::FromPb( - const proto::HnswIndexParams &params_pb) { - auto params = std::make_shared( - MetricTypeCodeBook::Get(params_pb.base().metric_type()), params_pb.m(), - params_pb.ef_construction(), - QuantizeTypeCodeBook::Get(params_pb.base().quantize_type())); - - return params; -} - -proto::HnswIndexParams ProtoConverter::ToPb(const HnswIndexParams *params) { - proto::HnswIndexParams params_pb; - params_pb.mutable_base()->set_metric_type( - MetricTypeCodeBook::Get(params->metric_type())); - params_pb.mutable_base()->set_quantize_type( - QuantizeTypeCodeBook::Get(params->quantize_type())); - params_pb.set_ef_construction(params->ef_construction()); - params_pb.set_m(params->m()); - return params_pb; -} - -// HnswRabitqIndexParams -HnswRabitqIndexParams::OPtr ProtoConverter::FromPb( - const proto::HnswRabitqIndexParams &params_pb) { - auto params = std::make_shared( - MetricTypeCodeBook::Get(params_pb.base().metric_type()), - params_pb.total_bits(), params_pb.num_clusters(), params_pb.m(), - params_pb.ef_construction(), params_pb.sample_count()); - - return params; -} - -proto::HnswRabitqIndexParams ProtoConverter::ToPb( - const HnswRabitqIndexParams *params) { - proto::HnswRabitqIndexParams params_pb; - params_pb.mutable_base()->set_metric_type( - MetricTypeCodeBook::Get(params->metric_type())); - params_pb.mutable_base()->set_quantize_type( - QuantizeTypeCodeBook::Get(params->quantize_type())); - params_pb.set_m(params->m()); - 
params_pb.set_ef_construction(params->ef_construction()); - params_pb.set_total_bits(params->total_bits()); - params_pb.set_num_clusters(params->num_clusters()); - params_pb.set_sample_count(params->sample_count()); - return params_pb; -} - -// FlatIndexParams -FlatIndexParams::OPtr ProtoConverter::FromPb( - const proto::FlatIndexParams &params_pb) { - return std::make_shared( - MetricTypeCodeBook::Get(params_pb.base().metric_type()), - QuantizeTypeCodeBook::Get(params_pb.base().quantize_type())); -} - -proto::FlatIndexParams ProtoConverter::ToPb(const FlatIndexParams *params) { - proto::FlatIndexParams params_pb; - params_pb.mutable_base()->set_metric_type( - MetricTypeCodeBook::Get(params->metric_type())); - params_pb.mutable_base()->set_quantize_type( - QuantizeTypeCodeBook::Get(params->quantize_type())); - return params_pb; -} - -// IVFIndexParams -IVFIndexParams::OPtr ProtoConverter::FromPb( - const proto::IVFIndexParams &params_pb) { - return std::make_shared( - MetricTypeCodeBook::Get(params_pb.base().metric_type()), - params_pb.n_list(), params_pb.n_iters(), params_pb.use_soar(), - QuantizeTypeCodeBook::Get(params_pb.base().quantize_type())); -} - -proto::IVFIndexParams ProtoConverter::ToPb(const IVFIndexParams *params) { - proto::IVFIndexParams params_pb; - params_pb.mutable_base()->set_metric_type( - MetricTypeCodeBook::Get(params->metric_type())); - params_pb.mutable_base()->set_quantize_type( - QuantizeTypeCodeBook::Get(params->quantize_type())); - params_pb.set_n_list(params->n_list()); - params_pb.set_n_iters(params->n_iters()); - params_pb.set_use_soar(params->use_soar()); - return params_pb; -} - -// InvertIndexParams -InvertIndexParams::OPtr ProtoConverter::FromPb( - const proto::InvertIndexParams &params_pb) { - auto params = std::make_shared( - params_pb.enable_range_optimization()); - - return params; -} - -proto::InvertIndexParams ProtoConverter::ToPb(const InvertIndexParams *params) { - proto::InvertIndexParams params_pb; - 
params_pb.set_enable_range_optimization(params->enable_range_optimization()); - return params_pb; -} - -// FieldSchema -FieldSchema::Ptr ProtoConverter::FromPb(const proto::FieldSchema &schema_pb) { - auto schema = std::make_shared(); - - schema->set_name(schema_pb.name()); - schema->set_data_type(DataTypeCodeBook::Get(schema_pb.data_type())); - schema->set_dimension(schema_pb.dimension()); - schema->set_nullable(schema_pb.nullable()); - if (schema_pb.has_index_params()) { - schema->set_index_params(ProtoConverter::FromPb(schema_pb.index_params())); - } - return schema; -} -proto::FieldSchema ProtoConverter::ToPb(const FieldSchema &schema) { - proto::FieldSchema schema_pb; - - schema_pb.set_name(schema.name()); - schema_pb.set_data_type(DataTypeCodeBook::Get(schema.data_type())); - schema_pb.set_dimension(schema.dimension()); - schema_pb.set_nullable(schema.nullable()); - auto index_params = schema.index_params(); - if (index_params) { - auto index_params_pb = schema_pb.mutable_index_params(); - index_params_pb->MergeFrom(ProtoConverter::ToPb(index_params.get())); - } - return schema_pb; -} - -// CollectionSchema -CollectionSchema::Ptr ProtoConverter::FromPb( - const proto::CollectionSchema &schema_pb) { - CollectionSchema::Ptr schema = std::make_shared(); - - schema->set_name(schema_pb.name()); - - for (auto &column_schema_pb : schema_pb.fields()) { - FieldSchema::Ptr column_schema = ProtoConverter::FromPb(column_schema_pb); - schema->add_field(column_schema); - } - - schema->set_max_doc_count_per_segment(schema_pb.max_doc_count_per_segment()); - - return schema; -} - -proto::CollectionSchema ProtoConverter::ToPb(const CollectionSchema &schema) { - proto::CollectionSchema schema_pb; - schema_pb.set_name(schema.name()); - for (auto &column_schema : schema.fields()) { - proto::FieldSchema *column_schema_pb = schema_pb.add_fields(); - column_schema_pb->MergeFrom(ProtoConverter::ToPb(*column_schema)); - } - - 
schema_pb.set_max_doc_count_per_segment(schema.max_doc_count_per_segment()); - - return schema_pb; -} - -IndexParams::Ptr ProtoConverter::FromPb(const proto::IndexParams ¶ms_pb) { - if (params_pb.has_hnsw()) { - return ProtoConverter::FromPb(params_pb.hnsw()); - } else if (params_pb.has_invert()) { - return ProtoConverter::FromPb(params_pb.invert()); - } else if (params_pb.has_ivf()) { - return ProtoConverter::FromPb(params_pb.ivf()); - } else if (params_pb.has_flat()) { - return ProtoConverter::FromPb(params_pb.flat()); - } else if (params_pb.has_hnsw_rabitq()) { - return ProtoConverter::FromPb(params_pb.hnsw_rabitq()); - } - - return nullptr; -} - -// BlockMeta -BlockMeta::Ptr ProtoConverter::FromPb(const proto::BlockMeta &meta_pb) { - auto block_meta = std::make_shared(); - - block_meta->set_id(meta_pb.block_id()); - block_meta->set_type(BlockTypeCodeBook::Get(meta_pb.block_type())); - block_meta->set_min_doc_id(meta_pb.min_doc_id()); - block_meta->set_max_doc_id(meta_pb.max_doc_id()); - block_meta->set_doc_count(meta_pb.doc_count()); - for (auto &column : meta_pb.columns()) { - block_meta->add_column(column); - } - - return block_meta; -} - -proto::IndexParams ProtoConverter::ToPb(const IndexParams *params) { - proto::IndexParams params_pb; - - switch (params->type()) { - case IndexType::INVERT: { - auto invert_params = dynamic_cast(params); - if (invert_params) { - params_pb.mutable_invert()->CopyFrom( - ProtoConverter::ToPb(invert_params)); - } - break; - } - case IndexType::HNSW: { - auto hnsw_params = dynamic_cast(params); - if (hnsw_params) { - params_pb.mutable_hnsw()->CopyFrom(ProtoConverter::ToPb(hnsw_params)); - } - break; - } - case IndexType::IVF: { - auto ivf_params = dynamic_cast(params); - if (ivf_params) { - params_pb.mutable_ivf()->CopyFrom(ProtoConverter::ToPb(ivf_params)); - } - break; - } - case IndexType::FLAT: { - auto flat_params = dynamic_cast(params); - if (flat_params) { - 
params_pb.mutable_flat()->CopyFrom(ProtoConverter::ToPb(flat_params)); - } - break; - } - case IndexType::HNSW_RABITQ: { - auto hnsw_rabitq_params = - dynamic_cast(params); - if (hnsw_rabitq_params) { - params_pb.mutable_hnsw_rabitq()->CopyFrom( - ProtoConverter::ToPb(hnsw_rabitq_params)); - } - } - default: - break; - } - - return params_pb; -} - -proto::BlockMeta ProtoConverter::ToPb(const BlockMeta &meta) { - proto::BlockMeta meta_pb; - meta_pb.set_block_id(meta.id()); - meta_pb.set_block_type(BlockTypeCodeBook::Get(meta.type())); - meta_pb.set_min_doc_id(meta.min_doc_id()); - meta_pb.set_max_doc_id(meta.max_doc_id()); - meta_pb.set_doc_count(meta.doc_count()); - for (auto &column : meta.columns()) { - meta_pb.add_columns(column); - } - - return meta_pb; -} - -// SegmentMeta -SegmentMeta::Ptr ProtoConverter::FromPb(const proto::SegmentMeta &meta_pb) { - auto meta = std::make_shared(meta_pb.segment_id()); - - auto persisted_blocks = meta_pb.persisted_blocks(); - - for (auto &persisted_block_pb : persisted_blocks) { - BlockMeta::Ptr persisted_block = ProtoConverter::FromPb(persisted_block_pb); - meta->add_persisted_block(*persisted_block); - } - - if (meta_pb.has_writing_forward_block()) { - meta->set_writing_forward_block( - *ProtoConverter::FromPb(meta_pb.writing_forward_block())); - } - - auto indexed_vector_fields = meta_pb.indexed_vector_fields(); - for (auto &indexed_vector_field : indexed_vector_fields) { - meta->add_indexed_vector_field(indexed_vector_field); - } - - return meta; -} - -proto::SegmentMeta ProtoConverter::ToPb(const SegmentMeta &meta) { - proto::SegmentMeta meta_pb; - meta_pb.set_segment_id(meta.id()); - - auto persisted_blocks = meta.persisted_blocks(); - for (auto &persisted_block : persisted_blocks) { - auto persisted_block_pb = ProtoConverter::ToPb(persisted_block); - meta_pb.add_persisted_blocks()->MergeFrom(persisted_block_pb); - } - - if (meta.has_writing_forward_block()) { - meta_pb.mutable_writing_forward_block()->MergeFrom( - 
ProtoConverter::ToPb(meta.writing_forward_block().value())); - } - - auto indexed_vector_fields = meta.indexed_vector_fields(); - for (auto &field : indexed_vector_fields) { - meta_pb.add_indexed_vector_fields(field); - } - - return meta_pb; -} - -} // namespace zvec \ No newline at end of file diff --git a/src/db/index/common/proto_converter.h b/src/db/index/common/proto_converter.h deleted file mode 100644 index ad96007a4..000000000 --- a/src/db/index/common/proto_converter.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2025-present the zvec project -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-#pragma once - -#include -#include -#include "db/index/common/meta.h" - -namespace zvec { - -struct ProtoConverter { - // HnswIndexParams - static HnswIndexParams::OPtr FromPb(const proto::HnswIndexParams ¶ms_pb); - - static proto::HnswIndexParams ToPb(const HnswIndexParams *params); - - // HnswRabitqIndexParams - static HnswRabitqIndexParams::OPtr FromPb( - const proto::HnswRabitqIndexParams ¶ms_pb); - static proto::HnswRabitqIndexParams ToPb(const HnswRabitqIndexParams *params); - - // FlatIndexParams - static FlatIndexParams::OPtr FromPb(const proto::FlatIndexParams ¶ms_pb); - static proto::FlatIndexParams ToPb(const FlatIndexParams *params); - - // IVFIndexParams - static IVFIndexParams::OPtr FromPb(const proto::IVFIndexParams ¶ms_pb); - static proto::IVFIndexParams ToPb(const IVFIndexParams *params); - - // InvertIndexParams - static InvertIndexParams::OPtr FromPb( - const proto::InvertIndexParams ¶ms_pb); - static proto::InvertIndexParams ToPb(const InvertIndexParams *params); - - // IndexParams - static IndexParams::Ptr FromPb(const proto::IndexParams ¶ms_pb); - static proto::IndexParams ToPb(const IndexParams *params); - - // FieldSchema - static FieldSchema::Ptr FromPb(const proto::FieldSchema &field_pb); - static proto::FieldSchema ToPb(const FieldSchema &field); - - // CollectionSchema - static CollectionSchema::Ptr FromPb(const proto::CollectionSchema &schema_pb); - static proto::CollectionSchema ToPb(const CollectionSchema &schema); - - // BlockMeta - static BlockMeta::Ptr FromPb(const proto::BlockMeta &meta_pb); - static proto::BlockMeta ToPb(const BlockMeta &meta); - - // SegmentMeta - static SegmentMeta::Ptr FromPb(const proto::SegmentMeta &meta_pb); - static proto::SegmentMeta ToPb(const SegmentMeta &meta); -}; - -} // namespace zvec \ No newline at end of file diff --git a/src/db/index/common/type_helper.h b/src/db/index/common/type_helper.h index 33d2ee344..8a117cd6b 100644 --- a/src/db/index/common/type_helper.h +++ 
b/src/db/index/common/type_helper.h @@ -14,53 +14,14 @@ #pragma once +#include +#include #include #include -#include "proto/zvec.pb.h" namespace zvec { -//! Index Type Codebook struct IndexTypeCodeBook { - //! convert protobuf IndexType to C++ IndexType - static IndexType Get(proto::IndexType type) { - switch (type) { - case proto::IT_HNSW: - return IndexType::HNSW; - case proto::IT_HNSW_RABITQ: - return IndexType::HNSW_RABITQ; - case proto::IT_FLAT: - return IndexType::FLAT; - case proto::IT_IVF: - return IndexType::IVF; - case proto::IT_INVERT: - return IndexType::INVERT; - default: - break; - } - return IndexType::UNDEFINED; - } - - //! Convert C++ IndexType to protobuf IndexType - static proto::IndexType Get(IndexType type) { - switch (type) { - case IndexType::HNSW: - return proto::IT_HNSW; - case IndexType::HNSW_RABITQ: - return proto::IT_HNSW_RABITQ; - case IndexType::FLAT: - return proto::IT_FLAT; - case IndexType::IVF: - return proto::IT_IVF; - case IndexType::INVERT: - return proto::IT_INVERT; - default: - break; - } - return proto::IT_UNDEFINED; - } - - //! 
Convert C++ IndexType to C++ String static std::string AsString(IndexType type) { switch (type) { case IndexType::HNSW: @@ -74,331 +35,83 @@ struct IndexTypeCodeBook { case IndexType::INVERT: return "INVERT"; default: - break; + return "UNDEFINED"; } - return "UNDEFINED"; } }; struct DataTypeCodeBook { - static bool IsArrayType(proto::DataType type) { - return proto::DataType::DT_ARRAY_BINARY <= type && - type <= proto::DataType::DT_ARRAY_DOUBLE; - } - - static DataType Get(proto::DataType type) { - DataType data_types = DataType::UNDEFINED; - switch (type) { - case proto::DataType::DT_BINARY: - data_types = DataType::BINARY; - break; - case proto::DataType::DT_STRING: - data_types = DataType::STRING; - break; - case proto::DataType::DT_BOOL: - data_types = DataType::BOOL; - break; - case proto::DataType::DT_INT32: - data_types = DataType::INT32; - break; - case proto::DataType::DT_INT64: - data_types = DataType::INT64; - break; - case proto::DataType::DT_UINT32: - data_types = DataType::UINT32; - break; - case proto::DataType::DT_UINT64: - data_types = DataType::UINT64; - break; - case proto::DataType::DT_FLOAT: - data_types = DataType::FLOAT; - break; - case proto::DataType::DT_DOUBLE: - data_types = DataType::DOUBLE; - break; - case proto::DataType::DT_VECTOR_BINARY32: - data_types = DataType::VECTOR_BINARY32; - break; - case proto::DataType::DT_VECTOR_BINARY64: - data_types = DataType::VECTOR_BINARY64; - break; - case proto::DataType::DT_VECTOR_FP16: - data_types = DataType::VECTOR_FP16; - break; - case proto::DataType::DT_VECTOR_FP32: - data_types = DataType::VECTOR_FP32; - break; - case proto::DataType::DT_VECTOR_FP64: - data_types = DataType::VECTOR_FP64; - break; - case proto::DataType::DT_VECTOR_INT4: - data_types = DataType::VECTOR_INT4; - break; - case proto::DataType::DT_VECTOR_INT8: - data_types = DataType::VECTOR_INT8; - break; - case proto::DataType::DT_VECTOR_INT16: - data_types = DataType::VECTOR_INT16; - break; - case 
proto::DataType::DT_SPARSE_VECTOR_FP16: - data_types = DataType::SPARSE_VECTOR_FP16; - break; - case proto::DataType::DT_SPARSE_VECTOR_FP32: - data_types = DataType::SPARSE_VECTOR_FP32; - break; - case proto::DataType::DT_ARRAY_BINARY: - data_types = DataType::ARRAY_BINARY; - break; - case proto::DataType::DT_ARRAY_STRING: - data_types = DataType::ARRAY_STRING; - break; - case proto::DataType::DT_ARRAY_BOOL: - data_types = DataType::ARRAY_BOOL; - break; - case proto::DataType::DT_ARRAY_INT32: - data_types = DataType::ARRAY_INT32; - break; - case proto::DataType::DT_ARRAY_INT64: - data_types = DataType::ARRAY_INT64; - break; - case proto::DataType::DT_ARRAY_UINT32: - data_types = DataType::ARRAY_UINT32; - break; - case proto::DataType::DT_ARRAY_UINT64: - data_types = DataType::ARRAY_UINT64; - break; - case proto::DataType::DT_ARRAY_FLOAT: - data_types = DataType::ARRAY_FLOAT; - break; - case proto::DataType::DT_ARRAY_DOUBLE: - data_types = DataType::ARRAY_DOUBLE; - break; - - default: - break; - } - return data_types; - } - - static proto::DataType Get(const DataType type) { - proto::DataType data_type = proto::DataType::DT_UNDEFINED; - switch (type) { - case DataType::BINARY: - data_type = proto::DataType::DT_BINARY; - break; - case DataType::STRING: - data_type = proto::DataType::DT_STRING; - break; - case DataType::BOOL: - data_type = proto::DataType::DT_BOOL; - break; - case DataType::INT32: - data_type = proto::DataType::DT_INT32; - break; - case DataType::INT64: - data_type = proto::DataType::DT_INT64; - break; - case DataType::UINT32: - data_type = proto::DataType::DT_UINT32; - break; - case DataType::UINT64: - data_type = proto::DataType::DT_UINT64; - break; - case DataType::FLOAT: - data_type = proto::DataType::DT_FLOAT; - break; - case DataType::DOUBLE: - data_type = proto::DataType::DT_DOUBLE; - break; - case DataType::VECTOR_BINARY32: - data_type = proto::DataType::DT_VECTOR_BINARY32; - break; - case DataType::VECTOR_BINARY64: - data_type = 
proto::DataType::DT_VECTOR_BINARY64; - break; - case DataType::VECTOR_FP16: - data_type = proto::DataType::DT_VECTOR_FP16; - break; - case DataType::VECTOR_FP32: - data_type = proto::DataType::DT_VECTOR_FP32; - break; - case DataType::VECTOR_FP64: - data_type = proto::DataType::DT_VECTOR_FP64; - break; - case DataType::VECTOR_INT4: - data_type = proto::DataType::DT_VECTOR_INT4; - break; - case DataType::VECTOR_INT8: - data_type = proto::DataType::DT_VECTOR_INT8; - break; - case DataType::VECTOR_INT16: - data_type = proto::DataType::DT_VECTOR_INT16; - break; - case DataType::SPARSE_VECTOR_FP16: - data_type = proto::DataType::DT_SPARSE_VECTOR_FP16; - break; - case DataType::SPARSE_VECTOR_FP32: - data_type = proto::DataType::DT_SPARSE_VECTOR_FP32; - break; - case DataType::ARRAY_BINARY: - data_type = proto::DataType::DT_ARRAY_BINARY; - break; - case DataType::ARRAY_BOOL: - data_type = proto::DataType::DT_ARRAY_BOOL; - break; - case DataType::ARRAY_DOUBLE: - data_type = proto::DataType::DT_ARRAY_DOUBLE; - break; - case DataType::ARRAY_FLOAT: - data_type = proto::DataType::DT_ARRAY_FLOAT; - break; - case DataType::ARRAY_INT32: - data_type = proto::DataType::DT_ARRAY_INT32; - break; - case DataType::ARRAY_INT64: - data_type = proto::DataType::DT_ARRAY_INT64; - break; - case DataType::ARRAY_STRING: - data_type = proto::DataType::DT_ARRAY_STRING; - break; - case DataType::ARRAY_UINT32: - data_type = proto::DataType::DT_ARRAY_UINT32; - break; - case DataType::ARRAY_UINT64: - data_type = proto::DataType::DT_ARRAY_UINT64; - break; - default: - break; - } - - return data_type; + static bool IsArrayType(DataType type) { + return type >= DataType::ARRAY_BINARY && type <= DataType::ARRAY_DOUBLE; } static std::string AsString(DataType type) { - std::string data_type; - switch (type) { case DataType::BINARY: - data_type = "BINARY"; - break; + return "BINARY"; case DataType::STRING: - data_type = "STRING"; - break; + return "STRING"; case DataType::BOOL: - data_type = "BOOL"; - 
break; + return "BOOL"; case DataType::INT32: - data_type = "INT32"; - break; + return "INT32"; case DataType::INT64: - data_type = "INT64"; - break; + return "INT64"; case DataType::UINT32: - data_type = "UINT32"; - break; + return "UINT32"; case DataType::UINT64: - data_type = "UINT64"; - break; + return "UINT64"; case DataType::FLOAT: - data_type = "FLOAT"; - break; + return "FLOAT"; case DataType::DOUBLE: - data_type = "DOUBLE"; - break; + return "DOUBLE"; case DataType::VECTOR_BINARY32: - data_type = "VECTOR_BINARY32"; - break; + return "VECTOR_BINARY32"; case DataType::VECTOR_BINARY64: - data_type = "VECTOR_BINARY64"; - break; + return "VECTOR_BINARY64"; case DataType::VECTOR_FP16: - data_type = "VECTOR_FP16"; - break; + return "VECTOR_FP16"; case DataType::VECTOR_FP32: - data_type = "VECTOR_FP32"; - break; + return "VECTOR_FP32"; case DataType::VECTOR_FP64: - data_type = "VECTOR_FP64"; - break; + return "VECTOR_FP64"; case DataType::VECTOR_INT4: - data_type = "VECTOR_INT4"; - break; + return "VECTOR_INT4"; case DataType::VECTOR_INT8: - data_type = "VECTOR_INT8"; - break; + return "VECTOR_INT8"; case DataType::VECTOR_INT16: - data_type = "VECTOR_INT16"; - break; + return "VECTOR_INT16"; case DataType::SPARSE_VECTOR_FP16: - data_type = "SPARSE_VECTOR_FP16"; - break; + return "SPARSE_VECTOR_FP16"; case DataType::SPARSE_VECTOR_FP32: - data_type = "SPARSE_VECTOR_FP32"; - break; + return "SPARSE_VECTOR_FP32"; case DataType::ARRAY_BINARY: - data_type = "ARRAY_BINARY"; - break; + return "ARRAY_BINARY"; + case DataType::ARRAY_STRING: + return "ARRAY_STRING"; case DataType::ARRAY_BOOL: - data_type = "ARRAY_BOOL"; - break; - case DataType::ARRAY_DOUBLE: - data_type = "ARRAY_DOUBLE"; - break; - case DataType::ARRAY_FLOAT: - data_type = "ARRAY_FLOAT"; - break; + return "ARRAY_BOOL"; case DataType::ARRAY_INT32: - data_type = "ARRAY_INT32"; - break; + return "ARRAY_INT32"; case DataType::ARRAY_INT64: - data_type = "ARRAY_INT64"; - break; - case DataType::ARRAY_STRING: - 
data_type = "ARRAY_STRING"; - break; + return "ARRAY_INT64"; case DataType::ARRAY_UINT32: - data_type = "ARRAY_UINT32"; - break; + return "ARRAY_UINT32"; case DataType::ARRAY_UINT64: - data_type = "ARRAY_UINT64"; - break; + return "ARRAY_UINT64"; + case DataType::ARRAY_FLOAT: + return "ARRAY_FLOAT"; + case DataType::ARRAY_DOUBLE: + return "ARRAY_DOUBLE"; default: - break; + return ""; } - - return data_type; } static core::IndexMeta::DataType to_data_type(DataType type); }; struct MetricTypeCodeBook { - static MetricType Get(proto::MetricType type) { - switch (type) { - case proto::MetricType::MT_IP: - return MetricType::IP; - case proto::MetricType::MT_L2: - return MetricType::L2; - case proto::MetricType::MT_COSINE: - return MetricType::COSINE; - default: - return MetricType::UNDEFINED; - } - } - - static proto::MetricType Get(MetricType type) { - switch (type) { - case MetricType::IP: - return proto::MetricType::MT_IP; - case MetricType::L2: - return proto::MetricType::MT_L2; - case MetricType::COSINE: - return proto::MetricType::MT_COSINE; - default: - return proto::MetricType::MT_UNDEFINED; - } - } - static std::string AsString(MetricType type) { switch (type) { case MetricType::IP: @@ -414,36 +127,6 @@ struct MetricTypeCodeBook { }; struct QuantizeTypeCodeBook { - static QuantizeType Get(proto::QuantizeType type) { - switch (type) { - case proto::QuantizeType::QT_FP16: - return QuantizeType::FP16; - case proto::QuantizeType::QT_INT4: - return QuantizeType::INT4; - case proto::QuantizeType::QT_INT8: - return QuantizeType::INT8; - case proto::QuantizeType::QT_RABITQ: - return QuantizeType::RABITQ; - default: - return QuantizeType::UNDEFINED; - } - } - - static proto::QuantizeType Get(QuantizeType type) { - switch (type) { - case QuantizeType::FP16: - return proto::QuantizeType::QT_FP16; - case QuantizeType::INT4: - return proto::QuantizeType::QT_INT4; - case QuantizeType::INT8: - return proto::QuantizeType::QT_INT8; - case QuantizeType::RABITQ: - return 
proto::QuantizeType::QT_RABITQ; - default: - return proto::QuantizeType::QT_UNDEFINED; - } - } - static std::string AsString(QuantizeType type) { switch (type) { case QuantizeType::FP16: @@ -469,49 +152,6 @@ struct QuantizeTypeCodeBook { }; struct BlockTypeCodeBook { - static BlockType Get(proto::BlockType type) { - BlockType block_types = BlockType::UNDEFINED; - switch (type) { - case proto::BlockType::BT_SCALAR: - block_types = BlockType::SCALAR; - break; - case proto::BlockType::BT_SCALAR_INDEX: - block_types = BlockType::SCALAR_INDEX; - break; - case proto::BlockType::BT_VECTOR_INDEX: - block_types = BlockType::VECTOR_INDEX; - break; - case proto::BlockType::BT_VECTOR_INDEX_QUANTIZE: - block_types = BlockType::VECTOR_INDEX_QUANTIZE; - break; - default: - break; - } - return block_types; - } - - static proto::BlockType Get(BlockType type) { - proto::BlockType block_types = proto::BlockType::BT_UNDEFINED; - switch (type) { - case BlockType::SCALAR: - block_types = proto::BlockType::BT_SCALAR; - break; - case BlockType::SCALAR_INDEX: - block_types = proto::BlockType::BT_SCALAR_INDEX; - break; - case BlockType::VECTOR_INDEX: - block_types = proto::BlockType::BT_VECTOR_INDEX; - break; - case BlockType::VECTOR_INDEX_QUANTIZE: - block_types = proto::BlockType::BT_VECTOR_INDEX_QUANTIZE; - break; - default: - break; - } - - return block_types; - } - static std::string AsString(BlockType type) { switch (type) { case BlockType::SCALAR: diff --git a/src/db/index/common/version_manager.cc b/src/db/index/common/version_manager.cc index 47be9cb70..6b15ede15 100644 --- a/src/db/index/common/version_manager.cc +++ b/src/db/index/common/version_manager.cc @@ -21,59 +21,76 @@ #include #include #include -#include #include #include #include #include #include "db/common/file_helper.h" #include "db/common/typedef.h" -#include "db/index/common/proto_converter.h" -#include "db/index/common/type_helper.h" +#include "db/index/common/manifest_serializer.h" namespace zvec { Status 
Version::Load(const std::string &path, Version *version) { - std::ifstream ifs(path, std::ios::binary); + std::ifstream ifs(path, std::ios::binary | std::ios::ate); if (!ifs.is_open()) { LOG_ERROR("Failed to open file: %s", path.c_str()); return Status::InternalError("Failed to open file"); } - proto::Manifest manifest; + auto file_size = ifs.tellg(); + ifs.seekg(0, std::ios::beg); - if (!manifest.ParseFromIstream(&ifs)) { - LOG_ERROR("Failed to parse manifest from file: %s", path.c_str()); - return Status::InternalError("Failed to parse manifest"); + std::vector buffer(file_size); + if (!ifs.read(reinterpret_cast(buffer.data()), file_size)) { + LOG_ERROR("Failed to read manifest file: %s", path.c_str()); + return Status::InternalError("Failed to read manifest file"); } - CollectionSchema::Ptr schema = ProtoConverter::FromPb(manifest.schema()); - version->set_schema(*schema); - - version->set_enable_mmap(manifest.enable_mmap()); + CollectionSchema schema; + bool enable_mmap = false; + uint32_t id_map_suffix = 0; + uint32_t delete_suffix = 0; + uint32_t next_seg_id = 0; + std::vector persisted_segments; + SegmentMeta::Ptr writing_segment; + + auto s = ManifestSerializer::Deserialize( + buffer.data(), buffer.size(), &schema, &enable_mmap, &id_map_suffix, + &delete_suffix, &next_seg_id, &persisted_segments, &writing_segment); + if (!s.ok()) { + LOG_ERROR("Failed to parse manifest from file: %s, err: %s", path.c_str(), + s.message().c_str()); + return s; + } - for (int i = 0; i < manifest.persisted_segment_metas_size(); ++i) { - SegmentMeta::Ptr meta = - ProtoConverter::FromPb(manifest.persisted_segment_metas(i)); + version->set_schema(schema); + version->set_enable_mmap(enable_mmap); + for (auto &meta : persisted_segments) { version->add_persisted_segment_meta(meta); } - - if (manifest.has_writing_segment_meta()) { - SegmentMeta::Ptr meta = - ProtoConverter::FromPb(manifest.writing_segment_meta()); - version->reset_writing_segment_meta(meta); + if (writing_segment) { 
+ version->reset_writing_segment_meta(writing_segment); } - - version->set_id_map_path_suffix(manifest.id_map_path_suffix()); - version->set_delete_snapshot_path_suffix( - manifest.delete_snapshot_path_suffix()); - - version->set_next_segment_id(manifest.next_segment_id()); + version->set_id_map_path_suffix(id_map_suffix); + version->set_delete_snapshot_path_suffix(delete_suffix); + version->set_next_segment_id(next_seg_id); return Status::OK(); } Status Version::Save(const std::string &path, const Version &version) { + std::vector buffer; + auto s = ManifestSerializer::Serialize( + version.schema(), version.enable_mmap(), version.id_map_path_suffix(), + version.delete_snapshot_path_suffix(), version.next_segment_id(), + version.persisted_segment_metas(), version.writing_segment_meta(), + &buffer); + if (!s.ok()) { + LOG_ERROR("Failed to serialize manifest: %s", s.message().c_str()); + return s; + } + std::ofstream ofs(path, std::ios::binary); if (!ofs.is_open()) { LOG_ERROR("Failed to open file: %s, err: %s", path.c_str(), @@ -81,33 +98,10 @@ Status Version::Save(const std::string &path, const Version &version) { return Status::InternalError("Failed to open file: %s", path.c_str()); } - proto::Manifest manifest; - - // set schema - auto schema = ProtoConverter::ToPb(version.schema()); - manifest.mutable_schema()->Swap(&schema); - - manifest.set_enable_mmap(version.enable_mmap()); - - // set segments meta - for (auto &meta : version.persisted_segment_metas()) { - auto meta_pb = ProtoConverter::ToPb(*meta); - manifest.add_persisted_segment_metas()->Swap(&meta_pb); - } - - if (version.writing_segment_meta()) { - auto meta_pb = ProtoConverter::ToPb(*version.writing_segment_meta()); - manifest.mutable_writing_segment_meta()->Swap(&meta_pb); - } - - manifest.set_id_map_path_suffix(version.id_map_path_suffix()); - manifest.set_delete_snapshot_path_suffix( - version.delete_snapshot_path_suffix()); - manifest.set_next_segment_id(version.next_segment_id()); - - if 
(!manifest.SerializeToOstream(&ofs)) { - LOG_ERROR("Failed to serialize manifest to file: %s", path.c_str()); - return Status::InternalError("Failed to serialize manifest to file"); + if (!ofs.write(reinterpret_cast(buffer.data()), + buffer.size())) { + LOG_ERROR("Failed to write manifest to file: %s", path.c_str()); + return Status::InternalError("Failed to write manifest to file"); } return Status::OK(); @@ -115,7 +109,8 @@ Status Version::Save(const std::string &path, const Version &version) { std::string Version::to_string() const { std::ostringstream oss; - oss << "Version{" << "schema:" << (schema_ ? schema_->to_string() : "null") + oss << "Version{" + << "schema:" << (schema_ ? schema_->to_string() : "null") << ",persisted_segment_metas:["; size_t i = 0; diff --git a/src/db/proto/zvec.proto b/src/db/proto/zvec.proto deleted file mode 100644 index 3c9d33319..000000000 --- a/src/db/proto/zvec.proto +++ /dev/null @@ -1,189 +0,0 @@ -syntax = "proto3"; - -package zvec.proto; - -option cc_enable_arenas = true; - -// The Go package name, refers to -// https://developers.google.com/protocol-buffers/docs/reference/go-generated#package -option go_package = "proxima/zvec/proto"; - -/*! 
Types of Data - */ -enum DataType { - DT_UNDEFINED = 0; - - DT_BINARY = 1; - DT_STRING = 2; - DT_BOOL = 3; - DT_INT32 = 4; - DT_INT64 = 5; - DT_UINT32 = 6; - DT_UINT64 = 7; - DT_FLOAT = 8; - DT_DOUBLE = 9; - - DT_VECTOR_BINARY32 = 20; - DT_VECTOR_BINARY64 = 21; - DT_VECTOR_FP16 = 22; - DT_VECTOR_FP32 = 23; - DT_VECTOR_FP64 = 24; - DT_VECTOR_INT4 = 25; - DT_VECTOR_INT8 = 26; - DT_VECTOR_INT16 = 27; - - DT_SPARSE_VECTOR_FP16 = 30; - DT_SPARSE_VECTOR_FP32 = 31; - - // ARRAY - DT_ARRAY_BINARY = 40; - DT_ARRAY_STRING = 41; - DT_ARRAY_BOOL = 42; - DT_ARRAY_INT32 = 43; - DT_ARRAY_INT64 = 44; - DT_ARRAY_UINT32 = 45; - DT_ARRAY_UINT64 = 46; - DT_ARRAY_FLOAT = 47; - DT_ARRAY_DOUBLE = 48; -}; - -enum IndexType { - // Undefined - IT_UNDEFINED = 0; - // Proxima HNSW Index - IT_HNSW = 1; - // Proxima IVF Index - IT_IVF = 2; - // Proxima FLAT Index - IT_FLAT = 3; - // Proxima HNSW RABITQ Index - IT_HNSW_RABITQ = 4; - // Invert Index - IT_INVERT = 10; -}; - -enum QuantizeType { - QT_UNDEFINED = 0; - QT_FP16 = 1; - QT_INT8 = 2; - QT_INT4 = 3; - QT_RABITQ = 4; -}; - -enum MetricType { - MT_UNDEFINED = 0; - MT_L2 = 1; - MT_IP = 2; - MT_COSINE = 3; -}; - -message InvertIndexParams { - bool enable_range_optimization = 1; -}; - -message BaseIndexParams { - MetricType metric_type = 1; - QuantizeType quantize_type = 2; -}; - -message HnswIndexParams { - BaseIndexParams base = 1; - int32 m = 2; - int32 ef_construction = 3; -} - -message HnswRabitqIndexParams { - BaseIndexParams base = 1; - int32 m = 2; - int32 ef_construction = 3; - int32 total_bits = 4; - int32 num_clusters = 5; - int32 sample_count = 6; -} - -message FlatIndexParams { - BaseIndexParams base = 1; -} - -message IVFIndexParams { - BaseIndexParams base = 1; - int32 n_list = 2; - int32 n_iters = 3; - bool use_soar = 4; -} - -message IndexParams { - oneof params { - InvertIndexParams invert = 1; - HnswIndexParams hnsw = 2; - FlatIndexParams flat = 3; - IVFIndexParams ivf = 4; - HnswRabitqIndexParams hnsw_rabitq = 5; - }; -}; - 
-message FieldSchema { - string name = 1; - DataType data_type = 2; - uint32 dimension = 3; - bool nullable = 4; - IndexParams index_params = 5; -}; - -message CollectionSchema { - string name = 1; - repeated FieldSchema fields = 2; - uint64 max_doc_count_per_segment = 3; -}; - -enum BlockType { - BT_UNDEFINED = 0; - BT_SCALAR = 1; - BT_SCALAR_INDEX = 2; - BT_VECTOR_INDEX = 3; - BT_VECTOR_INDEX_QUANTIZE = 4; -}; - -message BlockMeta { - uint32 block_id = 1; - BlockType block_type = 2; // for getting filename prefix - uint64 min_doc_id = 3; - uint64 max_doc_id = 4; - uint64 doc_count = 5; - repeated string columns = 6; // columns contained in this block -}; - -// message AlterColumnMeta { -// string old_column_name = 1; -// FieldSchema new_schema = 2; -// }; - -message SegmentMeta { - uint32 segment_id = 1; - // scalar data, vector data and vector index - repeated BlockMeta persisted_blocks = 2; - - BlockMeta writing_forward_block = 3; - - // if indexed, index_params can be retrieved from schema - // if not indexed, index_params is default index_params(flat) - repeated string indexed_vector_fields = 4; - // repeated AlterColumnMeta alter_columns = 10; -}; - -message Manifest { - uint32 version = 1; - - CollectionSchema schema = 2; - - bool enable_mmap = 3; - - repeated SegmentMeta persisted_segment_metas = 4; - - SegmentMeta writing_segment_meta = 5; - - uint32 id_map_path_suffix = 6; - uint32 delete_snapshot_path_suffix = 7; - - uint32 next_segment_id = 8; -}; \ No newline at end of file diff --git a/tests/db/CMakeLists.txt b/tests/db/CMakeLists.txt index 3ea846706..f66640f15 100644 --- a/tests/db/CMakeLists.txt +++ b/tests/db/CMakeLists.txt @@ -27,7 +27,6 @@ foreach(CC_SRCS ${ALL_TEST_SRCS}) cc_gmock( NAME ${CC_TARGET} STRICT LIBS zvec_db - zvec_proto core_knn_flat core_knn_flat_sparse core_knn_hnsw diff --git a/tests/db/common/binary_codec_test.cc b/tests/db/common/binary_codec_test.cc new file mode 100644 index 000000000..492996bbb --- /dev/null +++ 
b/tests/db/common/binary_codec_test.cc @@ -0,0 +1,196 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "db/common/binary_codec.h" +#include + +using namespace zvec; + +TEST(BinaryWriterReaderTest, Uint8RoundTrip) { + BinaryWriter writer; + writer.PutUint8(0); + writer.PutUint8(127); + writer.PutUint8(255); + + BinaryReader reader(writer.data(), writer.size()); + uint8_t val; + ASSERT_TRUE(reader.GetUint8(&val)); + EXPECT_EQ(val, 0); + ASSERT_TRUE(reader.GetUint8(&val)); + EXPECT_EQ(val, 127); + ASSERT_TRUE(reader.GetUint8(&val)); + EXPECT_EQ(val, 255); + EXPECT_EQ(reader.remaining(), 0u); +} + +TEST(BinaryWriterReaderTest, Uint32RoundTrip) { + BinaryWriter writer; + writer.PutUint32(0); + writer.PutUint32(42); + writer.PutUint32(0xFFFFFFFF); + + BinaryReader reader(writer.data(), writer.size()); + uint32_t val; + ASSERT_TRUE(reader.GetUint32(&val)); + EXPECT_EQ(val, 0u); + ASSERT_TRUE(reader.GetUint32(&val)); + EXPECT_EQ(val, 42u); + ASSERT_TRUE(reader.GetUint32(&val)); + EXPECT_EQ(val, 0xFFFFFFFF); +} + +TEST(BinaryWriterReaderTest, Uint64RoundTrip) { + BinaryWriter writer; + writer.PutUint64(0); + writer.PutUint64(123456789012345ULL); + writer.PutUint64(0xFFFFFFFFFFFFFFFFULL); + + BinaryReader reader(writer.data(), writer.size()); + uint64_t val; + ASSERT_TRUE(reader.GetUint64(&val)); + EXPECT_EQ(val, 0u); + ASSERT_TRUE(reader.GetUint64(&val)); + EXPECT_EQ(val, 123456789012345ULL); + 
ASSERT_TRUE(reader.GetUint64(&val)); + EXPECT_EQ(val, 0xFFFFFFFFFFFFFFFFULL); +} + +TEST(BinaryWriterReaderTest, Int32RoundTrip) { + BinaryWriter writer; + writer.PutInt32(0); + writer.PutInt32(-1); + writer.PutInt32(2147483647); + writer.PutInt32(-2147483648); + + BinaryReader reader(writer.data(), writer.size()); + int32_t val; + ASSERT_TRUE(reader.GetInt32(&val)); + EXPECT_EQ(val, 0); + ASSERT_TRUE(reader.GetInt32(&val)); + EXPECT_EQ(val, -1); + ASSERT_TRUE(reader.GetInt32(&val)); + EXPECT_EQ(val, 2147483647); + ASSERT_TRUE(reader.GetInt32(&val)); + EXPECT_EQ(val, -2147483648); +} + +TEST(BinaryWriterReaderTest, BoolRoundTrip) { + BinaryWriter writer; + writer.PutBool(true); + writer.PutBool(false); + + BinaryReader reader(writer.data(), writer.size()); + bool val; + ASSERT_TRUE(reader.GetBool(&val)); + EXPECT_TRUE(val); + ASSERT_TRUE(reader.GetBool(&val)); + EXPECT_FALSE(val); +} + +TEST(BinaryWriterReaderTest, StringRoundTrip) { + BinaryWriter writer; + writer.PutString(""); + writer.PutString("hello"); + writer.PutString("world with spaces and 特殊字符"); + + BinaryReader reader(writer.data(), writer.size()); + std::string val; + ASSERT_TRUE(reader.GetString(&val)); + EXPECT_EQ(val, ""); + ASSERT_TRUE(reader.GetString(&val)); + EXPECT_EQ(val, "hello"); + ASSERT_TRUE(reader.GetString(&val)); + EXPECT_EQ(val, "world with spaces and 特殊字符"); +} + +TEST(BinaryWriterReaderTest, MixedTypesRoundTrip) { + BinaryWriter writer; + writer.PutUint32(42); + writer.PutString("test"); + writer.PutBool(true); + writer.PutUint64(99999); + writer.PutInt32(-5); + + BinaryReader reader(writer.data(), writer.size()); + uint32_t u32; + std::string str; + bool b; + uint64_t u64; + int32_t i32; + + ASSERT_TRUE(reader.GetUint32(&u32)); + EXPECT_EQ(u32, 42u); + ASSERT_TRUE(reader.GetString(&str)); + EXPECT_EQ(str, "test"); + ASSERT_TRUE(reader.GetBool(&b)); + EXPECT_TRUE(b); + ASSERT_TRUE(reader.GetUint64(&u64)); + EXPECT_EQ(u64, 99999u); + ASSERT_TRUE(reader.GetInt32(&i32)); + 
EXPECT_EQ(i32, -5); + EXPECT_EQ(reader.remaining(), 0u); +} + +TEST(BinaryWriterReaderTest, ReadBeyondBuffer) { + BinaryWriter writer; + writer.PutUint8(1); + + BinaryReader reader(writer.data(), writer.size()); + uint8_t val; + ASSERT_TRUE(reader.GetUint8(&val)); + + // Buffer exhausted, further reads should fail + EXPECT_FALSE(reader.GetUint8(&val)); + uint32_t u32; + EXPECT_FALSE(reader.GetUint32(&u32)); + uint64_t u64; + EXPECT_FALSE(reader.GetUint64(&u64)); + std::string str; + EXPECT_FALSE(reader.GetString(&str)); +} + +TEST(BinaryWriterReaderTest, TruncatedString) { + BinaryWriter writer; + writer.PutUint32(1000); // claim string length 1000 but no data follows + + BinaryReader reader(writer.data(), writer.size()); + std::string val; + EXPECT_FALSE(reader.GetString(&val)); +} + +TEST(CRC32Test, KnownValues) { + // CRC32 of empty data + uint32_t crc_empty = CRC32::Compute(nullptr, 0); + EXPECT_EQ(crc_empty, 0u); + + // CRC32 of "123456789" is a well-known test vector: 0xCBF43926 + const char *test_data = "123456789"; + uint32_t crc = CRC32::Compute(test_data, 9); + EXPECT_EQ(crc, 0xCBF43926u); +} + +TEST(CRC32Test, DifferentDataProducesDifferentCRC) { + const char *data1 = "hello"; + const char *data2 = "world"; + uint32_t crc1 = CRC32::Compute(data1, 5); + uint32_t crc2 = CRC32::Compute(data2, 5); + EXPECT_NE(crc1, crc2); +} + +TEST(CRC32Test, Deterministic) { + const char *data = "test data for crc"; + uint32_t crc1 = CRC32::Compute(data, strlen(data)); + uint32_t crc2 = CRC32::Compute(data, strlen(data)); + EXPECT_EQ(crc1, crc2); +} diff --git a/tests/db/crash_recovery/CMakeLists.txt b/tests/db/crash_recovery/CMakeLists.txt index 32b5f5610..221668657 100644 --- a/tests/db/crash_recovery/CMakeLists.txt +++ b/tests/db/crash_recovery/CMakeLists.txt @@ -26,7 +26,6 @@ endif() # Common libraries set(CRASH_RECOVERY_COMMON_LIBS zvec_db - zvec_proto core_knn_flat core_knn_flat_sparse core_knn_hnsw diff --git a/tests/db/index/CMakeLists.txt 
b/tests/db/index/CMakeLists.txt index d600dca6a..7a3835efd 100644 --- a/tests/db/index/CMakeLists.txt +++ b/tests/db/index/CMakeLists.txt @@ -38,7 +38,6 @@ foreach(CC_SRCS ${ALL_TEST_SRCS}) NAME ${CC_TARGET} STRICT LIBS zvec_db zvec_ailego - zvec_proto core_metric_static core_utility_static core_quantizer_static diff --git a/tests/db/index/common/db_proto_converter_test.cc b/tests/db/index/common/db_proto_converter_test.cc deleted file mode 100644 index dff93e9dd..000000000 --- a/tests/db/index/common/db_proto_converter_test.cc +++ /dev/null @@ -1,473 +0,0 @@ -// Copyright 2025-present the zvec project -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include <gtest/gtest.h> -#include "db/index/common/proto_converter.h" -#include "db/index/common/type_helper.h" - -using namespace zvec; - -TEST(ConverterTest, InvertIndexParamsConversion) { - // Test conversion from protobuf to C++ InvertIndexParams - proto::InvertIndexParams invert_pb; - invert_pb.set_enable_range_optimization(true); - - auto invert_params = ProtoConverter::FromPb(invert_pb); - ASSERT_NE(invert_params, nullptr); - EXPECT_TRUE(invert_params->enable_range_optimization()); - EXPECT_EQ(invert_params->type(), IndexType::INVERT); - - // Test with false value - proto::InvertIndexParams invert_pb2; - invert_pb2.set_enable_range_optimization(false); - - auto invert_params2 = ProtoConverter::FromPb(invert_pb2); - ASSERT_NE(invert_params2, nullptr); - EXPECT_FALSE(invert_params2->enable_range_optimization()); - - // Test conversion from C++ to protobuf - InvertIndexParams original_params(true); - auto pb_result = ProtoConverter::ToPb(&original_params); - EXPECT_TRUE(pb_result.enable_range_optimization()); -} - -TEST(ConverterTest, HnswIndexParamsConversion) { - // Test conversion from protobuf to C++ HnswIndexParams - proto::HnswIndexParams hnsw_pb; - auto *base_params = hnsw_pb.mutable_base(); - base_params->set_metric_type(proto::MT_L2); - base_params->set_quantize_type(proto::QT_FP16); - hnsw_pb.set_m(16); - hnsw_pb.set_ef_construction(100); - - auto hnsw_params = ProtoConverter::FromPb(hnsw_pb); - ASSERT_NE(hnsw_params, nullptr); - EXPECT_EQ(hnsw_params->metric_type(), MetricType::L2); - EXPECT_EQ(hnsw_params->m(), 16); - EXPECT_EQ(hnsw_params->ef_construction(), 100); - EXPECT_EQ(hnsw_params->quantize_type(), QuantizeType::FP16); - EXPECT_EQ(hnsw_params->type(), IndexType::HNSW); - - // Test conversion from C++ to protobuf - HnswIndexParams original_params(MetricType::IP, 32, 200, QuantizeType::INT8); - auto pb_result = ProtoConverter::ToPb(&original_params); - EXPECT_EQ(pb_result.base().metric_type(), proto::MT_IP); - EXPECT_EQ(pb_result.m(), 32); -
EXPECT_EQ(pb_result.ef_construction(), 200); - EXPECT_EQ(pb_result.base().quantize_type(), proto::QT_INT8); -} - -TEST(ConverterTest, FlatIndexParamsConversion) { - // Test conversion from protobuf to C++ FlatIndexParams - proto::FlatIndexParams flat_pb; - auto *base_params = flat_pb.mutable_base(); - base_params->set_metric_type(proto::MT_COSINE); - base_params->set_quantize_type(proto::QT_INT4); - - auto flat_params = ProtoConverter::FromPb(flat_pb); - ASSERT_NE(flat_params, nullptr); - EXPECT_EQ(flat_params->metric_type(), MetricType::COSINE); - EXPECT_EQ(flat_params->quantize_type(), QuantizeType::INT4); - EXPECT_EQ(flat_params->type(), IndexType::FLAT); - - // Test conversion from C++ to protobuf - FlatIndexParams original_params(MetricType::L2, QuantizeType::FP16); - auto pb_result = ProtoConverter::ToPb(&original_params); - EXPECT_EQ(pb_result.base().metric_type(), proto::MT_L2); - EXPECT_EQ(pb_result.base().quantize_type(), proto::QT_FP16); -} - -TEST(ConverterTest, IVFIndexParamsConversion) { - // Test conversion from protobuf to C++ IVFIndexParams - proto::IVFIndexParams ivf_pb; - auto *base_params = ivf_pb.mutable_base(); - base_params->set_metric_type(proto::MT_IP); - base_params->set_quantize_type(proto::QT_INT8); - ivf_pb.set_n_list(128); - - auto ivf_params = ProtoConverter::FromPb(ivf_pb); - ASSERT_NE(ivf_params, nullptr); - EXPECT_EQ(ivf_params->metric_type(), MetricType::IP); - EXPECT_EQ(ivf_params->n_list(), 128); - EXPECT_EQ(ivf_params->quantize_type(), QuantizeType::INT8); - EXPECT_EQ(ivf_params->type(), IndexType::IVF); - - // Test conversion from C++ to protobuf - IVFIndexParams original_params(MetricType::COSINE, 256, 10, false, - QuantizeType::INT4); - auto pb_result = ProtoConverter::ToPb(&original_params); - EXPECT_EQ(pb_result.base().metric_type(), proto::MT_COSINE); - EXPECT_EQ(pb_result.n_list(), 256); - EXPECT_EQ(pb_result.n_iters(), 10); - EXPECT_FALSE(pb_result.use_soar()); - EXPECT_EQ(pb_result.base().quantize_type(), 
proto::QT_INT4); -} - -TEST(ConverterTest, IndexParamsConversion) { - // Test conversion from protobuf to C++ IndexParams for HNSW - proto::IndexParams index_pb; - auto *hnsw_pb = index_pb.mutable_hnsw(); - auto *base_params = hnsw_pb->mutable_base(); - base_params->set_metric_type(proto::MT_L2); - base_params->set_quantize_type(proto::QT_FP16); - hnsw_pb->set_m(16); - hnsw_pb->set_ef_construction(100); - - auto index_params = ProtoConverter::FromPb(index_pb); - ASSERT_NE(index_params, nullptr); - EXPECT_EQ(index_params->type(), IndexType::HNSW); - auto hnsw_cast = std::dynamic_pointer_cast<HnswIndexParams>(index_params); - ASSERT_NE(hnsw_cast, nullptr); - EXPECT_EQ(hnsw_cast->metric_type(), MetricType::L2); - EXPECT_EQ(hnsw_cast->m(), 16); - EXPECT_EQ(hnsw_cast->ef_construction(), 100); - EXPECT_EQ(hnsw_cast->quantize_type(), QuantizeType::FP16); - - // Test conversion from C++ HnswIndexParams to protobuf IndexParams - HnswIndexParams hnsw_original(MetricType::IP, 32, 200); - auto pb_result = ProtoConverter::ToPb(&hnsw_original); - EXPECT_EQ(pb_result.base().metric_type(), proto::MT_IP); - EXPECT_EQ(pb_result.m(), 32); - EXPECT_EQ(pb_result.ef_construction(), 200); - - // Test conversion from protobuf to C++ IndexParams for FLAT - proto::IndexParams index_pb2; - auto *flat_pb = index_pb2.mutable_flat(); - auto *base_params2 = flat_pb->mutable_base(); - base_params2->set_metric_type(proto::MT_COSINE); - base_params2->set_quantize_type(proto::QT_INT8); - - auto index_params2 = ProtoConverter::FromPb(index_pb2); - ASSERT_NE(index_params2, nullptr); - EXPECT_EQ(index_params2->type(), IndexType::FLAT); - auto flat_cast = std::dynamic_pointer_cast<FlatIndexParams>(index_params2); - ASSERT_NE(flat_cast, nullptr); - EXPECT_EQ(flat_cast->metric_type(), MetricType::COSINE); - EXPECT_EQ(flat_cast->quantize_type(), QuantizeType::INT8); - - // Test conversion from C++ FlatIndexParams to protobuf IndexParams - FlatIndexParams flat_original(MetricType::L2); - auto pb_result2 =
ProtoConverter::ToPb(&flat_original); - EXPECT_EQ(pb_result2.base().metric_type(), proto::MT_L2); - - // Test conversion from protobuf to C++ IndexParams for IVF - proto::IndexParams index_pb3; - auto *ivf_pb = index_pb3.mutable_ivf(); - auto *base_params3 = ivf_pb->mutable_base(); - base_params3->set_metric_type(proto::MT_IP); - base_params3->set_quantize_type(proto::QT_INT4); - ivf_pb->set_n_list(128); - - auto index_params3 = ProtoConverter::FromPb(index_pb3); - ASSERT_NE(index_params3, nullptr); - EXPECT_EQ(index_params3->type(), IndexType::IVF); - auto ivf_cast = std::dynamic_pointer_cast<IVFIndexParams>(index_params3); - ASSERT_NE(ivf_cast, nullptr); - EXPECT_EQ(ivf_cast->metric_type(), MetricType::IP); - EXPECT_EQ(ivf_cast->n_list(), 128); - EXPECT_EQ(ivf_cast->quantize_type(), QuantizeType::INT4); - - // Test conversion from C++ IVFIndexParams to protobuf IndexParams - IVFIndexParams ivf_original(MetricType::COSINE, 256); - auto pb_result3 = ProtoConverter::ToPb(&ivf_original); - EXPECT_EQ(pb_result3.base().metric_type(), proto::MT_COSINE); - EXPECT_EQ(pb_result3.n_list(), 256); - - // Test conversion from protobuf to C++ IndexParams for INVERT - proto::IndexParams index_pb4; - auto *invert_pb = index_pb4.mutable_invert(); - invert_pb->set_enable_range_optimization(true); - - auto index_params4 = ProtoConverter::FromPb(index_pb4); - ASSERT_NE(index_params4, nullptr); - EXPECT_EQ(index_params4->type(), IndexType::INVERT); - auto invert_cast = - std::dynamic_pointer_cast<InvertIndexParams>(index_params4); - ASSERT_NE(invert_cast, nullptr); - EXPECT_TRUE(invert_cast->enable_range_optimization()); - - // Test conversion from C++ InvertIndexParams to protobuf IndexParams - InvertIndexParams invert_original(false); - auto pb_result4 = ProtoConverter::ToPb(&invert_original); - EXPECT_FALSE(pb_result4.enable_range_optimization()); -} - -TEST(ConverterTest, FieldSchemaConversion) { - // Test conversion from protobuf to C++ FieldSchema - proto::FieldSchema field_pb; - field_pb.set_name("test_field");
- field_pb.set_data_type(proto::DT_VECTOR_FP32); - field_pb.set_dimension(128); - field_pb.set_nullable(true); - - // Add index params - auto *index_params_pb = field_pb.mutable_index_params(); - auto *hnsw_pb = index_params_pb->mutable_hnsw(); - auto *base_params = hnsw_pb->mutable_base(); - base_params->set_metric_type(proto::MT_L2); - base_params->set_quantize_type(proto::QT_FP16); - hnsw_pb->set_m(16); - hnsw_pb->set_ef_construction(100); - - auto field_schema = ProtoConverter::FromPb(field_pb); - ASSERT_NE(field_schema, nullptr); - EXPECT_EQ(field_schema->name(), "test_field"); - EXPECT_EQ(field_schema->data_type(), DataType::VECTOR_FP32); - EXPECT_TRUE(field_schema->nullable()); - EXPECT_EQ(field_schema->dimension(), 128u); - ASSERT_NE(field_schema->index_params(), nullptr); - EXPECT_EQ(field_schema->index_params()->type(), IndexType::HNSW); - - // Test conversion from C++ to protobuf - FieldSchema original_field("another_field", DataType::ARRAY_INT32, 64, false, - nullptr); - auto pb_result = ProtoConverter::ToPb(original_field); - EXPECT_EQ(pb_result.name(), "another_field"); - EXPECT_EQ(pb_result.data_type(), proto::DT_ARRAY_INT32); - EXPECT_FALSE(pb_result.nullable()); - EXPECT_EQ(pb_result.dimension(), 64u); -} - -TEST(ConverterTest, CollectionSchemaConversion) { - // Test conversion from protobuf to C++ CollectionSchema - proto::CollectionSchema schema_pb; - schema_pb.set_name("test_collection"); - schema_pb.set_max_doc_count_per_segment(1000000); - - auto *field1_pb = schema_pb.add_fields(); - field1_pb->set_name("field1"); - field1_pb->set_data_type(proto::DT_STRING); - - auto *field2_pb = schema_pb.add_fields(); - field2_pb->set_name("field2"); - field2_pb->set_data_type(proto::DT_VECTOR_FP32); - field2_pb->set_dimension(128); - - auto collection_schema = ProtoConverter::FromPb(schema_pb); - ASSERT_NE(collection_schema, nullptr); - EXPECT_EQ(collection_schema->name(), "test_collection"); - EXPECT_EQ(collection_schema->fields().size(), 2); - 
EXPECT_EQ(collection_schema->max_doc_count_per_segment(), 1000000u); - - // Test conversion from C++ to protobuf - CollectionSchema original_schema; - original_schema.set_name("original_collection"); - - auto pb_result = ProtoConverter::ToPb(original_schema); - EXPECT_EQ(pb_result.name(), "original_collection"); -} - -TEST(ConverterTest, BlockMetaConversion) { - // Test conversion from protobuf to C++ BlockMeta - proto::BlockMeta meta_pb; - meta_pb.set_block_id(1); - meta_pb.set_block_type(proto::BT_SCALAR); - meta_pb.set_min_doc_id(100); - meta_pb.set_max_doc_id(200); - meta_pb.set_doc_count(50); - meta_pb.add_columns("col1"); - meta_pb.add_columns("col2"); - - auto block_meta = ProtoConverter::FromPb(meta_pb); - ASSERT_NE(block_meta, nullptr); - EXPECT_EQ(block_meta->id(), 1u); - EXPECT_EQ(block_meta->type(), BlockType::SCALAR); - EXPECT_EQ(block_meta->min_doc_id(), 100u); - EXPECT_EQ(block_meta->max_doc_id(), 200u); - EXPECT_EQ(block_meta->doc_count(), 50u); - EXPECT_EQ(block_meta->columns().size(), 2); - EXPECT_EQ(block_meta->columns()[0], "col1"); - EXPECT_EQ(block_meta->columns()[1], "col2"); - - // Test conversion from C++ to protobuf - BlockMeta original_meta(2, BlockType::VECTOR_INDEX, 300, 400); - original_meta.set_doc_count(75); - original_meta.add_column("col3"); - original_meta.add_column("col4"); - - auto pb_result = ProtoConverter::ToPb(original_meta); - EXPECT_EQ(pb_result.block_id(), 2u); - EXPECT_EQ(pb_result.block_type(), proto::BT_VECTOR_INDEX); - EXPECT_EQ(pb_result.min_doc_id(), 300u); - EXPECT_EQ(pb_result.max_doc_id(), 400u); - EXPECT_EQ(pb_result.doc_count(), 75u); - EXPECT_EQ(pb_result.columns_size(), 2); - EXPECT_EQ(pb_result.columns(0), "col3"); - EXPECT_EQ(pb_result.columns(1), "col4"); -} - -TEST(ConverterTest, SegmentMetaConversion) { - // Test conversion from protobuf to C++ SegmentMeta - proto::SegmentMeta segment_pb; - segment_pb.set_segment_id(10); - - // Add persisted blocks - auto *block1_pb = segment_pb.add_persisted_blocks(); 
- block1_pb->set_block_id(1); - block1_pb->set_block_type(proto::BT_SCALAR); - block1_pb->set_min_doc_id(0); - block1_pb->set_max_doc_id(100); - block1_pb->set_doc_count(50); - block1_pb->add_columns("col1"); - block1_pb->add_columns("col2"); - - auto *block2_pb = segment_pb.add_persisted_blocks(); - block2_pb->set_block_id(2); - block2_pb->set_block_type(proto::BT_VECTOR_INDEX); - block2_pb->set_min_doc_id(101); - block2_pb->set_max_doc_id(200); - block2_pb->set_doc_count(75); - block2_pb->add_columns("vec_col"); - - // Add writing forward block - auto *writing_block_pb = segment_pb.mutable_writing_forward_block(); - writing_block_pb->set_block_id(3); - writing_block_pb->set_block_type(proto::BT_SCALAR); - writing_block_pb->set_min_doc_id(201); - writing_block_pb->set_max_doc_id(300); - writing_block_pb->set_doc_count(25); - writing_block_pb->add_columns("col3"); - - // Add indexed vector fields - segment_pb.add_indexed_vector_fields("vec_col1"); - segment_pb.add_indexed_vector_fields("vec_col2"); - - auto segment_meta = ProtoConverter::FromPb(segment_pb); - ASSERT_NE(segment_meta, nullptr); - EXPECT_EQ(segment_meta->id(), 10u); - EXPECT_EQ(segment_meta->persisted_blocks().size(), 2); - EXPECT_TRUE(segment_meta->has_writing_forward_block()); - - // Check first persisted block - const auto &block1 = segment_meta->persisted_blocks()[0]; - EXPECT_EQ(block1.id(), 1u); - EXPECT_EQ(block1.type(), BlockType::SCALAR); - EXPECT_EQ(block1.min_doc_id(), 0u); - EXPECT_EQ(block1.max_doc_id(), 100u); - EXPECT_EQ(block1.doc_count(), 50u); - EXPECT_EQ(block1.columns().size(), 2); - EXPECT_EQ(block1.columns()[0], "col1"); - EXPECT_EQ(block1.columns()[1], "col2"); - - // Check second persisted block - const auto &block2 = segment_meta->persisted_blocks()[1]; - EXPECT_EQ(block2.id(), 2u); - EXPECT_EQ(block2.type(), BlockType::VECTOR_INDEX); - EXPECT_EQ(block2.min_doc_id(), 101u); - EXPECT_EQ(block2.max_doc_id(), 200u); - EXPECT_EQ(block2.doc_count(), 75u); - 
EXPECT_EQ(block2.columns().size(), 1); - EXPECT_EQ(block2.columns()[0], "vec_col"); - - // Check writing forward block - const auto &writing_block = segment_meta->writing_forward_block(); - EXPECT_EQ(writing_block.value().id(), 3u); - EXPECT_EQ(writing_block.value().type(), BlockType::SCALAR); - EXPECT_EQ(writing_block.value().min_doc_id(), 201u); - EXPECT_EQ(writing_block.value().max_doc_id(), 300u); - EXPECT_EQ(writing_block.value().doc_count(), 25u); - EXPECT_EQ(writing_block.value().columns().size(), 1); - EXPECT_EQ(writing_block.value().columns()[0], "col3"); - - // Check indexed vector fields - EXPECT_TRUE(segment_meta->vector_indexed("vec_col1")); - EXPECT_TRUE(segment_meta->vector_indexed("vec_col2")); - EXPECT_FALSE(segment_meta->vector_indexed("non_existent_field")); - - // Test conversion from C++ to protobuf - SegmentMeta original_meta(20); - - // Add persisted blocks - BlockMeta block1_meta(1, BlockType::SCALAR_INDEX, 0, 50); - block1_meta.set_doc_count(25); - block1_meta.add_column("col3"); - block1_meta.add_column("col4"); - original_meta.add_persisted_block(block1_meta); - - BlockMeta block2_meta(2, BlockType::VECTOR_INDEX_QUANTIZE, 51, 100); - block2_meta.set_doc_count(30); - block2_meta.add_column("vec_col2"); - original_meta.add_persisted_block(block2_meta); - - // Set writing forward block - BlockMeta writing_block_meta(3, BlockType::SCALAR, 101, 150); - writing_block_meta.set_doc_count(40); - writing_block_meta.add_column("col5"); - original_meta.set_writing_forward_block(writing_block_meta); - - // Add indexed vector fields - original_meta.add_indexed_vector_field("vec_field1"); - original_meta.add_indexed_vector_field("vec_field2"); - - auto pb_result = ProtoConverter::ToPb(original_meta); - EXPECT_EQ(pb_result.segment_id(), 20u); - EXPECT_EQ(pb_result.persisted_blocks_size(), 2); - - // Check first persisted block - const auto &pb_block1 = pb_result.persisted_blocks(0); - EXPECT_EQ(pb_block1.block_id(), 1u); - 
EXPECT_EQ(pb_block1.block_type(), proto::BT_SCALAR_INDEX); - EXPECT_EQ(pb_block1.min_doc_id(), 0u); - EXPECT_EQ(pb_block1.max_doc_id(), 50u); - EXPECT_EQ(pb_block1.doc_count(), 25u); - EXPECT_EQ(pb_block1.columns_size(), 2); - EXPECT_EQ(pb_block1.columns(0), "col3"); - EXPECT_EQ(pb_block1.columns(1), "col4"); - - // Check second persisted block - const auto &pb_block2 = pb_result.persisted_blocks(1); - EXPECT_EQ(pb_block2.block_id(), 2u); - EXPECT_EQ(pb_block2.block_type(), proto::BT_VECTOR_INDEX_QUANTIZE); - EXPECT_EQ(pb_block2.min_doc_id(), 51u); - EXPECT_EQ(pb_block2.max_doc_id(), 100u); - EXPECT_EQ(pb_block2.doc_count(), 30u); - EXPECT_EQ(pb_block2.columns_size(), 1); - EXPECT_EQ(pb_block2.columns(0), "vec_col2"); - - // Check writing forward block - const auto &pb_writing_block = pb_result.writing_forward_block(); - EXPECT_EQ(pb_writing_block.block_id(), 3u); - EXPECT_EQ(pb_writing_block.block_type(), proto::BT_SCALAR); - EXPECT_EQ(pb_writing_block.min_doc_id(), 101u); - EXPECT_EQ(pb_writing_block.max_doc_id(), 150u); - EXPECT_EQ(pb_writing_block.doc_count(), 40u); - EXPECT_EQ(pb_writing_block.columns_size(), 1); - EXPECT_EQ(pb_writing_block.columns(0), "col5"); - - // Check indexed vector fields - EXPECT_EQ(pb_result.indexed_vector_fields_size(), 2); - EXPECT_EQ(pb_result.indexed_vector_fields(0), "vec_field1"); - EXPECT_EQ(pb_result.indexed_vector_fields(1), "vec_field2"); -} - -TEST(ConverterTest, SegmentMetaWithEmptyFields) { - // Test conversion with minimal data - proto::SegmentMeta segment_pb; - segment_pb.set_segment_id(1); - - auto segment_meta = ProtoConverter::FromPb(segment_pb); - ASSERT_NE(segment_meta, nullptr); - EXPECT_EQ(segment_meta->id(), 1u); - EXPECT_EQ(segment_meta->persisted_blocks().size(), 0); - EXPECT_FALSE(segment_meta->has_writing_forward_block()); - EXPECT_EQ(segment_meta->indexed_vector_fields().size(), 0); - - // Test conversion from C++ to protobuf with minimal data - SegmentMeta original_meta(5); - auto pb_result = 
ProtoConverter::ToPb(original_meta); - EXPECT_EQ(pb_result.segment_id(), 5u); - EXPECT_EQ(pb_result.persisted_blocks_size(), 0); - EXPECT_FALSE(pb_result.has_writing_forward_block()); - EXPECT_EQ(pb_result.indexed_vector_fields_size(), 0); -} \ No newline at end of file diff --git a/tests/db/index/common/db_type_helper_test.cc b/tests/db/index/common/db_type_helper_test.cc index b3b7cc3d7..1000042e8 100644 --- a/tests/db/index/common/db_type_helper_test.cc +++ b/tests/db/index/common/db_type_helper_test.cc @@ -17,31 +17,11 @@ using namespace zvec; -TEST(IndexTypeCodeBookTest, ProtoToCppConversion) { - // Test conversion from protobuf to C++ IndexType - EXPECT_EQ(IndexTypeCodeBook::Get(proto::IT_HNSW), IndexType::HNSW); - EXPECT_EQ(IndexTypeCodeBook::Get(proto::IT_FLAT), IndexType::FLAT); - EXPECT_EQ(IndexTypeCodeBook::Get(proto::IT_IVF), IndexType::IVF); - EXPECT_EQ(IndexTypeCodeBook::Get(proto::IT_INVERT), IndexType::INVERT); - EXPECT_EQ(IndexTypeCodeBook::Get(proto::IT_UNDEFINED), IndexType::UNDEFINED); - EXPECT_EQ(IndexTypeCodeBook::Get(static_cast(999)), - IndexType::UNDEFINED); -} - -TEST(IndexTypeCodeBookTest, CppToProtoConversion) { - // Test conversion from C++ IndexType to protobuf IndexType - EXPECT_EQ(IndexTypeCodeBook::Get(IndexType::HNSW), proto::IT_HNSW); - EXPECT_EQ(IndexTypeCodeBook::Get(IndexType::FLAT), proto::IT_FLAT); - EXPECT_EQ(IndexTypeCodeBook::Get(IndexType::IVF), proto::IT_IVF); - EXPECT_EQ(IndexTypeCodeBook::Get(IndexType::INVERT), proto::IT_INVERT); - EXPECT_EQ(IndexTypeCodeBook::Get(IndexType::UNDEFINED), proto::IT_UNDEFINED); - EXPECT_EQ(IndexTypeCodeBook::Get(static_cast(999)), - proto::IT_UNDEFINED); -} - TEST(IndexTypeCodeBookTest, CppToStringConversion) { - // Test conversion from C++ IndexType to string EXPECT_EQ(IndexTypeCodeBook::AsString(IndexType::HNSW), "HNSW"); + EXPECT_EQ(IndexTypeCodeBook::AsString(IndexType::HNSW_RABITQ), "HNSW_RABITQ"); + EXPECT_EQ(IndexTypeCodeBook::AsString(IndexType::FLAT), "FLAT"); + 
EXPECT_EQ(IndexTypeCodeBook::AsString(IndexType::IVF), "IVF"); EXPECT_EQ(IndexTypeCodeBook::AsString(IndexType::INVERT), "INVERT"); EXPECT_EQ(IndexTypeCodeBook::AsString(IndexType::UNDEFINED), "UNDEFINED"); EXPECT_EQ(IndexTypeCodeBook::AsString(static_cast<IndexType>(999)), @@ -49,143 +29,37 @@ TEST(IndexTypeCodeBookTest, CppToStringConversion) { } TEST(DataTypeCodeBookTest, IsArrayType) { - // Test array type detection - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_BINARY)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_STRING)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_BOOL)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_INT32)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_INT64)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_UINT32)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_UINT64)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_FLOAT)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_DOUBLE)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_BINARY32)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_BINARY64)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_FP16)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_FP32)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_FP64)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_INT4)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_INT8)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_VECTOR_INT16)); - EXPECT_FALSE(DataTypeCodeBook::IsArrayType(proto::DT_SPARSE_VECTOR_FP32)); - - EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_BINARY)); - EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_STRING)); - EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_BOOL)); - EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_INT32)); - EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_INT64)); -
EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_UINT32)); - EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_UINT64)); - EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_FLOAT)); - EXPECT_TRUE(DataTypeCodeBook::IsArrayType(proto::DT_ARRAY_DOUBLE)); -} - -TEST(DataTypeCodeBookTest, ProtoToCppConversion) { - // Test conversion from protobuf to C++ DataType - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_BINARY), DataType::BINARY); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_STRING), DataType::STRING); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_BOOL), DataType::BOOL); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_INT32), DataType::INT32); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_INT64), DataType::INT64); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_UINT32), DataType::UINT32); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_UINT64), DataType::UINT64); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_FLOAT), DataType::FLOAT); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_DOUBLE), DataType::DOUBLE); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_BINARY32), - DataType::VECTOR_BINARY32); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_BINARY64), - DataType::VECTOR_BINARY64); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_FP16), - DataType::VECTOR_FP16); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_FP32), - DataType::VECTOR_FP32); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_FP64), - DataType::VECTOR_FP64); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_INT4), - DataType::VECTOR_INT4); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_INT8), - DataType::VECTOR_INT8); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_VECTOR_INT16), - DataType::VECTOR_INT16); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_SPARSE_VECTOR_FP32), - DataType::SPARSE_VECTOR_FP32); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_BINARY), - DataType::ARRAY_BINARY); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_STRING), - DataType::ARRAY_STRING); - 
EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_BOOL), DataType::ARRAY_BOOL); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_INT32), - DataType::ARRAY_INT32); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_INT64), - DataType::ARRAY_INT64); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_UINT32), - DataType::ARRAY_UINT32); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_UINT64), - DataType::ARRAY_UINT64); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_FLOAT), - DataType::ARRAY_FLOAT); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_ARRAY_DOUBLE), - DataType::ARRAY_DOUBLE); - EXPECT_EQ(DataTypeCodeBook::Get(proto::DT_UNDEFINED), DataType::UNDEFINED); - EXPECT_EQ(DataTypeCodeBook::Get(static_cast(999)), - DataType::UNDEFINED); -} - -TEST(DataTypeCodeBookTest, CppToProtoConversion) { - // Test conversion from C++ DataType to protobuf DataType - EXPECT_EQ(DataTypeCodeBook::Get(DataType::BINARY), proto::DT_BINARY); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::STRING), proto::DT_STRING); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::BOOL), proto::DT_BOOL); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::INT32), proto::DT_INT32); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::INT64), proto::DT_INT64); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::UINT32), proto::DT_UINT32); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::UINT64), proto::DT_UINT64); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::FLOAT), proto::DT_FLOAT); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::DOUBLE), proto::DT_DOUBLE); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_BINARY32), - proto::DT_VECTOR_BINARY32); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_BINARY64), - proto::DT_VECTOR_BINARY64); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_FP16), - proto::DT_VECTOR_FP16); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_FP32), - proto::DT_VECTOR_FP32); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_FP64), - proto::DT_VECTOR_FP64); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_INT4), - 
proto::DT_VECTOR_INT4); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_INT8), - proto::DT_VECTOR_INT8); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::VECTOR_INT16), - proto::DT_VECTOR_INT16); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::SPARSE_VECTOR_FP16), - proto::DT_SPARSE_VECTOR_FP16); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::SPARSE_VECTOR_FP32), - proto::DT_SPARSE_VECTOR_FP32); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_BINARY), - proto::DT_ARRAY_BINARY); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_STRING), - proto::DT_ARRAY_STRING); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_BOOL), proto::DT_ARRAY_BOOL); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_INT32), - proto::DT_ARRAY_INT32); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_INT64), - proto::DT_ARRAY_INT64); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_UINT32), - proto::DT_ARRAY_UINT32); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_UINT64), - proto::DT_ARRAY_UINT64); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_FLOAT), - proto::DT_ARRAY_FLOAT); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::ARRAY_DOUBLE), - proto::DT_ARRAY_DOUBLE); - EXPECT_EQ(DataTypeCodeBook::Get(DataType::UNDEFINED), proto::DT_UNDEFINED); - EXPECT_EQ(DataTypeCodeBook::Get(static_cast(999)), - proto::DT_UNDEFINED); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::BINARY)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::STRING)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::BOOL)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::INT32)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::INT64)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::UINT32)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::UINT64)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::FLOAT)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::DOUBLE)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::VECTOR_BINARY32)); + 
EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::VECTOR_BINARY64)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::VECTOR_FP16)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::VECTOR_FP32)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::VECTOR_FP64)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::VECTOR_INT4)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::VECTOR_INT8)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::VECTOR_INT16)); + EXPECT_FALSE(DataTypeCodeBook::IsArrayType(DataType::SPARSE_VECTOR_FP32)); + + EXPECT_TRUE(DataTypeCodeBook::IsArrayType(DataType::ARRAY_BINARY)); + EXPECT_TRUE(DataTypeCodeBook::IsArrayType(DataType::ARRAY_STRING)); + EXPECT_TRUE(DataTypeCodeBook::IsArrayType(DataType::ARRAY_BOOL)); + EXPECT_TRUE(DataTypeCodeBook::IsArrayType(DataType::ARRAY_INT32)); + EXPECT_TRUE(DataTypeCodeBook::IsArrayType(DataType::ARRAY_INT64)); + EXPECT_TRUE(DataTypeCodeBook::IsArrayType(DataType::ARRAY_UINT32)); + EXPECT_TRUE(DataTypeCodeBook::IsArrayType(DataType::ARRAY_UINT64)); + EXPECT_TRUE(DataTypeCodeBook::IsArrayType(DataType::ARRAY_FLOAT)); + EXPECT_TRUE(DataTypeCodeBook::IsArrayType(DataType::ARRAY_DOUBLE)); } TEST(DataTypeCodeBookTest, CppToStringConversion) { - // Test conversion from C++ DataType to string EXPECT_EQ(DataTypeCodeBook::AsString(DataType::BINARY), "BINARY"); EXPECT_EQ(DataTypeCodeBook::AsString(DataType::STRING), "STRING"); EXPECT_EQ(DataTypeCodeBook::AsString(DataType::BOOL), "BOOL"); @@ -205,6 +79,10 @@ TEST(DataTypeCodeBookTest, CppToStringConversion) { EXPECT_EQ(DataTypeCodeBook::AsString(DataType::VECTOR_INT4), "VECTOR_INT4"); EXPECT_EQ(DataTypeCodeBook::AsString(DataType::VECTOR_INT8), "VECTOR_INT8"); EXPECT_EQ(DataTypeCodeBook::AsString(DataType::VECTOR_INT16), "VECTOR_INT16"); + EXPECT_EQ(DataTypeCodeBook::AsString(DataType::SPARSE_VECTOR_FP16), + "SPARSE_VECTOR_FP16"); + EXPECT_EQ(DataTypeCodeBook::AsString(DataType::SPARSE_VECTOR_FP32), + "SPARSE_VECTOR_FP32"); 
EXPECT_EQ(DataTypeCodeBook::AsString(DataType::ARRAY_BINARY), "ARRAY_BINARY"); EXPECT_EQ(DataTypeCodeBook::AsString(DataType::ARRAY_STRING), "ARRAY_STRING"); EXPECT_EQ(DataTypeCodeBook::AsString(DataType::ARRAY_BOOL), "ARRAY_BOOL"); @@ -218,74 +96,33 @@ TEST(DataTypeCodeBookTest, CppToStringConversion) { EXPECT_EQ(DataTypeCodeBook::AsString(static_cast(999)), ""); } -TEST(MetricTypeCodeBookTest, ProtoToCppConversion) { - // Test conversion from protobuf to C++ MetricType - EXPECT_EQ(MetricTypeCodeBook::Get(proto::MT_IP), MetricType::IP); - EXPECT_EQ(MetricTypeCodeBook::Get(proto::MT_L2), MetricType::L2); - EXPECT_EQ(MetricTypeCodeBook::Get(proto::MT_COSINE), MetricType::COSINE); - EXPECT_EQ(MetricTypeCodeBook::Get(proto::MT_UNDEFINED), - MetricType::UNDEFINED); - EXPECT_EQ(MetricTypeCodeBook::Get(static_cast(999)), - MetricType::UNDEFINED); -} - -TEST(MetricTypeCodeBookTest, CppToProtoConversion) { - // Test conversion from C++ MetricType to protobuf MetricType - EXPECT_EQ(MetricTypeCodeBook::Get(MetricType::IP), proto::MT_IP); - EXPECT_EQ(MetricTypeCodeBook::Get(MetricType::L2), proto::MT_L2); - EXPECT_EQ(MetricTypeCodeBook::Get(MetricType::COSINE), proto::MT_COSINE); - EXPECT_EQ(MetricTypeCodeBook::Get(MetricType::UNDEFINED), - proto::MT_UNDEFINED); - EXPECT_EQ(MetricTypeCodeBook::Get(static_cast(999)), - proto::MT_UNDEFINED); -} - -TEST(QuantizeTypeCodeBookTest, ProtoToCppConversion) { - // Test conversion from protobuf to C++ QuantizeType - EXPECT_EQ(QuantizeTypeCodeBook::Get(proto::QT_FP16), QuantizeType::FP16); - EXPECT_EQ(QuantizeTypeCodeBook::Get(proto::QT_INT4), QuantizeType::INT4); - EXPECT_EQ(QuantizeTypeCodeBook::Get(proto::QT_INT8), QuantizeType::INT8); - EXPECT_EQ(QuantizeTypeCodeBook::Get(proto::QT_UNDEFINED), - QuantizeType::UNDEFINED); - EXPECT_EQ(QuantizeTypeCodeBook::Get(static_cast(999)), - QuantizeType::UNDEFINED); +TEST(MetricTypeCodeBookTest, CppToStringConversion) { + EXPECT_EQ(MetricTypeCodeBook::AsString(MetricType::IP), "IP"); + 
EXPECT_EQ(MetricTypeCodeBook::AsString(MetricType::L2), "L2"); + EXPECT_EQ(MetricTypeCodeBook::AsString(MetricType::COSINE), "COSINE"); + EXPECT_EQ(MetricTypeCodeBook::AsString(MetricType::UNDEFINED), "UNDEFINED"); + EXPECT_EQ(MetricTypeCodeBook::AsString(static_cast(999)), + "UNDEFINED"); } -TEST(QuantizeTypeCodeBookTest, CppToProtoConversion) { - // Test conversion from C++ QuantizeType to protobuf QuantizeType - EXPECT_EQ(QuantizeTypeCodeBook::Get(QuantizeType::FP16), proto::QT_FP16); - EXPECT_EQ(QuantizeTypeCodeBook::Get(QuantizeType::INT4), proto::QT_INT4); - EXPECT_EQ(QuantizeTypeCodeBook::Get(QuantizeType::INT8), proto::QT_INT8); - EXPECT_EQ(QuantizeTypeCodeBook::Get(QuantizeType::UNDEFINED), - proto::QT_UNDEFINED); - EXPECT_EQ(QuantizeTypeCodeBook::Get(static_cast(999)), - proto::QT_UNDEFINED); +TEST(QuantizeTypeCodeBookTest, CppToStringConversion) { + EXPECT_EQ(QuantizeTypeCodeBook::AsString(QuantizeType::FP16), "FP16"); + EXPECT_EQ(QuantizeTypeCodeBook::AsString(QuantizeType::INT4), "INT4"); + EXPECT_EQ(QuantizeTypeCodeBook::AsString(QuantizeType::INT8), "INT8"); + EXPECT_EQ(QuantizeTypeCodeBook::AsString(QuantizeType::RABITQ), "RABITQ"); + EXPECT_EQ(QuantizeTypeCodeBook::AsString(QuantizeType::UNDEFINED), + "UNDEFINED"); } -TEST(BlockTypeCodeBookTest, ProtoToCppConversion) { - // Test conversion from protobuf to C++ BlockType - EXPECT_EQ(BlockTypeCodeBook::Get(proto::BT_SCALAR), BlockType::SCALAR); - EXPECT_EQ(BlockTypeCodeBook::Get(proto::BT_SCALAR_INDEX), - BlockType::SCALAR_INDEX); - EXPECT_EQ(BlockTypeCodeBook::Get(proto::BT_VECTOR_INDEX), - BlockType::VECTOR_INDEX); - EXPECT_EQ(BlockTypeCodeBook::Get(proto::BT_VECTOR_INDEX_QUANTIZE), - BlockType::VECTOR_INDEX_QUANTIZE); - EXPECT_EQ(BlockTypeCodeBook::Get(proto::BT_UNDEFINED), BlockType::UNDEFINED); - EXPECT_EQ(BlockTypeCodeBook::Get(static_cast(999)), - BlockType::UNDEFINED); +TEST(BlockTypeCodeBookTest, CppToStringConversion) { + EXPECT_EQ(BlockTypeCodeBook::AsString(BlockType::SCALAR), "SCALAR"); 
+ EXPECT_EQ(BlockTypeCodeBook::AsString(BlockType::SCALAR_INDEX), + "SCALAR_INDEX"); + EXPECT_EQ(BlockTypeCodeBook::AsString(BlockType::VECTOR_INDEX), + "VECTOR_INDEX"); + EXPECT_EQ(BlockTypeCodeBook::AsString(BlockType::VECTOR_INDEX_QUANTIZE), + "VECTOR_INDEX_QUANTIZE"); + EXPECT_EQ(BlockTypeCodeBook::AsString(BlockType::UNDEFINED), "UNDEFINED"); + EXPECT_EQ(BlockTypeCodeBook::AsString(static_cast(999)), + "UNDEFINED"); } - -TEST(BlockTypeCodeBookTest, CppToProtoConversion) { - // Test conversion from C++ BlockType to protobuf BlockType - EXPECT_EQ(BlockTypeCodeBook::Get(BlockType::SCALAR), proto::BT_SCALAR); - EXPECT_EQ(BlockTypeCodeBook::Get(BlockType::SCALAR_INDEX), - proto::BT_SCALAR_INDEX); - EXPECT_EQ(BlockTypeCodeBook::Get(BlockType::VECTOR_INDEX), - proto::BT_VECTOR_INDEX); - EXPECT_EQ(BlockTypeCodeBook::Get(BlockType::VECTOR_INDEX_QUANTIZE), - proto::BT_VECTOR_INDEX_QUANTIZE); - EXPECT_EQ(BlockTypeCodeBook::Get(BlockType::UNDEFINED), proto::BT_UNDEFINED); - EXPECT_EQ(BlockTypeCodeBook::Get(static_cast(999)), - proto::BT_UNDEFINED); -} \ No newline at end of file diff --git a/tests/db/index/common/manifest_serializer_test.cc b/tests/db/index/common/manifest_serializer_test.cc new file mode 100644 index 000000000..04a83e3ec --- /dev/null +++ b/tests/db/index/common/manifest_serializer_test.cc @@ -0,0 +1,452 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "db/index/common/manifest_serializer.h" +#include + +using namespace zvec; + +class ManifestSerializerTest : public ::testing::Test { + protected: + CollectionSchema MakeTestSchema() { + CollectionSchema schema; + schema.set_name("test_collection"); + schema.set_max_doc_count_per_segment(5000000); + + auto scalar_field = + std::make_shared("name", DataType::STRING, false, nullptr); + schema.add_field(scalar_field); + + auto vector_field = std::make_shared( + "embedding", DataType::VECTOR_FP32, 128, false, + std::make_shared(MetricType::L2, 16, 200, + QuantizeType::FP16)); + schema.add_field(vector_field); + + auto invert_field = std::make_shared( + "category", DataType::INT32, false, + std::make_shared(true)); + schema.add_field(invert_field); + + return schema; + } + + SegmentMeta::Ptr MakeTestSegment(uint32_t segment_id) { + auto meta = std::make_shared(segment_id); + + BlockMeta scalar_block(1, BlockType::SCALAR, 0, 100); + scalar_block.set_doc_count(50); + scalar_block.add_column("name"); + scalar_block.add_column("category"); + meta->add_persisted_block(scalar_block); + + BlockMeta vector_block(2, BlockType::VECTOR_INDEX, 0, 100); + vector_block.set_doc_count(50); + vector_block.add_column("embedding"); + meta->add_persisted_block(vector_block); + + meta->add_indexed_vector_field("embedding"); + + return meta; + } +}; + +TEST_F(ManifestSerializerTest, EmptyManifestRoundTrip) { + CollectionSchema schema; + schema.set_name("empty"); + + std::vector buffer; + auto status = ManifestSerializer::Serialize(schema, false, 0, 0, 0, {}, + nullptr, &buffer); + ASSERT_TRUE(status.ok()); + EXPECT_GT(buffer.size(), ManifestSerializer::HEADER_SIZE); + + CollectionSchema out_schema; + bool out_mmap; + uint32_t out_id_suffix, out_del_suffix, out_next_seg; + std::vector out_segments; + SegmentMeta::Ptr out_writing; + + status = ManifestSerializer::Deserialize( + buffer.data(), buffer.size(), &out_schema, &out_mmap, &out_id_suffix, + &out_del_suffix, &out_next_seg, 
&out_segments, &out_writing); + ASSERT_TRUE(status.ok()); + + EXPECT_EQ(out_schema.name(), "empty"); + EXPECT_FALSE(out_mmap); + EXPECT_EQ(out_id_suffix, 0u); + EXPECT_EQ(out_del_suffix, 0u); + EXPECT_EQ(out_next_seg, 0u); + EXPECT_TRUE(out_segments.empty()); + EXPECT_EQ(out_writing, nullptr); +} + +TEST_F(ManifestSerializerTest, FullManifestRoundTrip) { + auto schema = MakeTestSchema(); + auto segment1 = MakeTestSegment(1); + auto segment2 = MakeTestSegment(2); + + auto writing_segment = std::make_shared(3); + BlockMeta writing_block(5, BlockType::SCALAR, 201, 300); + writing_block.set_doc_count(25); + writing_block.add_column("name"); + writing_segment->set_writing_forward_block(writing_block); + + std::vector persisted = {segment1, segment2}; + + std::vector buffer; + auto status = ManifestSerializer::Serialize( + schema, true, 42, 99, 4, persisted, writing_segment, &buffer); + ASSERT_TRUE(status.ok()); + + CollectionSchema out_schema; + bool out_mmap; + uint32_t out_id_suffix, out_del_suffix, out_next_seg; + std::vector out_segments; + SegmentMeta::Ptr out_writing; + + status = ManifestSerializer::Deserialize( + buffer.data(), buffer.size(), &out_schema, &out_mmap, &out_id_suffix, + &out_del_suffix, &out_next_seg, &out_segments, &out_writing); + ASSERT_TRUE(status.ok()); + + // Verify scalar fields + EXPECT_TRUE(out_mmap); + EXPECT_EQ(out_id_suffix, 42u); + EXPECT_EQ(out_del_suffix, 99u); + EXPECT_EQ(out_next_seg, 4u); + + // Verify schema + EXPECT_EQ(out_schema.name(), "test_collection"); + EXPECT_EQ(out_schema.max_doc_count_per_segment(), 5000000u); + ASSERT_EQ(out_schema.fields().size(), 3u); + + EXPECT_EQ(out_schema.fields()[0]->name(), "name"); + EXPECT_EQ(out_schema.fields()[0]->data_type(), DataType::STRING); + EXPECT_FALSE(out_schema.fields()[0]->nullable()); + EXPECT_EQ(out_schema.fields()[0]->index_params(), nullptr); + + EXPECT_EQ(out_schema.fields()[1]->name(), "embedding"); + EXPECT_EQ(out_schema.fields()[1]->data_type(), DataType::VECTOR_FP32); + 
EXPECT_EQ(out_schema.fields()[1]->dimension(), 128u); + ASSERT_NE(out_schema.fields()[1]->index_params(), nullptr); + EXPECT_EQ(out_schema.fields()[1]->index_params()->type(), IndexType::HNSW); + auto hnsw = std::dynamic_pointer_cast( + out_schema.fields()[1]->index_params()); + ASSERT_NE(hnsw, nullptr); + EXPECT_EQ(hnsw->metric_type(), MetricType::L2); + EXPECT_EQ(hnsw->m(), 16); + EXPECT_EQ(hnsw->ef_construction(), 200); + EXPECT_EQ(hnsw->quantize_type(), QuantizeType::FP16); + + EXPECT_EQ(out_schema.fields()[2]->name(), "category"); + EXPECT_EQ(out_schema.fields()[2]->data_type(), DataType::INT32); + ASSERT_NE(out_schema.fields()[2]->index_params(), nullptr); + EXPECT_EQ(out_schema.fields()[2]->index_params()->type(), IndexType::INVERT); + + // Verify persisted segments + ASSERT_EQ(out_segments.size(), 2u); + EXPECT_EQ(out_segments[0]->id(), 1u); + EXPECT_EQ(out_segments[0]->persisted_blocks().size(), 2u); + EXPECT_TRUE(out_segments[0]->vector_indexed("embedding")); + EXPECT_EQ(out_segments[1]->id(), 2u); + + // Verify first segment's blocks + const auto &blocks = out_segments[0]->persisted_blocks(); + EXPECT_EQ(blocks[0].id(), 1u); + EXPECT_EQ(blocks[0].type(), BlockType::SCALAR); + EXPECT_EQ(blocks[0].min_doc_id(), 0u); + EXPECT_EQ(blocks[0].max_doc_id(), 100u); + EXPECT_EQ(blocks[0].doc_count(), 50u); + ASSERT_EQ(blocks[0].columns().size(), 2u); + EXPECT_EQ(blocks[0].columns()[0], "name"); + EXPECT_EQ(blocks[0].columns()[1], "category"); + + EXPECT_EQ(blocks[1].id(), 2u); + EXPECT_EQ(blocks[1].type(), BlockType::VECTOR_INDEX); + + // Verify writing segment + ASSERT_NE(out_writing, nullptr); + EXPECT_EQ(out_writing->id(), 3u); + ASSERT_TRUE(out_writing->has_writing_forward_block()); + EXPECT_EQ(out_writing->writing_forward_block()->id(), 5u); + EXPECT_EQ(out_writing->writing_forward_block()->type(), BlockType::SCALAR); + EXPECT_EQ(out_writing->writing_forward_block()->doc_count(), 25u); +} + +TEST_F(ManifestSerializerTest, AllIndexTypesRoundTrip) { + 
CollectionSchema schema; + schema.set_name("index_types_test"); + + // HNSW + auto hnsw_field = std::make_shared( + "vec_hnsw", DataType::VECTOR_FP32, 64, false, + std::make_shared(MetricType::IP, 32, 100, + QuantizeType::INT8)); + schema.add_field(hnsw_field); + + // HNSW_RABITQ + auto rabitq_field = std::make_shared( + "vec_rabitq", DataType::VECTOR_FP32, 128, false, + std::make_shared(MetricType::COSINE, 64, 8, 24, + 150, 1000)); + schema.add_field(rabitq_field); + + // FLAT + auto flat_field = std::make_shared( + "vec_flat", DataType::VECTOR_FP32, 32, false, + std::make_shared(MetricType::L2, QuantizeType::INT4)); + schema.add_field(flat_field); + + // IVF + auto ivf_field = std::make_shared( + "vec_ivf", DataType::VECTOR_FP32, 256, false, + std::make_shared(MetricType::IP, 128, 20, true, + QuantizeType::FP16)); + schema.add_field(ivf_field); + + // INVERT + auto invert_field = + std::make_shared("scalar_idx", DataType::STRING, false, + std::make_shared(false)); + schema.add_field(invert_field); + + std::vector buffer; + auto status = ManifestSerializer::Serialize(schema, false, 0, 0, 0, {}, + nullptr, &buffer); + ASSERT_TRUE(status.ok()); + + CollectionSchema out_schema; + bool out_mmap; + uint32_t out_id, out_del, out_next; + std::vector out_segs; + SegmentMeta::Ptr out_writing; + + status = ManifestSerializer::Deserialize( + buffer.data(), buffer.size(), &out_schema, &out_mmap, &out_id, &out_del, + &out_next, &out_segs, &out_writing); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(out_schema.fields().size(), 5u); + + // Verify HNSW + auto out_hnsw = std::dynamic_pointer_cast( + out_schema.fields()[0]->index_params()); + ASSERT_NE(out_hnsw, nullptr); + EXPECT_EQ(out_hnsw->metric_type(), MetricType::IP); + EXPECT_EQ(out_hnsw->m(), 32); + EXPECT_EQ(out_hnsw->ef_construction(), 100); + EXPECT_EQ(out_hnsw->quantize_type(), QuantizeType::INT8); + + // Verify HNSW_RABITQ + auto out_rabitq = std::dynamic_pointer_cast( + out_schema.fields()[1]->index_params()); + 
ASSERT_NE(out_rabitq, nullptr); + EXPECT_EQ(out_rabitq->metric_type(), MetricType::COSINE); + EXPECT_EQ(out_rabitq->total_bits(), 64); + EXPECT_EQ(out_rabitq->num_clusters(), 8); + EXPECT_EQ(out_rabitq->m(), 24); + EXPECT_EQ(out_rabitq->ef_construction(), 150); + EXPECT_EQ(out_rabitq->sample_count(), 1000); + + // Verify FLAT + auto out_flat = std::dynamic_pointer_cast( + out_schema.fields()[2]->index_params()); + ASSERT_NE(out_flat, nullptr); + EXPECT_EQ(out_flat->metric_type(), MetricType::L2); + EXPECT_EQ(out_flat->quantize_type(), QuantizeType::INT4); + + // Verify IVF + auto out_ivf = std::dynamic_pointer_cast( + out_schema.fields()[3]->index_params()); + ASSERT_NE(out_ivf, nullptr); + EXPECT_EQ(out_ivf->metric_type(), MetricType::IP); + EXPECT_EQ(out_ivf->n_list(), 128); + EXPECT_EQ(out_ivf->n_iters(), 20); + EXPECT_TRUE(out_ivf->use_soar()); + EXPECT_EQ(out_ivf->quantize_type(), QuantizeType::FP16); + + // Verify INVERT + auto out_invert = std::dynamic_pointer_cast( + out_schema.fields()[4]->index_params()); + ASSERT_NE(out_invert, nullptr); + EXPECT_FALSE(out_invert->enable_range_optimization()); +} + +TEST_F(ManifestSerializerTest, InvalidMagicNumber) { + std::vector bad_data = {0x00, 0x00, 0x00, 0x00, // wrong magic + 0x01, 0x00, 0x00, 0x00, // version + 0x00, 0x00, 0x00, 0x00, // length + 0x00, 0x00, 0x00, 0x00}; // crc + + CollectionSchema schema; + bool mmap; + uint32_t id, del, next; + std::vector segs; + SegmentMeta::Ptr writing; + + auto status = + ManifestSerializer::Deserialize(bad_data.data(), bad_data.size(), &schema, + &mmap, &id, &del, &next, &segs, &writing); + EXPECT_FALSE(status.ok()); +} + +TEST_F(ManifestSerializerTest, TruncatedData) { + std::vector truncated = {0x5A, 0x56, 0x45, 0x43}; // just magic + + CollectionSchema schema; + bool mmap; + uint32_t id, del, next; + std::vector segs; + SegmentMeta::Ptr writing; + + auto status = ManifestSerializer::Deserialize( + truncated.data(), truncated.size(), &schema, &mmap, &id, &del, &next, + 
&segs, &writing); + EXPECT_FALSE(status.ok()); +} + +TEST_F(ManifestSerializerTest, CorruptedCRC) { + auto schema = MakeTestSchema(); + std::vector buffer; + auto status = ManifestSerializer::Serialize(schema, false, 0, 0, 0, {}, + nullptr, &buffer); + ASSERT_TRUE(status.ok()); + + // Corrupt a byte in the payload + if (buffer.size() > ManifestSerializer::HEADER_SIZE + 1) { + buffer[ManifestSerializer::HEADER_SIZE + 1] ^= 0xFF; + } + + CollectionSchema out_schema; + bool mmap; + uint32_t id, del, next; + std::vector segs; + SegmentMeta::Ptr writing; + + status = + ManifestSerializer::Deserialize(buffer.data(), buffer.size(), &out_schema, + &mmap, &id, &del, &next, &segs, &writing); + EXPECT_FALSE(status.ok()); +} + +TEST_F(ManifestSerializerTest, NoWritingSegment) { + CollectionSchema schema; + schema.set_name("no_writing"); + + auto segment = MakeTestSegment(1); + std::vector persisted = {segment}; + + std::vector buffer; + auto status = ManifestSerializer::Serialize(schema, false, 10, 20, 2, + persisted, nullptr, &buffer); + ASSERT_TRUE(status.ok()); + + CollectionSchema out_schema; + bool out_mmap; + uint32_t out_id, out_del, out_next; + std::vector out_segs; + SegmentMeta::Ptr out_writing; + + status = ManifestSerializer::Deserialize( + buffer.data(), buffer.size(), &out_schema, &out_mmap, &out_id, &out_del, + &out_next, &out_segs, &out_writing); + ASSERT_TRUE(status.ok()); + + EXPECT_EQ(out_schema.name(), "no_writing"); + EXPECT_EQ(out_segs.size(), 1u); + EXPECT_EQ(out_writing, nullptr); + EXPECT_EQ(out_id, 10u); + EXPECT_EQ(out_del, 20u); + EXPECT_EQ(out_next, 2u); +} + +TEST_F(ManifestSerializerTest, FieldWithNoIndex) { + CollectionSchema schema; + schema.set_name("no_index"); + + auto field = std::make_shared("plain_field", DataType::DOUBLE, + false, nullptr); + schema.add_field(field); + + std::vector buffer; + auto status = ManifestSerializer::Serialize(schema, false, 0, 0, 0, {}, + nullptr, &buffer); + ASSERT_TRUE(status.ok()); + + CollectionSchema 
out_schema; + bool mmap; + uint32_t id, del, next; + std::vector segs; + SegmentMeta::Ptr writing; + + status = + ManifestSerializer::Deserialize(buffer.data(), buffer.size(), &out_schema, + &mmap, &id, &del, &next, &segs, &writing); + ASSERT_TRUE(status.ok()); + + ASSERT_EQ(out_schema.fields().size(), 1u); + EXPECT_EQ(out_schema.fields()[0]->name(), "plain_field"); + EXPECT_EQ(out_schema.fields()[0]->data_type(), DataType::DOUBLE); + EXPECT_EQ(out_schema.fields()[0]->index_params(), nullptr); +} + +TEST_F(ManifestSerializerTest, MultipleBlockTypes) { + CollectionSchema schema; + schema.set_name("multi_blocks"); + + auto segment = std::make_shared(1); + + BlockMeta scalar(1, BlockType::SCALAR, 0, 100); + scalar.set_doc_count(100); + segment->add_persisted_block(scalar); + + BlockMeta scalar_idx(2, BlockType::SCALAR_INDEX, 0, 100); + scalar_idx.set_doc_count(100); + scalar_idx.add_column("col_a"); + segment->add_persisted_block(scalar_idx); + + BlockMeta vec_idx(3, BlockType::VECTOR_INDEX, 0, 100); + vec_idx.set_doc_count(100); + vec_idx.add_column("vec_col"); + segment->add_persisted_block(vec_idx); + + BlockMeta vec_quant(4, BlockType::VECTOR_INDEX_QUANTIZE, 0, 100); + vec_quant.set_doc_count(100); + vec_quant.add_column("vec_col"); + segment->add_persisted_block(vec_quant); + + std::vector buffer; + auto status = ManifestSerializer::Serialize(schema, false, 0, 0, 0, {segment}, + nullptr, &buffer); + ASSERT_TRUE(status.ok()); + + CollectionSchema out_schema; + bool mmap; + uint32_t id, del, next; + std::vector out_segs; + SegmentMeta::Ptr writing; + + status = ManifestSerializer::Deserialize(buffer.data(), buffer.size(), + &out_schema, &mmap, &id, &del, &next, + &out_segs, &writing); + ASSERT_TRUE(status.ok()); + + ASSERT_EQ(out_segs.size(), 1u); + const auto &blocks = out_segs[0]->persisted_blocks(); + ASSERT_EQ(blocks.size(), 4u); + EXPECT_EQ(blocks[0].type(), BlockType::SCALAR); + EXPECT_EQ(blocks[1].type(), BlockType::SCALAR_INDEX); + 
EXPECT_EQ(blocks[2].type(), BlockType::VECTOR_INDEX); + EXPECT_EQ(blocks[3].type(), BlockType::VECTOR_INDEX_QUANTIZE); +} diff --git a/tests/db/index/common/version_manager_test.cc b/tests/db/index/common/version_manager_test.cc index 7a16a0ca7..0b92fe160 100644 --- a/tests/db/index/common/version_manager_test.cc +++ b/tests/db/index/common/version_manager_test.cc @@ -18,7 +18,6 @@ #include #include "db/common/file_helper.h" #include "db/index/common/meta.h" -#include "proto/zvec.pb.h" #include "zvec/db/schema.h" namespace zvec { @@ -253,27 +252,23 @@ TEST_F(VersionManagerTest, ErrorConditions) { EXPECT_FALSE(version_manager->remove_persisted_segment_meta(999).ok()); } -// Test conversion between protobuf and internal schema +// Test schema round-trip through Version TEST_F(VersionManagerTest, SchemaConversion) { - // Create protobuf schema - zvec::proto::CollectionSchema pb_schema; - pb_schema.set_name("test_collection"); - - auto pb_field = pb_schema.add_fields(); - pb_field->set_name("vector_field"); - pb_field->set_data_type(zvec::proto::DataType::DT_VECTOR_FP32); - pb_field->set_dimension(128); + CollectionSchema schema; + schema.set_name("test_collection"); - // Convert to internal schema (this would be done in the Load method) - CollectionSchema internal_schema; - internal_schema.set_name(pb_schema.name()); - // In a real implementation, fields would be converted here + auto field = std::make_shared("vector_field", + DataType::VECTOR_FP32, 128, false); + schema.add_field(field); - // Test that we can set and retrieve the schema Version version; - version.set_schema(internal_schema); + version.set_schema(schema); EXPECT_EQ(version.schema().name(), "test_collection"); + EXPECT_EQ(version.schema().fields().size(), 1); + EXPECT_EQ(version.schema().fields()[0]->name(), "vector_field"); + EXPECT_EQ(version.schema().fields()[0]->data_type(), DataType::VECTOR_FP32); + EXPECT_EQ(version.schema().fields()[0]->dimension(), 128u); } // Test SegmentMeta functionality 
diff --git a/tests/db/sqlengine/CMakeLists.txt b/tests/db/sqlengine/CMakeLists.txt index 7922bbf6b..5c1f660b9 100644 --- a/tests/db/sqlengine/CMakeLists.txt +++ b/tests/db/sqlengine/CMakeLists.txt @@ -23,7 +23,6 @@ foreach(CC_SRCS ${ALL_TEST_SRCS}) cc_gmock( NAME ${CC_TARGET} STRICT LIBS zvec_common - zvec_proto zvec_sqlengine zvec_ailego core_metric diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index 22f06ceae..0f2f6a3f0 100644 --- a/thirdparty/CMakeLists.txt +++ b/thirdparty/CMakeLists.txt @@ -18,7 +18,6 @@ add_subdirectory(gflags gflags EXCLUDE_FROM_ALL) add_subdirectory(glog glog EXCLUDE_FROM_ALL) add_subdirectory(sparsehash sparsehash EXCLUDE_FROM_ALL) add_subdirectory(yaml-cpp yaml-cpp EXCLUDE_FROM_ALL) -add_subdirectory(protobuf protobuf EXCLUDE_FROM_ALL) add_subdirectory(antlr antlr EXCLUDE_FROM_ALL) add_subdirectory(lz4 lz4 EXCLUDE_FROM_ALL) add_subdirectory(rocksdb rocksdb EXCLUDE_FROM_ALL) diff --git a/thirdparty/protobuf/CMakeLists.txt b/thirdparty/protobuf/CMakeLists.txt deleted file mode 100644 index 0eebf9445..000000000 --- a/thirdparty/protobuf/CMakeLists.txt +++ /dev/null @@ -1,65 +0,0 @@ -set(protobuf_BUILD_TESTS OFF CACHE BOOL "Disable testing in protobuf" FORCE) -set(protobuf_WITH_ZLIB ON CACHE BOOL "Disable zlib support in protobuf" FORCE) -if(MSVC) - set(protobuf_MSVC_STATIC_RUNTIME ${ZVEC_USE_STATIC_CRT} CACHE BOOL "" FORCE) -endif() - -set(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR}) -add_subdirectory(protobuf-3.21.12/cmake protobuf-3.21.12) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY}) -unset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY) - -if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options(libprotobuf PRIVATE - -Wno-deprecated-declarations - -Wno-invalid-noreturn - -Wno-unused-function - ) - target_compile_options(libprotoc PRIVATE - -Wno-unused-private-field - -Wno-unused-function - ) - 
target_compile_options(protoc PRIVATE - -Wno-unused-private-field - -Wno-unused-function - ) -elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - target_compile_options(libprotobuf PRIVATE - -Wno-deprecated-declarations - -Wno-unused-function - -Wno-maybe-uninitialized - -Wno-sign-compare - -Wno-return-type - -Wno-stringop-overflow - ) - target_compile_options(libprotoc PRIVATE - -Wno-unused-private-field - -Wno-unused-function - -Wno-unused-but-set-variable - -Wno-sign-compare - ) - target_compile_options(protoc PRIVATE - -Wno-unused-private-field - -Wno-unused-function - -Wno-unused-but-set-variable - -Wno-sign-compare - ) -endif() - -get_target_property(libprotobuf_SOURCE_DIR libprotobuf SOURCE_DIR) -get_filename_component(libprotobuf_INCLUDE_DIR ${libprotobuf_SOURCE_DIR}/../src ABSOLUTE) - -set(PROTOBUF_FOUND TRUE PARENT_SCOPE) -set(PROTOBUF_INCLUDE_DIR ${libprotobuf_INCLUDE_DIR} PARENT_SCOPE) -set(PROTOBUF_INCLUDE_DIRS ${libprotobuf_INCLUDE_DIR} PARENT_SCOPE) - -set(PROTOBUF_LIBRARY $ PARENT_SCOPE) -set(PROTOBUF_LIBRARIES $ PARENT_SCOPE) - -set(PROTOBUF_LITE_LIBRARY $ PARENT_SCOPE) -set(PROTOBUF_LITE_LIBRARIES $ PARENT_SCOPE) - -set(PROTOBUF_PROTOC_LIBRARY $ PARENT_SCOPE) -set(PROTOBUF_PROTOC_LIBRARIES $ PARENT_SCOPE) -set(PROTOBUF_PROTOC_EXECUTABLE $ PARENT_SCOPE) diff --git a/thirdparty/protobuf/protobuf-3.21.12 b/thirdparty/protobuf/protobuf-3.21.12 deleted file mode 160000 index f0dc78d7e..000000000 --- a/thirdparty/protobuf/protobuf-3.21.12 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f0dc78d7e6e331b8c6bb2d5283e06aa26883ca7c