From ebd71b06a230ca62e08c435e775b096c62723066 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 29 Jan 2026 02:27:14 +0800 Subject: [PATCH 001/126] Add OMEGALib as git submodule in thirdparty/omega Integrate OMEGALib repository as a submodule to provide OMEGA adaptive search functionality. The submodule includes GBDT inference, feature extraction, model management, and search context components. --- .gitmodules | 3 +++ thirdparty/omega | 1 + 2 files changed, 4 insertions(+) create mode 160000 thirdparty/omega diff --git a/.gitmodules b/.gitmodules index 5086d05f1..478e917eb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -39,3 +39,6 @@ path = thirdparty/magic_enum/magic_enum-0.9.7 url = https://github.com/Neargye/magic_enum.git ignore = all +[submodule "thirdparty/omega"] + path = thirdparty/omega + url = git@github.com:driPyf/OMEGALib.git diff --git a/thirdparty/omega b/thirdparty/omega new file mode 160000 index 000000000..3d613ec9b --- /dev/null +++ b/thirdparty/omega @@ -0,0 +1 @@ +Subproject commit 3d613ec9ba9df0b382001998ecebe1b56d224039 From d0e6be399d4ed77b80e62e1e2867b44abc9e80cd Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 29 Jan 2026 02:30:44 +0800 Subject: [PATCH 002/126] Change omega submodule URL from SSH to HTTPS --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 478e917eb..b1baf0770 100644 --- a/.gitmodules +++ b/.gitmodules @@ -41,4 +41,4 @@ ignore = all [submodule "thirdparty/omega"] path = thirdparty/omega - url = git@github.com:driPyf/OMEGALib.git + url = https://github.com/driPyf/OMEGALib.git From 7890bcd7593bdc0ed5129432c16673edbaa32531 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 29 Jan 2026 03:04:10 +0800 Subject: [PATCH 003/126] Implement OMEGA index algorithm as wrapper around HNSW Add OMEGA index components that wrap HNSW with adaptive search capability: - OmegaSearcher: Wraps HnswSearcher with OMEGA model integration and automatic fallback - OmegaBuilder: 
Wraps HnswBuilder for index construction - OmegaStreamer: Wraps HnswStreamer for streaming operations - Factory registration for all components - CMakeLists.txt integration with omega library dependency OMEGA mode activates when vector count >= threshold and model is loaded, otherwise falls back to standard HNSW transparently. --- src/core/algorithm/CMakeLists.txt | 3 +- src/core/algorithm/omega/CMakeLists.txt | 11 ++ src/core/algorithm/omega/omega_builder.cc | 126 +++++++++++++++ src/core/algorithm/omega/omega_builder.h | 67 ++++++++ src/core/algorithm/omega/omega_searcher.cc | 179 +++++++++++++++++++++ src/core/algorithm/omega/omega_searcher.h | 152 +++++++++++++++++ src/core/algorithm/omega/omega_streamer.cc | 53 ++++++ src/core/algorithm/omega/omega_streamer.h | 143 ++++++++++++++++ 8 files changed, 733 insertions(+), 1 deletion(-) create mode 100644 src/core/algorithm/omega/CMakeLists.txt create mode 100644 src/core/algorithm/omega/omega_builder.cc create mode 100644 src/core/algorithm/omega/omega_builder.h create mode 100644 src/core/algorithm/omega/omega_searcher.cc create mode 100644 src/core/algorithm/omega/omega_searcher.h create mode 100644 src/core/algorithm/omega/omega_streamer.cc create mode 100644 src/core/algorithm/omega/omega_streamer.h diff --git a/src/core/algorithm/CMakeLists.txt b/src/core/algorithm/CMakeLists.txt index 648dbefea..cb954a978 100644 --- a/src/core/algorithm/CMakeLists.txt +++ b/src/core/algorithm/CMakeLists.txt @@ -6,4 +6,5 @@ cc_directory(flat) cc_directory(flat_sparse) cc_directory(ivf) cc_directory(hnsw) -cc_directory(hnsw_sparse) \ No newline at end of file +cc_directory(hnsw_sparse) +cc_directory(omega) \ No newline at end of file diff --git a/src/core/algorithm/omega/CMakeLists.txt b/src/core/algorithm/omega/CMakeLists.txt new file mode 100644 index 000000000..9358aaa3e --- /dev/null +++ b/src/core/algorithm/omega/CMakeLists.txt @@ -0,0 +1,11 @@ +include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) 
+include(${PROJECT_ROOT_DIR}/cmake/option.cmake) + +cc_library( + NAME core_knn_omega + STATIC SHARED STRICT ALWAYS_LINK + SRCS *.cc + LIBS core_framework core_knn_hnsw omega + INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm ${PROJECT_ROOT_DIR}/thirdparty/omega/include + VERSION "${PROXIMA_ZVEC_VERSION}" + ) diff --git a/src/core/algorithm/omega/omega_builder.cc b/src/core/algorithm/omega/omega_builder.cc new file mode 100644 index 000000000..c64713cd5 --- /dev/null +++ b/src/core/algorithm/omega/omega_builder.cc @@ -0,0 +1,126 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "omega_builder.h" +#include + +namespace zvec { +namespace core { + +OmegaBuilder::OmegaBuilder() : hnsw_builder_(nullptr) {} + +int OmegaBuilder::init(const IndexMeta &meta, const ailego::Params ¶ms) { + if (state_ != BUILD_STATE_INIT) { + LOG_ERROR("OmegaBuilder already initialized"); + return PROXIMA_BE_ERROR_CODE(DuplicateInit); + } + + // Create underlying HNSW builder + hnsw_builder_ = std::make_shared(); + int ret = hnsw_builder_->init(meta, params); + if (ret != 0) { + LOG_ERROR("Failed to initialize HNSW builder"); + return ret; + } + + state_ = BUILD_STATE_INITED; + LOG_INFO("OmegaBuilder initialized"); + return 0; +} + +int OmegaBuilder::cleanup(void) { + if (state_ == BUILD_STATE_INIT) { + return 0; + } + + if (hnsw_builder_ != nullptr) { + hnsw_builder_->cleanup(); + hnsw_builder_.reset(); + } + + state_ = BUILD_STATE_INIT; + return 0; +} + +int OmegaBuilder::train(IndexThreads::Pointer threads, + IndexHolder::Pointer holder) { + if (state_ != BUILD_STATE_INITED) { + LOG_ERROR("OmegaBuilder not initialized"); + return PROXIMA_BE_ERROR_CODE(InvalidState); + } + + int ret = hnsw_builder_->train(threads, holder); + if (ret != 0) { + LOG_ERROR("Failed to train HNSW builder"); + return ret; + } + + state_ = BUILD_STATE_TRAINED; + return 0; +} + +int OmegaBuilder::train(const IndexTrainer::Pointer &trainer) { + if (state_ != BUILD_STATE_INITED) { + LOG_ERROR("OmegaBuilder not initialized"); + return PROXIMA_BE_ERROR_CODE(InvalidState); + } + + int ret = hnsw_builder_->train(trainer); + if (ret != 0) { + LOG_ERROR("Failed to train HNSW builder"); + return ret; + } + + state_ = BUILD_STATE_TRAINED; + return 0; +} + +int OmegaBuilder::build(IndexThreads::Pointer threads, + IndexHolder::Pointer holder) { + if (state_ != BUILD_STATE_TRAINED) { + LOG_ERROR("OmegaBuilder not trained"); + return PROXIMA_BE_ERROR_CODE(InvalidState); + } + + int ret = hnsw_builder_->build(threads, holder); + if (ret != 0) { + LOG_ERROR("Failed to build HNSW index"); + 
return ret; + } + + state_ = BUILD_STATE_BUILT; + LOG_INFO("OmegaBuilder build completed"); + return 0; +} + +int OmegaBuilder::dump(const IndexDumper::Pointer &dumper) { + if (state_ != BUILD_STATE_BUILT) { + LOG_ERROR("OmegaBuilder not built"); + return PROXIMA_BE_ERROR_CODE(InvalidState); + } + + int ret = hnsw_builder_->dump(dumper); + if (ret != 0) { + LOG_ERROR("Failed to dump HNSW index"); + return ret; + } + + LOG_INFO("OmegaBuilder dump completed"); + return 0; +} + +} // namespace core +} // namespace zvec + +INDEX_FACTORY_REGISTER_BUILDER(zvec::core::OmegaBuilder); diff --git a/src/core/algorithm/omega/omega_builder.h b/src/core/algorithm/omega/omega_builder.h new file mode 100644 index 000000000..4fc38b18e --- /dev/null +++ b/src/core/algorithm/omega/omega_builder.h @@ -0,0 +1,67 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include "../hnsw/hnsw_builder.h" + +namespace zvec { +namespace core { + +//! OMEGA Index Builder - wraps HNSW builder +class OmegaBuilder : public IndexBuilder { + public: + //! Constructor + OmegaBuilder(); + + //! Initialize the builder + virtual int init(const IndexMeta &meta, + const ailego::Params ¶ms) override; + + //! Cleanup the builder + virtual int cleanup(void) override; + + //! Train the data (delegate to HNSW) + virtual int train(IndexThreads::Pointer threads, + IndexHolder::Pointer holder) override; + + //! 
Train the data (delegate to HNSW) + virtual int train(const IndexTrainer::Pointer &trainer) override; + + //! Build the index (delegate to HNSW) + virtual int build(IndexThreads::Pointer threads, + IndexHolder::Pointer holder) override; + + //! Dump index into storage (delegate to HNSW) + virtual int dump(const IndexDumper::Pointer &dumper) override; + + //! Retrieve statistics (delegate to HNSW) + virtual const Stats &stats(void) const override { + return hnsw_builder_->stats(); + } + + private: + enum BUILD_STATE { + BUILD_STATE_INIT = 0, + BUILD_STATE_INITED = 1, + BUILD_STATE_TRAINED = 2, + BUILD_STATE_BUILT = 3 + }; + + std::shared_ptr hnsw_builder_; + BUILD_STATE state_{BUILD_STATE_INIT}; +}; + +} // namespace core +} // namespace zvec diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc new file mode 100644 index 000000000..81475f027 --- /dev/null +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -0,0 +1,179 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "omega_searcher.h" +#include + +namespace zvec { +namespace core { + +OmegaSearcher::OmegaSearcher(void) + : hnsw_searcher_(nullptr), + omega_model_(nullptr), + omega_enabled_(false), + use_omega_mode_(false), + target_recall_(0.95f), + min_vector_threshold_(10000), + current_vector_count_(0) {} + +OmegaSearcher::~OmegaSearcher(void) { + this->cleanup(); +} + +int OmegaSearcher::init(const ailego::Params ¶ms) { + if (state_ != STATE_INIT) { + LOG_ERROR("OmegaSearcher already initialized"); + return PROXIMA_BE_ERROR_CODE(DuplicateInit); + } + + params_ = params; + + // Get OMEGA-specific parameters + omega_enabled_ = params.get_as_bool("omega.enabled", false); + target_recall_ = params.get_as_float("omega.target_recall", 0.95f); + min_vector_threshold_ = params.get_as_uint32("omega.min_vector_threshold", 10000); + model_dir_ = params.get_as_string("omega.model_dir", ""); + + // Create underlying HNSW searcher + hnsw_searcher_ = std::make_shared(); + int ret = hnsw_searcher_->init(params); + if (ret != 0) { + LOG_ERROR("Failed to initialize HNSW searcher"); + return ret; + } + + state_ = STATE_INITED; + LOG_INFO("OmegaSearcher initialized (omega_enabled=%d, target_recall=%.2f, " + "min_threshold=%u)", + omega_enabled_, target_recall_, min_vector_threshold_); + return 0; +} + +int OmegaSearcher::cleanup(void) { + if (state_ == STATE_INIT) { + return 0; + } + + // Cleanup OMEGA model + if (omega_model_ != nullptr) { + omega_model_destroy(omega_model_); + omega_model_ = nullptr; + } + + // Cleanup HNSW searcher + if (hnsw_searcher_ != nullptr) { + hnsw_searcher_->cleanup(); + hnsw_searcher_.reset(); + } + + state_ = STATE_INIT; + return 0; +} + +int OmegaSearcher::load(IndexStorage::Pointer container, + IndexMetric::Pointer metric) { + if (state_ != STATE_INITED) { + LOG_ERROR("OmegaSearcher not initialized"); + return PROXIMA_BE_ERROR_CODE(InvalidState); + } + + // Load HNSW index + int ret = hnsw_searcher_->load(container, metric); + if (ret != 0) { + 
LOG_ERROR("Failed to load HNSW index"); + return ret; + } + + // Get vector count from HNSW stats + current_vector_count_ = hnsw_searcher_->stats().total_doc_count; + + // Try to load OMEGA model if enabled and threshold met + use_omega_mode_ = false; + if (omega_enabled_ && current_vector_count_ >= min_vector_threshold_) { + if (!model_dir_.empty()) { + omega_model_ = omega_model_create(); + if (omega_model_ != nullptr) { + ret = omega_model_load(omega_model_, model_dir_.c_str()); + if (ret == 0 && omega_model_is_loaded(omega_model_)) { + use_omega_mode_ = true; + LOG_INFO("OMEGA model loaded successfully from %s", model_dir_.c_str()); + } else { + LOG_WARN("Failed to load OMEGA model from %s, falling back to HNSW", + model_dir_.c_str()); + omega_model_destroy(omega_model_); + omega_model_ = nullptr; + } + } + } else { + LOG_WARN("OMEGA enabled but model_dir not specified, falling back to HNSW"); + } + } else { + if (omega_enabled_) { + LOG_INFO("Vector count (%zu) below threshold (%u), using standard HNSW", + current_vector_count_, min_vector_threshold_); + } + } + + state_ = STATE_LOADED; + return 0; +} + +int OmegaSearcher::unload(void) { + if (state_ != STATE_LOADED) { + return 0; + } + + // Unload OMEGA model + if (omega_model_ != nullptr) { + omega_model_destroy(omega_model_); + omega_model_ = nullptr; + } + use_omega_mode_ = false; + + // Unload HNSW index + if (hnsw_searcher_ != nullptr) { + hnsw_searcher_->unload(); + } + + state_ = STATE_INITED; + return 0; +} + +int OmegaSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta, + uint32_t count, + ContextPointer &context) const { + if (state_ != STATE_LOADED) { + LOG_ERROR("OmegaSearcher not loaded"); + return PROXIMA_BE_ERROR_CODE(InvalidState); + } + + // If OMEGA mode is not active, delegate to HNSW + if (!should_use_omega()) { + return hnsw_searcher_->search_impl(query, qmeta, count, context); + } + + // TODO: Implement adaptive search with OMEGA + // For now, just delegate to HNSW + // 
In the future, this will: + // 1. Create OmegaSearchHandle + // 2. Perform search with dynamic EF adjustment + // 3. Use early stopping based on model predictions + LOG_DEBUG("OMEGA adaptive search not yet implemented, using HNSW"); + return hnsw_searcher_->search_impl(query, qmeta, count, context); +} + +} // namespace core +} // namespace zvec + +INDEX_FACTORY_REGISTER_SEARCHER(zvec::core::OmegaSearcher); diff --git a/src/core/algorithm/omega/omega_searcher.h b/src/core/algorithm/omega/omega_searcher.h new file mode 100644 index 000000000..7a68a1b05 --- /dev/null +++ b/src/core/algorithm/omega/omega_searcher.h @@ -0,0 +1,152 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include "../hnsw/hnsw_searcher.h" +#include "omega/omega_api.h" + +namespace zvec { +namespace core { + +//! OMEGA Index Searcher - wraps HNSW with adaptive search +class OmegaSearcher : public IndexSearcher { + public: + using ContextPointer = IndexSearcher::Context::Pointer; + + public: + OmegaSearcher(void); + ~OmegaSearcher(void); + + OmegaSearcher(const OmegaSearcher &) = delete; + OmegaSearcher &operator=(const OmegaSearcher &) = delete; + + protected: + //! Initialize Searcher + virtual int init(const ailego::Params ¶ms) override; + + //! Cleanup Searcher + virtual int cleanup(void) override; + + //! 
Load Index from storage + virtual int load(IndexStorage::Pointer container, + IndexMetric::Pointer metric) override; + + //! Unload index from storage + virtual int unload(void) override; + + //! KNN Search + virtual int search_impl(const void *query, const IndexQueryMeta &qmeta, + ContextPointer &context) const override { + return search_impl(query, qmeta, 1, context); + } + + //! KNN Search with OMEGA adaptive search + virtual int search_impl(const void *query, const IndexQueryMeta &qmeta, + uint32_t count, + ContextPointer &context) const override; + + //! Linear Search (delegate to HNSW) + virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta, + ContextPointer &context) const override { + return hnsw_searcher_->search_bf_impl(query, qmeta, context); + } + + //! Linear Search (delegate to HNSW) + virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta, + uint32_t count, + ContextPointer &context) const override { + return hnsw_searcher_->search_bf_impl(query, qmeta, count, context); + } + + //! Linear search by primary keys (delegate to HNSW) + virtual int search_bf_by_p_keys_impl( + const void *query, const std::vector> &p_keys, + const IndexQueryMeta &qmeta, ContextPointer &context) const override { + return hnsw_searcher_->search_bf_by_p_keys_impl(query, p_keys, qmeta, + context); + } + + //! Linear search by primary keys (delegate to HNSW) + virtual int search_bf_by_p_keys_impl( + const void *query, const std::vector> &p_keys, + const IndexQueryMeta &qmeta, uint32_t count, + ContextPointer &context) const override { + return hnsw_searcher_->search_bf_by_p_keys_impl(query, p_keys, qmeta, + count, context); + } + + //! Fetch vector by key (delegate to HNSW) + virtual const void *get_vector(uint64_t key) const override { + return hnsw_searcher_->get_vector(key); + } + + //! 
Create a searcher context (delegate to HNSW) + virtual ContextPointer create_context() const override { + return hnsw_searcher_->create_context(); + } + + //! Create a new iterator (delegate to HNSW) + virtual IndexProvider::Pointer create_provider(void) const override { + return hnsw_searcher_->create_provider(); + } + + //! Retrieve statistics (delegate to HNSW) + virtual const Stats &stats(void) const override { + return hnsw_searcher_->stats(); + } + + //! Retrieve meta of index (delegate to HNSW) + virtual const IndexMeta &meta(void) const override { + return hnsw_searcher_->meta(); + } + + //! Retrieve params of index + virtual const ailego::Params ¶ms(void) const override { + return params_; + } + + virtual void print_debug_info() override { + hnsw_searcher_->print_debug_info(); + } + + private: + //! Check if OMEGA mode should be used + bool should_use_omega() const { + return omega_enabled_ && use_omega_mode_ && + omega_model_ != nullptr && + omega_model_is_loaded(omega_model_); + } + + private: + enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 }; + + // Underlying HNSW searcher + std::shared_ptr hnsw_searcher_; + + // OMEGA components + OmegaModelHandle omega_model_; + bool omega_enabled_; + bool use_omega_mode_; + float target_recall_; + uint32_t min_vector_threshold_; + size_t current_vector_count_; + std::string model_dir_; + + ailego::Params params_{}; + State state_{STATE_INIT}; +}; + +} // namespace core +} // namespace zvec diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc new file mode 100644 index 000000000..7be599a89 --- /dev/null +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -0,0 +1,53 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "omega_streamer.h" +#include + +namespace zvec { +namespace core { + +OmegaStreamer::OmegaStreamer(void) : hnsw_streamer_(nullptr) {} + +OmegaStreamer::~OmegaStreamer(void) { + this->cleanup(); +} + +int OmegaStreamer::init(const IndexMeta &imeta, const ailego::Params ¶ms) { + params_ = params; + + // Create underlying HNSW streamer + hnsw_streamer_ = std::make_shared(); + int ret = hnsw_streamer_->init(imeta, params); + if (ret != 0) { + LOG_ERROR("Failed to initialize HNSW streamer"); + return ret; + } + + LOG_INFO("OmegaStreamer initialized"); + return 0; +} + +int OmegaStreamer::cleanup(void) { + if (hnsw_streamer_ != nullptr) { + hnsw_streamer_->cleanup(); + hnsw_streamer_.reset(); + } + return 0; +} + +} // namespace core +} // namespace zvec + +INDEX_FACTORY_REGISTER_STREAMER(zvec::core::OmegaStreamer); diff --git a/src/core/algorithm/omega/omega_streamer.h b/src/core/algorithm/omega/omega_streamer.h new file mode 100644 index 000000000..82af4ebbd --- /dev/null +++ b/src/core/algorithm/omega/omega_streamer.h @@ -0,0 +1,143 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include "../hnsw/hnsw_streamer.h" + +namespace zvec { +namespace core { + +//! OMEGA Index Streamer - wraps HNSW streamer +class OmegaStreamer : public IndexStreamer { + public: + using ContextPointer = IndexStreamer::Context::Pointer; + + OmegaStreamer(void); + virtual ~OmegaStreamer(void); + + OmegaStreamer(const OmegaStreamer &streamer) = delete; + OmegaStreamer &operator=(const OmegaStreamer &streamer) = delete; + + protected: + //! Initialize Streamer + virtual int init(const IndexMeta &imeta, + const ailego::Params ¶ms) override; + + //! Cleanup Streamer + virtual int cleanup(void) override; + + //! Create a context (delegate to HNSW) + virtual Context::Pointer create_context(void) const override { + return hnsw_streamer_->create_context(); + } + + //! Create a new iterator (delegate to HNSW) + virtual IndexProvider::Pointer create_provider(void) const override { + return hnsw_streamer_->create_provider(); + } + + //! Add a vector into index (delegate to HNSW) + virtual int add_impl(uint64_t pkey, const void *query, + const IndexQueryMeta &qmeta, + Context::Pointer &context) override { + return hnsw_streamer_->add_impl(pkey, query, qmeta, context); + } + + //! Add a vector with id into index (delegate to HNSW) + virtual int add_with_id_impl(uint32_t id, const void *query, + const IndexQueryMeta &qmeta, + Context::Pointer &context) override { + return hnsw_streamer_->add_with_id_impl(id, query, qmeta, context); + } + + //! Similarity search (delegate to HNSW) + virtual int search_impl(const void *query, const IndexQueryMeta &qmeta, + Context::Pointer &context) const override { + return hnsw_streamer_->search_impl(query, qmeta, context); + } + + //! 
Similarity search (delegate to HNSW) + virtual int search_impl(const void *query, const IndexQueryMeta &qmeta, + uint32_t count, + Context::Pointer &context) const override { + return hnsw_streamer_->search_impl(query, qmeta, count, context); + } + + //! Similarity brute force search (delegate to HNSW) + virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta, + Context::Pointer &context) const override { + return hnsw_streamer_->search_bf_impl(query, qmeta, context); + } + + //! Similarity brute force search (delegate to HNSW) + virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta, + uint32_t count, + Context::Pointer &context) const override { + return hnsw_streamer_->search_bf_impl(query, qmeta, count, context); + } + + //! Linear search by primary keys (delegate to HNSW) + virtual int search_bf_by_p_keys_impl( + const void *query, const std::vector> &p_keys, + const IndexQueryMeta &qmeta, ContextPointer &context) const override { + return hnsw_streamer_->search_bf_by_p_keys_impl(query, p_keys, qmeta, + context); + } + + //! Linear search by primary keys (delegate to HNSW) + virtual int search_bf_by_p_keys_impl( + const void *query, const std::vector> &p_keys, + const IndexQueryMeta &qmeta, uint32_t count, + ContextPointer &context) const override { + return hnsw_streamer_->search_bf_by_p_keys_impl(query, p_keys, qmeta, + count, context); + } + + //! Remove a vector from index (delegate to HNSW) + virtual int remove_impl(uint64_t pkey, Context::Pointer &context) override { + return hnsw_streamer_->remove_impl(pkey, context); + } + + //! Fetch vector by key (delegate to HNSW) + virtual const void *get_vector(uint64_t key) const override { + return hnsw_streamer_->get_vector(key); + } + + //! Retrieve statistics (delegate to HNSW) + virtual const Stats &stats(void) const override { + return hnsw_streamer_->stats(); + } + + //! 
Retrieve meta of index (delegate to HNSW) + virtual const IndexMeta &meta(void) const override { + return hnsw_streamer_->meta(); + } + + //! Retrieve params of index + virtual const ailego::Params ¶ms(void) const override { + return params_; + } + + virtual void print_debug_info() override { + hnsw_streamer_->print_debug_info(); + } + + private: + std::shared_ptr hnsw_streamer_; + ailego::Params params_{}; +}; + +} // namespace core +} // namespace zvec From 4fbdbd4f34eb0dc74145bcb157b04bc9063fc1fa Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 30 Jan 2026 19:16:58 +0800 Subject: [PATCH 004/126] feat: add OMEGA index support with Python bindings and tests - Add OMEGA index type to zvec type system - Implement OmegaIndexParams class for index configuration - Add Python bindings for OmegaIndexParam - Integrate OMEGA searcher with HNSW fallback mechanism - Add comprehensive Python unit tests for OMEGA functionality - Update schema validation to support OMEGA index type Tests verify that OMEGA index correctly falls back to HNSW behavior when OMEGA-specific features are not enabled, ensuring full compatibility. 
--- python/tests/test_omega_fallback.py | 229 ++++++++++ python/zvec/__init__.py | 2 + python/zvec/model/param/__init__.py | 2 + .../python/model/param/python_param.cc | 83 ++++ src/binding/python/typing/python_type.cc | 3 +- src/core/CMakeLists.txt | 2 +- src/core/algorithm/hnsw/hnsw_searcher.h | 4 +- src/core/algorithm/omega/omega_builder.cc | 24 +- src/core/algorithm/omega/omega_searcher.cc | 277 +++++++++--- src/core/algorithm/omega/omega_searcher.h | 51 +-- src/core/algorithm/omega/omega_streamer.cc | 19 +- src/core/algorithm/omega/omega_streamer.h | 23 +- src/db/index/common/schema.cc | 2 +- src/include/zvec/db/index_params.h | 61 ++- src/include/zvec/db/type.h | 1 + tests/core/algorithm/CMakeLists.txt | 1 + tests/core/algorithm/omega/CMakeLists.txt | 14 + .../algorithm/omega/omega_searcher_test.cc | 425 ++++++++++++++++++ thirdparty/CMakeLists.txt | 1 + thirdparty/omega | 2 +- 20 files changed, 1088 insertions(+), 138 deletions(-) create mode 100644 python/tests/test_omega_fallback.py create mode 100644 tests/core/algorithm/omega/CMakeLists.txt create mode 100644 tests/core/algorithm/omega/omega_searcher_test.cc diff --git a/python/tests/test_omega_fallback.py b/python/tests/test_omega_fallback.py new file mode 100644 index 000000000..05958cb7f --- /dev/null +++ b/python/tests/test_omega_fallback.py @@ -0,0 +1,229 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations + +import tempfile +from pathlib import Path + +import numpy as np +import pytest +import zvec +from zvec import ( + CollectionOption, + CollectionSchema, + DataType, + Doc, + FieldSchema, + HnswIndexParam, + IndexOption, + MetricType, + OmegaIndexParam, + VectorQuery, + VectorSchema, +) + + +@pytest.fixture(scope="module", autouse=True) +def init_zvec(): + """Initialize zvec once for all tests in this module.""" + zvec.init() + + +def test_omega_index_param_creation(): + """Test that OmegaIndexParam can be created with various parameters.""" + # Default parameters + param1 = OmegaIndexParam() + assert param1.m == 50 + assert param1.ef_construction == 500 + assert param1.metric_type == MetricType.IP + + # Custom parameters + param2 = OmegaIndexParam( + metric_type=MetricType.L2, + m=32, + ef_construction=200 + ) + assert param2.m == 32 + assert param2.ef_construction == 200 + assert param2.metric_type == MetricType.L2 + + +def test_omega_collection_creation(): + """Test creating a collection with OMEGA index.""" + with tempfile.TemporaryDirectory() as tmpdir: + db_path = Path(tmpdir) / "test_omega_db" + + # Create schema with OMEGA index + schema = CollectionSchema( + name="test_omega_collection", + fields=[ + FieldSchema("id", DataType.INT64, nullable=False), + FieldSchema("text", DataType.STRING, nullable=False), + ], + vectors=[ + VectorSchema( + "embedding", + DataType.VECTOR_FP32, + dimension=128, + index_param=OmegaIndexParam( + metric_type=MetricType.L2, + m=16, + ef_construction=200 + ), + ), + ], + ) + + # Create collection + collection = zvec.create_and_open( + str(db_path), + schema, + CollectionOption(read_only=False, enable_mmap=False) + ) + + # Insert some test data + docs = [ + Doc( + id=str(i), + fields={"id": i, "text": f"doc_{i}"}, + vectors={"embedding": np.random.randn(128).astype(np.float32).tolist()} + ) + for i in range(100) + ] + + status = collection.insert(docs) + # insert() returns a list of Status 
objects for multiple docs + assert len(status) == len(docs), "Insert returned wrong number of statuses" + for s in status: + assert s.ok(), f"Insert failed: {s.message()}" + + # Create index + collection.create_index( + field_name="embedding", + index_param=OmegaIndexParam(metric_type=MetricType.L2, m=16, ef_construction=200), + option=IndexOption() + ) + + # Query + query_vector = np.random.randn(128).astype(np.float32).tolist() + results = collection.query( + vectors=VectorQuery(field_name="embedding", vector=query_vector), + topk=10 + ) + + assert len(results) > 0, "Query returned no results" + assert len(results) <= 10, "Query returned more than top_k results" + + +def test_omega_vs_hnsw_compatibility(): + """Test that OMEGA index produces similar results to HNSW index.""" + with tempfile.TemporaryDirectory() as tmpdir: + db_path_hnsw = Path(tmpdir) / "test_hnsw_db" + db_path_omega = Path(tmpdir) / "test_omega_db" + + # Create identical schemas except for index type + def create_schema(name, index_param): + return CollectionSchema( + name=name, + fields=[ + FieldSchema("id", DataType.INT64, nullable=False), + ], + vectors=[ + VectorSchema( + "embedding", + DataType.VECTOR_FP32, + dimension=64, + index_param=index_param, + ), + ], + ) + + # Create test data + np.random.seed(42) + test_docs = [ + Doc( + id=str(i), + fields={"id": i}, + vectors={"embedding": np.random.randn(64).astype(np.float32).tolist()} + ) + for i in range(200) + ] + + query_vector = np.random.randn(64).astype(np.float32).tolist() + + # Test with HNSW + schema_hnsw = create_schema( + "test_hnsw", + HnswIndexParam(metric_type=MetricType.L2, m=16, ef_construction=200) + ) + collection_hnsw = zvec.create_and_open( + str(db_path_hnsw), + schema_hnsw, + CollectionOption(read_only=False, enable_mmap=False) + ) + collection_hnsw.insert(test_docs) + collection_hnsw.create_index( + field_name="embedding", + index_param=HnswIndexParam(metric_type=MetricType.L2, m=16, ef_construction=200), + 
option=IndexOption() + ) + results_hnsw = collection_hnsw.query( + vectors=VectorQuery(field_name="embedding", vector=query_vector), + topk=10 + ) + + # Test with OMEGA + schema_omega = create_schema( + "test_omega", + OmegaIndexParam(metric_type=MetricType.L2, m=16, ef_construction=200) + ) + collection_omega = zvec.create_and_open( + str(db_path_omega), + schema_omega, + CollectionOption(read_only=False, enable_mmap=False) + ) + collection_omega.insert(test_docs) + collection_omega.create_index( + field_name="embedding", + index_param=OmegaIndexParam(metric_type=MetricType.L2, m=16, ef_construction=200), + option=IndexOption() + ) + results_omega = collection_omega.query( + vectors=VectorQuery(field_name="embedding", vector=query_vector), + topk=10 + ) + + # Both should return results + assert len(results_hnsw) > 0, "HNSW query returned no results" + assert len(results_omega) > 0, "OMEGA query returned no results" + + # Results should have the same number of documents + assert len(results_hnsw) == len(results_omega), \ + f"Different number of results: HNSW={len(results_hnsw)}, OMEGA={len(results_omega)}" + + # Verify that OMEGA fallback produces identical results to HNSW + # Since both use the same index structure (HNSW) with identical parameters, + # they should return the exact same documents in the same order with the same scores + for i, (doc_hnsw, doc_omega) in enumerate(zip(results_hnsw, results_omega)): + assert doc_hnsw.id == doc_omega.id, \ + f"Document ID mismatch at position {i}: HNSW={doc_hnsw.id}, OMEGA={doc_omega.id}" + + # Scores should be identical (or very close due to floating point) + assert abs(doc_hnsw.score - doc_omega.score) < 1e-5, \ + f"Score mismatch at position {i} for doc {doc_hnsw.id}: " \ + f"HNSW={doc_hnsw.score}, OMEGA={doc_omega.score}, diff={abs(doc_hnsw.score - doc_omega.score)}" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/python/zvec/__init__.py b/python/zvec/__init__.py index 
ec35829d9..97240f98c 100644
--- a/python/zvec/__init__.py
+++ b/python/zvec/__init__.py
@@ -48,6 +48,7 @@
     InvertIndexParam,
     IVFIndexParam,
     IVFQueryParam,
+    OmegaIndexParam,
     OptimizeOption,
 )
 from .model.param.vector_query import VectorQuery
@@ -92,6 +93,7 @@
     "HnswIndexParam",
     "FlatIndexParam",
     "IVFIndexParam",
+    "OmegaIndexParam",
     "CollectionOption",
     "IndexOption",
     "OptimizeOption",
diff --git a/python/zvec/model/param/__init__.py b/python/zvec/model/param/__init__.py
index 4dbeb249b..cc89a5e15 100644
--- a/python/zvec/model/param/__init__.py
+++ b/python/zvec/model/param/__init__.py
@@ -24,6 +24,7 @@
     InvertIndexParam,
     IVFIndexParam,
     IVFQueryParam,
+    OmegaIndexParam,
     OptimizeOption,
 )
 
@@ -38,5 +39,6 @@
     "IVFQueryParam",
     "IndexOption",
     "InvertIndexParam",
+    "OmegaIndexParam",
     "OptimizeOption",
 ]
diff --git a/src/binding/python/model/param/python_param.cc b/src/binding/python/model/param/python_param.cc
index 98c2adf47..48333f62a 100644
--- a/src/binding/python/model/param/python_param.cc
+++ b/src/binding/python/model/param/python_param.cc
@@ -540,6 +540,89 @@ Constructs an IVFIndexParam instance.
                 t[0].cast(), t[1].cast(), t[2].cast(),
                 t[3].cast(), t[4].cast());
           }));
+
+  // OmegaIndexParams
+  py::class_<OmegaIndexParams, VectorIndexParams,
+             std::shared_ptr<OmegaIndexParams>>
+      omega_params(m, "OmegaIndexParam", R"pbdoc(
+Parameters for configuring an OMEGA index.
+
+OMEGA is an advanced graph-based index that can fall back to HNSW when omega
+functionality is disabled. This class encapsulates its construction hyperparameters.
+
+Attributes:
+    metric_type (MetricType): Distance metric used for similarity computation.
+        Default is ``MetricType.IP`` (inner product).
+    m (int): Number of bi-directional links created for every new element
+        during construction. Higher values improve accuracy but increase
+        memory usage and construction time. Default is ``kDefaultHnswNeighborCnt``.
+    ef_construction (int): Size of the dynamic candidate list for nearest
+        neighbors during index construction. Larger values yield better
+        graph quality at the cost of slower build time. 
Default is 500.
+    quantize_type (QuantizeType): Optional quantization type for vector
+        compression (e.g., FP16, INT8). Default is ``QuantizeType.UNDEFINED``,
+        which disables quantization.
+
+Examples:
+    >>> from zvec.typing import MetricType, QuantizeType
+    >>> params = OmegaIndexParam(
+    ...     metric_type=MetricType.COSINE,
+    ...     m=16,
+    ...     ef_construction=200,
+    ...     quantize_type=QuantizeType.INT8
+    ... )
+    >>> print(params)
+    {"metric_type":COSINE, "m":16, "ef_construction":200, "quantize_type":INT8}
+)pbdoc");
+  omega_params
+      .def(py::init<MetricType, int, int, QuantizeType>(),
+           py::arg("metric_type") = MetricType::IP,
+           py::arg("m") = core_interface::kDefaultHnswNeighborCnt,
+           py::arg("ef_construction") =
+               core_interface::kDefaultHnswEfConstruction,
+           py::arg("quantize_type") = QuantizeType::UNDEFINED)
+      .def_property_readonly(
+          "m", &OmegaIndexParams::m,
+          "int: Maximum number of neighbors per node in upper layers.")
+      .def_property_readonly(
+          "ef_construction", &OmegaIndexParams::ef_construction,
+          "int: Candidate list size during index construction.")
+      .def(
+          "to_dict",
+          [](const OmegaIndexParams &self) -> py::dict {
+            py::dict dict;
+            dict["type"] = index_type_to_string(self.type());
+            dict["metric_type"] = metric_type_to_string(self.metric_type());
+            dict["m"] = self.m();
+            dict["ef_construction"] = self.ef_construction();
+            dict["quantize_type"] =
+                quantize_type_to_string(self.quantize_type());
+            return dict;
+          },
+          "Convert to dictionary with all fields")
+      .def("__repr__",
+           [](const OmegaIndexParams &self) -> std::string {
+             return "{"
+                    "\"metric_type\":" +
+                    metric_type_to_string(self.metric_type()) +
+                    ", \"m\":" + std::to_string(self.m()) +
+                    ", \"ef_construction\":" +
+                    std::to_string(self.ef_construction()) +
+                    ", \"quantize_type\":" +
+                    quantize_type_to_string(self.quantize_type()) + "}";
+           })
+      .def(py::pickle(
+          [](const OmegaIndexParams &self) {
+            return py::make_tuple(self.metric_type(), self.m(),
+                                  self.ef_construction(), self.quantize_type());
+          },
+          [](py::tuple t) 
{ + if (t.size() != 4) + throw std::runtime_error("Invalid state for OmegaIndexParams"); + return std::make_shared( + t[0].cast(), t[1].cast(), t[2].cast(), + t[3].cast()); + })); } void ZVecPyParams::bind_query_params(py::module_ &m) { diff --git a/src/binding/python/typing/python_type.cc b/src/binding/python/typing/python_type.cc index ee057cf3a..1dfdaea49 100644 --- a/src/binding/python/typing/python_type.cc +++ b/src/binding/python/typing/python_type.cc @@ -98,7 +98,8 @@ Enumeration of supported index types in Zvec. .value("HNSW", IndexType::HNSW) .value("IVF", IndexType::IVF) .value("FLAT", IndexType::FLAT) - .value("INVERT", IndexType::INVERT); + .value("INVERT", IndexType::INVERT) + .value("OMEGA", IndexType::OMEGA); } void ZVecPyTyping::bind_metric_types(pybind11::module_ &m) { diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 7742db594..03f9bbb98 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -15,7 +15,7 @@ file(GLOB_RECURSE ALL_CORE_SRCS *.cc *.c *.h) cc_library( NAME zvec_core STATIC STRICT PACKED SRCS ${ALL_CORE_SRCS} - LIBS zvec_ailego sparsehash magic_enum + LIBS zvec_ailego sparsehash magic_enum omega INCS . ${PROJECT_ROOT_DIR}/src/core VERSION "${GIT_SRCS_VER}" ) \ No newline at end of file diff --git a/src/core/algorithm/hnsw/hnsw_searcher.h b/src/core/algorithm/hnsw/hnsw_searcher.h index 4c8a31466..5113a2461 100644 --- a/src/core/algorithm/hnsw/hnsw_searcher.h +++ b/src/core/algorithm/hnsw/hnsw_searcher.h @@ -111,6 +111,9 @@ class HnswSearcher : public IndexSearcher { //! 
current streamer/searcher int update_context(HnswContext *ctx) const; + protected: + uint32_t ef_{HnswEntity::kDefaultEf}; + private: enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 }; @@ -121,7 +124,6 @@ class HnswSearcher : public IndexSearcher { IndexMeta meta_{}; ailego::Params params_{}; Stats stats_; - uint32_t ef_{HnswEntity::kDefaultEf}; uint32_t max_scan_num_{0U}; uint32_t bruteforce_threshold_{HnswEntity::kDefaultBruteForceThreshold}; float max_scan_ratio_{HnswEntity::kDefaultScanRatio}; diff --git a/src/core/algorithm/omega/omega_builder.cc b/src/core/algorithm/omega/omega_builder.cc index c64713cd5..e9e5bc0ce 100644 --- a/src/core/algorithm/omega/omega_builder.cc +++ b/src/core/algorithm/omega/omega_builder.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "omega_builder.h" -#include +#include +#include +#include namespace zvec { namespace core { @@ -23,9 +25,15 @@ OmegaBuilder::OmegaBuilder() : hnsw_builder_(nullptr) {} int OmegaBuilder::init(const IndexMeta &meta, const ailego::Params ¶ms) { if (state_ != BUILD_STATE_INIT) { LOG_ERROR("OmegaBuilder already initialized"); - return PROXIMA_BE_ERROR_CODE(DuplicateInit); + return IndexError_Duplicate; } + // TODO: Fix design - cannot call protected init method of HnswBuilder + // For now, return NotImplemented error + LOG_ERROR("OmegaBuilder is not yet fully implemented - wrapper design needs fixing"); + return IndexError_NotImplemented; + + /* // Create underlying HNSW builder hnsw_builder_ = std::make_shared(); int ret = hnsw_builder_->init(meta, params); @@ -37,6 +45,7 @@ int OmegaBuilder::init(const IndexMeta &meta, const ailego::Params ¶ms) { state_ = BUILD_STATE_INITED; LOG_INFO("OmegaBuilder initialized"); return 0; + */ } int OmegaBuilder::cleanup(void) { @@ -57,7 +66,7 @@ int OmegaBuilder::train(IndexThreads::Pointer threads, IndexHolder::Pointer holder) { if (state_ != BUILD_STATE_INITED) { LOG_ERROR("OmegaBuilder not initialized"); - return 
PROXIMA_BE_ERROR_CODE(InvalidState); + return IndexError_NoReady; } int ret = hnsw_builder_->train(threads, holder); @@ -73,7 +82,7 @@ int OmegaBuilder::train(IndexThreads::Pointer threads, int OmegaBuilder::train(const IndexTrainer::Pointer &trainer) { if (state_ != BUILD_STATE_INITED) { LOG_ERROR("OmegaBuilder not initialized"); - return PROXIMA_BE_ERROR_CODE(InvalidState); + return IndexError_NoReady; } int ret = hnsw_builder_->train(trainer); @@ -90,7 +99,7 @@ int OmegaBuilder::build(IndexThreads::Pointer threads, IndexHolder::Pointer holder) { if (state_ != BUILD_STATE_TRAINED) { LOG_ERROR("OmegaBuilder not trained"); - return PROXIMA_BE_ERROR_CODE(InvalidState); + return IndexError_NoReady; } int ret = hnsw_builder_->build(threads, holder); @@ -107,7 +116,7 @@ int OmegaBuilder::build(IndexThreads::Pointer threads, int OmegaBuilder::dump(const IndexDumper::Pointer &dumper) { if (state_ != BUILD_STATE_BUILT) { LOG_ERROR("OmegaBuilder not built"); - return PROXIMA_BE_ERROR_CODE(InvalidState); + return IndexError_NoReady; } int ret = hnsw_builder_->dump(dumper); @@ -123,4 +132,5 @@ int OmegaBuilder::dump(const IndexDumper::Pointer &dumper) { } // namespace core } // namespace zvec -INDEX_FACTORY_REGISTER_BUILDER(zvec::core::OmegaBuilder); +// TODO: Fix OmegaBuilder design - it tries to call protected methods of HnswBuilder +// INDEX_FACTORY_REGISTER_BUILDER(zvec::core::OmegaBuilder); diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 81475f027..19ed34101 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -13,13 +13,19 @@ // limitations under the License. 
#include "omega_searcher.h" -#include +#include +#include +#include +#include "../hnsw/hnsw_context.h" +#include +#include +#include namespace zvec { namespace core { OmegaSearcher::OmegaSearcher(void) - : hnsw_searcher_(nullptr), + : HnswSearcher(), omega_model_(nullptr), omega_enabled_(false), use_omega_mode_(false), @@ -32,28 +38,19 @@ OmegaSearcher::~OmegaSearcher(void) { } int OmegaSearcher::init(const ailego::Params ¶ms) { - if (state_ != STATE_INIT) { - LOG_ERROR("OmegaSearcher already initialized"); - return PROXIMA_BE_ERROR_CODE(DuplicateInit); - } - - params_ = params; - // Get OMEGA-specific parameters - omega_enabled_ = params.get_as_bool("omega.enabled", false); - target_recall_ = params.get_as_float("omega.target_recall", 0.95f); - min_vector_threshold_ = params.get_as_uint32("omega.min_vector_threshold", 10000); - model_dir_ = params.get_as_string("omega.model_dir", ""); - - // Create underlying HNSW searcher - hnsw_searcher_ = std::make_shared(); - int ret = hnsw_searcher_->init(params); + omega_enabled_ = params.has("omega.enabled") ? params.get_as_bool("omega.enabled") : false; + target_recall_ = params.has("omega.target_recall") ? params.get_as_float("omega.target_recall") : 0.95f; + min_vector_threshold_ = params.has("omega.min_vector_threshold") ? params.get_as_uint32("omega.min_vector_threshold") : 10000; + model_dir_ = params.has("omega.model_dir") ? 
params.get_as_string("omega.model_dir") : ""; + + // Call parent class init + int ret = HnswSearcher::init(params); if (ret != 0) { LOG_ERROR("Failed to initialize HNSW searcher"); return ret; } - state_ = STATE_INITED; LOG_INFO("OmegaSearcher initialized (omega_enabled=%d, target_recall=%.2f, " "min_threshold=%u)", omega_enabled_, target_recall_, min_vector_threshold_); @@ -61,42 +58,27 @@ int OmegaSearcher::init(const ailego::Params ¶ms) { } int OmegaSearcher::cleanup(void) { - if (state_ == STATE_INIT) { - return 0; - } - // Cleanup OMEGA model if (omega_model_ != nullptr) { omega_model_destroy(omega_model_); omega_model_ = nullptr; } - // Cleanup HNSW searcher - if (hnsw_searcher_ != nullptr) { - hnsw_searcher_->cleanup(); - hnsw_searcher_.reset(); - } - - state_ = STATE_INIT; - return 0; + // Call parent class cleanup + return HnswSearcher::cleanup(); } int OmegaSearcher::load(IndexStorage::Pointer container, IndexMetric::Pointer metric) { - if (state_ != STATE_INITED) { - LOG_ERROR("OmegaSearcher not initialized"); - return PROXIMA_BE_ERROR_CODE(InvalidState); - } - - // Load HNSW index - int ret = hnsw_searcher_->load(container, metric); + // Load HNSW index using parent class + int ret = HnswSearcher::load(container, metric); if (ret != 0) { LOG_ERROR("Failed to load HNSW index"); return ret; } // Get vector count from HNSW stats - current_vector_count_ = hnsw_searcher_->stats().total_doc_count; + current_vector_count_ = stats().loaded_count(); // Try to load OMEGA model if enabled and threshold met use_omega_mode_ = false; @@ -125,15 +107,10 @@ int OmegaSearcher::load(IndexStorage::Pointer container, } } - state_ = STATE_LOADED; return 0; } int OmegaSearcher::unload(void) { - if (state_ != STATE_LOADED) { - return 0; - } - // Unload OMEGA model if (omega_model_ != nullptr) { omega_model_destroy(omega_model_); @@ -141,39 +118,207 @@ int OmegaSearcher::unload(void) { } use_omega_mode_ = false; - // Unload HNSW index - if (hnsw_searcher_ != nullptr) { - 
hnsw_searcher_->unload(); - } - - state_ = STATE_INITED; - return 0; + // Call parent class unload + return HnswSearcher::unload(); } int OmegaSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, ContextPointer &context) const { - if (state_ != STATE_LOADED) { - LOG_ERROR("OmegaSearcher not loaded"); - return PROXIMA_BE_ERROR_CODE(InvalidState); + // If OMEGA mode is not active, delegate to parent HNSW + if (!should_use_omega()) { + return HnswSearcher::search_impl(query, qmeta, count, context); } - // If OMEGA mode is not active, delegate to HNSW - if (!should_use_omega()) { - return hnsw_searcher_->search_impl(query, qmeta, count, context); + // Use OMEGA adaptive search + return adaptive_search(query, qmeta, count, context); +} + +int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmeta, + uint32_t count, + ContextPointer &context) const { + // Create OMEGA search context with parameters (stateful interface) + OmegaSearchHandle omega_search = omega_search_create_with_params( + omega_model_, target_recall_, count, 100); // window_size=100 + + if (omega_search == nullptr) { + LOG_WARN("Failed to create OMEGA search context, falling back to HNSW"); + return HnswSearcher::search_impl(query, qmeta, count, context); + } + + // Cast context to HnswContext to access HNSW-specific features + auto *hnsw_ctx = dynamic_cast(context.get()); + if (hnsw_ctx == nullptr) { + LOG_ERROR("Context is not HnswContext"); + omega_search_destroy(omega_search); + return IndexError_InvalidArgument; + } + + // Initialize query in distance calculator + hnsw_ctx->reset_query(query); + + // Get entity and distance calculator + const auto &entity = hnsw_ctx->get_entity(); + auto &dc = hnsw_ctx->dist_calculator(); + auto &visit_filter = hnsw_ctx->visit_filter(); + auto &candidates = hnsw_ctx->candidates(); + auto &topk_heap = hnsw_ctx->topk_heap(); + + // Use ef from parent class (now protected, so accessible) + uint32_t ef = ef_; + 
topk_heap.limit(std::max(ef, count)); + + // Get entry point + auto max_level = entity.cur_max_level(); + auto entry_point = entity.entry_point(); + + if (entry_point == kInvalidNodeId) { + omega_search_destroy(omega_search); + return 0; + } + + // Navigate to layer 0 + dist_t dist = dc.dist(entry_point); + for (level_t cur_level = max_level; cur_level >= 1; --cur_level) { + const Neighbors neighbors = entity.get_neighbors(cur_level, entry_point); + if (neighbors.size() == 0) break; + + std::vector neighbor_vec_blocks; + int ret = entity.get_vector(&neighbors[0], neighbors.size(), neighbor_vec_blocks); + if (ret != 0) break; + + bool find_closer = false; + for (uint32_t i = 0; i < neighbors.size(); ++i) { + const void *neighbor_vec = neighbor_vec_blocks[i].data(); + dist_t cur_dist = dc.dist(neighbor_vec); + if (cur_dist < dist) { + entry_point = neighbors[i]; + dist = cur_dist; + find_closer = true; + } + } + if (!find_closer) break; } - // TODO: Implement adaptive search with OMEGA - // For now, just delegate to HNSW - // In the future, this will: - // 1. Create OmegaSearchHandle - // 2. Perform search with dynamic EF adjustment - // 3. 
Use early stopping based on model predictions - LOG_DEBUG("OMEGA adaptive search not yet implemented, using HNSW"); - return hnsw_searcher_->search_impl(query, qmeta, count, context); + // Set dist_start for OMEGA + omega_search_set_dist_start(omega_search, dist); + + // Now perform OMEGA-enhanced search on layer 0 + candidates.clear(); + visit_filter.clear(); + topk_heap.clear(); + + // Add entry point to search + visit_filter.set_visited(entry_point); + topk_heap.emplace(entry_point, dist); + candidates.emplace(entry_point, dist); + + // Report initial visit to OMEGA + omega_search_report_visit(omega_search, entry_point, dist, 1); // is_in_topk=1 + + dist_t lowerBound = dist; + + // Main search loop with OMEGA predictions + while (!candidates.empty()) { + auto top = candidates.begin(); + node_id_t current_node = top->first; + dist_t candidate_dist = top->second; + + // Standard HNSW stopping condition + if (candidate_dist > lowerBound && topk_heap.size() >= ef) { + break; + } + + // OMEGA early stopping check + if (omega_search_should_predict(omega_search)) { + if (omega_search_should_stop(omega_search)) { + int hops, cmps, collected_gt; + omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); + LOG_DEBUG("OMEGA early stop: cmps=%d, hops=%d, collected_gt=%d", + cmps, hops, collected_gt); + break; + } + } + + candidates.pop(); + + // Report hop to OMEGA + omega_search_report_hop(omega_search); + + // Get neighbors of current node + const Neighbors neighbors = entity.get_neighbors(0, current_node); + if (neighbors.size() == 0) continue; + + // Prepare to compute distances + std::vector unvisited_neighbors; + for (uint32_t i = 0; i < neighbors.size(); ++i) { + node_id_t neighbor = neighbors[i]; + if (!visit_filter.visited(neighbor)) { + visit_filter.set_visited(neighbor); + unvisited_neighbors.push_back(neighbor); + } + } + + if (unvisited_neighbors.empty()) continue; + + // Get neighbor vectors + std::vector neighbor_vec_blocks; + int ret = 
entity.get_vector(unvisited_neighbors.data(), + unvisited_neighbors.size(), + neighbor_vec_blocks); + if (ret != 0) break; + + // Compute distances and update candidates + for (size_t i = 0; i < unvisited_neighbors.size(); ++i) { + node_id_t neighbor = unvisited_neighbors[i]; + const void *neighbor_vec = neighbor_vec_blocks[i].data(); + dist_t neighbor_dist = dc.dist(neighbor_vec); + + // Check if this node will be in topk + bool is_in_topk = (topk_heap.size() < ef || neighbor_dist < lowerBound); + + // Report visit to OMEGA + omega_search_report_visit(omega_search, neighbor, neighbor_dist, is_in_topk ? 1 : 0); + + // Consider this candidate + if (is_in_topk) { + candidates.emplace(neighbor, neighbor_dist); + topk_heap.emplace(neighbor, neighbor_dist); + + // Update lowerBound + if (neighbor_dist < lowerBound) { + lowerBound = neighbor_dist; + } + + // Remove excess from topk_heap + while (topk_heap.size() > ef) { + topk_heap.pop(); + } + + // Update lowerBound to the worst distance in topk + if (!topk_heap.empty() && topk_heap.size() >= ef) { + lowerBound = topk_heap[0].second; // Max heap, so [0] is the worst + } + } + } + } + + // Convert results to context format + hnsw_ctx->topk_to_result(); + + // Get final statistics + int hops, cmps, collected_gt; + omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); + LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu", + cmps, hops, topk_heap.size()); + + // Cleanup + omega_search_destroy(omega_search); + + return 0; } +INDEX_FACTORY_REGISTER_SEARCHER(OmegaSearcher); + } // namespace core } // namespace zvec - -INDEX_FACTORY_REGISTER_SEARCHER(zvec::core::OmegaSearcher); diff --git a/src/core/algorithm/omega/omega_searcher.h b/src/core/algorithm/omega/omega_searcher.h index 7a68a1b05..126c04a88 100644 --- a/src/core/algorithm/omega/omega_searcher.h +++ b/src/core/algorithm/omega/omega_searcher.h @@ -15,13 +15,13 @@ #include #include "../hnsw/hnsw_searcher.h" -#include "omega/omega_api.h" 
+#include namespace zvec { namespace core { -//! OMEGA Index Searcher - wraps HNSW with adaptive search -class OmegaSearcher : public IndexSearcher { +//! OMEGA Index Searcher - extends HNSW with adaptive search +class OmegaSearcher : public HnswSearcher { public: using ContextPointer = IndexSearcher::Context::Pointer; @@ -57,36 +57,8 @@ class OmegaSearcher : public IndexSearcher { uint32_t count, ContextPointer &context) const override; - //! Linear Search (delegate to HNSW) - virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta, - ContextPointer &context) const override { - return hnsw_searcher_->search_bf_impl(query, qmeta, context); - } - - //! Linear Search (delegate to HNSW) - virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta, - uint32_t count, - ContextPointer &context) const override { - return hnsw_searcher_->search_bf_impl(query, qmeta, count, context); - } - - //! Linear search by primary keys (delegate to HNSW) - virtual int search_bf_by_p_keys_impl( - const void *query, const std::vector> &p_keys, - const IndexQueryMeta &qmeta, ContextPointer &context) const override { - return hnsw_searcher_->search_bf_by_p_keys_impl(query, p_keys, qmeta, - context); - } - - //! Linear search by primary keys (delegate to HNSW) - virtual int search_bf_by_p_keys_impl( - const void *query, const std::vector> &p_keys, - const IndexQueryMeta &qmeta, uint32_t count, - ContextPointer &context) const override { - return hnsw_searcher_->search_bf_by_p_keys_impl(query, p_keys, qmeta, - count, context); - } - + // TODO: These methods call protected methods of HnswSearcher and need to be fixed + /* //! Fetch vector by key (delegate to HNSW) virtual const void *get_vector(uint64_t key) const override { return hnsw_searcher_->get_vector(key); @@ -120,6 +92,7 @@ class OmegaSearcher : public IndexSearcher { virtual void print_debug_info() override { hnsw_searcher_->print_debug_info(); } + */ private: //! 
Check if OMEGA mode should be used @@ -129,12 +102,11 @@ class OmegaSearcher : public IndexSearcher { omega_model_is_loaded(omega_model_); } - private: - enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 }; - - // Underlying HNSW searcher - std::shared_ptr hnsw_searcher_; + //! Adaptive search with OMEGA predictions + int adaptive_search(const void *query, const IndexQueryMeta &qmeta, + uint32_t count, ContextPointer &context) const; + private: // OMEGA components OmegaModelHandle omega_model_; bool omega_enabled_; @@ -143,9 +115,6 @@ class OmegaSearcher : public IndexSearcher { uint32_t min_vector_threshold_; size_t current_vector_count_; std::string model_dir_; - - ailego::Params params_{}; - State state_{STATE_INIT}; }; } // namespace core diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 7be599a89..bdbae4f15 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "omega_streamer.h" -#include +#include +#include +#include namespace zvec { namespace core { @@ -27,6 +29,12 @@ OmegaStreamer::~OmegaStreamer(void) { int OmegaStreamer::init(const IndexMeta &imeta, const ailego::Params ¶ms) { params_ = params; + // TODO: Fix design - cannot call protected init method of HnswStreamer + // For now, return NotImplemented error + LOG_ERROR("OmegaStreamer is not yet fully implemented - wrapper design needs fixing"); + return IndexError_NotImplemented; + + /* // Create underlying HNSW streamer hnsw_streamer_ = std::make_shared(); int ret = hnsw_streamer_->init(imeta, params); @@ -37,17 +45,16 @@ int OmegaStreamer::init(const IndexMeta &imeta, const ailego::Params ¶ms) { LOG_INFO("OmegaStreamer initialized"); return 0; + */ } int OmegaStreamer::cleanup(void) { - if (hnsw_streamer_ != nullptr) { - hnsw_streamer_->cleanup(); - hnsw_streamer_.reset(); - } + // Since init returns NotImplemented, cleanup does nothing return 0; } } // namespace core } // namespace zvec -INDEX_FACTORY_REGISTER_STREAMER(zvec::core::OmegaStreamer); +// TODO: Fix OmegaStreamer design - it tries to call protected methods of HnswStreamer +// INDEX_FACTORY_REGISTER_STREAMER(zvec::core::OmegaStreamer); diff --git a/src/core/algorithm/omega/omega_streamer.h b/src/core/algorithm/omega/omega_streamer.h index 82af4ebbd..9e54631cb 100644 --- a/src/core/algorithm/omega/omega_streamer.h +++ b/src/core/algorithm/omega/omega_streamer.h @@ -38,16 +38,8 @@ class OmegaStreamer : public IndexStreamer { //! Cleanup Streamer virtual int cleanup(void) override; - //! Create a context (delegate to HNSW) - virtual Context::Pointer create_context(void) const override { - return hnsw_streamer_->create_context(); - } - - //! Create a new iterator (delegate to HNSW) - virtual IndexProvider::Pointer create_provider(void) const override { - return hnsw_streamer_->create_provider(); - } - + // TODO: These methods call protected methods and need to be fixed + /* //! 
Add a vector into index (delegate to HNSW) virtual int add_impl(uint64_t pkey, const void *query, const IndexQueryMeta &qmeta, @@ -74,7 +66,10 @@ class OmegaStreamer : public IndexStreamer { Context::Pointer &context) const override { return hnsw_streamer_->search_impl(query, qmeta, count, context); } + */ + // TODO: These methods call protected methods and need to be fixed + /* //! Similarity brute force search (delegate to HNSW) virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta, Context::Pointer &context) const override { @@ -104,7 +99,10 @@ class OmegaStreamer : public IndexStreamer { return hnsw_streamer_->search_bf_by_p_keys_impl(query, p_keys, qmeta, count, context); } + */ + // TODO: These methods call protected methods and need to be fixed + /* //! Remove a vector from index (delegate to HNSW) virtual int remove_impl(uint64_t pkey, Context::Pointer &context) override { return hnsw_streamer_->remove_impl(pkey, context); @@ -125,14 +123,15 @@ class OmegaStreamer : public IndexStreamer { return hnsw_streamer_->meta(); } - //! Retrieve params of index - virtual const ailego::Params ¶ms(void) const override { + //! 
Retrieve params of index - NOTE: Not overriding base class method + const ailego::Params ¶ms(void) const { return params_; } virtual void print_debug_info() override { hnsw_streamer_->print_debug_info(); } + */ private: std::shared_ptr hnsw_streamer_; diff --git a/src/db/index/common/schema.cc b/src/db/index/common/schema.cc index 02789c613..27860ea61 100644 --- a/src/db/index/common/schema.cc +++ b/src/db/index/common/schema.cc @@ -46,7 +46,7 @@ std::unordered_set support_sparse_vector_type = { }; std::unordered_set support_dense_vector_index = { - IndexType::FLAT, IndexType::HNSW, IndexType::IVF}; + IndexType::FLAT, IndexType::HNSW, IndexType::IVF, IndexType::OMEGA}; std::unordered_set support_sparse_vector_index = {IndexType::FLAT, IndexType::HNSW}; diff --git a/src/include/zvec/db/index_params.h b/src/include/zvec/db/index_params.h index dc0a22ae2..82891482e 100644 --- a/src/include/zvec/db/index_params.h +++ b/src/include/zvec/db/index_params.h @@ -44,7 +44,7 @@ class IndexParams { bool is_vector_index_type() const { return type_ == IndexType::FLAT || type_ == IndexType::HNSW || - type_ == IndexType::IVF; + type_ == IndexType::IVF || type_ == IndexType::OMEGA; } IndexType type() const { @@ -314,4 +314,63 @@ class IVFIndexParams : public VectorIndexParams { bool use_soar_; }; +/* + * Vector: Omega index params + */ +class OmegaIndexParams : public VectorIndexParams { + public: + OmegaIndexParams( + MetricType metric_type, int m = core_interface::kDefaultHnswNeighborCnt, + int ef_construction = core_interface::kDefaultHnswEfConstruction, + QuantizeType quantize_type = QuantizeType::UNDEFINED) + : VectorIndexParams(IndexType::OMEGA, metric_type, quantize_type), + m_(m), + ef_construction_(ef_construction) {} + + using OPtr = std::shared_ptr; + + public: + Ptr clone() const override { + return std::make_shared(metric_type_, m_, ef_construction_, + quantize_type_); + } + + std::string to_string() const override { + auto base_str = 
vector_index_params_to_string("OmegaIndexParams", + metric_type_, quantize_type_); + std::ostringstream oss; + oss << base_str << ",m:" << m_ << ",ef_construction:" << ef_construction_ + << "}"; + return oss.str(); + } + + bool operator==(const IndexParams &other) const override { + return type() == other.type() && + metric_type() == + static_cast(other).metric_type() && + m_ == static_cast(other).m_ && + ef_construction_ == + static_cast(other).ef_construction_ && + quantize_type() == + static_cast(other).quantize_type(); + } + + void set_m(int m) { + m_ = m; + } + int m() const { + return m_; + } + void set_ef_construction(int ef_construction) { + ef_construction_ = ef_construction; + } + int ef_construction() const { + return ef_construction_; + } + + private: + int m_; + int ef_construction_; +}; + } // namespace zvec \ No newline at end of file diff --git a/src/include/zvec/db/type.h b/src/include/zvec/db/type.h index 188c1bdc2..8f8bf2055 100644 --- a/src/include/zvec/db/type.h +++ b/src/include/zvec/db/type.h @@ -26,6 +26,7 @@ enum class IndexType : uint32_t { IVF = 3, FLAT = 4, INVERT = 10, + OMEGA = 11, }; /* diff --git a/tests/core/algorithm/CMakeLists.txt b/tests/core/algorithm/CMakeLists.txt index 0e9aa7259..ca54094e6 100644 --- a/tests/core/algorithm/CMakeLists.txt +++ b/tests/core/algorithm/CMakeLists.txt @@ -7,3 +7,4 @@ cc_directories(flat_sparse) cc_directories(ivf) cc_directories(hnsw) cc_directories(hnsw_sparse) +cc_directories(omega) diff --git a/tests/core/algorithm/omega/CMakeLists.txt b/tests/core/algorithm/omega/CMakeLists.txt new file mode 100644 index 000000000..fd89e8275 --- /dev/null +++ b/tests/core/algorithm/omega/CMakeLists.txt @@ -0,0 +1,14 @@ +include(${CMAKE_SOURCE_DIR}/cmake/bazel.cmake) + +file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc) + +foreach(CC_SRCS ${ALL_TEST_SRCS}) + get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE) + cc_gtest( + NAME ${CC_TARGET} + STRICT + LIBS zvec_ailego core_framework core_utility core_metric 
core_quantizer core_knn_hnsw core_knn_omega + SRCS ${CC_SRCS} + INCS . ${CMAKE_SOURCE_DIR}/src/core ${CMAKE_SOURCE_DIR}/src/core/algorithm/omega ${CMAKE_SOURCE_DIR}/src/core/algorithm/hnsw + ) +endforeach() diff --git a/tests/core/algorithm/omega/omega_searcher_test.cc b/tests/core/algorithm/omega/omega_searcher_test.cc new file mode 100644 index 000000000..0b450c7a1 --- /dev/null +++ b/tests/core/algorithm/omega/omega_searcher_test.cc @@ -0,0 +1,425 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include +#include +#include "zvec/core/framework/index_builder.h" +#include "zvec/core/framework/index_factory.h" +#include "zvec/core/framework/index_meta.h" + +using namespace std; +using namespace testing; +using namespace zvec::ailego; + +#if defined(__GNUC__) || defined(__GNUG__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-result" +#endif + +namespace zvec { +namespace core { + +constexpr size_t static dim = 16; + +class OmegaSearcherTest : public testing::Test { + protected: + void SetUp(void); + void TearDown(void); + + static std::string _dir; + static shared_ptr _index_meta_ptr; +}; + +std::string OmegaSearcherTest::_dir("OmegaSearcherTest/"); +shared_ptr OmegaSearcherTest::_index_meta_ptr; + +void OmegaSearcherTest::SetUp(void) { + _index_meta_ptr.reset(new (nothrow) + IndexMeta(IndexMeta::DataType::DT_FP32, dim)); + _index_meta_ptr->set_metric("SquaredEuclidean", 0, ailego::Params()); +} + +void OmegaSearcherTest::TearDown(void) { + char cmdBuf[100]; + snprintf(cmdBuf, 100, "rm -rf %s", _dir.c_str()); + system(cmdBuf); +} + +// Test that OmegaSearcher falls back to HNSW when omega is disabled +TEST_F(OmegaSearcherTest, TestFallbackToHnswWhenDisabled) { + // Build index using HnswBuilder + IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder"); + ASSERT_NE(builder, nullptr); + + auto holder = + make_shared>(dim); + size_t doc_cnt = 1000UL; + for (size_t i = 0; i < doc_cnt; i++) { + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = i; + } + ASSERT_TRUE(holder->emplace(i, vec)); + } + + ASSERT_EQ(0, builder->init(*_index_meta_ptr, ailego::Params())); + ASSERT_EQ(0, builder->train(holder)); + ASSERT_EQ(0, builder->build(holder)); + + auto dumper = IndexFactory::CreateDumper("FileDumper"); + ASSERT_NE(dumper, nullptr); + string path = _dir + "/TestFallbackToHnswWhenDisabled"; + ASSERT_EQ(0, dumper->create(path)); + ASSERT_EQ(0, 
builder->dump(dumper)); + ASSERT_EQ(0, dumper->close()); + + // Test OmegaSearcher with omega.enabled=false (default) + IndexSearcher::Pointer omega_searcher = + IndexFactory::CreateSearcher("OmegaSearcher"); + ASSERT_TRUE(omega_searcher != nullptr); + + // Initialize without enabling omega (should fallback to HNSW) + ailego::Params params; + params.insert("omega.enabled", false); // Explicitly disable omega + ASSERT_EQ(0, omega_searcher->init(params)); + + auto storage = IndexFactory::CreateStorage("FileReadStorage"); + ASSERT_EQ(0, storage->open(path, false)); + ASSERT_EQ(0, omega_searcher->load(storage, IndexMetric::Pointer())); + auto ctx = omega_searcher->create_context(); + ASSERT_TRUE(!!ctx); + + // Perform search + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = 0.0; + } + IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim); + size_t topk = 50; + ctx->set_topk(topk); + ASSERT_EQ(0, omega_searcher->search_impl(vec.data(), qmeta, ctx)); + auto &results = ctx->result(); + ASSERT_EQ(topk, results.size()); + + // Verify results are sorted by distance + for (size_t k = 1; k < results.size(); ++k) { + ASSERT_LE(results[k - 1].score(), results[k].score()); + } +} + +// Test that OmegaSearcher and HnswSearcher produce identical results when omega is disabled +TEST_F(OmegaSearcherTest, TestIdenticalResultsWithHnsw) { + // Build index using HnswBuilder + IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder"); + ASSERT_NE(builder, nullptr); + + auto holder = + make_shared>(dim); + size_t doc_cnt = 500UL; + for (size_t i = 0; i < doc_cnt; i++) { + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = static_cast(i + j); + } + ASSERT_TRUE(holder->emplace(i, vec)); + } + + ASSERT_EQ(0, builder->init(*_index_meta_ptr, ailego::Params())); + ASSERT_EQ(0, builder->train(holder)); + ASSERT_EQ(0, builder->build(holder)); + + auto dumper = IndexFactory::CreateDumper("FileDumper"); + ASSERT_NE(dumper, nullptr); 
+ string path = _dir + "/TestIdenticalResultsWithHnsw"; + ASSERT_EQ(0, dumper->create(path)); + ASSERT_EQ(0, builder->dump(dumper)); + ASSERT_EQ(0, dumper->close()); + + // Create HnswSearcher + IndexSearcher::Pointer hnsw_searcher = + IndexFactory::CreateSearcher("HnswSearcher"); + ASSERT_TRUE(hnsw_searcher != nullptr); + ASSERT_EQ(0, hnsw_searcher->init(ailego::Params())); + + auto storage1 = IndexFactory::CreateStorage("FileReadStorage"); + ASSERT_EQ(0, storage1->open(path, false)); + ASSERT_EQ(0, hnsw_searcher->load(storage1, IndexMetric::Pointer())); + + // Create OmegaSearcher with omega disabled + IndexSearcher::Pointer omega_searcher = + IndexFactory::CreateSearcher("OmegaSearcher"); + ASSERT_TRUE(omega_searcher != nullptr); + + ailego::Params params; + params.insert("omega.enabled", false); + ASSERT_EQ(0, omega_searcher->init(params)); + + auto storage2 = IndexFactory::CreateStorage("FileReadStorage"); + ASSERT_EQ(0, storage2->open(path, false)); + ASSERT_EQ(0, omega_searcher->load(storage2, IndexMetric::Pointer())); + + // Search with both searchers and compare results + NumericalVector query(dim); + for (size_t j = 0; j < dim; ++j) { + query[j] = 100.0f + j; + } + + IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim); + size_t topk = 20; + + auto hnsw_ctx = hnsw_searcher->create_context(); + hnsw_ctx->set_topk(topk); + ASSERT_EQ(0, hnsw_searcher->search_impl(query.data(), qmeta, hnsw_ctx)); + auto &hnsw_results = hnsw_ctx->result(); + + auto omega_ctx = omega_searcher->create_context(); + omega_ctx->set_topk(topk); + ASSERT_EQ(0, omega_searcher->search_impl(query.data(), qmeta, omega_ctx)); + auto &omega_results = omega_ctx->result(); + + // Results should be identical + ASSERT_EQ(hnsw_results.size(), omega_results.size()); + for (size_t k = 0; k < hnsw_results.size(); ++k) { + ASSERT_EQ(hnsw_results[k].key(), omega_results[k].key()); + ASSERT_FLOAT_EQ(hnsw_results[k].score(), omega_results[k].score()); + } +} + +// Test OmegaSearcher with RNN search 
(radius search) +TEST_F(OmegaSearcherTest, TestRnnSearchFallback) { + IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder"); + ASSERT_NE(builder, nullptr); + + auto holder = + make_shared>(dim); + size_t doc_cnt = 1000UL; + for (size_t i = 0; i < doc_cnt; i++) { + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = i; + } + ASSERT_TRUE(holder->emplace(i, vec)); + } + + ASSERT_EQ(0, builder->init(*_index_meta_ptr, ailego::Params())); + ASSERT_EQ(0, builder->train(holder)); + ASSERT_EQ(0, builder->build(holder)); + + auto dumper = IndexFactory::CreateDumper("FileDumper"); + ASSERT_NE(dumper, nullptr); + string path = _dir + "/TestRnnSearchFallback"; + ASSERT_EQ(0, dumper->create(path)); + ASSERT_EQ(0, builder->dump(dumper)); + ASSERT_EQ(0, dumper->close()); + + // Test OmegaSearcher with omega disabled + IndexSearcher::Pointer searcher = + IndexFactory::CreateSearcher("OmegaSearcher"); + ASSERT_TRUE(searcher != nullptr); + + ailego::Params params; + params.insert("omega.enabled", false); + ASSERT_EQ(0, searcher->init(params)); + + auto storage = IndexFactory::CreateStorage("FileReadStorage"); + ASSERT_EQ(0, storage->open(path, false)); + ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer())); + auto ctx = searcher->create_context(); + ASSERT_TRUE(!!ctx); + + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = 0.0; + } + IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim); + size_t topk = 50; + ctx->set_topk(topk); + ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx)); + auto &results = ctx->result(); + ASSERT_EQ(topk, results.size()); + + // Test with radius threshold + float radius = results[topk / 2].score(); + ctx->set_threshold(radius); + ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx)); + ASSERT_GT(topk, results.size()); + for (size_t k = 0; k < results.size(); ++k) { + ASSERT_GE(radius, results[k].score()); + } + + // Test Reset Threshold + ctx->reset_threshold(); + 
ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx)); + ASSERT_EQ(topk, results.size()); + ASSERT_LT(radius, results[topk - 1].score()); +} + +// Test OmegaSearcher with InnerProduct metric +TEST_F(OmegaSearcherTest, TestInnerProductFallback) { + IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder"); + ASSERT_NE(builder, nullptr); + + auto holder = + make_shared>(dim); + size_t doc_cnt = 1000UL; + for (size_t i = 0; i < doc_cnt; i++) { + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = i; + } + ASSERT_TRUE(holder->emplace(i, vec)); + } + + IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim); + index_meta.set_metric("InnerProduct", 0, ailego::Params()); + + ASSERT_EQ(0, builder->init(index_meta, ailego::Params())); + ASSERT_EQ(0, builder->train(holder)); + ASSERT_EQ(0, builder->build(holder)); + + auto dumper = IndexFactory::CreateDumper("FileDumper"); + ASSERT_NE(dumper, nullptr); + string path = _dir + "/TestInnerProductFallback"; + ASSERT_EQ(0, dumper->create(path)); + ASSERT_EQ(0, builder->dump(dumper)); + ASSERT_EQ(0, dumper->close()); + + // Test OmegaSearcher with omega disabled + IndexSearcher::Pointer searcher = + IndexFactory::CreateSearcher("OmegaSearcher"); + ASSERT_TRUE(searcher != nullptr); + + ailego::Params params; + params.insert("omega.enabled", false); + ASSERT_EQ(0, searcher->init(params)); + + auto storage = IndexFactory::CreateStorage("FileReadStorage"); + ASSERT_EQ(0, storage->open(path, false)); + ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer())); + auto ctx = searcher->create_context(); + ASSERT_TRUE(!!ctx); + + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = 1.0; + } + IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim); + size_t topk = 50; + ctx->set_topk(topk); + ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx)); + auto &results = ctx->result(); + ASSERT_EQ(topk, results.size()); + + // Verify results are sorted correctly for 
InnerProduct (descending) + for (size_t k = 1; k < results.size(); ++k) { + ASSERT_GE(results[k - 1].score(), results[k].score()); + } +} + +// Test that omega parameters don't affect HNSW fallback mode +TEST_F(OmegaSearcherTest, TestOmegaParamsIgnoredWhenDisabled) { + IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder"); + ASSERT_NE(builder, nullptr); + + auto holder = + make_shared>(dim); + size_t doc_cnt = 500UL; + for (size_t i = 0; i < doc_cnt; i++) { + NumericalVector vec(dim); + for (size_t j = 0; j < dim; ++j) { + vec[j] = i; + } + ASSERT_TRUE(holder->emplace(i, vec)); + } + + ASSERT_EQ(0, builder->init(*_index_meta_ptr, ailego::Params())); + ASSERT_EQ(0, builder->train(holder)); + ASSERT_EQ(0, builder->build(holder)); + + auto dumper = IndexFactory::CreateDumper("FileDumper"); + ASSERT_NE(dumper, nullptr); + string path = _dir + "/TestOmegaParamsIgnored"; + ASSERT_EQ(0, dumper->create(path)); + ASSERT_EQ(0, builder->dump(dumper)); + ASSERT_EQ(0, dumper->close()); + + // Create two OmegaSearcher instances with different omega params + // but both with omega disabled + IndexSearcher::Pointer searcher1 = + IndexFactory::CreateSearcher("OmegaSearcher"); + ASSERT_TRUE(searcher1 != nullptr); + + ailego::Params params1; + params1.insert("omega.enabled", false); + params1.insert("omega.target_recall", 0.95f); + params1.insert("omega.min_vector_threshold", 10000); + ASSERT_EQ(0, searcher1->init(params1)); + + IndexSearcher::Pointer searcher2 = + IndexFactory::CreateSearcher("OmegaSearcher"); + ASSERT_TRUE(searcher2 != nullptr); + + ailego::Params params2; + params2.insert("omega.enabled", false); + params2.insert("omega.target_recall", 0.85f); + params2.insert("omega.min_vector_threshold", 5000); + ASSERT_EQ(0, searcher2->init(params2)); + + auto storage1 = IndexFactory::CreateStorage("FileReadStorage"); + ASSERT_EQ(0, storage1->open(path, false)); + ASSERT_EQ(0, searcher1->load(storage1, IndexMetric::Pointer())); + + auto storage2 = 
IndexFactory::CreateStorage("FileReadStorage"); + ASSERT_EQ(0, storage2->open(path, false)); + ASSERT_EQ(0, searcher2->load(storage2, IndexMetric::Pointer())); + + // Search with both searchers - results should be identical + // since omega is disabled and both use HNSW + NumericalVector query(dim); + for (size_t j = 0; j < dim; ++j) { + query[j] = 50.0f; + } + + IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim); + size_t topk = 30; + + auto ctx1 = searcher1->create_context(); + ctx1->set_topk(topk); + ASSERT_EQ(0, searcher1->search_impl(query.data(), qmeta, ctx1)); + auto &results1 = ctx1->result(); + + auto ctx2 = searcher2->create_context(); + ctx2->set_topk(topk); + ASSERT_EQ(0, searcher2->search_impl(query.data(), qmeta, ctx2)); + auto &results2 = ctx2->result(); + + // Results should be identical despite different omega params + ASSERT_EQ(results1.size(), results2.size()); + for (size_t k = 0; k < results1.size(); ++k) { + ASSERT_EQ(results1[k].key(), results2[k].key()); + ASSERT_FLOAT_EQ(results1[k].score(), results2[k].score()); + } +} + +} // namespace core +} // namespace zvec + +#if defined(__GNUC__) || defined(__GNUG__) +#pragma GCC diagnostic pop +#endif diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index a32eac5ef..341bbddab 100644 --- a/thirdparty/CMakeLists.txt +++ b/thirdparty/CMakeLists.txt @@ -24,4 +24,5 @@ add_subdirectory(rocksdb rocksdb EXCLUDE_FROM_ALL) add_subdirectory(CRoaring CRoaring EXCLUDE_FROM_ALL) add_subdirectory(arrow arrow EXCLUDE_FROM_ALL) add_subdirectory(magic_enum magic_enum EXCLUDE_FROM_ALL) +add_subdirectory(omega omega EXCLUDE_FROM_ALL) diff --git a/thirdparty/omega b/thirdparty/omega index 3d613ec9b..beac97cab 160000 --- a/thirdparty/omega +++ b/thirdparty/omega @@ -1 +1 @@ -Subproject commit 3d613ec9ba9df0b382001998ecebe1b56d224039 +Subproject commit beac97cab2747379aa80fc372ae451a735d61719 From 65ee973d7190e34e16bcd71f1d36df1a36543c4f Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 30 Jan 
2026 19:21:11 +0800 Subject: [PATCH 005/126] chore: update OMEGA submodule to latest commit --- thirdparty/omega | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega b/thirdparty/omega index beac97cab..389cca638 160000 --- a/thirdparty/omega +++ b/thirdparty/omega @@ -1 +1 @@ -Subproject commit beac97cab2747379aa80fc372ae451a735d61719 +Subproject commit 389cca638506b3f5bc841a863f1e95eee3b64efb From 4a199961fa6be8c93d0397021624c0ce62cf0e29 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 2 Mar 2026 04:13:02 +0800 Subject: [PATCH 006/126] chore: update OMEGA submodule to latest commit --- thirdparty/omega | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega b/thirdparty/omega index 389cca638..14e6c9508 160000 --- a/thirdparty/omega +++ b/thirdparty/omega @@ -1 +1 @@ -Subproject commit 389cca638506b3f5bc841a863f1e95eee3b64efb +Subproject commit 14e6c95080cafd8dbe9db7a39ae00071331a7c24 From 3d6cb1314cb83bbd26216e6a98d7d0591d4fa439 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 2 Mar 2026 04:31:18 +0800 Subject: [PATCH 007/126] feat: implement OMEGA adaptive search with training mode and per-query recall - Implement OmegaIndex with ITrainingCapable interface for training support - Create OmegaStreamer with training mode for feature collection during search - Add OmegaSearcher adaptive search with OMEGA early stopping prediction - Implement training data export and collection APIs - Add OmegaQueryParams and OmegaContext for per-query target_recall specification - Create omega_params.h and omega_context.h for parameter management - Update engine_helper to convert and extract OMEGA query parameters - Integrate training mode with Collection API (enable/disable/export methods) - Add training data collector, query generator, and model trainer components - Add Python training API with OmegaTrainer class - Add debug logging for OMEGA index creation and merge operations - Adjust HnswSearcher member access 
modifiers for OMEGA inheritance - Remove test_omega_fallback.py (replaced by test_collection.py tests) --- python/tests/test_collection.py | 424 ++++++++++++++++++ python/tests/test_omega_fallback.py | 229 ---------- python/zvec/_omega_training.py | 269 +++++++++++ python/zvec/model/param/__init__.pyi | 73 +++ python/zvec/model/schema/field_schema.py | 29 +- .../python/model/param/python_param.cc | 2 + src/core/algorithm/hnsw/hnsw_searcher.h | 21 +- src/core/algorithm/hnsw/hnsw_streamer.h | 3 +- src/core/algorithm/omega/CMakeLists.txt | 2 +- src/core/algorithm/omega/omega_context.h | 68 +++ src/core/algorithm/omega/omega_params.h | 29 ++ src/core/algorithm/omega/omega_searcher.cc | 182 +++++++- src/core/algorithm/omega/omega_searcher.h | 74 +++ src/core/algorithm/omega/omega_streamer.cc | 362 +++++++++++++-- src/core/algorithm/omega/omega_streamer.h | 146 ++---- src/core/interface/CMakeLists.txt | 2 +- src/core/interface/index.cc | 39 +- src/core/interface/index_factory.cc | 2 + src/core/interface/indexes/omega_index.cc | 192 ++++++++ .../mixed_reducer/mixed_streamer_reducer.cc | 118 +++++ .../mixed_reducer/mixed_streamer_reducer.h | 8 + src/db/collection.cc | 53 ++- .../column/vector_column/engine_helper.hpp | 67 +++ .../vector_column/vector_column_indexer.cc | 141 ++++++ .../vector_column/vector_column_indexer.h | 54 +++ src/db/index/common/proto_converter.cc | 31 ++ src/db/index/common/proto_converter.h | 4 + src/db/index/segment/segment.cc | 214 ++++++++- src/db/proto/zvec.proto | 10 + src/db/training/omega_model_trainer.cc | 126 ++++++ src/db/training/omega_model_trainer.h | 89 ++++ src/db/training/query_generator.cc | 143 ++++++ src/db/training/query_generator.h | 82 ++++ src/db/training/training_data_collector.cc | 350 +++++++++++++++ src/db/training/training_data_collector.h | 118 +++++ src/include/zvec/core/interface/index.h | 67 +++ src/include/zvec/core/interface/index_param.h | 11 + src/include/zvec/core/interface/training.h | 62 +++ 
.../zvec/core/interface/training_capable.h | 87 ++++ src/include/zvec/db/query_params.h | 23 + 40 files changed, 3601 insertions(+), 405 deletions(-) delete mode 100644 python/tests/test_omega_fallback.py create mode 100644 python/zvec/_omega_training.py create mode 100644 src/core/algorithm/omega/omega_context.h create mode 100644 src/core/algorithm/omega/omega_params.h create mode 100644 src/core/interface/indexes/omega_index.cc create mode 100644 src/db/training/omega_model_trainer.cc create mode 100644 src/db/training/omega_model_trainer.h create mode 100644 src/db/training/query_generator.cc create mode 100644 src/db/training/query_generator.h create mode 100644 src/db/training/training_data_collector.cc create mode 100644 src/db/training/training_data_collector.h create mode 100644 src/include/zvec/core/interface/training.h create mode 100644 src/include/zvec/core/interface/training_capable.h diff --git a/python/tests/test_collection.py b/python/tests/test_collection.py index 7d021d6fd..e239a88a0 100644 --- a/python/tests/test_collection.py +++ b/python/tests/test_collection.py @@ -32,6 +32,8 @@ IndexType, VectorQuery, OptimizeOption, + OmegaIndexParam, + MetricType, ) # ==================== Common ==================== @@ -1043,3 +1045,425 @@ def test_collection_query_with_weighted_reranker_by_hybrid_vector( self, collection_with_multiple_docs: Collection, multiple_docs ): pass + + +# ---------------------------- +# OMEGA Index Test Case +# ---------------------------- + + +@pytest.fixture(scope="session") +def omega_collection_schema(): + """Schema with OMEGA index for testing automatic training.""" + return zvec.CollectionSchema( + name="omega_test_collection", + fields=[ + FieldSchema( + "id", + DataType.INT64, + nullable=False, + index_param=InvertIndexParam(enable_range_optimization=True), + ), + FieldSchema("name", DataType.STRING, nullable=False), + ], + vectors=[ + VectorSchema( + "embedding", + DataType.VECTOR_FP32, + dimension=128, + 
index_param=OmegaIndexParam( + metric_type=MetricType.IP, + m=16, + ef_construction=200, + ), + ), + ], + ) + + +@pytest.fixture +def omega_test_collection( + tmp_path_factory, omega_collection_schema, collection_option +) -> Collection: + """Create a collection with OMEGA index for testing.""" + temp_dir = tmp_path_factory.mktemp("zvec_omega") + collection_path = temp_dir / "omega_collection" + + coll = zvec.create_and_open( + path=str(collection_path), + schema=omega_collection_schema, + option=collection_option, + ) + + assert coll is not None, "Failed to create OMEGA collection" + assert coll.schema.name == omega_collection_schema.name + + # Verify OMEGA index param + embedding_field = coll.schema.vector("embedding") + assert embedding_field is not None + assert embedding_field.index_param is not None + assert embedding_field.index_param.type == IndexType.OMEGA + + try: + yield coll + finally: + if hasattr(coll, "destroy") and coll is not None: + try: + coll.destroy() + except Exception as e: + print(f"Warning: failed to destroy OMEGA collection: {e}") + + +@pytest.fixture +def omega_docs_large(): + """Generate 1500 documents to trigger segment dump and training.""" + import numpy as np + + docs = [] + for i in range(1500): + # Generate somewhat structured vectors for better training + base_vector = np.random.randn(128).astype(np.float32) + base_vector = base_vector / np.linalg.norm(base_vector) # Normalize + docs.append( + Doc( + id=f"{i}", + fields={"id": i, "name": f"doc_{i}"}, + vectors={"embedding": base_vector.tolist()}, + ) + ) + return docs + + +@pytest.mark.usefixtures("omega_test_collection") +class TestCollectionOmegaIndex: + """Test cases for OMEGA index functionality.""" + + def test_omega_index_param_creation(self, omega_test_collection: Collection): + """Test that OmegaIndexParam is correctly created and configured.""" + embedding_field = omega_test_collection.schema.vector("embedding") + assert embedding_field is not None + assert 
embedding_field.name == "embedding" + assert embedding_field.dimension == 128 + assert embedding_field.data_type == DataType.VECTOR_FP32 + + index_param = embedding_field.index_param + assert index_param is not None + assert index_param.type == IndexType.OMEGA + assert index_param.m == 16 + assert index_param.ef_construction == 200 + assert index_param.metric_type == MetricType.IP + + def test_omega_index_param_to_dict(self, omega_test_collection: Collection): + """Test that OmegaIndexParam.to_dict() returns correct type.""" + embedding_field = omega_test_collection.schema.vector("embedding") + index_param = embedding_field.index_param + + param_dict = index_param.to_dict() + assert "type" in param_dict + assert param_dict["type"] == "OMEGA", f"Expected 'OMEGA', got '{param_dict['type']}'" + assert param_dict["metric_type"] == "IP" + assert param_dict["m"] == 16 + assert param_dict["ef_construction"] == 200 + + def test_omega_basic_insert_and_search(self, omega_test_collection: Collection): + """Test basic insert and search with small dataset (no training).""" + import numpy as np + + # Insert 10 documents + docs = [] + for i in range(10): + vector = np.random.randn(128).astype(np.float32) + vector = vector / np.linalg.norm(vector) + docs.append( + Doc( + id=f"{i}", + fields={"id": i, "name": f"doc_{i}"}, + vectors={"embedding": vector.tolist()}, + ) + ) + + result = omega_test_collection.insert(docs) + assert len(result) == len(docs) + for item in result: + assert item.ok() + + # Search with first document's vector + query_vector = docs[0].vector("embedding") + search_results = omega_test_collection.query( + VectorQuery(field_name="embedding", vector=query_vector), + topk=5 + ) + + assert len(search_results) > 0 + # First result should be the query document itself + assert search_results[0].id == docs[0].id + + def test_omega_large_dataset_with_optimize( + self, omega_test_collection: Collection, omega_docs_large + ): + """Test OMEGA with large dataset to trigger 
automatic training.""" + # Insert 1500 documents (should trigger segment dump) + batch_size = 100 + for i in range(0, len(omega_docs_large), batch_size): + batch = omega_docs_large[i : i + batch_size] + result = omega_test_collection.insert(batch) + assert len(result) == len(batch) + for item in result: + assert item.ok() + + # Verify all documents inserted + stats = omega_test_collection.stats + assert stats.doc_count == len(omega_docs_large) + + # Call optimize to trigger segment dump and automatic training + optimize_result = omega_test_collection.optimize(option=OptimizeOption()) + # optimize() may not return a value, just ensure it doesn't raise + + # Perform search to verify OMEGA is working + query_doc = omega_docs_large[0] + query_vector = query_doc.vector("embedding") + + search_results = omega_test_collection.query( + VectorQuery(field_name="embedding", vector=query_vector), + topk=10 + ) + + assert len(search_results) > 0 + # Should find the query document in results + found_query_doc = False + for doc in search_results: + if doc.id == query_doc.id: + found_query_doc = True + break + assert found_query_doc, "Query document not found in search results" + + def test_omega_search_consistency( + self, omega_test_collection: Collection, omega_docs_large + ): + """Test that OMEGA search results are consistent and reasonable.""" + import numpy as np + + # Insert documents + batch_size = 100 + for i in range(0, len(omega_docs_large), batch_size): + batch = omega_docs_large[i : i + batch_size] + omega_test_collection.insert(batch) + + # Perform multiple searches and verify consistency + query_doc = omega_docs_large[100] # Use a middle document + query_vector = query_doc.vector("embedding") + + # Search twice with same query + results1 = omega_test_collection.query( + VectorQuery(field_name="embedding", vector=query_vector), + topk=20 + ) + results2 = omega_test_collection.query( + VectorQuery(field_name="embedding", vector=query_vector), + topk=20 + ) + + assert 
len(results1) == len(results2) + + # Results should be identical (same query) + for i in range(len(results1)): + assert results1[i].id == results2[i].id + + def test_omega_with_filter(self, omega_test_collection: Collection): + """Test OMEGA search with filter expressions.""" + import numpy as np + + # Insert 50 documents + docs = [] + for i in range(50): + vector = np.random.randn(128).astype(np.float32) + vector = vector / np.linalg.norm(vector) + docs.append( + Doc( + id=f"{i}", + fields={"id": i, "name": f"doc_{i}"}, + vectors={"embedding": vector.tolist()}, + ) + ) + omega_test_collection.insert(docs) + + # Search with filter + query_vector = docs[0].vector("embedding") + results = omega_test_collection.query( + VectorQuery( + field_name="embedding", + vector=query_vector, + ), + filter="id >= 10 and id < 20", + topk=20, + ) + + # All results should satisfy filter + for doc in results: + doc_id = doc.field("id") + assert 10 <= doc_id < 20, f"Document id {doc_id} does not satisfy filter" + + def test_omega_training_and_early_stopping( + self, tmp_path_factory + ): + """ + Verify OMEGA training with k_train=1 labeling logic: + 1. Training succeeds (model files generated) + 2. Search demonstrates early stopping + 3. 
Recall meets target + """ + import numpy as np + import os + import gc + import time + + print("\n" + "="*80) + print("OMEGA Training Verification (k_train=1)") + print("="*80) + + # Create fresh collection + temp_dir = tmp_path_factory.mktemp("zvec_omega_training") + collection_path = str(temp_dir / "omega_training_collection") + + schema = zvec.CollectionSchema( + name="omega_training_test", + fields=[ + FieldSchema("id", DataType.INT64, nullable=False), + FieldSchema("name", DataType.STRING, nullable=False), + ], + vectors=[ + VectorSchema( + "embedding", + DataType.VECTOR_FP32, + dimension=128, + index_param=OmegaIndexParam( + metric_type=MetricType.L2, + m=16, + ef_construction=200, + ), + ), + ], + ) + + # Create and insert documents + print(f"\n[1/4] Creating collection and inserting 1500 documents...") + collection = zvec.create_and_open( + path=collection_path, + schema=schema, + option=CollectionOption() + ) + + # Generate documents + docs = [] + for i in range(1500): + base_vector = np.random.randn(128).astype(np.float32) + base_vector = base_vector / np.linalg.norm(base_vector) + docs.append( + Doc( + id=f"{i}", + fields={"id": i, "name": f"doc_{i}"}, + vectors={"embedding": base_vector.tolist()}, + ) + ) + + batch_size = 100 + for i in range(0, len(docs), batch_size): + batch = docs[i : i + batch_size] + collection.insert(batch) + print(f" ✓ Inserted {len(docs)} documents") + + # Trigger optimization + print(f"\n[2/4] Triggering optimize (dump + auto training)...") + collection.optimize(option=OptimizeOption()) + print(f" ✓ Optimize completed") + + # Close collection + del collection + gc.collect() + time.sleep(1) + + # Reopen to load OMEGA index + print(f" Reopening collection to load OMEGA indexes...") + collection = zvec.open(path=collection_path, option=CollectionOption(read_only=True)) + print(f" ✓ Collection reopened") + + # Verify model files exist + print(f"\n[3/4] Verifying model files...") + model_files_found = False + model_dir = None + + # 
Look for any numeric segment directories + if os.path.exists(collection_path): + for item in os.listdir(collection_path): + item_path = os.path.join(collection_path, item) + if os.path.isdir(item_path): + potential_model_dir = os.path.join(item_path, "omega_model") + + if os.path.exists(potential_model_dir): + model_dir = potential_model_dir + print(f" Found model directory: {model_dir}") + + # Check for all required files + required_files = [ + "model.txt", + "threshold_table.txt", + "interval_table.txt", + "gt_collected_table.txt", + "gt_cmps_all_table.txt" + ] + + files_exist = [] + for fname in required_files: + fpath = os.path.join(model_dir, fname) + exists = os.path.exists(fpath) + files_exist.append(exists) + + if exists: + file_size = os.path.getsize(fpath) + print(f" ✓ {fname}: {file_size} bytes") + else: + print(f" ✗ {fname}: NOT FOUND") + + if all(files_exist): + model_files_found = True + print(f"\n ✅ ALL MODEL FILES GENERATED!") + break + + if not model_files_found: + print(f" ⚠️ No OMEGA model files found in any segment directory") + + assert model_files_found, "OMEGA model files not found after training" + + # Test search and calculate recall + print(f"\n[4/4] Testing search recall...") + n_queries = 10 + topk = 10 + recalls = [] + + for i in range(n_queries): + query_doc = docs[i] + query_vector = query_doc.vector("embedding") + + results = collection.query( + VectorQuery(field_name="embedding", vector=query_vector), + topk=topk + ) + + # Calculate recall + result_ids = [r.id for r in results] + recall = 1.0 if query_doc.id in result_ids else 0.0 + recalls.append(recall) + + avg_recall = np.mean(recalls) + print(f" Average Recall@{topk}: {avg_recall:.4f}") + + # Verify recall is reasonable (should be high since we use docs from collection) + assert avg_recall >= 0.8, f"Recall too low: {avg_recall:.4f}" + + print(f"\n ✅ RECALL MEETS THRESHOLD (>= 0.8)") + print("\n" + "="*80) + print("✅ OMEGA Training Verification PASSED") + print(" 1. 
✓ Model files generated (LightGBM + 4 tables)") + print(" 2. ✓ Training completed with k_train=1 labeling") + print(f" 3. ✓ Search recall: {avg_recall:.4f} >= 0.8") + print("="*80) diff --git a/python/tests/test_omega_fallback.py b/python/tests/test_omega_fallback.py deleted file mode 100644 index 05958cb7f..000000000 --- a/python/tests/test_omega_fallback.py +++ /dev/null @@ -1,229 +0,0 @@ -# Copyright 2025-present the zvec project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import annotations - -import tempfile -from pathlib import Path - -import numpy as np -import pytest -import zvec -from zvec import ( - CollectionOption, - CollectionSchema, - DataType, - Doc, - FieldSchema, - HnswIndexParam, - IndexOption, - MetricType, - OmegaIndexParam, - VectorQuery, - VectorSchema, -) - - -@pytest.fixture(scope="module", autouse=True) -def init_zvec(): - """Initialize zvec once for all tests in this module.""" - zvec.init() - - -def test_omega_index_param_creation(): - """Test that OmegaIndexParam can be created with various parameters.""" - # Default parameters - param1 = OmegaIndexParam() - assert param1.m == 50 - assert param1.ef_construction == 500 - assert param1.metric_type == MetricType.IP - - # Custom parameters - param2 = OmegaIndexParam( - metric_type=MetricType.L2, - m=32, - ef_construction=200 - ) - assert param2.m == 32 - assert param2.ef_construction == 200 - assert param2.metric_type == MetricType.L2 - - -def 
test_omega_collection_creation(): - """Test creating a collection with OMEGA index.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test_omega_db" - - # Create schema with OMEGA index - schema = CollectionSchema( - name="test_omega_collection", - fields=[ - FieldSchema("id", DataType.INT64, nullable=False), - FieldSchema("text", DataType.STRING, nullable=False), - ], - vectors=[ - VectorSchema( - "embedding", - DataType.VECTOR_FP32, - dimension=128, - index_param=OmegaIndexParam( - metric_type=MetricType.L2, - m=16, - ef_construction=200 - ), - ), - ], - ) - - # Create collection - collection = zvec.create_and_open( - str(db_path), - schema, - CollectionOption(read_only=False, enable_mmap=False) - ) - - # Insert some test data - docs = [ - Doc( - id=str(i), - fields={"id": i, "text": f"doc_{i}"}, - vectors={"embedding": np.random.randn(128).astype(np.float32).tolist()} - ) - for i in range(100) - ] - - status = collection.insert(docs) - # insert() returns a list of Status objects for multiple docs - assert len(status) == len(docs), "Insert returned wrong number of statuses" - for s in status: - assert s.ok(), f"Insert failed: {s.message()}" - - # Create index - collection.create_index( - field_name="embedding", - index_param=OmegaIndexParam(metric_type=MetricType.L2, m=16, ef_construction=200), - option=IndexOption() - ) - - # Query - query_vector = np.random.randn(128).astype(np.float32).tolist() - results = collection.query( - vectors=VectorQuery(field_name="embedding", vector=query_vector), - topk=10 - ) - - assert len(results) > 0, "Query returned no results" - assert len(results) <= 10, "Query returned more than top_k results" - - -def test_omega_vs_hnsw_compatibility(): - """Test that OMEGA index produces similar results to HNSW index.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path_hnsw = Path(tmpdir) / "test_hnsw_db" - db_path_omega = Path(tmpdir) / "test_omega_db" - - # Create identical schemas except for index 
type - def create_schema(name, index_param): - return CollectionSchema( - name=name, - fields=[ - FieldSchema("id", DataType.INT64, nullable=False), - ], - vectors=[ - VectorSchema( - "embedding", - DataType.VECTOR_FP32, - dimension=64, - index_param=index_param, - ), - ], - ) - - # Create test data - np.random.seed(42) - test_docs = [ - Doc( - id=str(i), - fields={"id": i}, - vectors={"embedding": np.random.randn(64).astype(np.float32).tolist()} - ) - for i in range(200) - ] - - query_vector = np.random.randn(64).astype(np.float32).tolist() - - # Test with HNSW - schema_hnsw = create_schema( - "test_hnsw", - HnswIndexParam(metric_type=MetricType.L2, m=16, ef_construction=200) - ) - collection_hnsw = zvec.create_and_open( - str(db_path_hnsw), - schema_hnsw, - CollectionOption(read_only=False, enable_mmap=False) - ) - collection_hnsw.insert(test_docs) - collection_hnsw.create_index( - field_name="embedding", - index_param=HnswIndexParam(metric_type=MetricType.L2, m=16, ef_construction=200), - option=IndexOption() - ) - results_hnsw = collection_hnsw.query( - vectors=VectorQuery(field_name="embedding", vector=query_vector), - topk=10 - ) - - # Test with OMEGA - schema_omega = create_schema( - "test_omega", - OmegaIndexParam(metric_type=MetricType.L2, m=16, ef_construction=200) - ) - collection_omega = zvec.create_and_open( - str(db_path_omega), - schema_omega, - CollectionOption(read_only=False, enable_mmap=False) - ) - collection_omega.insert(test_docs) - collection_omega.create_index( - field_name="embedding", - index_param=OmegaIndexParam(metric_type=MetricType.L2, m=16, ef_construction=200), - option=IndexOption() - ) - results_omega = collection_omega.query( - vectors=VectorQuery(field_name="embedding", vector=query_vector), - topk=10 - ) - - # Both should return results - assert len(results_hnsw) > 0, "HNSW query returned no results" - assert len(results_omega) > 0, "OMEGA query returned no results" - - # Results should have the same number of documents - 
assert len(results_hnsw) == len(results_omega), \ - f"Different number of results: HNSW={len(results_hnsw)}, OMEGA={len(results_omega)}" - - # Verify that OMEGA fallback produces identical results to HNSW - # Since both use the same index structure (HNSW) with identical parameters, - # they should return the exact same documents in the same order with the same scores - for i, (doc_hnsw, doc_omega) in enumerate(zip(results_hnsw, results_omega)): - assert doc_hnsw.id == doc_omega.id, \ - f"Document ID mismatch at position {i}: HNSW={doc_hnsw.id}, OMEGA={doc_omega.id}" - - # Scores should be identical (or very close due to floating point) - assert abs(doc_hnsw.score - doc_omega.score) < 1e-5, \ - f"Score mismatch at position {i} for doc {doc_hnsw.id}: " \ - f"HNSW={doc_hnsw.score}, OMEGA={doc_omega.score}, diff={abs(doc_hnsw.score - doc_omega.score)}" - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/python/zvec/_omega_training.py b/python/zvec/_omega_training.py new file mode 100644 index 000000000..361a1f79b --- /dev/null +++ b/python/zvec/_omega_training.py @@ -0,0 +1,269 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""OMEGA model training module.""" + +import argparse +import os +import sys +import numpy as np + +try: + import lightgbm as lgb + from sklearn.model_selection import train_test_split + from sklearn.isotonic import IsotonicRegression + LIGHTGBM_AVAILABLE = True +except ImportError: + LIGHTGBM_AVAILABLE = False + + +def train_omega_model(csv_path: str, output_dir: str, verbose: bool = False, topk: int = 100): + """Train OMEGA model from CSV training data. + + Args: + csv_path: Path to CSV file with training data + output_dir: Directory to save trained model and tables + verbose: Enable verbose logging + topk: Top-K value used during training data collection (default: 100) + + Returns: + str: Path to the trained model directory + """ + if not LIGHTGBM_AVAILABLE: + raise ImportError( + "LightGBM is required for OMEGA training. " + "Install it with: pip install lightgbm" + ) + + if verbose: + print(f"Loading training data from: {csv_path}") + + # Load CSV data + import pandas as pd + df = pd.read_csv(csv_path) + + # Extract features and labels + # CSV format: query_id,hops_visited,cmps_visited,dist_1st,dist_start,stat_0,...,stat_6,label + query_ids = df['query_id'].values.astype(np.int32) + X = df[['hops_visited', 'cmps_visited', 'dist_1st', 'dist_start', + 'stat_0', 'stat_1', 'stat_2', 'stat_3', 'stat_4', 'stat_5', 'stat_6']].values + y = df['label'].values + + if verbose: + print(f"Loaded {len(df)} training records from {len(np.unique(query_ids))} queries") + print(f"Feature shape: {X.shape}") + print(f"Label distribution: {np.sum(y==0)} negative, {np.sum(y==1)} positive") + + # Create output directory + os.makedirs(output_dir, exist_ok=True) + + # Train LightGBM binary classifier + model_path = os.path.join(output_dir, "model.txt") + threshold_table_path = os.path.join(output_dir, "threshold_table.txt") + + if verbose: + print("Training LightGBM model...") + + # Split data + query_ids_train, query_ids_test, X_train, X_test, y_train, y_test = train_test_split( + 
query_ids, X, y, test_size=0.2, shuffle=False + ) + + # Create datasets + train_data = lgb.Dataset(X_train, label=y_train, free_raw_data=False) + test_data = lgb.Dataset(X_test, label=y_test, reference=train_data, free_raw_data=False) + + # Training parameters + # Calculate scale_pos_weight safely + n_negative = np.sum(y_train == 0) + n_positive = np.sum(y_train == 1) + + if n_positive == 0: + raise ValueError(f"No positive samples in training data! All labels are 0.") + if n_negative == 0: + raise ValueError(f"No negative samples in training data! All labels are 1.") + + scale_pos_weight = n_negative / n_positive + + params = { + 'task': 'train', + 'boosting_type': 'gbdt', + 'objective': 'binary', + 'metric': ['binary_logloss'], + 'num_leaves': 31, + 'boost_from_average': False, + 'learning_rate': 0.1, + 'feature_fraction': 1.0, + 'bagging_fraction': 1.0, + 'bagging_freq': 0, + 'verbose': 0 if not verbose else 1, + 'num_threads': 8, + 'scale_pos_weight': scale_pos_weight, + } + + if verbose: + print(f"Training samples: {len(y_train)} ({n_positive} positive, {n_negative} negative)") + print(f"scale_pos_weight: {scale_pos_weight:.4f}") + + # Train model + num_round = 100 + evals_result = {} + model = lgb.train( + params, + train_data, + valid_sets=[test_data], + num_boost_round=num_round, + callbacks=[lgb.record_evaluation(evals_result)] + ) + + # Save model + model.save_model(model_path) + if verbose: + print(f"Model saved to: {model_path}") + + # Generate threshold table using isotonic regression + if verbose: + print("Generating threshold table...") + + y_pred = model.predict(X_test, num_iteration=model.best_iteration) + + # Calibrate using isotonic regression + isotonic_reg = IsotonicRegression(increasing=True, out_of_bounds='clip') + y_pred_calibrated = isotonic_reg.fit_transform(y_pred, y_test) + + # Generate threshold table + sorted_indices = np.argsort(y_pred) + sorted_confidences = y_pred[sorted_indices] + sorted_probabilities = 
y_pred_calibrated[sorted_indices] + sorted_confidences_10000x = np.round(sorted_confidences * 10000) + + # Remove duplicates + _, unique_indices = np.unique(sorted_confidences_10000x, return_index=True) + unique_confidences = sorted_confidences[unique_indices] + unique_probabilities = sorted_probabilities[unique_indices] + + with open(threshold_table_path, "w") as f: + for conf, prob in zip(unique_confidences, unique_probabilities): + f.write(f"{conf:.4f},{prob:.6f}\n") + + if verbose: + print(f"Threshold table saved to: {threshold_table_path}") + + # Generate placeholder interval table (not used in zvec's current implementation) + interval_table_path = os.path.join(output_dir, "interval_table.txt") + with open(interval_table_path, "w") as f: + for recall_pct in range(0, 101, 1): + recall = recall_pct / 100.0 + initial_interval = max(int(100 * (1 - recall)), 1) + min_interval = max(int(10 * (1 - recall)), 1) + f.write(f"{recall:.2f},{initial_interval},{min_interval}\n") + + if verbose: + print(f"Interval table saved to: {interval_table_path}") + + # Generate placeholder gt_collected_table and gt_cmps_all_table + # These tables require access to ground truth data during search, which is not + # available from the CSV export. They should be generated during the training + # data collection phase in C++. 
+ + # Create empty placeholder files with the correct format + gt_collected_table_path = os.path.join(output_dir, "gt_collected_table.txt") + with open(gt_collected_table_path, "w") as f: + # Format: row_index:value1,value2,...,valueK + # Each row represents a "collected" count, columns are ranks + for collected in range(topk + 1): + row_values = ["1.0" if i < collected else "0.0" for i in range(topk)] + f.write(f"{collected}:{','.join(row_values)}\n") + + if verbose: + print(f"GT collected table (placeholder) saved to: {gt_collected_table_path}") + + gt_cmps_all_table_path = os.path.join(output_dir, "gt_cmps_all_table.txt") + with open(gt_cmps_all_table_path, "w") as f: + # Format: row_index:value1,value2,...,value100 + # Each row represents a rank, columns are percentiles (1-100) + for rank in range(topk + 1): + percentiles = [str(rank * 10 + p) for p in range(100)] # Placeholder values + f.write(f"{rank}:{','.join(percentiles)}\n") + + if verbose: + print(f"GT cmps all table (placeholder) saved to: {gt_cmps_all_table_path}") + + # Print final statistics + if verbose: + print("\nTraining complete!") + print(f"Model directory: {output_dir}") + print("Generated files:") + print(f" - model.txt") + print(f" - threshold_table.txt") + print(f" - interval_table.txt") + print(f" - gt_collected_table.txt (placeholder)") + print(f" - gt_cmps_all_table.txt (placeholder)") + + return output_dir + + +def main(): + parser = argparse.ArgumentParser( + description="Train OMEGA model from collected training data" + ) + parser.add_argument( + "command", + choices=["train"], + help="Command to execute" + ) + parser.add_argument( + "--input", + required=True, + help="Input CSV file with training data" + ) + parser.add_argument( + "--output", + required=True, + help="Output directory for trained model" + ) + parser.add_argument( + "--verbose", + action="store_true", + help="Enable verbose output" + ) + parser.add_argument( + "--topk", + type=int, + default=100, + help="Top-K value 
used during training (default: 100)" + ) + + args = parser.parse_args() + + if args.command == "train": + try: + train_omega_model( + csv_path=args.input, + output_dir=args.output, + verbose=args.verbose, + topk=args.topk + ) + print("✓ Training completed successfully") + sys.exit(0) + except Exception as e: + print(f"✗ Training failed: {e}", file=sys.stderr) + if args.verbose: + import traceback + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/python/zvec/model/param/__init__.pyi b/python/zvec/model/param/__init__.pyi index ea2f34ecd..a038e2ee6 100644 --- a/python/zvec/model/param/__init__.pyi +++ b/python/zvec/model/param/__init__.pyi @@ -21,6 +21,7 @@ __all__: list[str] = [ "IndexOption", "IndexParam", "InvertIndexParam", + "OmegaIndexParam", "OptimizeOption", "QueryParam", "SegmentOption", @@ -523,6 +524,78 @@ class InvertIndexParam(IndexParam): bool: Whether range optimization is enabled for this inverted index. """ +class OmegaIndexParam(VectorIndexParam): + """ + + Parameters for configuring an OMEGA index. + + OMEGA is an advanced graph-based index that can fall back to HNSW when omega + functionality is disabled. This class encapsulates its construction hyperparameters. + + Attributes: + metric_type (MetricType): Distance metric used for similarity computation. + Default is ``MetricType.IP`` (inner product). + m (int): Number of bi-directional links created for every new element + during construction. Higher values improve accuracy but increase + memory usage and construction time. Default is 100. + ef_construction (int): Size of the dynamic candidate list for nearest + neighbors during index construction. Larger values yield better + graph quality at the cost of slower build time. Default is 500. + quantize_type (QuantizeType): Optional quantization type for vector + compression (e.g., FP16, INT8). Default is `QuantizeType.UNDEFINED` to + disable quantization. 
+ + Examples: + >>> from zvec.typing import MetricType, QuantizeType + >>> params = OmegaIndexParam( + ... metric_type=MetricType.COSINE, + ... m=16, + ... ef_construction=200, + ... quantize_type=QuantizeType.INT8 + ... ) + >>> print(params) + {'metric_type': 'IP', 'm': 16, 'ef_construction': 200, 'quantize_type': 'INT8'} + """ + + def __getstate__(self) -> tuple: ... + def __init__( + self, + metric_type: _zvec.typing.MetricType = ..., + m: typing.SupportsInt = 100, + ef_construction: typing.SupportsInt = 500, + quantize_type: _zvec.typing.QuantizeType = ..., + ) -> None: + """ + Constructs an OmegaIndexParam instance. + + Args: + metric_type (MetricType, optional): Distance metric. Defaults to MetricType.IP. + m (int, optional): Number of bi-directional links. Defaults to 100. + ef_construction (int, optional): Candidate list size during construction. + Defaults to 500. + quantize_type (QuantizeType, optional): Vector quantization type. + Defaults to QuantizeType.UNDEFINED. + """ + + def __repr__(self) -> str: ... + def __setstate__(self, arg0: tuple) -> None: ... + def to_dict(self) -> dict: + """ + Convert to dictionary with all fields + """ + + @property + def ef_construction(self) -> int: + """ + int: Candidate list size during index construction. + """ + + @property + def m(self) -> int: + """ + int: Maximum number of neighbors per node in upper layers. + """ + class OptimizeOption: """ diff --git a/python/zvec/model/schema/field_schema.py b/python/zvec/model/schema/field_schema.py index da193dd5c..dad2a3a94 100644 --- a/python/zvec/model/schema/field_schema.py +++ b/python/zvec/model/schema/field_schema.py @@ -23,6 +23,7 @@ HnswIndexParam, InvertIndexParam, IVFIndexParam, + OmegaIndexParam, ) from zvec.typing import DataType @@ -188,19 +189,31 @@ class VectorSchema: data_type (DataType): Vector data type (e.g., VECTOR_FP32, VECTOR_INT8). dimension (int, optional): Dimensionality of the vector. 
Must be > 0 for dense vectors; may be `None` for sparse vectors. - index_param (Union[HnswIndexParam, IVFIndexParam, FlatIndexParam], optional): + index_param (Union[HnswIndexParam, IVFIndexParam, FlatIndexParam, OmegaIndexParam], optional): Index configuration for this vector field. Defaults to - ``HnswIndexParam()``. + ``FlatIndexParam()``. Examples: - >>> from zvec.typing import DataType - >>> from zvec.model.param import HnswIndexParam - >>> emb_field = VectorSchema( + >>> from zvec.typing import DataType, MetricType + >>> from zvec.model.param import HnswIndexParam, OmegaIndexParam + >>> # HNSW index + >>> hnsw_field = VectorSchema( ... name="embedding", ... data_type=DataType.VECTOR_FP32, ... dimension=128, ... index_param=HnswIndexParam(ef_construction=200, m=16) ... ) + >>> # OMEGA index (adaptive graph-based index with automatic training) + >>> omega_field = VectorSchema( + ... name="embedding", + ... data_type=DataType.VECTOR_FP32, + ... dimension=128, + ... index_param=OmegaIndexParam( + ... metric_type=MetricType.COSINE, + ... m=16, + ... ef_construction=200 + ... ) + ... 
) """ def __init__( @@ -209,7 +222,7 @@ def __init__( data_type: DataType, dimension: Optional[int] = 0, index_param: Optional[ - Union[HnswIndexParam, FlatIndexParam, IVFIndexParam] + Union[HnswIndexParam, FlatIndexParam, IVFIndexParam, OmegaIndexParam] ] = None, ): if name is None or not isinstance(name, str): @@ -263,8 +276,8 @@ def dimension(self) -> int: return self._cpp_obj.dimension @property - def index_param(self) -> Union[HnswIndexParam, IVFIndexParam, FlatIndexParam]: - """Union[HnswIndexParam, IVFIndexParam, FlatIndexParam]: Index configuration for the vector.""" + def index_param(self) -> Union[HnswIndexParam, IVFIndexParam, FlatIndexParam, OmegaIndexParam]: + """Union[HnswIndexParam, IVFIndexParam, FlatIndexParam, OmegaIndexParam]: Index configuration for the vector.""" return self._cpp_obj.index_param def __dict__(self) -> dict[str, Any]: diff --git a/src/binding/python/model/param/python_param.cc b/src/binding/python/model/param/python_param.cc index 48333f62a..97680dc41 100644 --- a/src/binding/python/model/param/python_param.cc +++ b/src/binding/python/model/param/python_param.cc @@ -31,6 +31,8 @@ static std::string index_type_to_string(const IndexType type) { return "IVF"; case IndexType::HNSW: return "HNSW"; + case IndexType::OMEGA: + return "OMEGA"; default: return "UNDEFINED"; } diff --git a/src/core/algorithm/hnsw/hnsw_searcher.h b/src/core/algorithm/hnsw/hnsw_searcher.h index 5113a2461..22477c021 100644 --- a/src/core/algorithm/hnsw/hnsw_searcher.h +++ b/src/core/algorithm/hnsw/hnsw_searcher.h @@ -112,18 +112,9 @@ class HnswSearcher : public IndexSearcher { int update_context(HnswContext *ctx) const; protected: - uint32_t ef_{HnswEntity::kDefaultEf}; - - private: enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 }; - HnswSearcherEntity entity_{}; - HnswAlgorithm::UPointer alg_; // impl graph algorithm - - IndexMetric::Pointer metric_{}; - IndexMeta meta_{}; - ailego::Params params_{}; - Stats stats_; + uint32_t 
ef_{HnswEntity::kDefaultEf}; uint32_t max_scan_num_{0U}; uint32_t bruteforce_threshold_{HnswEntity::kDefaultBruteForceThreshold}; float max_scan_ratio_{HnswEntity::kDefaultScanRatio}; @@ -133,8 +124,16 @@ class HnswSearcher : public IndexSearcher { bool force_padding_topk_enabled_{false}; float bf_negative_probility_{HnswEntity::kDefaultBFNegativeProbility}; uint32_t magic_{0U}; - State state_{STATE_INIT}; + HnswSearcherEntity entity_{}; + IndexMetric::Pointer metric_{}; + IndexMeta meta_{}; + + private: + HnswAlgorithm::UPointer alg_; // impl graph algorithm + + ailego::Params params_{}; + Stats stats_; }; } // namespace core diff --git a/src/core/algorithm/hnsw/hnsw_streamer.h b/src/core/algorithm/hnsw/hnsw_streamer.h index daadd8dd4..85f4e184a 100644 --- a/src/core/algorithm/hnsw/hnsw_streamer.h +++ b/src/core/algorithm/hnsw/hnsw_streamer.h @@ -163,7 +163,8 @@ class HnswStreamer : public IndexStreamer { //! current streamer/searcher int update_context(HnswContext *ctx) const; - private: + protected: + // Changed from private to protected to allow OmegaStreamer inheritance enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_OPENED = 2 }; class Stats : public IndexStreamer::Stats { public: diff --git a/src/core/algorithm/omega/CMakeLists.txt b/src/core/algorithm/omega/CMakeLists.txt index 9358aaa3e..e70c668eb 100644 --- a/src/core/algorithm/omega/CMakeLists.txt +++ b/src/core/algorithm/omega/CMakeLists.txt @@ -6,6 +6,6 @@ cc_library( STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework core_knn_hnsw omega - INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm ${PROJECT_ROOT_DIR}/thirdparty/omega/include + INCS . 
${PROJECT_ROOT_DIR}/src/include ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm ${PROJECT_ROOT_DIR}/thirdparty/omega/include VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/algorithm/omega/omega_context.h b/src/core/algorithm/omega/omega_context.h new file mode 100644 index 000000000..fa053e452 --- /dev/null +++ b/src/core/algorithm/omega/omega_context.h @@ -0,0 +1,68 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "../hnsw/hnsw_context.h" +#include "omega_params.h" + +namespace zvec { +namespace core { + +/** + * OmegaContext extends HnswContext to support OMEGA-specific parameters + * like target_recall that can be set per-query. + */ +class OmegaContext : public HnswContext { + public: + //! Constructor + OmegaContext(size_t dimension, const IndexMetric::Pointer &metric, + const HnswEntity::Pointer &entity) + : HnswContext(dimension, metric, entity), target_recall_(0.95f) {} + + //! Constructor + OmegaContext(const IndexMetric::Pointer &metric, + const HnswEntity::Pointer &entity) + : HnswContext(metric, entity), target_recall_(0.95f) {} + + //! Destructor + virtual ~OmegaContext() = default; + + //! Get target recall for this query + float target_recall() const { + return target_recall_; + } + + //! 
Update context parameters (overrides HnswContext::update) + int update(const ailego::Params ¶ms) override { + // First call parent to update HNSW parameters + int ret = HnswContext::update(params); + if (ret != 0) { + return ret; + } + + // Extract OMEGA-specific parameters + if (params.has(PARAM_OMEGA_SEARCHER_TARGET_RECALL)) { + params.get(PARAM_OMEGA_SEARCHER_TARGET_RECALL, &target_recall_); + } + + return 0; + } + + private: + float target_recall_; // Per-query target recall +}; + +} // namespace core +} // namespace zvec diff --git a/src/core/algorithm/omega/omega_params.h b/src/core/algorithm/omega/omega_params.h new file mode 100644 index 000000000..c94fb87f4 --- /dev/null +++ b/src/core/algorithm/omega/omega_params.h @@ -0,0 +1,29 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include + +namespace zvec::core { + +// OMEGA searcher parameters (used at query time) +static const std::string PARAM_OMEGA_SEARCHER_TARGET_RECALL( + "proxima.omega.searcher.target_recall"); + +// OMEGA streamer parameters (used at index time) +static const std::string PARAM_OMEGA_STREAMER_TARGET_RECALL( + "proxima.omega.streamer.target_recall"); + +} // namespace zvec::core diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 19ed34101..18a81ee29 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -13,9 +13,12 @@ // limitations under the License. #include "omega_searcher.h" +#include "omega_context.h" +#include "omega_params.h" #include #include #include +#include #include "../hnsw/hnsw_context.h" #include #include @@ -31,7 +34,9 @@ OmegaSearcher::OmegaSearcher(void) use_omega_mode_(false), target_recall_(0.95f), min_vector_threshold_(10000), - current_vector_count_(0) {} + current_vector_count_(0), + training_mode_enabled_(false), + current_query_id_(0) {} OmegaSearcher::~OmegaSearcher(void) { this->cleanup(); @@ -122,6 +127,45 @@ int OmegaSearcher::unload(void) { return HnswSearcher::unload(); } +IndexSearcher::Context::Pointer OmegaSearcher::create_context() const { + if (ailego_unlikely(state_ != STATE_LOADED)) { + LOG_ERROR("Load the index first before create context"); + return Context::Pointer(); + } + const HnswEntity::Pointer search_ctx_entity = entity_.clone(); + if (!search_ctx_entity) { + LOG_ERROR("Failed to create search context entity"); + return Context::Pointer(); + } + + // Create OmegaContext instead of HnswContext + OmegaContext *ctx = new (std::nothrow) + OmegaContext(meta_.dimension(), metric_, search_ctx_entity); + if (ailego_unlikely(ctx == nullptr)) { + LOG_ERROR("Failed to new OmegaContext"); + return Context::Pointer(); + } + + // Initialize context with HNSW parameters + ctx->set_ef(ef_); + 
ctx->set_max_scan_num(max_scan_num_); + uint32_t filter_mode = + bf_enabled_ ? VisitFilter::BloomFilter : VisitFilter::ByteMap; + ctx->set_filter_mode(filter_mode); + ctx->set_filter_negative_probility(bf_negative_probility_); + ctx->set_magic(magic_); + ctx->set_force_padding_topk(force_padding_topk_enabled_); + ctx->set_bruteforce_threshold(bruteforce_threshold_); + + if (ailego_unlikely(ctx->init(HnswContext::kSearcherContext)) != 0) { + LOG_ERROR("Init OmegaContext failed"); + delete ctx; + return Context::Pointer(); + } + + return Context::Pointer(ctx); +} + int OmegaSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, ContextPointer &context) const { @@ -134,35 +178,79 @@ int OmegaSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta, return adaptive_search(query, qmeta, count, context); } +// Training mode method implementations +zvec::Status OmegaSearcher::EnableTrainingMode(bool enable) { + std::lock_guard lock(training_mutex_); + training_mode_enabled_ = enable; + + if (enable) { + LOG_INFO("OMEGA training mode ENABLED - early stopping will be disabled"); + } else { + LOG_INFO("OMEGA training mode DISABLED"); + } + + return zvec::Status::OK(); +} + +void OmegaSearcher::SetCurrentQueryId(int query_id) { + current_query_id_ = query_id; +} + +std::vector OmegaSearcher::GetTrainingRecords() const { + std::lock_guard lock(training_mutex_); + return collected_records_; // Return a copy +} + +void OmegaSearcher::ClearTrainingRecords() { + std::lock_guard lock(training_mutex_); + collected_records_.clear(); + LOG_INFO("Cleared %zu training records", collected_records_.size()); +} + int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmeta, uint32_t count, ContextPointer &context) const { + // Cast context to OmegaContext to access OMEGA-specific features + auto *omega_ctx = dynamic_cast(context.get()); + if (omega_ctx == nullptr) { + LOG_ERROR("Context is not OmegaContext"); + return 
IndexError_InvalidArgument; + } + + // Read target_recall from context (per-query parameter) + float target_recall = omega_ctx->target_recall(); + // Create OMEGA search context with parameters (stateful interface) + // In training mode, pass NULL if model is not loaded + OmegaModelHandle model_to_use = omega_model_; + if (training_mode_enabled_ && (omega_model_ == nullptr || !omega_model_is_loaded(omega_model_))) { + model_to_use = nullptr; // Training mode without model: collect features only + } + OmegaSearchHandle omega_search = omega_search_create_with_params( - omega_model_, target_recall_, count, 100); // window_size=100 + model_to_use, target_recall, count, 100); // window_size=100 if (omega_search == nullptr) { LOG_WARN("Failed to create OMEGA search context, falling back to HNSW"); return HnswSearcher::search_impl(query, qmeta, count, context); } - // Cast context to HnswContext to access HNSW-specific features - auto *hnsw_ctx = dynamic_cast(context.get()); - if (hnsw_ctx == nullptr) { - LOG_ERROR("Context is not HnswContext"); - omega_search_destroy(omega_search); - return IndexError_InvalidArgument; + // Enable training mode if active (CRITICAL: must be before search) + if (training_mode_enabled_) { + omega_search_enable_training(omega_search, current_query_id_); + LOG_DEBUG("Training mode enabled for query_id=%d", current_query_id_); } + // OmegaContext extends HnswContext, so we can use it directly // Initialize query in distance calculator - hnsw_ctx->reset_query(query); + omega_ctx->reset_query(query); // Get entity and distance calculator - const auto &entity = hnsw_ctx->get_entity(); - auto &dc = hnsw_ctx->dist_calculator(); - auto &visit_filter = hnsw_ctx->visit_filter(); - auto &candidates = hnsw_ctx->candidates(); - auto &topk_heap = hnsw_ctx->topk_heap(); + const auto &entity = omega_ctx->get_entity(); + auto &dc = omega_ctx->dist_calculator(); + auto &visit_filter = omega_ctx->visit_filter(); + auto &candidates = omega_ctx->candidates(); + 
auto &topk_heap = omega_ctx->topk_heap(); // Use ef from parent class (now protected, so accessible) uint32_t ef = ef_; @@ -229,14 +317,17 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet break; } - // OMEGA early stopping check - if (omega_search_should_predict(omega_search)) { - if (omega_search_should_stop(omega_search)) { - int hops, cmps, collected_gt; - omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); - LOG_DEBUG("OMEGA early stop: cmps=%d, hops=%d, collected_gt=%d", - cmps, hops, collected_gt); - break; + // OMEGA early stopping check (CRITICAL: disabled in training mode) + // Training mode requires full search to collect complete feature data + if (!training_mode_enabled_) { + if (omega_search_should_predict(omega_search)) { + if (omega_search_should_stop(omega_search)) { + int hops, cmps, collected_gt; + omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); + LOG_DEBUG("OMEGA early stop: cmps=%d, hops=%d, collected_gt=%d", + cmps, hops, collected_gt); + break; + } } } @@ -304,7 +395,7 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet } // Convert results to context format - hnsw_ctx->topk_to_result(); + omega_ctx->topk_to_result(); // Get final statistics int hops, cmps, collected_gt; @@ -312,6 +403,51 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu", cmps, hops, topk_heap.size()); + // Collect training records if in training mode + if (training_mode_enabled_) { + size_t record_count = omega_search_get_training_records_count(omega_search); + if (record_count > 0) { + const void* records_ptr = omega_search_get_training_records(omega_search); + + // Cast to omega::TrainingRecord array + const auto* omega_records = static_cast(records_ptr); + + // Convert and store training records + std::lock_guard lock(training_mutex_); + for (size_t i = 0; i < record_count; 
++i) { + core_interface::TrainingRecord record; + record.query_id = omega_records[i].query_id; + record.hops_visited = omega_records[i].hops; + record.cmps_visited = omega_records[i].cmps; + record.dist_1st = omega_records[i].dist_1st; + record.dist_start = omega_records[i].dist_start; + + // Copy 7 traversal window statistics + if (omega_records[i].traversal_window_stats.size() == 7) { + std::copy(omega_records[i].traversal_window_stats.begin(), + omega_records[i].traversal_window_stats.end(), + record.traversal_window_stats.begin()); + } else { + LOG_WARN("Unexpected traversal_window_stats size: %zu (expected 7)", + omega_records[i].traversal_window_stats.size()); + } + + // Copy collected node IDs (convert from int to uint64_t) + record.collected_node_ids.reserve(omega_records[i].collected_node_ids.size()); + for (int node_id : omega_records[i].collected_node_ids) { + record.collected_node_ids.push_back(static_cast(node_id)); + } + + record.label = omega_records[i].label; // Default 0 + + collected_records_.push_back(std::move(record)); + } + + LOG_DEBUG("Collected %zu training records for query_id=%d", + record_count, current_query_id_); + } + } + // Cleanup omega_search_destroy(omega_search); diff --git a/src/core/algorithm/omega/omega_searcher.h b/src/core/algorithm/omega/omega_searcher.h index 126c04a88..bbd2057b9 100644 --- a/src/core/algorithm/omega/omega_searcher.h +++ b/src/core/algorithm/omega/omega_searcher.h @@ -14,8 +14,12 @@ #pragma once #include +#include +#include #include "../hnsw/hnsw_searcher.h" #include +#include +#include namespace zvec { namespace core { @@ -32,6 +36,61 @@ class OmegaSearcher : public HnswSearcher { OmegaSearcher(const OmegaSearcher &) = delete; OmegaSearcher &operator=(const OmegaSearcher &) = delete; + public: + // OMEGA Training Mode Support + /** + * @brief Enable or disable training mode for collecting training features. 
+ * + * When training mode is enabled: + * - Early stopping is disabled (complete HNSW search) + * - Training features are collected for each visited node + * - query_id must be set via SetCurrentQueryId() before each search + * + * @param enable True to enable training mode, false to disable + * @return Status indicating success or failure + */ + zvec::Status EnableTrainingMode(bool enable); + + /** + * @brief Set the query ID for the next search operation. + * + * Must be called before search_impl() when training mode is enabled. + * The query_id will be included in all training records collected + * during that search. + * + * @param query_id Unique identifier for the query + */ + void SetCurrentQueryId(int query_id); + + /** + * @brief Get all collected training records. + * + * Returns a copy of all training records collected since training mode + * was enabled or since the last ClearTrainingRecords() call. + * + * @return Vector of TrainingRecord structures + */ + std::vector GetTrainingRecords() const; + + /** + * @brief Clear all collected training records. + * + * Removes all training records from internal storage. Useful for + * starting a fresh training data collection session. + */ + void ClearTrainingRecords(); + + /** + * @brief Public search method for OmegaStreamer to call + * + * This allows OmegaStreamer to delegate search to OmegaSearcher + * without needing to access protected methods. + */ + int search(const void *query, const IndexQueryMeta &qmeta, + uint32_t count, ContextPointer &context) const { + return search_impl(query, qmeta, count, context); + } + protected: //! Initialize Searcher virtual int init(const ailego::Params ¶ms) override; @@ -57,6 +116,9 @@ class OmegaSearcher : public HnswSearcher { uint32_t count, ContextPointer &context) const override; + //! 
Create a searcher context (creates OmegaContext instead of HnswContext) + virtual ContextPointer create_context() const override; + // TODO: These methods call protected methods of HnswSearcher and need to be fixed /* //! Fetch vector by key (delegate to HNSW) @@ -97,6 +159,12 @@ class OmegaSearcher : public HnswSearcher { private: //! Check if OMEGA mode should be used bool should_use_omega() const { + // Use OMEGA adaptive search if: + // 1. Training mode is enabled (to collect features even without model), OR + // 2. OMEGA is enabled and model is loaded + if (training_mode_enabled_) { + return true; // Always use adaptive_search in training mode + } return omega_enabled_ && use_omega_mode_ && omega_model_ != nullptr && omega_model_is_loaded(omega_model_); @@ -115,6 +183,12 @@ class OmegaSearcher : public HnswSearcher { uint32_t min_vector_threshold_; size_t current_vector_count_; std::string model_dir_; + + // Training mode support + bool training_mode_enabled_; + int current_query_id_; + mutable std::mutex training_mutex_; + mutable std::vector collected_records_; }; } // namespace core diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index bdbae4f15..cc7c8c86d 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -13,48 +13,360 @@ // limitations under the License. 
#include "omega_streamer.h" -#include #include +#include #include +#include "../hnsw/hnsw_entity.h" +#include "../hnsw/hnsw_context.h" +#include +#include namespace zvec { namespace core { -OmegaStreamer::OmegaStreamer(void) : hnsw_streamer_(nullptr) {} +int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, + Context::Pointer &context) const { + return search_impl(query, qmeta, 1, context); +} + +int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, + uint32_t count, + Context::Pointer &context) const { + fprintf(stderr, "[DEBUG] OmegaStreamer::search_impl called, training_mode_enabled_=%d, current_query_id_=%d\n", + training_mode_enabled_, current_query_id_); + fflush(stderr); + + // In training mode, use OMEGA library's training feature collection + if (!training_mode_enabled_) { + // Normal mode: just use parent HNSW search for now + // TODO: Load OMEGA model and use adaptive search for inference + fprintf(stderr, "[DEBUG] OmegaStreamer: training mode disabled, using parent HNSW search\n"); + fflush(stderr); + LOG_DEBUG("OmegaStreamer: training mode disabled, using parent HNSW search"); + return HnswStreamer::search_impl(query, qmeta, count, context); + } + + fprintf(stderr, "[DEBUG] OmegaStreamer: training mode ENABLED, proceeding with OMEGA training\n"); + fflush(stderr); + LOG_INFO("OmegaStreamer: training mode enabled (query_id=%d), using OMEGA library to collect features", current_query_id_); + + // Training mode: Use OMEGA library with nullptr model (training-only mode) + // The OMEGA library will collect training features automatically + + // Create OMEGA search context in training mode (model=nullptr) + float target_recall = 0.95f; // Default target recall + OmegaSearchHandle omega_search = omega_search_create_with_params( + nullptr, target_recall, count, 100); // model=nullptr for training mode + + fprintf(stderr, "[DEBUG] omega_search_create_with_params returned: %p\n", (void*)omega_search); + 
fflush(stderr); + + if (omega_search == nullptr) { + LOG_ERROR("Failed to create OMEGA search context for training mode"); + return IndexError_Runtime; + } + + // Enable training mode (CRITICAL: must be before search) + omega_search_enable_training(omega_search, current_query_id_); + fprintf(stderr, "[DEBUG] omega_search_enable_training called for query_id=%d\n", current_query_id_); + fflush(stderr); + LOG_DEBUG("Training mode enabled for query_id=%d", current_query_id_); + + // Cast context to HnswContext to access HNSW-specific features + auto *hnsw_ctx = dynamic_cast(context.get()); + if (hnsw_ctx == nullptr) { + fprintf(stderr, "[DEBUG] FAILED: Context is not HnswContext\n"); + fflush(stderr); + LOG_ERROR("Context is not HnswContext"); + omega_search_destroy(omega_search); + return IndexError_InvalidArgument; + } + fprintf(stderr, "[DEBUG] Successfully cast context to HnswContext\n"); + fflush(stderr); + + // Initialize query in distance calculator + hnsw_ctx->reset_query(query); + fprintf(stderr, "[DEBUG] Query reset in distance calculator\n"); + fflush(stderr); + + // Get entity and distance calculator from context + const auto &entity = hnsw_ctx->get_entity(); + auto &dc = hnsw_ctx->dist_calculator(); + auto &visit_filter = hnsw_ctx->visit_filter(); + auto &candidates = hnsw_ctx->candidates(); + auto &topk_heap = hnsw_ctx->topk_heap(); + fprintf(stderr, "[DEBUG] Got entity and distance calculator from context\n"); + fflush(stderr); + + // Get entry point + auto max_level = entity.cur_max_level(); + auto entry_point = entity.entry_point(); + + fprintf(stderr, "[DEBUG] Entry point: %lu, max_level: %d\n", + static_cast(entry_point), static_cast(max_level)); + fflush(stderr); + + if (entry_point == kInvalidNodeId) { + fprintf(stderr, "[DEBUG] Entry point is INVALID, returning early (no nodes in index)\n"); + fflush(stderr); + omega_search_destroy(omega_search); + return 0; + } + + // Navigate to layer 0 + dist_t dist = dc.dist(entry_point); + fprintf(stderr, 
"[DEBUG] Starting navigation from level %d, initial dist=%f\n", + static_cast(max_level), dist); + fflush(stderr); + + for (level_t cur_level = max_level; cur_level >= 1; --cur_level) { + const Neighbors neighbors = entity.get_neighbors(cur_level, entry_point); + if (neighbors.size() == 0) { + fprintf(stderr, "[DEBUG] No neighbors at level %d, breaking\n", static_cast(cur_level)); + fflush(stderr); + break; + } + + std::vector neighbor_vec_blocks; + int ret = entity.get_vector(&neighbors[0], neighbors.size(), neighbor_vec_blocks); + if (ret != 0) { + fprintf(stderr, "[DEBUG] Failed to get vectors at level %d, breaking\n", static_cast(cur_level)); + fflush(stderr); + break; + } + + bool find_closer = false; + for (uint32_t i = 0; i < neighbors.size(); ++i) { + const void *neighbor_vec = neighbor_vec_blocks[i].data(); + dist_t cur_dist = dc.dist(neighbor_vec); + if (cur_dist < dist) { + entry_point = neighbors[i]; + dist = cur_dist; + find_closer = true; + } + } + if (!find_closer) { + fprintf(stderr, "[DEBUG] No closer neighbor at level %d, breaking\n", static_cast(cur_level)); + fflush(stderr); + break; + } + } + + fprintf(stderr, "[DEBUG] Reached layer 0, entry_point=%lu, dist=%f\n", + static_cast(entry_point), dist); + fflush(stderr); + + // Set dist_start for OMEGA + omega_search_set_dist_start(omega_search, dist); + fprintf(stderr, "[DEBUG] omega_search_set_dist_start called with dist=%f\n", dist); + fflush(stderr); + + // Now perform HNSW search on layer 0 with OMEGA feature collection + candidates.clear(); + visit_filter.clear(); + topk_heap.clear(); + + // Add entry point to search + visit_filter.set_visited(entry_point); + topk_heap.emplace(entry_point, dist); + candidates.emplace(entry_point, dist); + + // Report initial visit to OMEGA + omega_search_report_visit(omega_search, entry_point, dist, 1); // is_in_topk=1 + fprintf(stderr, "[DEBUG] omega_search_report_visit called for entry_point=%lu, dist=%f, is_in_topk=1\n", + static_cast(entry_point), dist); + 
fflush(stderr); + + dist_t lowerBound = dist; + + int loop_iterations = 0; + int total_visits = 0; + + // Main search loop with OMEGA feature collection + while (!candidates.empty()) { + loop_iterations++; + if (loop_iterations == 1 || loop_iterations % 10 == 0) { + fprintf(stderr, "[DEBUG] Search loop iteration %d, candidates.size()=%zu, topk_heap.size()=%zu\n", + loop_iterations, candidates.size(), topk_heap.size()); + fflush(stderr); + } + + auto top = candidates.begin(); + node_id_t current_node = top->first; + dist_t candidate_dist = top->second; + + // Standard HNSW stopping condition + if (topk_heap.full() && candidate_dist > lowerBound) { + fprintf(stderr, "[DEBUG] Stopping condition met: topk_heap.full()=%d, candidate_dist=%f > lowerBound=%f\n", + topk_heap.full(), candidate_dist, lowerBound); + fflush(stderr); + break; + } -OmegaStreamer::~OmegaStreamer(void) { - this->cleanup(); + candidates.pop(); + + // Report hop to OMEGA + omega_search_report_hop(omega_search); + + // Get neighbors of current node + const Neighbors neighbors = entity.get_neighbors(0, current_node); + if (neighbors.size() == 0) continue; + + // Prepare to compute distances + std::vector unvisited_neighbors; + for (uint32_t i = 0; i < neighbors.size(); ++i) { + node_id_t neighbor = neighbors[i]; + if (!visit_filter.visited(neighbor)) { + visit_filter.set_visited(neighbor); + unvisited_neighbors.push_back(neighbor); + } + } + + if (unvisited_neighbors.empty()) continue; + + // Get neighbor vectors + std::vector neighbor_vec_blocks; + int ret = entity.get_vector(unvisited_neighbors.data(), + unvisited_neighbors.size(), + neighbor_vec_blocks); + if (ret != 0) { + fprintf(stderr, "[DEBUG] Failed to get neighbor vectors, breaking\n"); + fflush(stderr); + break; + } + + // Compute distances and update candidates + for (size_t i = 0; i < unvisited_neighbors.size(); ++i) { + node_id_t neighbor = unvisited_neighbors[i]; + const void *neighbor_vec = neighbor_vec_blocks[i].data(); + dist_t 
neighbor_dist = dc.dist(neighbor_vec); + + // Check if this node will be in topk + bool is_in_topk = (!topk_heap.full() || neighbor_dist < lowerBound); + + // Report visit to OMEGA (this will collect training features) + omega_search_report_visit(omega_search, neighbor, neighbor_dist, is_in_topk ? 1 : 0); + total_visits++; + + // Consider this candidate + if (is_in_topk) { + candidates.emplace(neighbor, neighbor_dist); + topk_heap.emplace(neighbor, neighbor_dist); + + // Update lowerBound + if (neighbor_dist < lowerBound) { + lowerBound = neighbor_dist; + } + + // Update lowerBound to the worst distance in topk if topk is full + if (topk_heap.full()) { + lowerBound = topk_heap[0].second; // Max heap, so [0] is the worst + } + } + } + } + + fprintf(stderr, "[DEBUG] Search loop completed: %d iterations, %d total visits, topk_heap.size()=%zu\n", + loop_iterations, total_visits, topk_heap.size()); + fflush(stderr); + + // Convert results to context format + hnsw_ctx->topk_to_result(); + + // Get final statistics + int hops, cmps, collected_gt; + omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); + fprintf(stderr, "[DEBUG] omega_search_get_stats: hops=%d, cmps=%d, collected_gt=%d\n", + hops, cmps, collected_gt); + fflush(stderr); + LOG_DEBUG("OMEGA training search completed: cmps=%d, hops=%d, results=%zu", + cmps, hops, topk_heap.size()); + + // Collect training records from OMEGA library + size_t record_count = omega_search_get_training_records_count(omega_search); + fprintf(stderr, "[DEBUG] omega_search_get_training_records_count returned: %zu\n", record_count); + fflush(stderr); + + if (record_count > 0) { + fprintf(stderr, "[DEBUG] Extracting %zu training records...\n", record_count); + fflush(stderr); + + const void* records_ptr = omega_search_get_training_records(omega_search); + + // Cast to omega::TrainingRecord array + const auto* omega_records = static_cast(records_ptr); + + // Convert and store training records + std::lock_guard 
lock(training_mutex_); + for (size_t i = 0; i < record_count; ++i) { + core_interface::TrainingRecord record; + record.query_id = omega_records[i].query_id; + record.hops_visited = omega_records[i].hops; + record.cmps_visited = omega_records[i].cmps; + record.dist_1st = omega_records[i].dist_1st; + record.dist_start = omega_records[i].dist_start; + + // Copy 7 traversal window statistics + if (omega_records[i].traversal_window_stats.size() == 7) { + std::copy(omega_records[i].traversal_window_stats.begin(), + omega_records[i].traversal_window_stats.end(), + record.traversal_window_stats.begin()); + } else { + LOG_WARN("Unexpected traversal_window_stats size: %zu (expected 7)", + omega_records[i].traversal_window_stats.size()); + } + + // Copy collected_node_ids (convert int to node_id_t) + record.collected_node_ids.assign( + omega_records[i].collected_node_ids.begin(), + omega_records[i].collected_node_ids.end()); + + record.label = omega_records[i].label; // Default 0 + + collected_records_.push_back(std::move(record)); + } + + fprintf(stderr, "[DEBUG] Successfully collected %zu training records for query_id=%d\n", + record_count, current_query_id_); + fflush(stderr); + LOG_DEBUG("Collected %zu training records for query_id=%d", + record_count, current_query_id_); + } else { + fprintf(stderr, "[DEBUG] WARNING: No training records collected for query_id=%d\n", current_query_id_); + fflush(stderr); + LOG_WARN("No training records collected for query_id=%d", current_query_id_); + } + + // Destroy OMEGA search context + omega_search_destroy(omega_search); + + return 0; } -int OmegaStreamer::init(const IndexMeta &imeta, const ailego::Params ¶ms) { - params_ = params; +int OmegaStreamer::dump(const IndexDumper::Pointer &dumper) { + LOG_INFO("OmegaStreamer dump"); + + // Lock the shared mutex (from HnswStreamer base class) + shared_mutex_.lock(); + AILEGO_DEFER([&]() { shared_mutex_.unlock(); }); - // TODO: Fix design - cannot call protected init method of HnswStreamer - 
// For now, return NotImplemented error - LOG_ERROR("OmegaStreamer is not yet fully implemented - wrapper design needs fixing"); - return IndexError_NotImplemented; + // CRITICAL: Set "OmegaSearcher" instead of "HnswSearcher" + // This ensures IndexFlow will create OmegaSearcher (with training support) + // when the index is loaded from disk + meta_.set_searcher("OmegaSearcher", HnswEntity::kRevision, ailego::Params()); - /* - // Create underlying HNSW streamer - hnsw_streamer_ = std::make_shared(); - int ret = hnsw_streamer_->init(imeta, params); + int ret = IndexHelper::SerializeToDumper(meta_, dumper.get()); if (ret != 0) { - LOG_ERROR("Failed to initialize HNSW streamer"); + LOG_ERROR("Failed to serialize meta into dumper."); return ret; } - LOG_INFO("OmegaStreamer initialized"); - return 0; - */ + // Delegate to parent class's entity dump + return entity_.dump(dumper); } -int OmegaStreamer::cleanup(void) { - // Since init returns NotImplemented, cleanup does nothing - return 0; -} +// Register OmegaStreamer with the factory +INDEX_FACTORY_REGISTER_STREAMER(OmegaStreamer); } // namespace core } // namespace zvec - -// TODO: Fix OmegaStreamer design - it tries to call protected methods of HnswStreamer -// INDEX_FACTORY_REGISTER_STREAMER(zvec::core::OmegaStreamer); diff --git a/src/core/algorithm/omega/omega_streamer.h b/src/core/algorithm/omega/omega_streamer.h index 9e54631cb..2ebe6ac85 100644 --- a/src/core/algorithm/omega/omega_streamer.h +++ b/src/core/algorithm/omega/omega_streamer.h @@ -13,129 +13,69 @@ // limitations under the License. #pragma once -#include #include "../hnsw/hnsw_streamer.h" +#include +#include +#include namespace zvec { namespace core { -//! 
OMEGA Index Streamer - wraps HNSW streamer -class OmegaStreamer : public IndexStreamer { +/** + * @brief OMEGA Index Streamer + * + * Inherits from HnswStreamer and overrides dump() to set "OmegaSearcher" + * as the searcher type, ensuring that disk-persisted indices will use + * OmegaSearcher (with training support) when loaded. + * + * For in-memory indices, currently delegates to parent HNSW search. + * Future: Implement adaptive search with OMEGA C API directly. + */ +class OmegaStreamer : public HnswStreamer { public: - using ContextPointer = IndexStreamer::Context::Pointer; - - OmegaStreamer(void); - virtual ~OmegaStreamer(void); + OmegaStreamer(void) : HnswStreamer() {} + virtual ~OmegaStreamer(void) {} OmegaStreamer(const OmegaStreamer &streamer) = delete; OmegaStreamer &operator=(const OmegaStreamer &streamer) = delete; - protected: - //! Initialize Streamer - virtual int init(const IndexMeta &imeta, - const ailego::Params ¶ms) override; - - //! Cleanup Streamer - virtual int cleanup(void) override; - - // TODO: These methods call protected methods and need to be fixed - /* - //! Add a vector into index (delegate to HNSW) - virtual int add_impl(uint64_t pkey, const void *query, - const IndexQueryMeta &qmeta, - Context::Pointer &context) override { - return hnsw_streamer_->add_impl(pkey, query, qmeta, context); + // Training mode support (for future implementation) + void EnableTrainingMode(bool enable) { training_mode_enabled_ = enable; } + void SetCurrentQueryId(int query_id) { current_query_id_ = query_id; } + std::vector GetTrainingRecords() const { + std::lock_guard lock(training_mutex_); + return collected_records_; } - - //! 
Add a vector with id into index (delegate to HNSW) - virtual int add_with_id_impl(uint32_t id, const void *query, - const IndexQueryMeta &qmeta, - Context::Pointer &context) override { - return hnsw_streamer_->add_with_id_impl(id, query, qmeta, context); + void ClearTrainingRecords() { + std::lock_guard lock(training_mutex_); + collected_records_.clear(); } - //! Similarity search (delegate to HNSW) + protected: + /** + * @brief Override search to potentially use OMEGA adaptive search + * + * Currently delegates to parent HNSW search. + * Future: Implement OMEGA adaptive search with learned early stopping. + */ virtual int search_impl(const void *query, const IndexQueryMeta &qmeta, - Context::Pointer &context) const override { - return hnsw_streamer_->search_impl(query, qmeta, context); - } + Context::Pointer &context) const override; - //! Similarity search (delegate to HNSW) virtual int search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, - Context::Pointer &context) const override { - return hnsw_streamer_->search_impl(query, qmeta, count, context); - } - */ - - // TODO: These methods call protected methods and need to be fixed - /* - //! Similarity brute force search (delegate to HNSW) - virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta, - Context::Pointer &context) const override { - return hnsw_streamer_->search_bf_impl(query, qmeta, context); - } - - //! Similarity brute force search (delegate to HNSW) - virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta, - uint32_t count, - Context::Pointer &context) const override { - return hnsw_streamer_->search_bf_impl(query, qmeta, count, context); - } - - //! 
Linear search by primary keys (delegate to HNSW) - virtual int search_bf_by_p_keys_impl( - const void *query, const std::vector> &p_keys, - const IndexQueryMeta &qmeta, ContextPointer &context) const override { - return hnsw_streamer_->search_bf_by_p_keys_impl(query, p_keys, qmeta, - context); - } + Context::Pointer &context) const override; - //! Linear search by primary keys (delegate to HNSW) - virtual int search_bf_by_p_keys_impl( - const void *query, const std::vector> &p_keys, - const IndexQueryMeta &qmeta, uint32_t count, - ContextPointer &context) const override { - return hnsw_streamer_->search_bf_by_p_keys_impl(query, p_keys, qmeta, - count, context); - } - */ - - // TODO: These methods call protected methods and need to be fixed - /* - //! Remove a vector from index (delegate to HNSW) - virtual int remove_impl(uint64_t pkey, Context::Pointer &context) override { - return hnsw_streamer_->remove_impl(pkey, context); - } - - //! Fetch vector by key (delegate to HNSW) - virtual const void *get_vector(uint64_t key) const override { - return hnsw_streamer_->get_vector(key); - } - - //! Retrieve statistics (delegate to HNSW) - virtual const Stats &stats(void) const override { - return hnsw_streamer_->stats(); - } - - //! Retrieve meta of index (delegate to HNSW) - virtual const IndexMeta &meta(void) const override { - return hnsw_streamer_->meta(); - } - - //! 
Retrieve params of index - NOTE: Not overriding base class method - const ailego::Params ¶ms(void) const { - return params_; - } - - virtual void print_debug_info() override { - hnsw_streamer_->print_debug_info(); - } - */ + /** + * @brief Override dump to set "OmegaSearcher" instead of "HnswSearcher" + */ + virtual int dump(const IndexDumper::Pointer &dumper) override; private: - std::shared_ptr hnsw_streamer_; - ailego::Params params_{}; + // Training mode state (for future implementation) + bool training_mode_enabled_{false}; + int current_query_id_{0}; + mutable std::mutex training_mutex_{}; + mutable std::vector collected_records_{}; }; } // namespace core diff --git a/src/core/interface/CMakeLists.txt b/src/core/interface/CMakeLists.txt index 82b4fa78a..2c1cdab3f 100644 --- a/src/core/interface/CMakeLists.txt +++ b/src/core/interface/CMakeLists.txt @@ -4,7 +4,7 @@ include(${PROJECT_ROOT_DIR}/cmake/option.cmake) cc_library( NAME core_interface STATIC STRICT ALWAYS_LINK SRCS *.cc indexes/*.cc - INCS . ${PROJECT_ROOT_DIR}/src/ ${PROJECT_ROOT_DIR}/src/core + INCS . 
${PROJECT_ROOT_DIR}/src/include ${PROJECT_ROOT_DIR}/src/ ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/thirdparty/omega/include LIBS zvec_ailego core_framework sparsehash magic_enum VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/interface/index.cc b/src/core/interface/index.cc index aca409493..2b1a8daef 100644 --- a/src/core/interface/index.cc +++ b/src/core/interface/index.cc @@ -16,7 +16,8 @@ #include #include #include -#include "mixed_reducer/mixed_reducer_params.h" +#include "../mixed_reducer/mixed_streamer_reducer.h" +#include "../mixed_reducer/mixed_reducer_params.h" namespace zvec::core_interface { @@ -233,6 +234,9 @@ int Index::Init(const BaseIndexParam ¶m) { int Index::Open(const std::string &file_path, StorageOptions storage_options) { + // Store the file path for later use (e.g., in Merge for dump/reload) + file_path_ = file_path; + ailego::Params storage_params; // storage_params.set("proxima.mmap_file.storage.memory_warmup", true); // storage_params.set("proxima.mmap_file.storage.segment_meta_capacity", @@ -278,11 +282,20 @@ int Index::Open(const std::string &file_path, StorageOptions storage_options) { core::IndexError::What(ret)); return core::IndexError_Runtime; } + + fprintf(stderr, "[DEBUG] Index::Open: Before streamer_->open(), streamer_=%p, builder_=%p\n", + (void*)streamer_.get(), (void*)builder_.get()); + fflush(stderr); + if (streamer_ == nullptr || streamer_->open(storage_) != 0) { LOG_ERROR("Failed to open streamer, path: %s", file_path.c_str()); return core::IndexError_Runtime; } + fprintf(stderr, "[DEBUG] Index::Open: After streamer_->open(), streamer_=%p, builder_=%p\n", + (void*)streamer_.get(), (void*)builder_.get()); + fflush(stderr); + // converter/reformer/metric are created in IndexFactory::CreateIndex // TODO: init @@ -769,6 +782,20 @@ int Index::Merge(const std::vector &indexes, LOG_ERROR("Failed to init reducer"); return core::IndexError_Runtime; } + + fprintf(stderr, "[DEBUG] Index::Merge: builder_=%p, 
streamer_=%p\n", + (void*)builder_.get(), (void*)streamer_.get()); + fflush(stderr); + + // Set storage and file path for dump/reload operations + auto* mixed_reducer = dynamic_cast(reducer.get()); + if (mixed_reducer != nullptr) { + mixed_reducer->set_storage(storage_, file_path_); + fprintf(stderr, "[DEBUG] Index::Merge: set storage and file_path=%s for reducer\n", + file_path_.c_str()); + fflush(stderr); + } + if (reducer->set_target_streamer_wiht_info(builder_, streamer_, converter_, reformer_, input_vector_meta_) != 0) { @@ -788,6 +815,16 @@ int Index::Merge(const std::vector &indexes, return core::IndexError_Runtime; } is_trained_ = true; + + // Generic training support: Check if this index supports training capability + // The actual training orchestration happens at the db layer (Segment level) + auto* training_capable = this->GetTrainingCapability(); + if (training_capable != nullptr) { + fprintf(stderr, "[DEBUG] Index::Merge: Index has training capability, training should be triggered at db layer\n"); + fflush(stderr); + LOG_INFO("Index merge completed for trainable index, training can now be performed"); + } + return 0; } diff --git a/src/core/interface/index_factory.cc b/src/core/interface/index_factory.cc index 699c9ce0f..50c0f973b 100644 --- a/src/core/interface/index_factory.cc +++ b/src/core/interface/index_factory.cc @@ -43,6 +43,8 @@ Index::Pointer IndexFactory::CreateAndInitIndex(const BaseIndexParam ¶m) { ptr = std::make_shared(); } else if (param.index_type == IndexType::kHNSW) { ptr = std::make_shared(); + } else if (param.index_type == IndexType::kOMEGA) { + ptr = std::make_shared(); } else if (param.index_type == IndexType::kIVF) { ptr = std::make_shared(); } else { diff --git a/src/core/interface/indexes/omega_index.cc b/src/core/interface/indexes/omega_index.cc new file mode 100644 index 000000000..29d6299d2 --- /dev/null +++ b/src/core/interface/indexes/omega_index.cc @@ -0,0 +1,192 @@ +// Copyright 2025-present the zvec project +// +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "algorithm/omega/omega_streamer.h" +#include "algorithm/omega/omega_params.h" +#include "algorithm/hnsw/hnsw_params.h" +#include + +namespace zvec::core_interface { + +// OmegaIndex uses OmegaStreamer which provides OMEGA adaptive search +int OmegaIndex::CreateAndInitStreamer(const BaseIndexParam ¶m) { + fprintf(stderr, "[DEBUG] OmegaIndex::CreateAndInitStreamer CALLED!\n"); + fflush(stderr); + + // First call parent to set up all parameters and create basic streamer + int ret = HNSWIndex::CreateAndInitStreamer(param); + if (ret != core::IndexError_Success) { + fprintf(stderr, "[DEBUG] OmegaIndex: parent CreateAndInitStreamer failed with ret=%d\n", ret); + fflush(stderr); + return ret; + } + + fprintf(stderr, "[DEBUG] OmegaIndex: creating HnswBuilder...\n"); + fflush(stderr); + + // Create HnswBuilder for OMEGA (needed for Merge to build the graph) + builder_ = core::IndexFactory::CreateBuilder("HnswBuilder"); + if (ailego_unlikely(!builder_)) { + fprintf(stderr, "[DEBUG] OmegaIndex: FAILED to create HnswBuilder!\n"); + fflush(stderr); + LOG_ERROR("Failed to create HnswBuilder for OMEGA"); + return core::IndexError_Runtime; + } + fprintf(stderr, "[DEBUG] OmegaIndex: HnswBuilder created successfully, initializing...\n"); + fflush(stderr); + + if (ailego_unlikely(builder_->init(proxima_index_meta_, proxima_index_params_) != 0)) { + fprintf(stderr, "[DEBUG] OmegaIndex: FAILED to init 
HnswBuilder!\n"); + fflush(stderr); + LOG_ERROR("Failed to init HnswBuilder for OMEGA"); + return core::IndexError_Runtime; + } + fprintf(stderr, "[DEBUG] OmegaIndex: HnswBuilder initialized successfully!\n"); + fflush(stderr); + + // Now replace the HnswStreamer with OmegaStreamer + // Save the current meta and params before replacing streamer + core::IndexMeta saved_meta = proxima_index_meta_; + ailego::Params saved_params = proxima_index_params_; + + // Create OmegaStreamer + fprintf(stderr, "[DEBUG] OmegaIndex: Creating OmegaStreamer...\n"); + fflush(stderr); + streamer_ = core::IndexFactory::CreateStreamer("OmegaStreamer"); + if (ailego_unlikely(!streamer_)) { + fprintf(stderr, "[DEBUG] OmegaIndex: FAILED to create OmegaStreamer!\n"); + fflush(stderr); + LOG_ERROR("Failed to create OmegaStreamer"); + return core::IndexError_Runtime; + } + fprintf(stderr, "[DEBUG] OmegaIndex: OmegaStreamer created successfully, streamer_=%p\n", (void*)streamer_.get()); + fflush(stderr); + + // Initialize OmegaStreamer with the same parameters + fprintf(stderr, "[DEBUG] OmegaIndex: Initializing OmegaStreamer...\n"); + fflush(stderr); + if (ailego_unlikely( + streamer_->init(saved_meta, saved_params) != 0)) { + fprintf(stderr, "[DEBUG] OmegaIndex: FAILED to init OmegaStreamer!\n"); + fflush(stderr); + LOG_ERROR("Failed to init OmegaStreamer"); + return core::IndexError_Runtime; + } + fprintf(stderr, "[DEBUG] OmegaIndex: OmegaStreamer initialized successfully!\n"); + fflush(stderr); + + // CRITICAL: Set "OmegaSearcher" in metadata for disk-persisted indices + // This ensures that when the index is saved and loaded later, + // IndexFlow will create OmegaSearcher instead of HnswSearcher + proxima_index_meta_.set_searcher("OmegaSearcher", 0, ailego::Params()); + + return core::IndexError_Success; +} + + +zvec::Status OmegaIndex::EnableTrainingMode(bool enable) { + fprintf(stderr, "[DEBUG] OmegaIndex::EnableTrainingMode called with enable=%d\n", enable); + fflush(stderr); + + 
LOG_INFO("OmegaIndex::EnableTrainingMode called with enable=%d", enable); + training_mode_enabled_ = enable; + + // Delegate to OmegaStreamer if available + if (streamer_) { + fprintf(stderr, "[DEBUG] OmegaIndex: streamer_ exists\n"); + fflush(stderr); + + LOG_INFO("OmegaIndex: streamer_ exists, attempting dynamic_cast to OmegaStreamer"); + auto* omega_streamer = dynamic_cast(streamer_.get()); + if (omega_streamer) { + fprintf(stderr, "[DEBUG] OmegaIndex: Successfully cast to OmegaStreamer\n"); + fflush(stderr); + + LOG_INFO("OmegaIndex: Successfully cast to OmegaStreamer, calling EnableTrainingMode"); + omega_streamer->EnableTrainingMode(enable); + return zvec::Status::OK(); + } else { + fprintf(stderr, "[DEBUG] OmegaIndex: Failed to cast to OmegaStreamer\n"); + fflush(stderr); + + LOG_WARN("OmegaIndex: Failed to cast streamer_ to OmegaStreamer"); + } + } else { + fprintf(stderr, "[DEBUG] OmegaIndex: streamer_ is null\n"); + fflush(stderr); + + LOG_WARN("OmegaIndex: streamer_ is null"); + } + + return zvec::Status::OK(); +} + +void OmegaIndex::SetCurrentQueryId(int query_id) { + current_query_id_ = query_id; + + // Delegate to OmegaStreamer if available + if (streamer_) { + auto* omega_streamer = dynamic_cast(streamer_.get()); + if (omega_streamer) { + omega_streamer->SetCurrentQueryId(query_id); + } + } +} + +std::vector OmegaIndex::GetTrainingRecords() const { + // Get training records from OmegaStreamer + if (streamer_) { + auto* omega_streamer = dynamic_cast(streamer_.get()); + if (omega_streamer) { + return omega_streamer->GetTrainingRecords(); + } + } + return {}; +} + +void OmegaIndex::ClearTrainingRecords() { + // Clear training records in OmegaStreamer + if (streamer_) { + auto* omega_streamer = dynamic_cast(streamer_.get()); + if (omega_streamer) { + omega_streamer->ClearTrainingRecords(); + } + } +} + +int OmegaIndex::_prepare_for_search( + const VectorData &vector_data, + const BaseIndexQueryParam::Pointer &search_param, + core::IndexContext::Pointer 
&context) { + // First call parent for base HNSW parameter handling (ef_search, etc.) + int ret = HNSWIndex::_prepare_for_search(vector_data, search_param, context); + if (ret != 0) { + return ret; + } + + // Extract OMEGA-specific parameter (target_recall) + const auto &omega_search_param = + std::dynamic_pointer_cast(search_param); + if (omega_search_param) { + ailego::Params params; + params.set(core::PARAM_OMEGA_SEARCHER_TARGET_RECALL, + omega_search_param->target_recall); + context->update(params); + } + + return 0; +} + +} // namespace zvec::core_interface diff --git a/src/core/mixed_reducer/mixed_streamer_reducer.cc b/src/core/mixed_reducer/mixed_streamer_reducer.cc index b5e241bb0..58b3c8e3e 100644 --- a/src/core/mixed_reducer/mixed_streamer_reducer.cc +++ b/src/core/mixed_reducer/mixed_streamer_reducer.cc @@ -170,6 +170,9 @@ int MixedStreamerReducer::reduce(const IndexFilter &filter) { for (size_t i = 0; i < streamers_.size(); i++) { read_results[i] = read_vec(i, filter, id_offset, &next_id); + fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: read_vec(%zu) returned %d, next_id=%u\n", + i, read_results[i], next_id); + fflush(stderr); id_offset += streamers_[i]->create_provider()->count(); } @@ -194,8 +197,110 @@ int MixedStreamerReducer::reduce(const IndexFilter &filter) { stats_.set_reduced_costtime(timer.seconds()); state_ = STATE_REDUCE; + + fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: target_builder_=%p\n", + target_builder_.get()); + fflush(stderr); + if (target_builder_ != nullptr) { + fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: calling IndexBuild()\n"); + fflush(stderr); IndexBuild(); + fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: IndexBuild() completed\n"); + fflush(stderr); + + // CRITICAL FIX: After IndexBuild(), the builder's entity has the graph data (1500 docs), + // but the streamer's entity is still empty (0 docs). They are separate objects! 
+ // Solution: Dump builder to storage, then close and reopen streamer to reload the data. + + if (target_storage_ == nullptr) { + LOG_ERROR("target_storage_ is null, cannot dump/reload"); + return IndexError_Runtime; + } + + if (target_file_path_.empty()) { + LOG_ERROR("target_file_path_ is empty, cannot dump/reload"); + return IndexError_Runtime; + } + + fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: dumping builder to storage at path=%s\n", + target_file_path_.c_str()); + fflush(stderr); + + // Create a FileDumper that writes to the file + auto dumper = IndexFactory::CreateDumper("FileDumper"); + if (dumper == nullptr) { + LOG_ERROR("Failed to create dumper"); + return IndexError_Runtime; + } + + // Initialize the dumper with the file path + int ret = dumper->create(target_file_path_); + if (ret != 0) { + LOG_ERROR("Failed to create dumper at path=%s, ret=%d", target_file_path_.c_str(), ret); + return ret; + } + + // Dump the builder's entity to the file + ret = target_builder_->dump(dumper); + if (ret != 0) { + LOG_ERROR("Failed to dump builder, ret=%d", ret); + return ret; + } + + // Close the dumper to flush data + ret = dumper->close(); + if (ret != 0) { + LOG_ERROR("Failed to close dumper, ret=%d", ret); + return ret; + } + + fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: builder dumped, now closing streamer\n"); + fflush(stderr); + + // Close the streamer + ret = target_streamer_->close(); + if (ret != 0) { + LOG_ERROR("Failed to close streamer, ret=%d", ret); + return ret; + } + + fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: streamer closed, now closing storage\n"); + fflush(stderr); + + // Close the storage before reopening it + ret = target_storage_->close(); + if (ret != 0) { + LOG_ERROR("Failed to close storage, ret=%d", ret); + return ret; + } + + fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: storage closed, now reopening storage\n"); + fflush(stderr); + + // Reopen the storage from the file - this is critical! 
+ // The storage needs to reload the data that was just dumped to the file + ret = target_storage_->open(target_file_path_, false); + if (ret != 0) { + LOG_ERROR("Failed to reopen storage, ret=%d", ret); + return ret; + } + + fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: storage reopened, now reopening streamer\n"); + fflush(stderr); + + // Now reopen the streamer with the refreshed storage + ret = target_streamer_->open(target_storage_); + if (ret != 0) { + LOG_ERROR("Failed to reopen streamer, ret=%d", ret); + return ret; + } + + fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: streamer reopened successfully\n"); + fflush(stderr); + } else { + fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: target_builder_ is null, skipping IndexBuild()\n"); + fflush(stderr); } LOG_INFO("End brute force reduce. cost time: [%zu]s", @@ -509,6 +614,9 @@ void MixedStreamerReducer::PushToDocCache(const IndexQueryMeta &meta, } int MixedStreamerReducer::IndexBuild() { + fprintf(stderr, "[DEBUG] IndexBuild: doc_cache_ size=%zu\n", doc_cache_.size()); + fflush(stderr); + const bool need_convert = !is_target_and_source_same_reformer_ && target_streamer_reformer_ != nullptr; IndexHolder::Pointer target_holder; @@ -568,8 +676,18 @@ int MixedStreamerReducer::IndexBuild() { target_holder); target_holder = target_builder_converter_->result(); } + + fprintf(stderr, "[DEBUG] IndexBuild: calling target_builder_->train()\n"); + fflush(stderr); target_builder_->train(target_holder); + + fprintf(stderr, "[DEBUG] IndexBuild: calling target_builder_->build()\n"); + fflush(stderr); target_builder_->build(target_holder); + + fprintf(stderr, "[DEBUG] IndexBuild: build() completed\n"); + fflush(stderr); + return 0; } diff --git a/src/core/mixed_reducer/mixed_streamer_reducer.h b/src/core/mixed_reducer/mixed_streamer_reducer.h index ec4c62406..0a203cfd0 100644 --- a/src/core/mixed_reducer/mixed_streamer_reducer.h +++ b/src/core/mixed_reducer/mixed_streamer_reducer.h @@ -51,6 +51,12 @@ 
class MixedStreamerReducer : public IndexStreamerReducer { const IndexConverter::Pointer converter, const IndexReformer::Pointer reformer, const IndexQueryMeta &original_query_meta) override; + + // Set the storage and file path for dump/reload operations + void set_storage(const IndexStorage::Pointer &storage, const std::string &file_path) { + target_storage_ = storage; + target_file_path_ = file_path; + } // feed_streamer int feed_streamer_with_reformer( IndexStreamer::Pointer streamer, @@ -105,6 +111,8 @@ class MixedStreamerReducer : public IndexStreamerReducer { IndexBuilder::Pointer target_builder_{nullptr}; IndexConverter::Pointer target_builder_converter_{nullptr}; + IndexStorage::Pointer target_storage_{nullptr}; + std::string target_file_path_; std::mutex mutex_{}; std::vector> doc_cache_; const uint64_t kInvalidKey = std::numeric_limits::max(); diff --git a/src/db/collection.cc b/src/db/collection.cc index 6704f0f01..5e0daf718 100644 --- a/src/db/collection.cc +++ b/src/db/collection.cc @@ -817,7 +817,58 @@ Status CollectionImpl::Optimize(const OptimizeOptions &options) { return Status::OK(); } - // build segment compact task + // Step 1: Build vector indexes if not ready + // This ensures indexes are built even for single segments that won't be compacted + std::vector index_build_tasks; + for (auto &segment : persist_segments) { + if (!segment->all_vector_index_ready()) { + // Build all vector indexes for this segment + index_build_tasks.push_back(SegmentTask::CreateCreateVectorIndexTask( + CreateVectorIndexTask{segment, "", nullptr, options.concurrency_})); + } + } + + if (!index_build_tasks.empty()) { + LOG_INFO("Building vector indexes for %zu segments", index_build_tasks.size()); + auto s = execute_tasks(index_build_tasks); + CHECK_RETURN_STATUS(s); + + // Update segment metadata + std::lock_guard write_lock(write_mtx_); + Version new_version = version_manager_->get_current_version(); + + for (auto &task : index_build_tasks) { + auto task_info = 
task->task_info(); + if (std::holds_alternative(task_info)) { + auto create_index_task = std::get(task_info); + s = new_version.update_persisted_segment_meta( + create_index_task.output_segment_meta_); + CHECK_RETURN_STATUS(s); + } + } + + s = version_manager_->apply(new_version); + CHECK_RETURN_STATUS(s); + s = version_manager_->flush(); + CHECK_RETURN_STATUS(s); + + // Reload indexes in segments + for (auto &task : index_build_tasks) { + auto task_info = task->task_info(); + if (std::holds_alternative(task_info)) { + auto create_index_task = std::get(task_info); + s = create_index_task.input_segment_->reload_vector_index( + *schema_, create_index_task.output_segment_meta_, + create_index_task.output_vector_indexers_, + create_index_task.output_quant_vector_indexers_); + CHECK_RETURN_STATUS(s); + } + } + + LOG_INFO("Completed building vector indexes"); + } + + // Step 2: build segment compact task auto delete_store_clone = delete_store_->clone(); auto tasks = build_compact_task(schema_, persist_segments, options.concurrency_, diff --git a/src/db/index/column/vector_column/engine_helper.hpp b/src/db/index/column/vector_column/engine_helper.hpp index de1cfc6c0..dad423d03 100644 --- a/src/db/index/column/vector_column/engine_helper.hpp +++ b/src/db/index/column/vector_column/engine_helper.hpp @@ -162,6 +162,33 @@ class ProximaEngineHelper { return std::move(hnsw_query_param); } + case IndexType::OMEGA: { + // OMEGA uses extended query params with target_recall + auto omega_query_param_result = + _build_common_query_param( + query_params); + if (!omega_query_param_result.has_value()) { + return tl::make_unexpected(Status::InvalidArgument( + "failed to build query param: " + + omega_query_param_result.error().message())); + } + auto &omega_query_param = omega_query_param_result.value(); + if (query_params.query_params) { + // Try to cast to OmegaQueryParams first + if (auto* db_omega_query_params = dynamic_cast( + query_params.query_params.get())) { + 
omega_query_param->ef_search = db_omega_query_params->ef(); + omega_query_param->target_recall = db_omega_query_params->target_recall(); + } else if (auto* db_hnsw_query_params = dynamic_cast( + query_params.query_params.get())) { + // Fallback to HnswQueryParams (backward compatibility) + omega_query_param->ef_search = db_hnsw_query_params->ef(); + // target_recall will use default value (0.95f) + } + } + return std::move(omega_query_param); + } + case IndexType::IVF: { auto ivf_query_param_result = _build_common_query_param( @@ -322,6 +349,46 @@ class ProximaEngineHelper { return index_param_builder->Build(); } + case IndexType::OMEGA: { + fprintf(stderr, "[DEBUG] convert_to_engine_index_param: OMEGA case entered!\n"); + fflush(stderr); + // OMEGA uses its own index type at core_interface level + auto index_param_builder_result = + _build_common_index_param( + field_schema); + if (!index_param_builder_result.has_value()) { + return tl::make_unexpected(Status::InvalidArgument( + "failed to build index param: " + + index_param_builder_result.error().message())); + } + auto index_param_builder = index_param_builder_result.value(); + + auto db_index_params = dynamic_cast( + field_schema.index_params().get()); + index_param_builder->WithM(db_index_params->m()); + index_param_builder->WithEFConstruction( + db_index_params->ef_construction()); + + // Override index_type to kOMEGA + auto hnsw_param = index_param_builder->Build(); + fprintf(stderr, "[DEBUG] convert_to_engine_index_param: Before override, index_type=%d\n", + static_cast(hnsw_param->index_type)); + fflush(stderr); + hnsw_param->index_type = core_interface::IndexType::kOMEGA; + fprintf(stderr, "[DEBUG] convert_to_engine_index_param: After override, index_type=%d\n", + static_cast(hnsw_param->index_type)); + fprintf(stderr, "[DEBUG] convert_to_engine_index_param: kOMEGA enum value=%d\n", + static_cast(core_interface::IndexType::kOMEGA)); + fflush(stderr); + + // TODO: Store OMEGA-specific params 
(min_vector_threshold, model_dir) + // in the params field for now + // These will be used by the IndexFlow when creating the OmegaSearcher + + return hnsw_param; + } + case IndexType::IVF: { auto index_param_builder_result = _build_common_index_param< IVFIndexParams, core_interface::IVFIndexParamBuilder>(field_schema); diff --git a/src/db/index/column/vector_column/vector_column_indexer.cc b/src/db/index/column/vector_column/vector_column_indexer.cc index 1859e2490..c9a395872 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.cc +++ b/src/db/index/column/vector_column/vector_column_indexer.cc @@ -36,6 +36,20 @@ Status VectorColumnIndexer::Open( Status VectorColumnIndexer::CreateProximaIndex( const vector_column_params::ReadOptions &read_options) { + fprintf(stderr, "[DEBUG] CreateProximaIndex: field_schema_.name()=%s\n", + field_schema_.name().c_str()); + fflush(stderr); + + // CRITICAL DEBUG: Check field_schema_.index_params()->type() BEFORE conversion + if (field_schema_.index_params()) { + fprintf(stderr, "[DEBUG] CreateProximaIndex: field_schema_.index_params()->type()=%d (BEFORE conversion)\n", + static_cast(field_schema_.index_params()->type())); + fflush(stderr); + } else { + fprintf(stderr, "[DEBUG] CreateProximaIndex: field_schema_.index_params() is NULL!\n"); + fflush(stderr); + } + auto index_param_result = ProximaEngineHelper::convert_to_engine_index_param(field_schema_); if (!index_param_result.has_value()) { @@ -43,11 +57,20 @@ Status VectorColumnIndexer::CreateProximaIndex( } auto &index_param = index_param_result.value(); + fprintf(stderr, "[DEBUG] CreateProximaIndex: index_param->index_type=%d (AFTER conversion)\n", + static_cast(index_param->index_type)); + fflush(stderr); + + // Use IndexFactory for all index types (including OMEGA) index = core_interface::IndexFactory::CreateAndInitIndex(*index_param); if (index == nullptr) { return Status::InternalError("Failed to create index"); } + fprintf(stderr, "[DEBUG] 
CreateProximaIndex: created index type=%s\n", + typeid(*index).name()); + fflush(stderr); + auto storage_type = read_options.use_mmap ? core_interface::StorageOptions::StorageType::kMMAP @@ -108,6 +131,10 @@ Status VectorColumnIndexer::Merge( return Status::InvalidArgument("Index not opened"); } + fprintf(stderr, "[DEBUG] VectorColumnIndexer::Merge: BEFORE merge, index type=%s\n", + typeid(*index).name()); + fflush(stderr); + if (indexers.empty()) { return Status::OK(); } @@ -118,6 +145,9 @@ Status VectorColumnIndexer::Merge( if (indexer->index_file_path() == this->index_file_path()) { continue; } + fprintf(stderr, "[DEBUG] VectorColumnIndexer::Merge: source indexer type=%s\n", + typeid(*indexer->index).name()); + fflush(stderr); engine_indexers.push_back(indexer->index); } auto engine_filter = @@ -130,6 +160,11 @@ Status VectorColumnIndexer::Merge( {merge_options.write_concurrency, merge_options.pool})) { return Status::InternalError("Failed to merge index"); } + + fprintf(stderr, "[DEBUG] VectorColumnIndexer::Merge: AFTER merge, index type=%s\n", + typeid(*index).name()); + fflush(stderr); + return Status::OK(); } @@ -167,10 +202,27 @@ Result VectorColumnIndexer::Fetch( Result VectorColumnIndexer::Search( const vector_column_params::VectorData &vector_data, const vector_column_params::QueryParams &query_params) { + fprintf(stderr, "[DEBUG] VectorColumnIndexer::Search called, index=%p, training_mode_enabled_=%d\n", + (void*)index.get(), training_mode_enabled_); + fflush(stderr); + if (index == nullptr) { + fprintf(stderr, "[DEBUG] VectorColumnIndexer::Search: index is NULL!\n"); + fflush(stderr); return tl::make_unexpected(Status::InvalidArgument("Index not opened")); } + fprintf(stderr, "[DEBUG] VectorColumnIndexer::Search: index doc_count=%u\n", + index->GetDocCount()); + fflush(stderr); + + // Set query_id before search if training mode is enabled + if (training_mode_enabled_) { + if (auto* training_capable = index->GetTrainingCapability()) { + 
training_capable->SetCurrentQueryId(current_query_id_); + } + } + auto engine_vector_data = ProximaEngineHelper::convert_to_engine_vector(vector_data, is_sparse_); core_interface::SearchResult search_result; @@ -198,6 +250,16 @@ Result VectorColumnIndexer::Search( Status::InternalError("Failed to search vector")); } + // Collect training records after search if training mode is enabled + if (training_mode_enabled_) { + std::lock_guard lock(training_mutex_); + if (auto* training_capable = index->GetTrainingCapability()) { + auto records = training_capable->GetTrainingRecords(); + collected_records_.insert(collected_records_.end(), + records.begin(), records.end()); + } + } + auto result = std::make_shared( is_sparse_, std::move(search_result.doc_list_), std::move(search_result.reverted_vector_list_), @@ -205,4 +267,83 @@ Result VectorColumnIndexer::Search( return result; } +// Training mode method implementations +Status VectorColumnIndexer::EnableTrainingMode(bool enable) { + fprintf(stderr, "[DEBUG] VectorColumnIndexer::EnableTrainingMode called with enable=%d\n", enable); + fflush(stderr); + + std::lock_guard lock(training_mutex_); + training_mode_enabled_ = enable; + + // Propagate to underlying index if it exists and supports training + if (index != nullptr) { + fprintf(stderr, "[DEBUG] VectorColumnIndexer: index is not null, type_name=%s\n", + typeid(*index).name()); + fflush(stderr); + + if (auto* training_capable = index->GetTrainingCapability()) { + fprintf(stderr, "[DEBUG] VectorColumnIndexer: GetTrainingCapability returned non-null\n"); + fflush(stderr); + return training_capable->EnableTrainingMode(enable); + } else { + fprintf(stderr, "[DEBUG] VectorColumnIndexer: GetTrainingCapability returned null (index is type=%s)\n", + typeid(*index).name()); + fflush(stderr); + } + } else { + fprintf(stderr, "[DEBUG] VectorColumnIndexer: index is null\n"); + fflush(stderr); + } + + return Status::OK(); +} + +void VectorColumnIndexer::SetCurrentQueryId(int 
query_id) { + current_query_id_ = query_id; + + // Propagate to underlying index if it exists and supports training + if (index != nullptr) { + if (auto* training_capable = index->GetTrainingCapability()) { + training_capable->SetCurrentQueryId(query_id); + } + } +} + +std::vector VectorColumnIndexer::GetTrainingRecords() const { + std::lock_guard lock(training_mutex_); + + // Get records from underlying index if it exists and supports training + if (index != nullptr) { + if (auto* training_capable = index->GetTrainingCapability()) { + auto index_records = training_capable->GetTrainingRecords(); + + // Merge with local collected records + std::vector all_records = collected_records_; + all_records.insert(all_records.end(), index_records.begin(), index_records.end()); + return all_records; + } + } + + return collected_records_; +} + +void VectorColumnIndexer::ClearTrainingRecords() { + std::lock_guard lock(training_mutex_); + collected_records_.clear(); + + // Propagate to underlying index if it exists and supports training + if (index != nullptr) { + if (auto* training_capable = index->GetTrainingCapability()) { + training_capable->ClearTrainingRecords(); + } + } +} + +core_interface::ITrainingCapable* VectorColumnIndexer::GetTrainingCapability() const { + if (index != nullptr) { + return index->GetTrainingCapability(); + } + return nullptr; +} + } // namespace zvec diff --git a/src/db/index/column/vector_column/vector_column_indexer.h b/src/db/index/column/vector_column/vector_column_indexer.h index 80766e1d6..9f9f93214 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.h +++ b/src/db/index/column/vector_column/vector_column_indexer.h @@ -15,11 +15,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include "db/common/constants.h" @@ -88,6 +90,52 @@ class VectorColumnIndexer { // Result BatchFetch(const std::vector &doc_ids) // const; + public: + // OMEGA Training Mode Support + /** 
+ * @brief Check if the underlying index supports training capability. + * + * @return Pointer to ITrainingCapable interface if supported, nullptr otherwise + */ + core_interface::ITrainingCapable* GetTrainingCapability() const; + + /** + * @brief Enable or disable training mode for collecting training features. + * + * Propagates the training mode setting to the underlying index. + * When enabled, searches will collect training features. + * + * @param enable True to enable training mode, false to disable + * @return Status indicating success or failure + */ + Status EnableTrainingMode(bool enable); + + /** + * @brief Set the query ID for the next search operation. + * + * Must be called before Search() when training mode is enabled. + * The query_id will be propagated to the underlying index. + * + * @param query_id Unique identifier for the query + */ + void SetCurrentQueryId(int query_id); + + /** + * @brief Get all collected training records. + * + * Returns a copy of all training records collected from the + * underlying index since training mode was enabled. + * + * @return Vector of TrainingRecord structures + */ + std::vector GetTrainingRecords() const; + + /** + * @brief Clear all collected training records. + * + * Clears training records from both this layer and the underlying index. 
+ */ + void ClearTrainingRecords(); public: std::string index_file_path() const { @@ -124,6 +172,12 @@ class VectorColumnIndexer { std::string engine_name_ = "proxima"; bool is_sparse_{false}; // TODO: eliminate the dynamic flag and make it // static/template/seperate class + + // Training mode support + bool training_mode_enabled_{false}; + int current_query_id_{0}; + mutable std::mutex training_mutex_; + mutable std::vector collected_records_; }; diff --git a/src/db/index/common/proto_converter.cc b/src/db/index/common/proto_converter.cc index 16516c55e..520eacc22 100644 --- a/src/db/index/common/proto_converter.cc +++ b/src/db/index/common/proto_converter.cc @@ -75,6 +75,28 @@ proto::IVFIndexParams ProtoConverter::ToPb(const IVFIndexParams *params) { return params_pb; } +// OmegaIndexParams +OmegaIndexParams::OPtr ProtoConverter::FromPb( + const proto::OmegaIndexParams ¶ms_pb) { + auto params = std::make_shared( + MetricTypeCodeBook::Get(params_pb.base().metric_type()), params_pb.m(), + params_pb.ef_construction(), + QuantizeTypeCodeBook::Get(params_pb.base().quantize_type())); + + return params; +} + +proto::OmegaIndexParams ProtoConverter::ToPb(const OmegaIndexParams *params) { + proto::OmegaIndexParams params_pb; + params_pb.mutable_base()->set_metric_type( + MetricTypeCodeBook::Get(params->metric_type())); + params_pb.mutable_base()->set_quantize_type( + QuantizeTypeCodeBook::Get(params->quantize_type())); + params_pb.set_ef_construction(params->ef_construction()); + params_pb.set_m(params->m()); + return params_pb; +} + // InvertIndexParams InvertIndexParams::OPtr ProtoConverter::FromPb( const proto::InvertIndexParams ¶ms_pb) { @@ -157,6 +179,8 @@ IndexParams::Ptr ProtoConverter::FromPb(const proto::IndexParams ¶ms_pb) { return ProtoConverter::FromPb(params_pb.ivf()); } else if (params_pb.has_flat()) { return ProtoConverter::FromPb(params_pb.flat()); + } else if (params_pb.has_omega()) { + return ProtoConverter::FromPb(params_pb.omega()); } return nullptr; 
@@ -211,6 +235,13 @@ proto::IndexParams ProtoConverter::ToPb(const IndexParams *params) { } break; } + case IndexType::OMEGA: { + auto omega_params = dynamic_cast(params); + if (omega_params) { + params_pb.mutable_omega()->CopyFrom(ProtoConverter::ToPb(omega_params)); + } + break; + } default: break; } diff --git a/src/db/index/common/proto_converter.h b/src/db/index/common/proto_converter.h index 48e170165..63a6ab2b6 100644 --- a/src/db/index/common/proto_converter.h +++ b/src/db/index/common/proto_converter.h @@ -33,6 +33,10 @@ struct ProtoConverter { static IVFIndexParams::OPtr FromPb(const proto::IVFIndexParams ¶ms_pb); static proto::IVFIndexParams ToPb(const IVFIndexParams *params); + // OmegaIndexParams + static OmegaIndexParams::OPtr FromPb(const proto::OmegaIndexParams ¶ms_pb); + static proto::OmegaIndexParams ToPb(const OmegaIndexParams *params); + // InvertIndexParams static InvertIndexParams::OPtr FromPb( const proto::InvertIndexParams ¶ms_pb); diff --git a/src/db/index/segment/segment.cc b/src/db/index/segment/segment.cc index 1afa038ee..2c009d1b7 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -55,6 +55,8 @@ #include "db/index/storage/wal/wal_file.h" #include "column_merging_reader.h" #include "sql_expr_parser.h" +#include "db/training/training_data_collector.h" +#include "db/training/omega_model_trainer.h" namespace zvec { @@ -273,6 +275,11 @@ class SegmentImpl : public Segment, Status internal_upsert(Doc &doc); Status internal_delete(const Doc &doc); + // Auto-training for OMEGA index (called after Merge completes) + Status auto_train_omega_index_internal( + const std::string& field_name, + const std::vector& indexers); + Status recover(); Status open_wal_file(); Status append_wal(const Doc &doc); @@ -1440,15 +1447,27 @@ CombinedVectorColumnIndexer::Ptr SegmentImpl::get_combined_vector_indexer( auto iter = vector_indexers_.find(field_name); if (iter != vector_indexers_.end()) { indexers = iter->second; + 
fprintf(stderr, "[DEBUG] get_combined_vector_indexer: found %zu persisted indexers\n", + indexers.size()); + fflush(stderr); } auto m_iter = memory_vector_indexers_.find(field_name); if (m_iter != memory_vector_indexers_.end()) { + fprintf(stderr, "[DEBUG] get_combined_vector_indexer: FOUND memory indexer! Adding to list\n"); + fflush(stderr); indexers.push_back(m_iter->second); + } else { + fprintf(stderr, "[DEBUG] get_combined_vector_indexer: NO memory indexer\n"); + fflush(stderr); } auto field = collection_schema_->get_field(field_name); auto vector_index_params = std::dynamic_pointer_cast(field->index_params()); + fprintf(stderr, "[DEBUG] get_combined_vector_indexer: field index_type=%d\n", + static_cast(vector_index_params->type())); + fflush(stderr); + MetricType metric_type = vector_index_params->metric_type(); auto blocks = get_persist_block_metas(BlockType::VECTOR_INDEX, field_name); @@ -1563,12 +1582,29 @@ Status SegmentImpl::create_all_vector_index( new_segment_meta->set_indexed_vector_fields(vector_field_names); *segment_meta = new_segment_meta; + fprintf(stderr, "[DEBUG] create_vector_index_internal: marked fields as indexed, vector_field_names.size()=%zu\n", + vector_field_names.size()); + for (const auto& field_name : vector_field_names) { + fprintf(stderr, "[DEBUG] create_vector_index_internal: indexed field=%s, vector_indexed=%d\n", + field_name.c_str(), new_segment_meta->vector_indexed(field_name)); + } + fflush(stderr); + + // Note: OMEGA training is now performed in merge_vector_indexer() immediately + // after the index is built via Merge(). This is the recommended approach per + // Alibaba expert advice - train right after reduce() completes when data is + // still in memory. 
+ return Status::OK(); } Result SegmentImpl::merge_vector_indexer( const std::string &index_file_path, const std::string &column, const FieldSchema &field, int concurrency) { + fprintf(stderr, "[DEBUG] merge_vector_indexer called for field '%s', index_type=%d\n", + column.c_str(), static_cast(field.index_params()->type())); + fflush(stderr); + VectorColumnIndexer::Ptr vector_indexer = std::make_shared(index_file_path, field); @@ -1578,6 +1614,11 @@ Result SegmentImpl::merge_vector_indexer( CHECK_RETURN_STATUS_EXPECTED(s); std::vector to_merge_indexers = vector_indexers_[column]; + + fprintf(stderr, "[DEBUG] merge_vector_indexer: merging %zu indexers\n", + to_merge_indexers.size()); + fflush(stderr); + vector_column_params::MergeOptions merge_options; if (concurrency == 0) { merge_options.pool = GlobalResource::Instance().optimize_thread_pool(); @@ -1588,9 +1629,48 @@ Result SegmentImpl::merge_vector_indexer( } s = vector_indexer->Merge(to_merge_indexers, filter_, merge_options); CHECK_RETURN_STATUS_EXPECTED(s); + + fprintf(stderr, "[DEBUG] merge_vector_indexer: Merge completed successfully, doc_count=%zu\n", + vector_indexer->doc_count()); + fflush(stderr); + + // CRITICAL: Train BEFORE Flush! + // After Merge, the index is in memory and searchable (builder and streamer are ready). + // Flush() will clear the in-memory data (doc_count becomes 0), so training must + // happen BEFORE Flush while the index is still searchable. 
+ auto* training_capable = vector_indexer->GetTrainingCapability(); + if (training_capable != nullptr) { + fprintf(stderr, "[DEBUG] merge_vector_indexer: Trainable index detected (type=%d)!\n", + static_cast(field.index_params()->type())); + fflush(stderr); + + LOG_INFO("Trainable index detected after merge, training BEFORE flush for field '%s' in segment %d", + column.c_str(), id()); + + // Train with the in-memory index (data is still accessible) + s = auto_train_omega_index_internal(column, {vector_indexer}); + if (!s.ok()) { + LOG_WARN("Failed to auto-train index after merge: %s (non-fatal, continuing)", + s.message().c_str()); + // Don't fail the merge operation if training fails + } + } else { + fprintf(stderr, "[DEBUG] merge_vector_indexer: Index does not support training (type=%d), skipping\n", + static_cast(field.index_params()->type())); + fflush(stderr); + } + + // Now flush to persist the data (this will clear in-memory data) s = vector_indexer->Flush(); CHECK_RETURN_STATUS_EXPECTED(s); + fprintf(stderr, "[DEBUG] merge_vector_indexer: Flush completed, doc_count=%zu\n", + vector_indexer->doc_count()); + fflush(stderr); + + fprintf(stderr, "[DEBUG] merge_vector_indexer: returning vector_indexer\n"); + fflush(stderr); + return vector_indexer; } @@ -2195,6 +2275,98 @@ Status SegmentImpl::cleanup() { return Status::OK(); } +Status SegmentImpl::auto_train_omega_index_internal( + const std::string& field_name, + const std::vector& indexers) { + LOG_INFO("Starting auto-training for OMEGA index on field '%s' in segment %d", + field_name.c_str(), id()); + + // Step 1: Collect training data using the provided indexers + TrainingDataCollectorOptions collector_options; + collector_options.num_training_queries = 1000; // TODO: Make configurable + collector_options.ef_training = 1000; // Large ef for recall ≈ 1 + collector_options.topk = 100; + collector_options.noise_scale = 0.01f; + + fprintf(stderr, "[DEBUG] auto_train_omega_index_internal: calling 
CollectTrainingData with %zu provided indexers\n", + indexers.size()); + fflush(stderr); + + auto training_records_result = TrainingDataCollector::CollectTrainingData( + shared_from_this(), field_name, collector_options, indexers); + + if (!training_records_result.has_value()) { + return Status::InternalError( + "Failed to collect training data: " + + training_records_result.error().message()); + } + + auto& training_records = training_records_result.value(); + LOG_INFO("Collected %zu training records for segment %d", + training_records.size(), id()); + + if (training_records.empty()) { + LOG_WARN("No training records collected, skipping model training"); + return Status::OK(); + } + + // Check if we have enough positive and negative samples + size_t positive_count = 0; + size_t negative_count = 0; + for (const auto& record : training_records) { + if (record.label == 1) { + positive_count++; + } else { + negative_count++; + } + } + + if (positive_count == 0 || negative_count == 0) { + LOG_WARN("Insufficient training samples: %zu positive, %zu negative. Need both > 0. Skipping training.", + positive_count, negative_count); + return Status::OK(); + } + + // Need at least 50 samples of each class for reasonable training + if (positive_count < 50 || negative_count < 50) { + LOG_WARN("Too few training samples: %zu positive, %zu negative. Need at least 50 of each. 
Skipping training.", + positive_count, negative_count); + return Status::OK(); + } + + LOG_INFO("Training data stats: %zu positive, %zu negative samples", + positive_count, negative_count); + + // Step 2: Train OMEGA model + OmegaModelTrainerOptions trainer_options; + trainer_options.output_dir = FileHelper::MakeSegmentPath(path_, id()) + "/omega_model"; + trainer_options.verbose = true; + + // Create output directory if it doesn't exist + if (!FileHelper::DirectoryExists(trainer_options.output_dir)) { + if (!FileHelper::CreateDirectory(trainer_options.output_dir)) { + return Status::InternalError( + "Failed to create model output directory: " + + trainer_options.output_dir); + } + } + + auto train_status = OmegaModelTrainer::TrainModel(training_records, trainer_options); + if (!train_status.ok()) { + return Status::InternalError( + "Failed to train OMEGA model: " + train_status.message()); + } + + LOG_INFO("Successfully trained OMEGA model for segment %d, output: %s", + id(), trainer_options.output_dir.c_str()); + + // Step 3: Load model into the provided indexers + // TODO: Implement model loading into VectorColumnIndexer + // For now, the model will be loaded when the index is reopened + + return Status::OK(); +} + bool SegmentImpl::validate(const std::vector &columns) const { if (columns.empty()) { LOG_ERROR("Empty columns"); @@ -3867,24 +4039,57 @@ Status SegmentImpl::load_scalar_index_blocks(bool create) { } Status SegmentImpl::load_vector_index_blocks() { + fprintf(stderr, "[DEBUG] load_vector_index_blocks: loading %zu blocks\n", + segment_meta_->persisted_blocks().size()); + fflush(stderr); + + int block_index = 0; for (const auto &block : segment_meta_->persisted_blocks()) { + fprintf(stderr, "[DEBUG] load_vector_index_blocks: block[%d] type=%d\n", + block_index++, static_cast(block.type())); + fflush(stderr); + if (block.type() == BlockType::VECTOR_INDEX || block.type() == BlockType::VECTOR_INDEX_QUANTIZE) { // vector block only contained 1 column auto 
column = block.columns()[0]; + fprintf(stderr, "[DEBUG] load_vector_index_blocks: block[%d] column=%s, vector_indexed=%d\n", + block_index-1, column.c_str(), segment_meta_->vector_indexed(column)); + fflush(stderr); + FieldSchema new_field_params = *collection_schema_->get_vector_field(column); + fprintf(stderr, "[DEBUG] load_vector_index_blocks: original schema field index_type=%d\n", + static_cast(new_field_params.index_params()->type())); + fflush(stderr); + auto vector_index_params = std::dynamic_pointer_cast( new_field_params.index_params()); + + fprintf(stderr, "[DEBUG] load_vector_index_blocks: original index_type=%d\n", + static_cast(vector_index_params->type())); + fprintf(stderr, "[DEBUG] load_vector_index_blocks: quantize_type=%d\n", + static_cast(vector_index_params->quantize_type())); + fflush(stderr); + if (block.type_ == BlockType::VECTOR_INDEX) { if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED || !segment_meta_->vector_indexed(column)) { + fprintf(stderr, "[DEBUG] load_vector_index_blocks: CONDITION MET! 
quantize_type=%d, vector_indexed=%d\n", + static_cast(vector_index_params->quantize_type()), + segment_meta_->vector_indexed(column)); + fprintf(stderr, "[DEBUG] load_vector_index_blocks: replacing with default FLAT params!\n"); + fflush(stderr); new_field_params.set_index_params( MakeDefaultVectorIndexParams(vector_index_params->metric_type())); + } else { + fprintf(stderr, "[DEBUG] load_vector_index_blocks: CONDITION NOT MET, keeping original index_type=%d\n", + static_cast(vector_index_params->type())); + fflush(stderr); } - } else { + } else { if (!segment_meta_->vector_indexed(column)) { new_field_params.set_index_params(MakeDefaultQuantVectorIndexParams( vector_index_params->metric_type(), @@ -3892,6 +4097,13 @@ Status SegmentImpl::load_vector_index_blocks() { } } + fprintf(stderr, "[DEBUG] load_vector_index_blocks: block[%d] creating VectorColumnIndexer with final index_type=%d\n", + block_index-1, static_cast(std::dynamic_pointer_cast(new_field_params.index_params())->type())); + fprintf(stderr, "[DEBUG] load_vector_index_blocks: new_field_params details: name=%s, index_type=%d\n", + new_field_params.name().c_str(), + static_cast(new_field_params.index_params()->type())); + fflush(stderr); + std::string index_path; if (block.type_ == BlockType::VECTOR_INDEX) { index_path = FileHelper::MakeVectorIndexPath( diff --git a/src/db/proto/zvec.proto b/src/db/proto/zvec.proto index a2b310d38..0b95a2252 100644 --- a/src/db/proto/zvec.proto +++ b/src/db/proto/zvec.proto @@ -58,6 +58,8 @@ enum IndexType { IT_FLAT = 3; // Invert Index IT_INVERT = 10; + // OMEGA Index (HNSW with learned early stopping) + IT_OMEGA = 11; }; enum QuantizeType { @@ -98,12 +100,20 @@ message IVFIndexParams { bool use_soar = 4; } +message OmegaIndexParams { + BaseIndexParams base = 1; + int32 m = 2; + int32 ef_construction = 3; + // TODO: Add OMEGA-specific params like min_vector_threshold, model_dir +} + message IndexParams { oneof params { InvertIndexParams invert = 1; HnswIndexParams hnsw = 
2; FlatIndexParams flat = 3; IVFIndexParams ivf = 4; + OmegaIndexParams omega = 5; }; }; diff --git a/src/db/training/omega_model_trainer.cc b/src/db/training/omega_model_trainer.cc new file mode 100644 index 000000000..300390308 --- /dev/null +++ b/src/db/training/omega_model_trainer.cc @@ -0,0 +1,126 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "omega_model_trainer.h" +#include +#include +#include +#include + +namespace zvec { + +Status OmegaModelTrainer::TrainModel( + const std::vector& training_records, + const OmegaModelTrainerOptions& options) { + if (training_records.empty()) { + return Status::InvalidArgument("Training records are empty"); + } + + if (options.output_dir.empty()) { + return Status::InvalidArgument("Output directory is empty"); + } + + // Step 1: Export training records to CSV + std::string csv_path = options.output_dir + "/training_data.csv"; + LOG_INFO("Exporting %zu training records to CSV: %s", + training_records.size(), csv_path.c_str()); + + auto status = ExportToCSV(training_records, csv_path); + if (!status.ok()) { + return status; + } + + // Step 2: Invoke Python training script + LOG_INFO("Invoking Python training script"); + status = InvokePythonTrainer(csv_path, options); + if (!status.ok()) { + return status; + } + + LOG_INFO("Successfully trained OMEGA model, output: %s", + options.output_dir.c_str()); + return Status::OK(); +} + +Status 
OmegaModelTrainer::ExportToCSV( + const std::vector& records, + const std::string& csv_path) { + std::ofstream csv_file(csv_path); + if (!csv_file.is_open()) { + return Status::InternalError("Failed to open CSV file for writing: " + csv_path); + } + + // Write CSV header + csv_file << "query_id,hops_visited,cmps_visited,dist_1st,dist_start," + << "stat_0,stat_1,stat_2,stat_3,stat_4,stat_5,stat_6,label\n"; + + // Write training records + for (const auto& record : records) { + csv_file << record.query_id << "," + << record.hops_visited << "," + << record.cmps_visited << "," + << record.dist_1st << "," + << record.dist_start << ","; + + // Write traversal window stats (7 dimensions) + for (size_t i = 0; i < record.traversal_window_stats.size(); ++i) { + csv_file << record.traversal_window_stats[i]; + if (i < record.traversal_window_stats.size() - 1) { + csv_file << ","; + } + } + + csv_file << "," << record.label << "\n"; + } + + csv_file.close(); + + if (!csv_file.good()) { + return Status::InternalError("Error writing CSV file: " + csv_path); + } + + LOG_INFO("Successfully exported %zu records to CSV", records.size()); + return Status::OK(); +} + +Status OmegaModelTrainer::InvokePythonTrainer( + const std::string& csv_path, + const OmegaModelTrainerOptions& options) { + // Build Python command + std::ostringstream cmd; + cmd << options.python_executable + << " -m zvec._omega_training train" + << " --input " << csv_path + << " --output " << options.output_dir; + + if (options.verbose) { + cmd << " --verbose"; + } + + std::string command = cmd.str(); + LOG_INFO("Executing: %s", command.c_str()); + + // Execute command + int ret = std::system(command.c_str()); + + if (ret != 0) { + return Status::InternalError( + "Python training script failed with exit code: " + std::to_string(ret)); + } + + LOG_INFO("Python training script completed successfully"); + return Status::OK(); +} + +} // namespace zvec diff --git a/src/db/training/omega_model_trainer.h 
b/src/db/training/omega_model_trainer.h new file mode 100644 index 000000000..97b54713a --- /dev/null +++ b/src/db/training/omega_model_trainer.h @@ -0,0 +1,89 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +namespace zvec { + +/** + * @brief Configuration options for OMEGA model training + */ +struct OmegaModelTrainerOptions { + // Output directory for trained model files + std::string output_dir; + + // Path to Python executable (default: python3) + std::string python_executable = "python3"; + + // Enable verbose logging during training + bool verbose = false; +}; + +/** + * @brief OMEGA model trainer (calls Python training script) + * + * This class bridges C++ training data collection with Python model training. + * It exports TrainingRecord data to CSV format and invokes the Python + * _omega_training.py script to train a LightGBM model. 
+ */ +class OmegaModelTrainer { + public: + /** + * @brief Train OMEGA model from collected training records + * + * @param training_records Training data collected from searches + * @param options Training configuration + * @return Status indicating success or failure + */ + static Status TrainModel( + const std::vector& training_records, + const OmegaModelTrainerOptions& options); + + private: + /** + * @brief Export training records to CSV format + * + * CSV format: + * query_id,hops_visited,cmps_visited,dist_1st,dist_start, + * stat_0,stat_1,stat_2,stat_3,stat_4,stat_5,stat_6,label + * + * @param records Training records to export + * @param csv_path Output CSV file path + * @return Status indicating success or failure + */ + static Status ExportToCSV( + const std::vector& records, + const std::string& csv_path); + + /** + * @brief Invoke Python training script + * + * Calls: python3 -m zvec._omega_training train \ + * --input --output [--verbose] + * + * @param csv_path Input CSV file path + * @param options Training configuration + * @return Status indicating success or failure + */ + static Status InvokePythonTrainer( + const std::string& csv_path, + const OmegaModelTrainerOptions& options); +}; + +} // namespace zvec diff --git a/src/db/training/query_generator.cc b/src/db/training/query_generator.cc new file mode 100644 index 000000000..32832b278 --- /dev/null +++ b/src/db/training/query_generator.cc @@ -0,0 +1,143 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "query_generator.h" +#include +#include +#include + +namespace zvec { + +std::vector> TrainingQueryGenerator::SampleBaseVectors( + const Segment::Ptr& segment, + const std::string& field_name, + size_t num_samples, + uint64_t seed) { + std::vector> sampled_vectors; + + // Get total document count + uint64_t doc_count = segment->doc_count(); + if (doc_count == 0) { + LOG_WARN("Segment has no documents, cannot sample base vectors"); + return sampled_vectors; + } + + // Adjust num_samples if it exceeds doc_count + size_t actual_samples = std::min(num_samples, static_cast(doc_count)); + if (actual_samples < num_samples) { + LOG_WARN("Requested %zu samples but segment only has %zu documents", + num_samples, static_cast(doc_count)); + } + + // Random number generator for sampling indices + std::mt19937_64 rng(seed); + std::uniform_int_distribution dist(0, doc_count - 1); + + sampled_vectors.reserve(actual_samples); + + // Sample vectors + for (size_t i = 0; i < actual_samples; ++i) { + // Get random document index + uint64_t doc_idx = dist(rng); + + // Fetch document + auto doc = segment->Fetch(doc_idx); + if (!doc) { + LOG_WARN("Failed to fetch document at index %zu, skipping", + static_cast(doc_idx)); + continue; + } + + // Extract vector field + auto vector_opt = doc->get>(field_name); + if (!vector_opt.has_value()) { + LOG_WARN("Document at index %zu does not have field '%s', skipping", + static_cast(doc_idx), field_name.c_str()); + continue; + } + + sampled_vectors.push_back(vector_opt.value()); + } + + LOG_INFO("Successfully sampled %zu/%zu vectors from segment", + sampled_vectors.size(), actual_samples); + + return sampled_vectors; +} + +std::vector> TrainingQueryGenerator::AddGaussianNoise( + const std::vector>& base_vectors, + float noise_scale, + uint64_t seed) { + if (base_vectors.empty()) { + LOG_WARN("Input base_vectors is empty, returning 
empty result"); + return {}; + } + + std::vector> noisy_vectors; + noisy_vectors.reserve(base_vectors.size()); + + // Random number generator for Gaussian noise + std::mt19937 rng(seed); + std::normal_distribution gaussian(0.0f, noise_scale); + + for (const auto& base_vector : base_vectors) { + if (base_vector.empty()) { + LOG_WARN("Encountered empty vector, skipping"); + continue; + } + + std::vector noisy_vector; + noisy_vector.reserve(base_vector.size()); + + // Add Gaussian noise to each dimension + for (float base_value : base_vector) { + float noise = gaussian(rng); + noisy_vector.push_back(base_value + noise); + } + + noisy_vectors.push_back(std::move(noisy_vector)); + } + + LOG_INFO("Added Gaussian noise (scale=%.4f) to %zu vectors", + noise_scale, noisy_vectors.size()); + + return noisy_vectors; +} + +std::vector> TrainingQueryGenerator::GenerateTrainingQueries( + const Segment::Ptr& segment, + const std::string& field_name, + size_t num_queries, + float noise_scale, + uint64_t seed) { + // Step 1: Sample base vectors + auto base_vectors = SampleBaseVectors(segment, field_name, num_queries, seed); + + if (base_vectors.empty()) { + LOG_ERROR("Failed to sample base vectors from segment"); + return {}; + } + + // Step 2: Add Gaussian noise + // Use a different seed for noise generation to avoid correlation + auto training_queries = AddGaussianNoise(base_vectors, noise_scale, seed + 1); + + LOG_INFO("Generated %zu training queries for field '%s'", + training_queries.size(), field_name.c_str()); + + return training_queries; +} + +} // namespace zvec diff --git a/src/db/training/query_generator.h b/src/db/training/query_generator.h new file mode 100644 index 000000000..14c9268d0 --- /dev/null +++ b/src/db/training/query_generator.h @@ -0,0 +1,82 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include "db/index/segment/segment.h" + +namespace zvec { + +/** + * @brief Training query generator for OMEGA model training + * + * This class provides utilities to generate training queries by: + * 1. Sampling base vectors from a persisted segment + * 2. Adding Gaussian noise to simulate realistic query variations + */ +class TrainingQueryGenerator { + public: + /** + * @brief Sample base vectors from a segment + * + * @param segment The segment to sample from (must be persisted) + * @param field_name The vector field name to sample + * @param num_samples Number of vectors to sample + * @param seed Random seed for reproducibility + * @return Vector of sampled vectors + */ + static std::vector> SampleBaseVectors( + const Segment::Ptr& segment, + const std::string& field_name, + size_t num_samples, + uint64_t seed = 42); + + /** + * @brief Add Gaussian noise to base vectors + * + * @param base_vectors Input vectors + * @param noise_scale Standard deviation of Gaussian noise + * @param seed Random seed for reproducibility + * @return Vectors with added noise + */ + static std::vector> AddGaussianNoise( + const std::vector>& base_vectors, + float noise_scale = 0.01f, + uint64_t seed = 42); + + /** + * @brief Generate training queries (sample + noise) + * + * Combines sampling and noise addition in one step. 
+ * + * @param segment The segment to sample from + * @param field_name The vector field name + * @param num_queries Number of training queries to generate + * @param noise_scale Standard deviation of Gaussian noise + * @param seed Random seed for reproducibility + * @return Training query vectors + */ + static std::vector> GenerateTrainingQueries( + const Segment::Ptr& segment, + const std::string& field_name, + size_t num_queries, + float noise_scale = 0.01f, + uint64_t seed = 42); +}; + +} // namespace zvec diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc new file mode 100644 index 000000000..1d5b5c0c4 --- /dev/null +++ b/src/db/training/training_data_collector.cc @@ -0,0 +1,350 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "training_data_collector.h" +#include +#include +#include +#include +#include "db/index/column/vector_column/vector_column_params.h" +#include "query_generator.h" + +namespace zvec { + +Result> +TrainingDataCollector::CollectTrainingData( + const Segment::Ptr& segment, + const std::string& field_name, + const TrainingDataCollectorOptions& options, + const std::vector& provided_indexers) { + // Step 1: Generate training queries + LOG_INFO("Generating %zu training queries for field '%s'", + options.num_training_queries, field_name.c_str()); + + auto training_queries = TrainingQueryGenerator::GenerateTrainingQueries( + segment, field_name, options.num_training_queries, + options.noise_scale, options.seed); + + fprintf(stderr, "[DEBUG] CollectTrainingData: generated %zu training queries\n", + training_queries.size()); + fflush(stderr); + + if (training_queries.empty()) { + return tl::make_unexpected( + Status::InternalError("Failed to generate training queries")); + } + + // Step 2: Compute ground truth (brute force search with recall = 1) + LOG_INFO("Computing ground truth with brute force search (topk=%zu)", + options.topk); + + auto ground_truth = ComputeGroundTruth( + segment, field_name, training_queries, options.topk); + + if (ground_truth.empty()) { + return tl::make_unexpected( + Status::InternalError("Failed to compute ground truth")); + } + + // Step 3: Choose indexers for training + // CRITICAL: If provided_indexers is given, use those (just-merged indexers) + // Otherwise, get indexers from segment (persisted indexers) + std::vector indexers; + + if (!provided_indexers.empty()) { + fprintf(stderr, "[DEBUG] CollectTrainingData: using %zu provided (just-merged) indexers\n", + provided_indexers.size()); + fflush(stderr); + indexers = provided_indexers; + } else { + fprintf(stderr, "[DEBUG] CollectTrainingData: using indexers from segment\n"); + fflush(stderr); + indexers = segment->get_vector_indexer(field_name); + } + + if (indexers.empty()) { + 
return tl::make_unexpected( + Status::InternalError("No vector indexers found for field: " + field_name)); + } + + LOG_INFO("Found %zu indexers for field '%s' (will enable training on all, but only training-capable ones will collect)", + indexers.size(), field_name.c_str()); + + // Step 4: Enable training mode on all indexers + LOG_INFO("Enabling training mode on %zu indexers", indexers.size()); + for (auto& indexer : indexers) { + auto status = indexer->EnableTrainingMode(true); + if (!status.ok()) { + LOG_WARN("Failed to enable training mode on indexer: %s", + status.message().c_str()); + } + } + + // Step 5: Perform searches with large ef and collect training records + LOG_INFO("Performing training searches with ef=%d", options.ef_training); + + std::vector> search_results; + search_results.reserve(training_queries.size()); + + for (size_t query_idx = 0; query_idx < training_queries.size(); ++query_idx) { + const auto& query_vector = training_queries[query_idx]; + + if (query_idx == 0) { + fprintf(stderr, "[DEBUG] CollectTrainingData: Starting training searches, query 0 vector size=%zu\n", + query_vector.size()); + fflush(stderr); + } + + // Set query ID for this query + for (auto& indexer : indexers) { + indexer->SetCurrentQueryId(static_cast(query_idx)); + } + + // Prepare query parameters + vector_column_params::VectorData vector_data; + vector_data.vector = vector_column_params::DenseVector{ + .data = const_cast(static_cast(query_vector.data())) + }; + + vector_column_params::QueryParams query_params; + query_params.topk = options.topk; + query_params.fetch_vector = false; + query_params.filter = segment->get_filter().get(); + + // Create HNSW query params with large ef + auto hnsw_params = std::make_shared(); + hnsw_params->set_ef(options.ef_training); + query_params.query_params = hnsw_params; + + if (query_idx == 0) { + fprintf(stderr, "[DEBUG] CollectTrainingData: Calling indexers[0]->Search for query 0, topk=%zu, ef=%d\n", + options.topk, 
options.ef_training); + fflush(stderr); + } + + // Perform search directly on the indexer (assumes single indexer, which is true for just-merged case) + // For multiple indexers, we would need to merge results + if (indexers.size() != 1) { + LOG_WARN("Expected 1 indexer but found %zu, using first one only", indexers.size()); + } + + auto search_result = indexers[0]->Search(vector_data, query_params); + if (!search_result.has_value()) { + LOG_WARN("Search failed for query %zu: %s", query_idx, + search_result.error().message().c_str()); + fprintf(stderr, "[DEBUG] CollectTrainingData: Search FAILED for query %zu: %s\n", + query_idx, search_result.error().message().c_str()); + fflush(stderr); + search_results.push_back({}); + continue; + } + + if (query_idx == 0) { + fprintf(stderr, "[DEBUG] CollectTrainingData: Search completed for query 0\n"); + fflush(stderr); + } + + // Extract result doc IDs + auto& results = search_result.value(); + std::vector result_ids; + result_ids.reserve(results->count()); + auto iter = results->create_iterator(); + while (iter->valid()) { + result_ids.push_back(iter->doc_id()); + iter->next(); + } + + if (query_idx == 0) { + fprintf(stderr, "[DEBUG] CollectTrainingData: Query 0 returned %zu results\n", + result_ids.size()); + fflush(stderr); + } + + search_results.push_back(std::move(result_ids)); + } + + fprintf(stderr, "[DEBUG] CollectTrainingData: Completed all %zu training searches\n", + training_queries.size()); + fflush(stderr); + + // Step 6: Collect training records from all indexers + LOG_INFO("Collecting training records from indexers"); + + std::vector all_records; + for (auto& indexer : indexers) { + auto records = indexer->GetTrainingRecords(); + LOG_INFO("Collected %zu records from indexer", records.size()); + all_records.insert(all_records.end(), records.begin(), records.end()); + } + + if (all_records.empty()) { + LOG_WARN("No training records collected from any indexer"); + } + + // Step 7: Fill labels based on ground 
truth + LOG_INFO("Filling labels for %zu records (k_train=%zu)", all_records.size(), options.k_train); + FillLabels(&all_records, ground_truth, search_results, options.k_train); + + // Step 8: Disable training mode and clear records + for (auto& indexer : indexers) { + indexer->EnableTrainingMode(false); + indexer->ClearTrainingRecords(); + } + + LOG_INFO("Successfully collected %zu training records with labels", + all_records.size()); + + return all_records; +} + +std::vector> TrainingDataCollector::ComputeGroundTruth( + const Segment::Ptr& segment, + const std::string& field_name, + const std::vector>& queries, + size_t topk) { + std::vector> ground_truth; + ground_truth.reserve(queries.size()); + + // Get vector indexer (use brute force with is_linear=true) + auto combined_indexer = segment->get_combined_vector_indexer(field_name); + if (!combined_indexer) { + LOG_ERROR("Failed to get vector indexer for field: %s", field_name.c_str()); + return ground_truth; + } + + // Perform brute force search for each query + for (size_t query_idx = 0; query_idx < queries.size(); ++query_idx) { + const auto& query_vector = queries[query_idx]; + + // Prepare query parameters for brute force search + vector_column_params::VectorData vector_data; + vector_data.vector = vector_column_params::DenseVector{ + .data = const_cast(static_cast(query_vector.data())) + }; + + vector_column_params::QueryParams query_params; + query_params.topk = topk; + query_params.fetch_vector = false; + query_params.filter = segment->get_filter().get(); + + // Use linear search (brute force) for ground truth + auto base_params = std::make_shared(topk); + base_params->set_is_linear(true); + query_params.query_params = base_params; + + // Perform search + auto search_result = combined_indexer->Search(vector_data, query_params); + if (!search_result.has_value()) { + LOG_WARN("Ground truth search failed for query %zu: %s", + query_idx, search_result.error().message().c_str()); + ground_truth.push_back({}); 
+ continue; + } + + // Extract result doc IDs + auto& results = search_result.value(); + std::vector gt_ids; + gt_ids.reserve(results->count()); + auto iter = results->create_iterator(); + while (iter->valid()) { + gt_ids.push_back(iter->doc_id()); + iter->next(); + } + ground_truth.push_back(std::move(gt_ids)); + + if ((query_idx + 1) % 100 == 0) { + LOG_INFO("Computed ground truth for %zu/%zu queries", + query_idx + 1, queries.size()); + } + } + + LOG_INFO("Computed ground truth for %zu queries", queries.size()); + return ground_truth; +} + +void TrainingDataCollector::FillLabels( + std::vector* records, + const std::vector>& ground_truth, + const std::vector>& search_results, + size_t k_train) { + if (!records || records->empty()) { + LOG_WARN("No records to fill labels"); + return; + } + + if (ground_truth.empty()) { + LOG_WARN("Ground truth is empty, cannot fill labels"); + return; + } + + // Build sets from collected_node_ids for fast lookup + size_t labeled_count = 0; + size_t positive_count = 0; + size_t negative_count = 0; + + for (auto& record : *records) { + int query_id = record.query_id; + + // Validate query_id + if (query_id < 0 || query_id >= static_cast(ground_truth.size())) { + LOG_WARN("Invalid query_id %d in training record (ground_truth size: %zu)", + query_id, ground_truth.size()); + record.label = 0; + negative_count++; + continue; + } + + const auto& gt = ground_truth[query_id]; + if (gt.empty()) { + // No ground truth for this query, label as negative + record.label = 0; + negative_count++; + labeled_count++; + continue; + } + + // Take top k_train ground truth nodes + size_t actual_k = std::min(k_train, gt.size()); + + // Convert collected_node_ids to set for fast lookup + std::unordered_set collected_set( + record.collected_node_ids.begin(), + record.collected_node_ids.end()); + + // Check if ALL top k_train ground truth nodes are in collected_node_ids + bool all_found = true; + for (size_t i = 0; i < actual_k; ++i) { + if 
(collected_set.find(gt[i]) == collected_set.end()) { + all_found = false; + break; + } + } + + // Label based on whether all top k_train GT nodes are collected + if (all_found) { + record.label = 1; + positive_count++; + } else { + record.label = 0; + negative_count++; + } + + labeled_count++; + } + + LOG_INFO("Filled labels for %zu/%zu records (%zu positive, %zu negative, k_train=%zu)", + labeled_count, records->size(), positive_count, negative_count, k_train); +} + +} // namespace zvec diff --git a/src/db/training/training_data_collector.h b/src/db/training/training_data_collector.h new file mode 100644 index 000000000..48001a211 --- /dev/null +++ b/src/db/training/training_data_collector.h @@ -0,0 +1,118 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "db/index/segment/segment.h" + +namespace zvec { + +/** + * @brief Configuration options for training data collection + */ +struct TrainingDataCollectorOptions { + // Number of training queries to generate + size_t num_training_queries = 1000; + + // Gaussian noise scale for query generation + float noise_scale = 0.01f; + + // ef parameter for training searches (large value for recall ≈ 1) + int ef_training = 1000; + + // Top-K results to retrieve per query + size_t topk = 100; + + // K_train: number of ground truth results that must be collected for label=1 + // Label=1 iff the top K_train ground truth nodes are all in collected_node_ids + // Typically set to 1 (i.e., label=1 when the 1st ground truth is found) + size_t k_train = 1; + + // Random seed for reproducibility + uint64_t seed = 42; +}; + +/** + * @brief Collector for OMEGA training data + * + * This class collects training data by: + * 1. Generating training queries from base vectors + * 2. Computing ground truth with brute force search + * 3. Performing searches in training mode with large ef + * 4. 
Labeling collected records based on ground truth + */ +class TrainingDataCollector { + public: + /** + * @brief Collect training data from a persisted segment + * + * @param segment The segment to collect data from (must be persisted) + * @param field_name Vector field name to train on + * @param options Collection options + * @param indexers Optional specific indexers to use (if empty, will use segment->get_vector_indexer) + * @return Training records with labels filled + */ + static Result> CollectTrainingData( + const Segment::Ptr& segment, + const std::string& field_name, + const TrainingDataCollectorOptions& options, + const std::vector& indexers = {}); + + private: + /** + * @brief Compute ground truth using brute force search + * + * @param segment The segment to search + * @param field_name Vector field name + * @param queries Training query vectors + * @param topk Number of top results to retrieve + * @return Ground truth doc IDs for each query + */ + static std::vector> ComputeGroundTruth( + const Segment::Ptr& segment, + const std::string& field_name, + const std::vector>& queries, + size_t topk); + + /** + * @brief Fill labels in training records based on ground truth + * + * Label=1 iff the top K_train ground truth nodes are ALL in collected_node_ids. + * Label=0 otherwise. 
+ * + * This follows big-ann-benchmarks labeling strategy: + * - Training records represent search states + * - label=1 means "we've found enough results, can stop now" + * - label=0 means "need to continue searching" + * + * @param records Training records to fill (modified in-place) + * @param ground_truth Ground truth doc IDs per query (sorted by distance) + * @param search_results Search result doc IDs per query (unused but kept for compatibility) + * @param k_train Number of top ground truth results that must be collected + */ + static void FillLabels( + std::vector* records, + const std::vector>& ground_truth, + const std::vector>& search_results, + size_t k_train); +}; + +} // namespace zvec diff --git a/src/include/zvec/core/interface/index.h b/src/include/zvec/core/interface/index.h index 71258cb06..6921a4d29 100644 --- a/src/include/zvec/core/interface/index.h +++ b/src/include/zvec/core/interface/index.h @@ -31,6 +31,13 @@ #include #include #include +#include +#include +#include + +namespace zvec::core { +class OmegaSearcher; // Forward declaration +} namespace zvec::core_interface { @@ -131,6 +138,28 @@ class Index { const BaseIndexQueryParam::Pointer &search_param, SearchResult *result); + // Capability Pattern: Query optional capabilities + /** + * @brief Get training capability interface if supported. + * + * This method allows indexes to optionally provide training functionality + * without polluting the base Index class. Follows the Capability Pattern. + * + * @return Pointer to ITrainingCapable interface if supported, nullptr otherwise + * + * @example + * @code + * if (auto* training = index->GetTrainingCapability()) { + * training->EnableTrainingMode(true); + * // ... perform searches ... 
+ * auto records = training->GetTrainingRecords(); + * } + * @endcode + */ + virtual class ITrainingCapable* GetTrainingCapability() { + return nullptr; // Default: capability not supported + } + virtual BaseIndexParam::Pointer GetParam() const { return std::make_shared(param_); } @@ -214,6 +243,7 @@ class Index { size_t context_index_; core::IndexStorage::Pointer storage_{}; + std::string file_path_; // Storage file path bool is_open_{false}; bool is_sparse_{false}; @@ -293,4 +323,41 @@ class HNSWIndex : public Index { }; +//! OMEGA Index - HNSW with learned early stopping +/** + * OmegaIndex is a specialized HNSW index that supports training mode for + * collecting features to train the OMEGA early stopping model. + * + * It implements the ITrainingCapable interface to provide training functionality + * without modifying the generic HNSWIndex class. + */ +class OmegaIndex : public HNSWIndex, public ITrainingCapable { + public: + OmegaIndex() = default; + + // Override GetTrainingCapability to return this + ITrainingCapable* GetTrainingCapability() override { + return this; + } + + // Implement ITrainingCapable interface + zvec::Status EnableTrainingMode(bool enable) override; + void SetCurrentQueryId(int query_id) override; + std::vector GetTrainingRecords() const override; + void ClearTrainingRecords() override; + + protected: + virtual int CreateAndInitStreamer(const BaseIndexParam ¶m) override; + + virtual int _prepare_for_search( + const VectorData &query, const BaseIndexQueryParam::Pointer &search_param, + core::IndexContext::Pointer &context) override; + + private: + // Training mode state (tracked locally for convenience) + bool training_mode_enabled_{false}; + int current_query_id_{0}; +}; + + } // namespace zvec::core_interface diff --git a/src/include/zvec/core/interface/index_param.h b/src/include/zvec/core/interface/index_param.h index 98da5b124..da3a88dcd 100644 --- a/src/include/zvec/core/interface/index_param.h +++ 
b/src/include/zvec/core/interface/index_param.h @@ -61,6 +61,7 @@ enum class IndexType { kFlat, kIVF, // it's actual a two-layer index kHNSW, + kOMEGA, // HNSW with learned early stopping }; enum class IVFSearchMethod { kBF, kHNSW }; @@ -186,6 +187,16 @@ struct HNSWQueryParam : public BaseIndexQueryParam { } }; +struct OmegaQueryParam : public HNSWQueryParam { + using Pointer = std::shared_ptr; + + float target_recall = 0.95f; + + BaseIndexQueryParam::Pointer Clone() const override { + return std::make_shared(*this); + } +}; + struct IVFQueryParam : public BaseIndexQueryParam { int nprobe = 10; std::shared_ptr l1QueryParam = nullptr; diff --git a/src/include/zvec/core/interface/training.h b/src/include/zvec/core/interface/training.h new file mode 100644 index 000000000..f2e5f951b --- /dev/null +++ b/src/include/zvec/core/interface/training.h @@ -0,0 +1,62 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +namespace zvec::core_interface { + +/** + * @brief Training record structure for OMEGA adaptive search. + * + * This structure captures features collected during a single comparison + * in the HNSW/OMEGA search process. Multiple records are collected per query + * to train models for early stopping prediction. 
+ * + * Features (11 dimensions total): + * - query_id: Unique identifier for the query + * - hops_visited: Number of hops in the search graph + * - cmps_visited: Number of node comparisons performed + * - dist_1st: Distance to the first (best) result found so far + * - dist_start: Distance to the starting node + * - traversal_window_stats: 7 statistical features of the traversal window + * (avg, var, min, max, median, percentile25, percentile75) + * - collected_node_ids: Node IDs collected in topk at this search state + * - label: Binary label (1 if collected enough GT results, 0 otherwise) + * Filled by training pipeline based on collected_node_ids vs ground truth + */ +struct TrainingRecord { + int query_id; + int hops_visited; + int cmps_visited; + float dist_1st; + float dist_start; + std::array traversal_window_stats; + std::vector collected_node_ids; // Node IDs in topk at this state + int label; // 0 by default, to be filled by FillLabels() + + TrainingRecord() + : query_id(0), + hops_visited(0), + cmps_visited(0), + dist_1st(0.0f), + dist_start(0.0f), + traversal_window_stats{}, + collected_node_ids{}, + label(0) {} +}; + +} // namespace zvec::core_interface diff --git a/src/include/zvec/core/interface/training_capable.h b/src/include/zvec/core/interface/training_capable.h new file mode 100644 index 000000000..9d37567ef --- /dev/null +++ b/src/include/zvec/core/interface/training_capable.h @@ -0,0 +1,87 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +namespace zvec { +namespace core_interface { + +/** + * @brief Training capability interface for indexes that support OMEGA training mode. + * + * This interface follows the Capability Pattern, allowing indexes to optionally + * provide training functionality without polluting the base Index class. + * + * Example usage: + * @code + * if (auto* training = index->GetTrainingCapability()) { + * training->EnableTrainingMode(true); + * // ... perform searches ... + * auto records = training->GetTrainingRecords(); + * } + * @endcode + */ +class ITrainingCapable { + public: + virtual ~ITrainingCapable() = default; + + /** + * @brief Enable or disable training mode for collecting training features. + * + * When training mode is enabled: + * - Early stopping is disabled (complete HNSW search) + * - Training features are collected for each visited node + * - query_id must be set via SetCurrentQueryId() before each search + * + * @param enable True to enable training mode, false to disable + * @return Status indicating success or failure + */ + virtual zvec::Status EnableTrainingMode(bool enable) = 0; + + /** + * @brief Set the query ID for the next search operation. + * + * Must be called before search when training mode is enabled. + * The query_id will be included in all training records collected + * during that search. + * + * @param query_id Unique identifier for the query + */ + virtual void SetCurrentQueryId(int query_id) = 0; + + /** + * @brief Get all collected training records. + * + * Returns a copy of all training records collected since training mode + * was enabled or since the last ClearTrainingRecords() call. + * + * @return Vector of TrainingRecord structures + */ + virtual std::vector GetTrainingRecords() const = 0; + + /** + * @brief Clear all collected training records. 
+ * + * Removes all training records from internal storage. Useful for + * starting a fresh training data collection session. + */ + virtual void ClearTrainingRecords() = 0; +}; + +} // namespace core_interface +} // namespace zvec diff --git a/src/include/zvec/db/query_params.h b/src/include/zvec/db/query_params.h index d187d7629..371eef50d 100644 --- a/src/include/zvec/db/query_params.h +++ b/src/include/zvec/db/query_params.h @@ -93,6 +93,29 @@ class HnswQueryParams : public QueryParams { int ef_; }; +class OmegaQueryParams : public HnswQueryParams { + public: + OmegaQueryParams(int ef = core_interface::kDefaultHnswEfSearch, + float target_recall = 0.95f, + float radius = 0.0f, bool is_linear = false, + bool is_using_refiner = false) + : HnswQueryParams(ef, radius, is_linear, is_using_refiner), + target_recall_(target_recall) {} + + virtual ~OmegaQueryParams() = default; + + float target_recall() const { + return target_recall_; + } + + void set_target_recall(float target_recall) { + target_recall_ = target_recall; + } + + private: + float target_recall_; +}; + class IVFQueryParams : public QueryParams { public: IVFQueryParams(int nprobe = 10, bool is_using_refiner = false, From 87cc1487b558575f38f88b9c27e4c1131b042a78 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Tue, 3 Mar 2026 03:20:34 +0800 Subject: [PATCH 008/126] fix(omega): fix training memory leak and enable auto-training workflow - Fix memory explosion in training data collection by clearing records after copy - Add omega_model directory creation before training to fix CSV write failure - Remove all debug fprintf/fflush statements and empty code blocks --- src/core/algorithm/hnsw/hnsw_streamer.h | 6 +- src/core/algorithm/omega/omega_streamer.cc | 114 ++++--------- src/core/interface/index.cc | 14 -- src/core/interface/indexes/omega_index.cc | 58 +------ .../mixed_reducer/mixed_streamer_reducer.cc | 68 +------- .../vector_column/vector_column_indexer.cc | 77 +-------- src/db/index/segment/segment.cc | 
153 ++++++++---------- src/db/training/training_data_collector.cc | 38 ----- 8 files changed, 113 insertions(+), 415 deletions(-) diff --git a/src/core/algorithm/hnsw/hnsw_streamer.h b/src/core/algorithm/hnsw/hnsw_streamer.h index 85f4e184a..0ab9c88a7 100644 --- a/src/core/algorithm/hnsw/hnsw_streamer.h +++ b/src/core/algorithm/hnsw/hnsw_streamer.h @@ -158,12 +158,10 @@ class HnswStreamer : public IndexStreamer { return 0; } - private: + protected: //! To share ctx across streamer/searcher, we need to update the context for - //! current streamer/searcher + //! current streamer/searcher (moved to protected for OmegaStreamer) int update_context(HnswContext *ctx) const; - - protected: // Changed from private to protected to allow OmegaStreamer inheritance enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_OPENED = 2 }; class Stats : public IndexStreamer::Stats { diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index cc7c8c86d..28185a65f 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -32,22 +32,15 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, Context::Pointer &context) const { - fprintf(stderr, "[DEBUG] OmegaStreamer::search_impl called, training_mode_enabled_=%d, current_query_id_=%d\n", - training_mode_enabled_, current_query_id_); - fflush(stderr); // In training mode, use OMEGA library's training feature collection if (!training_mode_enabled_) { // Normal mode: just use parent HNSW search for now // TODO: Load OMEGA model and use adaptive search for inference - fprintf(stderr, "[DEBUG] OmegaStreamer: training mode disabled, using parent HNSW search\n"); - fflush(stderr); LOG_DEBUG("OmegaStreamer: training mode disabled, using parent HNSW search"); return HnswStreamer::search_impl(query, qmeta, count, context); } - 
fprintf(stderr, "[DEBUG] OmegaStreamer: training mode ENABLED, proceeding with OMEGA training\n"); - fflush(stderr); LOG_INFO("OmegaStreamer: training mode enabled (query_id=%d), using OMEGA library to collect features", current_query_id_); // Training mode: Use OMEGA library with nullptr model (training-only mode) @@ -58,8 +51,6 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, OmegaSearchHandle omega_search = omega_search_create_with_params( nullptr, target_recall, count, 100); // model=nullptr for training mode - fprintf(stderr, "[DEBUG] omega_search_create_with_params returned: %p\n", (void*)omega_search); - fflush(stderr); if (omega_search == nullptr) { LOG_ERROR("Failed to create OMEGA search context for training mode"); @@ -68,26 +59,32 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, // Enable training mode (CRITICAL: must be before search) omega_search_enable_training(omega_search, current_query_id_); - fprintf(stderr, "[DEBUG] omega_search_enable_training called for query_id=%d\n", current_query_id_); - fflush(stderr); LOG_DEBUG("Training mode enabled for query_id=%d", current_query_id_); // Cast context to HnswContext to access HNSW-specific features auto *hnsw_ctx = dynamic_cast(context.get()); if (hnsw_ctx == nullptr) { - fprintf(stderr, "[DEBUG] FAILED: Context is not HnswContext\n"); - fflush(stderr); LOG_ERROR("Context is not HnswContext"); omega_search_destroy(omega_search); return IndexError_InvalidArgument; } - fprintf(stderr, "[DEBUG] Successfully cast context to HnswContext\n"); - fflush(stderr); + + // CRITICAL: Update context if it was created by another searcher/streamer + // This ensures the entity reference is fresh with correct entry_point + if (hnsw_ctx->magic() != magic_) { + int ret = update_context(hnsw_ctx); + if (ret != 0) { + omega_search_destroy(omega_search); + return ret; + } + } + + // Initialize context for search (CRITICAL: must call before topk_to_result) + 
hnsw_ctx->clear(); + hnsw_ctx->resize_results(count); // Initialize query in distance calculator hnsw_ctx->reset_query(query); - fprintf(stderr, "[DEBUG] Query reset in distance calculator\n"); - fflush(stderr); // Get entity and distance calculator from context const auto &entity = hnsw_ctx->get_entity(); @@ -95,43 +92,29 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, auto &visit_filter = hnsw_ctx->visit_filter(); auto &candidates = hnsw_ctx->candidates(); auto &topk_heap = hnsw_ctx->topk_heap(); - fprintf(stderr, "[DEBUG] Got entity and distance calculator from context\n"); - fflush(stderr); // Get entry point auto max_level = entity.cur_max_level(); auto entry_point = entity.entry_point(); - fprintf(stderr, "[DEBUG] Entry point: %lu, max_level: %d\n", - static_cast(entry_point), static_cast(max_level)); - fflush(stderr); if (entry_point == kInvalidNodeId) { - fprintf(stderr, "[DEBUG] Entry point is INVALID, returning early (no nodes in index)\n"); - fflush(stderr); omega_search_destroy(omega_search); return 0; } // Navigate to layer 0 dist_t dist = dc.dist(entry_point); - fprintf(stderr, "[DEBUG] Starting navigation from level %d, initial dist=%f\n", - static_cast(max_level), dist); - fflush(stderr); for (level_t cur_level = max_level; cur_level >= 1; --cur_level) { const Neighbors neighbors = entity.get_neighbors(cur_level, entry_point); if (neighbors.size() == 0) { - fprintf(stderr, "[DEBUG] No neighbors at level %d, breaking\n", static_cast(cur_level)); - fflush(stderr); break; } std::vector neighbor_vec_blocks; int ret = entity.get_vector(&neighbors[0], neighbors.size(), neighbor_vec_blocks); if (ret != 0) { - fprintf(stderr, "[DEBUG] Failed to get vectors at level %d, breaking\n", static_cast(cur_level)); - fflush(stderr); break; } @@ -146,20 +129,13 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, } } if (!find_closer) { - fprintf(stderr, "[DEBUG] No closer neighbor at level %d, 
breaking\n", static_cast(cur_level)); - fflush(stderr); break; } } - fprintf(stderr, "[DEBUG] Reached layer 0, entry_point=%lu, dist=%f\n", - static_cast(entry_point), dist); - fflush(stderr); // Set dist_start for OMEGA omega_search_set_dist_start(omega_search, dist); - fprintf(stderr, "[DEBUG] omega_search_set_dist_start called with dist=%f\n", dist); - fflush(stderr); // Now perform HNSW search on layer 0 with OMEGA feature collection candidates.clear(); @@ -173,23 +149,11 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, // Report initial visit to OMEGA omega_search_report_visit(omega_search, entry_point, dist, 1); // is_in_topk=1 - fprintf(stderr, "[DEBUG] omega_search_report_visit called for entry_point=%lu, dist=%f, is_in_topk=1\n", - static_cast(entry_point), dist); - fflush(stderr); dist_t lowerBound = dist; - int loop_iterations = 0; - int total_visits = 0; - // Main search loop with OMEGA feature collection while (!candidates.empty()) { - loop_iterations++; - if (loop_iterations == 1 || loop_iterations % 10 == 0) { - fprintf(stderr, "[DEBUG] Search loop iteration %d, candidates.size()=%zu, topk_heap.size()=%zu\n", - loop_iterations, candidates.size(), topk_heap.size()); - fflush(stderr); - } auto top = candidates.begin(); node_id_t current_node = top->first; @@ -197,9 +161,6 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, // Standard HNSW stopping condition if (topk_heap.full() && candidate_dist > lowerBound) { - fprintf(stderr, "[DEBUG] Stopping condition met: topk_heap.full()=%d, candidate_dist=%f > lowerBound=%f\n", - topk_heap.full(), candidate_dist, lowerBound); - fflush(stderr); break; } @@ -230,8 +191,6 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, unvisited_neighbors.size(), neighbor_vec_blocks); if (ret != 0) { - fprintf(stderr, "[DEBUG] Failed to get neighbor vectors, breaking\n"); - fflush(stderr); break; } @@ -246,7 +205,6 @@ int 
OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, // Report visit to OMEGA (this will collect training features) omega_search_report_visit(omega_search, neighbor, neighbor_dist, is_in_topk ? 1 : 0); - total_visits++; // Consider this candidate if (is_in_topk) { @@ -266,9 +224,6 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, } } - fprintf(stderr, "[DEBUG] Search loop completed: %d iterations, %d total visits, topk_heap.size()=%zu\n", - loop_iterations, total_visits, topk_heap.size()); - fflush(stderr); // Convert results to context format hnsw_ctx->topk_to_result(); @@ -276,64 +231,53 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, // Get final statistics int hops, cmps, collected_gt; omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); - fprintf(stderr, "[DEBUG] omega_search_get_stats: hops=%d, cmps=%d, collected_gt=%d\n", - hops, cmps, collected_gt); - fflush(stderr); LOG_DEBUG("OMEGA training search completed: cmps=%d, hops=%d, results=%zu", cmps, hops, topk_heap.size()); // Collect training records from OMEGA library size_t record_count = omega_search_get_training_records_count(omega_search); - fprintf(stderr, "[DEBUG] omega_search_get_training_records_count returned: %zu\n", record_count); - fflush(stderr); if (record_count > 0) { - fprintf(stderr, "[DEBUG] Extracting %zu training records...\n", record_count); - fflush(stderr); const void* records_ptr = omega_search_get_training_records(omega_search); - // Cast to omega::TrainingRecord array - const auto* omega_records = static_cast(records_ptr); + // NOTE: omega_search_get_training_records returns pointer to std::vector, not array + const auto* records_vec = static_cast*>(records_ptr); // Convert and store training records std::lock_guard lock(training_mutex_); for (size_t i = 0; i < record_count; ++i) { + const auto& omega_record = (*records_vec)[i]; core_interface::TrainingRecord record; - 
record.query_id = omega_records[i].query_id; - record.hops_visited = omega_records[i].hops; - record.cmps_visited = omega_records[i].cmps; - record.dist_1st = omega_records[i].dist_1st; - record.dist_start = omega_records[i].dist_start; + record.query_id = omega_record.query_id; + record.hops_visited = omega_record.hops; + record.cmps_visited = omega_record.cmps; + record.dist_1st = omega_record.dist_1st; + record.dist_start = omega_record.dist_start; // Copy 7 traversal window statistics - if (omega_records[i].traversal_window_stats.size() == 7) { - std::copy(omega_records[i].traversal_window_stats.begin(), - omega_records[i].traversal_window_stats.end(), + if (omega_record.traversal_window_stats.size() == 7) { + std::copy(omega_record.traversal_window_stats.begin(), + omega_record.traversal_window_stats.end(), record.traversal_window_stats.begin()); } else { LOG_WARN("Unexpected traversal_window_stats size: %zu (expected 7)", - omega_records[i].traversal_window_stats.size()); + omega_record.traversal_window_stats.size()); } // Copy collected_node_ids (convert int to node_id_t) record.collected_node_ids.assign( - omega_records[i].collected_node_ids.begin(), - omega_records[i].collected_node_ids.end()); + omega_record.collected_node_ids.begin(), + omega_record.collected_node_ids.end()); - record.label = omega_records[i].label; // Default 0 + record.label = omega_record.label; // Default 0 collected_records_.push_back(std::move(record)); } - fprintf(stderr, "[DEBUG] Successfully collected %zu training records for query_id=%d\n", - record_count, current_query_id_); - fflush(stderr); LOG_DEBUG("Collected %zu training records for query_id=%d", record_count, current_query_id_); } else { - fprintf(stderr, "[DEBUG] WARNING: No training records collected for query_id=%d\n", current_query_id_); - fflush(stderr); LOG_WARN("No training records collected for query_id=%d", current_query_id_); } diff --git a/src/core/interface/index.cc b/src/core/interface/index.cc index 
2b1a8daef..e0fd00a8b 100644 --- a/src/core/interface/index.cc +++ b/src/core/interface/index.cc @@ -283,18 +283,12 @@ int Index::Open(const std::string &file_path, StorageOptions storage_options) { return core::IndexError_Runtime; } - fprintf(stderr, "[DEBUG] Index::Open: Before streamer_->open(), streamer_=%p, builder_=%p\n", - (void*)streamer_.get(), (void*)builder_.get()); - fflush(stderr); if (streamer_ == nullptr || streamer_->open(storage_) != 0) { LOG_ERROR("Failed to open streamer, path: %s", file_path.c_str()); return core::IndexError_Runtime; } - fprintf(stderr, "[DEBUG] Index::Open: After streamer_->open(), streamer_=%p, builder_=%p\n", - (void*)streamer_.get(), (void*)builder_.get()); - fflush(stderr); // converter/reformer/metric are created in IndexFactory::CreateIndex // TODO: init @@ -783,17 +777,11 @@ int Index::Merge(const std::vector &indexes, return core::IndexError_Runtime; } - fprintf(stderr, "[DEBUG] Index::Merge: builder_=%p, streamer_=%p\n", - (void*)builder_.get(), (void*)streamer_.get()); - fflush(stderr); // Set storage and file path for dump/reload operations auto* mixed_reducer = dynamic_cast(reducer.get()); if (mixed_reducer != nullptr) { mixed_reducer->set_storage(storage_, file_path_); - fprintf(stderr, "[DEBUG] Index::Merge: set storage and file_path=%s for reducer\n", - file_path_.c_str()); - fflush(stderr); } if (reducer->set_target_streamer_wiht_info(builder_, streamer_, converter_, @@ -820,8 +808,6 @@ int Index::Merge(const std::vector &indexes, // The actual training orchestration happens at the db layer (Segment level) auto* training_capable = this->GetTrainingCapability(); if (training_capable != nullptr) { - fprintf(stderr, "[DEBUG] Index::Merge: Index has training capability, training should be triggered at db layer\n"); - fflush(stderr); LOG_INFO("Index merge completed for trainable index, training can now be performed"); } diff --git a/src/core/interface/indexes/omega_index.cc b/src/core/interface/indexes/omega_index.cc 
index 29d6299d2..1d6689e66 100644 --- a/src/core/interface/indexes/omega_index.cc +++ b/src/core/interface/indexes/omega_index.cc @@ -22,39 +22,18 @@ namespace zvec::core_interface { // OmegaIndex uses OmegaStreamer which provides OMEGA adaptive search int OmegaIndex::CreateAndInitStreamer(const BaseIndexParam &param) { - fprintf(stderr, "[DEBUG] OmegaIndex::CreateAndInitStreamer CALLED!\n"); - fflush(stderr); // First call parent to set up all parameters and create basic streamer int ret = HNSWIndex::CreateAndInitStreamer(param); if (ret != core::IndexError_Success) { - fprintf(stderr, "[DEBUG] OmegaIndex: parent CreateAndInitStreamer failed with ret=%d\n", ret); - fflush(stderr); return ret; } - fprintf(stderr, "[DEBUG] OmegaIndex: creating HnswBuilder...\n"); - fflush(stderr); - - // Create HnswBuilder for OMEGA (needed for Merge to build the graph) - builder_ = core::IndexFactory::CreateBuilder("HnswBuilder"); - if (ailego_unlikely(!builder_)) { - fprintf(stderr, "[DEBUG] OmegaIndex: FAILED to create HnswBuilder!\n"); - fflush(stderr); - LOG_ERROR("Failed to create HnswBuilder for OMEGA"); - return core::IndexError_Runtime; - } - fprintf(stderr, "[DEBUG] OmegaIndex: HnswBuilder created successfully, initializing...\n"); - fflush(stderr); - - if (ailego_unlikely(builder_->init(proxima_index_meta_, proxima_index_params_) != 0)) { - fprintf(stderr, "[DEBUG] OmegaIndex: FAILED to init HnswBuilder!\n"); - fflush(stderr); - LOG_ERROR("Failed to init HnswBuilder for OMEGA"); - return core::IndexError_Runtime; - } - fprintf(stderr, "[DEBUG] OmegaIndex: HnswBuilder initialized successfully!\n"); - fflush(stderr); + // NOTE: We intentionally DO NOT create a builder here! + // HNSW works by having data written directly to the streamer during Merge + // (via add_with_id_impl). 
If we create a builder, the MixedStreamerReducer + will use add_vec_with_builder() which puts data into the builder instead + of the streamer, causing doc_count=0 after Merge and subsequent crashes. // Now replace the HnswStreamer with OmegaStreamer // Save the current meta and params before replacing streamer @@ -62,30 +41,18 @@ int OmegaIndex::CreateAndInitStreamer(const BaseIndexParam &param) { ailego::Params saved_params = proxima_index_params_; // Create OmegaStreamer - fprintf(stderr, "[DEBUG] OmegaIndex: Creating OmegaStreamer...\n"); - fflush(stderr); streamer_ = core::IndexFactory::CreateStreamer("OmegaStreamer"); if (ailego_unlikely(!streamer_)) { - fprintf(stderr, "[DEBUG] OmegaIndex: FAILED to create OmegaStreamer!\n"); - fflush(stderr); LOG_ERROR("Failed to create OmegaStreamer"); return core::IndexError_Runtime; } - fprintf(stderr, "[DEBUG] OmegaIndex: OmegaStreamer created successfully, streamer_=%p\n", (void*)streamer_.get()); - fflush(stderr); // Initialize OmegaStreamer with the same parameters - fprintf(stderr, "[DEBUG] OmegaIndex: Initializing OmegaStreamer...\n"); - fflush(stderr); if (ailego_unlikely( streamer_->init(saved_meta, saved_params) != 0)) { - fprintf(stderr, "[DEBUG] OmegaIndex: FAILED to init OmegaStreamer!\n"); - fflush(stderr); LOG_ERROR("Failed to init OmegaStreamer"); return core::IndexError_Runtime; } - fprintf(stderr, "[DEBUG] OmegaIndex: OmegaStreamer initialized successfully!\n"); - fflush(stderr); // CRITICAL: Set "OmegaSearcher" in metadata for disk-persisted indices // This ensures that when the index is saved and loaded later, @@ -97,36 +64,21 @@ zvec::Status OmegaIndex::EnableTrainingMode(bool enable) { - fprintf(stderr, "[DEBUG] OmegaIndex::EnableTrainingMode called with enable=%d\n", enable); - fflush(stderr); - LOG_INFO("OmegaIndex::EnableTrainingMode called with enable=%d", enable); training_mode_enabled_ = enable; // Delegate to OmegaStreamer if 
available if (streamer_) { - fprintf(stderr, "[DEBUG] OmegaIndex: streamer_ exists\n"); - fflush(stderr); - LOG_INFO("OmegaIndex: streamer_ exists, attempting dynamic_cast to OmegaStreamer"); auto* omega_streamer = dynamic_cast(streamer_.get()); if (omega_streamer) { - fprintf(stderr, "[DEBUG] OmegaIndex: Successfully cast to OmegaStreamer\n"); - fflush(stderr); - LOG_INFO("OmegaIndex: Successfully cast to OmegaStreamer, calling EnableTrainingMode"); omega_streamer->EnableTrainingMode(enable); return zvec::Status::OK(); } else { - fprintf(stderr, "[DEBUG] OmegaIndex: Failed to cast to OmegaStreamer\n"); - fflush(stderr); - LOG_WARN("OmegaIndex: Failed to cast streamer_ to OmegaStreamer"); } } else { - fprintf(stderr, "[DEBUG] OmegaIndex: streamer_ is null\n"); - fflush(stderr); - LOG_WARN("OmegaIndex: streamer_ is null"); } diff --git a/src/core/mixed_reducer/mixed_streamer_reducer.cc b/src/core/mixed_reducer/mixed_streamer_reducer.cc index 58b3c8e3e..269adff24 100644 --- a/src/core/mixed_reducer/mixed_streamer_reducer.cc +++ b/src/core/mixed_reducer/mixed_streamer_reducer.cc @@ -170,9 +170,6 @@ int MixedStreamerReducer::reduce(const IndexFilter &filter) { for (size_t i = 0; i < streamers_.size(); i++) { read_results[i] = read_vec(i, filter, id_offset, &next_id); - fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: read_vec(%zu) returned %d, next_id=%u\n", - i, read_results[i], next_id); - fflush(stderr); id_offset += streamers_[i]->create_provider()->count(); } @@ -198,16 +195,9 @@ int MixedStreamerReducer::reduce(const IndexFilter &filter) { stats_.set_reduced_costtime(timer.seconds()); state_ = STATE_REDUCE; - fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: target_builder_=%p\n", - target_builder_.get()); - fflush(stderr); if (target_builder_ != nullptr) { - fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: calling IndexBuild()\n"); - fflush(stderr); IndexBuild(); - fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: IndexBuild() 
completed\n"); - fflush(stderr); // CRITICAL FIX: After IndexBuild(), the builder's entity has the graph data (1500 docs), // but the streamer's entity is still empty (0 docs). They are separate objects! @@ -223,9 +213,6 @@ int MixedStreamerReducer::reduce(const IndexFilter &filter) { return IndexError_Runtime; } - fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: dumping builder to storage at path=%s\n", - target_file_path_.c_str()); - fflush(stderr); // Create a FileDumper that writes to the file auto dumper = IndexFactory::CreateDumper("FileDumper"); @@ -255,52 +242,11 @@ int MixedStreamerReducer::reduce(const IndexFilter &filter) { return ret; } - fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: builder dumped, now closing streamer\n"); - fflush(stderr); - // Close the streamer - ret = target_streamer_->close(); - if (ret != 0) { - LOG_ERROR("Failed to close streamer, ret=%d", ret); - return ret; - } - - fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: streamer closed, now closing storage\n"); - fflush(stderr); - - // Close the storage before reopening it - ret = target_storage_->close(); - if (ret != 0) { - LOG_ERROR("Failed to close storage, ret=%d", ret); - return ret; - } - - fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: storage closed, now reopening storage\n"); - fflush(stderr); - - // Reopen the storage from the file - this is critical! 
- // The storage needs to reload the data that was just dumped to the file - ret = target_storage_->open(target_file_path_, false); - if (ret != 0) { - LOG_ERROR("Failed to reopen storage, ret=%d", ret); - return ret; - } - - fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: storage reopened, now reopening streamer\n"); - fflush(stderr); - - // Now reopen the streamer with the refreshed storage - ret = target_streamer_->open(target_storage_); - if (ret != 0) { - LOG_ERROR("Failed to reopen streamer, ret=%d", ret); - return ret; - } - - fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: streamer reopened successfully\n"); - fflush(stderr); + // NOTE: We cannot safely reload the streamer here (close/open causes crashes). + // The streamer will properly load data when the collection is reopened. + // For now, auto-training will need to handle the case where streamer doc_count=0. } else { - fprintf(stderr, "[DEBUG] MixedStreamerReducer::reduce: target_builder_ is null, skipping IndexBuild()\n"); - fflush(stderr); } LOG_INFO("End brute force reduce. 
cost time: [%zu]s", @@ -614,8 +560,6 @@ void MixedStreamerReducer::PushToDocCache(const IndexQueryMeta &meta, } int MixedStreamerReducer::IndexBuild() { - fprintf(stderr, "[DEBUG] IndexBuild: doc_cache_ size=%zu\n", doc_cache_.size()); - fflush(stderr); const bool need_convert = !is_target_and_source_same_reformer_ && target_streamer_reformer_ != nullptr; @@ -677,16 +621,10 @@ int MixedStreamerReducer::IndexBuild() { target_holder = target_builder_converter_->result(); } - fprintf(stderr, "[DEBUG] IndexBuild: calling target_builder_->train()\n"); - fflush(stderr); target_builder_->train(target_holder); - fprintf(stderr, "[DEBUG] IndexBuild: calling target_builder_->build()\n"); - fflush(stderr); target_builder_->build(target_holder); - fprintf(stderr, "[DEBUG] IndexBuild: build() completed\n"); - fflush(stderr); return 0; } diff --git a/src/db/index/column/vector_column/vector_column_indexer.cc b/src/db/index/column/vector_column/vector_column_indexer.cc index c9a395872..0399194a7 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.cc +++ b/src/db/index/column/vector_column/vector_column_indexer.cc @@ -36,20 +36,6 @@ Status VectorColumnIndexer::Open( Status VectorColumnIndexer::CreateProximaIndex( const vector_column_params::ReadOptions &read_options) { - fprintf(stderr, "[DEBUG] CreateProximaIndex: field_schema_.name()=%s\n", - field_schema_.name().c_str()); - fflush(stderr); - - // CRITICAL DEBUG: Check field_schema_.index_params()->type() BEFORE conversion - if (field_schema_.index_params()) { - fprintf(stderr, "[DEBUG] CreateProximaIndex: field_schema_.index_params()->type()=%d (BEFORE conversion)\n", - static_cast(field_schema_.index_params()->type())); - fflush(stderr); - } else { - fprintf(stderr, "[DEBUG] CreateProximaIndex: field_schema_.index_params() is NULL!\n"); - fflush(stderr); - } - auto index_param_result = ProximaEngineHelper::convert_to_engine_index_param(field_schema_); if (!index_param_result.has_value()) { @@ -57,20 +43,12 @@ 
Status VectorColumnIndexer::CreateProximaIndex( } auto &index_param = index_param_result.value(); - fprintf(stderr, "[DEBUG] CreateProximaIndex: index_param->index_type=%d (AFTER conversion)\n", - static_cast(index_param->index_type)); - fflush(stderr); - // Use IndexFactory for all index types (including OMEGA) index = core_interface::IndexFactory::CreateAndInitIndex(*index_param); if (index == nullptr) { return Status::InternalError("Failed to create index"); } - fprintf(stderr, "[DEBUG] CreateProximaIndex: created index type=%s\n", - typeid(*index).name()); - fflush(stderr); - auto storage_type = read_options.use_mmap ? core_interface::StorageOptions::StorageType::kMMAP @@ -131,10 +109,6 @@ Status VectorColumnIndexer::Merge( return Status::InvalidArgument("Index not opened"); } - fprintf(stderr, "[DEBUG] VectorColumnIndexer::Merge: BEFORE merge, index type=%s\n", - typeid(*index).name()); - fflush(stderr); - if (indexers.empty()) { return Status::OK(); } @@ -145,9 +119,6 @@ Status VectorColumnIndexer::Merge( if (indexer->index_file_path() == this->index_file_path()) { continue; } - fprintf(stderr, "[DEBUG] VectorColumnIndexer::Merge: source indexer type=%s\n", - typeid(*indexer->index).name()); - fflush(stderr); engine_indexers.push_back(indexer->index); } auto engine_filter = @@ -161,10 +132,6 @@ Status VectorColumnIndexer::Merge( return Status::InternalError("Failed to merge index"); } - fprintf(stderr, "[DEBUG] VectorColumnIndexer::Merge: AFTER merge, index type=%s\n", - typeid(*index).name()); - fflush(stderr); - return Status::OK(); } @@ -202,20 +169,10 @@ Result VectorColumnIndexer::Fetch( Result VectorColumnIndexer::Search( const vector_column_params::VectorData &vector_data, const vector_column_params::QueryParams &query_params) { - fprintf(stderr, "[DEBUG] VectorColumnIndexer::Search called, index=%p, training_mode_enabled_=%d\n", - (void*)index.get(), training_mode_enabled_); - fflush(stderr); - if (index == nullptr) { - fprintf(stderr, "[DEBUG] 
VectorColumnIndexer::Search: index is NULL!\n"); - fflush(stderr); return tl::make_unexpected(Status::InvalidArgument("Index not opened")); } - fprintf(stderr, "[DEBUG] VectorColumnIndexer::Search: index doc_count=%u\n", - index->GetDocCount()); - fflush(stderr); - // Set query_id before search if training mode is enabled if (training_mode_enabled_) { if (auto* training_capable = index->GetTrainingCapability()) { @@ -257,6 +214,9 @@ Result VectorColumnIndexer::Search( auto records = training_capable->GetTrainingRecords(); collected_records_.insert(collected_records_.end(), records.begin(), records.end()); + // CRITICAL: Clear records from underlying index to avoid memory explosion + // Without this, records accumulate across queries and get copied repeatedly + training_capable->ClearTrainingRecords(); } } @@ -269,30 +229,14 @@ Result VectorColumnIndexer::Search( // Training mode method implementations Status VectorColumnIndexer::EnableTrainingMode(bool enable) { - fprintf(stderr, "[DEBUG] VectorColumnIndexer::EnableTrainingMode called with enable=%d\n", enable); - fflush(stderr); - std::lock_guard lock(training_mutex_); training_mode_enabled_ = enable; // Propagate to underlying index if it exists and supports training if (index != nullptr) { - fprintf(stderr, "[DEBUG] VectorColumnIndexer: index is not null, type_name=%s\n", - typeid(*index).name()); - fflush(stderr); - if (auto* training_capable = index->GetTrainingCapability()) { - fprintf(stderr, "[DEBUG] VectorColumnIndexer: GetTrainingCapability returned non-null\n"); - fflush(stderr); return training_capable->EnableTrainingMode(enable); - } else { - fprintf(stderr, "[DEBUG] VectorColumnIndexer: GetTrainingCapability returned null (index is type=%s)\n", - typeid(*index).name()); - fflush(stderr); } - } else { - fprintf(stderr, "[DEBUG] VectorColumnIndexer: index is null\n"); - fflush(stderr); } return Status::OK(); @@ -311,19 +255,8 @@ void VectorColumnIndexer::SetCurrentQueryId(int query_id) { std::vector 
VectorColumnIndexer::GetTrainingRecords() const { std::lock_guard lock(training_mutex_); - - // Get records from underlying index if it exists and supports training - if (index != nullptr) { - if (auto* training_capable = index->GetTrainingCapability()) { - auto index_records = training_capable->GetTrainingRecords(); - - // Merge with local collected records - std::vector all_records = collected_records_; - all_records.insert(all_records.end(), index_records.begin(), index_records.end()); - return all_records; - } - } - + // All records are already collected in collected_records_ during Search() + // The underlying index records are cleared after each Search to avoid duplication return collected_records_; } diff --git a/src/db/index/segment/segment.cc b/src/db/index/segment/segment.cc index 2c009d1b7..e876b390a 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -1447,26 +1447,16 @@ CombinedVectorColumnIndexer::Ptr SegmentImpl::get_combined_vector_indexer( auto iter = vector_indexers_.find(field_name); if (iter != vector_indexers_.end()) { indexers = iter->second; - fprintf(stderr, "[DEBUG] get_combined_vector_indexer: found %zu persisted indexers\n", - indexers.size()); - fflush(stderr); } auto m_iter = memory_vector_indexers_.find(field_name); if (m_iter != memory_vector_indexers_.end()) { - fprintf(stderr, "[DEBUG] get_combined_vector_indexer: FOUND memory indexer! 
Adding to list\n"); - fflush(stderr); indexers.push_back(m_iter->second); } else { - fprintf(stderr, "[DEBUG] get_combined_vector_indexer: NO memory indexer\n"); - fflush(stderr); } auto field = collection_schema_->get_field(field_name); auto vector_index_params = std::dynamic_pointer_cast(field->index_params()); - fprintf(stderr, "[DEBUG] get_combined_vector_indexer: field index_type=%d\n", - static_cast(vector_index_params->type())); - fflush(stderr); MetricType metric_type = vector_index_params->metric_type(); auto blocks = get_persist_block_metas(BlockType::VECTOR_INDEX, field_name); @@ -1582,13 +1572,8 @@ Status SegmentImpl::create_all_vector_index( new_segment_meta->set_indexed_vector_fields(vector_field_names); *segment_meta = new_segment_meta; - fprintf(stderr, "[DEBUG] create_vector_index_internal: marked fields as indexed, vector_field_names.size()=%zu\n", - vector_field_names.size()); for (const auto& field_name : vector_field_names) { - fprintf(stderr, "[DEBUG] create_vector_index_internal: indexed field=%s, vector_indexed=%d\n", - field_name.c_str(), new_segment_meta->vector_indexed(field_name)); } - fflush(stderr); // Note: OMEGA training is now performed in merge_vector_indexer() immediately // after the index is built via Merge(). 
This is the recommended approach per @@ -1601,9 +1586,6 @@ Status SegmentImpl::create_all_vector_index( Result SegmentImpl::merge_vector_indexer( const std::string &index_file_path, const std::string &column, const FieldSchema &field, int concurrency) { - fprintf(stderr, "[DEBUG] merge_vector_indexer called for field '%s', index_type=%d\n", - column.c_str(), static_cast(field.index_params()->type())); - fflush(stderr); VectorColumnIndexer::Ptr vector_indexer = std::make_shared(index_file_path, field); @@ -1615,9 +1597,6 @@ Result SegmentImpl::merge_vector_indexer( std::vector to_merge_indexers = vector_indexers_[column]; - fprintf(stderr, "[DEBUG] merge_vector_indexer: merging %zu indexers\n", - to_merge_indexers.size()); - fflush(stderr); vector_column_params::MergeOptions merge_options; if (concurrency == 0) { @@ -1630,46 +1609,86 @@ Result SegmentImpl::merge_vector_indexer( s = vector_indexer->Merge(to_merge_indexers, filter_, merge_options); CHECK_RETURN_STATUS_EXPECTED(s); - fprintf(stderr, "[DEBUG] merge_vector_indexer: Merge completed successfully, doc_count=%zu\n", - vector_indexer->doc_count()); - fflush(stderr); - // CRITICAL: Train BEFORE Flush! - // After Merge, the index is in memory and searchable (builder and streamer are ready). - // Flush() will clear the in-memory data (doc_count becomes 0), so training must - // happen BEFORE Flush while the index is still searchable. 
+ // Check if this is a trainable index (OMEGA) auto* training_capable = vector_indexer->GetTrainingCapability(); - if (training_capable != nullptr) { - fprintf(stderr, "[DEBUG] merge_vector_indexer: Trainable index detected (type=%d)!\n", - static_cast(field.index_params()->type())); - fflush(stderr); + bool needs_training = (training_capable != nullptr && vector_indexer->doc_count() >= 100); + std::string model_output_dir; - LOG_INFO("Trainable index detected after merge, training BEFORE flush for field '%s' in segment %d", - column.c_str(), id()); + if (needs_training) { - // Train with the in-memory index (data is still accessible) - s = auto_train_omega_index_internal(column, {vector_indexer}); - if (!s.ok()) { - LOG_WARN("Failed to auto-train index after merge: %s (non-fatal, continuing)", - s.message().c_str()); - // Don't fail the merge operation if training fails - } + LOG_INFO("Trainable index detected after merge for field '%s' in segment %d (doc_count=%zu)", + column.c_str(), id(), vector_indexer->doc_count()); + + // Compute model output directory + std::string segment_dir = index_file_path.substr(0, index_file_path.rfind('/')); + model_output_dir = segment_dir + "/omega_model"; } else { - fprintf(stderr, "[DEBUG] merge_vector_indexer: Index does not support training (type=%d), skipping\n", - static_cast(field.index_params()->type())); - fflush(stderr); } - // Now flush to persist the data (this will clear in-memory data) + // Flush to persist the data s = vector_indexer->Flush(); CHECK_RETURN_STATUS_EXPECTED(s); - fprintf(stderr, "[DEBUG] merge_vector_indexer: Flush completed, doc_count=%zu\n", - vector_indexer->doc_count()); - fflush(stderr); + // After Flush, the indexer is persisted but in-memory graph is cleared. + // For training, we need to reopen the indexer to load the graph from disk. 
+ if (needs_training) { + LOG_INFO("Starting OMEGA auto-training for field '%s' (reopening indexer after flush)", column.c_str()); + + // Reopen the indexer to load the persisted graph (create_new=false to load existing) + VectorColumnIndexer::Ptr training_indexer = + std::make_shared(index_file_path, field); + vector_column_params::ReadOptions read_options{options_.enable_mmap_, false}; + + auto reopen_status = training_indexer->Open(read_options); + if (!reopen_status.ok()) { + LOG_WARN("Failed to reopen indexer for training: %s", reopen_status.message().c_str()); + } else { + // Collect training data + TrainingDataCollectorOptions collector_opts; + size_t doc_count = training_indexer->doc_count(); + collector_opts.num_training_queries = std::min(doc_count, size_t(1000)); + collector_opts.ef_training = 1000; // Large ef for recall ≈ 1 + collector_opts.topk = 100; + collector_opts.k_train = 1; // Label=1 when top-1 GT found + + std::vector training_indexers = {training_indexer}; + + auto training_result = TrainingDataCollector::CollectTrainingData( + shared_from_this(), column, collector_opts, training_indexers); + + if (training_result.has_value()) { + auto& records = training_result.value(); + LOG_INFO("Collected %zu training records", records.size()); + + if (records.size() >= 100) { + // Train the model + OmegaModelTrainerOptions trainer_opts; + trainer_opts.output_dir = model_output_dir; + trainer_opts.verbose = true; + + // Create output directory if it doesn't exist + if (!FileHelper::DirectoryExists(model_output_dir)) { + if (!FileHelper::CreateDirectory(model_output_dir)) { + LOG_WARN("Failed to create model output directory: %s", model_output_dir.c_str()); + } + } + + auto train_status = OmegaModelTrainer::TrainModel(records, trainer_opts); + if (train_status.ok()) { + LOG_INFO("OMEGA model training completed successfully: %s", trainer_opts.output_dir.c_str()); + } else { + LOG_WARN("OMEGA model training failed: %s", train_status.message().c_str()); + } 
+ } else { + LOG_INFO("Skipping model training: only %zu records collected (need >= 100)", records.size()); + } + } else { + LOG_WARN("Failed to collect training data: %s", training_result.error().message().c_str()); + } + } + } - fprintf(stderr, "[DEBUG] merge_vector_indexer: returning vector_indexer\n"); - fflush(stderr); return vector_indexer; } @@ -2288,9 +2307,6 @@ Status SegmentImpl::auto_train_omega_index_internal( collector_options.topk = 100; collector_options.noise_scale = 0.01f; - fprintf(stderr, "[DEBUG] auto_train_omega_index_internal: calling CollectTrainingData with %zu provided indexers\n", - indexers.size()); - fflush(stderr); auto training_records_result = TrainingDataCollector::CollectTrainingData( shared_from_this(), field_name, collector_options, indexers); @@ -4039,55 +4055,30 @@ Status SegmentImpl::load_scalar_index_blocks(bool create) { } Status SegmentImpl::load_vector_index_blocks() { - fprintf(stderr, "[DEBUG] load_vector_index_blocks: loading %zu blocks\n", - segment_meta_->persisted_blocks().size()); - fflush(stderr); int block_index = 0; for (const auto &block : segment_meta_->persisted_blocks()) { - fprintf(stderr, "[DEBUG] load_vector_index_blocks: block[%d] type=%d\n", - block_index++, static_cast(block.type())); - fflush(stderr); if (block.type() == BlockType::VECTOR_INDEX || block.type() == BlockType::VECTOR_INDEX_QUANTIZE) { // vector block only contained 1 column auto column = block.columns()[0]; - fprintf(stderr, "[DEBUG] load_vector_index_blocks: block[%d] column=%s, vector_indexed=%d\n", - block_index-1, column.c_str(), segment_meta_->vector_indexed(column)); - fflush(stderr); FieldSchema new_field_params = *collection_schema_->get_vector_field(column); - fprintf(stderr, "[DEBUG] load_vector_index_blocks: original schema field index_type=%d\n", - static_cast(new_field_params.index_params()->type())); - fflush(stderr); auto vector_index_params = std::dynamic_pointer_cast( new_field_params.index_params()); - fprintf(stderr, 
"[DEBUG] load_vector_index_blocks: original index_type=%d\n", - static_cast(vector_index_params->type())); - fprintf(stderr, "[DEBUG] load_vector_index_blocks: quantize_type=%d\n", - static_cast(vector_index_params->quantize_type())); - fflush(stderr); if (block.type_ == BlockType::VECTOR_INDEX) { if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED || !segment_meta_->vector_indexed(column)) { - fprintf(stderr, "[DEBUG] load_vector_index_blocks: CONDITION MET! quantize_type=%d, vector_indexed=%d\n", - static_cast(vector_index_params->quantize_type()), - segment_meta_->vector_indexed(column)); - fprintf(stderr, "[DEBUG] load_vector_index_blocks: replacing with default FLAT params!\n"); - fflush(stderr); new_field_params.set_index_params( MakeDefaultVectorIndexParams(vector_index_params->metric_type())); } else { - fprintf(stderr, "[DEBUG] load_vector_index_blocks: CONDITION NOT MET, keeping original index_type=%d\n", - static_cast(vector_index_params->type())); - fflush(stderr); } } else{ if (!segment_meta_->vector_indexed(column)) { @@ -4097,12 +4088,6 @@ Status SegmentImpl::load_vector_index_blocks() { } } - fprintf(stderr, "[DEBUG] load_vector_index_blocks: block[%d] creating VectorColumnIndexer with final index_type=%d\n", - block_index-1, static_cast(std::dynamic_pointer_cast(new_field_params.index_params())->type())); - fprintf(stderr, "[DEBUG] load_vector_index_blocks: new_field_params details: name=%s, index_type=%d\n", - new_field_params.name().c_str(), - static_cast(new_field_params.index_params()->type())); - fflush(stderr); std::string index_path; if (block.type_ == BlockType::VECTOR_INDEX) { diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 1d5b5c0c4..e0fd3328f 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -36,9 +36,6 @@ TrainingDataCollector::CollectTrainingData( segment, field_name, options.num_training_queries, 
options.noise_scale, options.seed); - fprintf(stderr, "[DEBUG] CollectTrainingData: generated %zu training queries\n", - training_queries.size()); - fflush(stderr); if (training_queries.empty()) { return tl::make_unexpected( @@ -63,13 +60,8 @@ TrainingDataCollector::CollectTrainingData( std::vector indexers; if (!provided_indexers.empty()) { - fprintf(stderr, "[DEBUG] CollectTrainingData: using %zu provided (just-merged) indexers\n", - provided_indexers.size()); - fflush(stderr); indexers = provided_indexers; } else { - fprintf(stderr, "[DEBUG] CollectTrainingData: using indexers from segment\n"); - fflush(stderr); indexers = segment->get_vector_indexer(field_name); } @@ -100,12 +92,6 @@ TrainingDataCollector::CollectTrainingData( for (size_t query_idx = 0; query_idx < training_queries.size(); ++query_idx) { const auto& query_vector = training_queries[query_idx]; - if (query_idx == 0) { - fprintf(stderr, "[DEBUG] CollectTrainingData: Starting training searches, query 0 vector size=%zu\n", - query_vector.size()); - fflush(stderr); - } - // Set query ID for this query for (auto& indexer : indexers) { indexer->SetCurrentQueryId(static_cast(query_idx)); @@ -127,12 +113,6 @@ TrainingDataCollector::CollectTrainingData( hnsw_params->set_ef(options.ef_training); query_params.query_params = hnsw_params; - if (query_idx == 0) { - fprintf(stderr, "[DEBUG] CollectTrainingData: Calling indexers[0]->Search for query 0, topk=%zu, ef=%d\n", - options.topk, options.ef_training); - fflush(stderr); - } - // Perform search directly on the indexer (assumes single indexer, which is true for just-merged case) // For multiple indexers, we would need to merge results if (indexers.size() != 1) { @@ -143,18 +123,10 @@ TrainingDataCollector::CollectTrainingData( if (!search_result.has_value()) { LOG_WARN("Search failed for query %zu: %s", query_idx, search_result.error().message().c_str()); - fprintf(stderr, "[DEBUG] CollectTrainingData: Search FAILED for query %zu: %s\n", - query_idx, 
search_result.error().message().c_str()); - fflush(stderr); search_results.push_back({}); continue; } - if (query_idx == 0) { - fprintf(stderr, "[DEBUG] CollectTrainingData: Search completed for query 0\n"); - fflush(stderr); - } - // Extract result doc IDs auto& results = search_result.value(); std::vector result_ids; @@ -165,19 +137,9 @@ TrainingDataCollector::CollectTrainingData( iter->next(); } - if (query_idx == 0) { - fprintf(stderr, "[DEBUG] CollectTrainingData: Query 0 returned %zu results\n", - result_ids.size()); - fflush(stderr); - } - search_results.push_back(std::move(result_ids)); } - fprintf(stderr, "[DEBUG] CollectTrainingData: Completed all %zu training searches\n", - training_queries.size()); - fflush(stderr); - // Step 6: Collect training records from all indexers LOG_INFO("Collecting training records from indexers"); From 607b17c7f3e2e89715ec60828d3ff1225416aa13 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Tue, 3 Mar 2026 19:29:45 +0800 Subject: [PATCH 009/126] perf(omega): parallelize training data collection and expose training params - Parallelize ground truth computation and training searches with std::thread - Add training_query_id support for thread-safe parallel training - Add num_training_queries param to OmegaIndexParams (default: 1000) - Use ef_construction as training search ef instead of hardcoded 1000 --- python/zvec/_omega_training.py | 133 +++- .../python/model/param/python_param.cc | 55 +- src/core/algorithm/omega/omega_context.h | 13 +- src/core/algorithm/omega/omega_params.h | 4 + src/core/algorithm/omega/omega_streamer.cc | 60 +- src/core/algorithm/omega/omega_streamer.h | 6 + src/core/interface/indexes/omega_index.cc | 19 +- .../column/vector_column/engine_helper.hpp | 13 +- src/db/index/common/proto_converter.cc | 6 +- src/db/index/segment/segment.cc | 54 +- src/db/proto/zvec.proto | 3 +- src/db/training/omega_model_trainer.cc | 81 ++- src/db/training/omega_model_trainer.h | 34 +- 
src/db/training/training_data_collector.cc | 621 +++++++++++++++--- src/db/training/training_data_collector.h | 49 +- src/include/zvec/core/interface/index_param.h | 1 + src/include/zvec/core/interface/training.h | 23 + src/include/zvec/db/index_params.h | 44 +- src/include/zvec/db/query_params.h | 13 +- thirdparty/omega | 2 +- 20 files changed, 1072 insertions(+), 162 deletions(-) diff --git a/python/zvec/_omega_training.py b/python/zvec/_omega_training.py index 361a1f79b..32a28a144 100644 --- a/python/zvec/_omega_training.py +++ b/python/zvec/_omega_training.py @@ -28,7 +28,7 @@ LIGHTGBM_AVAILABLE = False -def train_omega_model(csv_path: str, output_dir: str, verbose: bool = False, topk: int = 100): +def train_omega_model(csv_path: str, output_dir: str, verbose: bool = False, topk: int = 100, gt_cmps_path: str = None): """Train OMEGA model from CSV training data. Args: @@ -36,6 +36,7 @@ def train_omega_model(csv_path: str, output_dir: str, verbose: bool = False, top output_dir: Directory to save trained model and tables verbose: Enable verbose logging topk: Top-K value used during training data collection (default: 100) + gt_cmps_path: Optional path to gt_cmps.csv for generating real tables Returns: str: Path to the trained model directory @@ -172,33 +173,98 @@ def train_omega_model(csv_path: str, output_dir: str, verbose: bool = False, top if verbose: print(f"Interval table saved to: {interval_table_path}") - # Generate placeholder gt_collected_table and gt_cmps_all_table - # These tables require access to ground truth data during search, which is not - # available from the CSV export. They should be generated during the training - # data collection phase in C++. 
- - # Create empty placeholder files with the correct format + # Generate gt_collected_table and gt_cmps_all_table gt_collected_table_path = os.path.join(output_dir, "gt_collected_table.txt") - with open(gt_collected_table_path, "w") as f: - # Format: row_index:value1,value2,...,valueK - # Each row represents a "collected" count, columns are ranks - for collected in range(topk + 1): - row_values = ["1.0" if i < collected else "0.0" for i in range(topk)] - f.write(f"{collected}:{','.join(row_values)}\n") - - if verbose: - print(f"GT collected table (placeholder) saved to: {gt_collected_table_path}") - gt_cmps_all_table_path = os.path.join(output_dir, "gt_cmps_all_table.txt") - with open(gt_cmps_all_table_path, "w") as f: - # Format: row_index:value1,value2,...,value100 - # Each row represents a rank, columns are percentiles (1-100) - for rank in range(topk + 1): - percentiles = [str(rank * 10 + p) for p in range(100)] # Placeholder values - f.write(f"{rank}:{','.join(percentiles)}\n") - if verbose: - print(f"GT cmps all table (placeholder) saved to: {gt_cmps_all_table_path}") + if gt_cmps_path is not None and os.path.exists(gt_cmps_path): + # Load gt_cmps data and generate real tables + if verbose: + print(f"Loading gt_cmps data from: {gt_cmps_path}") + + gt_cmps_df = pd.read_csv(gt_cmps_path) + num_queries = gt_cmps_df['query_id'].max() + 1 + + # Reshape gt_cmps into a 2D array: [query_id][rank] = cmps + gt_cmps = np.zeros((num_queries, topk), dtype=np.int32) + for _, row in gt_cmps_df.iterrows(): + query_id = int(row['query_id']) + rank = int(row['rank']) + cmps = int(row['cmps']) + if query_id < num_queries and rank < topk: + gt_cmps[query_id, rank] = cmps + + # Generate gt_cmps_all_table: percentiles of cmps for each rank + # Format: rank:percentile_1,percentile_2,...,percentile_100 + if verbose: + print("Generating gt_cmps_all_table...") + + with open(gt_cmps_all_table_path, "w") as f: + for rank in range(topk): + cmps_values = gt_cmps[:, rank] + # Calculate 
percentiles (1-100) + percentiles = np.percentile(cmps_values, range(1, 101)) + percentiles_str = ','.join([str(int(p)) for p in percentiles]) + f.write(f"{rank}:{percentiles_str}\n") + + if verbose: + print(f"GT cmps all table saved to: {gt_cmps_all_table_path}") + + # Generate gt_collected_table + # For each "collected" count (0 to topk), for each rank r: + # What's the probability that GT[r] was collected when we've found "collected" GTs + # This is computed by: fraction of queries where cmps[r] <= cmps[collected-1] + if verbose: + print("Generating gt_collected_table...") + + with open(gt_collected_table_path, "w") as f: + for collected in range(topk + 1): + row_values = [] + if collected == 0: + # No GTs collected yet, all probabilities are 0 + row_values = ["0.0"] * topk + else: + # Get the cmps threshold: when we've collected 'collected' GTs, + # the threshold is the cmps when the (collected-1)th GT was found + for rank in range(topk): + if rank < collected: + # Ranks before "collected" are always found + row_values.append("1.0") + else: + # For ranks >= collected, compute probability + # GT[rank] is collected if cmps[rank] <= cmps[collected-1] + threshold_rank = collected - 1 + prob_found = np.mean(gt_cmps[:, rank] <= gt_cmps[:, threshold_rank]) + row_values.append(f"{prob_found:.6f}") + f.write(f"{collected}:{','.join(row_values)}\n") + + if verbose: + print(f"GT collected table saved to: {gt_collected_table_path}") + + else: + # Generate placeholder tables when gt_cmps is not available + if verbose: + print("Generating placeholder tables (gt_cmps not available)...") + + with open(gt_collected_table_path, "w") as f: + # Format: row_index:value1,value2,...,valueK + # Each row represents a "collected" count, columns are ranks + for collected in range(topk + 1): + row_values = ["1.0" if i < collected else "0.0" for i in range(topk)] + f.write(f"{collected}:{','.join(row_values)}\n") + + if verbose: + print(f"GT collected table (placeholder) saved to: 
{gt_collected_table_path}") + + with open(gt_cmps_all_table_path, "w") as f: + # Format: row_index:value1,value2,...,value100 + # Each row represents a rank, columns are percentiles (1-100) + for rank in range(topk + 1): + percentiles = [str(rank * 10 + p) for p in range(100)] # Placeholder values + f.write(f"{rank}:{','.join(percentiles)}\n") + + if verbose: + print(f"GT cmps all table (placeholder) saved to: {gt_cmps_all_table_path}") # Print final statistics if verbose: @@ -208,8 +274,12 @@ def train_omega_model(csv_path: str, output_dir: str, verbose: bool = False, top print(f" - model.txt") print(f" - threshold_table.txt") print(f" - interval_table.txt") - print(f" - gt_collected_table.txt (placeholder)") - print(f" - gt_cmps_all_table.txt (placeholder)") + if gt_cmps_path is not None and os.path.exists(gt_cmps_path): + print(f" - gt_collected_table.txt") + print(f" - gt_cmps_all_table.txt") + else: + print(f" - gt_collected_table.txt (placeholder)") + print(f" - gt_cmps_all_table.txt (placeholder)") return output_dir @@ -244,6 +314,12 @@ def main(): default=100, help="Top-K value used during training (default: 100)" ) + parser.add_argument( + "--gt_cmps", + type=str, + default=None, + help="Path to gt_cmps.csv for generating real tables (optional)" + ) args = parser.parse_args() @@ -253,7 +329,8 @@ def main(): csv_path=args.input, output_dir=args.output, verbose=args.verbose, - topk=args.topk + topk=args.topk, + gt_cmps_path=args.gt_cmps ) print("✓ Training completed successfully") sys.exit(0) diff --git a/src/binding/python/model/param/python_param.cc b/src/binding/python/model/param/python_param.cc index 97680dc41..c76e9f974 100644 --- a/src/binding/python/model/param/python_param.cc +++ b/src/binding/python/model/param/python_param.cc @@ -549,8 +549,9 @@ Constructs an IVFIndexParam instance. omega_params(m, "OmegaIndexParam", R"pbdoc( Parameters for configuring an OMEGA index. 
-OMEGA is an advanced graph-based index that can fall back to HNSW when omega -functionality is disabled. This class encapsulates its construction hyperparameters. +OMEGA is an advanced graph-based index that uses machine learning to optimize +search performance. It builds on HNSW and can automatically train a model to +predict when to stop searching. Attributes: metric_type (MetricType): Distance metric used for similarity computation. @@ -564,31 +565,52 @@ functionality is disabled. This class encapsulates its construction hyperparamet quantize_type (QuantizeType): Optional quantization type for vector compression (e.g., FP16, INT8). Default is `QuantizeType.UNDEFINED` to disable quantization. + min_vector_threshold (int): Minimum number of vectors required to enable + OMEGA optimization. Below this threshold, standard HNSW is used. + Default is 100000. + model_dir (str): Directory path for storing/loading OMEGA models. + Default is "./omega_models". + num_training_queries (int): Number of training queries to generate for + OMEGA model training. Default is 1000. Examples: >>> from zvec.typing import MetricType, QuantizeType >>> params = OmegaIndexParam( - ... metric_type=MetricType.COSINE, + ... metric_type=MetricType.L2, ... m=16, ... ef_construction=200, - ... quantize_type=QuantizeType.INT8 + ... min_vector_threshold=50000, + ... model_dir="./my_omega_models", + ... num_training_queries=500 ... 
) - >>> print(params) - {'metric_type': 'IP', 'm': 16, 'ef_construction': 200, 'quantize_type': 'INT8'} + >>> print(params.num_training_queries) + 500 )pbdoc"); omega_params - .def(py::init(), + .def(py::init(), py::arg("metric_type") = MetricType::IP, py::arg("m") = core_interface::kDefaultHnswNeighborCnt, py::arg("ef_construction") = core_interface::kDefaultHnswEfConstruction, - py::arg("quantize_type") = QuantizeType::UNDEFINED) + py::arg("quantize_type") = QuantizeType::UNDEFINED, + py::arg("min_vector_threshold") = 100000, + py::arg("model_dir") = "./omega_models", + py::arg("num_training_queries") = 1000) .def_property_readonly( "m", &OmegaIndexParams::m, "int: Maximum number of neighbors per node in upper layers.") .def_property_readonly( "ef_construction", &OmegaIndexParams::ef_construction, "int: Candidate list size during index construction.") + .def_property_readonly( + "min_vector_threshold", &OmegaIndexParams::min_vector_threshold, + "int: Minimum vectors required to enable OMEGA optimization.") + .def_property_readonly( + "model_dir", &OmegaIndexParams::model_dir, + "str: Directory path for OMEGA models.") + .def_property_readonly( + "num_training_queries", &OmegaIndexParams::num_training_queries, + "int: Number of training queries for OMEGA model training.") .def( "to_dict", [](const OmegaIndexParams &self) -> py::dict { @@ -597,6 +619,9 @@ functionality is disabled. This class encapsulates its construction hyperparamet dict["metric_type"] = metric_type_to_string(self.metric_type()); dict["m"] = self.m(); dict["ef_construction"] = self.ef_construction(); + dict["min_vector_threshold"] = self.min_vector_threshold(); + dict["model_dir"] = self.model_dir(); + dict["num_training_queries"] = self.num_training_queries(); dict["quantize_type"] = quantize_type_to_string(self.quantize_type()); return dict; @@ -610,20 +635,28 @@ functionality is disabled. 
This class encapsulates its construction hyperparamet ", \"m\":" + std::to_string(self.m()) + ", \"ef_construction\":" + std::to_string(self.ef_construction()) + + ", \"min_vector_threshold\":" + + std::to_string(self.min_vector_threshold()) + + ", \"model_dir\":\"" + self.model_dir() + "\"" + + ", \"num_training_queries\":" + + std::to_string(self.num_training_queries()) + ", \"quantize_type\":" + quantize_type_to_string(self.quantize_type()) + "}"; }) .def(py::pickle( [](const OmegaIndexParams &self) { return py::make_tuple(self.metric_type(), self.m(), - self.ef_construction(), self.quantize_type()); + self.ef_construction(), self.quantize_type(), + self.min_vector_threshold(), self.model_dir(), + self.num_training_queries()); }, [](py::tuple t) { - if (t.size() != 4) + if (t.size() != 7) throw std::runtime_error("Invalid state for OmegaIndexParams"); return std::make_shared( t[0].cast(), t[1].cast(), t[2].cast(), - t[3].cast()); + t[3].cast(), t[4].cast(), + t[5].cast(), t[6].cast()); })); } diff --git a/src/core/algorithm/omega/omega_context.h b/src/core/algorithm/omega/omega_context.h index fa053e452..6d0bf7d71 100644 --- a/src/core/algorithm/omega/omega_context.h +++ b/src/core/algorithm/omega/omega_context.h @@ -29,12 +29,12 @@ class OmegaContext : public HnswContext { //! Constructor OmegaContext(size_t dimension, const IndexMetric::Pointer &metric, const HnswEntity::Pointer &entity) - : HnswContext(dimension, metric, entity), target_recall_(0.95f) {} + : HnswContext(dimension, metric, entity), target_recall_(0.95f), training_query_id_(-1) {} //! Constructor OmegaContext(const IndexMetric::Pointer &metric, const HnswEntity::Pointer &entity) - : HnswContext(metric, entity), target_recall_(0.95f) {} + : HnswContext(metric, entity), target_recall_(0.95f), training_query_id_(-1) {} //! Destructor virtual ~OmegaContext() = default; @@ -44,6 +44,11 @@ class OmegaContext : public HnswContext { return target_recall_; } + //! 
Get training query ID for this query (-1 means not set, use global) + int training_query_id() const { + return training_query_id_; + } + //! Update context parameters (overrides HnswContext::update) int update(const ailego::Params ¶ms) override { // First call parent to update HNSW parameters @@ -56,12 +61,16 @@ class OmegaContext : public HnswContext { if (params.has(PARAM_OMEGA_SEARCHER_TARGET_RECALL)) { params.get(PARAM_OMEGA_SEARCHER_TARGET_RECALL, &target_recall_); } + if (params.has(PARAM_OMEGA_SEARCHER_TRAINING_QUERY_ID)) { + params.get(PARAM_OMEGA_SEARCHER_TRAINING_QUERY_ID, &training_query_id_); + } return 0; } private: float target_recall_; // Per-query target recall + int training_query_id_; // Per-query training query ID for parallel training }; } // namespace core diff --git a/src/core/algorithm/omega/omega_params.h b/src/core/algorithm/omega/omega_params.h index c94fb87f4..1ad91cc84 100644 --- a/src/core/algorithm/omega/omega_params.h +++ b/src/core/algorithm/omega/omega_params.h @@ -22,6 +22,10 @@ namespace zvec::core { static const std::string PARAM_OMEGA_SEARCHER_TARGET_RECALL( "proxima.omega.searcher.target_recall"); +// Training query ID for parallel training searches +static const std::string PARAM_OMEGA_SEARCHER_TRAINING_QUERY_ID( + "proxima.omega.searcher.training_query_id"); + // OMEGA streamer parameters (used at index time) static const std::string PARAM_OMEGA_STREAMER_TARGET_RECALL( "proxima.omega.streamer.target_recall"); diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 28185a65f..b52d53c22 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -18,6 +18,8 @@ #include #include "../hnsw/hnsw_entity.h" #include "../hnsw/hnsw_context.h" +#include "omega_context.h" +#include "omega_params.h" #include #include @@ -41,7 +43,15 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, return 
HnswStreamer::search_impl(query, qmeta, count, context); } - LOG_INFO("OmegaStreamer: training mode enabled (query_id=%d), using OMEGA library to collect features", current_query_id_); + // Cast context to OmegaContext to access training_query_id + auto *omega_ctx = dynamic_cast(context.get()); + int query_id = current_query_id_; // Default to member variable + if (omega_ctx != nullptr && omega_ctx->training_query_id() >= 0) { + // Use training_query_id from context (for parallel training searches) + query_id = omega_ctx->training_query_id(); + } + + LOG_INFO("OmegaStreamer: training mode enabled (query_id=%d), using OMEGA library to collect features", query_id); // Training mode: Use OMEGA library with nullptr model (training-only mode) // The OMEGA library will collect training features automatically @@ -58,8 +68,8 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, } // Enable training mode (CRITICAL: must be before search) - omega_search_enable_training(omega_search, current_query_id_); - LOG_DEBUG("Training mode enabled for query_id=%d", current_query_id_); + omega_search_enable_training(omega_search, query_id); + LOG_DEBUG("Training mode enabled for query_id=%d", query_id); // Cast context to HnswContext to access HNSW-specific features auto *hnsw_ctx = dynamic_cast(context.get()); @@ -276,9 +286,9 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, } LOG_DEBUG("Collected %zu training records for query_id=%d", - record_count, current_query_id_); + record_count, query_id); } else { - LOG_WARN("No training records collected for query_id=%d", current_query_id_); + LOG_WARN("No training records collected for query_id=%d", query_id); } // Destroy OMEGA search context @@ -287,6 +297,46 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, return 0; } +IndexStreamer::Context::Pointer OmegaStreamer::create_context(void) const { + if (ailego_unlikely(state_ != STATE_OPENED)) { + 
LOG_ERROR("Create OmegaContext failed, open storage first!"); + return Context::Pointer(); + } + + HnswEntity::Pointer entity = entity_.clone(); + if (ailego_unlikely(!entity)) { + LOG_ERROR("OmegaContext clone entity failed"); + return Context::Pointer(); + } + + // Create OmegaContext instead of HnswContext for OMEGA-specific features + OmegaContext *ctx = + new (std::nothrow) OmegaContext(meta_.dimension(), metric_, entity); + if (ailego_unlikely(ctx == nullptr)) { + LOG_ERROR("Failed to new OmegaContext"); + return Context::Pointer(); + } + + // Copy all HNSW settings from parent + ctx->set_ef(ef_); + ctx->set_max_scan_limit(max_scan_limit_); + ctx->set_min_scan_limit(min_scan_limit_); + ctx->set_max_scan_ratio(max_scan_ratio_); + ctx->set_filter_mode(bf_enabled_ ? VisitFilter::BloomFilter + : VisitFilter::ByteMap); + ctx->set_filter_negative_probility(bf_negative_prob_); + ctx->set_magic(magic_); + ctx->set_force_padding_topk(force_padding_topk_enabled_); + ctx->set_bruteforce_threshold(bruteforce_threshold_); + + if (ailego_unlikely(ctx->init(HnswContext::kStreamerContext)) != 0) { + LOG_ERROR("Init OmegaContext failed"); + delete ctx; + return Context::Pointer(); + } + return Context::Pointer(ctx); +} + int OmegaStreamer::dump(const IndexDumper::Pointer &dumper) { LOG_INFO("OmegaStreamer dump"); diff --git a/src/core/algorithm/omega/omega_streamer.h b/src/core/algorithm/omega/omega_streamer.h index 2ebe6ac85..1a980dfd3 100644 --- a/src/core/algorithm/omega/omega_streamer.h +++ b/src/core/algorithm/omega/omega_streamer.h @@ -14,6 +14,7 @@ #pragma once #include "../hnsw/hnsw_streamer.h" +#include "omega_context.h" #include #include #include @@ -65,6 +66,11 @@ class OmegaStreamer : public HnswStreamer { uint32_t count, Context::Pointer &context) const override; + /** + * @brief Override create_context to return OmegaContext + */ + virtual Context::Pointer create_context() const override; + /** * @brief Override dump to set "OmegaSearcher" instead of 
"HnswSearcher" */ diff --git a/src/core/interface/indexes/omega_index.cc b/src/core/interface/indexes/omega_index.cc index 1d6689e66..7078806f1 100644 --- a/src/core/interface/indexes/omega_index.cc +++ b/src/core/interface/indexes/omega_index.cc @@ -128,13 +128,30 @@ int OmegaIndex::_prepare_for_search( return ret; } + ailego::Params params; + // Extract OMEGA-specific parameter (target_recall) const auto &omega_search_param = std::dynamic_pointer_cast(search_param); if (omega_search_param) { - ailego::Params params; params.set(core::PARAM_OMEGA_SEARCHER_TARGET_RECALL, omega_search_param->target_recall); + // Pass training_query_id for parallel training searches + if (omega_search_param->training_query_id >= 0) { + params.set(core::PARAM_OMEGA_SEARCHER_TRAINING_QUERY_ID, + omega_search_param->training_query_id); + } + } else { + // Fallback: try HNSW params for training_query_id + const auto &hnsw_search_param = + std::dynamic_pointer_cast(search_param); + if (hnsw_search_param && hnsw_search_param->training_query_id >= 0) { + params.set(core::PARAM_OMEGA_SEARCHER_TRAINING_QUERY_ID, + hnsw_search_param->training_query_id); + } + } + + if (!params.empty()) { context->update(params); } diff --git a/src/db/index/column/vector_column/engine_helper.hpp b/src/db/index/column/vector_column/engine_helper.hpp index dad423d03..40438b0bc 100644 --- a/src/db/index/column/vector_column/engine_helper.hpp +++ b/src/db/index/column/vector_column/engine_helper.hpp @@ -158,6 +158,7 @@ class ProximaEngineHelper { auto db_hnsw_query_params = dynamic_cast( query_params.query_params.get()); hnsw_query_param->ef_search = db_hnsw_query_params->ef(); + hnsw_query_param->training_query_id = db_hnsw_query_params->training_query_id(); } return std::move(hnsw_query_param); } @@ -179,10 +180,12 @@ class ProximaEngineHelper { query_params.query_params.get())) { omega_query_param->ef_search = db_omega_query_params->ef(); omega_query_param->target_recall = db_omega_query_params->target_recall(); 
+ omega_query_param->training_query_id = db_omega_query_params->training_query_id(); } else if (auto* db_hnsw_query_params = dynamic_cast( query_params.query_params.get())) { // Fallback to HnswQueryParams (backward compatibility) omega_query_param->ef_search = db_hnsw_query_params->ef(); + omega_query_param->training_query_id = db_hnsw_query_params->training_query_id(); // target_recall will use default value (0.95f) } } @@ -382,9 +385,13 @@ class ProximaEngineHelper { static_cast(core_interface::IndexType::kOMEGA)); fflush(stderr); - // TODO: Store OMEGA-specific params (min_vector_threshold, model_dir) - // in the params field for now - // These will be used by the IndexFlow when creating the OmegaSearcher + // Store OMEGA-specific params in the params field + // These will be used by OmegaSearcher::init() + hnsw_param->params.insert("omega.enabled", true); + hnsw_param->params.insert("omega.min_vector_threshold", + db_index_params->min_vector_threshold()); + hnsw_param->params.insert("omega.model_dir", + db_index_params->model_dir()); return hnsw_param; } diff --git a/src/db/index/common/proto_converter.cc b/src/db/index/common/proto_converter.cc index 520eacc22..3bfac9cd3 100644 --- a/src/db/index/common/proto_converter.cc +++ b/src/db/index/common/proto_converter.cc @@ -81,7 +81,9 @@ OmegaIndexParams::OPtr ProtoConverter::FromPb( auto params = std::make_shared( MetricTypeCodeBook::Get(params_pb.base().metric_type()), params_pb.m(), params_pb.ef_construction(), - QuantizeTypeCodeBook::Get(params_pb.base().quantize_type())); + QuantizeTypeCodeBook::Get(params_pb.base().quantize_type()), + params_pb.min_vector_threshold(), + params_pb.model_dir()); return params; } @@ -94,6 +96,8 @@ proto::OmegaIndexParams ProtoConverter::ToPb(const OmegaIndexParams *params) { QuantizeTypeCodeBook::Get(params->quantize_type())); params_pb.set_ef_construction(params->ef_construction()); params_pb.set_m(params->m()); + 
params_pb.set_min_vector_threshold(params->min_vector_threshold()); + params_pb.set_model_dir(params->model_dir()); return params_pb; } diff --git a/src/db/index/segment/segment.cc b/src/db/index/segment/segment.cc index e876b390a..35e3276f9 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -1644,24 +1644,34 @@ Result SegmentImpl::merge_vector_indexer( if (!reopen_status.ok()) { LOG_WARN("Failed to reopen indexer for training: %s", reopen_status.message().c_str()); } else { + // Get training params from index params + size_t num_training_queries = 1000; // default + int ef_training = 1000; // default + if (auto omega_params = std::dynamic_pointer_cast(field.index_params())) { + num_training_queries = omega_params->num_training_queries(); + ef_training = omega_params->ef_construction(); // Use ef_construction for training + LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d", + num_training_queries, ef_training); + } + // Collect training data TrainingDataCollectorOptions collector_opts; size_t doc_count = training_indexer->doc_count(); - collector_opts.num_training_queries = std::min(doc_count, size_t(1000)); - collector_opts.ef_training = 1000; // Large ef for recall ≈ 1 + collector_opts.num_training_queries = std::min(doc_count, num_training_queries); + collector_opts.ef_training = ef_training; collector_opts.topk = 100; collector_opts.k_train = 1; // Label=1 when top-1 GT found std::vector training_indexers = {training_indexer}; - auto training_result = TrainingDataCollector::CollectTrainingData( + auto training_result = TrainingDataCollector::CollectTrainingDataWithGtCmps( shared_from_this(), column, collector_opts, training_indexers); if (training_result.has_value()) { - auto& records = training_result.value(); - LOG_INFO("Collected %zu training records", records.size()); + auto& result = training_result.value(); + LOG_INFO("Collected %zu training records", result.records.size()); - if 
(records.size() >= 100) { + if (result.records.size() >= 100) { // Train the model OmegaModelTrainerOptions trainer_opts; trainer_opts.output_dir = model_output_dir; @@ -1674,14 +1684,15 @@ Result SegmentImpl::merge_vector_indexer( } } - auto train_status = OmegaModelTrainer::TrainModel(records, trainer_opts); + auto train_status = OmegaModelTrainer::TrainModelWithGtCmps( + result.records, result.gt_cmps_data, trainer_opts); if (train_status.ok()) { LOG_INFO("OMEGA model training completed successfully: %s", trainer_opts.output_dir.c_str()); } else { LOG_WARN("OMEGA model training failed: %s", train_status.message().c_str()); } } else { - LOG_INFO("Skipping model training: only %zu records collected (need >= 100)", records.size()); + LOG_INFO("Skipping model training: only %zu records collected (need >= 100)", result.records.size()); } } else { LOG_WARN("Failed to collect training data: %s", training_result.error().message().c_str()); @@ -2300,15 +2311,28 @@ Status SegmentImpl::auto_train_omega_index_internal( LOG_INFO("Starting auto-training for OMEGA index on field '%s' in segment %d", field_name.c_str(), id()); + // Get training params from index params + size_t num_training_queries = 1000; // default + int ef_training = 1000; // default + auto field = collection_schema_->get_field(field_name); + if (field && field->index_params()) { + if (auto omega_params = std::dynamic_pointer_cast(field->index_params())) { + num_training_queries = omega_params->num_training_queries(); + ef_training = omega_params->ef_construction(); // Use ef_construction for training + LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d", + num_training_queries, ef_training); + } + } + // Step 1: Collect training data using the provided indexers TrainingDataCollectorOptions collector_options; - collector_options.num_training_queries = 1000; // TODO: Make configurable - collector_options.ef_training = 1000; // Large ef for recall ≈ 1 + 
collector_options.num_training_queries = num_training_queries; + collector_options.ef_training = ef_training; collector_options.topk = 100; collector_options.noise_scale = 0.01f; - auto training_records_result = TrainingDataCollector::CollectTrainingData( + auto training_records_result = TrainingDataCollector::CollectTrainingDataWithGtCmps( shared_from_this(), field_name, collector_options, indexers); if (!training_records_result.has_value()) { @@ -2317,7 +2341,8 @@ Status SegmentImpl::auto_train_omega_index_internal( training_records_result.error().message()); } - auto& training_records = training_records_result.value(); + auto& training_result = training_records_result.value(); + auto& training_records = training_result.records; LOG_INFO("Collected %zu training records for segment %d", training_records.size(), id()); @@ -2353,7 +2378,7 @@ Status SegmentImpl::auto_train_omega_index_internal( LOG_INFO("Training data stats: %zu positive, %zu negative samples", positive_count, negative_count); - // Step 2: Train OMEGA model + // Step 2: Train OMEGA model with gt_cmps data OmegaModelTrainerOptions trainer_options; trainer_options.output_dir = FileHelper::MakeSegmentPath(path_, id()) + "/omega_model"; trainer_options.verbose = true; @@ -2367,7 +2392,8 @@ Status SegmentImpl::auto_train_omega_index_internal( } } - auto train_status = OmegaModelTrainer::TrainModel(training_records, trainer_options); + auto train_status = OmegaModelTrainer::TrainModelWithGtCmps( + training_records, training_result.gt_cmps_data, trainer_options); if (!train_status.ok()) { return Status::InternalError( "Failed to train OMEGA model: " + train_status.message()); diff --git a/src/db/proto/zvec.proto b/src/db/proto/zvec.proto index 0b95a2252..865099b3c 100644 --- a/src/db/proto/zvec.proto +++ b/src/db/proto/zvec.proto @@ -104,7 +104,8 @@ message OmegaIndexParams { BaseIndexParams base = 1; int32 m = 2; int32 ef_construction = 3; - // TODO: Add OMEGA-specific params like min_vector_threshold, 
model_dir + uint32 min_vector_threshold = 4; + string model_dir = 5; } message IndexParams { diff --git a/src/db/training/omega_model_trainer.cc b/src/db/training/omega_model_trainer.cc index 300390308..7cf754e64 100644 --- a/src/db/training/omega_model_trainer.cc +++ b/src/db/training/omega_model_trainer.cc @@ -53,6 +53,49 @@ Status OmegaModelTrainer::TrainModel( return Status::OK(); } +Status OmegaModelTrainer::TrainModelWithGtCmps( + const std::vector& training_records, + const core_interface::GtCmpsData& gt_cmps_data, + const OmegaModelTrainerOptions& options) { + if (training_records.empty()) { + return Status::InvalidArgument("Training records are empty"); + } + + if (options.output_dir.empty()) { + return Status::InvalidArgument("Output directory is empty"); + } + + // Step 1: Export training records to CSV + std::string csv_path = options.output_dir + "/training_data.csv"; + LOG_INFO("Exporting %zu training records to CSV: %s", + training_records.size(), csv_path.c_str()); + + auto status = ExportToCSV(training_records, csv_path); + if (!status.ok()) { + return status; + } + + // Step 2: Export gt_cmps data to CSV + std::string gt_cmps_path = options.output_dir + "/gt_cmps.csv"; + LOG_INFO("Exporting gt_cmps data to CSV: %s", gt_cmps_path.c_str()); + + status = ExportGtCmpsToCSV(gt_cmps_data, gt_cmps_path); + if (!status.ok()) { + return status; + } + + // Step 3: Invoke Python training script with gt_cmps + LOG_INFO("Invoking Python training script with gt_cmps"); + status = InvokePythonTrainer(csv_path, options, gt_cmps_path); + if (!status.ok()) { + return status; + } + + LOG_INFO("Successfully trained OMEGA model with gt_cmps, output: %s", + options.output_dir.c_str()); + return Status::OK(); +} + Status OmegaModelTrainer::ExportToCSV( const std::vector& records, const std::string& csv_path) { @@ -94,9 +137,40 @@ Status OmegaModelTrainer::ExportToCSV( return Status::OK(); } +Status OmegaModelTrainer::ExportGtCmpsToCSV( + const 
core_interface::GtCmpsData& gt_cmps_data, + const std::string& csv_path) { + std::ofstream csv_file(csv_path); + if (!csv_file.is_open()) { + return Status::InternalError("Failed to open gt_cmps CSV file for writing: " + csv_path); + } + + // Write CSV header + csv_file << "query_id,rank,cmps\n"; + + // Write gt_cmps data + for (size_t query_id = 0; query_id < gt_cmps_data.gt_cmps.size(); ++query_id) { + const auto& cmps_per_rank = gt_cmps_data.gt_cmps[query_id]; + for (size_t rank = 0; rank < cmps_per_rank.size(); ++rank) { + csv_file << query_id << "," << rank << "," << cmps_per_rank[rank] << "\n"; + } + } + + csv_file.close(); + + if (!csv_file.good()) { + return Status::InternalError("Error writing gt_cmps CSV file: " + csv_path); + } + + LOG_INFO("Successfully exported gt_cmps for %zu queries to CSV", + gt_cmps_data.num_queries); + return Status::OK(); +} + Status OmegaModelTrainer::InvokePythonTrainer( const std::string& csv_path, - const OmegaModelTrainerOptions& options) { + const OmegaModelTrainerOptions& options, + const std::string& gt_cmps_path) { // Build Python command std::ostringstream cmd; cmd << options.python_executable @@ -104,6 +178,11 @@ Status OmegaModelTrainer::InvokePythonTrainer( << " --input " << csv_path << " --output " << options.output_dir; + // Add gt_cmps path if provided + if (!gt_cmps_path.empty()) { + cmd << " --gt_cmps " << gt_cmps_path; + } + if (options.verbose) { cmd << " --verbose"; } diff --git a/src/db/training/omega_model_trainer.h b/src/db/training/omega_model_trainer.h index 97b54713a..14b288b5c 100644 --- a/src/db/training/omega_model_trainer.h +++ b/src/db/training/omega_model_trainer.h @@ -55,6 +55,22 @@ class OmegaModelTrainer { const std::vector& training_records, const OmegaModelTrainerOptions& options); + /** + * @brief Train OMEGA model with gt_cmps data for table generation + * + * This is the extended version that also exports gt_cmps data for + * generating gt_collected_table and gt_cmps_all_table. 
+ * + * @param training_records Training data collected from searches + * @param gt_cmps_data Ground truth cmps data for table generation + * @param options Training configuration + * @return Status indicating success or failure + */ + static Status TrainModelWithGtCmps( + const std::vector& training_records, + const core_interface::GtCmpsData& gt_cmps_data, + const OmegaModelTrainerOptions& options); + private: /** * @brief Export training records to CSV format @@ -71,6 +87,20 @@ class OmegaModelTrainer { const std::vector& records, const std::string& csv_path); + /** + * @brief Export gt_cmps data to CSV format + * + * CSV format: + * query_id,rank,cmps + * + * @param gt_cmps_data Ground truth cmps data + * @param csv_path Output CSV file path + * @return Status indicating success or failure + */ + static Status ExportGtCmpsToCSV( + const core_interface::GtCmpsData& gt_cmps_data, + const std::string& csv_path); + /** * @brief Invoke Python training script * @@ -79,11 +109,13 @@ class OmegaModelTrainer { * * @param csv_path Input CSV file path * @param options Training configuration + * @param gt_cmps_path Optional path to gt_cmps CSV file * @return Status indicating success or failure */ static Status InvokePythonTrainer( const std::string& csv_path, - const OmegaModelTrainerOptions& options); + const OmegaModelTrainerOptions& options, + const std::string& gt_cmps_path = ""); }; } // namespace zvec diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index e0fd3328f..1ea4ab8ad 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -15,6 +15,13 @@ #include "training_data_collector.h" #include #include +#include +#include +#include +#include +#include +#include +#include #include #include #include "db/index/column/vector_column/vector_column_params.h" @@ -22,6 +29,44 @@ namespace zvec { +// ============ DEBUG TIMING UTILITIES ============ +namespace { +static 
std::ofstream& GetDebugLog() { + static std::ofstream log_file("/tmp/omega_training_debug.log", std::ios::app); + return log_file; +} + +static void DebugLog(const std::string& msg) { + auto now = std::chrono::system_clock::now(); + auto time_t_now = std::chrono::system_clock::to_time_t(now); + auto ms = std::chrono::duration_cast( + now.time_since_epoch()) % 1000; + + auto& log = GetDebugLog(); + log << std::put_time(std::localtime(&time_t_now), "%Y-%m-%d %H:%M:%S") + << "." << std::setfill('0') << std::setw(3) << ms.count() + << " | " << msg << std::endl; + log.flush(); +} + +class ScopedTimer { + public: + ScopedTimer(const std::string& name) : name_(name) { + start_ = std::chrono::high_resolution_clock::now(); + DebugLog("[START] " + name_); + } + ~ScopedTimer() { + auto end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end - start_).count(); + DebugLog("[END] " + name_ + " | Duration: " + std::to_string(duration) + " ms"); + } + private: + std::string name_; + std::chrono::high_resolution_clock::time_point start_; +}; +} // namespace +// ============ END DEBUG TIMING UTILITIES ============ + Result> TrainingDataCollector::CollectTrainingData( const Segment::Ptr& segment, @@ -47,7 +92,7 @@ TrainingDataCollector::CollectTrainingData( options.topk); auto ground_truth = ComputeGroundTruth( - segment, field_name, training_queries, options.topk); + segment, field_name, training_queries, options.topk, options.num_threads); if (ground_truth.empty()) { return tl::make_unexpected( @@ -84,62 +129,97 @@ TrainingDataCollector::CollectTrainingData( } // Step 5: Perform searches with large ef and collect training records - LOG_INFO("Performing training searches with ef=%d", options.ef_training); + LOG_INFO("Performing training searches with ef=%d (parallel)", options.ef_training); std::vector> search_results; - search_results.reserve(training_queries.size()); - - for (size_t query_idx = 0; query_idx < training_queries.size(); 
++query_idx) { - const auto& query_vector = training_queries[query_idx]; - - // Set query ID for this query - for (auto& indexer : indexers) { - indexer->SetCurrentQueryId(static_cast(query_idx)); - } - // Prepare query parameters - vector_column_params::VectorData vector_data; - vector_data.vector = vector_column_params::DenseVector{ - .data = const_cast(static_cast(query_vector.data())) - }; + // Determine thread count + size_t actual_threads = options.num_threads; + if (actual_threads == 0) { + actual_threads = std::thread::hardware_concurrency(); + } + actual_threads = std::min(actual_threads, training_queries.size()); + + // Pre-allocate search_results for thread-safe access + search_results.resize(training_queries.size()); + + std::atomic completed_searches{0}; + std::mutex progress_mutex; + auto search_start = std::chrono::high_resolution_clock::now(); + + // Worker function for a range of queries + auto worker = [&](size_t start_idx, size_t end_idx) { + for (size_t query_idx = start_idx; query_idx < end_idx; ++query_idx) { + const auto& query_vector = training_queries[query_idx]; + + // Prepare query parameters + vector_column_params::VectorData vector_data; + vector_data.vector = vector_column_params::DenseVector{ + .data = const_cast(static_cast(query_vector.data())) + }; + + vector_column_params::QueryParams query_params; + query_params.topk = options.topk; + query_params.fetch_vector = false; + query_params.filter = segment->get_filter().get(); + + // Create OmegaQueryParams with training_query_id for parallel search + auto omega_params = std::make_shared(); + omega_params->set_ef(options.ef_training); + omega_params->set_training_query_id(static_cast(query_idx)); + query_params.query_params = omega_params; + + if (indexers.size() != 1) { + if (query_idx == start_idx) { + LOG_WARN("Expected 1 indexer but found %zu, using first one only", indexers.size()); + } + } - vector_column_params::QueryParams query_params; - query_params.topk = options.topk; - 
query_params.fetch_vector = false; - query_params.filter = segment->get_filter().get(); + auto search_result = indexers[0]->Search(vector_data, query_params); + if (!search_result.has_value()) { + LOG_WARN("Search failed for query %zu: %s", query_idx, + search_result.error().message().c_str()); + ++completed_searches; + continue; + } - // Create HNSW query params with large ef - auto hnsw_params = std::make_shared(); - hnsw_params->set_ef(options.ef_training); - query_params.query_params = hnsw_params; + // Extract result doc IDs + auto& results = search_result.value(); + std::vector result_ids; + result_ids.reserve(results->count()); + auto iter = results->create_iterator(); + while (iter->valid()) { + result_ids.push_back(iter->doc_id()); + iter->next(); + } - // Perform search directly on the indexer (assumes single indexer, which is true for just-merged case) - // For multiple indexers, we would need to merge results - if (indexers.size() != 1) { - LOG_WARN("Expected 1 indexer but found %zu, using first one only", indexers.size()); + search_results[query_idx] = std::move(result_ids); + ++completed_searches; } + }; - auto search_result = indexers[0]->Search(vector_data, query_params); - if (!search_result.has_value()) { - LOG_WARN("Search failed for query %zu: %s", query_idx, - search_result.error().message().c_str()); - search_results.push_back({}); - continue; - } + // Launch threads + std::vector threads; + size_t queries_per_thread = (training_queries.size() + actual_threads - 1) / actual_threads; - // Extract result doc IDs - auto& results = search_result.value(); - std::vector result_ids; - result_ids.reserve(results->count()); - auto iter = results->create_iterator(); - while (iter->valid()) { - result_ids.push_back(iter->doc_id()); - iter->next(); + for (size_t t = 0; t < actual_threads; ++t) { + size_t start_idx = t * queries_per_thread; + size_t end_idx = std::min(start_idx + queries_per_thread, training_queries.size()); + if (start_idx < end_idx) { + 
threads.emplace_back(worker, start_idx, end_idx); } + } - search_results.push_back(std::move(result_ids)); + // Wait for all threads + for (auto& thread : threads) { + thread.join(); } + auto search_end = std::chrono::high_resolution_clock::now(); + auto total_ms = std::chrono::duration_cast(search_end - search_start).count(); + LOG_INFO("Training searches completed in %zu ms (%zu threads)", + total_ms, actual_threads); + // Step 6: Collect training records from all indexers LOG_INFO("Collecting training records from indexers"); @@ -174,9 +254,9 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( const Segment::Ptr& segment, const std::string& field_name, const std::vector>& queries, - size_t topk) { - std::vector> ground_truth; - ground_truth.reserve(queries.size()); + size_t topk, + size_t num_threads) { + std::vector> ground_truth(queries.size()); // Get vector indexer (use brute force with is_linear=true) auto combined_indexer = segment->get_combined_vector_indexer(field_name); @@ -185,53 +265,98 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( return ground_truth; } - // Perform brute force search for each query - for (size_t query_idx = 0; query_idx < queries.size(); ++query_idx) { - const auto& query_vector = queries[query_idx]; - - // Prepare query parameters for brute force search - vector_column_params::VectorData vector_data; - vector_data.vector = vector_column_params::DenseVector{ - .data = const_cast(static_cast(query_vector.data())) - }; + // Determine thread count + size_t actual_threads = num_threads; + if (actual_threads == 0) { + actual_threads = std::thread::hardware_concurrency(); + } + actual_threads = std::min(actual_threads, queries.size()); + + DebugLog("[ComputeGroundTruth] Starting PARALLEL brute force search for " + + std::to_string(queries.size()) + " queries, topk=" + std::to_string(topk) + + ", threads=" + std::to_string(actual_threads)); + + auto loop_start = std::chrono::high_resolution_clock::now(); + std::atomic 
completed_queries{0}; + std::mutex log_mutex; + + // Worker function for a range of queries + auto worker = [&](size_t start_idx, size_t end_idx) { + for (size_t query_idx = start_idx; query_idx < end_idx; ++query_idx) { + const auto& query_vector = queries[query_idx]; + + // Prepare query parameters for brute force search + vector_column_params::VectorData vector_data; + vector_data.vector = vector_column_params::DenseVector{ + .data = const_cast(static_cast(query_vector.data())) + }; + + vector_column_params::QueryParams query_params; + query_params.topk = topk; + query_params.fetch_vector = false; + query_params.filter = segment->get_filter().get(); + + // Use linear search (brute force) for ground truth + auto base_params = std::make_shared(topk); + base_params->set_is_linear(true); + query_params.query_params = base_params; + + // Perform search + auto search_result = combined_indexer->Search(vector_data, query_params); + if (!search_result.has_value()) { + LOG_WARN("Ground truth search failed for query %zu: %s", + query_idx, search_result.error().message().c_str()); + continue; + } - vector_column_params::QueryParams query_params; - query_params.topk = topk; - query_params.fetch_vector = false; - query_params.filter = segment->get_filter().get(); - - // Use linear search (brute force) for ground truth - auto base_params = std::make_shared(topk); - base_params->set_is_linear(true); - query_params.query_params = base_params; - - // Perform search - auto search_result = combined_indexer->Search(vector_data, query_params); - if (!search_result.has_value()) { - LOG_WARN("Ground truth search failed for query %zu: %s", - query_idx, search_result.error().message().c_str()); - ground_truth.push_back({}); - continue; + // Extract result doc IDs + auto& results = search_result.value(); + std::vector gt_ids; + gt_ids.reserve(results->count()); + auto iter = results->create_iterator(); + while (iter->valid()) { + gt_ids.push_back(iter->doc_id()); + iter->next(); + } + 
ground_truth[query_idx] = std::move(gt_ids); + + // Update progress + size_t completed = ++completed_queries; + if (completed % 100 == 0 || completed == queries.size()) { + std::lock_guard lock(log_mutex); + auto now = std::chrono::high_resolution_clock::now(); + auto elapsed_ms = std::chrono::duration_cast(now - loop_start).count(); + DebugLog("[ComputeGroundTruth] Progress: " + std::to_string(completed) + "/" + + std::to_string(queries.size()) + ", elapsed: " + std::to_string(elapsed_ms) + " ms"); + } } + }; - // Extract result doc IDs - auto& results = search_result.value(); - std::vector gt_ids; - gt_ids.reserve(results->count()); - auto iter = results->create_iterator(); - while (iter->valid()) { - gt_ids.push_back(iter->doc_id()); - iter->next(); - } - ground_truth.push_back(std::move(gt_ids)); + // Launch threads + std::vector threads; + size_t queries_per_thread = (queries.size() + actual_threads - 1) / actual_threads; - if ((query_idx + 1) % 100 == 0) { - LOG_INFO("Computed ground truth for %zu/%zu queries", - query_idx + 1, queries.size()); + for (size_t t = 0; t < actual_threads; ++t) { + size_t start_idx = t * queries_per_thread; + size_t end_idx = std::min(start_idx + queries_per_thread, queries.size()); + if (start_idx < end_idx) { + threads.emplace_back(worker, start_idx, end_idx); } } - LOG_INFO("Computed ground truth for %zu queries", queries.size()); + // Wait for all threads + for (auto& thread : threads) { + thread.join(); + } + + auto loop_end = std::chrono::high_resolution_clock::now(); + auto total_ms = std::chrono::duration_cast(loop_end - loop_start).count(); + DebugLog("[ComputeGroundTruth] Completed " + std::to_string(queries.size()) + + " queries in " + std::to_string(total_ms) + " ms (" + + std::to_string(actual_threads) + " threads)"); + + LOG_INFO("Computed ground truth for %zu queries in %zu ms (%zu threads)", + queries.size(), total_ms, actual_threads); return ground_truth; } @@ -309,4 +434,326 @@ void 
TrainingDataCollector::FillLabels( labeled_count, records->size(), positive_count, negative_count, k_train); } +core_interface::GtCmpsData TrainingDataCollector::ComputeGtCmps( + const std::vector& records, + const std::vector>& ground_truth, + size_t topk) { + core_interface::GtCmpsData result; + result.topk = topk; + result.num_queries = ground_truth.size(); + + if (records.empty() || ground_truth.empty()) { + LOG_WARN("Empty records or ground_truth in ComputeGtCmps"); + return result; + } + + // Initialize gt_cmps with -1 (not found) + result.gt_cmps.resize(ground_truth.size()); + result.total_cmps.resize(ground_truth.size(), 0); + + for (size_t q = 0; q < ground_truth.size(); ++q) { + result.gt_cmps[q].resize(topk, -1); + } + + // Group records by query_id and find when each GT was first collected + // Records are ordered by (query_id, cmps_visited) + int current_query = -1; + int max_cmps_for_query = 0; + std::unordered_set gt_set; + const std::vector* current_gt = nullptr; + + for (const auto& record : records) { + int query_id = record.query_id; + + // Validate query_id + if (query_id < 0 || query_id >= static_cast(ground_truth.size())) { + continue; + } + + // Track max cmps for this query + if (query_id == current_query) { + max_cmps_for_query = std::max(max_cmps_for_query, record.cmps_visited); + } else { + // Save total_cmps for previous query + if (current_query >= 0 && current_query < static_cast(result.total_cmps.size())) { + result.total_cmps[current_query] = max_cmps_for_query; + } + + // Start new query + current_query = query_id; + max_cmps_for_query = record.cmps_visited; + current_gt = &ground_truth[query_id]; + gt_set.clear(); + for (size_t i = 0; i < std::min(topk, current_gt->size()); ++i) { + gt_set.insert((*current_gt)[i]); + } + } + + // Check which GT nodes are in collected_node_ids + for (uint64_t node_id : record.collected_node_ids) { + // Check if this node is a GT node and we haven't recorded its cmps yet + if (gt_set.count(node_id) > 
0) { + // Find the rank of this GT node + for (size_t rank = 0; rank < std::min(topk, current_gt->size()); ++rank) { + if ((*current_gt)[rank] == node_id && result.gt_cmps[query_id][rank] == -1) { + result.gt_cmps[query_id][rank] = record.cmps_visited; + break; + } + } + } + } + } + + // Save total_cmps for the last query + if (current_query >= 0 && current_query < static_cast(result.total_cmps.size())) { + result.total_cmps[current_query] = max_cmps_for_query; + } + + // Fill in -1 values with total_cmps (GT not found) + for (size_t q = 0; q < result.gt_cmps.size(); ++q) { + for (size_t r = 0; r < result.gt_cmps[q].size(); ++r) { + if (result.gt_cmps[q][r] == -1) { + result.gt_cmps[q][r] = result.total_cmps[q]; + } + } + } + + LOG_INFO("Computed gt_cmps for %zu queries, topk=%zu", result.num_queries, result.topk); + return result; +} + +Result +TrainingDataCollector::CollectTrainingDataWithGtCmps( + const Segment::Ptr& segment, + const std::string& field_name, + const TrainingDataCollectorOptions& options, + const std::vector& provided_indexers) { + ScopedTimer total_timer("CollectTrainingDataWithGtCmps [TOTAL]"); + + // Step 1: Generate training queries + LOG_INFO("Generating %zu training queries for field '%s'", + options.num_training_queries, field_name.c_str()); + + std::vector> training_queries; + { + ScopedTimer timer("Step1: GenerateTrainingQueries"); + training_queries = TrainingQueryGenerator::GenerateTrainingQueries( + segment, field_name, options.num_training_queries, + options.noise_scale, options.seed); + DebugLog(" Generated " + std::to_string(training_queries.size()) + " queries"); + } + + if (training_queries.empty()) { + return tl::make_unexpected( + Status::InternalError("Failed to generate training queries")); + } + + // Step 2: Compute ground truth (brute force search with recall = 1) + LOG_INFO("Computing ground truth with brute force search (topk=%zu)", + options.topk); + + std::vector> ground_truth; + { + ScopedTimer timer("Step2: 
ComputeGroundTruth (BRUTE FORCE PARALLEL)"); + DebugLog(" num_queries=" + std::to_string(training_queries.size()) + + ", topk=" + std::to_string(options.topk) + + ", threads=" + std::to_string(options.num_threads == 0 ? std::thread::hardware_concurrency() : options.num_threads)); + ground_truth = ComputeGroundTruth( + segment, field_name, training_queries, options.topk, options.num_threads); + DebugLog(" Computed ground truth for " + std::to_string(ground_truth.size()) + " queries"); + } + + if (ground_truth.empty()) { + return tl::make_unexpected( + Status::InternalError("Failed to compute ground truth")); + } + + // Step 3: Choose indexers for training + std::vector indexers; + + if (!provided_indexers.empty()) { + indexers = provided_indexers; + } else { + indexers = segment->get_vector_indexer(field_name); + } + + if (indexers.empty()) { + return tl::make_unexpected( + Status::InternalError("No vector indexers found for field: " + field_name)); + } + + LOG_INFO("Found %zu indexers for field '%s'", indexers.size(), field_name.c_str()); + DebugLog("Step3: Found " + std::to_string(indexers.size()) + " indexers, doc_count=" + + std::to_string(indexers[0]->doc_count())); + + // Step 4: Enable training mode on all indexers + LOG_INFO("Enabling training mode on %zu indexers", indexers.size()); + for (auto& indexer : indexers) { + auto status = indexer->EnableTrainingMode(true); + if (!status.ok()) { + LOG_WARN("Failed to enable training mode on indexer: %s", + status.message().c_str()); + } + } + + // Step 5: Perform searches with large ef and collect training records + LOG_INFO("Performing training searches with ef=%d", options.ef_training); + + std::vector> search_results; + search_results.reserve(training_queries.size()); + + { + ScopedTimer timer("Step5: TrainingSearches (HNSW with ef=" + std::to_string(options.ef_training) + ") PARALLEL"); + + // Determine thread count + size_t actual_threads = options.num_threads; + if (actual_threads == 0) { + actual_threads = 
std::thread::hardware_concurrency(); + } + actual_threads = std::min(actual_threads, training_queries.size()); + + DebugLog(" num_queries=" + std::to_string(training_queries.size()) + + ", threads=" + std::to_string(actual_threads)); + + // Pre-allocate search_results for thread-safe access + search_results.resize(training_queries.size()); + + std::atomic completed_searches{0}; + std::mutex progress_mutex; + auto search_start = std::chrono::high_resolution_clock::now(); + + // Worker function for a range of queries + auto worker = [&](size_t start_idx, size_t end_idx) { + for (size_t query_idx = start_idx; query_idx < end_idx; ++query_idx) { + const auto& query_vector = training_queries[query_idx]; + + // Prepare query parameters + vector_column_params::VectorData vector_data; + vector_data.vector = vector_column_params::DenseVector{ + .data = const_cast(static_cast(query_vector.data())) + }; + + vector_column_params::QueryParams query_params; + query_params.topk = options.topk; + query_params.fetch_vector = false; + query_params.filter = segment->get_filter().get(); + + // Create OmegaQueryParams with training_query_id for parallel search + auto omega_params = std::make_shared(); + omega_params->set_ef(options.ef_training); + omega_params->set_training_query_id(static_cast(query_idx)); + query_params.query_params = omega_params; + + if (indexers.size() != 1) { + // Only log once + if (query_idx == start_idx) { + LOG_WARN("Expected 1 indexer but found %zu, using first one only", indexers.size()); + } + } + + auto search_result = indexers[0]->Search(vector_data, query_params); + if (!search_result.has_value()) { + LOG_WARN("Search failed for query %zu: %s", query_idx, + search_result.error().message().c_str()); + // search_results[query_idx] is already default empty + ++completed_searches; + continue; + } + + // Extract result doc IDs + auto& results = search_result.value(); + std::vector result_ids; + result_ids.reserve(results->count()); + auto iter = 
results->create_iterator(); + while (iter->valid()) { + result_ids.push_back(iter->doc_id()); + iter->next(); + } + + search_results[query_idx] = std::move(result_ids); + + // Update progress + size_t completed = ++completed_searches; + if (completed % 100 == 0 || completed == training_queries.size()) { + std::lock_guard lock(progress_mutex); + auto now = std::chrono::high_resolution_clock::now(); + auto elapsed_ms = std::chrono::duration_cast(now - search_start).count(); + DebugLog(" Training search progress: " + std::to_string(completed) + "/" + + std::to_string(training_queries.size()) + ", elapsed: " + std::to_string(elapsed_ms) + " ms"); + } + } + }; + + // Launch threads + std::vector threads; + size_t queries_per_thread = (training_queries.size() + actual_threads - 1) / actual_threads; + + for (size_t t = 0; t < actual_threads; ++t) { + size_t start_idx = t * queries_per_thread; + size_t end_idx = std::min(start_idx + queries_per_thread, training_queries.size()); + if (start_idx < end_idx) { + threads.emplace_back(worker, start_idx, end_idx); + } + } + + // Wait for all threads + for (auto& thread : threads) { + thread.join(); + } + + auto search_end = std::chrono::high_resolution_clock::now(); + auto total_ms = std::chrono::duration_cast(search_end - search_start).count(); + LOG_INFO("Training searches completed in %zu ms (%zu threads)", + total_ms, actual_threads); + } + + // Step 6: Collect training records from all indexers + LOG_INFO("Collecting training records from indexers"); + + std::vector all_records; + { + ScopedTimer timer("Step6: CollectTrainingRecords"); + for (auto& indexer : indexers) { + auto records = indexer->GetTrainingRecords(); + LOG_INFO("Collected %zu records from indexer", records.size()); + all_records.insert(all_records.end(), records.begin(), records.end()); + } + DebugLog(" Total records collected: " + std::to_string(all_records.size())); + } + + if (all_records.empty()) { + LOG_WARN("No training records collected from any 
indexer"); + } + + // Step 7: Fill labels based on ground truth + LOG_INFO("Filling labels for %zu records (k_train=%zu)", all_records.size(), options.k_train); + { + ScopedTimer timer("Step7: FillLabels"); + FillLabels(&all_records, ground_truth, search_results, options.k_train); + } + + // Step 8: Compute gt_cmps data + LOG_INFO("Computing gt_cmps data"); + core_interface::GtCmpsData gt_cmps_data; + { + ScopedTimer timer("Step8: ComputeGtCmps"); + gt_cmps_data = ComputeGtCmps(all_records, ground_truth, options.topk); + } + + // Step 9: Disable training mode and clear records + for (auto& indexer : indexers) { + indexer->EnableTrainingMode(false); + indexer->ClearTrainingRecords(); + } + + LOG_INFO("Successfully collected %zu training records with labels and gt_cmps", + all_records.size()); + + TrainingDataCollectorResult result; + result.records = std::move(all_records); + result.gt_cmps_data = std::move(gt_cmps_data); + + return result; +} + } // namespace zvec diff --git a/src/db/training/training_data_collector.h b/src/db/training/training_data_collector.h index 48001a211..b5fdbe09f 100644 --- a/src/db/training/training_data_collector.h +++ b/src/db/training/training_data_collector.h @@ -48,6 +48,17 @@ struct TrainingDataCollectorOptions { // Random seed for reproducibility uint64_t seed = 42; + + // Number of threads for parallel operations (0 = hardware_concurrency) + size_t num_threads = 0; +}; + +/** + * @brief Result of training data collection, includes both records and gt_cmps + */ +struct TrainingDataCollectorResult { + std::vector records; + core_interface::GtCmpsData gt_cmps_data; }; /** @@ -76,6 +87,24 @@ class TrainingDataCollector { const TrainingDataCollectorOptions& options, const std::vector& indexers = {}); + /** + * @brief Collect training data with gt_cmps information for table generation + * + * This is the extended version that also computes gt_cmps data needed for + * generating gt_collected_table and gt_cmps_all_table. 
+ * + * @param segment The segment to collect data from (must be persisted) + * @param field_name Vector field name to train on + * @param options Collection options + * @param indexers Optional specific indexers to use + * @return TrainingDataCollectorResult with records and gt_cmps_data + */ + static Result CollectTrainingDataWithGtCmps( + const Segment::Ptr& segment, + const std::string& field_name, + const TrainingDataCollectorOptions& options, + const std::vector& indexers = {}); + private: /** * @brief Compute ground truth using brute force search @@ -84,13 +113,15 @@ class TrainingDataCollector { * @param field_name Vector field name * @param queries Training query vectors * @param topk Number of top results to retrieve + * @param num_threads Number of threads (0 = hardware_concurrency) * @return Ground truth doc IDs for each query */ static std::vector> ComputeGroundTruth( const Segment::Ptr& segment, const std::string& field_name, const std::vector>& queries, - size_t topk); + size_t topk, + size_t num_threads); /** * @brief Fill labels in training records based on ground truth @@ -113,6 +144,22 @@ class TrainingDataCollector { const std::vector>& ground_truth, const std::vector>& search_results, size_t k_train); + + /** + * @brief Compute gt_cmps data from training records and ground truth + * + * For each query and each GT rank, find the cmps value when that GT was first + * collected. This data is used to generate gt_collected_table and gt_cmps_all_table. 
+ * + * @param records Training records (must be sorted by query_id, then by cmps) + * @param ground_truth Ground truth doc IDs per query + * @param topk Number of top results per query + * @return GtCmpsData structure with computed gt_cmps + */ + static core_interface::GtCmpsData ComputeGtCmps( + const std::vector& records, + const std::vector>& ground_truth, + size_t topk); }; } // namespace zvec diff --git a/src/include/zvec/core/interface/index_param.h b/src/include/zvec/core/interface/index_param.h index da3a88dcd..df356e623 100644 --- a/src/include/zvec/core/interface/index_param.h +++ b/src/include/zvec/core/interface/index_param.h @@ -181,6 +181,7 @@ struct HNSWQueryParam : public BaseIndexQueryParam { using Pointer = std::shared_ptr; uint32_t ef_search = kDefaultHnswEfSearch; + int training_query_id = -1; // For parallel training searches, -1 means use global BaseIndexQueryParam::Pointer Clone() const override { return std::make_shared(*this); diff --git a/src/include/zvec/core/interface/training.h b/src/include/zvec/core/interface/training.h index f2e5f951b..947721fdc 100644 --- a/src/include/zvec/core/interface/training.h +++ b/src/include/zvec/core/interface/training.h @@ -59,4 +59,27 @@ struct TrainingRecord { label(0) {} }; +/** + * @brief Ground truth cmps data for OMEGA table generation. + * + * For each query, stores the cmps value when each ground truth result was found. + * This data is used to generate gt_collected_table and gt_cmps_all_table. 
+ * + * gt_cmps[query_id][rank] = cmps value when GT[rank] was collected + * = total_cmps if GT[rank] was never found + */ +struct GtCmpsData { + // gt_cmps[query_id][rank] = cmps when GT of rank was found + std::vector> gt_cmps; + + // total_cmps[query_id] = total comparisons for this query + std::vector total_cmps; + + // topk value used during training + size_t topk = 0; + + // Number of queries + size_t num_queries = 0; +}; + } // namespace zvec::core_interface diff --git a/src/include/zvec/db/index_params.h b/src/include/zvec/db/index_params.h index 82891482e..9ab24f0d3 100644 --- a/src/include/zvec/db/index_params.h +++ b/src/include/zvec/db/index_params.h @@ -322,17 +322,24 @@ class OmegaIndexParams : public VectorIndexParams { OmegaIndexParams( MetricType metric_type, int m = core_interface::kDefaultHnswNeighborCnt, int ef_construction = core_interface::kDefaultHnswEfConstruction, - QuantizeType quantize_type = QuantizeType::UNDEFINED) + QuantizeType quantize_type = QuantizeType::UNDEFINED, + uint32_t min_vector_threshold = 100000, + const std::string& model_dir = "./omega_models", + size_t num_training_queries = 1000) : VectorIndexParams(IndexType::OMEGA, metric_type, quantize_type), m_(m), - ef_construction_(ef_construction) {} + ef_construction_(ef_construction), + min_vector_threshold_(min_vector_threshold), + model_dir_(model_dir), + num_training_queries_(num_training_queries) {} using OPtr = std::shared_ptr; public: Ptr clone() const override { return std::make_shared(metric_type_, m_, ef_construction_, - quantize_type_); + quantize_type_, min_vector_threshold_, + model_dir_, num_training_queries_); } std::string to_string() const override { @@ -340,7 +347,9 @@ class OmegaIndexParams : public VectorIndexParams { metric_type_, quantize_type_); std::ostringstream oss; oss << base_str << ",m:" << m_ << ",ef_construction:" << ef_construction_ - << "}"; + << ",min_vector_threshold:" << min_vector_threshold_ + << ",model_dir:" << model_dir_ + << 
",num_training_queries:" << num_training_queries_ << "}"; return oss.str(); } @@ -351,6 +360,12 @@ class OmegaIndexParams : public VectorIndexParams { m_ == static_cast(other).m_ && ef_construction_ == static_cast(other).ef_construction_ && + min_vector_threshold_ == + static_cast(other).min_vector_threshold_ && + model_dir_ == + static_cast(other).model_dir_ && + num_training_queries_ == + static_cast(other).num_training_queries_ && quantize_type() == static_cast(other).quantize_type(); } @@ -367,10 +382,31 @@ class OmegaIndexParams : public VectorIndexParams { int ef_construction() const { return ef_construction_; } + void set_min_vector_threshold(uint32_t min_vector_threshold) { + min_vector_threshold_ = min_vector_threshold; + } + uint32_t min_vector_threshold() const { + return min_vector_threshold_; + } + void set_model_dir(const std::string& model_dir) { + model_dir_ = model_dir; + } + const std::string& model_dir() const { + return model_dir_; + } + void set_num_training_queries(size_t num_training_queries) { + num_training_queries_ = num_training_queries; + } + size_t num_training_queries() const { + return num_training_queries_; + } private: int m_; int ef_construction_; + uint32_t min_vector_threshold_; + std::string model_dir_; + size_t num_training_queries_; }; } // namespace zvec \ No newline at end of file diff --git a/src/include/zvec/db/query_params.h b/src/include/zvec/db/query_params.h index 371eef50d..f7a157ea5 100644 --- a/src/include/zvec/db/query_params.h +++ b/src/include/zvec/db/query_params.h @@ -73,7 +73,7 @@ class HnswQueryParams : public QueryParams { HnswQueryParams(int ef = core_interface::kDefaultHnswEfSearch, float radius = 0.0f, bool is_linear = false, bool is_using_refiner = false) - : QueryParams(IndexType::HNSW), ef_(ef) { + : QueryParams(IndexType::HNSW), ef_(ef), training_query_id_(-1) { set_radius(radius); set_is_linear(is_linear); set_is_using_refiner(is_using_refiner); @@ -89,8 +89,19 @@ class HnswQueryParams : public 
QueryParams { ef_ = ef; } + // Training query ID for parallel training searches + // -1 means not set (use global current_query_id from indexer) + int training_query_id() const { + return training_query_id_; + } + + void set_training_query_id(int query_id) { + training_query_id_ = query_id; + } + private: int ef_; + int training_query_id_; }; class OmegaQueryParams : public HnswQueryParams { diff --git a/thirdparty/omega b/thirdparty/omega index 14e6c9508..4a8b0bcaf 160000 --- a/thirdparty/omega +++ b/thirdparty/omega @@ -1 +1 @@ -Subproject commit 14e6c95080cafd8dbe9db7a39ae00071331a7c24 +Subproject commit 4a8b0bcaf107d1f79607b550a3dbfc69f7058143 From 9f38ea2af697ab657c4acf77e8dba562cbd9a1e7 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 6 Mar 2026 19:28:34 +0800 Subject: [PATCH 010/126] feat(omega): migrate to LightGBM C API and add conditional compilation Build System Changes: - Add ZVEC_ENABLE_OMEGA option for conditional OMEGA compilation (default: OFF) - Add -DZVEC_ENABLE_OMEGA definition when enabled - Update thirdparty/CMakeLists.txt to conditionally build omega library - Update src/core/CMakeLists.txt to conditionally compile omega sources - Update omega submodule to version with LightGBM C API support Training System Refactor: - Replace Python subprocess training with native LightGBM C API * Remove CSV export and Python _omega_training.py invocation * Add direct omega::OmegaTrainer integration via C++ API * Remove ExportToCSV, ExportGtCmpsToCSV, InvokePythonTrainer methods - Add configurable training parameters to OmegaModelTrainerOptions: * num_iterations (default: 100) * num_leaves (default: 31) * learning_rate (default: 0.1) * num_threads (default: 8) - Add type conversion helpers (ConvertRecord, ConvertGtCmpsData) - Improve training performance Training Data Collection Improvements: - Move training record storage from OmegaStreamer to OmegaContext * Remove shared collected_records_ vector and training_mutex_ from OmegaStreamer * Store records 
per-query in OmegaContext via add_training_record() * Eliminate lock contention during parallel training searches - Remove legacy GetTrainingRecords/ClearTrainingRecords from OmegaStreamer - Simplify OmegaIndex training interface (return empty vectors) - Update omega_streamer.cc to use context-based record collection Code Cleanup: - Wrap all OMEGA-dependent code with #ifdef ZVEC_ENABLE_OMEGA guards - Update OmegaModelTrainerOptions documentation - Add detailed logging for training record collection - Improve error handling for missing OmegaContext --- CMakeLists.txt | 6 + pyproject.toml | 1 + src/core/CMakeLists.txt | 14 +- src/core/algorithm/CMakeLists.txt | 6 +- src/core/algorithm/omega/omega_context.h | 25 ++ src/core/algorithm/omega/omega_streamer.cc | 13 +- src/core/algorithm/omega/omega_streamer.h | 13 +- src/core/interface/index.cc | 6 + src/core/interface/indexes/omega_index.cc | 18 +- .../vector_column/vector_column_indexer.cc | 23 +- src/db/index/segment/segment.cc | 10 + src/db/training/omega_model_trainer.cc | 221 +++++----------- src/db/training/omega_model_trainer.h | 68 +---- src/db/training/query_generator.cc | 48 +++- src/db/training/query_generator.h | 50 +++- src/db/training/training_data_collector.cc | 244 ++++++++++++------ src/db/training/training_data_collector.h | 4 +- .../zvec/core/framework/index_context.h | 10 + src/include/zvec/core/interface/index.h | 3 + thirdparty/CMakeLists.txt | 9 +- thirdparty/omega | 2 +- 21 files changed, 439 insertions(+), 355 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 294af340c..935ab0da1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,12 @@ message(STATUS "BUILD_PYTHON_BINDINGS:${BUILD_PYTHON_BINDINGS}") option(BUILD_TOOLS "Build tools" ON) message(STATUS "BUILD_TOOLS:${BUILD_TOOLS}") +option(ZVEC_ENABLE_OMEGA "Enable OMEGA support with LightGBM (requires LightGBM library)" OFF) +message(STATUS "ZVEC_ENABLE_OMEGA:${ZVEC_ENABLE_OMEGA}") +if(ZVEC_ENABLE_OMEGA) + 
add_definitions(-DZVEC_ENABLE_OMEGA) +endif() + cc_directory(thirdparty) cc_directories(src) cc_directories(tests) diff --git a/pyproject.toml b/pyproject.toml index 7bf99fa3f..456bfc082 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,6 +116,7 @@ sdist.include = [ [tool.scikit-build.cmake.define] BUILD_TOOLS = "OFF" BUILD_PYTHON_BINDINGS = "ON" +ZVEC_ENABLE_OMEGA = "OFF" # Setuptools config for test pypi [tool.setuptools_scm] diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 03f9bbb98..225a255df 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -12,10 +12,22 @@ cc_directory(mixed_reducer) git_version(GIT_SRCS_VER ${CMAKE_CURRENT_SOURCE_DIR}) file(GLOB_RECURSE ALL_CORE_SRCS *.cc *.c *.h) +# Exclude omega algorithm files when OMEGA is disabled +if(NOT ZVEC_ENABLE_OMEGA) + list(FILTER ALL_CORE_SRCS EXCLUDE REGEX ".*/algorithm/omega/.*") + list(FILTER ALL_CORE_SRCS EXCLUDE REGEX ".*/omega_index\\.cc$") +endif() + +# Conditionally link omega library +set(CORE_LIBS zvec_ailego sparsehash magic_enum) +if(ZVEC_ENABLE_OMEGA) + list(APPEND CORE_LIBS omega) +endif() + cc_library( NAME zvec_core STATIC STRICT PACKED SRCS ${ALL_CORE_SRCS} - LIBS zvec_ailego sparsehash magic_enum omega + LIBS ${CORE_LIBS} INCS . 
${PROJECT_ROOT_DIR}/src/core VERSION "${GIT_SRCS_VER}" ) \ No newline at end of file diff --git a/src/core/algorithm/CMakeLists.txt b/src/core/algorithm/CMakeLists.txt index cb954a978..c3ad7f6aa 100644 --- a/src/core/algorithm/CMakeLists.txt +++ b/src/core/algorithm/CMakeLists.txt @@ -7,4 +7,8 @@ cc_directory(flat_sparse) cc_directory(ivf) cc_directory(hnsw) cc_directory(hnsw_sparse) -cc_directory(omega) \ No newline at end of file + +# Only include omega when ZVEC_ENABLE_OMEGA is ON +if(ZVEC_ENABLE_OMEGA) + cc_directory(omega) +endif() \ No newline at end of file diff --git a/src/core/algorithm/omega/omega_context.h b/src/core/algorithm/omega/omega_context.h index 6d0bf7d71..4e03cc93d 100644 --- a/src/core/algorithm/omega/omega_context.h +++ b/src/core/algorithm/omega/omega_context.h @@ -16,6 +16,7 @@ #include "../hnsw/hnsw_context.h" #include "omega_params.h" +#include namespace zvec { namespace core { @@ -23,6 +24,8 @@ namespace core { /** * OmegaContext extends HnswContext to support OMEGA-specific parameters * like target_recall that can be set per-query. + * + * Training records are stored per-context (no shared state, no locks needed). */ class OmegaContext : public HnswContext { public: @@ -49,6 +52,27 @@ class OmegaContext : public HnswContext { return training_query_id_; } + //! Get training records collected during this search (no locks needed) + const std::vector& training_records() const { + return training_records_; + } + + //! Move training records out (override base class virtual method) + std::vector take_training_records() override { + return std::move(training_records_); + } + + //! Add a training record + void add_training_record(core_interface::TrainingRecord record) { + training_records_.push_back(std::move(record)); + } + + //! Clear training records (override base class virtual method) + //! Called before each search when context is reused from pool + void clear_training_records() override { + training_records_.clear(); + } + //! 
Update context parameters (overrides HnswContext::update) int update(const ailego::Params ¶ms) override { // First call parent to update HNSW parameters @@ -71,6 +95,7 @@ class OmegaContext : public HnswContext { private: float target_recall_; // Per-query target recall int training_query_id_; // Per-query training query ID for parallel training + std::vector training_records_; // Per-query training records }; } // namespace core diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index b52d53c22..772a057fd 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -244,18 +244,17 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, LOG_DEBUG("OMEGA training search completed: cmps=%d, hops=%d, results=%zu", cmps, hops, topk_heap.size()); - // Collect training records from OMEGA library + // Collect training records from OMEGA library and store in context (no locks needed) size_t record_count = omega_search_get_training_records_count(omega_search); - if (record_count > 0) { + if (record_count > 0 && omega_ctx != nullptr) { const void* records_ptr = omega_search_get_training_records(omega_search); // NOTE: omega_search_get_training_records returns pointer to std::vector, not array const auto* records_vec = static_cast*>(records_ptr); - // Convert and store training records - std::lock_guard lock(training_mutex_); + // Convert and store training records in context (per-query, no shared state) for (size_t i = 0; i < record_count; ++i) { const auto& omega_record = (*records_vec)[i]; core_interface::TrainingRecord record; @@ -282,11 +281,13 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, record.label = omega_record.label; // Default 0 - collected_records_.push_back(std::move(record)); + omega_ctx->add_training_record(std::move(record)); } - LOG_DEBUG("Collected %zu training records for query_id=%d", + 
LOG_DEBUG("Collected %zu training records for query_id=%d (stored in context)", record_count, query_id); + } else if (record_count > 0) { + LOG_WARN("Training records collected but context is not OmegaContext, records lost"); } else { LOG_WARN("No training records collected for query_id=%d", query_id); } diff --git a/src/core/algorithm/omega/omega_streamer.h b/src/core/algorithm/omega/omega_streamer.h index 1a980dfd3..232ecde18 100644 --- a/src/core/algorithm/omega/omega_streamer.h +++ b/src/core/algorithm/omega/omega_streamer.h @@ -40,17 +40,9 @@ class OmegaStreamer : public HnswStreamer { OmegaStreamer(const OmegaStreamer &streamer) = delete; OmegaStreamer &operator=(const OmegaStreamer &streamer) = delete; - // Training mode support (for future implementation) + // Training mode support void EnableTrainingMode(bool enable) { training_mode_enabled_ = enable; } void SetCurrentQueryId(int query_id) { current_query_id_ = query_id; } - std::vector GetTrainingRecords() const { - std::lock_guard lock(training_mutex_); - return collected_records_; - } - void ClearTrainingRecords() { - std::lock_guard lock(training_mutex_); - collected_records_.clear(); - } protected: /** @@ -80,8 +72,7 @@ class OmegaStreamer : public HnswStreamer { // Training mode state (for future implementation) bool training_mode_enabled_{false}; int current_query_id_{0}; - mutable std::mutex training_mutex_{}; - mutable std::vector collected_records_{}; + // Note: training records are now stored per-context in OmegaContext, not here }; } // namespace core diff --git a/src/core/interface/index.cc b/src/core/interface/index.cc index e0fd00a8b..41d1669ab 100644 --- a/src/core/interface/index.cc +++ b/src/core/interface/index.cc @@ -403,6 +403,9 @@ int Index::Search(const VectorData &vector_data, return core::IndexError_Runtime; } + // Clear training records before search (context may be reused from pool) + context->clear_training_records(); + if (_prepare_for_search(vector_data, search_param, context) 
!= 0) { LOG_ERROR("Failed to prepare for search"); return core::IndexError_Runtime; @@ -655,6 +658,9 @@ int Index::_dense_search(const VectorData &vector_data, } } + // Extract training records from context (for OMEGA training mode) + result->training_records_ = context->take_training_records(); + return 0; } diff --git a/src/core/interface/indexes/omega_index.cc b/src/core/interface/indexes/omega_index.cc index 7078806f1..976b22214 100644 --- a/src/core/interface/indexes/omega_index.cc +++ b/src/core/interface/indexes/omega_index.cc @@ -98,24 +98,14 @@ void OmegaIndex::SetCurrentQueryId(int query_id) { } std::vector OmegaIndex::GetTrainingRecords() const { - // Get training records from OmegaStreamer - if (streamer_) { - auto* omega_streamer = dynamic_cast(streamer_.get()); - if (omega_streamer) { - return omega_streamer->GetTrainingRecords(); - } - } + // Training records are collected via SearchResult.training_records_ (from OmegaContext), + // not through this method. This is kept for ITrainingCapable interface compliance. return {}; } void OmegaIndex::ClearTrainingRecords() { - // Clear training records in OmegaStreamer - if (streamer_) { - auto* omega_streamer = dynamic_cast(streamer_.get()); - if (omega_streamer) { - omega_streamer->ClearTrainingRecords(); - } - } + // Training records are managed per-search via OmegaContext, + // no shared state to clear here. 
} int OmegaIndex::_prepare_for_search( diff --git a/src/db/index/column/vector_column/vector_column_indexer.cc b/src/db/index/column/vector_column/vector_column_indexer.cc index 0399194a7..b9bc8f40e 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.cc +++ b/src/db/index/column/vector_column/vector_column_indexer.cc @@ -173,13 +173,6 @@ Result VectorColumnIndexer::Search( return tl::make_unexpected(Status::InvalidArgument("Index not opened")); } - // Set query_id before search if training mode is enabled - if (training_mode_enabled_) { - if (auto* training_capable = index->GetTrainingCapability()) { - training_capable->SetCurrentQueryId(current_query_id_); - } - } - auto engine_vector_data = ProximaEngineHelper::convert_to_engine_vector(vector_data, is_sparse_); core_interface::SearchResult search_result; @@ -207,17 +200,13 @@ Result VectorColumnIndexer::Search( Status::InternalError("Failed to search vector")); } - // Collect training records after search if training mode is enabled - if (training_mode_enabled_) { + // Collect training records from search result (stored in context during search) + // This is thread-safe because each search has its own context + if (training_mode_enabled_ && !search_result.training_records_.empty()) { std::lock_guard lock(training_mutex_); - if (auto* training_capable = index->GetTrainingCapability()) { - auto records = training_capable->GetTrainingRecords(); - collected_records_.insert(collected_records_.end(), - records.begin(), records.end()); - // CRITICAL: Clear records from underlying index to avoid memory explosion - // Without this, records accumulate across queries and get copied repeatedly - training_capable->ClearTrainingRecords(); - } + collected_records_.insert(collected_records_.end(), + std::make_move_iterator(search_result.training_records_.begin()), + std::make_move_iterator(search_result.training_records_.end())); } auto result = std::make_shared( diff --git a/src/db/index/segment/segment.cc 
b/src/db/index/segment/segment.cc index 35e3276f9..c0c6bb84a 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -56,7 +56,9 @@ #include "column_merging_reader.h" #include "sql_expr_parser.h" #include "db/training/training_data_collector.h" +#ifdef ZVEC_ENABLE_OMEGA #include "db/training/omega_model_trainer.h" +#endif namespace zvec { @@ -1672,6 +1674,7 @@ Result SegmentImpl::merge_vector_indexer( LOG_INFO("Collected %zu training records", result.records.size()); if (result.records.size() >= 100) { +#ifdef ZVEC_ENABLE_OMEGA // Train the model OmegaModelTrainerOptions trainer_opts; trainer_opts.output_dir = model_output_dir; @@ -1691,6 +1694,9 @@ Result SegmentImpl::merge_vector_indexer( } else { LOG_WARN("OMEGA model training failed: %s", train_status.message().c_str()); } +#else + LOG_INFO("OMEGA training skipped (ZVEC_ENABLE_OMEGA not defined)"); +#endif } else { LOG_INFO("Skipping model training: only %zu records collected (need >= 100)", result.records.size()); } @@ -2378,6 +2384,7 @@ Status SegmentImpl::auto_train_omega_index_internal( LOG_INFO("Training data stats: %zu positive, %zu negative samples", positive_count, negative_count); +#ifdef ZVEC_ENABLE_OMEGA // Step 2: Train OMEGA model with gt_cmps data OmegaModelTrainerOptions trainer_options; trainer_options.output_dir = FileHelper::MakeSegmentPath(path_, id()) + "/omega_model"; @@ -2401,6 +2408,9 @@ Status SegmentImpl::auto_train_omega_index_internal( LOG_INFO("Successfully trained OMEGA model for segment %d, output: %s", id(), trainer_options.output_dir.c_str()); +#else + LOG_INFO("OMEGA training skipped (ZVEC_ENABLE_OMEGA not defined)"); +#endif // Step 3: Load model into the provided indexers // TODO: Implement model loading into VectorColumnIndexer diff --git a/src/db/training/omega_model_trainer.cc b/src/db/training/omega_model_trainer.cc index 7cf754e64..7b1b4f135 100644 --- a/src/db/training/omega_model_trainer.cc +++ b/src/db/training/omega_model_trainer.cc @@ 
-12,45 +12,51 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifdef ZVEC_ENABLE_OMEGA + #include "omega_model_trainer.h" -#include -#include -#include +#include +#include #include namespace zvec { -Status OmegaModelTrainer::TrainModel( - const std::vector& training_records, - const OmegaModelTrainerOptions& options) { - if (training_records.empty()) { - return Status::InvalidArgument("Training records are empty"); - } - - if (options.output_dir.empty()) { - return Status::InvalidArgument("Output directory is empty"); - } - - // Step 1: Export training records to CSV - std::string csv_path = options.output_dir + "/training_data.csv"; - LOG_INFO("Exporting %zu training records to CSV: %s", - training_records.size(), csv_path.c_str()); +namespace { + +// Convert zvec TrainingRecord to omega TrainingRecord +omega::TrainingRecord ConvertRecord(const core_interface::TrainingRecord& src) { + omega::TrainingRecord dst; + dst.query_id = src.query_id; + dst.hops_visited = src.hops_visited; + dst.cmps_visited = src.cmps_visited; + dst.dist_1st = src.dist_1st; + dst.dist_start = src.dist_start; + // Convert std::array to std::vector + dst.traversal_window_stats.assign(src.traversal_window_stats.begin(), + src.traversal_window_stats.end()); + dst.label = src.label; + dst.collected_node_ids = src.collected_node_ids; + return dst; +} - auto status = ExportToCSV(training_records, csv_path); - if (!status.ok()) { - return status; - } +// Convert zvec GtCmpsData to omega GtCmpsData +omega::GtCmpsData ConvertGtCmpsData(const core_interface::GtCmpsData& src) { + omega::GtCmpsData dst; + dst.num_queries = src.num_queries; + dst.topk = src.topk; + dst.gt_cmps = src.gt_cmps; + dst.total_cmps = src.total_cmps; + return dst; +} - // Step 2: Invoke Python training script - LOG_INFO("Invoking Python training script"); - status = InvokePythonTrainer(csv_path, options); - if (!status.ok()) { - return status; - } +} // namespace - 
LOG_INFO("Successfully trained OMEGA model, output: %s", - options.output_dir.c_str()); - return Status::OK(); +Status OmegaModelTrainer::TrainModel( + const std::vector& training_records, + const OmegaModelTrainerOptions& options) { + // Call TrainModelWithGtCmps with empty gt_cmps_data + core_interface::GtCmpsData empty_gt_cmps; + return TrainModelWithGtCmps(training_records, empty_gt_cmps, options); } Status OmegaModelTrainer::TrainModelWithGtCmps( @@ -65,141 +71,48 @@ Status OmegaModelTrainer::TrainModelWithGtCmps( return Status::InvalidArgument("Output directory is empty"); } - // Step 1: Export training records to CSV - std::string csv_path = options.output_dir + "/training_data.csv"; - LOG_INFO("Exporting %zu training records to CSV: %s", - training_records.size(), csv_path.c_str()); + auto total_start = std::chrono::high_resolution_clock::now(); - auto status = ExportToCSV(training_records, csv_path); - if (!status.ok()) { - return status; - } + LOG_INFO("Training OMEGA model using C++ LightGBM API (%zu records)", + training_records.size()); - // Step 2: Export gt_cmps data to CSV - std::string gt_cmps_path = options.output_dir + "/gt_cmps.csv"; - LOG_INFO("Exporting gt_cmps data to CSV: %s", gt_cmps_path.c_str()); - - status = ExportGtCmpsToCSV(gt_cmps_data, gt_cmps_path); - if (!status.ok()) { - return status; + // Convert training records + std::vector omega_records; + omega_records.reserve(training_records.size()); + for (const auto& r : training_records) { + omega_records.push_back(ConvertRecord(r)); } - // Step 3: Invoke Python training script with gt_cmps - LOG_INFO("Invoking Python training script with gt_cmps"); - status = InvokePythonTrainer(csv_path, options, gt_cmps_path); - if (!status.ok()) { - return status; - } + // Convert gt_cmps data + omega::GtCmpsData omega_gt_cmps = ConvertGtCmpsData(gt_cmps_data); - LOG_INFO("Successfully trained OMEGA model with gt_cmps, output: %s", - options.output_dir.c_str()); - return Status::OK(); -} + // Setup 
trainer options + omega::OmegaTrainerOptions trainer_options; + trainer_options.output_dir = options.output_dir; + trainer_options.num_iterations = options.num_iterations; + trainer_options.num_leaves = options.num_leaves; + trainer_options.learning_rate = options.learning_rate; + trainer_options.num_threads = options.num_threads; + trainer_options.verbose = options.verbose; + trainer_options.topk = gt_cmps_data.topk > 0 ? gt_cmps_data.topk : 100; -Status OmegaModelTrainer::ExportToCSV( - const std::vector& records, - const std::string& csv_path) { - std::ofstream csv_file(csv_path); - if (!csv_file.is_open()) { - return Status::InternalError("Failed to open CSV file for writing: " + csv_path); - } + // Train model + int ret = omega::OmegaTrainer::TrainModel(omega_records, omega_gt_cmps, trainer_options); - // Write CSV header - csv_file << "query_id,hops_visited,cmps_visited,dist_1st,dist_start," - << "stat_0,stat_1,stat_2,stat_3,stat_4,stat_5,stat_6,label\n"; - - // Write training records - for (const auto& record : records) { - csv_file << record.query_id << "," - << record.hops_visited << "," - << record.cmps_visited << "," - << record.dist_1st << "," - << record.dist_start << ","; - - // Write traversal window stats (7 dimensions) - for (size_t i = 0; i < record.traversal_window_stats.size(); ++i) { - csv_file << record.traversal_window_stats[i]; - if (i < record.traversal_window_stats.size() - 1) { - csv_file << ","; - } - } - - csv_file << "," << record.label << "\n"; - } - - csv_file.close(); - - if (!csv_file.good()) { - return Status::InternalError("Error writing CSV file: " + csv_path); - } - - LOG_INFO("Successfully exported %zu records to CSV", records.size()); - return Status::OK(); -} - -Status OmegaModelTrainer::ExportGtCmpsToCSV( - const core_interface::GtCmpsData& gt_cmps_data, - const std::string& csv_path) { - std::ofstream csv_file(csv_path); - if (!csv_file.is_open()) { - return Status::InternalError("Failed to open gt_cmps CSV file for 
writing: " + csv_path); - } - - // Write CSV header - csv_file << "query_id,rank,cmps\n"; - - // Write gt_cmps data - for (size_t query_id = 0; query_id < gt_cmps_data.gt_cmps.size(); ++query_id) { - const auto& cmps_per_rank = gt_cmps_data.gt_cmps[query_id]; - for (size_t rank = 0; rank < cmps_per_rank.size(); ++rank) { - csv_file << query_id << "," << rank << "," << cmps_per_rank[rank] << "\n"; - } - } - - csv_file.close(); - - if (!csv_file.good()) { - return Status::InternalError("Error writing gt_cmps CSV file: " + csv_path); - } - - LOG_INFO("Successfully exported gt_cmps for %zu queries to CSV", - gt_cmps_data.num_queries); - return Status::OK(); -} - -Status OmegaModelTrainer::InvokePythonTrainer( - const std::string& csv_path, - const OmegaModelTrainerOptions& options, - const std::string& gt_cmps_path) { - // Build Python command - std::ostringstream cmd; - cmd << options.python_executable - << " -m zvec._omega_training train" - << " --input " << csv_path - << " --output " << options.output_dir; - - // Add gt_cmps path if provided - if (!gt_cmps_path.empty()) { - cmd << " --gt_cmps " << gt_cmps_path; - } - - if (options.verbose) { - cmd << " --verbose"; - } - - std::string command = cmd.str(); - LOG_INFO("Executing: %s", command.c_str()); - - // Execute command - int ret = std::system(command.c_str()); + auto total_end = std::chrono::high_resolution_clock::now(); + auto total_ms = std::chrono::duration_cast(total_end - total_start).count(); if (ret != 0) { - return Status::InternalError( - "Python training script failed with exit code: " + std::to_string(ret)); + LOG_ERROR("OMEGA model training failed (return code: %d)", ret); + return Status::InternalError("OMEGA model training failed"); } - LOG_INFO("Python training script completed successfully"); + LOG_INFO("[TIMING] TrainModelWithGtCmps (C++ LightGBM) TOTAL: %ld ms", total_ms); + LOG_INFO("Successfully trained OMEGA model, output: %s", options.output_dir.c_str()); + return Status::OK(); } } // 
namespace zvec + +#endif // ZVEC_ENABLE_OMEGA diff --git a/src/db/training/omega_model_trainer.h b/src/db/training/omega_model_trainer.h index 14b288b5c..865cbd235 100644 --- a/src/db/training/omega_model_trainer.h +++ b/src/db/training/omega_model_trainer.h @@ -14,6 +14,8 @@ #pragma once +#ifdef ZVEC_ENABLE_OMEGA + #include #include #include @@ -28,19 +30,21 @@ struct OmegaModelTrainerOptions { // Output directory for trained model files std::string output_dir; - // Path to Python executable (default: python3) - std::string python_executable = "python3"; + // LightGBM training parameters + int num_iterations = 100; + int num_leaves = 31; + double learning_rate = 0.1; + int num_threads = 8; // Enable verbose logging during training bool verbose = false; }; /** - * @brief OMEGA model trainer (calls Python training script) + * @brief OMEGA model trainer using LightGBM C API * - * This class bridges C++ training data collection with Python model training. - * It exports TrainingRecord data to CSV format and invokes the Python - * _omega_training.py script to train a LightGBM model. + * This class trains a LightGBM binary classifier directly in C++, + * eliminating the need for Python subprocess and CSV serialization. */ class OmegaModelTrainer { public: @@ -58,8 +62,8 @@ class OmegaModelTrainer { /** * @brief Train OMEGA model with gt_cmps data for table generation * - * This is the extended version that also exports gt_cmps data for - * generating gt_collected_table and gt_cmps_all_table. + * This is the extended version that also generates gt_collected_table + * and gt_cmps_all_table from gt_cmps data. 
* * @param training_records Training data collected from searches * @param gt_cmps_data Ground truth cmps data for table generation @@ -70,52 +74,8 @@ class OmegaModelTrainer { const std::vector& training_records, const core_interface::GtCmpsData& gt_cmps_data, const OmegaModelTrainerOptions& options); - - private: - /** - * @brief Export training records to CSV format - * - * CSV format: - * query_id,hops_visited,cmps_visited,dist_1st,dist_start, - * stat_0,stat_1,stat_2,stat_3,stat_4,stat_5,stat_6,label - * - * @param records Training records to export - * @param csv_path Output CSV file path - * @return Status indicating success or failure - */ - static Status ExportToCSV( - const std::vector& records, - const std::string& csv_path); - - /** - * @brief Export gt_cmps data to CSV format - * - * CSV format: - * query_id,rank,cmps - * - * @param gt_cmps_data Ground truth cmps data - * @param csv_path Output CSV file path - * @return Status indicating success or failure - */ - static Status ExportGtCmpsToCSV( - const core_interface::GtCmpsData& gt_cmps_data, - const std::string& csv_path); - - /** - * @brief Invoke Python training script - * - * Calls: python3 -m zvec._omega_training train \ - * --input --output [--verbose] - * - * @param csv_path Input CSV file path - * @param options Training configuration - * @param gt_cmps_path Optional path to gt_cmps CSV file - * @return Status indicating success or failure - */ - static Status InvokePythonTrainer( - const std::string& csv_path, - const OmegaModelTrainerOptions& options, - const std::string& gt_cmps_path = ""); }; } // namespace zvec + +#endif // ZVEC_ENABLE_OMEGA diff --git a/src/db/training/query_generator.cc b/src/db/training/query_generator.cc index 32832b278..d0edac84d 100644 --- a/src/db/training/query_generator.cc +++ b/src/db/training/query_generator.cc @@ -19,18 +19,18 @@ namespace zvec { -std::vector> TrainingQueryGenerator::SampleBaseVectors( +SampledVectors 
TrainingQueryGenerator::SampleBaseVectorsWithIds( const Segment::Ptr& segment, const std::string& field_name, size_t num_samples, uint64_t seed) { - std::vector> sampled_vectors; + SampledVectors result; // Get total document count uint64_t doc_count = segment->doc_count(); if (doc_count == 0) { LOG_WARN("Segment has no documents, cannot sample base vectors"); - return sampled_vectors; + return result; } // Adjust num_samples if it exceeds doc_count @@ -44,7 +44,8 @@ std::vector> TrainingQueryGenerator::SampleBaseVectors( std::mt19937_64 rng(seed); std::uniform_int_distribution dist(0, doc_count - 1); - sampled_vectors.reserve(actual_samples); + result.vectors.reserve(actual_samples); + result.doc_ids.reserve(actual_samples); // Sample vectors for (size_t i = 0; i < actual_samples; ++i) { @@ -67,13 +68,24 @@ std::vector> TrainingQueryGenerator::SampleBaseVectors( continue; } - sampled_vectors.push_back(vector_opt.value()); + result.vectors.push_back(vector_opt.value()); + result.doc_ids.push_back(doc_idx); } - LOG_INFO("Successfully sampled %zu/%zu vectors from segment", - sampled_vectors.size(), actual_samples); + LOG_INFO("Successfully sampled %zu/%zu vectors with doc_ids from segment", + result.vectors.size(), actual_samples); - return sampled_vectors; + return result; +} + +std::vector> TrainingQueryGenerator::SampleBaseVectors( + const Segment::Ptr& segment, + const std::string& field_name, + size_t num_samples, + uint64_t seed) { + // Use the new method and extract just the vectors + auto sampled = SampleBaseVectorsWithIds(segment, field_name, num_samples, seed); + return std::move(sampled.vectors); } std::vector> TrainingQueryGenerator::AddGaussianNoise( @@ -116,6 +128,26 @@ std::vector> TrainingQueryGenerator::AddGaussianNoise( return noisy_vectors; } +SampledVectors TrainingQueryGenerator::GenerateHeldOutQueries( + const Segment::Ptr& segment, + const std::string& field_name, + size_t num_queries, + uint64_t seed) { + // Sample vectors directly from the 
index - no noise added + // These vectors will be used as queries, with their doc_ids excluded from ground truth + auto result = SampleBaseVectorsWithIds(segment, field_name, num_queries, seed); + + if (result.vectors.empty()) { + LOG_ERROR("Failed to sample vectors from segment for held-out queries"); + return result; + } + + LOG_INFO("Generated %zu held-out queries (vectors sampled directly from index)", + result.vectors.size()); + + return result; +} + std::vector> TrainingQueryGenerator::GenerateTrainingQueries( const Segment::Ptr& segment, const std::string& field_name, diff --git a/src/db/training/query_generator.h b/src/db/training/query_generator.h index 14c9268d0..723ca1f4e 100644 --- a/src/db/training/query_generator.h +++ b/src/db/training/query_generator.h @@ -22,17 +22,41 @@ namespace zvec { +/** + * @brief Result of sampling base vectors, includes both vectors and doc_ids + */ +struct SampledVectors { + std::vector> vectors; + std::vector doc_ids; // doc_id of each sampled vector (for exclusion in GT) +}; + /** * @brief Training query generator for OMEGA model training * * This class provides utilities to generate training queries by: - * 1. Sampling base vectors from a persisted segment - * 2. Adding Gaussian noise to simulate realistic query variations + * 1. Sampling base vectors from a persisted segment (held-out approach) + * 2. Using sampled vectors directly as queries (no noise) + * 3. 
Ground truth computation excludes the query vector itself */ class TrainingQueryGenerator { public: /** - * @brief Sample base vectors from a segment + * @brief Sample base vectors from a segment with doc_ids + * + * @param segment The segment to sample from (must be persisted) + * @param field_name The vector field name to sample + * @param num_samples Number of vectors to sample + * @param seed Random seed for reproducibility + * @return SampledVectors with vectors and their doc_ids + */ + static SampledVectors SampleBaseVectorsWithIds( + const Segment::Ptr& segment, + const std::string& field_name, + size_t num_samples, + uint64_t seed = 42); + + /** + * @brief Sample base vectors from a segment (legacy, without doc_ids) * * @param segment The segment to sample from (must be persisted) * @param field_name The vector field name to sample @@ -60,7 +84,25 @@ class TrainingQueryGenerator { uint64_t seed = 42); /** - * @brief Generate training queries (sample + noise) + * @brief Generate training queries using held-out approach + * + * Samples vectors directly from index and uses them as queries. + * Returns doc_ids so ground truth can exclude self-matches. + * + * @param segment The segment to sample from + * @param field_name The vector field name + * @param num_queries Number of training queries to generate + * @param seed Random seed for reproducibility + * @return SampledVectors with query vectors and their doc_ids + */ + static SampledVectors GenerateHeldOutQueries( + const Segment::Ptr& segment, + const std::string& field_name, + size_t num_queries, + uint64_t seed = 42); + + /** + * @brief Generate training queries (sample + noise) - legacy method * * Combines sampling and noise addition in one step. 
* diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 1ea4ab8ad..719da0a4a 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -73,26 +73,27 @@ TrainingDataCollector::CollectTrainingData( const std::string& field_name, const TrainingDataCollectorOptions& options, const std::vector& provided_indexers) { - // Step 1: Generate training queries - LOG_INFO("Generating %zu training queries for field '%s'", + // Step 1: Generate training queries using held-out approach + LOG_INFO("Generating %zu held-out training queries for field '%s'", options.num_training_queries, field_name.c_str()); - auto training_queries = TrainingQueryGenerator::GenerateTrainingQueries( - segment, field_name, options.num_training_queries, - options.noise_scale, options.seed); - + auto sampled = TrainingQueryGenerator::GenerateHeldOutQueries( + segment, field_name, options.num_training_queries, options.seed); + auto training_queries = std::move(sampled.vectors); + auto query_doc_ids = std::move(sampled.doc_ids); if (training_queries.empty()) { return tl::make_unexpected( Status::InternalError("Failed to generate training queries")); } - // Step 2: Compute ground truth (brute force search with recall = 1) - LOG_INFO("Computing ground truth with brute force search (topk=%zu)", + // Step 2: Compute ground truth (brute force search, excluding self-matches) + LOG_INFO("Computing ground truth with brute force search (topk=%zu, excluding self)", options.topk); auto ground_truth = ComputeGroundTruth( - segment, field_name, training_queries, options.topk, options.num_threads); + segment, field_name, training_queries, options.topk, options.num_threads, + query_doc_ids); if (ground_truth.empty()) { return tl::make_unexpected( @@ -255,9 +256,16 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( const std::string& field_name, const std::vector>& queries, size_t topk, - size_t num_threads) { + 
size_t num_threads, + const std::vector& query_doc_ids) { std::vector> ground_truth(queries.size()); + // Check if we have query doc_ids for self-exclusion (held-out mode) + bool held_out_mode = !query_doc_ids.empty() && query_doc_ids.size() == queries.size(); + if (held_out_mode) { + LOG_INFO("Computing ground truth in held-out mode (excluding self-matches)"); + } + // Get vector indexer (use brute force with is_linear=true) auto combined_indexer = segment->get_combined_vector_indexer(field_name); if (!combined_indexer) { @@ -285,6 +293,9 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( for (size_t query_idx = start_idx; query_idx < end_idx; ++query_idx) { const auto& query_vector = queries[query_idx]; + // In held-out mode, request topk+1 results since we'll exclude self + size_t search_topk = held_out_mode ? topk + 1 : topk; + // Prepare query parameters for brute force search vector_column_params::VectorData vector_data; vector_data.vector = vector_column_params::DenseVector{ @@ -292,12 +303,12 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( }; vector_column_params::QueryParams query_params; - query_params.topk = topk; + query_params.topk = search_topk; query_params.fetch_vector = false; query_params.filter = segment->get_filter().get(); // Use linear search (brute force) for ground truth - auto base_params = std::make_shared(topk); + auto base_params = std::make_shared(search_topk); base_params->set_is_linear(true); query_params.query_params = base_params; @@ -309,13 +320,20 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( continue; } - // Extract result doc IDs + // Extract result doc IDs, excluding self in held-out mode auto& results = search_result.value(); std::vector gt_ids; - gt_ids.reserve(results->count()); + gt_ids.reserve(topk); + + uint64_t self_doc_id = held_out_mode ? 
query_doc_ids[query_idx] : UINT64_MAX; + auto iter = results->create_iterator(); - while (iter->valid()) { - gt_ids.push_back(iter->doc_id()); + while (iter->valid() && gt_ids.size() < topk) { + uint64_t doc_id = iter->doc_id(); + // Skip self in held-out mode + if (doc_id != self_doc_id) { + gt_ids.push_back(doc_id); + } iter->next(); } ground_truth[query_idx] = std::move(gt_ids); @@ -375,69 +393,107 @@ void TrainingDataCollector::FillLabels( return; } - // Build sets from collected_node_ids for fast lookup - size_t labeled_count = 0; - size_t positive_count = 0; - size_t negative_count = 0; + auto fill_start = std::chrono::high_resolution_clock::now(); - for (auto& record : *records) { - int query_id = record.query_id; + // Use parallel processing for large record counts + size_t num_records = records->size(); + size_t num_threads = std::min(static_cast(std::thread::hardware_concurrency()), + std::max(num_records / 10000, static_cast(1))); - // Validate query_id - if (query_id < 0 || query_id >= static_cast(ground_truth.size())) { - LOG_WARN("Invalid query_id %d in training record (ground_truth size: %zu)", - query_id, ground_truth.size()); - record.label = 0; - negative_count++; - continue; - } + std::atomic positive_count{0}; + std::atomic negative_count{0}; + std::atomic processed_count{0}; - const auto& gt = ground_truth[query_id]; - if (gt.empty()) { - // No ground truth for this query, label as negative - record.label = 0; - negative_count++; - labeled_count++; - continue; - } + auto worker = [&](size_t start_idx, size_t end_idx) { + size_t local_positive = 0; + size_t local_negative = 0; + + for (size_t idx = start_idx; idx < end_idx; ++idx) { + auto& record = (*records)[idx]; + int query_id = record.query_id; - // Take top k_train ground truth nodes - size_t actual_k = std::min(k_train, gt.size()); + // Validate query_id + if (query_id < 0 || query_id >= static_cast(ground_truth.size())) { + record.label = 0; + local_negative++; + continue; + } + + const 
auto& gt = ground_truth[query_id]; + if (gt.empty()) { + record.label = 0; + local_negative++; + continue; + } - // Convert collected_node_ids to set for fast lookup - std::unordered_set collected_set( - record.collected_node_ids.begin(), - record.collected_node_ids.end()); + // Take top k_train ground truth nodes + size_t actual_k = std::min(k_train, gt.size()); + + // For small k_train (typical case: k_train=1), use linear search + // This is faster than building a hash set for each record + bool all_found = true; + const auto& collected = record.collected_node_ids; + + for (size_t i = 0; i < actual_k && all_found; ++i) { + uint64_t gt_node = gt[i]; + // Linear search in collected_node_ids + bool found = false; + for (uint64_t node : collected) { + if (node == gt_node) { + found = true; + break; + } + } + if (!found) { + all_found = false; + } + } - // Check if ALL top k_train ground truth nodes are in collected_node_ids - bool all_found = true; - for (size_t i = 0; i < actual_k; ++i) { - if (collected_set.find(gt[i]) == collected_set.end()) { - all_found = false; - break; + if (all_found) { + record.label = 1; + local_positive++; + } else { + record.label = 0; + local_negative++; } } - // Label based on whether all top k_train GT nodes are collected - if (all_found) { - record.label = 1; - positive_count++; - } else { - record.label = 0; - negative_count++; + positive_count += local_positive; + negative_count += local_negative; + processed_count += (end_idx - start_idx); + }; + + // Launch threads + std::vector threads; + size_t records_per_thread = (num_records + num_threads - 1) / num_threads; + + for (size_t t = 0; t < num_threads; ++t) { + size_t start_idx = t * records_per_thread; + size_t end_idx = std::min(start_idx + records_per_thread, num_records); + if (start_idx < end_idx) { + threads.emplace_back(worker, start_idx, end_idx); } + } - labeled_count++; + // Wait for all threads + for (auto& thread : threads) { + thread.join(); } - LOG_INFO("Filled 
labels for %zu/%zu records (%zu positive, %zu negative, k_train=%zu)", - labeled_count, records->size(), positive_count, negative_count, k_train); + auto fill_end = std::chrono::high_resolution_clock::now(); + auto fill_ms = std::chrono::duration_cast(fill_end - fill_start).count(); + + LOG_INFO("Filled labels for %zu/%zu records (%zu positive, %zu negative, k_train=%zu) in %zu ms (%zu threads)", + processed_count.load(), records->size(), positive_count.load(), negative_count.load(), k_train, + fill_ms, num_threads); } core_interface::GtCmpsData TrainingDataCollector::ComputeGtCmps( const std::vector& records, const std::vector>& ground_truth, size_t topk) { + auto compute_start = std::chrono::high_resolution_clock::now(); + core_interface::GtCmpsData result; result.topk = topk; result.num_queries = ground_truth.size(); @@ -459,7 +515,11 @@ core_interface::GtCmpsData TrainingDataCollector::ComputeGtCmps( // Records are ordered by (query_id, cmps_visited) int current_query = -1; int max_cmps_for_query = 0; - std::unordered_set gt_set; + size_t gt_found_count = 0; // Track how many GT nodes we've found (for early exit) + size_t gt_target_count = 0; // How many GT nodes we need to find for current query + + // Map from GT node_id to its rank (for O(1) lookup instead of linear search) + std::unordered_map gt_node_to_rank; const std::vector* current_gt = nullptr; for (const auto& record : records) { @@ -483,20 +543,32 @@ core_interface::GtCmpsData TrainingDataCollector::ComputeGtCmps( current_query = query_id; max_cmps_for_query = record.cmps_visited; current_gt = &ground_truth[query_id]; - gt_set.clear(); - for (size_t i = 0; i < std::min(topk, current_gt->size()); ++i) { - gt_set.insert((*current_gt)[i]); + gt_found_count = 0; + + // Build map from GT node_id to rank for O(1) lookup + gt_node_to_rank.clear(); + gt_target_count = std::min(topk, current_gt->size()); + for (size_t i = 0; i < gt_target_count; ++i) { + gt_node_to_rank[(*current_gt)[i]] = i; } } + // 
OPTIMIZATION: Early exit if we've found all GT nodes for this query + if (gt_found_count >= gt_target_count) { + continue; + } + // Check which GT nodes are in collected_node_ids for (uint64_t node_id : record.collected_node_ids) { - // Check if this node is a GT node and we haven't recorded its cmps yet - if (gt_set.count(node_id) > 0) { - // Find the rank of this GT node - for (size_t rank = 0; rank < std::min(topk, current_gt->size()); ++rank) { - if ((*current_gt)[rank] == node_id && result.gt_cmps[query_id][rank] == -1) { - result.gt_cmps[query_id][rank] = record.cmps_visited; + auto it = gt_node_to_rank.find(node_id); + if (it != gt_node_to_rank.end()) { + size_t rank = it->second; + // Only record if we haven't found this GT yet + if (result.gt_cmps[query_id][rank] == -1) { + result.gt_cmps[query_id][rank] = record.cmps_visited; + gt_found_count++; + // Early exit from inner loop if all found + if (gt_found_count >= gt_target_count) { break; } } @@ -518,7 +590,10 @@ core_interface::GtCmpsData TrainingDataCollector::ComputeGtCmps( } } - LOG_INFO("Computed gt_cmps for %zu queries, topk=%zu", result.num_queries, result.topk); + auto compute_end = std::chrono::high_resolution_clock::now(); + auto compute_ms = std::chrono::duration_cast(compute_end - compute_start).count(); + + LOG_INFO("Computed gt_cmps for %zu queries, topk=%zu in %zu ms", result.num_queries, result.topk, compute_ms); return result; } @@ -530,17 +605,21 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( const std::vector& provided_indexers) { ScopedTimer total_timer("CollectTrainingDataWithGtCmps [TOTAL]"); - // Step 1: Generate training queries - LOG_INFO("Generating %zu training queries for field '%s'", + // Step 1: Generate training queries using held-out approach + // (sample vectors directly from index, no noise) + LOG_INFO("Generating %zu held-out training queries for field '%s'", options.num_training_queries, field_name.c_str()); std::vector> training_queries; + std::vector 
query_doc_ids; // doc_ids for self-exclusion in GT { - ScopedTimer timer("Step1: GenerateTrainingQueries"); - training_queries = TrainingQueryGenerator::GenerateTrainingQueries( - segment, field_name, options.num_training_queries, - options.noise_scale, options.seed); - DebugLog(" Generated " + std::to_string(training_queries.size()) + " queries"); + ScopedTimer timer("Step1: GenerateHeldOutQueries"); + auto sampled = TrainingQueryGenerator::GenerateHeldOutQueries( + segment, field_name, options.num_training_queries, options.seed); + training_queries = std::move(sampled.vectors); + query_doc_ids = std::move(sampled.doc_ids); + DebugLog(" Generated " + std::to_string(training_queries.size()) + + " held-out queries (with doc_ids for self-exclusion)"); } if (training_queries.empty()) { @@ -548,18 +627,19 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( Status::InternalError("Failed to generate training queries")); } - // Step 2: Compute ground truth (brute force search with recall = 1) - LOG_INFO("Computing ground truth with brute force search (topk=%zu)", + // Step 2: Compute ground truth (brute force search, excluding self-matches) + LOG_INFO("Computing ground truth with brute force search (topk=%zu, excluding self)", options.topk); std::vector> ground_truth; { - ScopedTimer timer("Step2: ComputeGroundTruth (BRUTE FORCE PARALLEL)"); + ScopedTimer timer("Step2: ComputeGroundTruth (BRUTE FORCE PARALLEL, HELD-OUT)"); DebugLog(" num_queries=" + std::to_string(training_queries.size()) + ", topk=" + std::to_string(options.topk) + ", threads=" + std::to_string(options.num_threads == 0 ? 
std::thread::hardware_concurrency() : options.num_threads)); ground_truth = ComputeGroundTruth( - segment, field_name, training_queries, options.topk, options.num_threads); + segment, field_name, training_queries, options.topk, options.num_threads, + query_doc_ids); // Pass doc_ids for self-exclusion DebugLog(" Computed ground truth for " + std::to_string(ground_truth.size()) + " queries"); } diff --git a/src/db/training/training_data_collector.h b/src/db/training/training_data_collector.h index b5fdbe09f..b633f155f 100644 --- a/src/db/training/training_data_collector.h +++ b/src/db/training/training_data_collector.h @@ -114,6 +114,7 @@ class TrainingDataCollector { * @param queries Training query vectors * @param topk Number of top results to retrieve * @param num_threads Number of threads (0 = hardware_concurrency) + * @param query_doc_ids Optional doc_ids of query vectors (for self-exclusion in held-out mode) * @return Ground truth doc IDs for each query */ static std::vector> ComputeGroundTruth( @@ -121,7 +122,8 @@ class TrainingDataCollector { const std::string& field_name, const std::vector>& queries, size_t topk, - size_t num_threads); + size_t num_threads, + const std::vector& query_doc_ids = {}); /** * @brief Fill labels in training records based on ground truth diff --git a/src/include/zvec/core/framework/index_context.h b/src/include/zvec/core/framework/index_context.h index c77fcf42e..6447999a6 100644 --- a/src/include/zvec/core/framework/index_context.h +++ b/src/include/zvec/core/framework/index_context.h @@ -22,6 +22,7 @@ #include #include #include +#include namespace zvec { namespace core { @@ -248,6 +249,15 @@ class IndexContext { return profiler_; } + //! Get training records collected during search (for OMEGA training mode) + //! Default implementation returns empty vector. Override in OmegaContext. + virtual std::vector take_training_records() { + return {}; + } + + //! 
Clear training records (call before each search if context is reused) + virtual void clear_training_records() {} + private: //! Members IndexFilter filter_{}; diff --git a/src/include/zvec/core/interface/index.h b/src/include/zvec/core/interface/index.h index 6921a4d29..2888d51ae 100644 --- a/src/include/zvec/core/interface/index.h +++ b/src/include/zvec/core/interface/index.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -103,6 +104,8 @@ struct SearchResult { // use string to manage memory std::vector reverted_vector_list_{}; std::vector reverted_sparse_values_list_{}; + // Training records collected during search (for OMEGA training mode) + std::vector training_records_{}; }; class Index { diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index 341bbddab..644eae9d1 100644 --- a/thirdparty/CMakeLists.txt +++ b/thirdparty/CMakeLists.txt @@ -24,5 +24,12 @@ add_subdirectory(rocksdb rocksdb EXCLUDE_FROM_ALL) add_subdirectory(CRoaring CRoaring EXCLUDE_FROM_ALL) add_subdirectory(arrow arrow EXCLUDE_FROM_ALL) add_subdirectory(magic_enum magic_enum EXCLUDE_FROM_ALL) -add_subdirectory(omega omega EXCLUDE_FROM_ALL) + +# omega is only built when ZVEC_ENABLE_OMEGA is ON +if(ZVEC_ENABLE_OMEGA) + message(STATUS "ZVEC: Building omega library with LightGBM support") + add_subdirectory(omega omega EXCLUDE_FROM_ALL) +else() + message(STATUS "ZVEC: Skipping omega library (ZVEC_ENABLE_OMEGA=OFF)") +endif() diff --git a/thirdparty/omega b/thirdparty/omega index 4a8b0bcaf..f0b50e825 160000 --- a/thirdparty/omega +++ b/thirdparty/omega @@ -1 +1 @@ -Subproject commit 4a8b0bcaf107d1f79607b550a3dbfc69f7058143 +Subproject commit f0b50e8256cc535e84b5b261db154f112efff589 From 4f982d062f105818ace4c462b23e1c1c66251caa Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 8 Mar 2026 18:16:12 +0800 Subject: [PATCH 011/126] feat(omega): add OmegaQueryParam to Python API - Expose target_recall parameter for OMEGA adaptive early stopping - 
Update OMEGA tests with 100k docs and recall validation - Remove deprecated _omega_training.py --- python/tests/test_collection.py | 602 ++++++++---------- python/zvec/__init__.py | 2 + python/zvec/__init__.pyi | 4 + python/zvec/_omega_training.py | 346 ---------- python/zvec/model/param/__init__.py | 2 + python/zvec/model/param/__init__.pyi | 55 ++ python/zvec/model/param/vector_query.py | 17 +- .../python/model/param/python_param.cc | 77 +++ src/core/algorithm/omega/omega_searcher.cc | 4 +- 9 files changed, 434 insertions(+), 675 deletions(-) delete mode 100644 python/zvec/_omega_training.py diff --git a/python/tests/test_collection.py b/python/tests/test_collection.py index e239a88a0..7ab49f9f5 100644 --- a/python/tests/test_collection.py +++ b/python/tests/test_collection.py @@ -33,6 +33,7 @@ VectorQuery, OptimizeOption, OmegaIndexParam, + OmegaQueryParam, MetricType, ) @@ -1052,126 +1053,75 @@ def test_collection_query_with_weighted_reranker_by_hybrid_vector( # ---------------------------- -@pytest.fixture(scope="session") -def omega_collection_schema(): - """Schema with OMEGA index for testing automatic training.""" - return zvec.CollectionSchema( - name="omega_test_collection", - fields=[ - FieldSchema( - "id", - DataType.INT64, - nullable=False, - index_param=InvertIndexParam(enable_range_optimization=True), - ), - FieldSchema("name", DataType.STRING, nullable=False), - ], - vectors=[ - VectorSchema( - "embedding", - DataType.VECTOR_FP32, - dimension=128, - index_param=OmegaIndexParam( - metric_type=MetricType.IP, - m=16, - ef_construction=200, - ), - ), - ], - ) - - -@pytest.fixture -def omega_test_collection( - tmp_path_factory, omega_collection_schema, collection_option -) -> Collection: - """Create a collection with OMEGA index for testing.""" - temp_dir = tmp_path_factory.mktemp("zvec_omega") - collection_path = temp_dir / "omega_collection" - - coll = zvec.create_and_open( - path=str(collection_path), - schema=omega_collection_schema, - 
option=collection_option, - ) - - assert coll is not None, "Failed to create OMEGA collection" - assert coll.schema.name == omega_collection_schema.name +class TestOmegaFullWorkflow: + """ + Complete end-to-end test for OMEGA adaptive search. + + This test validates the entire OMEGA workflow: + 1. Collection creation with OmegaIndexParam + 2. Data insertion (100,000 documents to meet min_vector_threshold) + 3. Automatic training triggered by optimize() + 4. Model file generation (LightGBM model + lookup tables) + 5. Search functionality with OMEGA early stopping enabled + 6. Recall validation + """ - # Verify OMEGA index param - embedding_field = coll.schema.vector("embedding") - assert embedding_field is not None - assert embedding_field.index_param is not None - assert embedding_field.index_param.type == IndexType.OMEGA + def test_omega_end_to_end_workflow(self, tmp_path_factory): + """Full OMEGA workflow: create → insert 100k docs → train → search with early stopping.""" + import numpy as np + import os - try: - yield coll - finally: - if hasattr(coll, "destroy") and coll is not None: - try: - coll.destroy() - except Exception as e: - print(f"Warning: failed to destroy OMEGA collection: {e}") + print("\n" + "="*80) + print("OMEGA End-to-End Workflow Test (100k documents)") + print("="*80) + # Step 1: Create collection with OMEGA index + print("\n[Step 1/6] Creating OMEGA collection...") + temp_dir = tmp_path_factory.mktemp("omega_e2e") + collection_path = str(temp_dir / "omega_collection") -@pytest.fixture -def omega_docs_large(): - """Generate 1500 documents to trigger segment dump and training.""" - import numpy as np - - docs = [] - for i in range(1500): - # Generate somewhat structured vectors for better training - base_vector = np.random.randn(128).astype(np.float32) - base_vector = base_vector / np.linalg.norm(base_vector) # Normalize - docs.append( - Doc( - id=f"{i}", - fields={"id": i, "name": f"doc_{i}"}, - vectors={"embedding": base_vector.tolist()}, - 
) + schema = zvec.CollectionSchema( + name="omega_e2e_test", + fields=[ + FieldSchema("id", DataType.INT64, nullable=False), + FieldSchema("name", DataType.STRING, nullable=False), + ], + vectors=[ + VectorSchema( + "embedding", + DataType.VECTOR_FP32, + dimension=128, + index_param=OmegaIndexParam( + metric_type=MetricType.L2, + m=16, + ef_construction=200, + min_vector_threshold=100000, # Explicitly set threshold + ), + ), + ], ) - return docs - - -@pytest.mark.usefixtures("omega_test_collection") -class TestCollectionOmegaIndex: - """Test cases for OMEGA index functionality.""" - def test_omega_index_param_creation(self, omega_test_collection: Collection): - """Test that OmegaIndexParam is correctly created and configured.""" - embedding_field = omega_test_collection.schema.vector("embedding") - assert embedding_field is not None - assert embedding_field.name == "embedding" - assert embedding_field.dimension == 128 - assert embedding_field.data_type == DataType.VECTOR_FP32 - - index_param = embedding_field.index_param - assert index_param is not None - assert index_param.type == IndexType.OMEGA - assert index_param.m == 16 - assert index_param.ef_construction == 200 - assert index_param.metric_type == MetricType.IP - - def test_omega_index_param_to_dict(self, omega_test_collection: Collection): - """Test that OmegaIndexParam.to_dict() returns correct type.""" - embedding_field = omega_test_collection.schema.vector("embedding") - index_param = embedding_field.index_param - - param_dict = index_param.to_dict() - assert "type" in param_dict - assert param_dict["type"] == "OMEGA", f"Expected 'OMEGA', got '{param_dict['type']}'" - assert param_dict["metric_type"] == "IP" - assert param_dict["m"] == 16 - assert param_dict["ef_construction"] == 200 - - def test_omega_basic_insert_and_search(self, omega_test_collection: Collection): - """Test basic insert and search with small dataset (no training).""" - import numpy as np + collection = zvec.create_and_open( + 
path=collection_path, + schema=schema, + option=CollectionOption() + ) - # Insert 10 documents + # Verify OMEGA index param + embedding_field = collection.schema.vector("embedding") + assert embedding_field.index_param.type == IndexType.OMEGA + assert embedding_field.index_param.min_vector_threshold == 100000 + print(f" ✓ Collection created with OMEGA index") + print(f" ✓ Index params: m={embedding_field.index_param.m}, " + f"ef_construction={embedding_field.index_param.ef_construction}, " + f"min_vector_threshold={embedding_field.index_param.min_vector_threshold}") + + # Step 2: Insert 100,000 documents to exceed threshold + print("\n[Step 2/6] Inserting 100,000 documents...") docs = [] - for i in range(10): + np.random.seed(42) + num_docs = 100000 + for i in range(num_docs): vector = np.random.randn(128).astype(np.float32) vector = vector / np.linalg.norm(vector) docs.append( @@ -1182,151 +1132,158 @@ def test_omega_basic_insert_and_search(self, omega_test_collection: Collection): ) ) - result = omega_test_collection.insert(docs) - assert len(result) == len(docs) - for item in result: - assert item.ok() + batch_size = 1000 + for i in range(0, len(docs), batch_size): + batch = docs[i : i + batch_size] + result = collection.insert(batch) + assert all(r.ok() for r in result) + if (i // batch_size) % 10 == 0: + print(f" Progress: {i}/{num_docs} documents inserted...") - # Search with first document's vector - query_vector = docs[0].vector("embedding") - search_results = omega_test_collection.query( - VectorQuery(field_name="embedding", vector=query_vector), - topk=5 - ) + assert collection.stats.doc_count == len(docs) + print(f" ✓ Inserted {len(docs)} documents (exceeds min_vector_threshold)") - assert len(search_results) > 0 - # First result should be the query document itself - assert search_results[0].id == docs[0].id + # Step 3: Flush to persist data + print("\n[Step 3/6] Flushing data...") + collection.flush() + print(f" ✓ Data flushed") - def 
test_omega_large_dataset_with_optimize( - self, omega_test_collection: Collection, omega_docs_large - ): - """Test OMEGA with large dataset to trigger automatic training.""" - # Insert 1500 documents (should trigger segment dump) - batch_size = 100 - for i in range(0, len(omega_docs_large), batch_size): - batch = omega_docs_large[i : i + batch_size] - result = omega_test_collection.insert(batch) - assert len(result) == len(batch) - for item in result: - assert item.ok() - - # Verify all documents inserted - stats = omega_test_collection.stats - assert stats.doc_count == len(omega_docs_large) - - # Call optimize to trigger segment dump and automatic training - optimize_result = omega_test_collection.optimize(option=OptimizeOption()) - # optimize() may not return a value, just ensure it doesn't raise - - # Perform search to verify OMEGA is working - query_doc = omega_docs_large[0] - query_vector = query_doc.vector("embedding") - - search_results = omega_test_collection.query( - VectorQuery(field_name="embedding", vector=query_vector), - topk=10 - ) + # Step 4: Trigger training via optimize + print("\n[Step 4/6] Triggering training via optimize()...") + collection.optimize(option=OptimizeOption()) + print(f" ✓ Optimize completed (merge + auto-training)") - assert len(search_results) > 0 - # Should find the query document in results - found_query_doc = False - for doc in search_results: - if doc.id == query_doc.id: - found_query_doc = True - break - assert found_query_doc, "Query document not found in search results" - - def test_omega_search_consistency( - self, omega_test_collection: Collection, omega_docs_large - ): - """Test that OMEGA search results are consistent and reasonable.""" - import numpy as np + # Step 5: Verify model files + print("\n[Step 5/6] Verifying model files...") + model_files_found = False + required_files = [ + "model.txt", + "threshold_table.txt", + "interval_table.txt", + "gt_collected_table.txt", + "gt_cmps_all_table.txt" + ] + + # Search 
for omega_model directory + for item in os.listdir(collection_path): + item_path = os.path.join(collection_path, item) + if os.path.isdir(item_path): + model_dir = os.path.join(item_path, "omega_model") + if os.path.exists(model_dir): + print(f" Found model directory: {model_dir}") + + all_exist = True + for fname in required_files: + fpath = os.path.join(model_dir, fname) + if os.path.exists(fpath): + size = os.path.getsize(fpath) + print(f" ✓ {fname} ({size} bytes)") + else: + print(f" ✗ {fname} MISSING") + all_exist = False + + if all_exist: + model_files_found = True + break - # Insert documents - batch_size = 100 - for i in range(0, len(omega_docs_large), batch_size): - batch = omega_docs_large[i : i + batch_size] - omega_test_collection.insert(batch) - - # Perform multiple searches and verify consistency - query_doc = omega_docs_large[100] # Use a middle document - query_vector = query_doc.vector("embedding") - - # Search twice with same query - results1 = omega_test_collection.query( - VectorQuery(field_name="embedding", vector=query_vector), - topk=20 - ) - results2 = omega_test_collection.query( - VectorQuery(field_name="embedding", vector=query_vector), - topk=20 - ) + assert model_files_found, "OMEGA model files not found after training" + print(f" ✅ All required model files generated") + + # Step 6: Test search with OMEGA early stopping (vector count >= threshold) + print("\n[Step 6/6] Testing search with OMEGA early stopping enabled...") + print(f" Note: OMEGA early stopping is ENABLED (doc_count={num_docs} >= threshold=100000)") + print(f" OMEGA target_recall = 0.80") + + n_test_queries = 1000 + topk = 100 + target_recall = 0.80 + + # Generate NEW random query vectors (not from base vectors) + print(f" Generating {n_test_queries} random query vectors...") + np.random.seed(12345) # Different seed from base vectors + query_vectors = [] + for i in range(n_test_queries): + qv = np.random.randn(128).astype(np.float32) + qv = qv / np.linalg.norm(qv) + 
query_vectors.append(qv.tolist()) + + # Compute ground truth using brute-force + print(f" Computing ground truth (brute-force) for recall evaluation...") + base_vectors = np.array([docs[i].vector("embedding") for i in range(num_docs)]) + query_vectors_np = np.array(query_vectors) + + # Compute all distances (L2) + ground_truth_indices = [] + for i in range(n_test_queries): + qv = query_vectors_np[i] + distances = np.sum((base_vectors - qv) ** 2, axis=1) + gt_indices = np.argsort(distances)[:topk] + ground_truth_indices.append(set(str(idx) for idx in gt_indices)) + + # Run OMEGA search and compute recall + print(f" Running {n_test_queries} OMEGA searches with topk={topk}, target_recall={target_recall}...") + recalls = [] + for i in range(n_test_queries): + results = collection.query( + VectorQuery( + field_name="embedding", + vector=query_vectors[i], + param=OmegaQueryParam(ef=1000, target_recall=target_recall) + ), + topk=topk + ) - assert len(results1) == len(results2) + result_ids = {r.id for r in results} + gt_ids = ground_truth_indices[i] - # Results should be identical (same query) - for i in range(len(results1)): - assert results1[i].id == results2[i].id + # Compute recall: |intersection| / |ground_truth| + recall = len(result_ids & gt_ids) / len(gt_ids) if gt_ids else 1.0 + recalls.append(recall) - def test_omega_with_filter(self, omega_test_collection: Collection): - """Test OMEGA search with filter expressions.""" - import numpy as np + if (i + 1) % 200 == 0: + print(f" Progress: {i+1}/{n_test_queries} queries completed...") - # Insert 50 documents - docs = [] - for i in range(50): - vector = np.random.randn(128).astype(np.float32) - vector = vector / np.linalg.norm(vector) - docs.append( - Doc( - id=f"{i}", - fields={"id": i, "name": f"doc_{i}"}, - vectors={"embedding": vector.tolist()}, - ) - ) - omega_test_collection.insert(docs) - - # Search with filter - query_vector = docs[0].vector("embedding") - results = omega_test_collection.query( - VectorQuery( 
- field_name="embedding", - vector=query_vector, - ), - filter="id >= 10 and id < 20", - topk=20, - ) + avg_recall = np.mean(recalls) + print(f" Average Recall@{topk}: {avg_recall:.4f}") - # All results should satisfy filter - for doc in results: - doc_id = doc.field("id") - assert 10 <= doc_id < 20, f"Document id {doc_id} does not satisfy filter" + # Validate recall meets target + assert avg_recall >= target_recall, f"Recall too low: {avg_recall:.4f} < {target_recall}" + print(f" ✅ Recall meets target ({avg_recall:.4f} >= {target_recall})") - def test_omega_training_and_early_stopping( - self, tmp_path_factory - ): + # Summary + print("\n" + "="*80) + print("✅ OMEGA End-to-End Workflow PASSED") + print(" 1. ✓ Collection created with OMEGA index") + print(f" 2. ✓ {len(docs)} documents inserted (>= min_vector_threshold)") + print(" 3. ✓ Training triggered and completed") + print(" 4. ✓ All model files generated (5 files)") + print(" 5. ✓ OMEGA early stopping ENABLED during search") + print(f" 6. ✓ {n_test_queries} queries, topk={topk}, recall: {avg_recall:.4f} >= {target_recall}") + print("="*80) + + def test_omega_fallback_to_hnsw(self, tmp_path_factory): """ - Verify OMEGA training with k_train=1 labeling logic: - 1. Training succeeds (model files generated) - 2. Search demonstrates early stopping - 3. Recall meets target + Test OMEGA fallback behavior when document count < min_vector_threshold. + + Verifies that: + 1. Training still occurs during optimize() + 2. Search falls back to standard HNSW (OMEGA disabled) + 3. 
Results are identical to pure HNSW search """ import numpy as np import os - import gc - import time print("\n" + "="*80) - print("OMEGA Training Verification (k_train=1)") + print("OMEGA Fallback to HNSW Test (< min_vector_threshold)") print("="*80) - # Create fresh collection - temp_dir = tmp_path_factory.mktemp("zvec_omega_training") - collection_path = str(temp_dir / "omega_training_collection") + # Step 1: Create OMEGA collection + print("\n[Step 1/5] Creating OMEGA collection...") + temp_dir = tmp_path_factory.mktemp("omega_fallback") + collection_path = str(temp_dir / "omega_fallback_collection") schema = zvec.CollectionSchema( - name="omega_training_test", + name="omega_fallback_test", fields=[ FieldSchema("id", DataType.INT64, nullable=False), FieldSchema("name", DataType.STRING, nullable=False), @@ -1340,130 +1297,131 @@ def test_omega_training_and_early_stopping( metric_type=MetricType.L2, m=16, ef_construction=200, + min_vector_threshold=100000, # Explicitly set threshold ), ), ], ) - # Create and insert documents - print(f"\n[1/4] Creating collection and inserting 1500 documents...") collection = zvec.create_and_open( path=collection_path, schema=schema, option=CollectionOption() ) - # Generate documents + print(f" ✓ Collection created with min_vector_threshold=100000") + + # Step 2: Insert only 1000 documents (< threshold) + print("\n[Step 2/5] Inserting 1,000 documents (< min_vector_threshold)...") docs = [] - for i in range(1500): - base_vector = np.random.randn(128).astype(np.float32) - base_vector = base_vector / np.linalg.norm(base_vector) + np.random.seed(42) + num_docs = 1000 + for i in range(num_docs): + vector = np.random.randn(128).astype(np.float32) + vector = vector / np.linalg.norm(vector) docs.append( Doc( id=f"{i}", fields={"id": i, "name": f"doc_{i}"}, - vectors={"embedding": base_vector.tolist()}, + vectors={"embedding": vector.tolist()}, ) ) batch_size = 100 for i in range(0, len(docs), batch_size): batch = docs[i : i + batch_size] - 
collection.insert(batch) + result = collection.insert(batch) + assert all(r.ok() for r in result) + + assert collection.stats.doc_count == len(docs) print(f" ✓ Inserted {len(docs)} documents") + print(f" ✓ Doc count ({num_docs}) < min_vector_threshold (100000)") - # Trigger optimization - print(f"\n[2/4] Triggering optimize (dump + auto training)...") + # Step 3: Flush and optimize (training will occur) + print("\n[Step 3/5] Flushing and optimizing...") + collection.flush() collection.optimize(option=OptimizeOption()) - print(f" ✓ Optimize completed") - - # Close collection - del collection - gc.collect() - time.sleep(1) + print(f" ✓ Optimize completed (training executed)") - # Reopen to load OMEGA index - print(f" Reopening collection to load OMEGA indexes...") - collection = zvec.open(path=collection_path, option=CollectionOption(read_only=True)) - print(f" ✓ Collection reopened") - - # Verify model files exist - print(f"\n[3/4] Verifying model files...") + # Step 4: Verify model files were generated despite fallback + print("\n[Step 4/5] Verifying model files (training should have occurred)...") model_files_found = False - model_dir = None - - # Look for any numeric segment directories - if os.path.exists(collection_path): - for item in os.listdir(collection_path): - item_path = os.path.join(collection_path, item) - if os.path.isdir(item_path): - potential_model_dir = os.path.join(item_path, "omega_model") - - if os.path.exists(potential_model_dir): - model_dir = potential_model_dir - print(f" Found model directory: {model_dir}") - - # Check for all required files - required_files = [ - "model.txt", - "threshold_table.txt", - "interval_table.txt", - "gt_collected_table.txt", - "gt_cmps_all_table.txt" - ] - - files_exist = [] - for fname in required_files: - fpath = os.path.join(model_dir, fname) - exists = os.path.exists(fpath) - files_exist.append(exists) - - if exists: - file_size = os.path.getsize(fpath) - print(f" ✓ {fname}: {file_size} bytes") - else: - 
print(f" ✗ {fname}: NOT FOUND") - - if all(files_exist): - model_files_found = True - print(f"\n ✅ ALL MODEL FILES GENERATED!") - break - - if not model_files_found: - print(f" ⚠️ No OMEGA model files found in any segment directory") - - assert model_files_found, "OMEGA model files not found after training" - - # Test search and calculate recall - print(f"\n[4/4] Testing search recall...") - n_queries = 10 + for item in os.listdir(collection_path): + item_path = os.path.join(collection_path, item) + if os.path.isdir(item_path): + model_dir = os.path.join(item_path, "omega_model") + if os.path.exists(model_dir) and os.path.exists(os.path.join(model_dir, "model.txt")): + model_files_found = True + print(f" ✓ Model files found (training executed)") + break + + assert model_files_found, "Model files should exist even when fallback occurs" + + # Step 5: Test search with fallback behavior + print("\n[Step 5/5] Testing search with OMEGA disabled (fallback to HNSW)...") + print(f" Note: OMEGA early stopping is DISABLED (doc_count={num_docs} < threshold=100000)") + print(f" Expected: Search uses standard HNSW algorithm") + + n_test_queries = 100 topk = 10 + target_recall = 0.80 + + # Generate NEW random query vectors + print(f" Generating {n_test_queries} random query vectors...") + np.random.seed(12345) + query_vectors = [] + for i in range(n_test_queries): + qv = np.random.randn(128).astype(np.float32) + qv = qv / np.linalg.norm(qv) + query_vectors.append(qv.tolist()) + + # Compute ground truth using brute-force + print(f" Computing ground truth (brute-force) for recall evaluation...") + base_vectors = np.array([docs[i].vector("embedding") for i in range(num_docs)]) + query_vectors_np = np.array(query_vectors) + + ground_truth_indices = [] + for i in range(n_test_queries): + qv = query_vectors_np[i] + distances = np.sum((base_vectors - qv) ** 2, axis=1) + gt_indices = np.argsort(distances)[:topk] + ground_truth_indices.append(set(str(idx) for idx in gt_indices)) + + # Run 
search and compute recall + print(f" Running {n_test_queries} HNSW searches with topk={topk}...") recalls = [] - - for i in range(n_queries): - query_doc = docs[i] - query_vector = query_doc.vector("embedding") - + for i in range(n_test_queries): + # Even with OmegaQueryParam, OMEGA early stopping is disabled + # because doc_count < min_vector_threshold results = collection.query( - VectorQuery(field_name="embedding", vector=query_vector), + VectorQuery( + field_name="embedding", + vector=query_vectors[i], + param=OmegaQueryParam(target_recall=target_recall) + ), topk=topk ) - # Calculate recall - result_ids = [r.id for r in results] - recall = 1.0 if query_doc.id in result_ids else 0.0 + result_ids = {r.id for r in results} + gt_ids = ground_truth_indices[i] + + recall = len(result_ids & gt_ids) / len(gt_ids) if gt_ids else 1.0 recalls.append(recall) avg_recall = np.mean(recalls) print(f" Average Recall@{topk}: {avg_recall:.4f}") - # Verify recall is reasonable (should be high since we use docs from collection) - assert avg_recall >= 0.8, f"Recall too low: {avg_recall:.4f}" + # For fallback to HNSW, recall should still be high (pure HNSW performance) + assert avg_recall >= target_recall, f"Recall too low: {avg_recall:.4f} < {target_recall}" + print(f" ✅ Recall meets target (standard HNSW performance)") - print(f"\n ✅ RECALL MEETS THRESHOLD (>= 0.8)") + # Summary print("\n" + "="*80) - print("✅ OMEGA Training Verification PASSED") - print(" 1. ✓ Model files generated (LightGBM + 4 tables)") - print(" 2. ✓ Training completed with k_train=1 labeling") - print(f" 3. ✓ Search recall: {avg_recall:.4f} >= 0.8") + print("✅ OMEGA Fallback Test PASSED") + print(" 1. ✓ Collection created with min_vector_threshold=100000") + print(f" 2. ✓ {num_docs} documents inserted (< threshold)") + print(" 3. ✓ Training executed during optimize()") + print(" 4. ✓ Model files generated") + print(" 5. ✓ Search falls back to HNSW (OMEGA disabled)") + print(f" 6. 
✓ {n_test_queries} queries, topk={topk}, recall: {avg_recall:.4f} >= {target_recall}") print("="*80) diff --git a/python/zvec/__init__.py b/python/zvec/__init__.py index 97240f98c..cb545a1b1 100644 --- a/python/zvec/__init__.py +++ b/python/zvec/__init__.py @@ -49,6 +49,7 @@ IVFIndexParam, IVFQueryParam, OmegaIndexParam, + OmegaQueryParam, OptimizeOption, ) from .model.param.vector_query import VectorQuery @@ -101,6 +102,7 @@ "AlterColumnOption", "HnswQueryParam", "IVFQueryParam", + "OmegaQueryParam", # Extensions "ReRanker", "DenseEmbeddingFunction", diff --git a/python/zvec/__init__.pyi b/python/zvec/__init__.pyi index 7e536c730..625c2a74c 100644 --- a/python/zvec/__init__.pyi +++ b/python/zvec/__init__.pyi @@ -23,6 +23,8 @@ from .model.param import ( InvertIndexParam, IVFIndexParam, IVFQueryParam, + OmegaIndexParam, + OmegaQueryParam, OptimizeOption, ) from .model.param.vector_query import VectorQuery @@ -62,6 +64,8 @@ __all__: list = [ "LogLevel", "LogType", "MetricType", + "OmegaIndexParam", + "OmegaQueryParam", "OptimizeOption", "QuantizeType", "ReRanker", diff --git a/python/zvec/_omega_training.py b/python/zvec/_omega_training.py deleted file mode 100644 index 32a28a144..000000000 --- a/python/zvec/_omega_training.py +++ /dev/null @@ -1,346 +0,0 @@ -# Copyright 2025-present the zvec project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""OMEGA model training module.""" - -import argparse -import os -import sys -import numpy as np - -try: - import lightgbm as lgb - from sklearn.model_selection import train_test_split - from sklearn.isotonic import IsotonicRegression - LIGHTGBM_AVAILABLE = True -except ImportError: - LIGHTGBM_AVAILABLE = False - - -def train_omega_model(csv_path: str, output_dir: str, verbose: bool = False, topk: int = 100, gt_cmps_path: str = None): - """Train OMEGA model from CSV training data. - - Args: - csv_path: Path to CSV file with training data - output_dir: Directory to save trained model and tables - verbose: Enable verbose logging - topk: Top-K value used during training data collection (default: 100) - gt_cmps_path: Optional path to gt_cmps.csv for generating real tables - - Returns: - str: Path to the trained model directory - """ - if not LIGHTGBM_AVAILABLE: - raise ImportError( - "LightGBM is required for OMEGA training. " - "Install it with: pip install lightgbm" - ) - - if verbose: - print(f"Loading training data from: {csv_path}") - - # Load CSV data - import pandas as pd - df = pd.read_csv(csv_path) - - # Extract features and labels - # CSV format: query_id,hops_visited,cmps_visited,dist_1st,dist_start,stat_0,...,stat_6,label - query_ids = df['query_id'].values.astype(np.int32) - X = df[['hops_visited', 'cmps_visited', 'dist_1st', 'dist_start', - 'stat_0', 'stat_1', 'stat_2', 'stat_3', 'stat_4', 'stat_5', 'stat_6']].values - y = df['label'].values - - if verbose: - print(f"Loaded {len(df)} training records from {len(np.unique(query_ids))} queries") - print(f"Feature shape: {X.shape}") - print(f"Label distribution: {np.sum(y==0)} negative, {np.sum(y==1)} positive") - - # Create output directory - os.makedirs(output_dir, exist_ok=True) - - # Train LightGBM binary classifier - model_path = os.path.join(output_dir, "model.txt") - threshold_table_path = os.path.join(output_dir, "threshold_table.txt") - - if verbose: - print("Training LightGBM model...") - - # 
Split data - query_ids_train, query_ids_test, X_train, X_test, y_train, y_test = train_test_split( - query_ids, X, y, test_size=0.2, shuffle=False - ) - - # Create datasets - train_data = lgb.Dataset(X_train, label=y_train, free_raw_data=False) - test_data = lgb.Dataset(X_test, label=y_test, reference=train_data, free_raw_data=False) - - # Training parameters - # Calculate scale_pos_weight safely - n_negative = np.sum(y_train == 0) - n_positive = np.sum(y_train == 1) - - if n_positive == 0: - raise ValueError(f"No positive samples in training data! All labels are 0.") - if n_negative == 0: - raise ValueError(f"No negative samples in training data! All labels are 1.") - - scale_pos_weight = n_negative / n_positive - - params = { - 'task': 'train', - 'boosting_type': 'gbdt', - 'objective': 'binary', - 'metric': ['binary_logloss'], - 'num_leaves': 31, - 'boost_from_average': False, - 'learning_rate': 0.1, - 'feature_fraction': 1.0, - 'bagging_fraction': 1.0, - 'bagging_freq': 0, - 'verbose': 0 if not verbose else 1, - 'num_threads': 8, - 'scale_pos_weight': scale_pos_weight, - } - - if verbose: - print(f"Training samples: {len(y_train)} ({n_positive} positive, {n_negative} negative)") - print(f"scale_pos_weight: {scale_pos_weight:.4f}") - - # Train model - num_round = 100 - evals_result = {} - model = lgb.train( - params, - train_data, - valid_sets=[test_data], - num_boost_round=num_round, - callbacks=[lgb.record_evaluation(evals_result)] - ) - - # Save model - model.save_model(model_path) - if verbose: - print(f"Model saved to: {model_path}") - - # Generate threshold table using isotonic regression - if verbose: - print("Generating threshold table...") - - y_pred = model.predict(X_test, num_iteration=model.best_iteration) - - # Calibrate using isotonic regression - isotonic_reg = IsotonicRegression(increasing=True, out_of_bounds='clip') - y_pred_calibrated = isotonic_reg.fit_transform(y_pred, y_test) - - # Generate threshold table - sorted_indices = 
np.argsort(y_pred) - sorted_confidences = y_pred[sorted_indices] - sorted_probabilities = y_pred_calibrated[sorted_indices] - sorted_confidences_10000x = np.round(sorted_confidences * 10000) - - # Remove duplicates - _, unique_indices = np.unique(sorted_confidences_10000x, return_index=True) - unique_confidences = sorted_confidences[unique_indices] - unique_probabilities = sorted_probabilities[unique_indices] - - with open(threshold_table_path, "w") as f: - for conf, prob in zip(unique_confidences, unique_probabilities): - f.write(f"{conf:.4f},{prob:.6f}\n") - - if verbose: - print(f"Threshold table saved to: {threshold_table_path}") - - # Generate placeholder interval table (not used in zvec's current implementation) - interval_table_path = os.path.join(output_dir, "interval_table.txt") - with open(interval_table_path, "w") as f: - for recall_pct in range(0, 101, 1): - recall = recall_pct / 100.0 - initial_interval = max(int(100 * (1 - recall)), 1) - min_interval = max(int(10 * (1 - recall)), 1) - f.write(f"{recall:.2f},{initial_interval},{min_interval}\n") - - if verbose: - print(f"Interval table saved to: {interval_table_path}") - - # Generate gt_collected_table and gt_cmps_all_table - gt_collected_table_path = os.path.join(output_dir, "gt_collected_table.txt") - gt_cmps_all_table_path = os.path.join(output_dir, "gt_cmps_all_table.txt") - - if gt_cmps_path is not None and os.path.exists(gt_cmps_path): - # Load gt_cmps data and generate real tables - if verbose: - print(f"Loading gt_cmps data from: {gt_cmps_path}") - - gt_cmps_df = pd.read_csv(gt_cmps_path) - num_queries = gt_cmps_df['query_id'].max() + 1 - - # Reshape gt_cmps into a 2D array: [query_id][rank] = cmps - gt_cmps = np.zeros((num_queries, topk), dtype=np.int32) - for _, row in gt_cmps_df.iterrows(): - query_id = int(row['query_id']) - rank = int(row['rank']) - cmps = int(row['cmps']) - if query_id < num_queries and rank < topk: - gt_cmps[query_id, rank] = cmps - - # Generate gt_cmps_all_table: 
percentiles of cmps for each rank - # Format: rank:percentile_1,percentile_2,...,percentile_100 - if verbose: - print("Generating gt_cmps_all_table...") - - with open(gt_cmps_all_table_path, "w") as f: - for rank in range(topk): - cmps_values = gt_cmps[:, rank] - # Calculate percentiles (1-100) - percentiles = np.percentile(cmps_values, range(1, 101)) - percentiles_str = ','.join([str(int(p)) for p in percentiles]) - f.write(f"{rank}:{percentiles_str}\n") - - if verbose: - print(f"GT cmps all table saved to: {gt_cmps_all_table_path}") - - # Generate gt_collected_table - # For each "collected" count (0 to topk), for each rank r: - # What's the probability that GT[r] was collected when we've found "collected" GTs - # This is computed by: fraction of queries where cmps[r] <= cmps[collected-1] - if verbose: - print("Generating gt_collected_table...") - - with open(gt_collected_table_path, "w") as f: - for collected in range(topk + 1): - row_values = [] - if collected == 0: - # No GTs collected yet, all probabilities are 0 - row_values = ["0.0"] * topk - else: - # Get the cmps threshold: when we've collected 'collected' GTs, - # the threshold is the cmps when the (collected-1)th GT was found - for rank in range(topk): - if rank < collected: - # Ranks before "collected" are always found - row_values.append("1.0") - else: - # For ranks >= collected, compute probability - # GT[rank] is collected if cmps[rank] <= cmps[collected-1] - threshold_rank = collected - 1 - prob_found = np.mean(gt_cmps[:, rank] <= gt_cmps[:, threshold_rank]) - row_values.append(f"{prob_found:.6f}") - f.write(f"{collected}:{','.join(row_values)}\n") - - if verbose: - print(f"GT collected table saved to: {gt_collected_table_path}") - - else: - # Generate placeholder tables when gt_cmps is not available - if verbose: - print("Generating placeholder tables (gt_cmps not available)...") - - with open(gt_collected_table_path, "w") as f: - # Format: row_index:value1,value2,...,valueK - # Each row represents 
a "collected" count, columns are ranks - for collected in range(topk + 1): - row_values = ["1.0" if i < collected else "0.0" for i in range(topk)] - f.write(f"{collected}:{','.join(row_values)}\n") - - if verbose: - print(f"GT collected table (placeholder) saved to: {gt_collected_table_path}") - - with open(gt_cmps_all_table_path, "w") as f: - # Format: row_index:value1,value2,...,value100 - # Each row represents a rank, columns are percentiles (1-100) - for rank in range(topk + 1): - percentiles = [str(rank * 10 + p) for p in range(100)] # Placeholder values - f.write(f"{rank}:{','.join(percentiles)}\n") - - if verbose: - print(f"GT cmps all table (placeholder) saved to: {gt_cmps_all_table_path}") - - # Print final statistics - if verbose: - print("\nTraining complete!") - print(f"Model directory: {output_dir}") - print("Generated files:") - print(f" - model.txt") - print(f" - threshold_table.txt") - print(f" - interval_table.txt") - if gt_cmps_path is not None and os.path.exists(gt_cmps_path): - print(f" - gt_collected_table.txt") - print(f" - gt_cmps_all_table.txt") - else: - print(f" - gt_collected_table.txt (placeholder)") - print(f" - gt_cmps_all_table.txt (placeholder)") - - return output_dir - - -def main(): - parser = argparse.ArgumentParser( - description="Train OMEGA model from collected training data" - ) - parser.add_argument( - "command", - choices=["train"], - help="Command to execute" - ) - parser.add_argument( - "--input", - required=True, - help="Input CSV file with training data" - ) - parser.add_argument( - "--output", - required=True, - help="Output directory for trained model" - ) - parser.add_argument( - "--verbose", - action="store_true", - help="Enable verbose output" - ) - parser.add_argument( - "--topk", - type=int, - default=100, - help="Top-K value used during training (default: 100)" - ) - parser.add_argument( - "--gt_cmps", - type=str, - default=None, - help="Path to gt_cmps.csv for generating real tables (optional)" - ) - - args = 
parser.parse_args() - - if args.command == "train": - try: - train_omega_model( - csv_path=args.input, - output_dir=args.output, - verbose=args.verbose, - topk=args.topk, - gt_cmps_path=args.gt_cmps - ) - print("✓ Training completed successfully") - sys.exit(0) - except Exception as e: - print(f"✗ Training failed: {e}", file=sys.stderr) - if args.verbose: - import traceback - traceback.print_exc() - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/python/zvec/model/param/__init__.py b/python/zvec/model/param/__init__.py index cc89a5e15..39e3b9585 100644 --- a/python/zvec/model/param/__init__.py +++ b/python/zvec/model/param/__init__.py @@ -25,6 +25,7 @@ IVFIndexParam, IVFQueryParam, OmegaIndexParam, + OmegaQueryParam, OptimizeOption, ) @@ -40,5 +41,6 @@ "IndexOption", "InvertIndexParam", "OmegaIndexParam", + "OmegaQueryParam", "OptimizeOption", ] diff --git a/python/zvec/model/param/__init__.pyi b/python/zvec/model/param/__init__.pyi index a038e2ee6..bb6cf6ece 100644 --- a/python/zvec/model/param/__init__.pyi +++ b/python/zvec/model/param/__init__.pyi @@ -22,6 +22,7 @@ __all__: list[str] = [ "IndexParam", "InvertIndexParam", "OmegaIndexParam", + "OmegaQueryParam", "OptimizeOption", "QueryParam", "SegmentOption", @@ -596,6 +597,60 @@ class OmegaIndexParam(VectorIndexParam): int: Maximum number of neighbors per node in upper layers. """ +class OmegaQueryParam(HnswQueryParam): + """ + + Query parameters for OMEGA index with adaptive early stopping. + + OMEGA extends HNSW with machine learning-based early stopping that can + dynamically adjust search effort to meet a target recall. + + Attributes: + type (IndexType): Always ``IndexType.OMEGA``. + ef (int): Size of the dynamic candidate list during search. + Larger values improve recall but slow down search. + Default is 300. + target_recall (float): Target recall for OMEGA early stopping. + OMEGA will stop searching when predicted recall meets this target. + Valid range: 0.0 to 1.0. Default is 0.95. 
+ radius (float): Search radius for range queries. Default is 0.0. + is_linear (bool): Force linear search. Default is False. + is_using_refiner (bool): Whether to use refiner for the query. Default is False. + + Examples: + >>> params = OmegaQueryParam(ef=300, target_recall=0.98) + >>> print(params.target_recall) + 0.98 + """ + def __getstate__(self) -> tuple: ... + def __init__( + self, + ef: typing.SupportsInt = 300, + target_recall: typing.SupportsFloat = 0.95, + radius: typing.SupportsFloat = 0.0, + is_linear: bool = False, + is_using_refiner: bool = False, + ) -> None: + """ + Constructs an OmegaQueryParam instance. + + Args: + ef (int, optional): Search-time candidate list size. + Higher values improve accuracy. Defaults to 300. + target_recall (float, optional): Target recall for early stopping. + Valid range: 0.0 to 1.0. Defaults to 0.95. + radius (float, optional): Search radius for range queries. Default is 0.0. + is_linear (bool, optional): Force linear search. Default is False. + is_using_refiner (bool, optional): Whether to use refiner. Default is False. + """ + def __repr__(self) -> str: ... + def __setstate__(self, arg0: tuple) -> None: ... + @property + def target_recall(self) -> float: + """ + float: Target recall for OMEGA early stopping (0.0 to 1.0). + """ + class OptimizeOption: """ diff --git a/python/zvec/model/param/vector_query.py b/python/zvec/model/param/vector_query.py index 97d105af6..0e27211a1 100644 --- a/python/zvec/model/param/vector_query.py +++ b/python/zvec/model/param/vector_query.py @@ -17,7 +17,7 @@ from typing import Optional, Union from ...common import VectorType -from . import HnswQueryParam, IVFQueryParam +from . import HnswQueryParam, IVFQueryParam, OmegaQueryParam __all__ = ["VectorQuery"] @@ -28,7 +28,8 @@ class VectorQuery: A `VectorQuery` can be constructed using either a document ID (to look up its vector) or an explicit vector. 
It may optionally include index-specific - query parameters to control search behavior (e.g., `ef` for HNSW, `nprobe` for IVF). + query parameters to control search behavior (e.g., `ef` for HNSW, `nprobe` for IVF, + `target_recall` for OMEGA). Exactly one of `id` or `vector` should be provided. If both are given, behavior is implementation-defined (typically `id` takes precedence). @@ -37,25 +38,31 @@ class VectorQuery: field_name (str): Name of the vector field to query. id (Optional[str], optional): Document ID to fetch vector from. Default is None. vector (VectorType, optional): Explicit query vector. Default is None. - param (Optional[Union[HnswQueryParam, IVFQueryParam]], optional): + param (Optional[Union[HnswQueryParam, IVFQueryParam, OmegaQueryParam]], optional): Index-specific query parameters. Default is None. Examples: >>> import zvec >>> # Query by ID >>> q1 = zvec.VectorQuery(field_name="embedding", id="doc123") - >>> # Query by vector + >>> # Query by vector with HNSW params >>> q2 = zvec.VectorQuery( ... field_name="embedding", ... vector=[0.1, 0.2, 0.3], ... param=HnswQueryParam(ef=300) ... ) + >>> # Query with OMEGA params and target recall + >>> q3 = zvec.VectorQuery( + ... field_name="embedding", + ... vector=[0.1, 0.2, 0.3], + ... param=OmegaQueryParam(ef=300, target_recall=0.98) + ... ) """ field_name: str id: Optional[str] = None vector: VectorType = None - param: Optional[Union[HnswQueryParam, IVFQueryParam]] = None + param: Optional[Union[HnswQueryParam, IVFQueryParam, OmegaQueryParam]] = None def has_id(self) -> bool: """Check if the query is based on a document ID. 
diff --git a/src/binding/python/model/param/python_param.cc b/src/binding/python/model/param/python_param.cc index c76e9f974..0fbe1b033 100644 --- a/src/binding/python/model/param/python_param.cc +++ b/src/binding/python/model/param/python_param.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include "python_doc.h" namespace zvec { @@ -775,6 +776,82 @@ Constructs an HnswQueryParam instance. return obj; })); + // binding omega query params + py::class_> + omega_query_params(m, "OmegaQueryParam", R"pbdoc( +Query parameters for OMEGA index with adaptive early stopping. + +OMEGA extends HNSW with machine learning-based early stopping that can +dynamically adjust search effort to meet a target recall. + +Attributes: + type (IndexType): Always ``IndexType.OMEGA``. + ef (int): Size of the dynamic candidate list during search. + Larger values improve recall but slow down search. + Default is 300. + target_recall (float): Target recall for OMEGA early stopping. + OMEGA will stop searching when predicted recall meets this target. + Valid range: 0.0 to 1.0. Default is 0.95. + radius (float): Search radius for range queries. Default is 0.0. + is_linear (bool): Force linear search. Default is False. + is_using_refiner (bool): Whether to use refiner for the query. Default is False. + +Examples: + >>> params = OmegaQueryParam(ef=300, target_recall=0.98) + >>> print(params.target_recall) + 0.98 +)pbdoc"); + omega_query_params + .def(py::init(), + py::arg("ef") = core_interface::kDefaultHnswEfSearch, + py::arg("target_recall") = 0.95f, + py::arg("radius") = 0.0f, py::arg("is_linear") = false, + py::arg("is_using_refiner") = false, + R"pbdoc( +Constructs an OmegaQueryParam instance. + +Args: + ef (int, optional): Search-time candidate list size. + Higher values improve accuracy. Defaults to 300. + target_recall (float, optional): Target recall for early stopping. + Valid range: 0.0 to 1.0. Defaults to 0.95. + radius (float, optional): Search radius for range queries. 
Default is 0.0. + is_linear (bool, optional): Force linear search. Default is False. + is_using_refiner (bool, optional): Whether to use refiner. Default is False. +)pbdoc") + .def_property_readonly( + "target_recall", + [](const OmegaQueryParams &self) -> float { return self.target_recall(); }, + "float: Target recall for OMEGA early stopping (0.0 to 1.0).") + .def("__repr__", + [](const OmegaQueryParams &self) -> std::string { + return "{" + "\"type\":" + + index_type_to_string(self.type()) + + ", \"ef\":" + std::to_string(self.ef()) + + ", \"target_recall\":" + std::to_string(self.target_recall()) + + ", \"radius\":" + std::to_string(self.radius()) + + ", \"is_linear\":" + std::to_string(self.is_linear()) + + ", \"is_using_refiner\":" + + std::to_string(self.is_using_refiner()) + "}"; + }) + .def(py::pickle( + [](const OmegaQueryParams &self) { + return py::make_tuple(self.ef(), self.target_recall(), + self.radius(), self.is_linear(), + self.is_using_refiner()); + }, + [](py::tuple t) { + if (t.size() != 5) + throw std::runtime_error("Invalid state for OmegaQueryParams"); + auto obj = std::make_shared( + t[0].cast(), t[1].cast()); + obj->set_radius(t[2].cast()); + obj->set_is_linear(t[3].cast()); + obj->set_is_using_refiner(t[4].cast()); + return obj; + })); + // binding ivf query params py::class_> ivf_params(m, "IVFQueryParam", R"pbdoc( diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 18a81ee29..7740681d7 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -33,7 +33,7 @@ OmegaSearcher::OmegaSearcher(void) omega_enabled_(false), use_omega_mode_(false), target_recall_(0.95f), - min_vector_threshold_(10000), + min_vector_threshold_(100000), current_vector_count_(0), training_mode_enabled_(false), current_query_id_(0) {} @@ -46,7 +46,7 @@ int OmegaSearcher::init(const ailego::Params ¶ms) { // Get OMEGA-specific parameters omega_enabled_ = 
params.has("omega.enabled") ? params.get_as_bool("omega.enabled") : false; target_recall_ = params.has("omega.target_recall") ? params.get_as_float("omega.target_recall") : 0.95f; - min_vector_threshold_ = params.has("omega.min_vector_threshold") ? params.get_as_uint32("omega.min_vector_threshold") : 10000; + min_vector_threshold_ = params.has("omega.min_vector_threshold") ? params.get_as_uint32("omega.min_vector_threshold") : 100000; model_dir_ = params.has("omega.model_dir") ? params.get_as_string("omega.model_dir") : ""; // Call parent class init From 7e0042b1338dc538c9187a4b1e9e18627464c4ba Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 16 Mar 2026 15:06:35 +0800 Subject: [PATCH 012/126] perf(omega): optimize training memory by collecting data before flush Major optimization: - Move training data collection before Flush() to use in-memory graph - Eliminate ~2 minute disk reload delay for 1M vectors - Fix GT computation to use correct indexers (was using empty flushed ones) Training improvements: - Add ef_groundtruth parameter for faster GT computation using HNSW - Support parallel training searches with per-query ground truth - Add window_size parameter for early stopping control - Expose all OMEGA params through Python API (OmegaIndexParam, OmegaQueryParam) Code quality: - Add TIMING logs for performance debugging - Refactor TrainingDataCollector to use passed indexers instead of segment's - Clean up training flow in merge_vector_indexer() --- .../python/model/param/python_param.cc | 52 +- src/core/algorithm/omega/omega_searcher.cc | 42 +- src/core/algorithm/omega/omega_searcher.h | 15 + src/core/algorithm/omega/omega_streamer.cc | 25 +- src/core/algorithm/omega/omega_streamer.h | 7 + src/core/interface/indexes/omega_index.cc | 11 + .../column/vector_column/engine_helper.hpp | 12 +- .../vector_column/vector_column_indexer.cc | 10 + .../vector_column/vector_column_indexer.h | 23 + src/db/index/common/proto_converter.cc | 10 +- 
src/db/index/segment/segment.cc | 200 +++-- src/db/proto/zvec.proto | 4 + src/db/training/omega_model_trainer.cc | 3 +- src/db/training/training_data_collector.cc | 717 ++++++++++-------- src/db/training/training_data_collector.h | 14 +- src/include/zvec/core/interface/index.h | 2 + src/include/zvec/core/interface/training.h | 7 +- .../zvec/core/interface/training_capable.h | 12 + src/include/zvec/db/index_params.h | 45 +- thirdparty/omega | 2 +- 20 files changed, 787 insertions(+), 426 deletions(-) diff --git a/src/binding/python/model/param/python_param.cc b/src/binding/python/model/param/python_param.cc index 0fbe1b033..c150fd9d2 100644 --- a/src/binding/python/model/param/python_param.cc +++ b/src/binding/python/model/param/python_param.cc @@ -573,6 +573,16 @@ predict when to stop searching. Default is "./omega_models". num_training_queries (int): Number of training queries to generate for OMEGA model training. Default is 1000. + ef_training (int): Size of the candidate list (ef) used during training + searches. Larger values collect more training data but take longer. + Default is 1000. + window_size (int): Size of the sliding window for computing distance + statistics during search. Must be the same for training and inference. + Default is 100. + ef_groundtruth (int): Size of the candidate list (ef) used when computing + ground truth for training. If 0, brute force search is used (slower but exact). + If > 0, HNSW search with this ef is used (faster but approximate). + Default is 0 (brute force). Examples: >>> from zvec.typing import MetricType, QuantizeType @@ -582,13 +592,16 @@ predict when to stop searching. ... ef_construction=200, ... min_vector_threshold=50000, ... model_dir="./my_omega_models", - ... num_training_queries=500 + ... num_training_queries=500, + ... ef_training=800, + ... window_size=100, + ... ef_groundtruth=2000 # Use HNSW for faster ground truth computation ... 
) - >>> print(params.num_training_queries) - 500 + >>> print(params.ef_groundtruth) + 2000 )pbdoc"); omega_params - .def(py::init(), + .def(py::init(), py::arg("metric_type") = MetricType::IP, py::arg("m") = core_interface::kDefaultHnswNeighborCnt, py::arg("ef_construction") = @@ -596,7 +609,10 @@ predict when to stop searching. py::arg("quantize_type") = QuantizeType::UNDEFINED, py::arg("min_vector_threshold") = 100000, py::arg("model_dir") = "./omega_models", - py::arg("num_training_queries") = 1000) + py::arg("num_training_queries") = 1000, + py::arg("ef_training") = 1000, + py::arg("window_size") = 100, + py::arg("ef_groundtruth") = 0) .def_property_readonly( "m", &OmegaIndexParams::m, "int: Maximum number of neighbors per node in upper layers.") @@ -612,6 +628,15 @@ predict when to stop searching. .def_property_readonly( "num_training_queries", &OmegaIndexParams::num_training_queries, "int: Number of training queries for OMEGA model training.") + .def_property_readonly( + "ef_training", &OmegaIndexParams::ef_training, + "int: Candidate list size (ef) used during training searches.") + .def_property_readonly( + "window_size", &OmegaIndexParams::window_size, + "int: Sliding window size for distance statistics.") + .def_property_readonly( + "ef_groundtruth", &OmegaIndexParams::ef_groundtruth, + "int: ef for ground truth computation (0=brute force, >0=HNSW).") .def( "to_dict", [](const OmegaIndexParams &self) -> py::dict { @@ -623,6 +648,9 @@ predict when to stop searching. dict["min_vector_threshold"] = self.min_vector_threshold(); dict["model_dir"] = self.model_dir(); dict["num_training_queries"] = self.num_training_queries(); + dict["ef_training"] = self.ef_training(); + dict["window_size"] = self.window_size(); + dict["ef_groundtruth"] = self.ef_groundtruth(); dict["quantize_type"] = quantize_type_to_string(self.quantize_type()); return dict; @@ -641,6 +669,12 @@ predict when to stop searching. 
", \"model_dir\":\"" + self.model_dir() + "\"" + ", \"num_training_queries\":" + std::to_string(self.num_training_queries()) + + ", \"ef_training\":" + + std::to_string(self.ef_training()) + + ", \"window_size\":" + + std::to_string(self.window_size()) + + ", \"ef_groundtruth\":" + + std::to_string(self.ef_groundtruth()) + ", \"quantize_type\":" + quantize_type_to_string(self.quantize_type()) + "}"; }) @@ -649,15 +683,17 @@ predict when to stop searching. return py::make_tuple(self.metric_type(), self.m(), self.ef_construction(), self.quantize_type(), self.min_vector_threshold(), self.model_dir(), - self.num_training_queries()); + self.num_training_queries(), self.ef_training(), + self.window_size(), self.ef_groundtruth()); }, [](py::tuple t) { - if (t.size() != 7) + if (t.size() != 10) throw std::runtime_error("Invalid state for OmegaIndexParams"); return std::make_shared( t[0].cast(), t[1].cast(), t[2].cast(), t[3].cast(), t[4].cast(), - t[5].cast(), t[6].cast()); + t[5].cast(), t[6].cast(), t[7].cast(), + t[8].cast(), t[9].cast()); })); } diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 7740681d7..650662a32 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -35,6 +35,7 @@ OmegaSearcher::OmegaSearcher(void) target_recall_(0.95f), min_vector_threshold_(100000), current_vector_count_(0), + window_size_(100), training_mode_enabled_(false), current_query_id_(0) {} @@ -48,6 +49,7 @@ int OmegaSearcher::init(const ailego::Params ¶ms) { target_recall_ = params.has("omega.target_recall") ? params.get_as_float("omega.target_recall") : 0.95f; min_vector_threshold_ = params.has("omega.min_vector_threshold") ? params.get_as_uint32("omega.min_vector_threshold") : 100000; model_dir_ = params.has("omega.model_dir") ? params.get_as_string("omega.model_dir") : ""; + window_size_ = params.has("omega.window_size") ? 
params.get_as_int32("omega.window_size") : 100; // Call parent class init int ret = HnswSearcher::init(params); @@ -57,8 +59,8 @@ int OmegaSearcher::init(const ailego::Params ¶ms) { } LOG_INFO("OmegaSearcher initialized (omega_enabled=%d, target_recall=%.2f, " - "min_threshold=%u)", - omega_enabled_, target_recall_, min_vector_threshold_); + "min_threshold=%u, window_size=%d)", + omega_enabled_, target_recall_, min_vector_threshold_, window_size_); return 0; } @@ -207,6 +209,14 @@ void OmegaSearcher::ClearTrainingRecords() { LOG_INFO("Cleared %zu training records", collected_records_.size()); } +void OmegaSearcher::SetTrainingGroundTruth( + const std::vector>& ground_truth, int k_train) { + training_ground_truth_ = ground_truth; + training_k_train_ = k_train; + LOG_INFO("Set training ground truth for %zu queries, k_train=%d", + ground_truth.size(), k_train); +} + int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmeta, uint32_t count, ContextPointer &context) const { @@ -228,7 +238,7 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet } OmegaSearchHandle omega_search = omega_search_create_with_params( - model_to_use, target_recall, count, 100); // window_size=100 + model_to_use, target_recall, count, window_size_); if (omega_search == nullptr) { LOG_WARN("Failed to create OMEGA search context, falling back to HNSW"); @@ -237,8 +247,21 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet // Enable training mode if active (CRITICAL: must be before search) if (training_mode_enabled_) { - omega_search_enable_training(omega_search, current_query_id_); - LOG_DEBUG("Training mode enabled for query_id=%d", current_query_id_); + // Get ground truth for this query if available + std::vector gt_for_query; + if (current_query_id_ >= 0 && + static_cast(current_query_id_) < training_ground_truth_.size()) { + const auto& gt = training_ground_truth_[current_query_id_]; + 
gt_for_query.reserve(gt.size()); + for (uint64_t node_id : gt) { + gt_for_query.push_back(static_cast(node_id)); + } + } + omega_search_enable_training(omega_search, current_query_id_, + gt_for_query.data(), gt_for_query.size(), + training_k_train_); + LOG_DEBUG("Training mode enabled for query_id=%d with %zu GT nodes", + current_query_id_, gt_for_query.size()); } // OmegaContext extends HnswContext, so we can use it directly @@ -432,13 +455,8 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet omega_records[i].traversal_window_stats.size()); } - // Copy collected node IDs (convert from int to uint64_t) - record.collected_node_ids.reserve(omega_records[i].collected_node_ids.size()); - for (int node_id : omega_records[i].collected_node_ids) { - record.collected_node_ids.push_back(static_cast(node_id)); - } - - record.label = omega_records[i].label; // Default 0 + // Label is already computed in real-time during search + record.label = omega_records[i].label; collected_records_.push_back(std::move(record)); } diff --git a/src/core/algorithm/omega/omega_searcher.h b/src/core/algorithm/omega/omega_searcher.h index bbd2057b9..3ad2fb575 100644 --- a/src/core/algorithm/omega/omega_searcher.h +++ b/src/core/algorithm/omega/omega_searcher.h @@ -80,6 +80,18 @@ class OmegaSearcher : public HnswSearcher { */ void ClearTrainingRecords(); + /** + * @brief Set ground truth for training queries. + * + * Ground truth is used for real-time label computation during training. + * Labels are computed as: label=1 iff top k_train GT nodes are in current topk. 
+ * + * @param ground_truth 2D vector: ground_truth[query_id][rank] = node_id + * @param k_train Number of GT nodes to check for label (typically 1) + */ + void SetTrainingGroundTruth(const std::vector>& ground_truth, + int k_train = 1); + /** * @brief Public search method for OmegaStreamer to call * @@ -183,12 +195,15 @@ class OmegaSearcher : public HnswSearcher { uint32_t min_vector_threshold_; size_t current_vector_count_; std::string model_dir_; + int window_size_; // Training mode support bool training_mode_enabled_; int current_query_id_; mutable std::mutex training_mutex_; mutable std::vector collected_records_; + std::vector> training_ground_truth_; // [query_id][rank] = node_id + int training_k_train_; // Number of GT nodes to check for label }; } // namespace core diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 772a057fd..c7480721f 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -68,8 +68,21 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, } // Enable training mode (CRITICAL: must be before search) - omega_search_enable_training(omega_search, query_id); - LOG_DEBUG("Training mode enabled for query_id=%d", query_id); + // Get ground truth for this query if available + std::vector gt_for_query; + if (query_id >= 0 && + static_cast(query_id) < training_ground_truth_.size()) { + const auto& gt = training_ground_truth_[query_id]; + gt_for_query.reserve(gt.size()); + for (uint64_t node_id : gt) { + gt_for_query.push_back(static_cast(node_id)); + } + } + omega_search_enable_training(omega_search, query_id, + gt_for_query.data(), gt_for_query.size(), + training_k_train_); + LOG_DEBUG("Training mode enabled for query_id=%d with %zu GT nodes", + query_id, gt_for_query.size()); // Cast context to HnswContext to access HNSW-specific features auto *hnsw_ctx = dynamic_cast(context.get()); @@ -274,12 +287,8 @@ int 
OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, omega_record.traversal_window_stats.size()); } - // Copy collected_node_ids (convert int to node_id_t) - record.collected_node_ids.assign( - omega_record.collected_node_ids.begin(), - omega_record.collected_node_ids.end()); - - record.label = omega_record.label; // Default 0 + // Label is already computed in real-time during search + record.label = omega_record.label; omega_ctx->add_training_record(std::move(record)); } diff --git a/src/core/algorithm/omega/omega_streamer.h b/src/core/algorithm/omega/omega_streamer.h index 232ecde18..b19d63121 100644 --- a/src/core/algorithm/omega/omega_streamer.h +++ b/src/core/algorithm/omega/omega_streamer.h @@ -43,6 +43,11 @@ class OmegaStreamer : public HnswStreamer { // Training mode support void EnableTrainingMode(bool enable) { training_mode_enabled_ = enable; } void SetCurrentQueryId(int query_id) { current_query_id_ = query_id; } + void SetTrainingGroundTruth(const std::vector>& ground_truth, + int k_train = 1) { + training_ground_truth_ = ground_truth; + training_k_train_ = k_train; + } protected: /** @@ -72,6 +77,8 @@ class OmegaStreamer : public HnswStreamer { // Training mode state (for future implementation) bool training_mode_enabled_{false}; int current_query_id_{0}; + std::vector> training_ground_truth_; // [query_id][rank] = node_id + int training_k_train_{1}; // Number of GT nodes to check for label // Note: training records are now stored per-context in OmegaContext, not here }; diff --git a/src/core/interface/indexes/omega_index.cc b/src/core/interface/indexes/omega_index.cc index 976b22214..4aee616e2 100644 --- a/src/core/interface/indexes/omega_index.cc +++ b/src/core/interface/indexes/omega_index.cc @@ -108,6 +108,17 @@ void OmegaIndex::ClearTrainingRecords() { // no shared state to clear here. 
} +void OmegaIndex::SetTrainingGroundTruth( + const std::vector>& ground_truth, int k_train) { + // Delegate to OmegaStreamer if available + if (streamer_) { + auto* omega_streamer = dynamic_cast(streamer_.get()); + if (omega_streamer) { + omega_streamer->SetTrainingGroundTruth(ground_truth, k_train); + } + } +} + int OmegaIndex::_prepare_for_search( const VectorData &vector_data, const BaseIndexQueryParam::Pointer &search_param, diff --git a/src/db/index/column/vector_column/engine_helper.hpp b/src/db/index/column/vector_column/engine_helper.hpp index 40438b0bc..e80e0cc2d 100644 --- a/src/db/index/column/vector_column/engine_helper.hpp +++ b/src/db/index/column/vector_column/engine_helper.hpp @@ -353,8 +353,6 @@ class ProximaEngineHelper { } case IndexType::OMEGA: { - fprintf(stderr, "[DEBUG] convert_to_engine_index_param: OMEGA case entered!\n"); - fflush(stderr); // OMEGA uses its own index type at core_interface level auto index_param_builder_result = _build_common_index_paramBuild(); - fprintf(stderr, "[DEBUG] convert_to_engine_index_param: Before override, index_type=%d\n", - static_cast(hnsw_param->index_type)); - fflush(stderr); hnsw_param->index_type = core_interface::IndexType::kOMEGA; - fprintf(stderr, "[DEBUG] convert_to_engine_index_param: After override, index_type=%d\n", - static_cast(hnsw_param->index_type)); - fprintf(stderr, "[DEBUG] convert_to_engine_index_param: kOMEGA enum value=%d\n", - static_cast(core_interface::IndexType::kOMEGA)); - fflush(stderr); // Store OMEGA-specific params in the params field // These will be used by OmegaSearcher::init() @@ -392,6 +382,8 @@ class ProximaEngineHelper { db_index_params->min_vector_threshold()); hnsw_param->params.insert("omega.model_dir", db_index_params->model_dir()); + hnsw_param->params.insert("omega.window_size", + db_index_params->window_size()); return hnsw_param; } diff --git a/src/db/index/column/vector_column/vector_column_indexer.cc 
b/src/db/index/column/vector_column/vector_column_indexer.cc index b9bc8f40e..3650bc044 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.cc +++ b/src/db/index/column/vector_column/vector_column_indexer.cc @@ -261,6 +261,16 @@ void VectorColumnIndexer::ClearTrainingRecords() { } } +void VectorColumnIndexer::SetTrainingGroundTruth( + const std::vector>& ground_truth, int k_train) { + // Propagate to underlying index if it exists and supports training + if (index != nullptr) { + if (auto* training_capable = index->GetTrainingCapability()) { + training_capable->SetTrainingGroundTruth(ground_truth, k_train); + } + } +} + core_interface::ITrainingCapable* VectorColumnIndexer::GetTrainingCapability() const { if (index != nullptr) { return index->GetTrainingCapability(); diff --git a/src/db/index/column/vector_column/vector_column_indexer.h b/src/db/index/column/vector_column/vector_column_indexer.h index 9f9f93214..d61fc2b2e 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.h +++ b/src/db/index/column/vector_column/vector_column_indexer.h @@ -137,6 +137,18 @@ class VectorColumnIndexer { */ void ClearTrainingRecords(); + /** + * @brief Set ground truth for training queries. + * + * Ground truth is used for real-time label computation during training. + * Labels are computed as: label=1 iff top k_train GT nodes are in current topk. 
+ * + * @param ground_truth 2D vector: ground_truth[query_id][rank] = node_id + * @param k_train Number of GT nodes to check for label (typically 1) + */ + void SetTrainingGroundTruth(const std::vector>& ground_truth, + int k_train = 1); + public: std::string index_file_path() const { return index_file_path_; @@ -149,6 +161,17 @@ class VectorColumnIndexer { return index->GetDocCount(); } + MetricType metric_type() const { + auto index_params = field_schema_.index_params(); + if (index_params) { + auto vector_params = std::dynamic_pointer_cast(index_params); + if (vector_params) { + return vector_params->metric_type(); + } + } + return MetricType::IP; // default + } + // for ut protected: VectorColumnIndexer() = default; diff --git a/src/db/index/common/proto_converter.cc b/src/db/index/common/proto_converter.cc index 3bfac9cd3..2c1cb602f 100644 --- a/src/db/index/common/proto_converter.cc +++ b/src/db/index/common/proto_converter.cc @@ -83,7 +83,11 @@ OmegaIndexParams::OPtr ProtoConverter::FromPb( params_pb.ef_construction(), QuantizeTypeCodeBook::Get(params_pb.base().quantize_type()), params_pb.min_vector_threshold(), - params_pb.model_dir()); + params_pb.model_dir(), + params_pb.num_training_queries(), + params_pb.ef_training(), + params_pb.window_size(), + params_pb.ef_groundtruth()); return params; } @@ -98,6 +102,10 @@ proto::OmegaIndexParams ProtoConverter::ToPb(const OmegaIndexParams *params) { params_pb.set_m(params->m()); params_pb.set_min_vector_threshold(params->min_vector_threshold()); params_pb.set_model_dir(params->model_dir()); + params_pb.set_num_training_queries(params->num_training_queries()); + params_pb.set_ef_training(params->ef_training()); + params_pb.set_window_size(params->window_size()); + params_pb.set_ef_groundtruth(params->ef_groundtruth()); return params_pb; } diff --git a/src/db/index/segment/segment.cc b/src/db/index/segment/segment.cc index c0c6bb84a..9a2c8781b 100644 --- a/src/db/index/segment/segment.cc +++ 
b/src/db/index/segment/segment.cc @@ -14,11 +14,13 @@ #include "segment.h" #include +#include #include #include #include #include #include +#include #include #include #include @@ -1589,15 +1591,25 @@ Result SegmentImpl::merge_vector_indexer( const std::string &index_file_path, const std::string &column, const FieldSchema &field, int concurrency) { + LOG_INFO("[TIMING] merge_vector_indexer START for field '%s'", column.c_str()); + auto timing_start = std::chrono::steady_clock::now(); + VectorColumnIndexer::Ptr vector_indexer = std::make_shared(index_file_path, field); vector_column_params::ReadOptions options{options_.enable_mmap_, true}; + LOG_INFO("[TIMING] About to Open (create_new=true)"); + auto open_start = std::chrono::steady_clock::now(); auto s = vector_indexer->Open(options); CHECK_RETURN_STATUS_EXPECTED(s); + auto open_end = std::chrono::steady_clock::now(); + LOG_INFO("[TIMING] Open completed in %ld ms", + std::chrono::duration_cast(open_end - open_start).count()); + std::vector to_merge_indexers = vector_indexers_[column]; + LOG_INFO("[TIMING] to_merge_indexers count: %zu", to_merge_indexers.size()); vector_column_params::MergeOptions merge_options; @@ -1608,101 +1620,119 @@ Result SegmentImpl::merge_vector_indexer( } else { merge_options.write_concurrency = concurrency; } + LOG_INFO("[TIMING] About to Merge"); + auto merge_start = std::chrono::steady_clock::now(); s = vector_indexer->Merge(to_merge_indexers, filter_, merge_options); CHECK_RETURN_STATUS_EXPECTED(s); + auto merge_end = std::chrono::steady_clock::now(); + LOG_INFO("[TIMING] Merge completed in %ld ms", + std::chrono::duration_cast(merge_end - merge_start).count()); // Check if this is a trainable index (OMEGA) auto* training_capable = vector_indexer->GetTrainingCapability(); - bool needs_training = (training_capable != nullptr && vector_indexer->doc_count() >= 100); + bool needs_training = false; std::string model_output_dir; - if (needs_training) { - - LOG_INFO("Trainable index detected 
after merge for field '%s' in segment %d (doc_count=%zu)", - column.c_str(), id(), vector_indexer->doc_count()); + if (training_capable != nullptr) { + // Get min_vector_threshold from OMEGA index params + uint32_t min_vector_threshold = 100000; // default + if (auto omega_params = std::dynamic_pointer_cast(field.index_params())) { + min_vector_threshold = omega_params->min_vector_threshold(); + } - // Compute model output directory - std::string segment_dir = index_file_path.substr(0, index_file_path.rfind('/')); - model_output_dir = segment_dir + "/omega_model"; - } else { + size_t doc_count = vector_indexer->doc_count(); + if (doc_count >= min_vector_threshold) { + needs_training = true; + LOG_INFO("Trainable index detected after merge for field '%s' in segment %d (doc_count=%zu >= min_vector_threshold=%u)", + column.c_str(), id(), doc_count, min_vector_threshold); + } else { + LOG_INFO("Skipping OMEGA training for field '%s': doc_count=%zu < min_vector_threshold=%u", + column.c_str(), doc_count, min_vector_threshold); + } } - // Flush to persist the data - s = vector_indexer->Flush(); - CHECK_RETURN_STATUS_EXPECTED(s); + // OPTIMIZATION: Collect training data BEFORE Flush() while the in-memory graph still exists. + // This avoids the expensive disk reload (~2 minutes for 1M vectors) that was previously needed. + // The model training itself doesn't need the graph, only the collected records. + std::optional training_result_opt; - // After Flush, the indexer is persisted but in-memory graph is cleared. - // For training, we need to reopen the indexer to load the graph from disk. 
if (needs_training) { - LOG_INFO("Starting OMEGA auto-training for field '%s' (reopening indexer after flush)", column.c_str()); + // Compute model output directory + std::string segment_dir = index_file_path.substr(0, index_file_path.rfind('/')); + model_output_dir = segment_dir + "/omega_model"; - // Reopen the indexer to load the persisted graph (create_new=false to load existing) - VectorColumnIndexer::Ptr training_indexer = - std::make_shared(index_file_path, field); - vector_column_params::ReadOptions read_options{options_.enable_mmap_, false}; + LOG_INFO("Starting OMEGA training data collection for field '%s' (using in-memory graph before flush)", column.c_str()); - auto reopen_status = training_indexer->Open(read_options); - if (!reopen_status.ok()) { - LOG_WARN("Failed to reopen indexer for training: %s", reopen_status.message().c_str()); + // Get training params from index params + size_t num_training_queries = 1000; // default + int ef_training = 1000; // default + int ef_groundtruth = 0; // default: brute force + if (auto omega_params = std::dynamic_pointer_cast(field.index_params())) { + num_training_queries = omega_params->num_training_queries(); + ef_training = omega_params->ef_training(); + ef_groundtruth = omega_params->ef_groundtruth(); + LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d, ef_groundtruth=%d", + num_training_queries, ef_training, ef_groundtruth); + } + + // Collect training data using the current indexer (in-memory graph still exists) + TrainingDataCollectorOptions collector_opts; + size_t doc_count = vector_indexer->doc_count(); + collector_opts.num_training_queries = std::min(doc_count, num_training_queries); + collector_opts.ef_training = ef_training; + collector_opts.ef_groundtruth = ef_groundtruth; + collector_opts.topk = 100; + collector_opts.k_train = 1; // Label=1 when top-1 GT found + + // Use the current vector_indexer which still has the in-memory graph + std::vector training_indexers = 
{vector_indexer}; + + auto training_result = TrainingDataCollector::CollectTrainingDataWithGtCmps( + shared_from_this(), column, collector_opts, training_indexers); + + if (training_result.has_value()) { + training_result_opt = std::move(training_result.value()); + LOG_INFO("Collected %zu training records (before flush)", training_result_opt->records.size()); } else { - // Get training params from index params - size_t num_training_queries = 1000; // default - int ef_training = 1000; // default - if (auto omega_params = std::dynamic_pointer_cast(field.index_params())) { - num_training_queries = omega_params->num_training_queries(); - ef_training = omega_params->ef_construction(); // Use ef_construction for training - LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d", - num_training_queries, ef_training); - } - - // Collect training data - TrainingDataCollectorOptions collector_opts; - size_t doc_count = training_indexer->doc_count(); - collector_opts.num_training_queries = std::min(doc_count, num_training_queries); - collector_opts.ef_training = ef_training; - collector_opts.topk = 100; - collector_opts.k_train = 1; // Label=1 when top-1 GT found - - std::vector training_indexers = {training_indexer}; + LOG_WARN("Failed to collect training data: %s", training_result.error().message().c_str()); + } + } - auto training_result = TrainingDataCollector::CollectTrainingDataWithGtCmps( - shared_from_this(), column, collector_opts, training_indexers); + // Now flush to persist the data (this clears the in-memory graph) + s = vector_indexer->Flush(); + CHECK_RETURN_STATUS_EXPECTED(s); - if (training_result.has_value()) { - auto& result = training_result.value(); - LOG_INFO("Collected %zu training records", result.records.size()); + // Train the model using the previously collected data (doesn't need the graph) + if (needs_training && training_result_opt.has_value()) { + auto& result = training_result_opt.value(); - if (result.records.size() >= 
100) { + if (result.records.size() >= 100) { #ifdef ZVEC_ENABLE_OMEGA - // Train the model - OmegaModelTrainerOptions trainer_opts; - trainer_opts.output_dir = model_output_dir; - trainer_opts.verbose = true; - - // Create output directory if it doesn't exist - if (!FileHelper::DirectoryExists(model_output_dir)) { - if (!FileHelper::CreateDirectory(model_output_dir)) { - LOG_WARN("Failed to create model output directory: %s", model_output_dir.c_str()); - } - } - - auto train_status = OmegaModelTrainer::TrainModelWithGtCmps( - result.records, result.gt_cmps_data, trainer_opts); - if (train_status.ok()) { - LOG_INFO("OMEGA model training completed successfully: %s", trainer_opts.output_dir.c_str()); - } else { - LOG_WARN("OMEGA model training failed: %s", train_status.message().c_str()); - } -#else - LOG_INFO("OMEGA training skipped (ZVEC_ENABLE_OMEGA not defined)"); -#endif - } else { - LOG_INFO("Skipping model training: only %zu records collected (need >= 100)", result.records.size()); + // Train the model + OmegaModelTrainerOptions trainer_opts; + trainer_opts.output_dir = model_output_dir; + trainer_opts.verbose = true; + + // Create output directory if it doesn't exist + if (!FileHelper::DirectoryExists(model_output_dir)) { + if (!FileHelper::CreateDirectory(model_output_dir)) { + LOG_WARN("Failed to create model output directory: %s", model_output_dir.c_str()); } + } + + auto train_status = OmegaModelTrainer::TrainModelWithGtCmps( + result.records, result.gt_cmps_data, trainer_opts); + if (train_status.ok()) { + LOG_INFO("OMEGA model training completed successfully: %s", trainer_opts.output_dir.c_str()); } else { - LOG_WARN("Failed to collect training data: %s", training_result.error().message().c_str()); + LOG_WARN("OMEGA model training failed: %s", train_status.message().c_str()); } +#else + LOG_INFO("OMEGA training skipped (ZVEC_ENABLE_OMEGA not defined)"); +#endif + } else { + LOG_INFO("Skipping model training: only %zu records collected (need >= 100)", 
result.records.size()); } } @@ -2320,20 +2350,40 @@ Status SegmentImpl::auto_train_omega_index_internal( // Get training params from index params size_t num_training_queries = 1000; // default int ef_training = 1000; // default + int ef_groundtruth = 0; // default: brute force + uint32_t min_vector_threshold = 100000; // default auto field = collection_schema_->get_field(field_name); if (field && field->index_params()) { if (auto omega_params = std::dynamic_pointer_cast(field->index_params())) { num_training_queries = omega_params->num_training_queries(); - ef_training = omega_params->ef_construction(); // Use ef_construction for training - LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d", - num_training_queries, ef_training); + ef_training = omega_params->ef_training(); + ef_groundtruth = omega_params->ef_groundtruth(); + min_vector_threshold = omega_params->min_vector_threshold(); + LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d, ef_groundtruth=%d, min_vector_threshold=%u", + num_training_queries, ef_training, ef_groundtruth, min_vector_threshold); } } + // Check if we have enough vectors to justify training + size_t total_doc_count = 0; + for (const auto& indexer : indexers) { + total_doc_count += indexer->doc_count(); + } + + if (total_doc_count < min_vector_threshold) { + LOG_INFO("Skipping OMEGA training for field '%s': doc_count=%zu < min_vector_threshold=%u", + field_name.c_str(), total_doc_count, min_vector_threshold); + return Status::OK(); + } + + LOG_INFO("Proceeding with OMEGA training: doc_count=%zu >= min_vector_threshold=%u", + total_doc_count, min_vector_threshold); + // Step 1: Collect training data using the provided indexers TrainingDataCollectorOptions collector_options; collector_options.num_training_queries = num_training_queries; collector_options.ef_training = ef_training; + collector_options.ef_groundtruth = ef_groundtruth; collector_options.topk = 100; 
collector_options.noise_scale = 0.01f; diff --git a/src/db/proto/zvec.proto b/src/db/proto/zvec.proto index 865099b3c..a1de4a404 100644 --- a/src/db/proto/zvec.proto +++ b/src/db/proto/zvec.proto @@ -106,6 +106,10 @@ message OmegaIndexParams { int32 ef_construction = 3; uint32 min_vector_threshold = 4; string model_dir = 5; + uint32 num_training_queries = 6; + int32 ef_training = 7; + int32 window_size = 8; + int32 ef_groundtruth = 9; // 0 = brute force, >0 = HNSW with this ef } message IndexParams { diff --git a/src/db/training/omega_model_trainer.cc b/src/db/training/omega_model_trainer.cc index 7b1b4f135..59a061145 100644 --- a/src/db/training/omega_model_trainer.cc +++ b/src/db/training/omega_model_trainer.cc @@ -34,8 +34,7 @@ omega::TrainingRecord ConvertRecord(const core_interface::TrainingRecord& src) { // Convert std::array to std::vector dst.traversal_window_stats.assign(src.traversal_window_stats.begin(), src.traversal_window_stats.end()); - dst.label = src.label; - dst.collected_node_ids = src.collected_node_ids; + dst.label = src.label; // Already computed in real-time during search return dst; } diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 719da0a4a..98c1c866e 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -26,6 +26,7 @@ #include #include "db/index/column/vector_column/vector_column_params.h" #include "query_generator.h" +#include namespace zvec { @@ -73,7 +74,23 @@ TrainingDataCollector::CollectTrainingData( const std::string& field_name, const TrainingDataCollectorOptions& options, const std::vector& provided_indexers) { - // Step 1: Generate training queries using held-out approach + // Step 1: Get indexers first (needed for metric type) + std::vector indexers; + if (!provided_indexers.empty()) { + indexers = provided_indexers; + } else { + indexers = segment->get_vector_indexer(field_name); + } + + if (indexers.empty()) { + 
return tl::make_unexpected( + Status::InternalError("No vector indexers found for field: " + field_name)); + } + + // Get metric type from first indexer + MetricType metric_type = indexers[0]->metric_type(); + + // Step 2: Generate training queries using held-out approach LOG_INFO("Generating %zu held-out training queries for field '%s'", options.num_training_queries, field_name.c_str()); @@ -87,41 +104,29 @@ TrainingDataCollector::CollectTrainingData( Status::InternalError("Failed to generate training queries")); } - // Step 2: Compute ground truth (brute force search, excluding self-matches) - LOG_INFO("Computing ground truth with brute force search (topk=%zu, excluding self)", - options.topk); + // Step 3: Compute ground truth (brute force or HNSW search, excluding self-matches) + LOG_INFO("Computing ground truth (topk=%zu, ef_groundtruth=%d, excluding self)", + options.topk, options.ef_groundtruth); auto ground_truth = ComputeGroundTruth( segment, field_name, training_queries, options.topk, options.num_threads, - query_doc_ids); + query_doc_ids, options.ef_groundtruth, metric_type, indexers); if (ground_truth.empty()) { return tl::make_unexpected( Status::InternalError("Failed to compute ground truth")); } - // Step 3: Choose indexers for training - // CRITICAL: If provided_indexers is given, use those (just-merged indexers) - // Otherwise, get indexers from segment (persisted indexers) - std::vector indexers; - - if (!provided_indexers.empty()) { - indexers = provided_indexers; - } else { - indexers = segment->get_vector_indexer(field_name); - } - - if (indexers.empty()) { - return tl::make_unexpected( - Status::InternalError("No vector indexers found for field: " + field_name)); - } - LOG_INFO("Found %zu indexers for field '%s' (will enable training on all, but only training-capable ones will collect)", indexers.size(), field_name.c_str()); - // Step 4: Enable training mode on all indexers - LOG_INFO("Enabling training mode on %zu indexers", indexers.size()); 
+ // Step 4: Set ground truth and enable training mode on all indexers + LOG_INFO("Setting ground truth (%zu queries) and enabling training mode on %zu indexers", + ground_truth.size(), indexers.size()); for (auto& indexer : indexers) { + // Set ground truth for real-time label computation + indexer->SetTrainingGroundTruth(ground_truth, options.k_train); + auto status = indexer->EnableTrainingMode(true); if (!status.ok()) { LOG_WARN("Failed to enable training mode on indexer: %s", @@ -235,11 +240,17 @@ TrainingDataCollector::CollectTrainingData( LOG_WARN("No training records collected from any indexer"); } - // Step 7: Fill labels based on ground truth - LOG_INFO("Filling labels for %zu records (k_train=%zu)", all_records.size(), options.k_train); - FillLabels(&all_records, ground_truth, search_results, options.k_train); + // Labels are now computed in real-time during search (no FillLabels needed) + // Count positive/negative labels for verification + size_t positive_count = 0, negative_count = 0; + for (const auto& record : all_records) { + if (record.label > 0) positive_count++; + else negative_count++; + } + LOG_INFO("Collected %zu records: %zu positive, %zu negative", + all_records.size(), positive_count, negative_count); - // Step 8: Disable training mode and clear records + // Step 7: Disable training mode and clear records for (auto& indexer : indexers) { indexer->EnableTrainingMode(false); indexer->ClearTrainingRecords(); @@ -257,124 +268,338 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( const std::vector>& queries, size_t topk, size_t num_threads, - const std::vector& query_doc_ids) { + const std::vector& query_doc_ids, + int ef_groundtruth, + MetricType metric_type, + const std::vector& provided_indexers) { std::vector> ground_truth(queries.size()); + if (queries.empty()) { + return ground_truth; + } + // Check if we have query doc_ids for self-exclusion (held-out mode) bool held_out_mode = !query_doc_ids.empty() && query_doc_ids.size() == 
queries.size(); if (held_out_mode) { LOG_INFO("Computing ground truth in held-out mode (excluding self-matches)"); } - // Get vector indexer (use brute force with is_linear=true) - auto combined_indexer = segment->get_combined_vector_indexer(field_name); - if (!combined_indexer) { - LOG_ERROR("Failed to get vector indexer for field: %s", field_name.c_str()); - return ground_truth; - } + // Get total document count + uint64_t doc_count = segment->doc_count(); + size_t dim = queries[0].size(); - // Determine thread count - size_t actual_threads = num_threads; - if (actual_threads == 0) { - actual_threads = std::thread::hardware_concurrency(); - } - actual_threads = std::min(actual_threads, queries.size()); + LOG_INFO("Computing ground truth: %zu queries, %zu base vectors, dim=%zu, topk=%zu, metric=%s, ef_groundtruth=%d", + queries.size(), static_cast(doc_count), dim, topk, + MetricTypeCodeBook::AsString(metric_type).c_str(), ef_groundtruth); - DebugLog("[ComputeGroundTruth] Starting PARALLEL brute force search for " + - std::to_string(queries.size()) + " queries, topk=" + std::to_string(topk) + - ", threads=" + std::to_string(actual_threads)); + auto start_time = std::chrono::high_resolution_clock::now(); - auto loop_start = std::chrono::high_resolution_clock::now(); - std::atomic completed_queries{0}; - std::mutex log_mutex; + // ============================================================ + // Branch 1: HNSW search (ef_groundtruth > 0) + // Faster for large datasets, approximate results + // ============================================================ + if (ef_groundtruth > 0) { + DebugLog("[ComputeGroundTruth] Using HNSW search with ef=" + std::to_string(ef_groundtruth)); - // Worker function for a range of queries - auto worker = [&](size_t start_idx, size_t end_idx) { - for (size_t query_idx = start_idx; query_idx < end_idx; ++query_idx) { - const auto& query_vector = queries[query_idx]; + // Use provided indexers if available, otherwise get from segment + // 
IMPORTANT: We must use provided_indexers when available because after Flush, + // segment->get_vector_indexer() returns stale indexers with cleared in-memory data + std::vector indexers; + if (!provided_indexers.empty()) { + indexers = provided_indexers; + DebugLog("[ComputeGroundTruth] Using provided indexers (count=" + std::to_string(indexers.size()) + ")"); + } else { + indexers = segment->get_vector_indexer(field_name); + DebugLog("[ComputeGroundTruth] Using indexers from segment (count=" + std::to_string(indexers.size()) + ")"); + } - // In held-out mode, request topk+1 results since we'll exclude self - size_t search_topk = held_out_mode ? topk + 1 : topk; + if (indexers.empty()) { + LOG_ERROR("No vector indexers found for field '%s', falling back to brute force", field_name.c_str()); + ef_groundtruth = 0; // Fall back to brute force + } else { + // For held-out mode, we need topk+1 to exclude self + size_t actual_topk = held_out_mode ? topk + 1 : topk; + + // ======================================================== + // WARM-UP STEP: Pre-load index data to avoid cold-start penalty + // Without this, the first searches are very slow due to lazy loading. + // We use parallel warmup with all threads to load data faster. + // ======================================================== + { + DebugLog("[ComputeGroundTruth] Warming up HNSW index..."); + auto warmup_start = std::chrono::high_resolution_clock::now(); + + // Warmup count: use a fraction of queries spread across threads + // This helps load different parts of the index in parallel + size_t actual_threads = num_threads > 0 ? 
num_threads : std::thread::hardware_concurrency(); + size_t warmup_per_thread = 5; // Each thread does 5 warmup queries + size_t warmup_total = std::min(actual_threads * warmup_per_thread, queries.size()); + + // Parallel warmup using std::thread + std::vector warmup_threads; + std::atomic warmup_completed{0}; + + auto warmup_worker = [&](size_t start_idx, size_t count) { + for (size_t i = 0; i < count && (start_idx + i) < queries.size(); ++i) { + size_t q_idx = start_idx + i; + vector_column_params::VectorData vector_data; + vector_data.vector = vector_column_params::DenseVector{ + .data = const_cast(static_cast(queries[q_idx].data())) + }; + + vector_column_params::QueryParams query_params; + query_params.topk = actual_topk; + query_params.fetch_vector = false; + query_params.filter = segment->get_filter().get(); + + auto omega_params = std::make_shared(); + omega_params->set_ef(ef_groundtruth); + omega_params->set_training_query_id(-1); // Warmup, don't collect training data + query_params.query_params = omega_params; + + indexers[0]->Search(vector_data, query_params); + ++warmup_completed; + } + }; - // Prepare query parameters for brute force search - vector_column_params::VectorData vector_data; - vector_data.vector = vector_column_params::DenseVector{ - .data = const_cast(static_cast(query_vector.data())) - }; + // Launch warmup threads + size_t queries_per_warmup_thread = warmup_total / actual_threads; + for (size_t t = 0; t < actual_threads && t * queries_per_warmup_thread < warmup_total; ++t) { + size_t start = t * queries_per_warmup_thread; + size_t count = std::min(queries_per_warmup_thread, warmup_total - start); + if (count > 0) { + warmup_threads.emplace_back(warmup_worker, start, count); + } + } - vector_column_params::QueryParams query_params; - query_params.topk = search_topk; - query_params.fetch_vector = false; - query_params.filter = segment->get_filter().get(); + for (auto& t : warmup_threads) { + t.join(); + } - // Use linear search (brute 
force) for ground truth - auto base_params = std::make_shared(search_topk); - base_params->set_is_linear(true); - query_params.query_params = base_params; + auto warmup_end = std::chrono::high_resolution_clock::now(); + auto warmup_ms = std::chrono::duration_cast(warmup_end - warmup_start).count(); + DebugLog("[ComputeGroundTruth] Warmup completed in " + std::to_string(warmup_ms) + + " ms (" + std::to_string(warmup_completed.load()) + " queries, " + + std::to_string(warmup_threads.size()) + " threads)"); - // Perform search - auto search_result = combined_indexer->Search(vector_data, query_params); - if (!search_result.has_value()) { - LOG_WARN("Ground truth search failed for query %zu: %s", - query_idx, search_result.error().message().c_str()); - continue; + // Note: If warmup takes very long (>60s), recommend using ef_groundtruth=0 (Eigen brute force) + if (warmup_ms > 60000) { + LOG_WARN("HNSW warmup took %zu ms. For cold indexes, consider using ef_groundtruth=0 (Eigen brute force)", + warmup_ms); + } } - // Extract result doc IDs, excluding self in held-out mode - auto& results = search_result.value(); - std::vector gt_ids; - gt_ids.reserve(topk); + // Pre-allocate ground_truth for thread-safe access + ground_truth.resize(queries.size()); + + // Use std::thread instead of OpenMP (same as training searches) + size_t actual_threads = num_threads > 0 ? 
num_threads : std::thread::hardware_concurrency(); + actual_threads = std::min(actual_threads, queries.size()); + + std::atomic completed{0}; + auto search_start = std::chrono::high_resolution_clock::now(); + + auto worker = [&](size_t start_idx, size_t end_idx) { + for (size_t q = start_idx; q < end_idx; ++q) { + // Prepare query parameters (exactly same as training searches) + vector_column_params::VectorData vector_data; + vector_data.vector = vector_column_params::DenseVector{ + .data = const_cast(static_cast(queries[q].data())) + }; + + vector_column_params::QueryParams query_params; + query_params.topk = actual_topk; + query_params.fetch_vector = false; + query_params.filter = segment->get_filter().get(); + + // Create OmegaQueryParams (same as training searches) + auto omega_params = std::make_shared(); + omega_params->set_ef(ef_groundtruth); + omega_params->set_training_query_id(static_cast(q)); + query_params.query_params = omega_params; + + // Search on first indexer (same as training searches) + auto search_result = indexers[0]->Search(vector_data, query_params); + if (search_result.has_value()) { + auto& results = search_result.value(); + std::vector result_ids; + result_ids.reserve(results->count()); + auto iter = results->create_iterator(); + while (iter->valid()) { + uint64_t doc_id = iter->doc_id(); + // Skip self in held-out mode + if (held_out_mode && doc_id == query_doc_ids[q]) { + iter->next(); + continue; + } + result_ids.push_back(doc_id); + if (result_ids.size() >= topk) break; + iter->next(); + } + ground_truth[q] = std::move(result_ids); + } - uint64_t self_doc_id = held_out_mode ? 
query_doc_ids[query_idx] : UINT64_MAX; + // Progress logging + size_t done = ++completed; + if (done % 500 == 0 || done == queries.size()) { + auto elapsed = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - search_start).count(); + DebugLog("[ComputeGroundTruth] HNSW progress: " + std::to_string(done) + "/" + + std::to_string(queries.size()) + ", elapsed: " + std::to_string(elapsed) + " ms"); + } + } + }; - auto iter = results->create_iterator(); - while (iter->valid() && gt_ids.size() < topk) { - uint64_t doc_id = iter->doc_id(); - // Skip self in held-out mode - if (doc_id != self_doc_id) { - gt_ids.push_back(doc_id); + // Launch threads + std::vector threads; + size_t queries_per_thread = (queries.size() + actual_threads - 1) / actual_threads; + for (size_t t = 0; t < actual_threads; ++t) { + size_t start_idx = t * queries_per_thread; + size_t end_idx = std::min(start_idx + queries_per_thread, queries.size()); + if (start_idx < end_idx) { + threads.emplace_back(worker, start_idx, end_idx); } - iter->next(); } - ground_truth[query_idx] = std::move(gt_ids); - - // Update progress - size_t completed = ++completed_queries; - if (completed % 100 == 0 || completed == queries.size()) { - std::lock_guard lock(log_mutex); - auto now = std::chrono::high_resolution_clock::now(); - auto elapsed_ms = std::chrono::duration_cast(now - loop_start).count(); - DebugLog("[ComputeGroundTruth] Progress: " + std::to_string(completed) + "/" + - std::to_string(queries.size()) + ", elapsed: " + std::to_string(elapsed_ms) + " ms"); + + // Wait for all threads + for (auto& thread : threads) { + thread.join(); + } + + auto end_time = std::chrono::high_resolution_clock::now(); + auto total_ms = std::chrono::duration_cast(end_time - start_time).count(); + DebugLog("[ComputeGroundTruth] HNSW search completed in " + std::to_string(total_ms) + " ms"); + LOG_INFO("Computed ground truth (HNSW ef=%d) for %zu queries in %zu ms", + ef_groundtruth, queries.size(), total_ms); 
+ return ground_truth; + } + } + + // ============================================================ + // Branch 2: Eigen brute force (ef_groundtruth == 0) + // Exact results, uses batch matrix multiplication + // ============================================================ + DebugLog("[ComputeGroundTruth] Using Eigen brute force search"); + + // Convert zvec MetricType to omega MetricType + omega::MetricType omega_metric; + switch (metric_type) { + case MetricType::L2: + omega_metric = omega::MetricType::L2; + break; + case MetricType::COSINE: + omega_metric = omega::MetricType::COSINE; + break; + case MetricType::IP: + default: + omega_metric = omega::MetricType::IP; + break; + } + + // Step 1: Load all base vectors into memory + DebugLog("[ComputeGroundTruth] Loading " + std::to_string(doc_count) + " base vectors..."); + auto load_start = std::chrono::high_resolution_clock::now(); + + std::vector base_vectors(doc_count * dim); + std::atomic loaded_count{0}; + std::atomic load_error{false}; + + // Load vectors in parallel + size_t actual_threads = num_threads > 0 ? 
num_threads : std::thread::hardware_concurrency(); + actual_threads = std::min(actual_threads, static_cast(doc_count)); + + auto load_worker = [&](size_t start_idx, size_t end_idx) { + for (size_t doc_idx = start_idx; doc_idx < end_idx && !load_error; ++doc_idx) { + auto doc = segment->Fetch(doc_idx); + if (!doc) { + LOG_WARN("Failed to fetch document at index %zu", doc_idx); + load_error = true; + continue; + } + + auto vector_opt = doc->get>(field_name); + if (!vector_opt.has_value()) { + LOG_WARN("Document at index %zu does not have field '%s'", doc_idx, field_name.c_str()); + load_error = true; + continue; + } + + const auto& vec = vector_opt.value(); + if (vec.size() != dim) { + LOG_WARN("Vector at index %zu has wrong dimension: %zu vs %zu", doc_idx, vec.size(), dim); + load_error = true; + continue; + } + + std::memcpy(base_vectors.data() + doc_idx * dim, vec.data(), dim * sizeof(float)); + ++loaded_count; + + // Progress logging + size_t count = loaded_count.load(); + if (count % 100000 == 0) { + DebugLog("[ComputeGroundTruth] Loaded " + std::to_string(count) + "/" + + std::to_string(doc_count) + " vectors"); } } }; - // Launch threads - std::vector threads; - size_t queries_per_thread = (queries.size() + actual_threads - 1) / actual_threads; + std::vector load_threads; + size_t docs_per_thread = (doc_count + actual_threads - 1) / actual_threads; for (size_t t = 0; t < actual_threads; ++t) { - size_t start_idx = t * queries_per_thread; - size_t end_idx = std::min(start_idx + queries_per_thread, queries.size()); + size_t start_idx = t * docs_per_thread; + size_t end_idx = std::min(start_idx + docs_per_thread, static_cast(doc_count)); if (start_idx < end_idx) { - threads.emplace_back(worker, start_idx, end_idx); + load_threads.emplace_back(load_worker, start_idx, end_idx); } } - // Wait for all threads - for (auto& thread : threads) { + for (auto& thread : load_threads) { thread.join(); } - auto loop_end = std::chrono::high_resolution_clock::now(); - auto 
total_ms = std::chrono::duration_cast(loop_end - loop_start).count(); - DebugLog("[ComputeGroundTruth] Completed " + std::to_string(queries.size()) + - " queries in " + std::to_string(total_ms) + " ms (" + - std::to_string(actual_threads) + " threads)"); + auto load_end = std::chrono::high_resolution_clock::now(); + auto load_ms = std::chrono::duration_cast(load_end - load_start).count(); + DebugLog("[ComputeGroundTruth] Loaded " + std::to_string(loaded_count) + + " vectors in " + std::to_string(load_ms) + " ms"); + + if (load_error) { + LOG_ERROR("Failed to load all base vectors, cannot compute ground truth"); + return ground_truth; + } + + // Step 2: Flatten query vectors + std::vector query_flat(queries.size() * dim); + for (size_t i = 0; i < queries.size(); ++i) { + std::memcpy(query_flat.data() + i * dim, queries[i].data(), dim * sizeof(float)); + } + + // Step 3: Call OmegaLib's fast ground truth computation (Eigen) + DebugLog("[ComputeGroundTruth] Computing ground truth with Eigen..."); + auto compute_start = std::chrono::high_resolution_clock::now(); + + ground_truth = omega::ComputeGroundTruth( + base_vectors.data(), + query_flat.data(), + doc_count, + queries.size(), + dim, + topk, + omega_metric, + held_out_mode); + + auto compute_end = std::chrono::high_resolution_clock::now(); + auto compute_ms = std::chrono::duration_cast(compute_end - compute_start).count(); + DebugLog("[ComputeGroundTruth] Computed ground truth in " + std::to_string(compute_ms) + " ms"); + + auto total_end = std::chrono::high_resolution_clock::now(); + auto total_ms = std::chrono::duration_cast(total_end - start_time).count(); + DebugLog("[ComputeGroundTruth] Total time: " + std::to_string(total_ms) + + " ms (load: " + std::to_string(load_ms) + " ms, compute: " + std::to_string(compute_ms) + " ms)"); + + LOG_INFO("Computed ground truth (Eigen brute force) for %zu queries in %zu ms (load: %zu ms, compute: %zu ms)", + queries.size(), total_ms, load_ms, compute_ms); - 
LOG_INFO("Computed ground truth for %zu queries in %zu ms (%zu threads)", - queries.size(), total_ms, actual_threads); return ground_truth; } @@ -383,116 +608,39 @@ void TrainingDataCollector::FillLabels( const std::vector>& ground_truth, const std::vector>& search_results, size_t k_train) { + // NOTE: Labels are now computed in real-time during search. + // This function is kept for backward compatibility but only counts existing labels. + (void)ground_truth; + (void)search_results; + (void)k_train; + if (!records || records->empty()) { LOG_WARN("No records to fill labels"); return; } - if (ground_truth.empty()) { - LOG_WARN("Ground truth is empty, cannot fill labels"); - return; - } - - auto fill_start = std::chrono::high_resolution_clock::now(); - - // Use parallel processing for large record counts - size_t num_records = records->size(); - size_t num_threads = std::min(static_cast(std::thread::hardware_concurrency()), - std::max(num_records / 10000, static_cast(1))); - - std::atomic positive_count{0}; - std::atomic negative_count{0}; - std::atomic processed_count{0}; - - auto worker = [&](size_t start_idx, size_t end_idx) { - size_t local_positive = 0; - size_t local_negative = 0; - - for (size_t idx = start_idx; idx < end_idx; ++idx) { - auto& record = (*records)[idx]; - int query_id = record.query_id; - - // Validate query_id - if (query_id < 0 || query_id >= static_cast(ground_truth.size())) { - record.label = 0; - local_negative++; - continue; - } - - const auto& gt = ground_truth[query_id]; - if (gt.empty()) { - record.label = 0; - local_negative++; - continue; - } - - // Take top k_train ground truth nodes - size_t actual_k = std::min(k_train, gt.size()); - - // For small k_train (typical case: k_train=1), use linear search - // This is faster than building a hash set for each record - bool all_found = true; - const auto& collected = record.collected_node_ids; - - for (size_t i = 0; i < actual_k && all_found; ++i) { - uint64_t gt_node = gt[i]; - // Linear 
search in collected_node_ids - bool found = false; - for (uint64_t node : collected) { - if (node == gt_node) { - found = true; - break; - } - } - if (!found) { - all_found = false; - } - } - - if (all_found) { - record.label = 1; - local_positive++; - } else { - record.label = 0; - local_negative++; - } - } - - positive_count += local_positive; - negative_count += local_negative; - processed_count += (end_idx - start_idx); - }; - - // Launch threads - std::vector threads; - size_t records_per_thread = (num_records + num_threads - 1) / num_threads; - - for (size_t t = 0; t < num_threads; ++t) { - size_t start_idx = t * records_per_thread; - size_t end_idx = std::min(start_idx + records_per_thread, num_records); - if (start_idx < end_idx) { - threads.emplace_back(worker, start_idx, end_idx); + // Count existing labels (already computed in real-time during search) + size_t positive_count = 0; + size_t negative_count = 0; + for (const auto& record : *records) { + if (record.label > 0) { + positive_count++; + } else { + negative_count++; } } - // Wait for all threads - for (auto& thread : threads) { - thread.join(); - } - - auto fill_end = std::chrono::high_resolution_clock::now(); - auto fill_ms = std::chrono::duration_cast(fill_end - fill_start).count(); - - LOG_INFO("Filled labels for %zu/%zu records (%zu positive, %zu negative, k_train=%zu) in %zu ms (%zu threads)", - processed_count.load(), records->size(), positive_count.load(), negative_count.load(), k_train, - fill_ms, num_threads); + LOG_INFO("Labels already computed in real-time: %zu positive, %zu negative (k_train=%zu)", + positive_count, negative_count, k_train); } core_interface::GtCmpsData TrainingDataCollector::ComputeGtCmps( const std::vector& records, const std::vector>& ground_truth, size_t topk) { - auto compute_start = std::chrono::high_resolution_clock::now(); + // NOTE: gt_cmps computation requires collected_node_ids which was removed + // for memory optimization. 
This function now returns default values based on + // record.cmps_visited as a simple approximation. core_interface::GtCmpsData result; result.topk = topk; @@ -503,97 +651,57 @@ core_interface::GtCmpsData TrainingDataCollector::ComputeGtCmps( return result; } - // Initialize gt_cmps with -1 (not found) + // Initialize gt_cmps and total_cmps result.gt_cmps.resize(ground_truth.size()); result.total_cmps.resize(ground_truth.size(), 0); for (size_t q = 0; q < ground_truth.size(); ++q) { - result.gt_cmps[q].resize(topk, -1); + result.gt_cmps[q].resize(topk, 0); } - // Group records by query_id and find when each GT was first collected - // Records are ordered by (query_id, cmps_visited) - int current_query = -1; - int max_cmps_for_query = 0; - size_t gt_found_count = 0; // Track how many GT nodes we've found (for early exit) - size_t gt_target_count = 0; // How many GT nodes we need to find for current query - - // Map from GT node_id to its rank (for O(1) lookup instead of linear search) - std::unordered_map gt_node_to_rank; - const std::vector* current_gt = nullptr; + // Group records by query_id and track max cmps + // For gt_cmps, we use a simple heuristic: + // - If label=1 (GT found), use cmps_visited as gt_cmps for all GT ranks + // - If label=0 (GT not found), use the max cmps for that query + std::unordered_map query_max_cmps; + std::unordered_map query_first_found_cmps; for (const auto& record : records) { int query_id = record.query_id; - - // Validate query_id if (query_id < 0 || query_id >= static_cast(ground_truth.size())) { continue; } - // Track max cmps for this query - if (query_id == current_query) { - max_cmps_for_query = std::max(max_cmps_for_query, record.cmps_visited); - } else { - // Save total_cmps for previous query - if (current_query >= 0 && current_query < static_cast(result.total_cmps.size())) { - result.total_cmps[current_query] = max_cmps_for_query; - } - - // Start new query - current_query = query_id; - max_cmps_for_query = 
record.cmps_visited; - current_gt = &ground_truth[query_id]; - gt_found_count = 0; - - // Build map from GT node_id to rank for O(1) lookup - gt_node_to_rank.clear(); - gt_target_count = std::min(topk, current_gt->size()); - for (size_t i = 0; i < gt_target_count; ++i) { - gt_node_to_rank[(*current_gt)[i]] = i; - } - } - - // OPTIMIZATION: Early exit if we've found all GT nodes for this query - if (gt_found_count >= gt_target_count) { - continue; - } + // Track max cmps for each query + auto& max_cmps = query_max_cmps[query_id]; + max_cmps = std::max(max_cmps, record.cmps_visited); - // Check which GT nodes are in collected_node_ids - for (uint64_t node_id : record.collected_node_ids) { - auto it = gt_node_to_rank.find(node_id); - if (it != gt_node_to_rank.end()) { - size_t rank = it->second; - // Only record if we haven't found this GT yet - if (result.gt_cmps[query_id][rank] == -1) { - result.gt_cmps[query_id][rank] = record.cmps_visited; - gt_found_count++; - // Early exit from inner loop if all found - if (gt_found_count >= gt_target_count) { - break; - } - } + // Track first cmps where label became 1 + if (record.label > 0) { + auto it = query_first_found_cmps.find(query_id); + if (it == query_first_found_cmps.end()) { + query_first_found_cmps[query_id] = record.cmps_visited; + } else { + it->second = std::min(it->second, record.cmps_visited); } } } - // Save total_cmps for the last query - if (current_query >= 0 && current_query < static_cast(result.total_cmps.size())) { - result.total_cmps[current_query] = max_cmps_for_query; - } + // Fill in the result + for (size_t q = 0; q < ground_truth.size(); ++q) { + int query_id = static_cast(q); + result.total_cmps[q] = query_max_cmps[query_id]; + + // Use first_found_cmps if available, otherwise use total_cmps + auto it = query_first_found_cmps.find(query_id); + int gt_cmps_value = (it != query_first_found_cmps.end()) ? 
it->second : result.total_cmps[q]; - // Fill in -1 values with total_cmps (GT not found) - for (size_t q = 0; q < result.gt_cmps.size(); ++q) { for (size_t r = 0; r < result.gt_cmps[q].size(); ++r) { - if (result.gt_cmps[q][r] == -1) { - result.gt_cmps[q][r] = result.total_cmps[q]; - } + result.gt_cmps[q][r] = gt_cmps_value; } } - auto compute_end = std::chrono::high_resolution_clock::now(); - auto compute_ms = std::chrono::duration_cast(compute_end - compute_start).count(); - - LOG_INFO("Computed gt_cmps for %zu queries, topk=%zu in %zu ms", result.num_queries, result.topk, compute_ms); + LOG_INFO("Computed gt_cmps (approximation) for %zu queries, topk=%zu", result.num_queries, result.topk); return result; } @@ -605,6 +713,22 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( const std::vector& provided_indexers) { ScopedTimer total_timer("CollectTrainingDataWithGtCmps [TOTAL]"); + // Step 0: Get indexers first (needed for metric type) + std::vector indexers; + if (!provided_indexers.empty()) { + indexers = provided_indexers; + } else { + indexers = segment->get_vector_indexer(field_name); + } + + if (indexers.empty()) { + return tl::make_unexpected( + Status::InternalError("No vector indexers found for field: " + field_name)); + } + + // Get metric type from first indexer + MetricType metric_type = indexers[0]->metric_type(); + // Step 1: Generate training queries using held-out approach // (sample vectors directly from index, no noise) LOG_INFO("Generating %zu held-out training queries for field '%s'", @@ -627,19 +751,23 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( Status::InternalError("Failed to generate training queries")); } - // Step 2: Compute ground truth (brute force search, excluding self-matches) - LOG_INFO("Computing ground truth with brute force search (topk=%zu, excluding self)", - options.topk); + // Step 2: Compute ground truth (brute force or HNSW search, excluding self-matches) + LOG_INFO("Computing ground truth (topk=%zu, 
ef_groundtruth=%d, excluding self)", + options.topk, options.ef_groundtruth); std::vector> ground_truth; { - ScopedTimer timer("Step2: ComputeGroundTruth (BRUTE FORCE PARALLEL, HELD-OUT)"); + std::string timer_name = options.ef_groundtruth > 0 + ? "Step2: ComputeGroundTruth (HNSW ef=" + std::to_string(options.ef_groundtruth) + " PARALLEL, HELD-OUT)" + : "Step2: ComputeGroundTruth (BRUTE FORCE PARALLEL, HELD-OUT)"; + ScopedTimer timer(timer_name); DebugLog(" num_queries=" + std::to_string(training_queries.size()) + ", topk=" + std::to_string(options.topk) + + ", ef_groundtruth=" + std::to_string(options.ef_groundtruth) + ", threads=" + std::to_string(options.num_threads == 0 ? std::thread::hardware_concurrency() : options.num_threads)); ground_truth = ComputeGroundTruth( segment, field_name, training_queries, options.topk, options.num_threads, - query_doc_ids); // Pass doc_ids for self-exclusion + query_doc_ids, options.ef_groundtruth, metric_type, indexers); // Pass indexers to avoid stale data DebugLog(" Computed ground truth for " + std::to_string(ground_truth.size()) + " queries"); } @@ -648,27 +776,17 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( Status::InternalError("Failed to compute ground truth")); } - // Step 3: Choose indexers for training - std::vector indexers; - - if (!provided_indexers.empty()) { - indexers = provided_indexers; - } else { - indexers = segment->get_vector_indexer(field_name); - } - - if (indexers.empty()) { - return tl::make_unexpected( - Status::InternalError("No vector indexers found for field: " + field_name)); - } - LOG_INFO("Found %zu indexers for field '%s'", indexers.size(), field_name.c_str()); DebugLog("Step3: Found " + std::to_string(indexers.size()) + " indexers, doc_count=" + std::to_string(indexers[0]->doc_count())); - // Step 4: Enable training mode on all indexers - LOG_INFO("Enabling training mode on %zu indexers", indexers.size()); + // Step 4: Set ground truth and enable training mode on all indexers + 
LOG_INFO("Setting ground truth (%zu queries) and enabling training mode on %zu indexers", + ground_truth.size(), indexers.size()); for (auto& indexer : indexers) { + // Set ground truth for real-time label computation + indexer->SetTrainingGroundTruth(ground_truth, options.k_train); + auto status = indexer->EnableTrainingMode(true); if (!status.ok()) { LOG_WARN("Failed to enable training mode on indexer: %s", @@ -805,12 +923,15 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( LOG_WARN("No training records collected from any indexer"); } - // Step 7: Fill labels based on ground truth - LOG_INFO("Filling labels for %zu records (k_train=%zu)", all_records.size(), options.k_train); - { - ScopedTimer timer("Step7: FillLabels"); - FillLabels(&all_records, ground_truth, search_results, options.k_train); + // Step 7: Labels are now computed in real-time during search (no FillLabels needed) + // Count positive/negative labels for verification + size_t positive_count = 0, negative_count = 0; + for (const auto& record : all_records) { + if (record.label > 0) positive_count++; + else negative_count++; } + LOG_INFO("Collected %zu records: %zu positive, %zu negative (labels computed in real-time)", + all_records.size(), positive_count, negative_count); // Step 8: Compute gt_cmps data LOG_INFO("Computing gt_cmps data"); diff --git a/src/db/training/training_data_collector.h b/src/db/training/training_data_collector.h index b633f155f..7e3193421 100644 --- a/src/db/training/training_data_collector.h +++ b/src/db/training/training_data_collector.h @@ -38,6 +38,10 @@ struct TrainingDataCollectorOptions { // ef parameter for training searches (large value for recall ≈ 1) int ef_training = 1000; + // ef parameter for ground truth computation (0 = brute force, >0 = HNSW with this ef) + // Using HNSW with large ef is much faster than brute force while maintaining high accuracy + int ef_groundtruth = 0; + // Top-K results to retrieve per query size_t topk = 100; @@ -107,7 +111,7 
@@ class TrainingDataCollector { private: /** - * @brief Compute ground truth using brute force search + * @brief Compute ground truth using brute force or HNSW search * * @param segment The segment to search * @param field_name Vector field name @@ -115,6 +119,9 @@ class TrainingDataCollector { * @param topk Number of top results to retrieve * @param num_threads Number of threads (0 = hardware_concurrency) * @param query_doc_ids Optional doc_ids of query vectors (for self-exclusion in held-out mode) + * @param ef_groundtruth ef value for HNSW search (0 = brute force, >0 = HNSW) + * @param metric_type Distance metric type (L2, IP, COSINE) + * @param indexers Optional pre-opened indexers (for HNSW GT, avoids using stale indexers from segment) * @return Ground truth doc IDs for each query */ static std::vector> ComputeGroundTruth( @@ -123,7 +130,10 @@ class TrainingDataCollector { const std::vector>& queries, size_t topk, size_t num_threads, - const std::vector& query_doc_ids = {}); + const std::vector& query_doc_ids = {}, + int ef_groundtruth = 0, + MetricType metric_type = MetricType::IP, + const std::vector& indexers = {}); /** * @brief Fill labels in training records based on ground truth diff --git a/src/include/zvec/core/interface/index.h b/src/include/zvec/core/interface/index.h index 2888d51ae..f7df0fe7a 100644 --- a/src/include/zvec/core/interface/index.h +++ b/src/include/zvec/core/interface/index.h @@ -348,6 +348,8 @@ class OmegaIndex : public HNSWIndex, public ITrainingCapable { void SetCurrentQueryId(int query_id) override; std::vector GetTrainingRecords() const override; void ClearTrainingRecords() override; + void SetTrainingGroundTruth(const std::vector>& ground_truth, + int k_train = 1) override; protected: virtual int CreateAndInitStreamer(const BaseIndexParam ¶m) override; diff --git a/src/include/zvec/core/interface/training.h b/src/include/zvec/core/interface/training.h index 947721fdc..d7f357a6c 100644 --- 
a/src/include/zvec/core/interface/training.h +++ b/src/include/zvec/core/interface/training.h @@ -34,9 +34,8 @@ namespace zvec::core_interface { * - dist_start: Distance to the starting node * - traversal_window_stats: 7 statistical features of the traversal window * (avg, var, min, max, median, percentile25, percentile75) - * - collected_node_ids: Node IDs collected in topk at this search state * - label: Binary label (1 if collected enough GT results, 0 otherwise) - * Filled by training pipeline based on collected_node_ids vs ground truth + * Computed in real-time during search (memory optimized) */ struct TrainingRecord { int query_id; @@ -45,8 +44,7 @@ struct TrainingRecord { float dist_1st; float dist_start; std::array traversal_window_stats; - std::vector collected_node_ids; // Node IDs in topk at this state - int label; // 0 by default, to be filled by FillLabels() + int label; // Computed in real-time during search TrainingRecord() : query_id(0), @@ -55,7 +53,6 @@ struct TrainingRecord { dist_1st(0.0f), dist_start(0.0f), traversal_window_stats{}, - collected_node_ids{}, label(0) {} }; diff --git a/src/include/zvec/core/interface/training_capable.h b/src/include/zvec/core/interface/training_capable.h index 9d37567ef..a527dd78c 100644 --- a/src/include/zvec/core/interface/training_capable.h +++ b/src/include/zvec/core/interface/training_capable.h @@ -81,6 +81,18 @@ class ITrainingCapable { * starting a fresh training data collection session. */ virtual void ClearTrainingRecords() = 0; + + /** + * @brief Set ground truth for training queries. + * + * Ground truth is used for real-time label computation during training. + * Labels are computed as: label=1 iff top k_train GT nodes are in current topk. 
+ * + * @param ground_truth 2D vector: ground_truth[query_id][rank] = node_id + * @param k_train Number of GT nodes to check for label (typically 1) + */ + virtual void SetTrainingGroundTruth(const std::vector>& ground_truth, + int k_train = 1) = 0; }; } // namespace core_interface diff --git a/src/include/zvec/db/index_params.h b/src/include/zvec/db/index_params.h index 9ab24f0d3..1310dcf2e 100644 --- a/src/include/zvec/db/index_params.h +++ b/src/include/zvec/db/index_params.h @@ -325,13 +325,19 @@ class OmegaIndexParams : public VectorIndexParams { QuantizeType quantize_type = QuantizeType::UNDEFINED, uint32_t min_vector_threshold = 100000, const std::string& model_dir = "./omega_models", - size_t num_training_queries = 1000) + size_t num_training_queries = 1000, + int ef_training = 1000, + int window_size = 100, + int ef_groundtruth = 0) // 0 means use brute force, >0 means use HNSW with this ef : VectorIndexParams(IndexType::OMEGA, metric_type, quantize_type), m_(m), ef_construction_(ef_construction), min_vector_threshold_(min_vector_threshold), model_dir_(model_dir), - num_training_queries_(num_training_queries) {} + num_training_queries_(num_training_queries), + ef_training_(ef_training), + window_size_(window_size), + ef_groundtruth_(ef_groundtruth) {} using OPtr = std::shared_ptr; @@ -339,7 +345,8 @@ class OmegaIndexParams : public VectorIndexParams { Ptr clone() const override { return std::make_shared(metric_type_, m_, ef_construction_, quantize_type_, min_vector_threshold_, - model_dir_, num_training_queries_); + model_dir_, num_training_queries_, + ef_training_, window_size_, ef_groundtruth_); } std::string to_string() const override { @@ -349,7 +356,10 @@ class OmegaIndexParams : public VectorIndexParams { oss << base_str << ",m:" << m_ << ",ef_construction:" << ef_construction_ << ",min_vector_threshold:" << min_vector_threshold_ << ",model_dir:" << model_dir_ - << ",num_training_queries:" << num_training_queries_ << "}"; + << 
",num_training_queries:" << num_training_queries_ + << ",ef_training:" << ef_training_ + << ",window_size:" << window_size_ + << ",ef_groundtruth:" << ef_groundtruth_ << "}"; return oss.str(); } @@ -366,6 +376,12 @@ class OmegaIndexParams : public VectorIndexParams { static_cast(other).model_dir_ && num_training_queries_ == static_cast(other).num_training_queries_ && + ef_training_ == + static_cast(other).ef_training_ && + window_size_ == + static_cast(other).window_size_ && + ef_groundtruth_ == + static_cast(other).ef_groundtruth_ && quantize_type() == static_cast(other).quantize_type(); } @@ -400,6 +416,24 @@ class OmegaIndexParams : public VectorIndexParams { size_t num_training_queries() const { return num_training_queries_; } + void set_ef_training(int ef_training) { + ef_training_ = ef_training; + } + int ef_training() const { + return ef_training_; + } + void set_window_size(int window_size) { + window_size_ = window_size; + } + int window_size() const { + return window_size_; + } + void set_ef_groundtruth(int ef_groundtruth) { + ef_groundtruth_ = ef_groundtruth; + } + int ef_groundtruth() const { + return ef_groundtruth_; + } private: int m_; @@ -407,6 +441,9 @@ class OmegaIndexParams : public VectorIndexParams { uint32_t min_vector_threshold_; std::string model_dir_; size_t num_training_queries_; + int ef_training_; + int window_size_; + int ef_groundtruth_; // 0 = brute force, >0 = use HNSW with this ef }; } // namespace zvec \ No newline at end of file diff --git a/thirdparty/omega b/thirdparty/omega index f0b50e825..9bbb1157c 160000 --- a/thirdparty/omega +++ b/thirdparty/omega @@ -1 +1 @@ -Subproject commit f0b50e8256cc535e84b5b261db154f112efff589 +Subproject commit 9bbb1157c56db05bd0d2aac2d32151bc704fb38e From 8640773d4d9cb9c4fc689cc9fe57dcd428c9a9c9 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 18 Mar 2026 04:04:18 +0800 Subject: [PATCH 013/126] feat(omega): integrate the reference-aligned OMEGA training flow and query-side search path OMEGA 
integration updates: - wire the updated omega training and search behavior into zvec index build, load and query execution paths - expose and propagate OMEGA training/query parameters through the Python API, index params and engine helper conversions - update omega builder, searcher, streamer and context handling to match the reference behavior more closely Training and validation updates: - update training data collection and model training integration for the reference-aligned OMEGA workflow Performance and debugging updates: - add an OMEGA prediction microbenchmark for query-side inference analysis - improve storage/index plumbing needed by the OMEGA workflow - add query-side diagnostics to investigate early-stop calibration and repeated prediction overhead --- .../python/model/param/python_param.cc | 33 +- src/core/algorithm/omega/omega_builder.cc | 10 +- src/core/algorithm/omega/omega_context.h | 35 ++ src/core/algorithm/omega/omega_searcher.cc | 81 ++-- src/core/algorithm/omega/omega_searcher.h | 5 +- src/core/algorithm/omega/omega_streamer.cc | 396 +++++++++++++----- src/core/algorithm/omega/omega_streamer.h | 55 ++- src/core/interface/index.cc | 5 + src/core/utility/buffer_storage.cc | 5 + src/core/utility/mmap_file_storage.cc | 7 + src/db/CMakeLists.txt | 2 +- .../column/vector_column/engine_helper.hpp | 4 +- .../vector_column/vector_column_indexer.cc | 56 +++ .../vector_column/vector_column_indexer.h | 13 + src/db/index/common/proto_converter.cc | 4 +- src/db/training/omega_model_trainer.cc | 15 + src/db/training/training_data_collector.cc | 31 +- .../zvec/core/framework/index_context.h | 17 + .../zvec/core/framework/index_storage.h | 5 + src/include/zvec/core/interface/index.h | 5 + src/include/zvec/db/index_params.h | 18 +- thirdparty/omega | 2 +- tools/core/CMakeLists.txt | 8 + tools/core/omega_predict_microbench.cc | 267 ++++++++++++ 24 files changed, 871 insertions(+), 208 deletions(-) create mode 100644 tools/core/omega_predict_microbench.cc diff 
--git a/src/binding/python/model/param/python_param.cc b/src/binding/python/model/param/python_param.cc index c150fd9d2..ee1fd9e9b 100644 --- a/src/binding/python/model/param/python_param.cc +++ b/src/binding/python/model/param/python_param.cc @@ -569,8 +569,6 @@ predict when to stop searching. min_vector_threshold (int): Minimum number of vectors required to enable OMEGA optimization. Below this threshold, standard HNSW is used. Default is 100000. - model_dir (str): Directory path for storing/loading OMEGA models. - Default is "./omega_models". num_training_queries (int): Number of training queries to generate for OMEGA model training. Default is 1000. ef_training (int): Size of the candidate list (ef) used during training @@ -591,7 +589,6 @@ predict when to stop searching. ... m=16, ... ef_construction=200, ... min_vector_threshold=50000, - ... model_dir="./my_omega_models", ... num_training_queries=500, ... ef_training=800, ... window_size=100, @@ -601,14 +598,13 @@ predict when to stop searching. 2000 )pbdoc"); omega_params - .def(py::init(), + .def(py::init(), py::arg("metric_type") = MetricType::IP, py::arg("m") = core_interface::kDefaultHnswNeighborCnt, py::arg("ef_construction") = core_interface::kDefaultHnswEfConstruction, py::arg("quantize_type") = QuantizeType::UNDEFINED, py::arg("min_vector_threshold") = 100000, - py::arg("model_dir") = "./omega_models", py::arg("num_training_queries") = 1000, py::arg("ef_training") = 1000, py::arg("window_size") = 100, @@ -622,9 +618,6 @@ predict when to stop searching. 
.def_property_readonly( "min_vector_threshold", &OmegaIndexParams::min_vector_threshold, "int: Minimum vectors required to enable OMEGA optimization.") - .def_property_readonly( - "model_dir", &OmegaIndexParams::model_dir, - "str: Directory path for OMEGA models.") .def_property_readonly( "num_training_queries", &OmegaIndexParams::num_training_queries, "int: Number of training queries for OMEGA model training.") @@ -646,7 +639,6 @@ predict when to stop searching. dict["m"] = self.m(); dict["ef_construction"] = self.ef_construction(); dict["min_vector_threshold"] = self.min_vector_threshold(); - dict["model_dir"] = self.model_dir(); dict["num_training_queries"] = self.num_training_queries(); dict["ef_training"] = self.ef_training(); dict["window_size"] = self.window_size(); @@ -666,7 +658,6 @@ predict when to stop searching. std::to_string(self.ef_construction()) + ", \"min_vector_threshold\":" + std::to_string(self.min_vector_threshold()) + - ", \"model_dir\":\"" + self.model_dir() + "\"" + ", \"num_training_queries\":" + std::to_string(self.num_training_queries()) + ", \"ef_training\":" + @@ -682,18 +673,26 @@ predict when to stop searching. 
[](const OmegaIndexParams &self) { return py::make_tuple(self.metric_type(), self.m(), self.ef_construction(), self.quantize_type(), - self.min_vector_threshold(), self.model_dir(), - self.num_training_queries(), self.ef_training(), - self.window_size(), self.ef_groundtruth()); + self.min_vector_threshold(), + self.num_training_queries(), + self.ef_training(), self.window_size(), + self.ef_groundtruth()); }, [](py::tuple t) { - if (t.size() != 10) + if (t.size() == 10) { + return std::make_shared( + t[0].cast(), t[1].cast(), t[2].cast(), + t[3].cast(), t[4].cast(), + t[6].cast(), t[7].cast(), t[8].cast(), + t[9].cast()); + } + if (t.size() != 9) throw std::runtime_error("Invalid state for OmegaIndexParams"); return std::make_shared( t[0].cast(), t[1].cast(), t[2].cast(), t[3].cast(), t[4].cast(), - t[5].cast(), t[6].cast(), t[7].cast(), - t[8].cast(), t[9].cast()); + t[5].cast(), t[6].cast(), t[7].cast(), + t[8].cast()); })); } @@ -1450,4 +1449,4 @@ void ZVecPyParams::bind_vector_query(py::module_ &m) { return obj; })); } -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/src/core/algorithm/omega/omega_builder.cc b/src/core/algorithm/omega/omega_builder.cc index e9e5bc0ce..d44ed0bc3 100644 --- a/src/core/algorithm/omega/omega_builder.cc +++ b/src/core/algorithm/omega/omega_builder.cc @@ -28,9 +28,10 @@ int OmegaBuilder::init(const IndexMeta &meta, const ailego::Params ¶ms) { return IndexError_Duplicate; } - // TODO: Fix design - cannot call protected init method of HnswBuilder - // For now, return NotImplemented error - LOG_ERROR("OmegaBuilder is not yet fully implemented - wrapper design needs fixing"); + // NOTE: OmegaBuilder is intentionally not implemented. + // OMEGA index building uses OmegaStreamer (which extends HnswStreamer) instead. + // This class exists for potential future use but is not currently needed. 
+ LOG_ERROR("OmegaBuilder is not implemented - use OmegaStreamer for index building"); return IndexError_NotImplemented; /* @@ -132,5 +133,6 @@ int OmegaBuilder::dump(const IndexDumper::Pointer &dumper) { } // namespace core } // namespace zvec -// TODO: Fix OmegaBuilder design - it tries to call protected methods of HnswBuilder +// NOTE: OmegaBuilder is not registered because OMEGA index building uses +// OmegaStreamer (which extends HnswStreamer) instead. // INDEX_FACTORY_REGISTER_BUILDER(zvec::core::OmegaBuilder); diff --git a/src/core/algorithm/omega/omega_context.h b/src/core/algorithm/omega/omega_context.h index 4e03cc93d..524dfeb9b 100644 --- a/src/core/algorithm/omega/omega_context.h +++ b/src/core/algorithm/omega/omega_context.h @@ -71,6 +71,39 @@ class OmegaContext : public HnswContext { //! Called before each search when context is reused from pool void clear_training_records() override { training_records_.clear(); + gt_cmps_per_rank_.clear(); + total_cmps_ = 0; + } + + //! Set gt_cmps data for this query + void set_gt_cmps(const std::vector& gt_cmps, int total_cmps) { + gt_cmps_per_rank_ = gt_cmps; + total_cmps_ = total_cmps; + } + + //! Get gt_cmps per rank + const std::vector& gt_cmps_per_rank() const { + return gt_cmps_per_rank_; + } + + //! Get total cmps for this search + int total_cmps() const { + return total_cmps_; + } + + //! Take gt_cmps data (override base class virtual method) + std::vector take_gt_cmps() override { + return std::move(gt_cmps_per_rank_); + } + + //! Get total comparisons (override base class virtual method) + int get_total_cmps() const override { + return total_cmps_; + } + + //! Get training query ID (override base class virtual method) + int get_training_query_id() const override { + return training_query_id_; } //! 
Update context parameters (overrides HnswContext::update) @@ -96,6 +129,8 @@ class OmegaContext : public HnswContext { float target_recall_; // Per-query target recall int training_query_id_; // Per-query training query ID for parallel training std::vector training_records_; // Per-query training records + std::vector gt_cmps_per_rank_; // cmps value when each GT rank was found + int total_cmps_ = 0; // Total cmps for this search }; } // namespace core diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 650662a32..bda22393e 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -48,7 +48,6 @@ int OmegaSearcher::init(const ailego::Params ¶ms) { omega_enabled_ = params.has("omega.enabled") ? params.get_as_bool("omega.enabled") : false; target_recall_ = params.has("omega.target_recall") ? params.get_as_float("omega.target_recall") : 0.95f; min_vector_threshold_ = params.has("omega.min_vector_threshold") ? params.get_as_uint32("omega.min_vector_threshold") : 100000; - model_dir_ = params.has("omega.model_dir") ? params.get_as_string("omega.model_dir") : ""; window_size_ = params.has("omega.window_size") ? params.get_as_int32("omega.window_size") : 100; // Call parent class init @@ -90,22 +89,35 @@ int OmegaSearcher::load(IndexStorage::Pointer container, // Try to load OMEGA model if enabled and threshold met use_omega_mode_ = false; if (omega_enabled_ && current_vector_count_ >= min_vector_threshold_) { - if (!model_dir_.empty()) { + // Load the model colocated with the persisted index file. 
+ std::string effective_model_dir; + if (container) { + std::string index_path = container->file_path(); + if (!index_path.empty()) { + size_t last_slash = index_path.rfind('/'); + if (last_slash != std::string::npos) { + effective_model_dir = + index_path.substr(0, last_slash) + "/omega_model"; + } + } + } + + if (!effective_model_dir.empty()) { omega_model_ = omega_model_create(); if (omega_model_ != nullptr) { - ret = omega_model_load(omega_model_, model_dir_.c_str()); + ret = omega_model_load(omega_model_, effective_model_dir.c_str()); if (ret == 0 && omega_model_is_loaded(omega_model_)) { use_omega_mode_ = true; - LOG_INFO("OMEGA model loaded successfully from %s", model_dir_.c_str()); + LOG_INFO("OMEGA model loaded successfully from %s", effective_model_dir.c_str()); } else { LOG_WARN("Failed to load OMEGA model from %s, falling back to HNSW", - model_dir_.c_str()); + effective_model_dir.c_str()); omega_model_destroy(omega_model_); omega_model_ = nullptr; } } } else { - LOG_WARN("OMEGA enabled but model_dir not specified, falling back to HNSW"); + LOG_WARN("OMEGA enabled but cannot derive omega_model path from index storage, falling back to HNSW"); } } else { if (omega_enabled_) { @@ -230,7 +242,13 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet // Read target_recall from context (per-query parameter) float target_recall = omega_ctx->target_recall(); - // Create OMEGA search context with parameters (stateful interface) + // Create OMEGA search context with parameters (stateful interface). + // OMEGA's k is the requested top-k, not the batch/query count. 
+ int omega_topk = static_cast(omega_ctx->topk()); + if (omega_topk <= 0) { + omega_topk = static_cast(count); + } + // In training mode, pass NULL if model is not loaded OmegaModelHandle model_to_use = omega_model_; if (training_mode_enabled_ && (omega_model_ == nullptr || !omega_model_is_loaded(omega_model_))) { @@ -238,7 +256,7 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet } OmegaSearchHandle omega_search = omega_search_create_with_params( - model_to_use, target_recall, count, window_size_); + model_to_use, target_recall, omega_topk, window_size_); if (omega_search == nullptr) { LOG_WARN("Failed to create OMEGA search context, falling back to HNSW"); @@ -325,40 +343,27 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet candidates.emplace(entry_point, dist); // Report initial visit to OMEGA - omega_search_report_visit(omega_search, entry_point, dist, 1); // is_in_topk=1 + omega_search_report_visit_candidate(omega_search, entry_point, dist, 1); dist_t lowerBound = dist; // Main search loop with OMEGA predictions + bool early_stop_hit = false; while (!candidates.empty()) { auto top = candidates.begin(); node_id_t current_node = top->first; dist_t candidate_dist = top->second; + // Reference semantics: count the hop before the stop-condition check. 
+ omega_search_report_hop(omega_search); + // Standard HNSW stopping condition if (candidate_dist > lowerBound && topk_heap.size() >= ef) { break; } - // OMEGA early stopping check (CRITICAL: disabled in training mode) - // Training mode requires full search to collect complete feature data - if (!training_mode_enabled_) { - if (omega_search_should_predict(omega_search)) { - if (omega_search_should_stop(omega_search)) { - int hops, cmps, collected_gt; - omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); - LOG_DEBUG("OMEGA early stop: cmps=%d, hops=%d, collected_gt=%d", - cmps, hops, collected_gt); - break; - } - } - } - candidates.pop(); - // Report hop to OMEGA - omega_search_report_hop(omega_search); - // Get neighbors of current node const Neighbors neighbors = entity.get_neighbors(0, current_node); if (neighbors.size() == 0) continue; @@ -388,14 +393,24 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet const void *neighbor_vec = neighbor_vec_blocks[i].data(); dist_t neighbor_dist = dc.dist(neighbor_vec); - // Check if this node will be in topk - bool is_in_topk = (topk_heap.size() < ef || neighbor_dist < lowerBound); + bool should_consider_candidate = + (topk_heap.size() < ef || neighbor_dist < lowerBound); + omega_search_report_visit_candidate(omega_search, neighbor, neighbor_dist, + should_consider_candidate ? 1 : 0); - // Report visit to OMEGA - omega_search_report_visit(omega_search, neighbor, neighbor_dist, is_in_topk ? 
1 : 0); + if (!training_mode_enabled_ && omega_search_should_predict(omega_search)) { + if (omega_search_should_stop(omega_search)) { + int hops, cmps, collected_gt; + omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); + LOG_DEBUG("OMEGA early stop: cmps=%d, hops=%d, collected_gt=%d", + cmps, hops, collected_gt); + early_stop_hit = true; + break; + } + } // Consider this candidate - if (is_in_topk) { + if (should_consider_candidate) { candidates.emplace(neighbor, neighbor_dist); topk_heap.emplace(neighbor, neighbor_dist); @@ -415,6 +430,10 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet } } } + + if (early_stop_hit) { + break; + } } // Convert results to context format diff --git a/src/core/algorithm/omega/omega_searcher.h b/src/core/algorithm/omega/omega_searcher.h index 3ad2fb575..31b2a819e 100644 --- a/src/core/algorithm/omega/omega_searcher.h +++ b/src/core/algorithm/omega/omega_searcher.h @@ -131,7 +131,9 @@ class OmegaSearcher : public HnswSearcher { //! Create a searcher context (creates OmegaContext instead of HnswContext) virtual ContextPointer create_context() const override; - // TODO: These methods call protected methods of HnswSearcher and need to be fixed + // NOTE: The commented-out delegation methods below are intentionally not used. + // OmegaSearcher inherits from HnswSearcher and overrides only the necessary methods. + // The base class implementations are sufficient for the remaining functionality. /* //! 
Fetch vector by key (delegate to HNSW) virtual const void *get_vector(uint64_t key) const override { @@ -194,7 +196,6 @@ class OmegaSearcher : public HnswSearcher { float target_recall_; uint32_t min_vector_threshold_; size_t current_vector_count_; - std::string model_dir_; int window_size_; // Training mode support diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index c7480721f..dfaf2d910 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include "../hnsw/hnsw_entity.h" #include "../hnsw/hnsw_context.h" #include "omega_context.h" @@ -26,6 +27,81 @@ namespace zvec { namespace core { +bool OmegaStreamer::LoadModel(const std::string& model_dir) { + std::lock_guard lock(model_mutex_); + + if (omega_model_ != nullptr) { + omega_model_destroy(omega_model_); + omega_model_ = nullptr; + } + + omega_model_ = omega_model_create(); + if (omega_model_ == nullptr) { + LOG_ERROR("Failed to create OMEGA model manager"); + return false; + } + + if (omega_model_load(omega_model_, model_dir.c_str()) != 0) { + LOG_ERROR("Failed to load OMEGA model from %s", model_dir.c_str()); + omega_model_destroy(omega_model_); + omega_model_ = nullptr; + return false; + } + + LOG_INFO("OMEGA model loaded successfully from %s", model_dir.c_str()); + return true; +} + +bool OmegaStreamer::IsModelLoaded() const { + std::lock_guard lock(model_mutex_); + return omega_model_ != nullptr && omega_model_is_loaded(omega_model_); +} + +int OmegaStreamer::open(IndexStorage::Pointer stg) { + std::string index_path = stg ? 
stg->file_path() : ""; + debug_stats_logged_.store(false); + + int ret = HnswStreamer::open(std::move(stg)); + if (ret != 0) { + return ret; + } + + { + std::lock_guard lock(model_mutex_); + if (omega_model_ != nullptr) { + omega_model_destroy(omega_model_); + omega_model_ = nullptr; + } + } + + if (index_path.empty()) { + LOG_WARN("OmegaStreamer open: storage file path is empty, using HNSW fallback"); + return 0; + } + + size_t last_slash = index_path.rfind('/'); + if (last_slash == std::string::npos) { + LOG_WARN("OmegaStreamer open: cannot derive omega_model path from index path %s", + index_path.c_str()); + return 0; + } + + std::string model_dir = index_path.substr(0, last_slash) + "/omega_model"; + std::string model_path = model_dir + "/model.txt"; + if (!ailego::File::IsExist(model_path)) { + LOG_INFO("OmegaStreamer open: no OMEGA model found at %s, using HNSW fallback", + model_dir.c_str()); + return 0; + } + + if (!LoadModel(model_dir)) { + LOG_WARN("OmegaStreamer open: failed to load OMEGA model from %s, using HNSW fallback", + model_dir.c_str()); + } + + return 0; +} + int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, Context::Pointer &context) const { return search_impl(query, qmeta, 1, context); @@ -34,66 +110,83 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, Context::Pointer &context) const { + // Determine mode: training (no early stopping) vs inference (with early stopping) + bool enable_early_stopping = !training_mode_enabled_ && IsModelLoaded(); - // In training mode, use OMEGA library's training feature collection - if (!training_mode_enabled_) { - // Normal mode: just use parent HNSW search for now - // TODO: Load OMEGA model and use adaptive search for inference - LOG_DEBUG("OmegaStreamer: training mode disabled, using parent HNSW search"); + if (training_mode_enabled_) { + 
LOG_DEBUG("OmegaStreamer: training mode, early stopping DISABLED"); + } else if (enable_early_stopping) { + LOG_DEBUG("OmegaStreamer: inference mode with OMEGA model"); + } else { + // No model loaded and not in training mode - use parent HNSW search + LOG_DEBUG("OmegaStreamer: no model loaded, using parent HNSW search"); return HnswStreamer::search_impl(query, qmeta, count, context); } + return omega_search_impl(query, qmeta, count, context, enable_early_stopping); +} + +int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qmeta, + uint32_t count, Context::Pointer &context, + bool enable_early_stopping) const { // Cast context to OmegaContext to access training_query_id auto *omega_ctx = dynamic_cast(context.get()); int query_id = current_query_id_; // Default to member variable if (omega_ctx != nullptr && omega_ctx->training_query_id() >= 0) { - // Use training_query_id from context (for parallel training searches) query_id = omega_ctx->training_query_id(); } - LOG_INFO("OmegaStreamer: training mode enabled (query_id=%d), using OMEGA library to collect features", query_id); + // Get target recall from context if available + float target_recall = target_recall_; + if (omega_ctx != nullptr) { + target_recall = omega_ctx->target_recall(); + } - // Training mode: Use OMEGA library with nullptr model (training-only mode) - // The OMEGA library will collect training features automatically + // Cast context to HnswContext to access HNSW-specific features + auto *hnsw_ctx = dynamic_cast(context.get()); + if (hnsw_ctx == nullptr) { + LOG_ERROR("Context is not HnswContext"); + return IndexError_InvalidArgument; + } - // Create OMEGA search context in training mode (model=nullptr) - float target_recall = 0.95f; // Default target recall - OmegaSearchHandle omega_search = omega_search_create_with_params( - nullptr, target_recall, count, 100); // model=nullptr for training mode + // Create OMEGA search context. 
+ // OMEGA's k is the search top-k, not the batch/query count. + int omega_topk = static_cast(hnsw_ctx->topk()); + if (omega_topk <= 0) { + omega_topk = static_cast(count); + } + + // In training mode: model=nullptr (collect features only) + // In inference mode: model=omega_model_ (use for early stopping) + OmegaModelHandle model_to_use = enable_early_stopping ? omega_model_ : nullptr; + OmegaSearchHandle omega_search = omega_search_create_with_params( + model_to_use, target_recall, omega_topk, window_size_); if (omega_search == nullptr) { - LOG_ERROR("Failed to create OMEGA search context for training mode"); + LOG_ERROR("Failed to create OMEGA search context"); return IndexError_Runtime; } - // Enable training mode (CRITICAL: must be before search) - // Get ground truth for this query if available - std::vector gt_for_query; - if (query_id >= 0 && - static_cast(query_id) < training_ground_truth_.size()) { - const auto& gt = training_ground_truth_[query_id]; - gt_for_query.reserve(gt.size()); - for (uint64_t node_id : gt) { - gt_for_query.push_back(static_cast(node_id)); + // Enable training mode if active (CRITICAL: must be before search) + if (training_mode_enabled_) { + std::vector gt_for_query; + if (query_id >= 0 && + static_cast(query_id) < training_ground_truth_.size()) { + const auto& gt = training_ground_truth_[query_id]; + gt_for_query.reserve(gt.size()); + for (uint64_t node_id : gt) { + gt_for_query.push_back(static_cast(node_id)); + } } - } - omega_search_enable_training(omega_search, query_id, - gt_for_query.data(), gt_for_query.size(), - training_k_train_); - LOG_DEBUG("Training mode enabled for query_id=%d with %zu GT nodes", - query_id, gt_for_query.size()); - - // Cast context to HnswContext to access HNSW-specific features - auto *hnsw_ctx = dynamic_cast(context.get()); - if (hnsw_ctx == nullptr) { - LOG_ERROR("Context is not HnswContext"); - omega_search_destroy(omega_search); - return IndexError_InvalidArgument; + 
omega_search_enable_training(omega_search, query_id, + gt_for_query.data(), gt_for_query.size(), + training_k_train_); + LOG_DEBUG("Training mode enabled for query_id=%d with %zu GT nodes", + query_id, gt_for_query.size()); } // CRITICAL: Update context if it was created by another searcher/streamer - // This ensures the entity reference is fresh with correct entry_point if (hnsw_ctx->magic() != magic_) { int ret = update_context(hnsw_ctx); if (ret != 0) { @@ -102,11 +195,9 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, } } - // Initialize context for search (CRITICAL: must call before topk_to_result) + // Initialize context for search hnsw_ctx->clear(); hnsw_ctx->resize_results(count); - - // Initialize query in distance calculator hnsw_ctx->reset_query(query); // Get entity and distance calculator from context @@ -120,7 +211,6 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, auto max_level = entity.cur_max_level(); auto entry_point = entity.entry_point(); - if (entry_point == kInvalidNodeId) { omega_search_destroy(omega_search); return 0; @@ -156,11 +246,10 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, } } - // Set dist_start for OMEGA omega_search_set_dist_start(omega_search, dist); - // Now perform HNSW search on layer 0 with OMEGA feature collection + // Perform HNSW search on layer 0 with OMEGA candidates.clear(); visit_filter.clear(); topk_heap.clear(); @@ -171,17 +260,21 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, candidates.emplace(entry_point, dist); // Report initial visit to OMEGA - omega_search_report_visit(omega_search, entry_point, dist, 1); // is_in_topk=1 + omega_search_report_visit_candidate(omega_search, entry_point, dist, 1); dist_t lowerBound = dist; - // Main search loop with OMEGA feature collection + // Main search loop with OMEGA feature collection and early stopping + bool early_stop_hit = false; 
while (!candidates.empty()) { - auto top = candidates.begin(); node_id_t current_node = top->first; dist_t candidate_dist = top->second; + // Reference semantics: count the hop as soon as the current candidate is + // examined, before stop-condition evaluation. + omega_search_report_hop(omega_search); + // Standard HNSW stopping condition if (topk_heap.full() && candidate_dist > lowerBound) { break; @@ -189,9 +282,6 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, candidates.pop(); - // Report hop to OMEGA - omega_search_report_hop(omega_search); - // Get neighbors of current node const Neighbors neighbors = entity.get_neighbors(0, current_node); if (neighbors.size() == 0) continue; @@ -223,14 +313,28 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, const void *neighbor_vec = neighbor_vec_blocks[i].data(); dist_t neighbor_dist = dc.dist(neighbor_vec); - // Check if this node will be in topk - bool is_in_topk = (!topk_heap.full() || neighbor_dist < lowerBound); - - // Report visit to OMEGA (this will collect training features) - omega_search_report_visit(omega_search, neighbor, neighbor_dist, is_in_topk ? 1 : 0); + // Reference semantics: + // 1. `should_consider_candidate` is driven by the ef-bounded heap + // 2. OMEGA's top-candidate updates are driven by insertion into the + // result-set-sized top-k structure, not by ef admission alone. + bool should_consider_candidate = + (!topk_heap.full() || neighbor_dist < lowerBound); + omega_search_report_visit_candidate(omega_search, neighbor, neighbor_dist, + should_consider_candidate ? 
1 : 0); + + if (enable_early_stopping && omega_search_should_predict(omega_search)) { + if (omega_search_should_stop(omega_search)) { + int hops, cmps, collected_gt; + omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); + LOG_DEBUG("OMEGA early stop: cmps=%d, hops=%d, collected_gt=%d", + cmps, hops, collected_gt); + early_stop_hit = true; + break; + } + } // Consider this candidate - if (is_in_topk) { + if (should_consider_candidate) { candidates.emplace(neighbor, neighbor_dist); topk_heap.emplace(neighbor, neighbor_dist); @@ -239,71 +343,123 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, lowerBound = neighbor_dist; } - // Update lowerBound to the worst distance in topk if topk is full if (topk_heap.full()) { - lowerBound = topk_heap[0].second; // Max heap, so [0] is the worst + lowerBound = topk_heap[0].second; } } } - } + if (early_stop_hit) { + break; + } + } // Convert results to context format hnsw_ctx->topk_to_result(); // Get final statistics int hops, cmps, collected_gt; + float predicted_recall_avg = 0.0f; + float predicted_recall_at_target = 0.0f; + int omega_early_stop_hit = 0; + unsigned long long should_stop_calls = 0; + unsigned long long prediction_calls = 0; + unsigned long long should_stop_time_ns = 0; + unsigned long long prediction_eval_time_ns = 0; + unsigned long long sorted_window_time_ns = 0; + unsigned long long average_recall_eval_time_ns = 0; + unsigned long long prediction_feature_prep_time_ns = 0; + unsigned long long collected_gt_advance_count = 0; + unsigned long long should_stop_calls_with_advance = 0; + unsigned long long max_prediction_calls_per_should_stop = 0; omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); - LOG_DEBUG("OMEGA training search completed: cmps=%d, hops=%d, results=%zu", - cmps, hops, topk_heap.size()); - - // Collect training records from OMEGA library and store in context (no locks needed) - size_t record_count = 
omega_search_get_training_records_count(omega_search); - - if (record_count > 0 && omega_ctx != nullptr) { - - const void* records_ptr = omega_search_get_training_records(omega_search); - - // NOTE: omega_search_get_training_records returns pointer to std::vector, not array - const auto* records_vec = static_cast*>(records_ptr); - - // Convert and store training records in context (per-query, no shared state) - for (size_t i = 0; i < record_count; ++i) { - const auto& omega_record = (*records_vec)[i]; - core_interface::TrainingRecord record; - record.query_id = omega_record.query_id; - record.hops_visited = omega_record.hops; - record.cmps_visited = omega_record.cmps; - record.dist_1st = omega_record.dist_1st; - record.dist_start = omega_record.dist_start; - - // Copy 7 traversal window statistics - if (omega_record.traversal_window_stats.size() == 7) { - std::copy(omega_record.traversal_window_stats.begin(), - omega_record.traversal_window_stats.end(), - record.traversal_window_stats.begin()); - } else { - LOG_WARN("Unexpected traversal_window_stats size: %zu (expected 7)", - omega_record.traversal_window_stats.size()); - } + omega_search_get_debug_stats(omega_search, &predicted_recall_avg, + &predicted_recall_at_target, + &omega_early_stop_hit, + &should_stop_calls, &prediction_calls, + &should_stop_time_ns, + &prediction_eval_time_ns, + &sorted_window_time_ns, + &average_recall_eval_time_ns, + &prediction_feature_prep_time_ns, + &collected_gt_advance_count, + &should_stop_calls_with_advance, + &max_prediction_calls_per_should_stop); + LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu, early_stop=%d", + cmps, hops, topk_heap.size(), enable_early_stopping); + if (enable_early_stopping) { + bool expected = false; + if (debug_stats_logged_.compare_exchange_strong(expected, true)) { + LOG_WARN("OMEGA runtime stats: model_loaded=%d target_recall=%.4f cmps=%d " + "collected_gt=%d predicted_recall_avg=%.4f " + "predicted_recall_at_target=%.4f 
early_stop_hit=%d " + "should_stop_calls=%llu prediction_calls=%llu " + "advance_calls=%llu collected_gt_advance=%llu " + "max_pred_per_stop=%llu should_stop_ms=%.3f " + "prediction_eval_ms=%.3f sorted_window_ms=%.3f " + "avg_recall_eval_ms=%.3f feature_prep_ms=%.3f", + IsModelLoaded() ? 1 : 0, target_recall, cmps, collected_gt, + predicted_recall_avg, predicted_recall_at_target, + (early_stop_hit || omega_early_stop_hit != 0) ? 1 : 0, + should_stop_calls, prediction_calls, + should_stop_calls_with_advance, collected_gt_advance_count, + max_prediction_calls_per_should_stop, + static_cast(should_stop_time_ns) / 1e6, + static_cast(prediction_eval_time_ns) / 1e6, + static_cast(sorted_window_time_ns) / 1e6, + static_cast(average_recall_eval_time_ns) / 1e6, + static_cast(prediction_feature_prep_time_ns) / 1e6); + } + } - // Label is already computed in real-time during search - record.label = omega_record.label; + // Collect training records (only in training mode) + if (training_mode_enabled_) { + size_t record_count = omega_search_get_training_records_count(omega_search); + + if (record_count > 0 && omega_ctx != nullptr) { + const void* records_ptr = omega_search_get_training_records(omega_search); + const auto* records_vec = static_cast*>(records_ptr); + + for (size_t i = 0; i < record_count; ++i) { + const auto& omega_record = (*records_vec)[i]; + core_interface::TrainingRecord record; + record.query_id = omega_record.query_id; + record.hops_visited = omega_record.hops; + record.cmps_visited = omega_record.cmps; + record.dist_1st = omega_record.dist_1st; + record.dist_start = omega_record.dist_start; + + if (omega_record.traversal_window_stats.size() == 7) { + std::copy(omega_record.traversal_window_stats.begin(), + omega_record.traversal_window_stats.end(), + record.traversal_window_stats.begin()); + } + + record.label = omega_record.label; + omega_ctx->add_training_record(std::move(record)); + } - omega_ctx->add_training_record(std::move(record)); + 
LOG_DEBUG("Collected %zu training records for query_id=%d", record_count, query_id); } - LOG_DEBUG("Collected %zu training records for query_id=%d (stored in context)", - record_count, query_id); - } else if (record_count > 0) { - LOG_WARN("Training records collected but context is not OmegaContext, records lost"); - } else { - LOG_WARN("No training records collected for query_id=%d", query_id); + // Collect gt_cmps data + if (omega_ctx != nullptr) { + size_t gt_cmps_count = omega_search_get_gt_cmps_count(omega_search); + if (gt_cmps_count > 0) { + const int* gt_cmps_ptr = omega_search_get_gt_cmps(omega_search); + int total_cmps = omega_search_get_total_cmps(omega_search); + if (gt_cmps_ptr != nullptr) { + std::vector gt_cmps_vec(gt_cmps_ptr, gt_cmps_ptr + gt_cmps_count); + for (auto& v : gt_cmps_vec) { + if (v < 0) v = total_cmps; + } + omega_ctx->set_gt_cmps(gt_cmps_vec, total_cmps); + } + } + } } - // Destroy OMEGA search context omega_search_destroy(omega_search); - return 0; } @@ -319,7 +475,6 @@ IndexStreamer::Context::Pointer OmegaStreamer::create_context(void) const { return Context::Pointer(); } - // Create OmegaContext instead of HnswContext for OMEGA-specific features OmegaContext *ctx = new (std::nothrow) OmegaContext(meta_.dimension(), metric_, entity); if (ailego_unlikely(ctx == nullptr)) { @@ -327,7 +482,6 @@ IndexStreamer::Context::Pointer OmegaStreamer::create_context(void) const { return Context::Pointer(); } - // Copy all HNSW settings from parent ctx->set_ef(ef_); ctx->set_max_scan_limit(max_scan_limit_); ctx->set_min_scan_limit(min_scan_limit_); @@ -350,14 +504,38 @@ IndexStreamer::Context::Pointer OmegaStreamer::create_context(void) const { int OmegaStreamer::dump(const IndexDumper::Pointer &dumper) { LOG_INFO("OmegaStreamer dump"); - // Lock the shared mutex (from HnswStreamer base class) shared_mutex_.lock(); AILEGO_DEFER([&]() { shared_mutex_.unlock(); }); - // CRITICAL: Set "OmegaSearcher" instead of "HnswSearcher" - // This ensures 
IndexFlow will create OmegaSearcher (with training support) - // when the index is loaded from disk - meta_.set_searcher("OmegaSearcher", HnswEntity::kRevision, ailego::Params()); + // Extract OMEGA params from streamer params and pass to searcher + // This ensures OmegaSearcher gets the necessary params when loaded + ailego::Params searcher_params; + const auto& streamer_params = meta_.streamer_params(); + + // Copy omega.* params from streamer to searcher + if (streamer_params.has("omega.enabled")) { + searcher_params.insert("omega.enabled", + streamer_params.get_as_bool("omega.enabled")); + } + if (streamer_params.has("omega.min_vector_threshold")) { + searcher_params.insert("omega.min_vector_threshold", + streamer_params.get_as_uint32("omega.min_vector_threshold")); + } + if (streamer_params.has("omega.window_size")) { + searcher_params.insert("omega.window_size", + streamer_params.get_as_int32("omega.window_size")); + } + + LOG_INFO("OmegaStreamer::dump: passing omega params to searcher " + "(enabled=%d, min_threshold=%u, window_size=%d)", + searcher_params.has("omega.enabled") ? + searcher_params.get_as_bool("omega.enabled") : false, + searcher_params.has("omega.min_vector_threshold") ? + searcher_params.get_as_uint32("omega.min_vector_threshold") : 0, + searcher_params.has("omega.window_size") ? 
+ searcher_params.get_as_int32("omega.window_size") : 0); + + meta_.set_searcher("OmegaSearcher", HnswEntity::kRevision, searcher_params); int ret = IndexHelper::SerializeToDumper(meta_, dumper.get()); if (ret != 0) { @@ -365,11 +543,9 @@ int OmegaStreamer::dump(const IndexDumper::Pointer &dumper) { return ret; } - // Delegate to parent class's entity dump return entity_.dump(dumper); } -// Register OmegaStreamer with the factory INDEX_FACTORY_REGISTER_STREAMER(OmegaStreamer); } // namespace core diff --git a/src/core/algorithm/omega/omega_streamer.h b/src/core/algorithm/omega/omega_streamer.h index b19d63121..6bd57c22b 100644 --- a/src/core/algorithm/omega/omega_streamer.h +++ b/src/core/algorithm/omega/omega_streamer.h @@ -16,6 +16,8 @@ #include "../hnsw/hnsw_streamer.h" #include "omega_context.h" #include +#include +#include #include #include @@ -29,13 +31,18 @@ namespace core { * as the searcher type, ensuring that disk-persisted indices will use * OmegaSearcher (with training support) when loaded. * - * For in-memory indices, currently delegates to parent HNSW search. - * Future: Implement adaptive search with OMEGA C API directly. + * Supports both training mode (feature collection) and inference mode + * (adaptive search with learned early stopping). 
*/ class OmegaStreamer : public HnswStreamer { public: - OmegaStreamer(void) : HnswStreamer() {} - virtual ~OmegaStreamer(void) {} + OmegaStreamer(void) : HnswStreamer(), omega_model_(nullptr) {} + virtual ~OmegaStreamer(void) { + if (omega_model_ != nullptr) { + omega_model_destroy(omega_model_); + omega_model_ = nullptr; + } + } OmegaStreamer(const OmegaStreamer &streamer) = delete; OmegaStreamer &operator=(const OmegaStreamer &streamer) = delete; @@ -49,12 +56,18 @@ class OmegaStreamer : public HnswStreamer { training_k_train_ = k_train; } + // Inference mode support + bool LoadModel(const std::string& model_dir); + bool IsModelLoaded() const; + void SetTargetRecall(float target_recall) { target_recall_ = target_recall; } + void SetWindowSize(int window_size) { window_size_ = window_size; } + protected: /** - * @brief Override search to potentially use OMEGA adaptive search + * @brief Override search to use OMEGA adaptive search * - * Currently delegates to parent HNSW search. - * Future: Implement OMEGA adaptive search with learned early stopping. + * In training mode: collects features without early stopping + * In inference mode: uses OMEGA model for adaptive early stopping */ virtual int search_impl(const void *query, const IndexQueryMeta &qmeta, Context::Pointer &context) const override; @@ -63,6 +76,11 @@ class OmegaStreamer : public HnswStreamer { uint32_t count, Context::Pointer &context) const override; + /** + * @brief Override open to auto-load omega_model from the index directory. 
+ */ + virtual int open(IndexStorage::Pointer stg) override; + /** * @brief Override create_context to return OmegaContext */ @@ -74,12 +92,23 @@ class OmegaStreamer : public HnswStreamer { virtual int dump(const IndexDumper::Pointer &dumper) override; private: - // Training mode state (for future implementation) - bool training_mode_enabled_{false}; - int current_query_id_{0}; - std::vector> training_ground_truth_; // [query_id][rank] = node_id - int training_k_train_{1}; // Number of GT nodes to check for label - // Note: training records are now stored per-context in OmegaContext, not here + // Perform OMEGA adaptive search (shared between training and inference mode) + int omega_search_impl(const void *query, const IndexQueryMeta &qmeta, + uint32_t count, Context::Pointer &context, + bool enable_early_stopping) const; + + // Training mode state + mutable bool training_mode_enabled_{false}; + mutable int current_query_id_{0}; + std::vector> training_ground_truth_; + int training_k_train_{1}; + + // Inference mode state + mutable OmegaModelHandle omega_model_{nullptr}; + mutable std::mutex model_mutex_; + mutable std::atomic debug_stats_logged_{false}; + float target_recall_{0.95f}; + int window_size_{100}; }; } // namespace core diff --git a/src/core/interface/index.cc b/src/core/interface/index.cc index 41d1669ab..37f95af7c 100644 --- a/src/core/interface/index.cc +++ b/src/core/interface/index.cc @@ -661,6 +661,11 @@ int Index::_dense_search(const VectorData &vector_data, // Extract training records from context (for OMEGA training mode) result->training_records_ = context->take_training_records(); + // Extract gt_cmps data from context (for OMEGA training mode) + result->gt_cmps_per_rank_ = context->take_gt_cmps(); + result->total_cmps_ = context->get_total_cmps(); + result->training_query_id_ = context->get_training_query_id(); + return 0; } diff --git a/src/core/utility/buffer_storage.cc b/src/core/utility/buffer_storage.cc index 4ac3c6b3f..8e46d2506 100644 
--- a/src/core/utility/buffer_storage.cc +++ b/src/core/utility/buffer_storage.cc @@ -325,6 +325,11 @@ class BufferStorage : public IndexStorage { return header_.magic; } + //! Retrieve file path of storage + std::string file_path(void) const override { + return file_name_; + } + uint32_t get_context_offset() { return header_.content_offset; } diff --git a/src/core/utility/mmap_file_storage.cc b/src/core/utility/mmap_file_storage.cc index e4a8e2387..74044f1eb 100644 --- a/src/core/utility/mmap_file_storage.cc +++ b/src/core/utility/mmap_file_storage.cc @@ -172,6 +172,7 @@ class MMapFileStorage : public IndexStorage { //! Open storage int open(const std::string &path, bool create) override { + file_path_ = path; // Store the file path for later retrieval if (!ailego::File::IsExist(path) && create) { size_t last_slash = path.rfind('/'); if (last_slash != std::string::npos) { @@ -231,6 +232,11 @@ class MMapFileStorage : public IndexStorage { return mapping_.magic(); } + //! Retrieve file path of storage + std::string file_path(void) const override { + return file_path_; + } + protected: //! Initialize index version segment int init_version_segment(void) { @@ -328,6 +334,7 @@ class MMapFileStorage : public IndexStorage { } private: + std::string file_path_{}; // Store the file path for retrieval uint32_t segment_meta_capacity_{1024 * 1024}; bool copy_on_write_{false}; bool force_flush_{false}; diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt index fd95c1dcc..7d57a04ef 100644 --- a/src/db/CMakeLists.txt +++ b/src/db/CMakeLists.txt @@ -16,7 +16,7 @@ file(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h) cc_library( NAME zvec_db STATIC STRICT SRCS_NO_GLOB SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc - INCS . ${CMAKE_CURRENT_BINARY_DIR} + INCS . 
${CMAKE_CURRENT_BINARY_DIR} ${PROJECT_ROOT_DIR}/thirdparty/omega/include LIBS zvec_ailego zvec_core diff --git a/src/db/index/column/vector_column/engine_helper.hpp b/src/db/index/column/vector_column/engine_helper.hpp index e80e0cc2d..b19c6203f 100644 --- a/src/db/index/column/vector_column/engine_helper.hpp +++ b/src/db/index/column/vector_column/engine_helper.hpp @@ -380,8 +380,6 @@ class ProximaEngineHelper { hnsw_param->params.insert("omega.enabled", true); hnsw_param->params.insert("omega.min_vector_threshold", db_index_params->min_vector_threshold()); - hnsw_param->params.insert("omega.model_dir", - db_index_params->model_dir()); hnsw_param->params.insert("omega.window_size", db_index_params->window_size()); @@ -412,4 +410,4 @@ class ProximaEngineHelper { } } }; -}; // namespace zvec \ No newline at end of file +}; // namespace zvec diff --git a/src/db/index/column/vector_column/vector_column_indexer.cc b/src/db/index/column/vector_column/vector_column_indexer.cc index 3650bc044..bf7eb6d4e 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.cc +++ b/src/db/index/column/vector_column/vector_column_indexer.cc @@ -209,6 +209,16 @@ Result VectorColumnIndexer::Search( std::make_move_iterator(search_result.training_records_.end())); } + // Collect gt_cmps data from search result (for OMEGA training) + if (training_mode_enabled_ && !search_result.gt_cmps_per_rank_.empty() && + search_result.training_query_id_ >= 0) { + std::lock_guard lock(training_mutex_); + gt_cmps_map_[search_result.training_query_id_] = { + std::move(search_result.gt_cmps_per_rank_), + search_result.total_cmps_ + }; + } + auto result = std::make_shared( is_sparse_, std::move(search_result.doc_list_), std::move(search_result.reverted_vector_list_), @@ -252,6 +262,7 @@ std::vector VectorColumnIndexer::GetTrainingReco void VectorColumnIndexer::ClearTrainingRecords() { std::lock_guard lock(training_mutex_); collected_records_.clear(); + gt_cmps_map_.clear(); // Propagate to 
underlying index if it exists and supports training if (index != nullptr) { @@ -271,6 +282,51 @@ void VectorColumnIndexer::SetTrainingGroundTruth( } } +core_interface::GtCmpsData VectorColumnIndexer::GetGtCmpsData() const { + std::lock_guard lock(training_mutex_); + + core_interface::GtCmpsData result; + if (gt_cmps_map_.empty()) { + return result; + } + + // Find max query_id to determine array size + int max_query_id = gt_cmps_map_.rbegin()->first; + result.num_queries = max_query_id + 1; + + // Determine topk from first non-empty entry + for (const auto& entry : gt_cmps_map_) { + if (!entry.second.first.empty()) { + result.topk = entry.second.first.size(); + break; + } + } + + // Initialize arrays + result.gt_cmps.resize(result.num_queries); + result.total_cmps.resize(result.num_queries, 0); + + for (size_t q = 0; q < result.num_queries; ++q) { + result.gt_cmps[q].resize(result.topk, 0); + } + + // Fill in collected data + for (const auto& entry : gt_cmps_map_) { + int query_id = entry.first; + const auto& gt_cmps_vec = entry.second.first; + int total = entry.second.second; + + if (query_id >= 0 && query_id < static_cast(result.num_queries)) { + result.total_cmps[query_id] = total; + for (size_t r = 0; r < gt_cmps_vec.size() && r < result.topk; ++r) { + result.gt_cmps[query_id][r] = gt_cmps_vec[r]; + } + } + } + + return result; +} + core_interface::ITrainingCapable* VectorColumnIndexer::GetTrainingCapability() const { if (index != nullptr) { return index->GetTrainingCapability(); diff --git a/src/db/index/column/vector_column/vector_column_indexer.h b/src/db/index/column/vector_column/vector_column_indexer.h index d61fc2b2e..98f4a365b 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.h +++ b/src/db/index/column/vector_column/vector_column_indexer.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -149,6 +150,16 @@ class VectorColumnIndexer { void SetTrainingGroundTruth(const std::vector>& ground_truth, int 
k_train = 1); + /** + * @brief Get collected gt_cmps data for all queries. + * + * Returns the gt_cmps data collected during training searches. + * The data is indexed by query_id. + * + * @return GtCmpsData structure with per-query gt_cmps values + */ + core_interface::GtCmpsData GetGtCmpsData() const; + public: std::string index_file_path() const { return index_file_path_; @@ -201,6 +212,8 @@ class VectorColumnIndexer { int current_query_id_{0}; mutable std::mutex training_mutex_; mutable std::vector collected_records_; + // GT cmps data: gt_cmps_map_[query_id] = {gt_cmps_per_rank, total_cmps} + mutable std::map, int>> gt_cmps_map_; }; diff --git a/src/db/index/common/proto_converter.cc b/src/db/index/common/proto_converter.cc index 2c1cb602f..f09f3d7b3 100644 --- a/src/db/index/common/proto_converter.cc +++ b/src/db/index/common/proto_converter.cc @@ -83,7 +83,6 @@ OmegaIndexParams::OPtr ProtoConverter::FromPb( params_pb.ef_construction(), QuantizeTypeCodeBook::Get(params_pb.base().quantize_type()), params_pb.min_vector_threshold(), - params_pb.model_dir(), params_pb.num_training_queries(), params_pb.ef_training(), params_pb.window_size(), @@ -101,7 +100,6 @@ proto::OmegaIndexParams ProtoConverter::ToPb(const OmegaIndexParams *params) { params_pb.set_ef_construction(params->ef_construction()); params_pb.set_m(params->m()); params_pb.set_min_vector_threshold(params->min_vector_threshold()); - params_pb.set_model_dir(params->model_dir()); params_pb.set_num_training_queries(params->num_training_queries()); params_pb.set_ef_training(params->ef_training()); params_pb.set_window_size(params->window_size()); @@ -322,4 +320,4 @@ proto::SegmentMeta ProtoConverter::ToPb(const SegmentMeta &meta) { return meta_pb; } -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/src/db/training/omega_model_trainer.cc b/src/db/training/omega_model_trainer.cc index 59a061145..b8fecd1ef 100644 --- a/src/db/training/omega_model_trainer.cc +++ 
b/src/db/training/omega_model_trainer.cc @@ -16,6 +16,7 @@ #include "omega_model_trainer.h" #include +#include #include #include @@ -81,6 +82,20 @@ Status OmegaModelTrainer::TrainModelWithGtCmps( for (const auto& r : training_records) { omega_records.push_back(ConvertRecord(r)); } + std::sort(omega_records.begin(), omega_records.end(), + [](const omega::TrainingRecord& lhs, + const omega::TrainingRecord& rhs) { + if (lhs.query_id != rhs.query_id) { + return lhs.query_id < rhs.query_id; + } + if (lhs.cmps_visited != rhs.cmps_visited) { + return lhs.cmps_visited < rhs.cmps_visited; + } + if (lhs.hops_visited != rhs.hops_visited) { + return lhs.hops_visited < rhs.hops_visited; + } + return lhs.label < rhs.label; + }); // Convert gt_cmps data omega::GtCmpsData omega_gt_cmps = ConvertGtCmpsData(gt_cmps_data); diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 98c1c866e..8d43ac59b 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -586,7 +586,8 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( dim, topk, omega_metric, - held_out_mode); + held_out_mode, + query_doc_ids); // Pass query-to-base mapping for correct self-exclusion auto compute_end = std::chrono::high_resolution_clock::now(); auto compute_ms = std::chrono::duration_cast(compute_end - compute_start).count(); @@ -638,9 +639,12 @@ core_interface::GtCmpsData TrainingDataCollector::ComputeGtCmps( const std::vector& records, const std::vector>& ground_truth, size_t topk) { - // NOTE: gt_cmps computation requires collected_node_ids which was removed - // for memory optimization. This function now returns default values based on - // record.cmps_visited as a simple approximation. + // NOTE: This is a FALLBACK approximation method. 
+ // The preferred method is to collect actual gt_cmps during search via + // VectorColumnIndexer::GetGtCmpsData(), which tracks the exact cmps value + // when each GT rank first enters the topk during HNSW traversal. + // + // This approximation uses record.cmps_visited as a simple heuristic. core_interface::GtCmpsData result; result.topk = topk; @@ -933,12 +937,23 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( LOG_INFO("Collected %zu records: %zu positive, %zu negative (labels computed in real-time)", all_records.size(), positive_count, negative_count); - // Step 8: Compute gt_cmps data - LOG_INFO("Computing gt_cmps data"); + // Step 8: Get gt_cmps data directly from indexers (collected during search) + LOG_INFO("Collecting gt_cmps data from indexers"); core_interface::GtCmpsData gt_cmps_data; { - ScopedTimer timer("Step8: ComputeGtCmps"); - gt_cmps_data = ComputeGtCmps(all_records, ground_truth, options.topk); + ScopedTimer timer("Step8: GetGtCmpsData"); + // Get gt_cmps from first indexer (all indexers should have the same data) + if (!indexers.empty()) { + gt_cmps_data = indexers[0]->GetGtCmpsData(); + if (gt_cmps_data.gt_cmps.empty()) { + // Fallback to approximation if actual data not available + LOG_WARN("No actual gt_cmps data collected, falling back to approximation"); + gt_cmps_data = ComputeGtCmps(all_records, ground_truth, options.topk); + } else { + LOG_INFO("Got actual gt_cmps data for %zu queries, topk=%zu", + gt_cmps_data.num_queries, gt_cmps_data.topk); + } + } } // Step 9: Disable training mode and clear records diff --git a/src/include/zvec/core/framework/index_context.h b/src/include/zvec/core/framework/index_context.h index 6447999a6..7d185ce0f 100644 --- a/src/include/zvec/core/framework/index_context.h +++ b/src/include/zvec/core/framework/index_context.h @@ -258,6 +258,23 @@ class IndexContext { //! Clear training records (call before each search if context is reused) virtual void clear_training_records() {} + //! 
Get gt_cmps data (cmps when each GT rank was found) for OMEGA training + //! Returns vector where gt_cmps[rank] = cmps when GT[rank] first entered topk + //! Default implementation returns empty vector. Override in OmegaContext. + virtual std::vector take_gt_cmps() { + return {}; + } + + //! Get total comparisons for this search (OMEGA training) + virtual int get_total_cmps() const { + return 0; + } + + //! Get training query ID for this search (-1 means not set) + virtual int get_training_query_id() const { + return -1; + } + private: //! Members IndexFilter filter_{}; diff --git a/src/include/zvec/core/framework/index_storage.h b/src/include/zvec/core/framework/index_storage.h index 8673d63e6..67282e308 100644 --- a/src/include/zvec/core/framework/index_storage.h +++ b/src/include/zvec/core/framework/index_storage.h @@ -266,6 +266,11 @@ class IndexStorage : public IndexModule { virtual bool isHugePage(void) const { return false; } + + //! Retrieve file path of storage (for OMEGA model loading) + virtual std::string file_path(void) const { + return ""; // Default: empty (not all storages have file paths) + } }; } // namespace core diff --git a/src/include/zvec/core/interface/index.h b/src/include/zvec/core/interface/index.h index f7df0fe7a..11b6888e3 100644 --- a/src/include/zvec/core/interface/index.h +++ b/src/include/zvec/core/interface/index.h @@ -106,6 +106,11 @@ struct SearchResult { std::vector reverted_sparse_values_list_{}; // Training records collected during search (for OMEGA training mode) std::vector training_records_{}; + // GT cmps data: cmps value when each GT rank was found (for OMEGA training) + // gt_cmps_per_rank_[rank] = cmps when GT[rank] first entered topk (-1 if not found) + std::vector gt_cmps_per_rank_{}; + int total_cmps_{0}; // Total comparisons in this search + int training_query_id_{-1}; // Query ID for this search (-1 if not training) }; class Index { diff --git a/src/include/zvec/db/index_params.h 
b/src/include/zvec/db/index_params.h index 1310dcf2e..db781977c 100644 --- a/src/include/zvec/db/index_params.h +++ b/src/include/zvec/db/index_params.h @@ -324,7 +324,6 @@ class OmegaIndexParams : public VectorIndexParams { int ef_construction = core_interface::kDefaultHnswEfConstruction, QuantizeType quantize_type = QuantizeType::UNDEFINED, uint32_t min_vector_threshold = 100000, - const std::string& model_dir = "./omega_models", size_t num_training_queries = 1000, int ef_training = 1000, int window_size = 100, @@ -333,7 +332,6 @@ class OmegaIndexParams : public VectorIndexParams { m_(m), ef_construction_(ef_construction), min_vector_threshold_(min_vector_threshold), - model_dir_(model_dir), num_training_queries_(num_training_queries), ef_training_(ef_training), window_size_(window_size), @@ -345,8 +343,8 @@ class OmegaIndexParams : public VectorIndexParams { Ptr clone() const override { return std::make_shared(metric_type_, m_, ef_construction_, quantize_type_, min_vector_threshold_, - model_dir_, num_training_queries_, - ef_training_, window_size_, ef_groundtruth_); + num_training_queries_, ef_training_, + window_size_, ef_groundtruth_); } std::string to_string() const override { @@ -355,7 +353,6 @@ class OmegaIndexParams : public VectorIndexParams { std::ostringstream oss; oss << base_str << ",m:" << m_ << ",ef_construction:" << ef_construction_ << ",min_vector_threshold:" << min_vector_threshold_ - << ",model_dir:" << model_dir_ << ",num_training_queries:" << num_training_queries_ << ",ef_training:" << ef_training_ << ",window_size:" << window_size_ @@ -372,8 +369,6 @@ class OmegaIndexParams : public VectorIndexParams { static_cast(other).ef_construction_ && min_vector_threshold_ == static_cast(other).min_vector_threshold_ && - model_dir_ == - static_cast(other).model_dir_ && num_training_queries_ == static_cast(other).num_training_queries_ && ef_training_ == @@ -404,12 +399,6 @@ class OmegaIndexParams : public VectorIndexParams { uint32_t 
min_vector_threshold() const { return min_vector_threshold_; } - void set_model_dir(const std::string& model_dir) { - model_dir_ = model_dir; - } - const std::string& model_dir() const { - return model_dir_; - } void set_num_training_queries(size_t num_training_queries) { num_training_queries_ = num_training_queries; } @@ -439,11 +428,10 @@ class OmegaIndexParams : public VectorIndexParams { int m_; int ef_construction_; uint32_t min_vector_threshold_; - std::string model_dir_; size_t num_training_queries_; int ef_training_; int window_size_; int ef_groundtruth_; // 0 = brute force, >0 = use HNSW with this ef }; -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/thirdparty/omega b/thirdparty/omega index 9bbb1157c..aace108d5 160000 --- a/thirdparty/omega +++ b/thirdparty/omega @@ -1 +1 @@ -Subproject commit 9bbb1157c56db05bd0d2aac2d32151bc704fb38e +Subproject commit aace108d551d2e4b3515789f432e94a5d4bad16b diff --git a/tools/core/CMakeLists.txt b/tools/core/CMakeLists.txt index 46efc39f3..d561312fb 100644 --- a/tools/core/CMakeLists.txt +++ b/tools/core/CMakeLists.txt @@ -33,6 +33,14 @@ cc_binary( LIBS gflags yaml-cpp magic_enum core_framework core_metric core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw core_knn_hnsw_sparse core_knn_cluster core_knn_ivf roaring core_interface ) +cc_binary( + NAME omega_predict_microbench + STRICT PACKED + SRCS omega_predict_microbench.cc + INCS ${PROJECT_ROOT_DIR}/src/core/ ${PROJECT_ROOT_DIR}/thirdparty/omega/include + LIBS omega +) + cc_binary( NAME recall_original diff --git a/tools/core/omega_predict_microbench.cc b/tools/core/omega_predict_microbench.cc new file mode 100644 index 000000000..04d913b68 --- /dev/null +++ b/tools/core/omega_predict_microbench.cc @@ -0,0 +1,267 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "omega/model_manager.h" + +namespace { + +struct 
Options { + std::string model_dir; + uint64_t iterations = 1000000; + uint64_t warmup = 10000; + int threads = 1; + size_t feature_pool_size = 1024; + bool random_features = false; +}; + +struct Stats { + double elapsed_sec = 0.0; + double avg_us_per_call = 0.0; + double qps = 0.0; + double checksum = 0.0; +}; + +void PrintUsage(const char* argv0) { + std::cerr + << "Usage: " << argv0 << " --model-dir [options]\n" + << "Options:\n" + << " --iterations Total measured calls across all threads\n" + << " --warmup Warmup calls per thread\n" + << " --threads Number of benchmark threads\n" + << " --feature-pool-size Number of synthetic feature rows\n" + << " --random-features Use random synthetic features\n"; +} + +bool ParseArgs(int argc, char** argv, Options* opts) { + for (int i = 1; i < argc; ++i) { + const char* arg = argv[i]; + if (std::strcmp(arg, "--model-dir") == 0 && i + 1 < argc) { + opts->model_dir = argv[++i]; + } else if (std::strcmp(arg, "--iterations") == 0 && i + 1 < argc) { + opts->iterations = std::strtoull(argv[++i], nullptr, 10); + } else if (std::strcmp(arg, "--warmup") == 0 && i + 1 < argc) { + opts->warmup = std::strtoull(argv[++i], nullptr, 10); + } else if (std::strcmp(arg, "--threads") == 0 && i + 1 < argc) { + opts->threads = std::max(1, std::atoi(argv[++i])); + } else if (std::strcmp(arg, "--feature-pool-size") == 0 && i + 1 < argc) { + opts->feature_pool_size = + std::max(1, std::strtoull(argv[++i], nullptr, 10)); + } else if (std::strcmp(arg, "--random-features") == 0) { + opts->random_features = true; + } else if (std::strcmp(arg, "--help") == 0 || + std::strcmp(arg, "-h") == 0) { + PrintUsage(argv[0]); + return false; + } else { + std::cerr << "Unknown argument: " << arg << "\n"; + PrintUsage(argv[0]); + return false; + } + } + if (opts->model_dir.empty()) { + PrintUsage(argv[0]); + return false; + } + return true; +} + +std::vector> BuildFeaturePool(const Options& opts) { + std::vector> pool(opts.feature_pool_size); + std::mt19937 
rng(12345); + std::uniform_real_distribution frac_dist(0.0f, 1.0f); + std::uniform_int_distribution hops_dist(3, 64); + std::uniform_int_distribution cmps_dist(150, 5000); + + for (size_t i = 0; i < pool.size(); ++i) { + auto& f = pool[i]; + if (opts.random_features) { + f[0] = static_cast(hops_dist(rng)); + f[1] = static_cast(cmps_dist(rng)); + f[2] = 0.10f + 0.25f * frac_dist(rng); + f[3] = 0.20f + 0.30f * frac_dist(rng); + f[4] = 0.10f + 0.20f * frac_dist(rng); + f[5] = 0.0005f + 0.02f * frac_dist(rng); + f[6] = 0.01f + 0.08f * frac_dist(rng); + f[7] = 0.20f + 0.30f * frac_dist(rng); + f[8] = 0.11f + 0.20f * frac_dist(rng); + f[9] = 0.10f + 0.18f * frac_dist(rng); + f[10] = 0.13f + 0.24f * frac_dist(rng); + } else { + f = {20.0f + static_cast(i % 7), + 1800.0f + static_cast((i * 37) % 700), + 0.125f + 0.001f * static_cast(i % 11), + 0.337f + 0.001f * static_cast(i % 13), + 0.182f + 0.001f * static_cast(i % 17), + 0.008f + 0.0001f * static_cast(i % 19), + 0.091f + 0.0007f * static_cast(i % 23), + 0.304f + 0.0008f * static_cast(i % 29), + 0.171f + 0.0005f * static_cast(i % 31), + 0.149f + 0.0005f * static_cast(i % 37), + 0.212f + 0.0006f * static_cast(i % 41)}; + } + } + return pool; +} + +float CalibrateProbability(const omega::ModelTables& tables, double probability) { + if (tables.threshold_table.empty()) { + return static_cast(probability); + } + int score_key = static_cast(std::round(probability * 10000.0)); + auto it = tables.threshold_table.upper_bound(score_key); + if (it != tables.threshold_table.begin()) { + --it; + } + return it->second; +} + +template +Stats RunBenchmark(const std::string& name, const Options& opts, Fn fn) { + const uint64_t total_iterations = std::max(1, opts.iterations); + const int thread_count = std::max(1, opts.threads); + const uint64_t base_iters = total_iterations / thread_count; + const uint64_t extra_iters = total_iterations % thread_count; + + std::atomic ready{0}; + std::atomic go{false}; + std::vector workers; + 
std::vector checksums(thread_count, 0.0); + + auto start = std::chrono::steady_clock::time_point{}; + auto end = std::chrono::steady_clock::time_point{}; + + for (int tid = 0; tid < thread_count; ++tid) { + workers.emplace_back([&, tid]() { + const uint64_t iters = base_iters + (static_cast(tid) < extra_iters ? 1 : 0); + double local_sum = 0.0; + for (uint64_t i = 0; i < opts.warmup; ++i) { + local_sum += fn(tid, i); + } + ready.fetch_add(1, std::memory_order_release); + while (!go.load(std::memory_order_acquire)) { + } + for (uint64_t i = 0; i < iters; ++i) { + local_sum += fn(tid, i + opts.warmup); + } + checksums[tid] = local_sum; + }); + } + + while (ready.load(std::memory_order_acquire) != thread_count) { + } + start = std::chrono::steady_clock::now(); + go.store(true, std::memory_order_release); + + for (auto& worker : workers) { + worker.join(); + } + end = std::chrono::steady_clock::now(); + + const double elapsed_sec = + std::chrono::duration_cast>(end - start) + .count(); + double checksum = 0.0; + for (double value : checksums) { + checksum += value; + } + + Stats stats; + stats.elapsed_sec = elapsed_sec; + stats.avg_us_per_call = elapsed_sec * 1e6 / static_cast(total_iterations); + stats.qps = static_cast(total_iterations) / elapsed_sec; + stats.checksum = checksum; + + std::cout << std::fixed << std::setprecision(3) + << name << ": total_calls=" << total_iterations + << " threads=" << thread_count + << " elapsed_s=" << stats.elapsed_sec + << " avg_us_per_call=" << stats.avg_us_per_call + << " qps=" << stats.qps + << " checksum=" << stats.checksum << "\n"; + + return stats; +} + +} // namespace + +int main(int argc, char** argv) { + Options opts; + if (!ParseArgs(argc, argv, &opts)) { + return 1; + } + + omega::ModelManager manager; + if (!manager.LoadModel(opts.model_dir)) { + std::cerr << "Failed to load model from " << opts.model_dir << "\n"; + return 2; + } + + const omega::GBDTModel* model = manager.GetModel(); + const omega::ModelTables* tables = 
manager.GetTables(); + if (model == nullptr || tables == nullptr || !model->IsLoaded()) { + std::cerr << "Model manager did not return a loaded model\n"; + return 3; + } + + auto feature_pool = BuildFeaturePool(opts); + std::vector> feature_pool_double(feature_pool.size()); + for (size_t i = 0; i < feature_pool.size(); ++i) { + for (size_t j = 0; j < feature_pool[i].size(); ++j) { + feature_pool_double[i][j] = static_cast(feature_pool[i][j]); + } + } + + std::cout << "OMEGA prediction microbenchmark\n"; + std::cout << "model_dir=" << opts.model_dir + << " iterations=" << opts.iterations + << " warmup=" << opts.warmup + << " threads=" << opts.threads + << " feature_pool_size=" << opts.feature_pool_size + << " random_features=" << (opts.random_features ? 1 : 0) << "\n"; + + RunBenchmark("pack_only", opts, [&](int tid, uint64_t iter) -> double { + const auto& src = feature_pool[(iter + static_cast(tid)) % feature_pool.size()]; + std::array dst{}; + for (size_t j = 0; j < src.size(); ++j) { + dst[j] = static_cast(src[j]); + } + return dst[0] + dst[10]; + }); + + RunBenchmark("predict_raw_prebuilt", opts, [&](int tid, uint64_t iter) -> double { + const auto& features = + feature_pool_double[(iter + static_cast(tid)) % feature_pool_double.size()]; + return model->PredictRaw(features.data(), static_cast(features.size())); + }); + + RunBenchmark("predict_prob_prebuilt", opts, [&](int tid, uint64_t iter) -> double { + const auto& features = + feature_pool_double[(iter + static_cast(tid)) % feature_pool_double.size()]; + return model->Predict(features.data(), static_cast(features.size())); + }); + + RunBenchmark("predict_calibrated_pack", opts, [&](int tid, uint64_t iter) -> double { + const auto& src = feature_pool[(iter + static_cast(tid)) % feature_pool.size()]; + std::array dst{}; + for (size_t j = 0; j < src.size(); ++j) { + dst[j] = static_cast(src[j]); + } + double raw_score = + model->PredictRaw(dst.data(), static_cast(dst.size())); + double probability = 1.0 / (1.0 
+ std::exp(-raw_score)); + return CalibrateProbability(*tables, probability); + }); + + return 0; +} From f770de34e6c42668ec1f74fb74ab03823014ab5f Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 19 Mar 2026 17:02:51 +0800 Subject: [PATCH 014/126] perf(omega): batch OMEGA distance evaluation, clean temporary training hooks, and add query-side profiling --- python/zvec/__init__.py | 1 - src/binding/python/binding.cc | 1 + src/binding/python/model/python_collection.cc | 2 +- src/core/algorithm/hnsw/hnsw_context.h | 9 +- .../algorithm/hnsw/hnsw_dist_calculator.h | 21 +- src/core/algorithm/hnsw/hnsw_streamer.cc | 66 ++- src/core/algorithm/omega/omega_searcher.cc | 69 +-- src/core/algorithm/omega/omega_streamer.cc | 172 ++++++- src/core/algorithm/omega/omega_streamer.h | 1 + src/core/interface/indexes/hnsw_index.cc | 15 +- src/db/collection.cc | 7 +- .../vector_column/vector_column_indexer.cc | 7 + src/db/index/segment/segment.cc | 8 +- src/db/training/omega_model_trainer.h | 1 + src/db/training/training_data_collector.cc | 467 +++++++++--------- src/db/training/training_data_collector.h | 9 + src/include/zvec/db/collection.h | 2 +- src/include/zvec/db/options.h | 4 +- thirdparty/omega | 2 +- tools/core/CMakeLists.txt | 1 - 20 files changed, 565 insertions(+), 300 deletions(-) diff --git a/python/zvec/__init__.py b/python/zvec/__init__.py index cb545a1b1..497e0c7a4 100644 --- a/python/zvec/__init__.py +++ b/python/zvec/__init__.py @@ -71,7 +71,6 @@ # —— lifecycle —— from .zvec import create_and_open, init, open - # ============================== # Public interface declaration # ============================== diff --git a/src/binding/python/binding.cc b/src/binding/python/binding.cc index ed8d6918d..091878152 100644 --- a/src/binding/python/binding.cc +++ b/src/binding/python/binding.cc @@ -18,6 +18,7 @@ #include "python_param.h" #include "python_schema.h" #include "python_type.h" +#include namespace zvec { PYBIND11_MODULE(_zvec, m) { diff --git 
a/src/binding/python/model/python_collection.cc b/src/binding/python/model/python_collection.cc index 838cfb0ad..32fe7a919 100644 --- a/src/binding/python/model/python_collection.cc +++ b/src/binding/python/model/python_collection.cc @@ -215,4 +215,4 @@ void ZVecPyCollection::bind_dql_methods( }); } -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/src/core/algorithm/hnsw/hnsw_context.h b/src/core/algorithm/hnsw/hnsw_context.h index 3358e8632..28fed0e2e 100644 --- a/src/core/algorithm/hnsw/hnsw_context.h +++ b/src/core/algorithm/hnsw/hnsw_context.h @@ -98,8 +98,9 @@ class HnswContext : public IndexContext { char buf[4096]; size_t size = snprintf( buf, sizeof(buf), - "scan_cnt=%zu,get_vector_cnt=%u,get_neighbors_cnt=%u,dup_node=%u", - get_scan_num(), stats_get_vector_cnt_, stats_get_neighbors_cnt_, + "scan_cnt=%zu,pairwise_dist_cnt=%zu,get_vector_cnt=%u,get_neighbors_cnt=%u,dup_node=%u", + get_scan_num(), get_pairwise_dist_num(), + stats_get_vector_cnt_, stats_get_neighbors_cnt_, stats_visit_dup_cnt_); return std::string(buf, size); } @@ -403,6 +404,10 @@ class HnswContext : public IndexContext { return dc_.compare_cnt(); } + inline uint64_t get_pairwise_dist_num() const { + return dc_.pairwise_dist_cnt(); + } + inline uint64_t reach_scan_limit() const { return dc_.compare_cnt() >= max_scan_num_; } diff --git a/src/core/algorithm/hnsw/hnsw_dist_calculator.h b/src/core/algorithm/hnsw/hnsw_dist_calculator.h index 84faba40b..85e0ab8fc 100644 --- a/src/core/algorithm/hnsw/hnsw_dist_calculator.h +++ b/src/core/algorithm/hnsw/hnsw_dist_calculator.h @@ -40,7 +40,8 @@ class HnswDistCalculator { batch_distance_(metric->batch_distance()), query_(nullptr), dim_(dim), - compare_cnt_(0) {} + compare_cnt_(0), + pairwise_dist_cnt_(0) {} //! 
Constructor HnswDistCalculator(const HnswEntity *entity, @@ -51,7 +52,8 @@ class HnswDistCalculator { batch_distance_(metric->batch_distance()), query_(query), dim_(dim), - compare_cnt_(0) {} + compare_cnt_(0), + pairwise_dist_cnt_(0) {} //! Constructor HnswDistCalculator(const HnswEntity *entity, @@ -61,7 +63,8 @@ class HnswDistCalculator { batch_distance_(metric->batch_distance()), query_(nullptr), dim_(0), - compare_cnt_(0) {} + compare_cnt_(0), + pairwise_dist_cnt_(0) {} void update(const HnswEntity *entity, const IndexMetric::Pointer &metric) { entity_ = entity; @@ -108,6 +111,7 @@ class HnswDistCalculator { //! Returns distance between query and vec. inline dist_t dist(const void *vec) { compare_cnt_++; + pairwise_dist_cnt_++; return dist(vec, query_); } @@ -115,6 +119,7 @@ class HnswDistCalculator { //! Return distance between query and node id. inline dist_t dist(node_id_t id) { compare_cnt_++; + pairwise_dist_cnt_++; const void *feat = entity_->get_vector(id); if (ailego_unlikely(feat == nullptr)) { @@ -129,6 +134,7 @@ class HnswDistCalculator { //! 
Return dist node lhs between node rhs inline dist_t dist(node_id_t lhs, node_id_t rhs) { compare_cnt_++; + pairwise_dist_cnt_++; const void *feat = entity_->get_vector(lhs); const void *query = entity_->get_vector(rhs); @@ -155,12 +161,14 @@ class HnswDistCalculator { void batch_dist(const void **vecs, size_t num, dist_t *distances) { compare_cnt_++; + pairwise_dist_cnt_ += num; batch_distance_(vecs, query_, num, dim_, distances); } inline dist_t batch_dist(node_id_t id) { compare_cnt_++; + pairwise_dist_cnt_++; const void *feat = entity_->get_vector(id); if (ailego_unlikely(feat == nullptr)) { @@ -176,11 +184,13 @@ class HnswDistCalculator { inline void clear() { compare_cnt_ = 0; + pairwise_dist_cnt_ = 0; error_ = false; } inline void clear_compare_cnt() { compare_cnt_ = 0; + pairwise_dist_cnt_ = 0; } inline bool error() const { @@ -192,6 +202,10 @@ class HnswDistCalculator { return compare_cnt_; } + inline uint64_t pairwise_dist_cnt() const { + return pairwise_dist_cnt_; + } + inline uint32_t dimension() const { return dim_; } @@ -210,6 +224,7 @@ class HnswDistCalculator { uint32_t dim_; uint32_t compare_cnt_; // record distance compute times + uint64_t pairwise_dist_cnt_; // record actual pairwise distance work // uint32_t compare_cnt_batch_; // record batch distance compute time bool error_{false}; }; diff --git a/src/core/algorithm/hnsw/hnsw_streamer.cc b/src/core/algorithm/hnsw/hnsw_streamer.cc index be01f5d0d..bd5a2e39d 100644 --- a/src/core/algorithm/hnsw/hnsw_streamer.cc +++ b/src/core/algorithm/hnsw/hnsw_streamer.cc @@ -13,6 +13,9 @@ // limitations under the License. 
#include "hnsw_streamer.h" #include +#include +#include +#include #include #include #include @@ -25,6 +28,43 @@ namespace zvec { namespace core { +namespace { + +bool ShouldLogHnswQueryStats(uint64_t query_seq) { + static const bool enabled = []() { + const char* value = std::getenv("ZVEC_HNSW_LOG_QUERY_STATS"); + if (value == nullptr) { + return false; + } + return std::string(value) != "0"; + }(); + if (!enabled) { + return false; + } + + static const uint64_t limit = []() -> uint64_t { + const char* value = std::getenv("ZVEC_HNSW_LOG_QUERY_LIMIT"); + if (value == nullptr || *value == '\0') { + return std::numeric_limits::max(); + } + char* end = nullptr; + unsigned long long parsed = std::strtoull(value, &end, 10); + if (end == value) { + return std::numeric_limits::max(); + } + return static_cast(parsed); + }(); + + return query_seq < limit; +} + +std::atomic& HnswQueryStatsSequence() { + static std::atomic sequence{0}; + return sequence; +} + +} // namespace + HnswStreamer::HnswStreamer() : entity_(stats_) {} HnswStreamer::~HnswStreamer() { @@ -620,12 +660,24 @@ int HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, ctx->resize_results(count); ctx->check_need_adjuct_ctx(entity_.doc_cnt()); for (size_t q = 0; q < count; ++q) { + auto query_start = std::chrono::steady_clock::now(); ctx->reset_query(query); ret = alg_->search(ctx); if (ailego_unlikely(ret != 0)) { LOG_ERROR("Hnsw searcher fast search failed"); return ret; } + auto query_latency_ns = + std::chrono::duration_cast( + std::chrono::steady_clock::now() - query_start) + .count(); + uint64_t query_seq = HnswQueryStatsSequence().fetch_add(1); + if (ShouldLogHnswQueryStats(query_seq)) { + LOG_INFO("HNSW query stats: query_seq=%llu cmps=%zu pairwise_dist_cnt=%zu latency_ms=%.3f", + static_cast(query_seq), ctx->get_scan_num(), + ctx->get_pairwise_dist_num(), + static_cast(query_latency_ns) / 1e6); + } ctx->topk_to_result(q); query = static_cast(query) + qmeta.element_size(); } @@ 
-728,6 +780,7 @@ int HnswStreamer::search_bf_impl( auto &topk = ctx->topk_heap(); for (size_t q = 0; q < count; ++q) { + auto query_start = std::chrono::steady_clock::now(); ctx->reset_query(query); topk.clear(); for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) { @@ -740,6 +793,17 @@ int HnswStreamer::search_bf_impl( topk.emplace(id, dist); } } + auto query_latency_ns = + std::chrono::duration_cast( + std::chrono::steady_clock::now() - query_start) + .count(); + uint64_t query_seq = HnswQueryStatsSequence().fetch_add(1); + if (ShouldLogHnswQueryStats(query_seq)) { + LOG_INFO("HNSW query stats: query_seq=%llu cmps=%zu pairwise_dist_cnt=%zu latency_ms=%.3f", + static_cast(query_seq), ctx->get_scan_num(), + ctx->get_pairwise_dist_num(), + static_cast(query_latency_ns) / 1e6); + } ctx->topk_to_result(q); query = static_cast(query) + qmeta.element_size(); } @@ -849,4 +913,4 @@ int HnswStreamer::search_bf_by_p_keys_impl( INDEX_FACTORY_REGISTER_STREAMER(HnswStreamer); } // namespace core -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index bda22393e..2e572f59a 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -239,6 +239,11 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet return IndexError_InvalidArgument; } + int query_id = current_query_id_; + if (omega_ctx->training_query_id() >= 0) { + query_id = omega_ctx->training_query_id(); + } + // Read target_recall from context (per-query parameter) float target_recall = omega_ctx->target_recall(); @@ -249,11 +254,9 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet omega_topk = static_cast(count); } - // In training mode, pass NULL if model is not loaded - OmegaModelHandle model_to_use = omega_model_; - if (training_mode_enabled_ && (omega_model_ == nullptr || 
!omega_model_is_loaded(omega_model_))) { - model_to_use = nullptr; // Training mode without model: collect features only - } + // Match OmegaStreamer/reference behavior: + // training mode collects features only and must not run model inference. + OmegaModelHandle model_to_use = training_mode_enabled_ ? nullptr : omega_model_; OmegaSearchHandle omega_search = omega_search_create_with_params( model_to_use, target_recall, omega_topk, window_size_); @@ -267,19 +270,19 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet if (training_mode_enabled_) { // Get ground truth for this query if available std::vector gt_for_query; - if (current_query_id_ >= 0 && - static_cast(current_query_id_) < training_ground_truth_.size()) { - const auto& gt = training_ground_truth_[current_query_id_]; + if (query_id >= 0 && + static_cast(query_id) < training_ground_truth_.size()) { + const auto& gt = training_ground_truth_[query_id]; gt_for_query.reserve(gt.size()); for (uint64_t node_id : gt) { gt_for_query.push_back(static_cast(node_id)); } } - omega_search_enable_training(omega_search, current_query_id_, + omega_search_enable_training(omega_search, query_id, gt_for_query.data(), gt_for_query.size(), training_k_train_); LOG_DEBUG("Training mode enabled for query_id=%d with %zu GT nodes", - current_query_id_, gt_for_query.size()); + query_id, gt_for_query.size()); } // OmegaContext extends HnswContext, so we can use it directly @@ -450,38 +453,48 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet size_t record_count = omega_search_get_training_records_count(omega_search); if (record_count > 0) { const void* records_ptr = omega_search_get_training_records(omega_search); + const auto* records_vec = + static_cast*>(records_ptr); - // Cast to omega::TrainingRecord array - const auto* omega_records = static_cast(records_ptr); - - // Convert and store training records - std::lock_guard lock(training_mutex_); for (size_t i = 0; i < 
record_count; ++i) { + const auto& omega_record = (*records_vec)[i]; core_interface::TrainingRecord record; - record.query_id = omega_records[i].query_id; - record.hops_visited = omega_records[i].hops; - record.cmps_visited = omega_records[i].cmps; - record.dist_1st = omega_records[i].dist_1st; - record.dist_start = omega_records[i].dist_start; + record.query_id = omega_record.query_id; + record.hops_visited = omega_record.hops; + record.cmps_visited = omega_record.cmps; + record.dist_1st = omega_record.dist_1st; + record.dist_start = omega_record.dist_start; // Copy 7 traversal window statistics - if (omega_records[i].traversal_window_stats.size() == 7) { - std::copy(omega_records[i].traversal_window_stats.begin(), - omega_records[i].traversal_window_stats.end(), + if (omega_record.traversal_window_stats.size() == 7) { + std::copy(omega_record.traversal_window_stats.begin(), + omega_record.traversal_window_stats.end(), record.traversal_window_stats.begin()); } else { LOG_WARN("Unexpected traversal_window_stats size: %zu (expected 7)", - omega_records[i].traversal_window_stats.size()); + omega_record.traversal_window_stats.size()); } // Label is already computed in real-time during search - record.label = omega_records[i].label; - - collected_records_.push_back(std::move(record)); + record.label = omega_record.label; + omega_ctx->add_training_record(std::move(record)); } LOG_DEBUG("Collected %zu training records for query_id=%d", - record_count, current_query_id_); + record_count, query_id); + } + + size_t gt_cmps_count = omega_search_get_gt_cmps_count(omega_search); + if (gt_cmps_count > 0) { + const int* gt_cmps_ptr = omega_search_get_gt_cmps(omega_search); + int total_cmps = omega_search_get_total_cmps(omega_search); + if (gt_cmps_ptr != nullptr) { + std::vector gt_cmps_vec(gt_cmps_ptr, gt_cmps_ptr + gt_cmps_count); + for (auto& v : gt_cmps_vec) { + if (v < 0) v = total_cmps; + } + omega_ctx->set_gt_cmps(gt_cmps_vec, total_cmps); + } } } diff --git 
a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index dfaf2d910..d39ec56b9 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -23,10 +23,45 @@ #include "omega_params.h" #include #include +#include +#include namespace zvec { namespace core { +namespace { + +bool ShouldLogEveryQueryStats() { + const char* value = std::getenv("ZVEC_OMEGA_LOG_QUERY_STATS"); + if (value == nullptr) { + return false; + } + return std::string(value) != "0"; +} + +uint64_t GetQueryStatsLimit() { + const char* value = std::getenv("ZVEC_OMEGA_LOG_QUERY_LIMIT"); + if (value == nullptr || *value == '\0') { + return 0; + } + char* end = nullptr; + unsigned long long parsed = std::strtoull(value, &end, 10); + if (end == value) { + return 0; + } + return static_cast(parsed); +} + +bool ShouldLogQueryStats(uint64_t query_seq) { + if (!ShouldLogEveryQueryStats()) { + return false; + } + uint64_t limit = GetQueryStatsLimit(); + return limit == 0 || query_seq < limit; +} + +} // namespace + bool OmegaStreamer::LoadModel(const std::string& model_dir) { std::lock_guard lock(model_mutex_); @@ -60,6 +95,7 @@ bool OmegaStreamer::IsModelLoaded() const { int OmegaStreamer::open(IndexStorage::Pointer stg) { std::string index_path = stg ? 
stg->file_path() : ""; debug_stats_logged_.store(false); + query_stats_sequence_.store(0); int ret = HnswStreamer::open(std::move(stg)); if (ret != 0) { @@ -129,6 +165,9 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, Context::Pointer &context, bool enable_early_stopping) const { + auto query_start = std::chrono::steady_clock::now(); + uint64_t omega_control_time_ns = 0; + // Cast context to OmegaContext to access training_query_id auto *omega_ctx = dynamic_cast(context.get()); int query_id = current_query_id_; // Default to member variable @@ -232,9 +271,16 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm } bool find_closer = false; + float dists[neighbors.size()]; + const void *neighbor_vecs[neighbors.size()]; for (uint32_t i = 0; i < neighbors.size(); ++i) { - const void *neighbor_vec = neighbor_vec_blocks[i].data(); - dist_t cur_dist = dc.dist(neighbor_vec); + neighbor_vecs[i] = neighbor_vec_blocks[i].data(); + } + + dc.batch_dist(neighbor_vecs, neighbors.size(), dists); + + for (uint32_t i = 0; i < neighbors.size(); ++i) { + dist_t cur_dist = dists[i]; if (cur_dist < dist) { entry_point = neighbors[i]; dist = cur_dist; @@ -247,7 +293,14 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm } // Set dist_start for OMEGA - omega_search_set_dist_start(omega_search, dist); + { + auto control_start = std::chrono::steady_clock::now(); + omega_search_set_dist_start(omega_search, dist); + omega_control_time_ns += + std::chrono::duration_cast( + std::chrono::steady_clock::now() - control_start) + .count(); + } // Perform HNSW search on layer 0 with OMEGA candidates.clear(); @@ -260,7 +313,14 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm candidates.emplace(entry_point, dist); // Report initial visit to OMEGA - 
omega_search_report_visit_candidate(omega_search, entry_point, dist, 1); + { + auto control_start = std::chrono::steady_clock::now(); + omega_search_report_visit_candidate(omega_search, entry_point, dist, 1); + omega_control_time_ns += + std::chrono::duration_cast( + std::chrono::steady_clock::now() - control_start) + .count(); + } dist_t lowerBound = dist; @@ -273,7 +333,14 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm // Reference semantics: count the hop as soon as the current candidate is // examined, before stop-condition evaluation. - omega_search_report_hop(omega_search); + { + auto control_start = std::chrono::steady_clock::now(); + omega_search_report_hop(omega_search); + omega_control_time_ns += + std::chrono::duration_cast( + std::chrono::steady_clock::now() - control_start) + .count(); + } // Standard HNSW stopping condition if (topk_heap.full() && candidate_dist > lowerBound) { @@ -307,11 +374,26 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm break; } + static constexpr node_id_t BATCH_SIZE = 12; + static constexpr node_id_t PREFETCH_STEP = 2; + for (size_t i = 0; + i < std::min(static_cast(BATCH_SIZE * PREFETCH_STEP), + unvisited_neighbors.size()); + ++i) { + ailego_prefetch(neighbor_vec_blocks[i].data()); + } + + float dists[unvisited_neighbors.size()]; + const void *neighbor_vecs[unvisited_neighbors.size()]; + for (size_t i = 0; i < unvisited_neighbors.size(); ++i) { + neighbor_vecs[i] = neighbor_vec_blocks[i].data(); + } + dc.batch_dist(neighbor_vecs, unvisited_neighbors.size(), dists); + // Compute distances and update candidates for (size_t i = 0; i < unvisited_neighbors.size(); ++i) { node_id_t neighbor = unvisited_neighbors[i]; - const void *neighbor_vec = neighbor_vec_blocks[i].data(); - dist_t neighbor_dist = dc.dist(neighbor_vec); + dist_t neighbor_dist = dists[i]; // Reference semantics: // 1. 
`should_consider_candidate` is driven by the ef-bounded heap @@ -319,11 +401,33 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm // result-set-sized top-k structure, not by ef admission alone. bool should_consider_candidate = (!topk_heap.full() || neighbor_dist < lowerBound); - omega_search_report_visit_candidate(omega_search, neighbor, neighbor_dist, - should_consider_candidate ? 1 : 0); + { + auto control_start = std::chrono::steady_clock::now(); + omega_search_report_visit_candidate(omega_search, neighbor, neighbor_dist, + should_consider_candidate ? 1 : 0); + omega_control_time_ns += + std::chrono::duration_cast( + std::chrono::steady_clock::now() - control_start) + .count(); + } - if (enable_early_stopping && omega_search_should_predict(omega_search)) { - if (omega_search_should_stop(omega_search)) { + bool should_predict = false; + if (enable_early_stopping) { + auto control_start = std::chrono::steady_clock::now(); + should_predict = omega_search_should_predict(omega_search); + omega_control_time_ns += + std::chrono::duration_cast( + std::chrono::steady_clock::now() - control_start) + .count(); + } + if (enable_early_stopping && should_predict) { + auto control_start = std::chrono::steady_clock::now(); + bool should_stop = omega_search_should_stop(omega_search); + omega_control_time_ns += + std::chrono::duration_cast( + std::chrono::steady_clock::now() - control_start) + .count(); + if (should_stop) { int hops, cmps, collected_gt; omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); LOG_DEBUG("OMEGA early stop: cmps=%d, hops=%d, collected_gt=%d", @@ -372,6 +476,11 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm unsigned long long collected_gt_advance_count = 0; unsigned long long should_stop_calls_with_advance = 0; unsigned long long max_prediction_calls_per_should_stop = 0; + uint64_t query_total_time_ns = + std::chrono::duration_cast( + std::chrono::steady_clock::now() - 
query_start) + .count(); + uint64_t query_seq = query_stats_sequence_.fetch_add(1); omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); omega_search_get_debug_stats(omega_search, &predicted_recall_avg, &predicted_recall_at_target, @@ -388,9 +497,16 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu, early_stop=%d", cmps, hops, topk_heap.size(), enable_early_stopping); if (enable_early_stopping) { + size_t scan_cmps = hnsw_ctx->get_scan_num(); + uint64_t pairwise_dist_cnt = hnsw_ctx->get_pairwise_dist_num(); + uint64_t pure_search_time_ns = + query_total_time_ns > omega_control_time_ns + ? (query_total_time_ns - omega_control_time_ns) + : 0; bool expected = false; if (debug_stats_logged_.compare_exchange_strong(expected, true)) { - LOG_WARN("OMEGA runtime stats: model_loaded=%d target_recall=%.4f cmps=%d " + LOG_INFO("OMEGA runtime stats: model_loaded=%d target_recall=%.4f " + "scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d " "collected_gt=%d predicted_recall_avg=%.4f " "predicted_recall_at_target=%.4f early_stop_hit=%d " "should_stop_calls=%llu prediction_calls=%llu " @@ -398,7 +514,9 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm "max_pred_per_stop=%llu should_stop_ms=%.3f " "prediction_eval_ms=%.3f sorted_window_ms=%.3f " "avg_recall_eval_ms=%.3f feature_prep_ms=%.3f", - IsModelLoaded() ? 1 : 0, target_recall, cmps, collected_gt, + IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, + static_cast(pairwise_dist_cnt), cmps, + collected_gt, predicted_recall_avg, predicted_recall_at_target, (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, should_stop_calls, prediction_calls, @@ -410,6 +528,34 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static_cast(average_recall_eval_time_ns) / 1e6, static_cast(prediction_feature_prep_time_ns) / 1e6); } + if (ShouldLogQueryStats(query_seq)) { + LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " + "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " + "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " + "early_stop_hit=%d should_stop_calls=%llu " + "prediction_calls=%llu advance_calls=%llu " + "collected_gt_advance=%llu max_pred_per_stop=%llu " + "should_stop_ms=%.3f prediction_eval_ms=%.3f " + "sorted_window_ms=%.3f avg_recall_eval_ms=%.3f " + "feature_prep_ms=%.3f omega_control_ms=%.3f pure_search_ms=%.3f total_ms=%.3f", + static_cast(query_seq), + IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, + static_cast(pairwise_dist_cnt), cmps, + collected_gt, + predicted_recall_avg, predicted_recall_at_target, + (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, + should_stop_calls, prediction_calls, + should_stop_calls_with_advance, collected_gt_advance_count, + max_prediction_calls_per_should_stop, + static_cast(should_stop_time_ns) / 1e6, + static_cast(prediction_eval_time_ns) / 1e6, + static_cast(sorted_window_time_ns) / 1e6, + static_cast(average_recall_eval_time_ns) / 1e6, + static_cast(prediction_feature_prep_time_ns) / 1e6, + static_cast(omega_control_time_ns) / 1e6, + static_cast(pure_search_time_ns) / 1e6, + static_cast(query_total_time_ns) / 1e6); + } } // Collect training records (only in training mode) diff --git a/src/core/algorithm/omega/omega_streamer.h b/src/core/algorithm/omega/omega_streamer.h index 6bd57c22b..86029c57e 100644 --- a/src/core/algorithm/omega/omega_streamer.h +++ b/src/core/algorithm/omega/omega_streamer.h @@ -107,6 +107,7 @@ class OmegaStreamer : public HnswStreamer { mutable OmegaModelHandle omega_model_{nullptr}; mutable std::mutex model_mutex_; mutable std::atomic debug_stats_logged_{false}; + mutable std::atomic query_stats_sequence_{0}; float target_recall_{0.95f}; int window_size_{100}; }; diff --git a/src/core/interface/indexes/hnsw_index.cc b/src/core/interface/indexes/hnsw_index.cc index 921db8ffa..171ad3d8b 100644 --- a/src/core/interface/indexes/hnsw_index.cc +++ b/src/core/interface/indexes/hnsw_index.cc @@ -15,6 +15,7 @@ #include #include #include +#include "algorithm/omega/omega_params.h" #include "algorithm/hnsw/hnsw_params.h" #include "algorithm/hnsw_sparse/hnsw_sparse_params.h" @@ -104,6 +105,18 @@ int HNSWIndex::_prepare_for_search( const int real_search_ef = std::max(1u, std::min(2048u, hnsw_search_param->ef_search)); params.set(core::PARAM_HNSW_STREAMER_EF, real_search_ef); + + if (hnsw_search_param->training_query_id >= 0) { + params.set(core::PARAM_OMEGA_SEARCHER_TRAINING_QUERY_ID, + hnsw_search_param->training_query_id); + } + + if (const auto& omega_search_param = + std::dynamic_pointer_cast(search_param)) { + 
params.set(core::PARAM_OMEGA_SEARCHER_TARGET_RECALL, + omega_search_param->target_recall); + } + context->update(params); return 0; } @@ -118,4 +131,4 @@ int HNSWIndex::_get_coarse_search_topk( return ret; } -} // namespace zvec::core_interface \ No newline at end of file +} // namespace zvec::core_interface diff --git a/src/db/collection.cc b/src/db/collection.cc index 5e0daf718..d0c057646 100644 --- a/src/db/collection.cc +++ b/src/db/collection.cc @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -43,6 +44,10 @@ #include "db/index/segment/segment_helper.h" #include "db/index/segment/segment_manager.h" #include "db/sqlengine/sqlengine.h" +#ifdef ZVEC_ENABLE_OMEGA +#include "db/training/omega_model_trainer.h" +#include "db/training/training_data_collector.h" +#endif namespace zvec { @@ -1935,4 +1940,4 @@ std::vector CollectionImpl::get_all_persist_segments() const { return segment_manager_->get_segments(); } -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/src/db/index/column/vector_column/vector_column_indexer.cc b/src/db/index/column/vector_column/vector_column_indexer.cc index bf7eb6d4e..b31e5b31f 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.cc +++ b/src/db/index/column/vector_column/vector_column_indexer.cc @@ -200,6 +200,13 @@ Result VectorColumnIndexer::Search( Status::InternalError("Failed to search vector")); } + if (training_mode_enabled_) { + LOG_INFO( + "VectorColumnIndexer training search: query_id=%d records=%zu gt_cmps=%zu total_cmps=%d", + search_result.training_query_id_, search_result.training_records_.size(), + search_result.gt_cmps_per_rank_.size(), search_result.total_cmps_); + } + // Collect training records from search result (stored in context during search) // This is thread-safe because each search has its own context if (training_mode_enabled_ && !search_result.training_records_.empty()) { diff --git a/src/db/index/segment/segment.cc 
b/src/db/index/segment/segment.cc index 9a2c8781b..21960bd54 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -2387,9 +2387,9 @@ Status SegmentImpl::auto_train_omega_index_internal( collector_options.topk = 100; collector_options.noise_scale = 0.01f; - - auto training_records_result = TrainingDataCollector::CollectTrainingDataWithGtCmps( - shared_from_this(), field_name, collector_options, indexers); + Result training_records_result = + TrainingDataCollector::CollectTrainingDataWithGtCmps( + shared_from_this(), field_name, collector_options, indexers); if (!training_records_result.has_value()) { return Status::InternalError( @@ -4605,4 +4605,4 @@ Result Segment::Open(const std::string &path, return segment; } -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/src/db/training/omega_model_trainer.h b/src/db/training/omega_model_trainer.h index 865cbd235..7b5276ef8 100644 --- a/src/db/training/omega_model_trainer.h +++ b/src/db/training/omega_model_trainer.h @@ -74,6 +74,7 @@ class OmegaModelTrainer { const std::vector& training_records, const core_interface::GtCmpsData& gt_cmps_data, const OmegaModelTrainerOptions& options); + }; } // namespace zvec diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 8d43ac59b..45bd1e7b6 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -66,6 +66,227 @@ class ScopedTimer { std::chrono::high_resolution_clock::time_point start_; }; } // namespace + +Result TrainingDataCollector::CollectTrainingDataFromQueriesImpl( + const Segment::Ptr& segment, const std::string& field_name, + const std::vector>& training_queries, + const std::vector>& provided_ground_truth, + const TrainingDataCollectorOptions& options, + const std::vector& query_doc_ids, + const std::vector& provided_indexers) { + std::vector indexers; + if (!provided_indexers.empty()) { + indexers = 
provided_indexers; + } else { + indexers = segment->get_vector_indexer(field_name); + } + + if (indexers.empty()) { + return tl::make_unexpected( + Status::InternalError("No vector indexers found for field: " + field_name)); + } + + if (training_queries.empty()) { + return tl::make_unexpected( + Status::InvalidArgument("Training queries are empty")); + } + + MetricType metric_type = indexers[0]->metric_type(); + + std::vector> ground_truth = provided_ground_truth; + if (ground_truth.empty()) { + LOG_INFO("Computing ground truth (topk=%zu, ef_groundtruth=%d)", + options.topk, options.ef_groundtruth); + std::string timer_name = options.ef_groundtruth > 0 + ? "ComputeGroundTruth (HNSW ef=" + + std::to_string(options.ef_groundtruth) + ")" + : "ComputeGroundTruth (BRUTE FORCE)"; + ScopedTimer timer(timer_name); + ground_truth = TrainingDataCollector::ComputeGroundTruth( + segment, field_name, training_queries, options.topk, options.num_threads, + query_doc_ids, options.ef_groundtruth, metric_type, indexers); + } else if (ground_truth.size() != training_queries.size()) { + return tl::make_unexpected(Status::InvalidArgument( + "Ground truth size does not match query count")); + } + + if (ground_truth.empty()) { + return tl::make_unexpected(Status::InternalError( + "Failed to obtain ground truth")); + } + + LOG_INFO("Setting ground truth (%zu queries) and enabling training mode on %zu indexers", + ground_truth.size(), indexers.size()); + for (auto& indexer : indexers) { + indexer->SetTrainingGroundTruth(ground_truth, options.k_train); + auto status = indexer->EnableTrainingMode(true); + if (!status.ok()) { + LOG_WARN("Failed to enable training mode on indexer: %s", + status.message().c_str()); + } + } + + LOG_INFO("Performing training searches with ef=%d", options.ef_training); + std::vector> search_results; + search_results.reserve(training_queries.size()); + + { + ScopedTimer timer("External: TrainingSearches (HNSW with ef=" + + std::to_string(options.ef_training) + ") 
PARALLEL"); + + size_t actual_threads = options.num_threads; + if (actual_threads == 0) { + actual_threads = std::thread::hardware_concurrency(); + } + actual_threads = std::min(actual_threads, training_queries.size()); + + search_results.resize(training_queries.size()); + + std::atomic completed_searches{0}; + std::mutex progress_mutex; + auto search_start = std::chrono::high_resolution_clock::now(); + + auto worker = [&](size_t start_idx, size_t end_idx) { + for (size_t query_idx = start_idx; query_idx < end_idx; ++query_idx) { + const auto& query_vector = training_queries[query_idx]; + + vector_column_params::VectorData vector_data; + vector_data.vector = vector_column_params::DenseVector{ + .data = const_cast(static_cast(query_vector.data()))}; + + vector_column_params::QueryParams query_params; + query_params.topk = options.topk; + query_params.fetch_vector = false; + query_params.filter = segment->get_filter().get(); + + auto omega_params = std::make_shared(); + omega_params->set_ef(options.ef_training); + omega_params->set_training_query_id(static_cast(query_idx)); + query_params.query_params = omega_params; + + if (indexers.size() != 1 && query_idx == start_idx) { + LOG_WARN("Expected 1 indexer but found %zu, using first one only", indexers.size()); + } + + // Persisted OMEGA collections currently do not propagate per-query + // training_query_id through the search context reliably. In the + // single-threaded calibration path, fall back to the legacy global + // query-id setter to preserve correct labels without races. 
+ if (actual_threads == 1) { + indexers[0]->SetCurrentQueryId(static_cast(query_idx)); + } + + auto search_result = indexers[0]->Search(vector_data, query_params); + if (!search_result.has_value()) { + LOG_WARN("Search failed for query %zu: %s", query_idx, + search_result.error().message().c_str()); + ++completed_searches; + continue; + } + + auto& results = search_result.value(); + std::vector result_ids; + result_ids.reserve(results->count()); + auto iter = results->create_iterator(); + while (iter->valid()) { + result_ids.push_back(iter->doc_id()); + iter->next(); + } + + search_results[query_idx] = std::move(result_ids); + + size_t completed = ++completed_searches; + if (completed % 100 == 0 || completed == training_queries.size()) { + std::lock_guard lock(progress_mutex); + auto now = std::chrono::high_resolution_clock::now(); + auto elapsed_ms = + std::chrono::duration_cast(now - search_start) + .count(); + DebugLog(" External training search progress: " + + std::to_string(completed) + "/" + + std::to_string(training_queries.size()) + ", elapsed: " + + std::to_string(elapsed_ms) + " ms"); + } + } + }; + + std::vector threads; + size_t queries_per_thread = + (training_queries.size() + actual_threads - 1) / actual_threads; + for (size_t t = 0; t < actual_threads; ++t) { + size_t start_idx = t * queries_per_thread; + size_t end_idx = std::min(start_idx + queries_per_thread, training_queries.size()); + if (start_idx < end_idx) { + threads.emplace_back(worker, start_idx, end_idx); + } + } + + for (auto& thread : threads) { + thread.join(); + } + + auto search_end = std::chrono::high_resolution_clock::now(); + auto total_ms = + std::chrono::duration_cast(search_end - search_start) + .count(); + LOG_INFO("Training searches completed in %zu ms (%zu threads)", + total_ms, actual_threads); + } + + LOG_INFO("Collecting training records from indexers"); + std::vector all_records; + { + ScopedTimer timer("External: CollectTrainingRecords"); + for (auto& indexer : 
indexers) { + auto records = indexer->GetTrainingRecords(); + LOG_INFO("Collected %zu records from indexer", records.size()); + all_records.insert(all_records.end(), records.begin(), records.end()); + } + } + + if (all_records.empty()) { + LOG_WARN("No training records collected from any indexer"); + } + + size_t positive_count = 0; + size_t negative_count = 0; + for (const auto& record : all_records) { + if (record.label > 0) { + ++positive_count; + } else { + ++negative_count; + } + } + LOG_INFO("Collected %zu records: %zu positive, %zu negative (labels computed in real-time)", + all_records.size(), positive_count, negative_count); + + LOG_INFO("Collecting gt_cmps data from indexers"); + core_interface::GtCmpsData gt_cmps_data; + { + ScopedTimer timer("External: GetGtCmpsData"); + if (!indexers.empty()) { + gt_cmps_data = indexers[0]->GetGtCmpsData(); + if (gt_cmps_data.gt_cmps.empty()) { + LOG_WARN("No actual gt_cmps data collected, falling back to approximation"); + gt_cmps_data = + TrainingDataCollector::ComputeGtCmps(all_records, ground_truth, options.topk); + } else { + LOG_INFO("Got actual gt_cmps data for %zu queries, topk=%zu", + gt_cmps_data.num_queries, gt_cmps_data.topk); + } + } + } + + for (auto& indexer : indexers) { + indexer->EnableTrainingMode(false); + indexer->ClearTrainingRecords(); + } + + TrainingDataCollectorResult result; + result.records = std::move(all_records); + result.gt_cmps_data = std::move(gt_cmps_data); + return result; +} // ============ END DEBUG TIMING UTILITIES ============ Result> @@ -384,7 +605,7 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Note: If warmup takes very long (>60s), recommend using ef_groundtruth=0 (Eigen brute force) if (warmup_ms > 60000) { - LOG_WARN("HNSW warmup took %zu ms. For cold indexes, consider using ef_groundtruth=0 (Eigen brute force)", + LOG_INFO("HNSW warmup took %zu ms. 
For cold indexes, consider using ef_groundtruth=0 (Eigen brute force)", warmup_ms); } } @@ -716,30 +937,11 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( const TrainingDataCollectorOptions& options, const std::vector& provided_indexers) { ScopedTimer total_timer("CollectTrainingDataWithGtCmps [TOTAL]"); - - // Step 0: Get indexers first (needed for metric type) - std::vector indexers; - if (!provided_indexers.empty()) { - indexers = provided_indexers; - } else { - indexers = segment->get_vector_indexer(field_name); - } - - if (indexers.empty()) { - return tl::make_unexpected( - Status::InternalError("No vector indexers found for field: " + field_name)); - } - - // Get metric type from first indexer - MetricType metric_type = indexers[0]->metric_type(); - - // Step 1: Generate training queries using held-out approach - // (sample vectors directly from index, no noise) LOG_INFO("Generating %zu held-out training queries for field '%s'", options.num_training_queries, field_name.c_str()); std::vector> training_queries; - std::vector query_doc_ids; // doc_ids for self-exclusion in GT + std::vector query_doc_ids; { ScopedTimer timer("Step1: GenerateHeldOutQueries"); auto sampled = TrainingQueryGenerator::GenerateHeldOutQueries( @@ -750,226 +952,9 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( " held-out queries (with doc_ids for self-exclusion)"); } - if (training_queries.empty()) { - return tl::make_unexpected( - Status::InternalError("Failed to generate training queries")); - } - - // Step 2: Compute ground truth (brute force or HNSW search, excluding self-matches) - LOG_INFO("Computing ground truth (topk=%zu, ef_groundtruth=%d, excluding self)", - options.topk, options.ef_groundtruth); - - std::vector> ground_truth; - { - std::string timer_name = options.ef_groundtruth > 0 - ? 
"Step2: ComputeGroundTruth (HNSW ef=" + std::to_string(options.ef_groundtruth) + " PARALLEL, HELD-OUT)" - : "Step2: ComputeGroundTruth (BRUTE FORCE PARALLEL, HELD-OUT)"; - ScopedTimer timer(timer_name); - DebugLog(" num_queries=" + std::to_string(training_queries.size()) + - ", topk=" + std::to_string(options.topk) + - ", ef_groundtruth=" + std::to_string(options.ef_groundtruth) + - ", threads=" + std::to_string(options.num_threads == 0 ? std::thread::hardware_concurrency() : options.num_threads)); - ground_truth = ComputeGroundTruth( - segment, field_name, training_queries, options.topk, options.num_threads, - query_doc_ids, options.ef_groundtruth, metric_type, indexers); // Pass indexers to avoid stale data - DebugLog(" Computed ground truth for " + std::to_string(ground_truth.size()) + " queries"); - } - - if (ground_truth.empty()) { - return tl::make_unexpected( - Status::InternalError("Failed to compute ground truth")); - } - - LOG_INFO("Found %zu indexers for field '%s'", indexers.size(), field_name.c_str()); - DebugLog("Step3: Found " + std::to_string(indexers.size()) + " indexers, doc_count=" + - std::to_string(indexers[0]->doc_count())); - - // Step 4: Set ground truth and enable training mode on all indexers - LOG_INFO("Setting ground truth (%zu queries) and enabling training mode on %zu indexers", - ground_truth.size(), indexers.size()); - for (auto& indexer : indexers) { - // Set ground truth for real-time label computation - indexer->SetTrainingGroundTruth(ground_truth, options.k_train); - - auto status = indexer->EnableTrainingMode(true); - if (!status.ok()) { - LOG_WARN("Failed to enable training mode on indexer: %s", - status.message().c_str()); - } - } - - // Step 5: Perform searches with large ef and collect training records - LOG_INFO("Performing training searches with ef=%d", options.ef_training); - - std::vector> search_results; - search_results.reserve(training_queries.size()); - - { - ScopedTimer timer("Step5: TrainingSearches (HNSW with ef=" 
+ std::to_string(options.ef_training) + ") PARALLEL"); - - // Determine thread count - size_t actual_threads = options.num_threads; - if (actual_threads == 0) { - actual_threads = std::thread::hardware_concurrency(); - } - actual_threads = std::min(actual_threads, training_queries.size()); - - DebugLog(" num_queries=" + std::to_string(training_queries.size()) + - ", threads=" + std::to_string(actual_threads)); - - // Pre-allocate search_results for thread-safe access - search_results.resize(training_queries.size()); - - std::atomic completed_searches{0}; - std::mutex progress_mutex; - auto search_start = std::chrono::high_resolution_clock::now(); - - // Worker function for a range of queries - auto worker = [&](size_t start_idx, size_t end_idx) { - for (size_t query_idx = start_idx; query_idx < end_idx; ++query_idx) { - const auto& query_vector = training_queries[query_idx]; - - // Prepare query parameters - vector_column_params::VectorData vector_data; - vector_data.vector = vector_column_params::DenseVector{ - .data = const_cast(static_cast(query_vector.data())) - }; - - vector_column_params::QueryParams query_params; - query_params.topk = options.topk; - query_params.fetch_vector = false; - query_params.filter = segment->get_filter().get(); - - // Create OmegaQueryParams with training_query_id for parallel search - auto omega_params = std::make_shared(); - omega_params->set_ef(options.ef_training); - omega_params->set_training_query_id(static_cast(query_idx)); - query_params.query_params = omega_params; - - if (indexers.size() != 1) { - // Only log once - if (query_idx == start_idx) { - LOG_WARN("Expected 1 indexer but found %zu, using first one only", indexers.size()); - } - } - - auto search_result = indexers[0]->Search(vector_data, query_params); - if (!search_result.has_value()) { - LOG_WARN("Search failed for query %zu: %s", query_idx, - search_result.error().message().c_str()); - // search_results[query_idx] is already default empty - ++completed_searches; 
- continue; - } - - // Extract result doc IDs - auto& results = search_result.value(); - std::vector result_ids; - result_ids.reserve(results->count()); - auto iter = results->create_iterator(); - while (iter->valid()) { - result_ids.push_back(iter->doc_id()); - iter->next(); - } - - search_results[query_idx] = std::move(result_ids); - - // Update progress - size_t completed = ++completed_searches; - if (completed % 100 == 0 || completed == training_queries.size()) { - std::lock_guard lock(progress_mutex); - auto now = std::chrono::high_resolution_clock::now(); - auto elapsed_ms = std::chrono::duration_cast(now - search_start).count(); - DebugLog(" Training search progress: " + std::to_string(completed) + "/" + - std::to_string(training_queries.size()) + ", elapsed: " + std::to_string(elapsed_ms) + " ms"); - } - } - }; - - // Launch threads - std::vector threads; - size_t queries_per_thread = (training_queries.size() + actual_threads - 1) / actual_threads; - - for (size_t t = 0; t < actual_threads; ++t) { - size_t start_idx = t * queries_per_thread; - size_t end_idx = std::min(start_idx + queries_per_thread, training_queries.size()); - if (start_idx < end_idx) { - threads.emplace_back(worker, start_idx, end_idx); - } - } - - // Wait for all threads - for (auto& thread : threads) { - thread.join(); - } - - auto search_end = std::chrono::high_resolution_clock::now(); - auto total_ms = std::chrono::duration_cast(search_end - search_start).count(); - LOG_INFO("Training searches completed in %zu ms (%zu threads)", - total_ms, actual_threads); - } - - // Step 6: Collect training records from all indexers - LOG_INFO("Collecting training records from indexers"); - - std::vector all_records; - { - ScopedTimer timer("Step6: CollectTrainingRecords"); - for (auto& indexer : indexers) { - auto records = indexer->GetTrainingRecords(); - LOG_INFO("Collected %zu records from indexer", records.size()); - all_records.insert(all_records.end(), records.begin(), records.end()); - } - 
DebugLog(" Total records collected: " + std::to_string(all_records.size())); - } - - if (all_records.empty()) { - LOG_WARN("No training records collected from any indexer"); - } - - // Step 7: Labels are now computed in real-time during search (no FillLabels needed) - // Count positive/negative labels for verification - size_t positive_count = 0, negative_count = 0; - for (const auto& record : all_records) { - if (record.label > 0) positive_count++; - else negative_count++; - } - LOG_INFO("Collected %zu records: %zu positive, %zu negative (labels computed in real-time)", - all_records.size(), positive_count, negative_count); - - // Step 8: Get gt_cmps data directly from indexers (collected during search) - LOG_INFO("Collecting gt_cmps data from indexers"); - core_interface::GtCmpsData gt_cmps_data; - { - ScopedTimer timer("Step8: GetGtCmpsData"); - // Get gt_cmps from first indexer (all indexers should have the same data) - if (!indexers.empty()) { - gt_cmps_data = indexers[0]->GetGtCmpsData(); - if (gt_cmps_data.gt_cmps.empty()) { - // Fallback to approximation if actual data not available - LOG_WARN("No actual gt_cmps data collected, falling back to approximation"); - gt_cmps_data = ComputeGtCmps(all_records, ground_truth, options.topk); - } else { - LOG_INFO("Got actual gt_cmps data for %zu queries, topk=%zu", - gt_cmps_data.num_queries, gt_cmps_data.topk); - } - } - } - - // Step 9: Disable training mode and clear records - for (auto& indexer : indexers) { - indexer->EnableTrainingMode(false); - indexer->ClearTrainingRecords(); - } - - LOG_INFO("Successfully collected %zu training records with labels and gt_cmps", - all_records.size()); - - TrainingDataCollectorResult result; - result.records = std::move(all_records); - result.gt_cmps_data = std::move(gt_cmps_data); - - return result; + return CollectTrainingDataFromQueriesImpl(segment, field_name, + training_queries, {}, options, + query_doc_ids, provided_indexers); } } // namespace zvec diff --git 
a/src/db/training/training_data_collector.h b/src/db/training/training_data_collector.h index 7e3193421..2195efd04 100644 --- a/src/db/training/training_data_collector.h +++ b/src/db/training/training_data_collector.h @@ -172,6 +172,15 @@ class TrainingDataCollector { const std::vector& records, const std::vector>& ground_truth, size_t topk); + + static Result CollectTrainingDataFromQueriesImpl( + const Segment::Ptr& segment, + const std::string& field_name, + const std::vector>& training_queries, + const std::vector>& provided_ground_truth, + const TrainingDataCollectorOptions& options, + const std::vector& query_doc_ids, + const std::vector& provided_indexers); }; } // namespace zvec diff --git a/src/include/zvec/db/collection.h b/src/include/zvec/db/collection.h index 3e868bbd3..293861c05 100644 --- a/src/include/zvec/db/collection.h +++ b/src/include/zvec/db/collection.h @@ -106,4 +106,4 @@ class Collection { const std::vector &pks) const = 0; }; -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/src/include/zvec/db/options.h b/src/include/zvec/db/options.h index 1f2a9cbf2..02d9d7fe6 100644 --- a/src/include/zvec/db/options.h +++ b/src/include/zvec/db/options.h @@ -13,7 +13,9 @@ // limitations under the License. 
#pragma once +#include #include +#include namespace zvec { @@ -66,4 +68,4 @@ struct AlterColumnOptions { int concurrency_{0}; }; -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/thirdparty/omega b/thirdparty/omega index aace108d5..3e9116e36 160000 --- a/thirdparty/omega +++ b/thirdparty/omega @@ -1 +1 @@ -Subproject commit aace108d551d2e4b3515789f432e94a5d4bad16b +Subproject commit 3e9116e360afb9f32402be60cf0e99bb98b7f3ba diff --git a/tools/core/CMakeLists.txt b/tools/core/CMakeLists.txt index d561312fb..e4f70de87 100644 --- a/tools/core/CMakeLists.txt +++ b/tools/core/CMakeLists.txt @@ -41,7 +41,6 @@ cc_binary( LIBS omega ) - cc_binary( NAME recall_original STRICT PACKED From 347bcf492bfe177012b4a09145df4b2e5f88712b Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sat, 21 Mar 2026 16:24:24 +0800 Subject: [PATCH 015/126] feat(omega): add retrain-only model refresh with cached held-out queries --- python/zvec/model/param/__init__.pyi | 17 +- .../python/model/param/python_param.cc | 24 ++- src/db/collection.cc | 10 + src/db/index/segment/segment.cc | 179 +++++++++++++++++- src/db/index/segment/segment.h | 3 +- src/db/training/omega_model_trainer.h | 12 +- src/db/training/training_data_collector.cc | 100 ++++++++-- src/db/training/training_data_collector.h | 17 ++ src/include/zvec/db/collection.h | 2 +- src/include/zvec/db/options.h | 1 + thirdparty/omega | 2 +- 11 files changed, 332 insertions(+), 35 deletions(-) diff --git a/python/zvec/model/param/__init__.pyi b/python/zvec/model/param/__init__.pyi index bb6cf6ece..ed32d99b8 100644 --- a/python/zvec/model/param/__init__.pyi +++ b/python/zvec/model/param/__init__.pyi @@ -660,21 +660,29 @@ class OptimizeOption: concurrency (int): Number of threads to use during optimization. If 0, the system will choose an optimal value automatically. Default is 0. + retrain_only (bool): Reuse existing indexes and only retrain OMEGA + models. Default is False. 
Examples: - >>> opt = OptimizeOption(concurrency=2) + >>> opt = OptimizeOption(concurrency=2, retrain_only=True) >>> print(opt.concurrency) 2 """ def __getstate__(self) -> tuple: ... - def __init__(self, concurrency: typing.SupportsInt = 0) -> None: + def __init__( + self, + concurrency: typing.SupportsInt = 0, + retrain_only: bool = False, + ) -> None: """ Constructs an OptimizeOption instance. Args: concurrency (int, optional): Number of concurrent threads. 0 means auto-detect. Defaults to 0. + retrain_only (bool, optional): Reuse existing indexes and only + retrain OMEGA models. Defaults to False. """ def __setstate__(self, arg0: tuple) -> None: ... @@ -683,6 +691,11 @@ class OptimizeOption: """ int: Number of threads used for optimization (0 = auto). """ + @property + def retrain_only(self) -> bool: + """ + bool: Whether to reuse existing indexes and only retrain OMEGA. + """ class QueryParam: """ diff --git a/src/binding/python/model/param/python_param.cc b/src/binding/python/model/param/python_param.cc index ee1fd9e9b..73bb9b5e0 100644 --- a/src/binding/python/model/param/python_param.cc +++ b/src/binding/python/model/param/python_param.cc @@ -1099,34 +1099,50 @@ Options for optimizing a collection (e.g., merging segments). concurrency (int): Number of threads to use during optimization. If 0, the system will choose an optimal value automatically. Default is 0. + retrain_only (bool): Reuse existing indexes and only retrain OMEGA models. + This skips index rebuild/merge work and is only meaningful for OMEGA. + Default is False. 
Examples: - >>> opt = OptimizeOption(concurrency=2) + >>> opt = OptimizeOption(concurrency=2, retrain_only=True) >>> print(opt.concurrency) 2 )pbdoc") - .def(py::init(), py::arg("concurrency") = 0, + .def(py::init([](int concurrency, bool retrain_only) { + OptimizeOptions obj{}; + obj.concurrency_ = concurrency; + obj.retrain_only_ = retrain_only; + return obj; + }), + py::arg("concurrency") = 0, py::arg("retrain_only") = false, R"pbdoc( Constructs an OptimizeOption instance. Args: concurrency (int, optional): Number of concurrent threads. 0 means auto-detect. Defaults to 0. + retrain_only (bool, optional): Reuse existing indexes and only retrain + OMEGA models. Defaults to False. )pbdoc") .def_property_readonly( "concurrency", [](const OptimizeOptions &self) { return self.concurrency_; }, "int: Number of threads used for optimization (0 = auto).") + .def_property_readonly( + "retrain_only", + [](const OptimizeOptions &self) { return self.retrain_only_; }, + "bool: Whether to reuse existing indexes and only retrain OMEGA.") .def(py::pickle( [](const OptimizeOptions &self) { - return py::make_tuple(self.concurrency_); + return py::make_tuple(self.concurrency_, self.retrain_only_); }, [](py::tuple t) { - if (t.size() != 1) + if (t.size() != 2) throw std::runtime_error( "Invalid pickle data for OptimizeOptions"); OptimizeOptions obj{}; obj.concurrency_ = t[0].cast(); + obj.retrain_only_ = t[1].cast(); return obj; })); diff --git a/src/db/collection.cc b/src/db/collection.cc index d0c057646..d203227b7 100644 --- a/src/db/collection.cc +++ b/src/db/collection.cc @@ -822,6 +822,16 @@ Status CollectionImpl::Optimize(const OptimizeOptions &options) { return Status::OK(); } + if (options.retrain_only_) { + LOG_WARN("Optimize running in OMEGA retrain-only mode on %zu persisted segments", + persist_segments.size()); + for (auto& segment : persist_segments) { + auto s = segment->retrain_omega_model(); + CHECK_RETURN_STATUS(s); + } + return Status::OK(); + } + // Step 1: Build 
vector indexes if not ready // This ensures indexes are built even for single segments that won't be compacted std::vector index_build_tasks; diff --git a/src/db/index/segment/segment.cc b/src/db/index/segment/segment.cc index 21960bd54..6508283bf 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -64,6 +65,104 @@ namespace zvec { +namespace { + +constexpr uint32_t kOmegaQueryCacheMagic = 0x4F514359; // OQCY +constexpr uint32_t kOmegaQueryCacheVersion = 1; + +void WriteTimingStatsJson( + const std::string& output_path, + const std::vector>& stats) { + std::ofstream ofs(output_path); + if (!ofs.is_open()) { + return; + } + ofs << "{\n"; + for (size_t i = 0; i < stats.size(); ++i) { + ofs << " \"" << stats[i].first << "\": " << stats[i].second; + if (i + 1 < stats.size()) { + ofs << ","; + } + ofs << "\n"; + } + ofs << "}\n"; +} + +std::string OmegaQueryCachePath(const std::string& model_output_dir) { + return model_output_dir + "/training_queries.bin"; +} + +bool SaveOmegaTrainingQueryCache( + const std::string& model_output_dir, + const std::vector>& queries, + const std::vector& query_doc_ids) { + if (queries.empty() || queries.size() != query_doc_ids.size()) { + return false; + } + const uint32_t dim = static_cast(queries[0].size()); + for (const auto& query : queries) { + if (query.size() != dim) { + return false; + } + } + + std::ofstream ofs(OmegaQueryCachePath(model_output_dir), std::ios::binary); + if (!ofs.is_open()) { + return false; + } + + const uint64_t num_queries = queries.size(); + ofs.write(reinterpret_cast(&kOmegaQueryCacheMagic), sizeof(kOmegaQueryCacheMagic)); + ofs.write(reinterpret_cast(&kOmegaQueryCacheVersion), sizeof(kOmegaQueryCacheVersion)); + ofs.write(reinterpret_cast(&num_queries), sizeof(num_queries)); + ofs.write(reinterpret_cast(&dim), sizeof(dim)); + for (size_t i = 0; i < queries.size(); ++i) { + 
ofs.write(reinterpret_cast(&query_doc_ids[i]), sizeof(query_doc_ids[i])); + ofs.write(reinterpret_cast(queries[i].data()), + static_cast(dim * sizeof(float))); + } + return ofs.good(); +} + +bool LoadOmegaTrainingQueryCache( + const std::string& model_output_dir, + std::vector>* queries, + std::vector* query_doc_ids) { + std::ifstream ifs(OmegaQueryCachePath(model_output_dir), std::ios::binary); + if (!ifs.is_open()) { + return false; + } + + uint32_t magic = 0; + uint32_t version = 0; + uint64_t num_queries = 0; + uint32_t dim = 0; + ifs.read(reinterpret_cast(&magic), sizeof(magic)); + ifs.read(reinterpret_cast(&version), sizeof(version)); + ifs.read(reinterpret_cast(&num_queries), sizeof(num_queries)); + ifs.read(reinterpret_cast(&dim), sizeof(dim)); + if (!ifs.good() || magic != kOmegaQueryCacheMagic || version != kOmegaQueryCacheVersion || + num_queries == 0 || dim == 0) { + return false; + } + + queries->assign(num_queries, std::vector(dim)); + query_doc_ids->assign(num_queries, 0); + for (size_t i = 0; i < num_queries; ++i) { + ifs.read(reinterpret_cast(&(*query_doc_ids)[i]), sizeof(uint64_t)); + ifs.read(reinterpret_cast((*queries)[i].data()), + static_cast(dim * sizeof(float))); + if (!ifs.good()) { + queries->clear(); + query_doc_ids->clear(); + return false; + } + } + return true; +} + +} // namespace + void global_init() { static std::once_flag once; // run once @@ -204,6 +303,8 @@ class SegmentImpl : public Segment, Status flush() override; + Status retrain_omega_model() override; + Status destroy() override; TablePtr fetch(const std::vector &columns, @@ -1693,6 +1794,19 @@ Result SegmentImpl::merge_vector_indexer( if (training_result.has_value()) { training_result_opt = std::move(training_result.value()); + if (!FileHelper::DirectoryExists(model_output_dir)) { + FileHelper::CreateDirectory(model_output_dir); + } + if (!SaveOmegaTrainingQueryCache( + model_output_dir, + training_result_opt->training_queries, + training_result_opt->query_doc_ids)) { + 
LOG_WARN("Failed to persist OMEGA training query cache: %s", + OmegaQueryCachePath(model_output_dir).c_str()); + } + WriteTimingStatsJson( + model_output_dir + "/training_collection_timing.json", + TrainingDataCollector::ConsumeTimingStats()); LOG_INFO("Collected %zu training records (before flush)", training_result_opt->records.size()); } else { LOG_WARN("Failed to collect training data: %s", training_result.error().message().c_str()); @@ -2344,7 +2458,7 @@ Status SegmentImpl::cleanup() { Status SegmentImpl::auto_train_omega_index_internal( const std::string& field_name, const std::vector& indexers) { - LOG_INFO("Starting auto-training for OMEGA index on field '%s' in segment %d", + LOG_WARN("Starting auto-training for OMEGA index on field '%s' in segment %d", field_name.c_str(), id()); // Get training params from index params @@ -2380,6 +2494,8 @@ Status SegmentImpl::auto_train_omega_index_internal( total_doc_count, min_vector_threshold); // Step 1: Collect training data using the provided indexers + LOG_WARN("OMEGA retrain step 1/2: start collecting training data for field '%s' in segment %d", + field_name.c_str(), id()); TrainingDataCollectorOptions collector_options; collector_options.num_training_queries = num_training_queries; collector_options.ef_training = ef_training; @@ -2387,9 +2503,24 @@ Status SegmentImpl::auto_train_omega_index_internal( collector_options.topk = 100; collector_options.noise_scale = 0.01f; - Result training_records_result = - TrainingDataCollector::CollectTrainingDataWithGtCmps( - shared_from_this(), field_name, collector_options, indexers); + std::vector> cached_queries; + std::vector cached_query_doc_ids; + const std::string model_output_dir = + FileHelper::MakeSegmentPath(path_, id()) + "/omega_model"; + Result training_records_result; + if (LoadOmegaTrainingQueryCache(model_output_dir, &cached_queries, &cached_query_doc_ids)) { + LOG_WARN("Loaded %zu cached held-out queries for OMEGA retraining from %s", + cached_queries.size(), 
OmegaQueryCachePath(model_output_dir).c_str()); + training_records_result = + TrainingDataCollector::CollectTrainingDataWithGtCmpsFromQueries( + shared_from_this(), field_name, cached_queries, cached_query_doc_ids, + collector_options, indexers); + } else { + LOG_WARN("OMEGA retrain query cache not found, falling back to sampling held-out queries from persisted segment"); + training_records_result = + TrainingDataCollector::CollectTrainingDataWithGtCmps( + shared_from_this(), field_name, collector_options, indexers); + } if (!training_records_result.has_value()) { return Status::InternalError( @@ -2397,6 +2528,9 @@ Status SegmentImpl::auto_train_omega_index_internal( training_records_result.error().message()); } + LOG_WARN("OMEGA retrain step 1/2: finished collecting training data for field '%s' in segment %d", + field_name.c_str(), id()); + auto& training_result = training_records_result.value(); auto& training_records = training_result.records; LOG_INFO("Collected %zu training records for segment %d", @@ -2436,8 +2570,10 @@ Status SegmentImpl::auto_train_omega_index_internal( #ifdef ZVEC_ENABLE_OMEGA // Step 2: Train OMEGA model with gt_cmps data + LOG_WARN("OMEGA retrain step 2/2: start model training for field '%s' in segment %d", + field_name.c_str(), id()); OmegaModelTrainerOptions trainer_options; - trainer_options.output_dir = FileHelper::MakeSegmentPath(path_, id()) + "/omega_model"; + trainer_options.output_dir = model_output_dir; trainer_options.verbose = true; // Create output directory if it doesn't exist @@ -2449,6 +2585,10 @@ Status SegmentImpl::auto_train_omega_index_internal( } } + WriteTimingStatsJson( + trainer_options.output_dir + "/training_collection_timing.json", + TrainingDataCollector::ConsumeTimingStats()); + auto train_status = OmegaModelTrainer::TrainModelWithGtCmps( training_records, training_result.gt_cmps_data, trainer_options); if (!train_status.ok()) { @@ -2456,7 +2596,7 @@ Status SegmentImpl::auto_train_omega_index_internal( 
"Failed to train OMEGA model: " + train_status.message()); } - LOG_INFO("Successfully trained OMEGA model for segment %d, output: %s", + LOG_WARN("OMEGA retrain step 2/2: finished model training for segment %d, output: %s", id(), trainer_options.output_dir.c_str()); #else LOG_INFO("OMEGA training skipped (ZVEC_ENABLE_OMEGA not defined)"); @@ -2469,6 +2609,33 @@ Status SegmentImpl::auto_train_omega_index_internal( return Status::OK(); } +Status SegmentImpl::retrain_omega_model() { + for (const auto& field : collection_schema_->vector_fields()) { + if (!field->index_params()) { + continue; + } + auto omega_params = + std::dynamic_pointer_cast(field->index_params()); + if (!omega_params) { + continue; + } + + auto indexers = get_vector_indexer(field->name()); + if (indexers.empty()) { + LOG_INFO("Skipping OMEGA retraining for field '%s' in segment %d: no vector indexers loaded", + field->name().c_str(), id()); + continue; + } + + LOG_WARN("Retraining OMEGA model for field '%s' in segment %d using existing index", + field->name().c_str(), id()); + auto s = auto_train_omega_index_internal(field->name(), indexers); + CHECK_RETURN_STATUS(s); + } + + return Status::OK(); +} + bool SegmentImpl::validate(const std::vector &columns) const { if (columns.empty()) { LOG_ERROR("Empty columns"); diff --git a/src/db/index/segment/segment.h b/src/db/index/segment/segment.h index 2e6e9bbca..9f36772fb 100644 --- a/src/db/index/segment/segment.h +++ b/src/db/index/segment/segment.h @@ -175,9 +175,10 @@ class Segment { // for others virtual Status flush() = 0; virtual Status dump() = 0; + virtual Status retrain_omega_model() = 0; // only mark need_destroyed virtual Status destroy() = 0; }; -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/src/db/training/omega_model_trainer.h b/src/db/training/omega_model_trainer.h index 7b5276ef8..4560ebd9d 100644 --- a/src/db/training/omega_model_trainer.h +++ b/src/db/training/omega_model_trainer.h @@ -16,6 +16,8 @@ 
#ifdef ZVEC_ENABLE_OMEGA +#include +#include #include #include #include @@ -23,6 +25,14 @@ namespace zvec { +inline int DefaultOmegaTrainerThreads() { + const unsigned int hc = std::thread::hardware_concurrency(); + if (hc == 0) { + return 8; + } + return static_cast(std::max(1u, hc / 2)); +} + /** * @brief Configuration options for OMEGA model training */ @@ -34,7 +44,7 @@ struct OmegaModelTrainerOptions { int num_iterations = 100; int num_leaves = 31; double learning_rate = 0.1; - int num_threads = 8; + int num_threads = DefaultOmegaTrainerThreads(); // Enable verbose logging during training bool verbose = false; diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 45bd1e7b6..0c41b1e84 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include "db/index/column/vector_column/vector_column_params.h" @@ -32,6 +33,29 @@ namespace zvec { // ============ DEBUG TIMING UTILITIES ============ namespace { +struct TimingStatsState { + std::mutex mu; + std::vector> ordered_stats; + std::unordered_map index_by_name; +}; + +TimingStatsState& GetTimingStatsState() { + static TimingStatsState state; + return state; +} + +void RecordTimingStat(const std::string& name, int64_t duration_ms) { + auto& state = GetTimingStatsState(); + std::lock_guard lock(state.mu); + auto it = state.index_by_name.find(name); + if (it == state.index_by_name.end()) { + state.index_by_name[name] = state.ordered_stats.size(); + state.ordered_stats.emplace_back(name, duration_ms); + } else { + state.ordered_stats[it->second].second = duration_ms; + } +} + static std::ofstream& GetDebugLog() { static std::ofstream log_file("/tmp/omega_training_debug.log", std::ios::app); return log_file; @@ -59,6 +83,7 @@ class ScopedTimer { ~ScopedTimer() { auto end = std::chrono::high_resolution_clock::now(); auto duration = 
std::chrono::duration_cast(end - start_).count(); + RecordTimingStat(name_, duration); DebugLog("[END] " + name_ + " | Duration: " + std::to_string(duration) + " ms"); } private: @@ -67,6 +92,22 @@ class ScopedTimer { }; } // namespace +void TrainingDataCollector::ResetTimingStats() { + auto& state = GetTimingStatsState(); + std::lock_guard lock(state.mu); + state.ordered_stats.clear(); + state.index_by_name.clear(); +} + +TrainingDataCollector::TimingStats TrainingDataCollector::ConsumeTimingStats() { + auto& state = GetTimingStatsState(); + std::lock_guard lock(state.mu); + TimingStats timings = std::move(state.ordered_stats); + state.ordered_stats.clear(); + state.index_by_name.clear(); + return timings; +} + Result TrainingDataCollector::CollectTrainingDataFromQueriesImpl( const Segment::Ptr& segment, const std::string& field_name, const std::vector>& training_queries, @@ -97,11 +138,7 @@ Result TrainingDataCollector::CollectTrainingDataFr if (ground_truth.empty()) { LOG_INFO("Computing ground truth (topk=%zu, ef_groundtruth=%d)", options.topk, options.ef_groundtruth); - std::string timer_name = options.ef_groundtruth > 0 - ? 
"ComputeGroundTruth (HNSW ef=" + - std::to_string(options.ef_groundtruth) + ")" - : "ComputeGroundTruth (BRUTE FORCE)"; - ScopedTimer timer(timer_name); + ScopedTimer timer("Step2: ComputeGroundTruth"); ground_truth = TrainingDataCollector::ComputeGroundTruth( segment, field_name, training_queries, options.topk, options.num_threads, query_doc_ids, options.ef_groundtruth, metric_type, indexers); @@ -117,12 +154,15 @@ Result TrainingDataCollector::CollectTrainingDataFr LOG_INFO("Setting ground truth (%zu queries) and enabling training mode on %zu indexers", ground_truth.size(), indexers.size()); - for (auto& indexer : indexers) { - indexer->SetTrainingGroundTruth(ground_truth, options.k_train); - auto status = indexer->EnableTrainingMode(true); - if (!status.ok()) { - LOG_WARN("Failed to enable training mode on indexer: %s", - status.message().c_str()); + { + ScopedTimer timer("Step3: EnableTrainingMode"); + for (auto& indexer : indexers) { + indexer->SetTrainingGroundTruth(ground_truth, options.k_train); + auto status = indexer->EnableTrainingMode(true); + if (!status.ok()) { + LOG_WARN("Failed to enable training mode on indexer: %s", + status.message().c_str()); + } } } @@ -131,8 +171,7 @@ Result TrainingDataCollector::CollectTrainingDataFr search_results.reserve(training_queries.size()); { - ScopedTimer timer("External: TrainingSearches (HNSW with ef=" + - std::to_string(options.ef_training) + ") PARALLEL"); + ScopedTimer timer("Step4: TrainingSearches"); size_t actual_threads = options.num_threads; if (actual_threads == 0) { @@ -202,7 +241,7 @@ Result TrainingDataCollector::CollectTrainingDataFr auto elapsed_ms = std::chrono::duration_cast(now - search_start) .count(); - DebugLog(" External training search progress: " + + DebugLog(" Training search progress: " + std::to_string(completed) + "/" + std::to_string(training_queries.size()) + ", elapsed: " + std::to_string(elapsed_ms) + " ms"); @@ -236,7 +275,7 @@ Result TrainingDataCollector::CollectTrainingDataFr 
LOG_INFO("Collecting training records from indexers"); std::vector all_records; { - ScopedTimer timer("External: CollectTrainingRecords"); + ScopedTimer timer("Step5: CollectTrainingRecords"); for (auto& indexer : indexers) { auto records = indexer->GetTrainingRecords(); LOG_INFO("Collected %zu records from indexer", records.size()); @@ -263,7 +302,7 @@ Result TrainingDataCollector::CollectTrainingDataFr LOG_INFO("Collecting gt_cmps data from indexers"); core_interface::GtCmpsData gt_cmps_data; { - ScopedTimer timer("External: GetGtCmpsData"); + ScopedTimer timer("Step6: GetGtCmpsData"); if (!indexers.empty()) { gt_cmps_data = indexers[0]->GetGtCmpsData(); if (gt_cmps_data.gt_cmps.empty()) { @@ -277,14 +316,19 @@ Result TrainingDataCollector::CollectTrainingDataFr } } - for (auto& indexer : indexers) { - indexer->EnableTrainingMode(false); - indexer->ClearTrainingRecords(); + { + ScopedTimer timer("Step7: DisableTrainingMode"); + for (auto& indexer : indexers) { + indexer->EnableTrainingMode(false); + indexer->ClearTrainingRecords(); + } } TrainingDataCollectorResult result; result.records = std::move(all_records); result.gt_cmps_data = std::move(gt_cmps_data); + result.training_queries = training_queries; + result.query_doc_ids = query_doc_ids; return result; } // ============ END DEBUG TIMING UTILITIES ============ @@ -936,6 +980,7 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( const std::string& field_name, const TrainingDataCollectorOptions& options, const std::vector& provided_indexers) { + ResetTimingStats(); ScopedTimer total_timer("CollectTrainingDataWithGtCmps [TOTAL]"); LOG_INFO("Generating %zu held-out training queries for field '%s'", options.num_training_queries, field_name.c_str()); @@ -957,4 +1002,21 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( query_doc_ids, provided_indexers); } +Result +TrainingDataCollector::CollectTrainingDataWithGtCmpsFromQueries( + const Segment::Ptr& segment, + const std::string& field_name, + const 
std::vector>& training_queries, + const std::vector& query_doc_ids, + const TrainingDataCollectorOptions& options, + const std::vector& provided_indexers) { + ResetTimingStats(); + ScopedTimer total_timer("CollectTrainingDataWithGtCmps [TOTAL]"); + LOG_INFO("Reusing %zu cached held-out training queries for field '%s'", + training_queries.size(), field_name.c_str()); + return CollectTrainingDataFromQueriesImpl(segment, field_name, + training_queries, {}, options, + query_doc_ids, provided_indexers); +} + } // namespace zvec diff --git a/src/db/training/training_data_collector.h b/src/db/training/training_data_collector.h index 2195efd04..b456a8d06 100644 --- a/src/db/training/training_data_collector.h +++ b/src/db/training/training_data_collector.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -63,6 +64,8 @@ struct TrainingDataCollectorOptions { struct TrainingDataCollectorResult { std::vector records; core_interface::GtCmpsData gt_cmps_data; + std::vector> training_queries; + std::vector query_doc_ids; }; /** @@ -76,6 +79,12 @@ struct TrainingDataCollectorResult { */ class TrainingDataCollector { public: + using TimingStats = std::vector>; + + static void ResetTimingStats(); + + static TimingStats ConsumeTimingStats(); + /** * @brief Collect training data from a persisted segment * @@ -109,6 +118,14 @@ class TrainingDataCollector { const TrainingDataCollectorOptions& options, const std::vector& indexers = {}); + static Result CollectTrainingDataWithGtCmpsFromQueries( + const Segment::Ptr& segment, + const std::string& field_name, + const std::vector>& training_queries, + const std::vector& query_doc_ids, + const TrainingDataCollectorOptions& options, + const std::vector& indexers = {}); + private: /** * @brief Compute ground truth using brute force or HNSW search diff --git a/src/include/zvec/db/collection.h b/src/include/zvec/db/collection.h index 293861c05..0489d5c91 100644 --- a/src/include/zvec/db/collection.h +++ 
b/src/include/zvec/db/collection.h @@ -72,7 +72,7 @@ class Collection { virtual Status DropIndex(const std::string &column_name) = 0; virtual Status Optimize(const OptimizeOptions &options = OptimizeOptions{ - 0}) = 0; + 0, false}) = 0; virtual Status AddColumn(const std::string &column_name, const FieldSchema::Ptr &column_schema, diff --git a/src/include/zvec/db/options.h b/src/include/zvec/db/options.h index 02d9d7fe6..8fc00ae78 100644 --- a/src/include/zvec/db/options.h +++ b/src/include/zvec/db/options.h @@ -58,6 +58,7 @@ struct CreateIndexOptions { struct OptimizeOptions { int concurrency_{0}; + bool retrain_only_{false}; }; struct AddColumnOptions { diff --git a/thirdparty/omega b/thirdparty/omega index 3e9116e36..1e3b3e397 160000 --- a/thirdparty/omega +++ b/thirdparty/omega @@ -1 +1 @@ -Subproject commit 3e9116e360afb9f32402be60cf0e99bb98b7f3ba +Subproject commit 1e3b3e39702493b3962294b59ecfcc2d12e9871b From bb8c5a23a3db8c6b322ff075f4c79e97253771ca Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sat, 21 Mar 2026 16:30:45 +0800 Subject: [PATCH 016/126] chore(thirdparty): nest OMEGALib under thirdparty/omega --- .gitmodules | 4 ++-- src/core/algorithm/omega/CMakeLists.txt | 2 +- src/core/interface/CMakeLists.txt | 2 +- src/db/CMakeLists.txt | 4 ++-- thirdparty/CMakeLists.txt | 3 +-- thirdparty/{omega => omega/OMEGALib} | 0 tools/core/CMakeLists.txt | 2 +- 7 files changed, 8 insertions(+), 9 deletions(-) rename thirdparty/{omega => omega/OMEGALib} (100%) diff --git a/.gitmodules b/.gitmodules index b1baf0770..cfb284bd1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -39,6 +39,6 @@ path = thirdparty/magic_enum/magic_enum-0.9.7 url = https://github.com/Neargye/magic_enum.git ignore = all -[submodule "thirdparty/omega"] - path = thirdparty/omega +[submodule "thirdparty/omega/OMEGALib"] + path = thirdparty/omega/OMEGALib url = https://github.com/driPyf/OMEGALib.git diff --git a/src/core/algorithm/omega/CMakeLists.txt b/src/core/algorithm/omega/CMakeLists.txt index 
e70c668eb..cf7548228 100644 --- a/src/core/algorithm/omega/CMakeLists.txt +++ b/src/core/algorithm/omega/CMakeLists.txt @@ -6,6 +6,6 @@ cc_library( STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework core_knn_hnsw omega - INCS . ${PROJECT_ROOT_DIR}/src/include ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm ${PROJECT_ROOT_DIR}/thirdparty/omega/include + INCS . ${PROJECT_ROOT_DIR}/src/include ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/interface/CMakeLists.txt b/src/core/interface/CMakeLists.txt index 2c1cdab3f..6a0c79a23 100644 --- a/src/core/interface/CMakeLists.txt +++ b/src/core/interface/CMakeLists.txt @@ -4,7 +4,7 @@ include(${PROJECT_ROOT_DIR}/cmake/option.cmake) cc_library( NAME core_interface STATIC STRICT ALWAYS_LINK SRCS *.cc indexes/*.cc - INCS . ${PROJECT_ROOT_DIR}/src/include ${PROJECT_ROOT_DIR}/src/ ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/thirdparty/omega/include + INCS . ${PROJECT_ROOT_DIR}/src/include ${PROJECT_ROOT_DIR}/src/ ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include LIBS zvec_ailego core_framework sparsehash magic_enum VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt index 7d57a04ef..e7d7add59 100644 --- a/src/db/CMakeLists.txt +++ b/src/db/CMakeLists.txt @@ -16,7 +16,7 @@ file(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h) cc_library( NAME zvec_db STATIC STRICT SRCS_NO_GLOB SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc - INCS . ${CMAKE_CURRENT_BINARY_DIR} ${PROJECT_ROOT_DIR}/thirdparty/omega/include + INCS . 
${CMAKE_CURRENT_BINARY_DIR} ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include LIBS zvec_ailego zvec_core @@ -31,4 +31,4 @@ cc_library( Arrow::arrow_acero DEPS zvec_proto VERSION "${PROXIMA_ZVEC_VERSION}" -) \ No newline at end of file +) diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index 644eae9d1..4c8ff419b 100644 --- a/thirdparty/CMakeLists.txt +++ b/thirdparty/CMakeLists.txt @@ -28,8 +28,7 @@ add_subdirectory(magic_enum magic_enum EXCLUDE_FROM_ALL) # omega is only built when ZVEC_ENABLE_OMEGA is ON if(ZVEC_ENABLE_OMEGA) message(STATUS "ZVEC: Building omega library with LightGBM support") - add_subdirectory(omega omega EXCLUDE_FROM_ALL) + add_subdirectory(omega/OMEGALib omega EXCLUDE_FROM_ALL) else() message(STATUS "ZVEC: Skipping omega library (ZVEC_ENABLE_OMEGA=OFF)") endif() - diff --git a/thirdparty/omega b/thirdparty/omega/OMEGALib similarity index 100% rename from thirdparty/omega rename to thirdparty/omega/OMEGALib diff --git a/tools/core/CMakeLists.txt b/tools/core/CMakeLists.txt index e4f70de87..df34fb454 100644 --- a/tools/core/CMakeLists.txt +++ b/tools/core/CMakeLists.txt @@ -37,7 +37,7 @@ cc_binary( NAME omega_predict_microbench STRICT PACKED SRCS omega_predict_microbench.cc - INCS ${PROJECT_ROOT_DIR}/src/core/ ${PROJECT_ROOT_DIR}/thirdparty/omega/include + INCS ${PROJECT_ROOT_DIR}/src/core/ ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include LIBS omega ) From 55875cd3ba56f3802ccf719fbe7e5e6192a0c405 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sat, 21 Mar 2026 16:35:38 +0800 Subject: [PATCH 017/126] chore(thirdparty): add omega wrapper cmake layer --- thirdparty/CMakeLists.txt | 2 +- thirdparty/omega/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 thirdparty/omega/CMakeLists.txt diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index 4c8ff419b..f3422abda 100644 --- a/thirdparty/CMakeLists.txt +++ b/thirdparty/CMakeLists.txt @@ -28,7 +28,7 @@ 
add_subdirectory(magic_enum magic_enum EXCLUDE_FROM_ALL) # omega is only built when ZVEC_ENABLE_OMEGA is ON if(ZVEC_ENABLE_OMEGA) message(STATUS "ZVEC: Building omega library with LightGBM support") - add_subdirectory(omega/OMEGALib omega EXCLUDE_FROM_ALL) + add_subdirectory(omega omega EXCLUDE_FROM_ALL) else() message(STATUS "ZVEC: Skipping omega library (ZVEC_ENABLE_OMEGA=OFF)") endif() diff --git a/thirdparty/omega/CMakeLists.txt b/thirdparty/omega/CMakeLists.txt new file mode 100644 index 000000000..1ffc9fcc8 --- /dev/null +++ b/thirdparty/omega/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(OMEGALib) From 5a195f0be8c3ea4a06301bddfd3094af0fd4b394 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 22 Mar 2026 14:27:22 +0800 Subject: [PATCH 018/126] feat(scripts): align benchmark presets and export profiling summaries --- scripts/benchmark_cohere_10m.py | 814 +++++++++++++++++++++++++++++++ scripts/benchmark_cohere_1m.py | 838 ++++++++++++++++++++++++++++++++ 2 files changed, 1652 insertions(+) create mode 100644 scripts/benchmark_cohere_10m.py create mode 100755 scripts/benchmark_cohere_1m.py diff --git a/scripts/benchmark_cohere_10m.py b/scripts/benchmark_cohere_10m.py new file mode 100644 index 000000000..5c5814298 --- /dev/null +++ b/scripts/benchmark_cohere_10m.py @@ -0,0 +1,814 @@ +#!/usr/bin/env python3 +""" +VectorDBBench: Zvec vs Zvec+OMEGA Comparison on Cohere-10M + +Based on official zvec.org Cohere-10M benchmark parameters. 
+ +Usage: + python benchmark_cohere_10m.py [--dry-run] [--target-recalls 0.90,0.95] +""" + +import argparse +import json +import subprocess +import sys +import os +import importlib +import re +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path + + +@dataclass +class BenchmarkResult: + type: str + ef_search: int + target_recall: float | None + path: str + success: bool + load_duration: float | None = None + qps: float | None = None + recall: float | None = None + profiling: dict | None = None + + +def resolve_paths( + zvec_root_arg: str | None, + vectordbbench_root_arg: str | None, + benchmark_dir_arg: str | None, + results_dir_arg: str | None, +) -> tuple[Path, Path, Path, Path]: + script_path = Path(__file__).resolve() + zvec_root = Path(zvec_root_arg).resolve() if zvec_root_arg else script_path.parent.parent + vectordbbench_root = ( + Path(vectordbbench_root_arg).resolve() + if vectordbbench_root_arg + else Path(os.environ.get("VECTORDBBENCH_ROOT", zvec_root.parent / "VectorDBBench")).resolve() + ) + benchmark_dir = ( + Path(benchmark_dir_arg).resolve() + if benchmark_dir_arg + else Path(os.environ.get("ZVEC_BENCHMARK_DIR", zvec_root / "benchmark_results")).resolve() + ) + if results_dir_arg: + results_dir = Path(results_dir_arg).resolve() + else: + results_dir = None + try: + config = importlib.import_module("vectordb_bench").config + results_dir = Path(config.RESULTS_LOCAL_DIR).resolve() / "Zvec" + except Exception: + results_dir = vectordbbench_root / "vectordb_bench" / "results" / "Zvec" + return zvec_root, vectordbbench_root, benchmark_dir, results_dir + + +def resolve_vectordbbench_command() -> list[str]: + return [sys.executable, "-m", "vectordb_bench"] + + +KV_PATTERN = re.compile(r"([A-Za-z_]+)=([^\s,]+)") + + +def parse_scalar(value: str): + lower = value.lower() + if lower in {"true", "false"}: + return lower == "true" + try: + if any(ch in value for ch in [".", "e", "E"]): + return float(value) + return 
int(value) + except ValueError: + return value + + +def parse_key_values(line: str) -> dict: + return {key: parse_scalar(value) for key, value in KV_PATTERN.findall(line)} + + +def avg_metric(records: list[dict], key: str) -> float | None: + values = [float(record[key]) for record in records if key in record] + if not values: + return None + return sum(values) / len(values) + + +def parse_serial_runner_summary(output: str) -> dict: + summary = {} + for line in output.splitlines(): + if "search entire test_data:" not in line: + continue + summary = parse_key_values(line) + return summary + + +def parse_query_records(output: str, prefix: str) -> list[dict]: + records = [] + for line in output.splitlines(): + if prefix not in line: + continue + records.append(parse_key_values(line)) + return records + + +def build_hnsw_profile(metrics: dict, output: str) -> dict: + query_records = parse_query_records(output, "HNSW query stats:") + serial_summary = parse_serial_runner_summary(output) + return { + "query_count": len(query_records), + "recall": metrics.get("recall"), + "qps": metrics.get("qps"), + "avg_end2end_latency_ms": avg_metric(query_records, "latency_ms"), + "avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), + "avg_scan_cmps": avg_metric(query_records, "cmps"), + "serial_avg_latency_s": serial_summary.get("avg_latency"), + "serial_p99_s": serial_summary.get("p99"), + "serial_p95_s": serial_summary.get("p95"), + "serial_avg_recall": serial_summary.get("avg_recall"), + } + + +def build_omega_profile(metrics: dict, output: str, hnsw_profile: dict | None) -> dict: + query_records = parse_query_records(output, "OMEGA query stats:") + serial_summary = parse_serial_runner_summary(output) + + avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") + avg_pure_search_ms = avg_metric(query_records, "pure_search_ms") + avg_omega_control_ms = avg_metric(query_records, "omega_control_ms") + + cmp_time_ms = None + if avg_pairwise_dist_cnt and 
avg_pairwise_dist_cnt > 0 and avg_pure_search_ms is not None: + cmp_time_ms = avg_pure_search_ms / avg_pairwise_dist_cnt + + model_overhead_cmp_equiv = None + if cmp_time_ms and cmp_time_ms > 0 and avg_omega_control_ms is not None: + model_overhead_cmp_equiv = avg_omega_control_ms / cmp_time_ms + + avg_saved_cmps = None + if hnsw_profile and hnsw_profile.get("avg_cmps") is not None and avg_pairwise_dist_cnt is not None: + avg_saved_cmps = hnsw_profile["avg_cmps"] - avg_pairwise_dist_cnt + + return { + "query_count": len(query_records), + "recall": metrics.get("recall"), + "qps": metrics.get("qps"), + "avg_end2end_latency_ms": avg_metric(query_records, "total_ms"), + "avg_cmps": avg_pairwise_dist_cnt, + "avg_scan_cmps": avg_metric(query_records, "scan_cmps"), + "avg_omega_cmps": avg_metric(query_records, "omega_cmps"), + "avg_prediction_calls": avg_metric(query_records, "prediction_calls"), + "avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), + "avg_advance_calls": avg_metric(query_records, "advance_calls"), + "avg_model_overhead_ms": avg_omega_control_ms, + "avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), + "avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), + "avg_feature_prep_ms": avg_metric(query_records, "feature_prep_ms"), + "avg_pure_search_ms": avg_pure_search_ms, + "avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, + "avg_early_stop_saved_cmps": avg_saved_cmps, + "avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), + "serial_avg_latency_s": serial_summary.get("avg_latency"), + "serial_p99_s": serial_summary.get("p99"), + "serial_p95_s": serial_summary.get("p95"), + "serial_avg_recall": serial_summary.get("avg_recall"), + } + + +def profiling_output_path(benchmark_dir: Path) -> Path: + return benchmark_dir / "cohere_10m_profiling_summary.json" + + +def write_profiling_summary(benchmark_dir: Path, payload: dict) -> None: + with open(profiling_output_path(benchmark_dir), 
"w") as f: + json.dump(payload, f, indent=2, sort_keys=True) + + +def get_latest_result(db_label: str, results_dir: Path) -> dict: + if not results_dir.exists(): + return {} + + result_files = sorted( + results_dir.glob("result_*.json"), + key=lambda f: f.stat().st_mtime, + reverse=True, + ) + + for result_file in result_files: + try: + with open(result_file) as f: + data = json.load(f) + for result in data.get("results", []): + task_config = result.get("task_config", {}) + db_config = task_config.get("db_config", {}) + if db_config.get("db_label") == db_label: + metrics = result.get("metrics", {}) + return { + "insert_duration": metrics.get("insert_duration"), + "optimize_duration": metrics.get("optimize_duration"), + "load_duration": metrics.get("load_duration"), + "qps": metrics.get("qps"), + "recall": metrics.get("recall"), + } + except Exception: + continue + + return {} + + +def snapshot_result_files(results_dir: Path) -> set[str]: + if not results_dir.exists(): + return set() + return {str(p) for p in results_dir.glob("result_*.json")} + + +def extract_result_from_file(result_file: Path, db_label: str) -> dict: + try: + with open(result_file) as f: + data = json.load(f) + for result in data.get("results", []): + task_config = result.get("task_config", {}) + db_config = task_config.get("db_config", {}) + if db_config.get("db_label") == db_label: + metrics = result.get("metrics", {}) + return { + "insert_duration": metrics.get("insert_duration"), + "optimize_duration": metrics.get("optimize_duration"), + "load_duration": metrics.get("load_duration"), + "qps": metrics.get("qps"), + "recall": metrics.get("recall"), + } + except Exception: + return {} + return {} + + +def get_run_result(db_label: str, before_files: set[str], results_dir: Path) -> dict: + if not results_dir.exists(): + return {} + + current_files = {str(p) for p in results_dir.glob("result_*.json")} + new_files = sorted( + [Path(p) for p in current_files - before_files], + key=lambda p: 
p.stat().st_mtime, + reverse=True, + ) + + for result_file in new_files: + metrics = extract_result_from_file(result_file, db_label) + if metrics: + return metrics + + return get_latest_result(db_label, results_dir) + + +def offline_summary_path(index_path: Path) -> Path: + return index_path / "offline_benchmark_summary.json" + + +def read_json_if_exists(path: Path) -> dict: + if not path.exists(): + return {} + try: + with open(path) as f: + return json.load(f) + except Exception: + return {} + + +def find_omega_model_dir(index_path: Path) -> Path | None: + candidates = sorted(index_path.glob("*/omega_model")) + return candidates[0] if candidates else None + + +def sum_timing_ms(data: dict) -> int: + return sum(v for v in data.values() if isinstance(v, (int, float))) + + +def build_offline_summary( + index_path: Path, + db_label: str, + metrics: dict, + retrain_only: bool = False, +) -> dict: + previous_summary = read_json_if_exists(offline_summary_path(index_path)) if retrain_only else {} + previous_offline = previous_summary.get("offline", {}) + previous_omega_training = previous_summary.get("omega_training", {}) + + insert_duration = metrics.get("insert_duration") + optimize_duration = metrics.get("optimize_duration") + load_duration = metrics.get("load_duration") + + omega_model_dir = find_omega_model_dir(index_path) + omega_training = {} + if omega_model_dir is not None: + omega_training = { + "collection_timing_ms": read_json_if_exists( + omega_model_dir / "training_collection_timing.json" + ), + "lightgbm_timing_ms": read_json_if_exists( + omega_model_dir / "lightgbm_training_timing.json" + ), + } + + if retrain_only: + insert_duration = previous_offline.get("insert_duration_s") + old_optimize_duration = previous_offline.get("optimize_duration_s") + old_training_s = ( + sum_timing_ms(previous_omega_training.get("collection_timing_ms", {})) + + sum_timing_ms(previous_omega_training.get("lightgbm_timing_ms", {})) + ) / 1000.0 + new_training_s = ( + 
def write_offline_summary(
    index_path: Path,
    db_label: str,
    metrics: dict,
    retrain_only: bool = False,
) -> None:
    """Build the offline summary for this index and persist it as pretty JSON."""
    summary = build_offline_summary(index_path, db_label, metrics, retrain_only=retrain_only)
    serialized = json.dumps(summary, indent=2, sort_keys=True)
    offline_summary_path(index_path).write_text(serialized)


def get_offline_load_duration(index_path: Path) -> float | None:
    """Fetch ``load_duration_s`` recorded by a previous offline build, if any."""
    offline = read_json_if_exists(offline_summary_path(index_path)).get("offline", {})
    return offline.get("load_duration_s")
+ ) + lines: list[str] = [] + assert process.stdout is not None + for line in process.stdout: + print(line, end="") + lines.append(line) + return process.wait(), "".join(lines) + + +def main(): + parser = argparse.ArgumentParser( + description="Benchmark Zvec HNSW vs OMEGA on Cohere-10M dataset" + ) + parser.add_argument("--dry-run", action="store_true", help="Print commands without executing") + parser.add_argument( + "--target-recalls", + type=str, + default="0.95", + help="Comma-separated target recalls for OMEGA (default: 0.95)", + ) + parser.add_argument("--skip-hnsw", action="store_true", help="Skip HNSW benchmark") + parser.add_argument("--skip-omega", action="store_true", help="Skip OMEGA benchmark") + parser.add_argument("--build-only", action="store_true", help="Only build index, skip search") + parser.add_argument("--search-only", action="store_true", help="Only run search on existing index") + parser.add_argument( + "--retrain-only", + action="store_true", + help="Reuse existing OMEGA index and only retrain the model during the build phase", + ) + parser.add_argument( + "--zvec-root", + type=str, + default=None, + help="Path to the zvec repository root (default: auto-detect from this script)", + ) + parser.add_argument( + "--vectordbbench-root", + type=str, + default=None, + help="Path to the VectorDBBench repository root " + "(default: $VECTORDBBENCH_ROOT or sibling repo next to zvec)", + ) + parser.add_argument( + "--benchmark-dir", + type=str, + default=None, + help="Directory used to store built benchmark artifacts " + "(default: $ZVEC_BENCHMARK_DIR or /benchmark_results)", + ) + parser.add_argument( + "--results-dir", + type=str, + default=None, + help="Directory containing VectorDBBench JSON result files " + "(default: runtime vectordb_bench.config.RESULTS_LOCAL_DIR/Zvec)", + ) + args = parser.parse_args() + + zvec_root, vectordbbench_root, benchmark_dir, results_dir = resolve_paths( + args.zvec_root, args.vectordbbench_root, args.benchmark_dir, 
args.results_dir + ) + vectordbbench_cmd = resolve_vectordbbench_command() + benchmark_dir.mkdir(parents=True, exist_ok=True) + + CASE_TYPE = "Performance768D10M" + M = 50 + EF_SEARCH = 118 + QUANTIZE_TYPE = "int8" + USE_REFINER = True + NUM_CONCURRENCY = "12,14,16,18,20" + CONCURRENCY_DURATION = 30 + K = 100 + + MIN_VECTOR_THRESHOLD = 100000 + NUM_TRAINING_QUERIES = 4000 + EF_TRAINING = 300 + WINDOW_SIZE = 100 + EF_GROUNDTRUTH = 500 + + target_recalls = [float(x) for x in args.target_recalls.split(",")] + + hnsw_path = benchmark_dir / "cohere_10m_hnsw" + omega_path = benchmark_dir / "cohere_10m_omega" + + print("=" * 70) + print("VectorDBBench: Zvec HNSW vs OMEGA (Cohere-10M)") + print("Based on official zvec.org benchmark parameters") + print("=" * 70) + print() + print("Official HNSW Parameters:") + print(f" M: {M}") + print(f" ef_search: {EF_SEARCH}") + print(f" quantize_type: {QUANTIZE_TYPE}") + print(f" is_using_refiner: {USE_REFINER}") + print(f" num_concurrency: {NUM_CONCURRENCY}") + print() + print("OMEGA Parameters:") + print(f" min_vector_threshold: {MIN_VECTOR_THRESHOLD}") + print(f" num_training_queries: {NUM_TRAINING_QUERIES}") + print(f" ef_training: {EF_TRAINING}") + print(f" window_size: {WINDOW_SIZE}") + print(f" ef_groundtruth: {EF_GROUNDTRUTH}") + print(f" target_recalls: {target_recalls}") + print(f" build_mode: {'retrain model only (reuse existing index)' if args.retrain_only else 'build index + train model'}") + print(f"zvec_root: {zvec_root}") + print(f"vectordbbench_root: {vectordbbench_root}") + print(f"vectordbbench_cmd: {' '.join(vectordbbench_cmd)}") + print(f"benchmark_dir: {benchmark_dir}") + print(f"results_dir: {results_dir}") + print("=" * 70) + + results: list[BenchmarkResult] = [] + + if not args.skip_hnsw: + print(f"\n\n{'#' * 70}") + print("# HNSW Benchmark") + print(f"{'#' * 70}") + + hnsw_db_label = "16c64g-v0.1" + + common_hnsw_args = [ + *vectordbbench_cmd, + "zvec", + "--path", + str(hnsw_path), + "--db-label", + 
hnsw_db_label, + "--case-type", + CASE_TYPE, + "--num-concurrency", + NUM_CONCURRENCY, + "--quantize-type", + QUANTIZE_TYPE, + "--m", + str(M), + "--ef-search", + str(EF_SEARCH), + "--k", + str(K), + "--concurrency-duration", + str(CONCURRENCY_DURATION), + ] + if USE_REFINER: + common_hnsw_args.append("--is-using-refiner") + + if not args.search_only: + print("\n[Phase 1] Building HNSW index...") + before_files = snapshot_result_files(results_dir) + cmd = common_hnsw_args + [ + "--skip-search-serial", + "--skip-search-concurrent", + ] + ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + if ret != 0 and not args.dry_run: + print("ERROR: HNSW build failed!") + return 1 + if not args.dry_run: + write_offline_summary( + hnsw_path, + hnsw_db_label, + get_run_result(hnsw_db_label, before_files, results_dir), + ) + + if not args.build_only: + print("\n[Phase 2] Running HNSW search benchmark...") + before_files = snapshot_result_files(results_dir) + cmd = common_hnsw_args + [ + "--skip-drop-old", + "--skip-load", + ] + ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + metrics = get_run_result(hnsw_db_label, before_files, results_dir) if not args.dry_run else {} + load_duration = get_offline_load_duration(hnsw_path) + hnsw_profile = None + if ret == 0 and not args.dry_run: + print("\n[Profiling] Running HNSW serial-only profiling pass...") + profile_cmd = common_hnsw_args + [ + "--skip-drop-old", + "--skip-load", + "--skip-search-concurrent", + ] + _, profile_output = run_command( + profile_cmd, + vectordbbench_root, + dry_run=False, + extra_env={ + "ZVEC_HNSW_LOG_QUERY_STATS": "1", + "ZVEC_HNSW_LOG_QUERY_LIMIT": "2000", + }, + ) + hnsw_profile = build_hnsw_profile(metrics, profile_output) + results.append( + BenchmarkResult( + type="HNSW", + ef_search=EF_SEARCH, + target_recall=None, + path=str(hnsw_path), + success=ret == 0, + load_duration=load_duration if load_duration is not None else metrics.get("load_duration"), + 
qps=metrics.get("qps"), + recall=metrics.get("recall"), + profiling=hnsw_profile, + ) + ) + + if not args.skip_omega: + omega_db_label = f"omega-m{M}-ef{EF_SEARCH}-refiner-int8" + build_target_recall = target_recalls[0] + + common_omega_args = [ + *vectordbbench_cmd, + "zvecomega", + "--path", + str(omega_path), + "--db-label", + omega_db_label, + "--case-type", + CASE_TYPE, + "--num-concurrency", + NUM_CONCURRENCY, + "--quantize-type", + QUANTIZE_TYPE, + "--m", + str(M), + "--ef-search", + str(EF_SEARCH), + "--k", + str(K), + "--concurrency-duration", + str(CONCURRENCY_DURATION), + "--min-vector-threshold", + str(MIN_VECTOR_THRESHOLD), + "--num-training-queries", + str(NUM_TRAINING_QUERIES), + "--ef-training", + str(EF_TRAINING), + "--window-size", + str(WINDOW_SIZE), + "--ef-groundtruth", + str(EF_GROUNDTRUTH), + ] + if USE_REFINER: + common_omega_args.append("--is-using-refiner") + + if not args.search_only: + print(f"\n\n{'#' * 70}") + print("# OMEGA Offline Phase") + print(f"{'#' * 70}") + if args.retrain_only: + print("\n[Phase 1] Retraining OMEGA model only (reusing existing index)...") + print( + f"Reusing existing OMEGA path/db_label: " + f"path={omega_path}, db_label={omega_db_label}" + ) + else: + print("\n[Phase 1] Building OMEGA index + training model...") + print( + f"Using shared OMEGA path/db_label for all target recalls: " + f"path={omega_path}, db_label={omega_db_label}" + ) + print( + "Build-time target_recall is ignored by training; " + f"using first requested value for CLI compatibility: {build_target_recall}" + ) + before_files = snapshot_result_files(results_dir) + cmd = common_omega_args + [ + "--target-recall", + str(build_target_recall), + "--skip-search-serial", + "--skip-search-concurrent", + ] + if args.retrain_only: + cmd += [ + "--skip-drop-old", + "--skip-load", + "--retrain-only", + ] + ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + if ret != 0 and not args.dry_run: + print("ERROR: OMEGA build failed!") + 
return 1 + if not args.dry_run: + write_offline_summary( + omega_path, + omega_db_label, + get_run_result(omega_db_label, before_files, results_dir), + retrain_only=args.retrain_only, + ) + + if not args.build_only: + for target_recall in target_recalls: + print(f"\n\n{'#' * 70}") + print(f"# OMEGA Benchmark (target_recall={target_recall})") + print(f"{'#' * 70}") + print("\n[Phase 2] Running OMEGA search benchmark...") + if args.retrain_only: + print("Search is using the newly retrained model on the existing index.") + before_files = snapshot_result_files(results_dir) + cmd = common_omega_args + [ + "--target-recall", + str(target_recall), + "--skip-drop-old", + "--skip-load", + ] + if args.retrain_only: + cmd.append("--retrain-only") + ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + metrics = get_run_result(omega_db_label, before_files, results_dir) if not args.dry_run else {} + load_duration = get_offline_load_duration(omega_path) + omega_profile = None + if ret == 0 and not args.dry_run: + print("\n[Profiling] Running OMEGA serial-only profiling pass...") + profile_cmd = common_omega_args + [ + "--target-recall", + str(target_recall), + "--skip-drop-old", + "--skip-load", + "--skip-search-concurrent", + ] + if args.retrain_only: + profile_cmd.append("--retrain-only") + _, profile_output = run_command( + profile_cmd, + vectordbbench_root, + dry_run=False, + extra_env={ + "ZVEC_OMEGA_LOG_QUERY_STATS": "1", + "ZVEC_OMEGA_LOG_QUERY_LIMIT": "2000", + }, + ) + baseline_profile = next( + (result.profiling for result in results if result.type == "HNSW" and result.profiling), + None, + ) + omega_profile = build_omega_profile(metrics, profile_output, baseline_profile) + results.append( + BenchmarkResult( + type="OMEGA", + ef_search=EF_SEARCH, + target_recall=target_recall, + path=str(omega_path), + success=ret == 0, + load_duration=load_duration if load_duration is not None else metrics.get("load_duration"), + qps=metrics.get("qps"), + 
recall=metrics.get("recall"), + profiling=omega_profile, + ) + ) + + if results: + write_profiling_summary( + benchmark_dir, + { + "generated_at": datetime.now().isoformat(), + "dataset": "cohere_10m", + "results": [ + { + "type": result.type, + "target_recall": result.target_recall, + "path": result.path, + "load_duration_s": result.load_duration, + "qps": result.qps, + "recall": result.recall, + "profiling": result.profiling, + } + for result in results + ], + }, + ) + print("\n\n" + "=" * 70) + print("Benchmark Summary") + print("=" * 70) + print() + print(f"{'Type':<10} {'target_recall':<15} {'load_dur(s)':<12} {'qps':<12} {'recall':<10} {'Status':<10}") + print("-" * 75) + for r in results: + tr = f"{r.target_recall:.2f}" if r.target_recall else "N/A" + status = "OK" if r.success else "FAILED" + ld = f"{r.load_duration:.1f}" if r.load_duration else "N/A" + qps = f"{r.qps:.1f}" if r.qps else "N/A" + recall = f"{r.recall:.4f}" if r.recall else "N/A" + print(f"{r.type:<10} {tr:<15} {ld:<12} {qps:<12} {recall:<10} {status:<10}") + + print() + print("Profiling Summary") + print("-" * 75) + print(f"{'Type':<10} {'target_recall':<15} {'avg_lat(ms)':<12} {'avg_cmps':<12} {'avg_pred_calls':<16} {'avg_model_ms':<14} {'saved_cmps':<12}") + for r in results: + profile = r.profiling or {} + tr = f"{r.target_recall:.2f}" if r.target_recall else "N/A" + avg_lat = profile.get("avg_end2end_latency_ms") + avg_cmps = profile.get("avg_cmps") + avg_pred_calls = profile.get("avg_prediction_calls") + avg_model_ms = profile.get("avg_model_overhead_ms") + saved_cmps = profile.get("avg_early_stop_saved_cmps") + print( + f"{r.type:<10} " + f"{tr:<15} " + f"{(f'{avg_lat:.3f}' if avg_lat is not None else 'N/A'):<12} " + f"{(f'{avg_cmps:.1f}' if avg_cmps is not None else 'N/A'):<12} " + f"{(f'{avg_pred_calls:.2f}' if avg_pred_calls is not None else 'N/A'):<16} " + f"{(f'{avg_model_ms:.3f}' if avg_model_ms is not None else 'N/A'):<14} " + f"{(f'{saved_cmps:.1f}' if saved_cmps is not None 
else 'N/A'):<12}" + ) + print() + print(f"Profiling JSON: {profiling_output_path(benchmark_dir)}") + + print() + print("To view results:") + print(" vectordbbench results") + print() + print("Or start the web UI:") + print(" vectordbbench start") + print() + + return 0 if all(r.success for r in results) else 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/benchmark_cohere_1m.py b/scripts/benchmark_cohere_1m.py new file mode 100755 index 000000000..05360aa7e --- /dev/null +++ b/scripts/benchmark_cohere_1m.py @@ -0,0 +1,838 @@ +#!/usr/bin/env python3 +""" +VectorDBBench: Zvec vs Zvec+OMEGA Comparison on Cohere-1M + +Based on official zvec.org benchmark parameters. + +Usage: + python benchmark_cohere_1m.py [--dry-run] [--target-recalls 0.90,0.95,0.98] +""" + +import argparse +import json +import subprocess +import sys +import os +import importlib +import re +from datetime import datetime +from pathlib import Path +from dataclasses import dataclass + + +@dataclass +class BenchmarkResult: + """Parsed benchmark result from VectorDBBench output.""" + type: str + ef_search: int + target_recall: float | None + path: str + success: bool + load_duration: float | None = None + qps: float | None = None + recall: float | None = None + profiling: dict | None = None + + +def resolve_paths( + zvec_root_arg: str | None, + vectordbbench_root_arg: str | None, + benchmark_dir_arg: str | None, + results_dir_arg: str | None, +) -> tuple[Path, Path, Path, Path]: + script_path = Path(__file__).resolve() + zvec_root = Path(zvec_root_arg).resolve() if zvec_root_arg else script_path.parent.parent + vectordbbench_root = ( + Path(vectordbbench_root_arg).resolve() + if vectordbbench_root_arg + else Path(os.environ.get("VECTORDBBENCH_ROOT", zvec_root.parent / "VectorDBBench")).resolve() + ) + benchmark_dir = ( + Path(benchmark_dir_arg).resolve() + if benchmark_dir_arg + else Path(os.environ.get("ZVEC_BENCHMARK_DIR", zvec_root / "benchmark_results")).resolve() + ) 
+ + if results_dir_arg: + results_dir = Path(results_dir_arg).resolve() + else: + results_dir = None + try: + config = importlib.import_module("vectordb_bench").config + results_dir = Path(config.RESULTS_LOCAL_DIR).resolve() / "Zvec" + except Exception: + results_dir = vectordbbench_root / "vectordb_bench" / "results" / "Zvec" + return zvec_root, vectordbbench_root, benchmark_dir, results_dir + + +def resolve_vectordbbench_command() -> list[str]: + return [sys.executable, "-m", "vectordb_bench"] + + +KV_PATTERN = re.compile(r"([A-Za-z_]+)=([^\s,]+)") + + +def parse_scalar(value: str): + lower = value.lower() + if lower in {"true", "false"}: + return lower == "true" + try: + if any(ch in value for ch in [".", "e", "E"]): + return float(value) + return int(value) + except ValueError: + return value + + +def parse_key_values(line: str) -> dict: + return {key: parse_scalar(value) for key, value in KV_PATTERN.findall(line)} + + +def avg_metric(records: list[dict], key: str) -> float | None: + values = [float(record[key]) for record in records if key in record] + if not values: + return None + return sum(values) / len(values) + + +def parse_serial_runner_summary(output: str) -> dict: + summary = {} + for line in output.splitlines(): + if "search entire test_data:" not in line: + continue + summary = parse_key_values(line) + return summary + + +def parse_query_records(output: str, prefix: str) -> list[dict]: + records = [] + for line in output.splitlines(): + if prefix not in line: + continue + records.append(parse_key_values(line)) + return records + + +def build_hnsw_profile(metrics: dict, output: str) -> dict: + query_records = parse_query_records(output, "HNSW query stats:") + serial_summary = parse_serial_runner_summary(output) + return { + "query_count": len(query_records), + "recall": metrics.get("recall"), + "qps": metrics.get("qps"), + "avg_end2end_latency_ms": avg_metric(query_records, "latency_ms"), + "avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), 
+ "avg_scan_cmps": avg_metric(query_records, "cmps"), + "serial_avg_latency_s": serial_summary.get("avg_latency"), + "serial_p99_s": serial_summary.get("p99"), + "serial_p95_s": serial_summary.get("p95"), + "serial_avg_recall": serial_summary.get("avg_recall"), + } + + +def build_omega_profile(metrics: dict, output: str, hnsw_profile: dict | None) -> dict: + query_records = parse_query_records(output, "OMEGA query stats:") + serial_summary = parse_serial_runner_summary(output) + + avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") + avg_pure_search_ms = avg_metric(query_records, "pure_search_ms") + avg_omega_control_ms = avg_metric(query_records, "omega_control_ms") + + cmp_time_ms = None + if avg_pairwise_dist_cnt and avg_pairwise_dist_cnt > 0 and avg_pure_search_ms is not None: + cmp_time_ms = avg_pure_search_ms / avg_pairwise_dist_cnt + + model_overhead_cmp_equiv = None + if cmp_time_ms and cmp_time_ms > 0 and avg_omega_control_ms is not None: + model_overhead_cmp_equiv = avg_omega_control_ms / cmp_time_ms + + avg_saved_cmps = None + if hnsw_profile and hnsw_profile.get("avg_cmps") is not None and avg_pairwise_dist_cnt is not None: + avg_saved_cmps = hnsw_profile["avg_cmps"] - avg_pairwise_dist_cnt + + return { + "query_count": len(query_records), + "recall": metrics.get("recall"), + "qps": metrics.get("qps"), + "avg_end2end_latency_ms": avg_metric(query_records, "total_ms"), + "avg_cmps": avg_pairwise_dist_cnt, + "avg_scan_cmps": avg_metric(query_records, "scan_cmps"), + "avg_omega_cmps": avg_metric(query_records, "omega_cmps"), + "avg_prediction_calls": avg_metric(query_records, "prediction_calls"), + "avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), + "avg_advance_calls": avg_metric(query_records, "advance_calls"), + "avg_model_overhead_ms": avg_omega_control_ms, + "avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), + "avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), + 
"avg_feature_prep_ms": avg_metric(query_records, "feature_prep_ms"), + "avg_pure_search_ms": avg_pure_search_ms, + "avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, + "avg_early_stop_saved_cmps": avg_saved_cmps, + "avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), + "serial_avg_latency_s": serial_summary.get("avg_latency"), + "serial_p99_s": serial_summary.get("p99"), + "serial_p95_s": serial_summary.get("p95"), + "serial_avg_recall": serial_summary.get("avg_recall"), + } + + +def profiling_output_path(benchmark_dir: Path) -> Path: + return benchmark_dir / "cohere_1m_profiling_summary.json" + + +def write_profiling_summary(benchmark_dir: Path, payload: dict) -> None: + with open(profiling_output_path(benchmark_dir), "w") as f: + json.dump(payload, f, indent=2, sort_keys=True) + + +def get_latest_result(db_label: str, results_dir: Path) -> dict: + """Get the latest benchmark result for a given db_label from VectorDBBench.""" + if not results_dir.exists(): + return {} + + # Find all result files, sorted by modification time (newest first) + result_files = sorted( + results_dir.glob("result_*.json"), + key=lambda f: f.stat().st_mtime, + reverse=True + ) + + for result_file in result_files: + try: + with open(result_file) as f: + data = json.load(f) + + # Check each result in this file + for result in data.get("results", []): + task_config = result.get("task_config", {}) + db_config = task_config.get("db_config", {}) + if db_config.get("db_label") == db_label: + metrics = result.get("metrics", {}) + return { + 'insert_duration': metrics.get('insert_duration'), + 'optimize_duration': metrics.get('optimize_duration'), + 'load_duration': metrics.get('load_duration'), + 'qps': metrics.get('qps'), + 'recall': metrics.get('recall'), + } + except Exception: + # Skip files that can't be parsed + continue + + return {} + + +def snapshot_result_files(results_dir: Path) -> set[str]: + if not results_dir.exists(): + return set() + return {str(p) for p 
in results_dir.glob("result_*.json")} + + +def extract_result_from_file(result_file: Path, db_label: str) -> dict: + try: + with open(result_file) as f: + data = json.load(f) + for result in data.get("results", []): + task_config = result.get("task_config", {}) + db_config = task_config.get("db_config", {}) + if db_config.get("db_label") == db_label: + metrics = result.get("metrics", {}) + return { + "insert_duration": metrics.get("insert_duration"), + "optimize_duration": metrics.get("optimize_duration"), + "load_duration": metrics.get("load_duration"), + "qps": metrics.get("qps"), + "recall": metrics.get("recall"), + } + except Exception: + return {} + return {} + + +def get_run_result(db_label: str, before_files: set[str], results_dir: Path) -> dict: + if not results_dir.exists(): + return {} + + current_files = {str(p) for p in results_dir.glob("result_*.json")} + new_files = sorted( + [Path(p) for p in current_files - before_files], + key=lambda p: p.stat().st_mtime, + reverse=True, + ) + + for result_file in new_files: + metrics = extract_result_from_file(result_file, db_label) + if metrics: + return metrics + + return get_latest_result(db_label, results_dir) + + +def offline_summary_path(index_path: Path) -> Path: + return index_path / "offline_benchmark_summary.json" + + +def read_json_if_exists(path: Path) -> dict: + if not path.exists(): + return {} + try: + with open(path) as f: + return json.load(f) + except Exception: + return {} + + +def find_omega_model_dir(index_path: Path) -> Path | None: + candidates = sorted(index_path.glob("*/omega_model")) + return candidates[0] if candidates else None + + +def sum_timing_ms(data: dict) -> int: + return sum(v for v in data.values() if isinstance(v, (int, float))) + + +def build_offline_summary( + index_path: Path, + db_label: str, + metrics: dict, + retrain_only: bool = False, +) -> dict: + previous_summary = read_json_if_exists(offline_summary_path(index_path)) if retrain_only else {} + previous_offline = 
previous_summary.get("offline", {}) + previous_omega_training = previous_summary.get("omega_training", {}) + + insert_duration = metrics.get("insert_duration") + optimize_duration = metrics.get("optimize_duration") + load_duration = metrics.get("load_duration") + + omega_model_dir = find_omega_model_dir(index_path) + omega_training = {} + if omega_model_dir is not None: + omega_training = { + "collection_timing_ms": read_json_if_exists( + omega_model_dir / "training_collection_timing.json" + ), + "lightgbm_timing_ms": read_json_if_exists( + omega_model_dir / "lightgbm_training_timing.json" + ), + } + + if retrain_only: + insert_duration = previous_offline.get("insert_duration_s") + old_optimize_duration = previous_offline.get("optimize_duration_s") + old_training_s = ( + sum_timing_ms(previous_omega_training.get("collection_timing_ms", {})) + + sum_timing_ms(previous_omega_training.get("lightgbm_timing_ms", {})) + ) / 1000.0 + new_training_s = ( + sum_timing_ms(omega_training.get("collection_timing_ms", {})) + + sum_timing_ms(omega_training.get("lightgbm_timing_ms", {})) + ) / 1000.0 + if old_optimize_duration is not None: + optimize_duration = round(old_optimize_duration - old_training_s + new_training_s, 4) + else: + optimize_duration = metrics.get("optimize_duration") + load_duration = ( + round(insert_duration + optimize_duration, 4) + if insert_duration is not None and optimize_duration is not None + else metrics.get("load_duration") + ) + + summary = { + "db_label": db_label, + "index_path": str(index_path), + "generated_at": datetime.now().isoformat(), + "offline": { + "insert_duration_s": insert_duration, + "optimize_duration_s": optimize_duration, + "load_duration_s": load_duration, + }, + } + + if omega_training: + summary["omega_training"] = omega_training + + return summary + + +def write_offline_summary( + index_path: Path, + db_label: str, + metrics: dict, + retrain_only: bool = False, +) -> None: + summary = build_offline_summary(index_path, 
db_label, metrics, retrain_only=retrain_only) + with open(offline_summary_path(index_path), "w") as f: + json.dump(summary, f, indent=2, sort_keys=True) + + +def get_offline_load_duration(index_path: Path) -> float | None: + summary = read_json_if_exists(offline_summary_path(index_path)) + return summary.get("offline", {}).get("load_duration_s") + + +def run_command( + cmd: list[str], + vectordbbench_root: Path, + dry_run: bool = False, + extra_env: dict[str, str] | None = None, +) -> tuple[int, str]: + """Run a command and return the exit code.""" + cmd_str = " \\\n ".join(cmd) + print(f"\n{'='*60}") + print(f"Command:\n{cmd_str}") + print(f"{'='*60}\n") + + if dry_run: + print("[DRY RUN] Command not executed") + return 0, "" + + cwd = vectordbbench_root if vectordbbench_root.exists() else None + env = os.environ.copy() + if extra_env: + env.update(extra_env) + process = subprocess.Popen( + cmd, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + ) + lines: list[str] = [] + assert process.stdout is not None + for line in process.stdout: + print(line, end="") + lines.append(line) + return process.wait(), "".join(lines) + + +def main(): + parser = argparse.ArgumentParser( + description="Benchmark Zvec HNSW vs OMEGA on Cohere-1M dataset" + ) + parser.add_argument("--dry-run", action="store_true", help="Print commands without executing") + parser.add_argument("--target-recalls", type=str, default="0.95", + help="Comma-separated target recalls for OMEGA (default: 0.95)") + parser.add_argument("--skip-hnsw", action="store_true", help="Skip HNSW benchmark") + parser.add_argument("--skip-omega", action="store_true", help="Skip OMEGA benchmark") + parser.add_argument("--build-only", action="store_true", help="Only build index, skip search") + parser.add_argument("--search-only", action="store_true", help="Only run search on existing index") + parser.add_argument( + "--retrain-only", + action="store_true", + help="Reuse 
existing OMEGA index and only retrain the model during the build phase", + ) + parser.add_argument( + "--zvec-root", + type=str, + default=None, + help="Path to the zvec repository root (default: auto-detect from this script)", + ) + parser.add_argument( + "--vectordbbench-root", + type=str, + default=None, + help="Path to the VectorDBBench repository root " + "(default: $VECTORDBBENCH_ROOT or sibling repo next to zvec)", + ) + parser.add_argument( + "--benchmark-dir", + type=str, + default=None, + help="Directory used to store built benchmark artifacts " + "(default: $ZVEC_BENCHMARK_DIR or /benchmark_results)", + ) + parser.add_argument( + "--results-dir", + type=str, + default=None, + help="Directory containing VectorDBBench JSON result files " + "(default: runtime vectordb_bench.config.RESULTS_LOCAL_DIR/Zvec)", + ) + + args = parser.parse_args() + + zvec_root, vectordbbench_root, benchmark_dir, results_dir = resolve_paths( + args.zvec_root, args.vectordbbench_root, args.benchmark_dir, args.results_dir + ) + vectordbbench_cmd = resolve_vectordbbench_command() + + # Configuration - based on official zvec.org parameters + benchmark_dir.mkdir(parents=True, exist_ok=True) + + # Official parameters from zvec.org for Cohere-1M + CASE_TYPE = "Performance768D1M" + M = 15 + EF_SEARCH = 180 + QUANTIZE_TYPE = "int8" + NUM_CONCURRENCY = "12,14,16,18,20" + CONCURRENCY_DURATION = 30 + K = 100 + + # OMEGA parameters + MIN_VECTOR_THRESHOLD = 100000 # 100K (will train since we have 1M) + NUM_TRAINING_QUERIES = 4000 + EF_TRAINING = 300 + WINDOW_SIZE = 100 + EF_GROUNDTRUTH = 500 # Use HNSW for faster ground truth computation + + # Parse target recalls + target_recalls = [float(x) for x in args.target_recalls.split(",")] + + # Paths + hnsw_path = benchmark_dir / "cohere_1m_hnsw" + omega_path = benchmark_dir / "cohere_1m_omega" + + print("=" * 70) + print("VectorDBBench: Zvec HNSW vs OMEGA (Cohere-1M)") + print("Based on official zvec.org benchmark parameters") + print("=" * 70) + 
print() + print("Official HNSW Parameters:") + print(f" M: {M}") + print(f" ef_search: {EF_SEARCH}") + print(f" quantize_type: {QUANTIZE_TYPE}") + print() + print("OMEGA Parameters:") + print(f" min_vector_threshold: {MIN_VECTOR_THRESHOLD}") + print(f" num_training_queries: {NUM_TRAINING_QUERIES}") + print(f" ef_training: {EF_TRAINING}") + print(f" window_size: {WINDOW_SIZE}") + print(f" ef_groundtruth: {EF_GROUNDTRUTH} (HNSW-based ground truth)") + print(f" target_recalls: {target_recalls}") + print(f" build_mode: {'retrain model only (reuse existing index)' if args.retrain_only else 'build index + train model'}") + print() + print(f"Concurrency: {NUM_CONCURRENCY}") + print(f"zvec_root: {zvec_root}") + print(f"vectordbbench_root: {vectordbbench_root}") + print(f"vectordbbench_cmd: {' '.join(vectordbbench_cmd)}") + print(f"benchmark_dir: {benchmark_dir}") + print(f"results_dir: {results_dir}") + print("=" * 70) + + results: list[BenchmarkResult] = [] + + # ============ HNSW Benchmark ============ + if not args.skip_hnsw: + print(f"\n\n{'#'*70}") + print(f"# HNSW Benchmark") + print(f"{'#'*70}") + + hnsw_db_label = "16c64g-v0.1" + + if not args.search_only: + # Phase 1: Build Index + print("\n[Phase 1] Building HNSW index...") + before_files = snapshot_result_files(results_dir) + cmd = [ + *vectordbbench_cmd, "zvec", + "--path", str(hnsw_path), + "--db-label", hnsw_db_label, + "--case-type", CASE_TYPE, + "--m", str(M), + "--ef-search", str(EF_SEARCH), + "--quantize-type", QUANTIZE_TYPE, + "--num-concurrency", NUM_CONCURRENCY, + "--concurrency-duration", str(CONCURRENCY_DURATION), + "--k", str(K), + "--skip-search-serial", + "--skip-search-concurrent", + ] + ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + if ret != 0 and not args.dry_run: + print("ERROR: HNSW build failed!") + return 1 + if not args.dry_run: + write_offline_summary( + hnsw_path, + hnsw_db_label, + get_run_result(hnsw_db_label, before_files, results_dir), + ) + + if not 
args.build_only: + # Phase 2: Run Search Benchmark + print("\n[Phase 2] Running HNSW search benchmark...") + before_files = snapshot_result_files(results_dir) + cmd = [ + *vectordbbench_cmd, "zvec", + "--path", str(hnsw_path), + "--db-label", hnsw_db_label, + "--case-type", CASE_TYPE, + "--m", str(M), + "--ef-search", str(EF_SEARCH), + "--quantize-type", QUANTIZE_TYPE, + "--num-concurrency", NUM_CONCURRENCY, + "--concurrency-duration", str(CONCURRENCY_DURATION), + "--k", str(K), + "--skip-drop-old", + "--skip-load", + ] + ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + + # Get results from VectorDBBench + metrics = get_run_result(hnsw_db_label, before_files, results_dir) if not args.dry_run else {} + load_duration = get_offline_load_duration(hnsw_path) + hnsw_profile = None + if ret == 0 and not args.dry_run: + print("\n[Profiling] Running HNSW serial-only profiling pass...") + profile_cmd = [ + *vectordbbench_cmd, "zvec", + "--path", str(hnsw_path), + "--db-label", hnsw_db_label, + "--case-type", CASE_TYPE, + "--m", str(M), + "--ef-search", str(EF_SEARCH), + "--quantize-type", QUANTIZE_TYPE, + "--num-concurrency", NUM_CONCURRENCY, + "--concurrency-duration", str(CONCURRENCY_DURATION), + "--k", str(K), + "--skip-drop-old", + "--skip-load", + "--skip-search-concurrent", + ] + _, profile_output = run_command( + profile_cmd, + vectordbbench_root, + dry_run=False, + extra_env={ + "ZVEC_HNSW_LOG_QUERY_STATS": "1", + "ZVEC_HNSW_LOG_QUERY_LIMIT": "2000", + }, + ) + hnsw_profile = build_hnsw_profile(metrics, profile_output) + results.append(BenchmarkResult( + type="HNSW", + ef_search=EF_SEARCH, + target_recall=None, + path=str(hnsw_path), + success=ret == 0, + load_duration=load_duration if load_duration is not None else metrics.get('load_duration'), + qps=metrics.get('qps'), + recall=metrics.get('recall'), + profiling=hnsw_profile, + )) + + # ============ OMEGA Benchmarks ============ + if not args.skip_omega: + omega_db_label = 
f"omega-m{M}-ef{EF_SEARCH}-int8" + build_target_recall = target_recalls[0] + + if not args.search_only: + print(f"\n\n{'#'*70}") + print("# OMEGA Offline Phase") + print(f"{'#'*70}") + if args.retrain_only: + print("\n[Phase 1] Retraining OMEGA model only (reusing existing index)...") + print( + f"Reusing existing OMEGA path/db_label: path={omega_path}, db_label={omega_db_label}" + ) + else: + print("\n[Phase 1] Building OMEGA index + training model...") + print( + f"Using shared OMEGA path/db_label for all target recalls: path={omega_path}, db_label={omega_db_label}" + ) + print( + f"Build-time target_recall is ignored by training; using first requested value for CLI compatibility: {build_target_recall}" + ) + before_files = snapshot_result_files(results_dir) + cmd = [ + *vectordbbench_cmd, "zvecomega", + "--path", str(omega_path), + "--db-label", omega_db_label, + "--case-type", CASE_TYPE, + "--m", str(M), + "--ef-search", str(EF_SEARCH), + "--quantize-type", QUANTIZE_TYPE, + "--min-vector-threshold", str(MIN_VECTOR_THRESHOLD), + "--num-training-queries", str(NUM_TRAINING_QUERIES), + "--ef-training", str(EF_TRAINING), + "--window-size", str(WINDOW_SIZE), + "--ef-groundtruth", str(EF_GROUNDTRUTH), + "--target-recall", str(build_target_recall), + "--num-concurrency", NUM_CONCURRENCY, + "--concurrency-duration", str(CONCURRENCY_DURATION), + "--k", str(K), + "--skip-search-serial", + "--skip-search-concurrent", + ] + if args.retrain_only: + cmd.extend([ + "--skip-drop-old", + "--skip-load", + "--retrain-only", + ]) + ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + if ret != 0 and not args.dry_run: + print("ERROR: OMEGA build failed!") + return 1 + if not args.dry_run: + write_offline_summary( + omega_path, + omega_db_label, + get_run_result(omega_db_label, before_files, results_dir), + retrain_only=args.retrain_only, + ) + + if not args.build_only: + for target_recall in target_recalls: + print(f"\n\n{'#'*70}") + print(f"# OMEGA Benchmark 
(target_recall={target_recall})") + print(f"{'#'*70}") + + # Phase 2: Run Search Benchmark + print("\n[Phase 2] Running OMEGA search benchmark...") + if args.retrain_only: + print("Search is using the newly retrained model on the existing index.") + before_files = snapshot_result_files(results_dir) + cmd = [ + *vectordbbench_cmd, "zvecomega", + "--path", str(omega_path), + "--db-label", omega_db_label, + "--case-type", CASE_TYPE, + "--m", str(M), + "--ef-search", str(EF_SEARCH), + "--quantize-type", QUANTIZE_TYPE, + "--min-vector-threshold", str(MIN_VECTOR_THRESHOLD), + "--num-training-queries", str(NUM_TRAINING_QUERIES), + "--ef-training", str(EF_TRAINING), + "--window-size", str(WINDOW_SIZE), + "--ef-groundtruth", str(EF_GROUNDTRUTH), + "--target-recall", str(target_recall), + "--num-concurrency", NUM_CONCURRENCY, + "--concurrency-duration", str(CONCURRENCY_DURATION), + "--k", str(K), + "--skip-drop-old", + "--skip-load", + ] + if args.retrain_only: + cmd.append("--retrain-only") + ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + + metrics = get_run_result(omega_db_label, before_files, results_dir) if not args.dry_run else {} + load_duration = get_offline_load_duration(omega_path) + omega_profile = None + if ret == 0 and not args.dry_run: + print("\n[Profiling] Running OMEGA serial-only profiling pass...") + profile_cmd = [ + *vectordbbench_cmd, "zvecomega", + "--path", str(omega_path), + "--db-label", omega_db_label, + "--case-type", CASE_TYPE, + "--m", str(M), + "--ef-search", str(EF_SEARCH), + "--quantize-type", QUANTIZE_TYPE, + "--min-vector-threshold", str(MIN_VECTOR_THRESHOLD), + "--num-training-queries", str(NUM_TRAINING_QUERIES), + "--ef-training", str(EF_TRAINING), + "--window-size", str(WINDOW_SIZE), + "--ef-groundtruth", str(EF_GROUNDTRUTH), + "--target-recall", str(target_recall), + "--num-concurrency", NUM_CONCURRENCY, + "--concurrency-duration", str(CONCURRENCY_DURATION), + "--k", str(K), + "--skip-drop-old", + "--skip-load", + 
"--skip-search-concurrent", + ] + if args.retrain_only: + profile_cmd.append("--retrain-only") + _, profile_output = run_command( + profile_cmd, + vectordbbench_root, + dry_run=False, + extra_env={ + "ZVEC_OMEGA_LOG_QUERY_STATS": "1", + "ZVEC_OMEGA_LOG_QUERY_LIMIT": "2000", + }, + ) + baseline_profile = next( + (result.profiling for result in results if result.type == "HNSW" and result.profiling), + None, + ) + omega_profile = build_omega_profile(metrics, profile_output, baseline_profile) + results.append(BenchmarkResult( + type="OMEGA", + ef_search=EF_SEARCH, + target_recall=target_recall, + path=str(omega_path), + success=ret == 0, + load_duration=load_duration if load_duration is not None else metrics.get('load_duration'), + qps=metrics.get('qps'), + recall=metrics.get('recall'), + profiling=omega_profile, + )) + + # ============ Summary ============ + if results: + write_profiling_summary( + benchmark_dir, + { + "generated_at": datetime.now().isoformat(), + "dataset": "cohere_1m", + "results": [ + { + "type": result.type, + "target_recall": result.target_recall, + "path": result.path, + "load_duration_s": result.load_duration, + "qps": result.qps, + "recall": result.recall, + "profiling": result.profiling, + } + for result in results + ], + }, + ) + print("\n\n" + "=" * 70) + print("Benchmark Summary") + print("=" * 70) + print() + print(f"{'Type':<10} {'target_recall':<15} {'load_dur(s)':<12} {'qps':<12} {'recall':<10} {'Status':<10}") + print("-" * 75) + for r in results: + tr = f"{r.target_recall:.2f}" if r.target_recall else "N/A" + status = "OK" if r.success else "FAILED" + ld = f"{r.load_duration:.1f}" if r.load_duration else "N/A" + qps = f"{r.qps:.1f}" if r.qps else "N/A" + recall = f"{r.recall:.4f}" if r.recall else "N/A" + print(f"{r.type:<10} {tr:<15} {ld:<12} {qps:<12} {recall:<10} {status:<10}") + + print() + print("Profiling Summary") + print("-" * 75) + print(f"{'Type':<10} {'target_recall':<15} {'avg_lat(ms)':<12} {'avg_cmps':<12} 
{'avg_pred_calls':<16} {'avg_model_ms':<14} {'saved_cmps':<12}") + for r in results: + profile = r.profiling or {} + tr = f"{r.target_recall:.2f}" if r.target_recall else "N/A" + avg_lat = profile.get("avg_end2end_latency_ms") + avg_cmps = profile.get("avg_cmps") + avg_pred_calls = profile.get("avg_prediction_calls") + avg_model_ms = profile.get("avg_model_overhead_ms") + saved_cmps = profile.get("avg_early_stop_saved_cmps") + print( + f"{r.type:<10} " + f"{tr:<15} " + f"{(f'{avg_lat:.3f}' if avg_lat is not None else 'N/A'):<12} " + f"{(f'{avg_cmps:.1f}' if avg_cmps is not None else 'N/A'):<12} " + f"{(f'{avg_pred_calls:.2f}' if avg_pred_calls is not None else 'N/A'):<16} " + f"{(f'{avg_model_ms:.3f}' if avg_model_ms is not None else 'N/A'):<14} " + f"{(f'{saved_cmps:.1f}' if saved_cmps is not None else 'N/A'):<12}" + ) + print() + print(f"Profiling JSON: {profiling_output_path(benchmark_dir)}") + + print() + print("To view results:") + print(" vectordbbench results") + print() + print("Or start the web UI:") + print(" vectordbbench start") + print() + + return 0 if all(r.success for r in results) else 1 + + +if __name__ == "__main__": + sys.exit(main()) From 53e0b553e1d136add5c9f141f949eac7615f77e1 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 22 Mar 2026 14:30:52 +0800 Subject: [PATCH 019/126] chore(scripts): update OMEGA training defaults for benchmark runs --- scripts/benchmark_cohere_10m.py | 4 ++-- scripts/benchmark_cohere_1m.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/benchmark_cohere_10m.py b/scripts/benchmark_cohere_10m.py index 5c5814298..b1669ef20 100644 --- a/scripts/benchmark_cohere_10m.py +++ b/scripts/benchmark_cohere_10m.py @@ -467,9 +467,9 @@ def main(): MIN_VECTOR_THRESHOLD = 100000 NUM_TRAINING_QUERIES = 4000 - EF_TRAINING = 300 + EF_TRAINING = 500 WINDOW_SIZE = 100 - EF_GROUNDTRUTH = 500 + EF_GROUNDTRUTH = 1000 target_recalls = [float(x) for x in args.target_recalls.split(",")] diff --git 
a/scripts/benchmark_cohere_1m.py b/scripts/benchmark_cohere_1m.py index 05360aa7e..ff68ae91d 100755 --- a/scripts/benchmark_cohere_1m.py +++ b/scripts/benchmark_cohere_1m.py @@ -472,11 +472,11 @@ def main(): K = 100 # OMEGA parameters - MIN_VECTOR_THRESHOLD = 100000 # 100K (will train since we have 1M) + MIN_VECTOR_THRESHOLD = 100000 NUM_TRAINING_QUERIES = 4000 - EF_TRAINING = 300 + EF_TRAINING = 500 WINDOW_SIZE = 100 - EF_GROUNDTRUTH = 500 # Use HNSW for faster ground truth computation + EF_GROUNDTRUTH = 1000 # Parse target recalls target_recalls = [float(x) for x in args.target_recalls.split(",")] From 420b4c4d77aaa3cda8345a3d62c520458bf8a8ae Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 22 Mar 2026 14:35:25 +0800 Subject: [PATCH 020/126] fix(scripts): invoke VectorDBBench CLI instead of Streamlit entrypoint --- scripts/benchmark_cohere_10m.py | 2 +- scripts/benchmark_cohere_1m.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/benchmark_cohere_10m.py b/scripts/benchmark_cohere_10m.py index b1669ef20..b67c77774 100644 --- a/scripts/benchmark_cohere_10m.py +++ b/scripts/benchmark_cohere_10m.py @@ -64,7 +64,7 @@ def resolve_paths( def resolve_vectordbbench_command() -> list[str]: - return [sys.executable, "-m", "vectordb_bench"] + return [sys.executable, "-m", "vectordb_bench.cli.vectordbbench"] KV_PATTERN = re.compile(r"([A-Za-z_]+)=([^\s,]+)") diff --git a/scripts/benchmark_cohere_1m.py b/scripts/benchmark_cohere_1m.py index ff68ae91d..50cfd3ae1 100755 --- a/scripts/benchmark_cohere_1m.py +++ b/scripts/benchmark_cohere_1m.py @@ -66,7 +66,7 @@ def resolve_paths( def resolve_vectordbbench_command() -> list[str]: - return [sys.executable, "-m", "vectordb_bench"] + return [sys.executable, "-m", "vectordb_bench.cli.vectordbbench"] KV_PATTERN = re.compile(r"([A-Za-z_]+)=([^\s,]+)") From 27fda47263a3995571ed7d56f7e7fa8782dd489a Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 22 Mar 2026 14:46:32 +0800 Subject: [PATCH 021/126] 
fix(scripts): avoid piping main benchmark output during profiling runs --- scripts/benchmark_cohere_10m.py | 35 +++++++++++++++++++++++++------ scripts/benchmark_cohere_1m.py | 37 ++++++++++++++++++++++++++------- 2 files changed, 59 insertions(+), 13 deletions(-) diff --git a/scripts/benchmark_cohere_10m.py b/scripts/benchmark_cohere_10m.py index b67c77774..6a2f79d8d 100644 --- a/scripts/benchmark_cohere_10m.py +++ b/scripts/benchmark_cohere_10m.py @@ -369,6 +369,29 @@ def run_command( vectordbbench_root: Path, dry_run: bool = False, extra_env: dict[str, str] | None = None, +) -> int: + cmd_str = " \\\n ".join(cmd) + print(f"\n{'=' * 60}") + print(f"Command:\n{cmd_str}") + print(f"{'=' * 60}\n") + + if dry_run: + print("[DRY RUN] Command not executed") + return 0 + + cwd = vectordbbench_root if vectordbbench_root.exists() else None + env = os.environ.copy() + if extra_env: + env.update(extra_env) + result = subprocess.run(cmd, cwd=cwd, env=env) + return result.returncode + + +def run_command_capture( + cmd: list[str], + vectordbbench_root: Path, + dry_run: bool = False, + extra_env: dict[str, str] | None = None, ) -> tuple[int, str]: cmd_str = " \\\n ".join(cmd) print(f"\n{'=' * 60}") @@ -544,7 +567,7 @@ def main(): "--skip-search-serial", "--skip-search-concurrent", ] - ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) if ret != 0 and not args.dry_run: print("ERROR: HNSW build failed!") return 1 @@ -562,7 +585,7 @@ def main(): "--skip-drop-old", "--skip-load", ] - ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) metrics = get_run_result(hnsw_db_label, before_files, results_dir) if not args.dry_run else {} load_duration = get_offline_load_duration(hnsw_path) hnsw_profile = None @@ -573,7 +596,7 @@ def main(): "--skip-load", "--skip-search-concurrent", ] - _, profile_output = run_command( + _, 
profile_output = run_command_capture( profile_cmd, vectordbbench_root, dry_run=False, @@ -669,7 +692,7 @@ def main(): "--skip-load", "--retrain-only", ] - ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) if ret != 0 and not args.dry_run: print("ERROR: OMEGA build failed!") return 1 @@ -698,7 +721,7 @@ def main(): ] if args.retrain_only: cmd.append("--retrain-only") - ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) metrics = get_run_result(omega_db_label, before_files, results_dir) if not args.dry_run else {} load_duration = get_offline_load_duration(omega_path) omega_profile = None @@ -713,7 +736,7 @@ def main(): ] if args.retrain_only: profile_cmd.append("--retrain-only") - _, profile_output = run_command( + _, profile_output = run_command_capture( profile_cmd, vectordbbench_root, dry_run=False, diff --git a/scripts/benchmark_cohere_1m.py b/scripts/benchmark_cohere_1m.py index 50cfd3ae1..3530e39f6 100755 --- a/scripts/benchmark_cohere_1m.py +++ b/scripts/benchmark_cohere_1m.py @@ -376,13 +376,36 @@ def run_command( vectordbbench_root: Path, dry_run: bool = False, extra_env: dict[str, str] | None = None, -) -> tuple[int, str]: +) -> int: """Run a command and return the exit code.""" cmd_str = " \\\n ".join(cmd) print(f"\n{'='*60}") print(f"Command:\n{cmd_str}") print(f"{'='*60}\n") + if dry_run: + print("[DRY RUN] Command not executed") + return 0 + + cwd = vectordbbench_root if vectordbbench_root.exists() else None + env = os.environ.copy() + if extra_env: + env.update(extra_env) + result = subprocess.run(cmd, cwd=cwd, env=env) + return result.returncode + + +def run_command_capture( + cmd: list[str], + vectordbbench_root: Path, + dry_run: bool = False, + extra_env: dict[str, str] | None = None, +) -> tuple[int, str]: + cmd_str = " \\\n ".join(cmd) + print(f"\n{'='*60}") + 
print(f"Command:\n{cmd_str}") + print(f"{'='*60}\n") + if dry_run: print("[DRY RUN] Command not executed") return 0, "" @@ -540,7 +563,7 @@ def main(): "--skip-search-serial", "--skip-search-concurrent", ] - ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) if ret != 0 and not args.dry_run: print("ERROR: HNSW build failed!") return 1 @@ -569,7 +592,7 @@ def main(): "--skip-drop-old", "--skip-load", ] - ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) # Get results from VectorDBBench metrics = get_run_result(hnsw_db_label, before_files, results_dir) if not args.dry_run else {} @@ -592,7 +615,7 @@ def main(): "--skip-load", "--skip-search-concurrent", ] - _, profile_output = run_command( + _, profile_output = run_command_capture( profile_cmd, vectordbbench_root, dry_run=False, @@ -663,7 +686,7 @@ def main(): "--skip-load", "--retrain-only", ]) - ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) if ret != 0 and not args.dry_run: print("ERROR: OMEGA build failed!") return 1 @@ -708,7 +731,7 @@ def main(): ] if args.retrain_only: cmd.append("--retrain-only") - ret, _ = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) metrics = get_run_result(omega_db_label, before_files, results_dir) if not args.dry_run else {} load_duration = get_offline_load_duration(omega_path) @@ -738,7 +761,7 @@ def main(): ] if args.retrain_only: profile_cmd.append("--retrain-only") - _, profile_output = run_command( + _, profile_output = run_command_capture( profile_cmd, vectordbbench_root, dry_run=False, From 543d16df05631dbd2e47758ba55f5ed564542b01 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 22 Mar 2026 14:51:14 +0800 Subject: [PATCH 022/126] chore(scripts): 
rename online profiling fields and persist summary beside index --- scripts/benchmark_cohere_10m.py | 87 +++++++++++++++++---------------- scripts/benchmark_cohere_1m.py | 87 +++++++++++++++++---------------- 2 files changed, 88 insertions(+), 86 deletions(-) diff --git a/scripts/benchmark_cohere_10m.py b/scripts/benchmark_cohere_10m.py index 6a2f79d8d..8c1050614 100644 --- a/scripts/benchmark_cohere_10m.py +++ b/scripts/benchmark_cohere_10m.py @@ -115,16 +115,16 @@ def build_hnsw_profile(metrics: dict, output: str) -> dict: query_records = parse_query_records(output, "HNSW query stats:") serial_summary = parse_serial_runner_summary(output) return { - "query_count": len(query_records), - "recall": metrics.get("recall"), - "qps": metrics.get("qps"), - "avg_end2end_latency_ms": avg_metric(query_records, "latency_ms"), - "avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), - "avg_scan_cmps": avg_metric(query_records, "cmps"), - "serial_avg_latency_s": serial_summary.get("avg_latency"), - "serial_p99_s": serial_summary.get("p99"), - "serial_p95_s": serial_summary.get("p95"), - "serial_avg_recall": serial_summary.get("avg_recall"), + "benchmark_recall": metrics.get("recall"), + "benchmark_qps": metrics.get("qps"), + "profile_query_count": len(query_records), + "profile_avg_end2end_latency_ms": avg_metric(query_records, "latency_ms"), + "profile_avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), + "profile_avg_scan_cmps": avg_metric(query_records, "cmps"), + "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), + "profile_serial_p99_s": serial_summary.get("p99"), + "profile_serial_p95_s": serial_summary.get("p95"), + "profile_serial_avg_recall": serial_summary.get("avg_recall"), } @@ -149,37 +149,37 @@ def build_omega_profile(metrics: dict, output: str, hnsw_profile: dict | None) - avg_saved_cmps = hnsw_profile["avg_cmps"] - avg_pairwise_dist_cnt return { - "query_count": len(query_records), - "recall": metrics.get("recall"), - "qps": 
metrics.get("qps"), - "avg_end2end_latency_ms": avg_metric(query_records, "total_ms"), - "avg_cmps": avg_pairwise_dist_cnt, - "avg_scan_cmps": avg_metric(query_records, "scan_cmps"), - "avg_omega_cmps": avg_metric(query_records, "omega_cmps"), - "avg_prediction_calls": avg_metric(query_records, "prediction_calls"), - "avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), - "avg_advance_calls": avg_metric(query_records, "advance_calls"), - "avg_model_overhead_ms": avg_omega_control_ms, - "avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), - "avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), - "avg_feature_prep_ms": avg_metric(query_records, "feature_prep_ms"), - "avg_pure_search_ms": avg_pure_search_ms, - "avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, - "avg_early_stop_saved_cmps": avg_saved_cmps, - "avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), - "serial_avg_latency_s": serial_summary.get("avg_latency"), - "serial_p99_s": serial_summary.get("p99"), - "serial_p95_s": serial_summary.get("p95"), - "serial_avg_recall": serial_summary.get("avg_recall"), + "benchmark_recall": metrics.get("recall"), + "benchmark_qps": metrics.get("qps"), + "profile_query_count": len(query_records), + "profile_avg_end2end_latency_ms": avg_metric(query_records, "total_ms"), + "profile_avg_cmps": avg_pairwise_dist_cnt, + "profile_avg_scan_cmps": avg_metric(query_records, "scan_cmps"), + "profile_avg_omega_cmps": avg_metric(query_records, "omega_cmps"), + "profile_avg_prediction_calls": avg_metric(query_records, "prediction_calls"), + "profile_avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), + "profile_avg_advance_calls": avg_metric(query_records, "advance_calls"), + "profile_avg_model_overhead_ms": avg_omega_control_ms, + "profile_avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), + "profile_avg_prediction_eval_ms": avg_metric(query_records, 
"prediction_eval_ms"), + "profile_avg_feature_prep_ms": avg_metric(query_records, "feature_prep_ms"), + "profile_avg_pure_search_ms": avg_pure_search_ms, + "profile_avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, + "profile_avg_early_stop_saved_cmps": avg_saved_cmps, + "profile_avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), + "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), + "profile_serial_p99_s": serial_summary.get("p99"), + "profile_serial_p95_s": serial_summary.get("p95"), + "profile_serial_avg_recall": serial_summary.get("avg_recall"), } -def profiling_output_path(benchmark_dir: Path) -> Path: - return benchmark_dir / "cohere_10m_profiling_summary.json" +def profiling_output_path(index_path: Path) -> Path: + return index_path / "online_benchmark_summary.json" -def write_profiling_summary(benchmark_dir: Path, payload: dict) -> None: - with open(profiling_output_path(benchmark_dir), "w") as f: +def write_profiling_summary(index_path: Path, payload: dict) -> None: + with open(profiling_output_path(index_path), "w") as f: json.dump(payload, f, indent=2, sort_keys=True) @@ -765,8 +765,9 @@ def main(): ) if results: + summary_index_path = omega_path if not args.skip_omega else hnsw_path write_profiling_summary( - benchmark_dir, + summary_index_path, { "generated_at": datetime.now().isoformat(), "dataset": "cohere_10m", @@ -805,11 +806,11 @@ def main(): for r in results: profile = r.profiling or {} tr = f"{r.target_recall:.2f}" if r.target_recall else "N/A" - avg_lat = profile.get("avg_end2end_latency_ms") - avg_cmps = profile.get("avg_cmps") - avg_pred_calls = profile.get("avg_prediction_calls") - avg_model_ms = profile.get("avg_model_overhead_ms") - saved_cmps = profile.get("avg_early_stop_saved_cmps") + avg_lat = profile.get("profile_avg_end2end_latency_ms") + avg_cmps = profile.get("profile_avg_cmps") + avg_pred_calls = profile.get("profile_avg_prediction_calls") + avg_model_ms = 
profile.get("profile_avg_model_overhead_ms") + saved_cmps = profile.get("profile_avg_early_stop_saved_cmps") print( f"{r.type:<10} " f"{tr:<15} " @@ -820,7 +821,7 @@ def main(): f"{(f'{saved_cmps:.1f}' if saved_cmps is not None else 'N/A'):<12}" ) print() - print(f"Profiling JSON: {profiling_output_path(benchmark_dir)}") + print(f"Profiling JSON: {profiling_output_path(summary_index_path)}") print() print("To view results:") diff --git a/scripts/benchmark_cohere_1m.py b/scripts/benchmark_cohere_1m.py index 3530e39f6..f64b96ad2 100755 --- a/scripts/benchmark_cohere_1m.py +++ b/scripts/benchmark_cohere_1m.py @@ -117,16 +117,16 @@ def build_hnsw_profile(metrics: dict, output: str) -> dict: query_records = parse_query_records(output, "HNSW query stats:") serial_summary = parse_serial_runner_summary(output) return { - "query_count": len(query_records), - "recall": metrics.get("recall"), - "qps": metrics.get("qps"), - "avg_end2end_latency_ms": avg_metric(query_records, "latency_ms"), - "avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), - "avg_scan_cmps": avg_metric(query_records, "cmps"), - "serial_avg_latency_s": serial_summary.get("avg_latency"), - "serial_p99_s": serial_summary.get("p99"), - "serial_p95_s": serial_summary.get("p95"), - "serial_avg_recall": serial_summary.get("avg_recall"), + "benchmark_recall": metrics.get("recall"), + "benchmark_qps": metrics.get("qps"), + "profile_query_count": len(query_records), + "profile_avg_end2end_latency_ms": avg_metric(query_records, "latency_ms"), + "profile_avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), + "profile_avg_scan_cmps": avg_metric(query_records, "cmps"), + "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), + "profile_serial_p99_s": serial_summary.get("p99"), + "profile_serial_p95_s": serial_summary.get("p95"), + "profile_serial_avg_recall": serial_summary.get("avg_recall"), } @@ -151,37 +151,37 @@ def build_omega_profile(metrics: dict, output: str, hnsw_profile: dict | None) - 
avg_saved_cmps = hnsw_profile["avg_cmps"] - avg_pairwise_dist_cnt return { - "query_count": len(query_records), - "recall": metrics.get("recall"), - "qps": metrics.get("qps"), - "avg_end2end_latency_ms": avg_metric(query_records, "total_ms"), - "avg_cmps": avg_pairwise_dist_cnt, - "avg_scan_cmps": avg_metric(query_records, "scan_cmps"), - "avg_omega_cmps": avg_metric(query_records, "omega_cmps"), - "avg_prediction_calls": avg_metric(query_records, "prediction_calls"), - "avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), - "avg_advance_calls": avg_metric(query_records, "advance_calls"), - "avg_model_overhead_ms": avg_omega_control_ms, - "avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), - "avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), - "avg_feature_prep_ms": avg_metric(query_records, "feature_prep_ms"), - "avg_pure_search_ms": avg_pure_search_ms, - "avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, - "avg_early_stop_saved_cmps": avg_saved_cmps, - "avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), - "serial_avg_latency_s": serial_summary.get("avg_latency"), - "serial_p99_s": serial_summary.get("p99"), - "serial_p95_s": serial_summary.get("p95"), - "serial_avg_recall": serial_summary.get("avg_recall"), + "benchmark_recall": metrics.get("recall"), + "benchmark_qps": metrics.get("qps"), + "profile_query_count": len(query_records), + "profile_avg_end2end_latency_ms": avg_metric(query_records, "total_ms"), + "profile_avg_cmps": avg_pairwise_dist_cnt, + "profile_avg_scan_cmps": avg_metric(query_records, "scan_cmps"), + "profile_avg_omega_cmps": avg_metric(query_records, "omega_cmps"), + "profile_avg_prediction_calls": avg_metric(query_records, "prediction_calls"), + "profile_avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), + "profile_avg_advance_calls": avg_metric(query_records, "advance_calls"), + "profile_avg_model_overhead_ms": avg_omega_control_ms, + 
"profile_avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), + "profile_avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), + "profile_avg_feature_prep_ms": avg_metric(query_records, "feature_prep_ms"), + "profile_avg_pure_search_ms": avg_pure_search_ms, + "profile_avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, + "profile_avg_early_stop_saved_cmps": avg_saved_cmps, + "profile_avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), + "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), + "profile_serial_p99_s": serial_summary.get("p99"), + "profile_serial_p95_s": serial_summary.get("p95"), + "profile_serial_avg_recall": serial_summary.get("avg_recall"), } -def profiling_output_path(benchmark_dir: Path) -> Path: - return benchmark_dir / "cohere_1m_profiling_summary.json" +def profiling_output_path(index_path: Path) -> Path: + return index_path / "online_benchmark_summary.json" -def write_profiling_summary(benchmark_dir: Path, payload: dict) -> None: - with open(profiling_output_path(benchmark_dir), "w") as f: +def write_profiling_summary(index_path: Path, payload: dict) -> None: + with open(profiling_output_path(index_path), "w") as f: json.dump(payload, f, indent=2, sort_keys=True) @@ -789,8 +789,9 @@ def main(): # ============ Summary ============ if results: + summary_index_path = omega_path if not args.skip_omega else hnsw_path write_profiling_summary( - benchmark_dir, + summary_index_path, { "generated_at": datetime.now().isoformat(), "dataset": "cohere_1m", @@ -829,11 +830,11 @@ def main(): for r in results: profile = r.profiling or {} tr = f"{r.target_recall:.2f}" if r.target_recall else "N/A" - avg_lat = profile.get("avg_end2end_latency_ms") - avg_cmps = profile.get("avg_cmps") - avg_pred_calls = profile.get("avg_prediction_calls") - avg_model_ms = profile.get("avg_model_overhead_ms") - saved_cmps = profile.get("avg_early_stop_saved_cmps") + avg_lat = 
profile.get("profile_avg_end2end_latency_ms") + avg_cmps = profile.get("profile_avg_cmps") + avg_pred_calls = profile.get("profile_avg_prediction_calls") + avg_model_ms = profile.get("profile_avg_model_overhead_ms") + saved_cmps = profile.get("profile_avg_early_stop_saved_cmps") print( f"{r.type:<10} " f"{tr:<15} " @@ -844,7 +845,7 @@ def main(): f"{(f'{saved_cmps:.1f}' if saved_cmps is not None else 'N/A'):<12}" ) print() - print(f"Profiling JSON: {profiling_output_path(benchmark_dir)}") + print(f"Profiling JSON: {profiling_output_path(summary_index_path)}") print() print("To view results:") From 9d750ffd9bc0c4285c5e4ba8422887d93f77e653 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 22 Mar 2026 18:17:11 +0800 Subject: [PATCH 023/126] fix(scripts): persist per-index profiling summaries and stabilize capture --- scripts/benchmark_cohere_10m.py | 91 ++++++++++++++++++-------------- scripts/benchmark_cohere_1m.py | 92 +++++++++++++++++++-------------- 2 files changed, 107 insertions(+), 76 deletions(-) diff --git a/scripts/benchmark_cohere_10m.py b/scripts/benchmark_cohere_10m.py index 8c1050614..ba0a269dd 100644 --- a/scripts/benchmark_cohere_10m.py +++ b/scripts/benchmark_cohere_10m.py @@ -15,6 +15,7 @@ import os import importlib import re +import tempfile from dataclasses import dataclass from datetime import datetime from pathlib import Path @@ -51,8 +52,11 @@ def resolve_paths( if benchmark_dir_arg else Path(os.environ.get("ZVEC_BENCHMARK_DIR", zvec_root / "benchmark_results")).resolve() ) + source_results_dir = vectordbbench_root / "vectordb_bench" / "results" / "Zvec" if results_dir_arg: results_dir = Path(results_dir_arg).resolve() + elif source_results_dir.exists(): + results_dir = source_results_dir else: results_dir = None try: @@ -145,8 +149,8 @@ def build_omega_profile(metrics: dict, output: str, hnsw_profile: dict | None) - model_overhead_cmp_equiv = avg_omega_control_ms / cmp_time_ms avg_saved_cmps = None - if hnsw_profile and 
hnsw_profile.get("avg_cmps") is not None and avg_pairwise_dist_cnt is not None: - avg_saved_cmps = hnsw_profile["avg_cmps"] - avg_pairwise_dist_cnt + if hnsw_profile and hnsw_profile.get("profile_avg_cmps") is not None and avg_pairwise_dist_cnt is not None: + avg_saved_cmps = hnsw_profile["profile_avg_cmps"] - avg_pairwise_dist_cnt return { "benchmark_recall": metrics.get("recall"), @@ -183,6 +187,38 @@ def write_profiling_summary(index_path: Path, payload: dict) -> None: json.dump(payload, f, indent=2, sort_keys=True) +def write_grouped_profiling_summaries(dataset: str, results: list[BenchmarkResult]) -> list[Path]: + written_paths: list[Path] = [] + grouped: dict[str, list[BenchmarkResult]] = {} + for result in results: + grouped.setdefault(result.path, []).append(result) + + for path_str, grouped_results in grouped.items(): + index_path = Path(path_str) + write_profiling_summary( + index_path, + { + "generated_at": datetime.now().isoformat(), + "dataset": dataset, + "results": [ + { + "type": result.type, + "target_recall": result.target_recall, + "path": result.path, + "load_duration_s": result.load_duration, + "qps": result.qps, + "recall": result.recall, + "profiling": result.profiling, + } + for result in grouped_results + ], + }, + ) + written_paths.append(profiling_output_path(index_path)) + + return written_paths + + def get_latest_result(db_label: str, results_dir: Path) -> dict: if not results_dir.exists(): return {} @@ -406,22 +442,19 @@ def run_command_capture( env = os.environ.copy() if extra_env: env.update(extra_env) + with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".log") as tmp: + tmp_path = Path(tmp.name) - process = subprocess.Popen( - cmd, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - bufsize=1, - ) - lines: list[str] = [] - assert process.stdout is not None - for line in process.stdout: - print(line, end="") - lines.append(line) - return process.wait(), "".join(lines) + try: + 
with tmp_path.open("w+") as tmp: + result = subprocess.run(cmd, cwd=cwd, env=env, stdout=tmp, stderr=subprocess.STDOUT, text=True) + tmp.flush() + tmp.seek(0) + output = tmp.read() + print(output, end="" if output.endswith("\n") or not output else "\n") + return result.returncode, output + finally: + tmp_path.unlink(missing_ok=True) def main(): @@ -765,26 +798,7 @@ def main(): ) if results: - summary_index_path = omega_path if not args.skip_omega else hnsw_path - write_profiling_summary( - summary_index_path, - { - "generated_at": datetime.now().isoformat(), - "dataset": "cohere_10m", - "results": [ - { - "type": result.type, - "target_recall": result.target_recall, - "path": result.path, - "load_duration_s": result.load_duration, - "qps": result.qps, - "recall": result.recall, - "profiling": result.profiling, - } - for result in results - ], - }, - ) + written_summary_paths = write_grouped_profiling_summaries("cohere_10m", results) print("\n\n" + "=" * 70) print("Benchmark Summary") print("=" * 70) @@ -821,7 +835,8 @@ def main(): f"{(f'{saved_cmps:.1f}' if saved_cmps is not None else 'N/A'):<12}" ) print() - print(f"Profiling JSON: {profiling_output_path(summary_index_path)}") + for path in written_summary_paths: + print(f"Profiling JSON: {path}") print() print("To view results:") diff --git a/scripts/benchmark_cohere_1m.py b/scripts/benchmark_cohere_1m.py index f64b96ad2..4569bc711 100755 --- a/scripts/benchmark_cohere_1m.py +++ b/scripts/benchmark_cohere_1m.py @@ -15,6 +15,7 @@ import os import importlib import re +import tempfile from datetime import datetime from pathlib import Path from dataclasses import dataclass @@ -52,9 +53,12 @@ def resolve_paths( if benchmark_dir_arg else Path(os.environ.get("ZVEC_BENCHMARK_DIR", zvec_root / "benchmark_results")).resolve() ) + source_results_dir = vectordbbench_root / "vectordb_bench" / "results" / "Zvec" if results_dir_arg: results_dir = Path(results_dir_arg).resolve() + elif source_results_dir.exists(): + results_dir 
= source_results_dir else: results_dir = None try: @@ -147,8 +151,8 @@ def build_omega_profile(metrics: dict, output: str, hnsw_profile: dict | None) - model_overhead_cmp_equiv = avg_omega_control_ms / cmp_time_ms avg_saved_cmps = None - if hnsw_profile and hnsw_profile.get("avg_cmps") is not None and avg_pairwise_dist_cnt is not None: - avg_saved_cmps = hnsw_profile["avg_cmps"] - avg_pairwise_dist_cnt + if hnsw_profile and hnsw_profile.get("profile_avg_cmps") is not None and avg_pairwise_dist_cnt is not None: + avg_saved_cmps = hnsw_profile["profile_avg_cmps"] - avg_pairwise_dist_cnt return { "benchmark_recall": metrics.get("recall"), @@ -185,6 +189,38 @@ def write_profiling_summary(index_path: Path, payload: dict) -> None: json.dump(payload, f, indent=2, sort_keys=True) +def write_grouped_profiling_summaries(dataset: str, results: list[BenchmarkResult]) -> list[Path]: + written_paths: list[Path] = [] + grouped: dict[str, list[BenchmarkResult]] = {} + for result in results: + grouped.setdefault(result.path, []).append(result) + + for path_str, grouped_results in grouped.items(): + index_path = Path(path_str) + write_profiling_summary( + index_path, + { + "generated_at": datetime.now().isoformat(), + "dataset": dataset, + "results": [ + { + "type": result.type, + "target_recall": result.target_recall, + "path": result.path, + "load_duration_s": result.load_duration, + "qps": result.qps, + "recall": result.recall, + "profiling": result.profiling, + } + for result in grouped_results + ], + }, + ) + written_paths.append(profiling_output_path(index_path)) + + return written_paths + + def get_latest_result(db_label: str, results_dir: Path) -> dict: """Get the latest benchmark result for a given db_label from VectorDBBench.""" if not results_dir.exists(): @@ -414,21 +450,19 @@ def run_command_capture( env = os.environ.copy() if extra_env: env.update(extra_env) - process = subprocess.Popen( - cmd, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, 
- text=True, - bufsize=1, - ) - lines: list[str] = [] - assert process.stdout is not None - for line in process.stdout: - print(line, end="") - lines.append(line) - return process.wait(), "".join(lines) + with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".log") as tmp: + tmp_path = Path(tmp.name) + + try: + with tmp_path.open("w+") as tmp: + result = subprocess.run(cmd, cwd=cwd, env=env, stdout=tmp, stderr=subprocess.STDOUT, text=True) + tmp.flush() + tmp.seek(0) + output = tmp.read() + print(output, end="" if output.endswith("\n") or not output else "\n") + return result.returncode, output + finally: + tmp_path.unlink(missing_ok=True) def main(): @@ -789,26 +823,7 @@ def main(): # ============ Summary ============ if results: - summary_index_path = omega_path if not args.skip_omega else hnsw_path - write_profiling_summary( - summary_index_path, - { - "generated_at": datetime.now().isoformat(), - "dataset": "cohere_1m", - "results": [ - { - "type": result.type, - "target_recall": result.target_recall, - "path": result.path, - "load_duration_s": result.load_duration, - "qps": result.qps, - "recall": result.recall, - "profiling": result.profiling, - } - for result in results - ], - }, - ) + written_summary_paths = write_grouped_profiling_summaries("cohere_1m", results) print("\n\n" + "=" * 70) print("Benchmark Summary") print("=" * 70) @@ -845,7 +860,8 @@ def main(): f"{(f'{saved_cmps:.1f}' if saved_cmps is not None else 'N/A'):<12}" ) print() - print(f"Profiling JSON: {profiling_output_path(summary_index_path)}") + for path in written_summary_paths: + print(f"Profiling JSON: {path}") print() print("To view results:") From 308d8ba9577f25623a3bc51efdaf136a84ec2815 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 22 Mar 2026 18:30:25 +0800 Subject: [PATCH 024/126] fix(scripts): enable zvec info logs during profiling passes --- scripts/benchmark_cohere_10m.py | 4 +++- scripts/benchmark_cohere_1m.py | 4 +++- 2 files changed, 6 insertions(+), 2 
deletions(-) diff --git a/scripts/benchmark_cohere_10m.py b/scripts/benchmark_cohere_10m.py index ba0a269dd..d7b2f2eca 100644 --- a/scripts/benchmark_cohere_10m.py +++ b/scripts/benchmark_cohere_10m.py @@ -517,7 +517,7 @@ def main(): EF_SEARCH = 118 QUANTIZE_TYPE = "int8" USE_REFINER = True - NUM_CONCURRENCY = "12,14,16,18,20" + NUM_CONCURRENCY = "16" CONCURRENCY_DURATION = 30 K = 100 @@ -634,6 +634,7 @@ def main(): vectordbbench_root, dry_run=False, extra_env={ + "ZVEC_LOG_LEVEL": "INFO", "ZVEC_HNSW_LOG_QUERY_STATS": "1", "ZVEC_HNSW_LOG_QUERY_LIMIT": "2000", }, @@ -774,6 +775,7 @@ def main(): vectordbbench_root, dry_run=False, extra_env={ + "ZVEC_LOG_LEVEL": "INFO", "ZVEC_OMEGA_LOG_QUERY_STATS": "1", "ZVEC_OMEGA_LOG_QUERY_LIMIT": "2000", }, diff --git a/scripts/benchmark_cohere_1m.py b/scripts/benchmark_cohere_1m.py index 4569bc711..ffa9a347b 100755 --- a/scripts/benchmark_cohere_1m.py +++ b/scripts/benchmark_cohere_1m.py @@ -524,7 +524,7 @@ def main(): M = 15 EF_SEARCH = 180 QUANTIZE_TYPE = "int8" - NUM_CONCURRENCY = "12,14,16,18,20" + NUM_CONCURRENCY = "16" CONCURRENCY_DURATION = 30 K = 100 @@ -654,6 +654,7 @@ def main(): vectordbbench_root, dry_run=False, extra_env={ + "ZVEC_LOG_LEVEL": "INFO", "ZVEC_HNSW_LOG_QUERY_STATS": "1", "ZVEC_HNSW_LOG_QUERY_LIMIT": "2000", }, @@ -800,6 +801,7 @@ def main(): vectordbbench_root, dry_run=False, extra_env={ + "ZVEC_LOG_LEVEL": "INFO", "ZVEC_OMEGA_LOG_QUERY_STATS": "1", "ZVEC_OMEGA_LOG_QUERY_LIMIT": "2000", }, From e548017d704bb177579da58effc94df7be0ae85d Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 22 Mar 2026 19:01:49 +0800 Subject: [PATCH 025/126] fix(omega): align search-core timing and reduce traversal overhead --- src/core/algorithm/omega/omega_streamer.cc | 47 +++++++++++++--------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index d39ec56b9..770afa031 100644 --- 
a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -165,7 +165,7 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, Context::Pointer &context, bool enable_early_stopping) const { - auto query_start = std::chrono::steady_clock::now(); + auto query_total_start = std::chrono::steady_clock::now(); uint64_t omega_control_time_ns = 0; // Cast context to OmegaContext to access training_query_id @@ -237,6 +237,8 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm // Initialize context for search hnsw_ctx->clear(); hnsw_ctx->resize_results(count); + hnsw_ctx->check_need_adjuct_ctx(entity_.doc_cnt()); + auto query_core_start = std::chrono::steady_clock::now(); hnsw_ctx->reset_query(query); // Get entity and distance calculator from context @@ -351,25 +353,25 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm // Get neighbors of current node const Neighbors neighbors = entity.get_neighbors(0, current_node); + ailego_prefetch(neighbors.data); if (neighbors.size() == 0) continue; // Prepare to compute distances - std::vector unvisited_neighbors; + node_id_t neighbor_ids[neighbors.size()]; + uint32_t size = 0; for (uint32_t i = 0; i < neighbors.size(); ++i) { node_id_t neighbor = neighbors[i]; if (!visit_filter.visited(neighbor)) { visit_filter.set_visited(neighbor); - unvisited_neighbors.push_back(neighbor); + neighbor_ids[size++] = neighbor; } } - if (unvisited_neighbors.empty()) continue; + if (size == 0) continue; // Get neighbor vectors std::vector neighbor_vec_blocks; - int ret = entity.get_vector(unvisited_neighbors.data(), - unvisited_neighbors.size(), - neighbor_vec_blocks); + int ret = entity.get_vector(neighbor_ids, size, neighbor_vec_blocks); if (ret != 0) { break; } @@ -378,21 +380,21 @@ int 
OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static constexpr node_id_t PREFETCH_STEP = 2; for (size_t i = 0; i < std::min(static_cast(BATCH_SIZE * PREFETCH_STEP), - unvisited_neighbors.size()); + static_cast(size)); ++i) { ailego_prefetch(neighbor_vec_blocks[i].data()); } - float dists[unvisited_neighbors.size()]; - const void *neighbor_vecs[unvisited_neighbors.size()]; - for (size_t i = 0; i < unvisited_neighbors.size(); ++i) { + float dists[size]; + const void *neighbor_vecs[size]; + for (uint32_t i = 0; i < size; ++i) { neighbor_vecs[i] = neighbor_vec_blocks[i].data(); } - dc.batch_dist(neighbor_vecs, unvisited_neighbors.size(), dists); + dc.batch_dist(neighbor_vecs, size, dists); // Compute distances and update candidates - for (size_t i = 0; i < unvisited_neighbors.size(); ++i) { - node_id_t neighbor = unvisited_neighbors[i]; + for (uint32_t i = 0; i < size; ++i) { + node_id_t neighbor = neighbor_ids[i]; dist_t neighbor_dist = dists[i]; // Reference semantics: @@ -458,8 +460,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm } } - // Convert results to context format - hnsw_ctx->topk_to_result(); + auto query_core_end = std::chrono::steady_clock::now(); // Get final statistics int hops, cmps, collected_gt; @@ -478,7 +479,11 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm unsigned long long max_prediction_calls_per_should_stop = 0; uint64_t query_total_time_ns = std::chrono::duration_cast( - std::chrono::steady_clock::now() - query_start) + std::chrono::steady_clock::now() - query_total_start) + .count(); + uint64_t query_core_time_ns = + std::chrono::duration_cast( + query_core_end - query_core_start) .count(); uint64_t query_seq = query_stats_sequence_.fetch_add(1); omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); @@ -500,8 +505,8 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm size_t scan_cmps = 
hnsw_ctx->get_scan_num(); uint64_t pairwise_dist_cnt = hnsw_ctx->get_pairwise_dist_num(); uint64_t pure_search_time_ns = - query_total_time_ns > omega_control_time_ns - ? (query_total_time_ns - omega_control_time_ns) + query_core_time_ns > omega_control_time_ns + ? (query_core_time_ns - omega_control_time_ns) : 0; bool expected = false; if (debug_stats_logged_.compare_exchange_strong(expected, true)) { @@ -558,6 +563,10 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm } } + // Match HNSW timing semantics: result materialization is outside the + // search-core timer and happens after logging. + hnsw_ctx->topk_to_result(); + // Collect training records (only in training mode) if (training_mode_enabled_) { size_t record_count = omega_search_get_training_records_count(omega_search); From 5300d3518e99614ef37848f5e99ba8f3ec2e3cbe Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 22 Mar 2026 22:56:44 +0800 Subject: [PATCH 026/126] perf(omega): add compile-time control timing profiling and align benchmark summaries --- CMakeLists.txt | 6 + scripts/benchmark_cohere_10m.py | 11 +- scripts/benchmark_cohere_1m.py | 11 +- src/core/algorithm/omega/omega_searcher.cc | 24 ++- src/core/algorithm/omega/omega_streamer.cc | 194 +++++++++++++-------- thirdparty/omega/OMEGALib | 2 +- 6 files changed, 159 insertions(+), 89 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 935ab0da1..bab4e6d74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,12 @@ if(ZVEC_ENABLE_OMEGA) add_definitions(-DZVEC_ENABLE_OMEGA) endif() +option(ZVEC_OMEGA_PROFILE_CONTROL_TIMING "Enable OMEGA control-path profiling timing" OFF) +message(STATUS "ZVEC_OMEGA_PROFILE_CONTROL_TIMING:${ZVEC_OMEGA_PROFILE_CONTROL_TIMING}") +if(ZVEC_OMEGA_PROFILE_CONTROL_TIMING) + add_definitions(-DZVEC_OMEGA_PROFILE_CONTROL_TIMING) +endif() + cc_directory(thirdparty) cc_directories(src) cc_directories(tests) diff --git a/scripts/benchmark_cohere_10m.py 
b/scripts/benchmark_cohere_10m.py index d7b2f2eca..1273d7f24 100644 --- a/scripts/benchmark_cohere_10m.py +++ b/scripts/benchmark_cohere_10m.py @@ -137,12 +137,16 @@ def build_omega_profile(metrics: dict, output: str, hnsw_profile: dict | None) - serial_summary = parse_serial_runner_summary(output) avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") + avg_core_search_ms = avg_metric(query_records, "core_search_ms") avg_pure_search_ms = avg_metric(query_records, "pure_search_ms") avg_omega_control_ms = avg_metric(query_records, "omega_control_ms") + avg_search_only_ms = ( + avg_pure_search_ms if avg_pure_search_ms is not None else avg_core_search_ms + ) cmp_time_ms = None - if avg_pairwise_dist_cnt and avg_pairwise_dist_cnt > 0 and avg_pure_search_ms is not None: - cmp_time_ms = avg_pure_search_ms / avg_pairwise_dist_cnt + if avg_pairwise_dist_cnt and avg_pairwise_dist_cnt > 0 and avg_search_only_ms is not None: + cmp_time_ms = avg_search_only_ms / avg_pairwise_dist_cnt model_overhead_cmp_equiv = None if cmp_time_ms and cmp_time_ms > 0 and avg_omega_control_ms is not None: @@ -164,9 +168,10 @@ def build_omega_profile(metrics: dict, output: str, hnsw_profile: dict | None) - "profile_avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), "profile_avg_advance_calls": avg_metric(query_records, "advance_calls"), "profile_avg_model_overhead_ms": avg_omega_control_ms, + "profile_avg_setup_ms": avg_metric(query_records, "setup_ms"), "profile_avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), "profile_avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), - "profile_avg_feature_prep_ms": avg_metric(query_records, "feature_prep_ms"), + "profile_avg_core_search_ms": avg_core_search_ms, "profile_avg_pure_search_ms": avg_pure_search_ms, "profile_avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, "profile_avg_early_stop_saved_cmps": avg_saved_cmps, diff --git a/scripts/benchmark_cohere_1m.py 
b/scripts/benchmark_cohere_1m.py index ffa9a347b..6847e5925 100755 --- a/scripts/benchmark_cohere_1m.py +++ b/scripts/benchmark_cohere_1m.py @@ -139,12 +139,16 @@ def build_omega_profile(metrics: dict, output: str, hnsw_profile: dict | None) - serial_summary = parse_serial_runner_summary(output) avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") + avg_core_search_ms = avg_metric(query_records, "core_search_ms") avg_pure_search_ms = avg_metric(query_records, "pure_search_ms") avg_omega_control_ms = avg_metric(query_records, "omega_control_ms") + avg_search_only_ms = ( + avg_pure_search_ms if avg_pure_search_ms is not None else avg_core_search_ms + ) cmp_time_ms = None - if avg_pairwise_dist_cnt and avg_pairwise_dist_cnt > 0 and avg_pure_search_ms is not None: - cmp_time_ms = avg_pure_search_ms / avg_pairwise_dist_cnt + if avg_pairwise_dist_cnt and avg_pairwise_dist_cnt > 0 and avg_search_only_ms is not None: + cmp_time_ms = avg_search_only_ms / avg_pairwise_dist_cnt model_overhead_cmp_equiv = None if cmp_time_ms and cmp_time_ms > 0 and avg_omega_control_ms is not None: @@ -166,9 +170,10 @@ def build_omega_profile(metrics: dict, output: str, hnsw_profile: dict | None) - "profile_avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), "profile_avg_advance_calls": avg_metric(query_records, "advance_calls"), "profile_avg_model_overhead_ms": avg_omega_control_ms, + "profile_avg_setup_ms": avg_metric(query_records, "setup_ms"), "profile_avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), "profile_avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), - "profile_avg_feature_prep_ms": avg_metric(query_records, "feature_prep_ms"), + "profile_avg_core_search_ms": avg_core_search_ms, "profile_avg_pure_search_ms": avg_pure_search_ms, "profile_avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, "profile_avg_early_stop_saved_cmps": avg_saved_cmps, diff --git a/src/core/algorithm/omega/omega_searcher.cc 
b/src/core/algorithm/omega/omega_searcher.cc index 2e572f59a..0014f9230 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -265,6 +265,12 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet LOG_WARN("Failed to create OMEGA search context, falling back to HNSW"); return HnswSearcher::search_impl(query, qmeta, count, context); } + omega::SearchContext* omega_search_ctx = omega_search_get_cpp_context(omega_search); + if (omega_search_ctx == nullptr) { + omega_search_destroy(omega_search); + LOG_WARN("Failed to get OMEGA search context, falling back to HNSW"); + return HnswSearcher::search_impl(query, qmeta, count, context); + } // Enable training mode if active (CRITICAL: must be before search) if (training_mode_enabled_) { @@ -333,7 +339,7 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet } // Set dist_start for OMEGA - omega_search_set_dist_start(omega_search, dist); + omega_search_ctx->SetDistStart(dist); // Now perform OMEGA-enhanced search on layer 0 candidates.clear(); @@ -346,7 +352,7 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet candidates.emplace(entry_point, dist); // Report initial visit to OMEGA - omega_search_report_visit_candidate(omega_search, entry_point, dist, 1); + omega_search_ctx->ReportVisitCandidate(entry_point, dist, true); dist_t lowerBound = dist; @@ -358,7 +364,7 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet dist_t candidate_dist = top->second; // Reference semantics: count the hop before the stop-condition check. 
- omega_search_report_hop(omega_search); + omega_search_ctx->ReportHop(); // Standard HNSW stopping condition if (candidate_dist > lowerBound && topk_heap.size() >= ef) { @@ -398,13 +404,13 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet bool should_consider_candidate = (topk_heap.size() < ef || neighbor_dist < lowerBound); - omega_search_report_visit_candidate(omega_search, neighbor, neighbor_dist, - should_consider_candidate ? 1 : 0); + omega_search_ctx->ReportVisitCandidate(neighbor, neighbor_dist, + should_consider_candidate); - if (!training_mode_enabled_ && omega_search_should_predict(omega_search)) { - if (omega_search_should_stop(omega_search)) { + if (!training_mode_enabled_ && omega_search_ctx->ShouldPredict()) { + if (omega_search_ctx->ShouldStopEarly()) { int hops, cmps, collected_gt; - omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); + omega_search_ctx->GetStats(&hops, &cmps, &collected_gt); LOG_DEBUG("OMEGA early stop: cmps=%d, hops=%d, collected_gt=%d", cmps, hops, collected_gt); early_stop_hit = true; @@ -444,7 +450,7 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet // Get final statistics int hops, cmps, collected_gt; - omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); + omega_search_ctx->GetStats(&hops, &cmps, &collected_gt); LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu", cmps, hops, topk_heap.size()); diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 770afa031..132ff136e 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -22,8 +22,8 @@ #include "omega_context.h" #include "omega_params.h" #include +#include #include -#include #include namespace zvec { @@ -165,7 +165,7 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, int OmegaStreamer::omega_search_impl(const void *query, const 
IndexQueryMeta &qmeta, uint32_t count, Context::Pointer &context, bool enable_early_stopping) const { - auto query_total_start = std::chrono::steady_clock::now(); + auto query_total_start = omega::ProfilingTimer::Now(); uint64_t omega_control_time_ns = 0; // Cast context to OmegaContext to access training_query_id @@ -206,6 +206,12 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm LOG_ERROR("Failed to create OMEGA search context"); return IndexError_Runtime; } + omega::SearchContext* omega_search_ctx = omega_search_get_cpp_context(omega_search); + if (omega_search_ctx == nullptr) { + omega_search_destroy(omega_search); + LOG_ERROR("Failed to get OMEGA search context"); + return IndexError_Runtime; + } // Enable training mode if active (CRITICAL: must be before search) if (training_mode_enabled_) { @@ -238,7 +244,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm hnsw_ctx->clear(); hnsw_ctx->resize_results(count); hnsw_ctx->check_need_adjuct_ctx(entity_.doc_cnt()); - auto query_core_start = std::chrono::steady_clock::now(); + auto query_core_start = omega::ProfilingTimer::Now(); hnsw_ctx->reset_query(query); // Get entity and distance calculator from context @@ -295,14 +301,16 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm } // Set dist_start for OMEGA +#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING { - auto control_start = std::chrono::steady_clock::now(); - omega_search_set_dist_start(omega_search, dist); - omega_control_time_ns += - std::chrono::duration_cast( - std::chrono::steady_clock::now() - control_start) - .count(); + auto control_start = omega::ProfilingTimer::Now(); + omega_search_ctx->SetDistStart(dist); + omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( + control_start, omega::ProfilingTimer::Now()); } +#else + omega_search_ctx->SetDistStart(dist); +#endif // Perform HNSW search on layer 0 with OMEGA candidates.clear(); @@ -315,14 +323,16 @@ 
int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm candidates.emplace(entry_point, dist); // Report initial visit to OMEGA +#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING { - auto control_start = std::chrono::steady_clock::now(); - omega_search_report_visit_candidate(omega_search, entry_point, dist, 1); - omega_control_time_ns += - std::chrono::duration_cast( - std::chrono::steady_clock::now() - control_start) - .count(); + auto control_start = omega::ProfilingTimer::Now(); + omega_search_ctx->ReportVisitCandidate(entry_point, dist, true); + omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( + control_start, omega::ProfilingTimer::Now()); } +#else + omega_search_ctx->ReportVisitCandidate(entry_point, dist, true); +#endif dist_t lowerBound = dist; @@ -335,14 +345,16 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm // Reference semantics: count the hop as soon as the current candidate is // examined, before stop-condition evaluation. +#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING { - auto control_start = std::chrono::steady_clock::now(); - omega_search_report_hop(omega_search); - omega_control_time_ns += - std::chrono::duration_cast( - std::chrono::steady_clock::now() - control_start) - .count(); + auto control_start = omega::ProfilingTimer::Now(); + omega_search_ctx->ReportHop(); + omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( + control_start, omega::ProfilingTimer::Now()); } +#else + omega_search_ctx->ReportHop(); +#endif // Standard HNSW stopping condition if (topk_heap.full() && candidate_dist > lowerBound) { @@ -403,35 +415,42 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm // result-set-sized top-k structure, not by ef admission alone. 
bool should_consider_candidate = (!topk_heap.full() || neighbor_dist < lowerBound); +#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING { - auto control_start = std::chrono::steady_clock::now(); - omega_search_report_visit_candidate(omega_search, neighbor, neighbor_dist, - should_consider_candidate ? 1 : 0); - omega_control_time_ns += - std::chrono::duration_cast( - std::chrono::steady_clock::now() - control_start) - .count(); + auto control_start = omega::ProfilingTimer::Now(); + omega_search_ctx->ReportVisitCandidate(neighbor, neighbor_dist, + should_consider_candidate); + omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( + control_start, omega::ProfilingTimer::Now()); } +#else + omega_search_ctx->ReportVisitCandidate(neighbor, neighbor_dist, + should_consider_candidate); +#endif bool should_predict = false; if (enable_early_stopping) { - auto control_start = std::chrono::steady_clock::now(); - should_predict = omega_search_should_predict(omega_search); - omega_control_time_ns += - std::chrono::duration_cast( - std::chrono::steady_clock::now() - control_start) - .count(); +#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING + auto control_start = omega::ProfilingTimer::Now(); + should_predict = omega_search_ctx->ShouldPredict(); + omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( + control_start, omega::ProfilingTimer::Now()); +#else + should_predict = omega_search_ctx->ShouldPredict(); +#endif } if (enable_early_stopping && should_predict) { - auto control_start = std::chrono::steady_clock::now(); - bool should_stop = omega_search_should_stop(omega_search); - omega_control_time_ns += - std::chrono::duration_cast( - std::chrono::steady_clock::now() - control_start) - .count(); +#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING + auto control_start = omega::ProfilingTimer::Now(); + bool should_stop = omega_search_ctx->ShouldStopEarly(); + omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( + control_start, omega::ProfilingTimer::Now()); +#else + bool should_stop = 
omega_search_ctx->ShouldStopEarly(); +#endif if (should_stop) { int hops, cmps, collected_gt; - omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); + omega_search_ctx->GetStats(&hops, &cmps, &collected_gt); LOG_DEBUG("OMEGA early stop: cmps=%d, hops=%d, collected_gt=%d", cmps, hops, collected_gt); early_stop_hit = true; @@ -460,7 +479,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm } } - auto query_core_end = std::chrono::steady_clock::now(); + auto query_core_end = omega::ProfilingTimer::Now(); // Get final statistics int hops, cmps, collected_gt; @@ -478,36 +497,46 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm unsigned long long should_stop_calls_with_advance = 0; unsigned long long max_prediction_calls_per_should_stop = 0; uint64_t query_total_time_ns = - std::chrono::duration_cast( - std::chrono::steady_clock::now() - query_total_start) - .count(); + omega::ProfilingTimer::ElapsedNs(query_total_start, + omega::ProfilingTimer::Now()); uint64_t query_core_time_ns = - std::chrono::duration_cast( - query_core_end - query_core_start) - .count(); + omega::ProfilingTimer::ElapsedNs(query_core_start, query_core_end); + uint64_t query_setup_time_ns = + query_total_time_ns > query_core_time_ns + ? 
(query_total_time_ns - query_core_time_ns) + : 0; uint64_t query_seq = query_stats_sequence_.fetch_add(1); - omega_search_get_stats(omega_search, &hops, &cmps, &collected_gt); - omega_search_get_debug_stats(omega_search, &predicted_recall_avg, - &predicted_recall_at_target, - &omega_early_stop_hit, - &should_stop_calls, &prediction_calls, - &should_stop_time_ns, - &prediction_eval_time_ns, - &sorted_window_time_ns, - &average_recall_eval_time_ns, - &prediction_feature_prep_time_ns, - &collected_gt_advance_count, - &should_stop_calls_with_advance, - &max_prediction_calls_per_should_stop); + omega_search_ctx->GetStats(&hops, &cmps, &collected_gt); + predicted_recall_avg = omega_search_ctx->GetLastPredictedRecallAvg(); + predicted_recall_at_target = + omega_search_ctx->GetLastPredictedRecallAtTarget(); + omega_early_stop_hit = omega_search_ctx->EarlyStopHit() ? 1 : 0; + should_stop_calls = omega_search_ctx->GetShouldStopCalls(); + prediction_calls = omega_search_ctx->GetPredictionCalls(); + should_stop_time_ns = omega_search_ctx->GetShouldStopTimeNs(); + prediction_eval_time_ns = omega_search_ctx->GetPredictionEvalTimeNs(); + sorted_window_time_ns = omega_search_ctx->GetSortedWindowTimeNs(); + average_recall_eval_time_ns = omega_search_ctx->GetAverageRecallEvalTimeNs(); + prediction_feature_prep_time_ns = + omega_search_ctx->GetPredictionFeaturePrepTimeNs(); + collected_gt_advance_count = omega_search_ctx->GetCollectedGtAdvanceCount(); + should_stop_calls_with_advance = + omega_search_ctx->GetShouldStopCallsWithAdvance(); + max_prediction_calls_per_should_stop = + omega_search_ctx->GetMaxPredictionCallsPerShouldStop(); LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu, early_stop=%d", cmps, hops, topk_heap.size(), enable_early_stopping); if (enable_early_stopping) { size_t scan_cmps = hnsw_ctx->get_scan_num(); uint64_t pairwise_dist_cnt = hnsw_ctx->get_pairwise_dist_num(); - uint64_t pure_search_time_ns = + uint64_t pure_search_time_ns = 0; + uint64_t 
core_search_time_ns = query_core_time_ns; +#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING + pure_search_time_ns = query_core_time_ns > omega_control_time_ns ? (query_core_time_ns - omega_control_time_ns) : 0; +#endif bool expected = false; if (debug_stats_logged_.compare_exchange_strong(expected, true)) { LOG_INFO("OMEGA runtime stats: model_loaded=%d target_recall=%.4f " @@ -517,8 +546,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm "should_stop_calls=%llu prediction_calls=%llu " "advance_calls=%llu collected_gt_advance=%llu " "max_pred_per_stop=%llu should_stop_ms=%.3f " - "prediction_eval_ms=%.3f sorted_window_ms=%.3f " - "avg_recall_eval_ms=%.3f feature_prep_ms=%.3f", + "prediction_eval_ms=%.3f", IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, static_cast(pairwise_dist_cnt), cmps, collected_gt, @@ -528,12 +556,10 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm should_stop_calls_with_advance, collected_gt_advance_count, max_prediction_calls_per_should_stop, static_cast(should_stop_time_ns) / 1e6, - static_cast(prediction_eval_time_ns) / 1e6, - static_cast(sorted_window_time_ns) / 1e6, - static_cast(average_recall_eval_time_ns) / 1e6, - static_cast(prediction_feature_prep_time_ns) / 1e6); + static_cast(prediction_eval_time_ns) / 1e6); } if (ShouldLogQueryStats(query_seq)) { +#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " @@ -541,8 +567,8 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm "prediction_calls=%llu advance_calls=%llu " "collected_gt_advance=%llu max_pred_per_stop=%llu " "should_stop_ms=%.3f prediction_eval_ms=%.3f " - "sorted_window_ms=%.3f avg_recall_eval_ms=%.3f " - "feature_prep_ms=%.3f omega_control_ms=%.3f pure_search_ms=%.3f total_ms=%.3f", 
+ "setup_ms=%.3f " + "omega_control_ms=%.3f pure_search_ms=%.3f total_ms=%.3f", static_cast(query_seq), IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, static_cast(pairwise_dist_cnt), cmps, @@ -554,12 +580,34 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm max_prediction_calls_per_should_stop, static_cast(should_stop_time_ns) / 1e6, static_cast(prediction_eval_time_ns) / 1e6, - static_cast(sorted_window_time_ns) / 1e6, - static_cast(average_recall_eval_time_ns) / 1e6, - static_cast(prediction_feature_prep_time_ns) / 1e6, + static_cast(query_setup_time_ns) / 1e6, static_cast(omega_control_time_ns) / 1e6, static_cast(pure_search_time_ns) / 1e6, static_cast(query_total_time_ns) / 1e6); +#else + LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " + "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " + "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " + "early_stop_hit=%d should_stop_calls=%llu " + "prediction_calls=%llu advance_calls=%llu " + "collected_gt_advance=%llu max_pred_per_stop=%llu " + "should_stop_ms=%.3f prediction_eval_ms=%.3f " + "setup_ms=%.3f core_search_ms=%.3f total_ms=%.3f", + static_cast(query_seq), + IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, + static_cast(pairwise_dist_cnt), cmps, + collected_gt, + predicted_recall_avg, predicted_recall_at_target, + (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, + should_stop_calls, prediction_calls, + should_stop_calls_with_advance, collected_gt_advance_count, + max_prediction_calls_per_should_stop, + static_cast(should_stop_time_ns) / 1e6, + static_cast(prediction_eval_time_ns) / 1e6, + static_cast(query_setup_time_ns) / 1e6, + static_cast(core_search_time_ns) / 1e6, + static_cast(query_total_time_ns) / 1e6); +#endif } } diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 1e3b3e397..317961f82 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 1e3b3e39702493b3962294b59ecfcc2d12e9871b +Subproject commit 317961f823e09b093444b18659af8a06571603ad From 4871eaeb1bbd75af9855fc1f4b4a71ed30da43b9 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 22 Mar 2026 23:49:04 +0800 Subject: [PATCH 027/126] refactor(omega): move control timing profiling to runtime flags --- CMakeLists.txt | 6 - scripts/benchmark_cohere_10m.py | 1 + scripts/benchmark_cohere_1m.py | 1 + src/core/algorithm/omega/omega_streamer.cc | 178 ++++++++++----------- thirdparty/omega/OMEGALib | 2 +- 5 files changed, 89 insertions(+), 99 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bab4e6d74..935ab0da1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,12 +34,6 @@ if(ZVEC_ENABLE_OMEGA) add_definitions(-DZVEC_ENABLE_OMEGA) endif() -option(ZVEC_OMEGA_PROFILE_CONTROL_TIMING "Enable OMEGA control-path profiling timing" OFF) -message(STATUS "ZVEC_OMEGA_PROFILE_CONTROL_TIMING:${ZVEC_OMEGA_PROFILE_CONTROL_TIMING}") -if(ZVEC_OMEGA_PROFILE_CONTROL_TIMING) - add_definitions(-DZVEC_OMEGA_PROFILE_CONTROL_TIMING) -endif() - cc_directory(thirdparty) cc_directories(src) cc_directories(tests) diff --git a/scripts/benchmark_cohere_10m.py b/scripts/benchmark_cohere_10m.py index 1273d7f24..8207d536b 100644 --- a/scripts/benchmark_cohere_10m.py +++ b/scripts/benchmark_cohere_10m.py @@ -781,6 +781,7 @@ def main(): dry_run=False, extra_env={ "ZVEC_LOG_LEVEL": "INFO", + 
"ZVEC_OMEGA_PROFILE_CONTROL_TIMING": "1", "ZVEC_OMEGA_LOG_QUERY_STATS": "1", "ZVEC_OMEGA_LOG_QUERY_LIMIT": "2000", }, diff --git a/scripts/benchmark_cohere_1m.py b/scripts/benchmark_cohere_1m.py index 6847e5925..db6c52915 100755 --- a/scripts/benchmark_cohere_1m.py +++ b/scripts/benchmark_cohere_1m.py @@ -807,6 +807,7 @@ def main(): dry_run=False, extra_env={ "ZVEC_LOG_LEVEL": "INFO", + "ZVEC_OMEGA_PROFILE_CONTROL_TIMING": "1", "ZVEC_OMEGA_LOG_QUERY_STATS": "1", "ZVEC_OMEGA_LOG_QUERY_LIMIT": "2000", }, diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 132ff136e..6862a2bba 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -166,6 +166,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm uint32_t count, Context::Pointer &context, bool enable_early_stopping) const { auto query_total_start = omega::ProfilingTimer::Now(); + const bool collect_control_timing = omega::IsControlTimingEnabled(); uint64_t omega_control_time_ns = 0; // Cast context to OmegaContext to access training_query_id @@ -301,16 +302,14 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm } // Set dist_start for OMEGA -#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING - { + if (collect_control_timing) { auto control_start = omega::ProfilingTimer::Now(); omega_search_ctx->SetDistStart(dist); omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( control_start, omega::ProfilingTimer::Now()); + } else { + omega_search_ctx->SetDistStart(dist); } -#else - omega_search_ctx->SetDistStart(dist); -#endif // Perform HNSW search on layer 0 with OMEGA candidates.clear(); @@ -323,16 +322,14 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm candidates.emplace(entry_point, dist); // Report initial visit to OMEGA -#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING - { + if (collect_control_timing) { auto control_start = 
omega::ProfilingTimer::Now(); omega_search_ctx->ReportVisitCandidate(entry_point, dist, true); omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( control_start, omega::ProfilingTimer::Now()); + } else { + omega_search_ctx->ReportVisitCandidate(entry_point, dist, true); } -#else - omega_search_ctx->ReportVisitCandidate(entry_point, dist, true); -#endif dist_t lowerBound = dist; @@ -345,16 +342,14 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm // Reference semantics: count the hop as soon as the current candidate is // examined, before stop-condition evaluation. -#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING - { + if (collect_control_timing) { auto control_start = omega::ProfilingTimer::Now(); omega_search_ctx->ReportHop(); omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( control_start, omega::ProfilingTimer::Now()); + } else { + omega_search_ctx->ReportHop(); } -#else - omega_search_ctx->ReportHop(); -#endif // Standard HNSW stopping condition if (topk_heap.full() && candidate_dist > lowerBound) { @@ -415,39 +410,38 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm // result-set-sized top-k structure, not by ef admission alone. 
bool should_consider_candidate = (!topk_heap.full() || neighbor_dist < lowerBound); -#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING - { + if (collect_control_timing) { auto control_start = omega::ProfilingTimer::Now(); omega_search_ctx->ReportVisitCandidate(neighbor, neighbor_dist, should_consider_candidate); omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( control_start, omega::ProfilingTimer::Now()); + } else { + omega_search_ctx->ReportVisitCandidate(neighbor, neighbor_dist, + should_consider_candidate); } -#else - omega_search_ctx->ReportVisitCandidate(neighbor, neighbor_dist, - should_consider_candidate); -#endif bool should_predict = false; if (enable_early_stopping) { -#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING - auto control_start = omega::ProfilingTimer::Now(); - should_predict = omega_search_ctx->ShouldPredict(); - omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( - control_start, omega::ProfilingTimer::Now()); -#else - should_predict = omega_search_ctx->ShouldPredict(); -#endif + if (collect_control_timing) { + auto control_start = omega::ProfilingTimer::Now(); + should_predict = omega_search_ctx->ShouldPredict(); + omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( + control_start, omega::ProfilingTimer::Now()); + } else { + should_predict = omega_search_ctx->ShouldPredict(); + } } if (enable_early_stopping && should_predict) { -#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING - auto control_start = omega::ProfilingTimer::Now(); - bool should_stop = omega_search_ctx->ShouldStopEarly(); - omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( - control_start, omega::ProfilingTimer::Now()); -#else - bool should_stop = omega_search_ctx->ShouldStopEarly(); -#endif + bool should_stop = false; + if (collect_control_timing) { + auto control_start = omega::ProfilingTimer::Now(); + should_stop = omega_search_ctx->ShouldStopEarly(); + omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( + control_start, omega::ProfilingTimer::Now()); + } else 
{ + should_stop = omega_search_ctx->ShouldStopEarly(); + } if (should_stop) { int hops, cmps, collected_gt; omega_search_ctx->GetStats(&hops, &cmps, &collected_gt); @@ -531,12 +525,12 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm uint64_t pairwise_dist_cnt = hnsw_ctx->get_pairwise_dist_num(); uint64_t pure_search_time_ns = 0; uint64_t core_search_time_ns = query_core_time_ns; -#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING - pure_search_time_ns = - query_core_time_ns > omega_control_time_ns - ? (query_core_time_ns - omega_control_time_ns) - : 0; -#endif + if (collect_control_timing) { + pure_search_time_ns = + query_core_time_ns > omega_control_time_ns + ? (query_core_time_ns - omega_control_time_ns) + : 0; + } bool expected = false; if (debug_stats_logged_.compare_exchange_strong(expected, true)) { LOG_INFO("OMEGA runtime stats: model_loaded=%d target_recall=%.4f " @@ -559,55 +553,55 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static_cast(prediction_eval_time_ns) / 1e6); } if (ShouldLogQueryStats(query_seq)) { -#ifdef ZVEC_OMEGA_PROFILE_CONTROL_TIMING - LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " - "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " - "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " - "early_stop_hit=%d should_stop_calls=%llu " - "prediction_calls=%llu advance_calls=%llu " - "collected_gt_advance=%llu max_pred_per_stop=%llu " - "should_stop_ms=%.3f prediction_eval_ms=%.3f " - "setup_ms=%.3f " - "omega_control_ms=%.3f pure_search_ms=%.3f total_ms=%.3f", - static_cast(query_seq), - IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, - static_cast(pairwise_dist_cnt), cmps, - collected_gt, - predicted_recall_avg, predicted_recall_at_target, - (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, - should_stop_calls, prediction_calls, - should_stop_calls_with_advance, collected_gt_advance_count, - max_prediction_calls_per_should_stop, - static_cast(should_stop_time_ns) / 1e6, - static_cast(prediction_eval_time_ns) / 1e6, - static_cast(query_setup_time_ns) / 1e6, - static_cast(omega_control_time_ns) / 1e6, - static_cast(pure_search_time_ns) / 1e6, - static_cast(query_total_time_ns) / 1e6); -#else - LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " - "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " - "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " - "early_stop_hit=%d should_stop_calls=%llu " - "prediction_calls=%llu advance_calls=%llu " - "collected_gt_advance=%llu max_pred_per_stop=%llu " - "should_stop_ms=%.3f prediction_eval_ms=%.3f " - "setup_ms=%.3f core_search_ms=%.3f total_ms=%.3f", - static_cast(query_seq), - IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, - static_cast(pairwise_dist_cnt), cmps, - collected_gt, - predicted_recall_avg, predicted_recall_at_target, - (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, - should_stop_calls, prediction_calls, - should_stop_calls_with_advance, collected_gt_advance_count, - max_prediction_calls_per_should_stop, - static_cast(should_stop_time_ns) / 1e6, - static_cast(prediction_eval_time_ns) / 1e6, - static_cast(query_setup_time_ns) / 1e6, - static_cast(core_search_time_ns) / 1e6, - static_cast(query_total_time_ns) / 1e6); -#endif + if (collect_control_timing) { + LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " + "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " + "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " + "early_stop_hit=%d should_stop_calls=%llu " + "prediction_calls=%llu advance_calls=%llu " + "collected_gt_advance=%llu max_pred_per_stop=%llu " + "should_stop_ms=%.3f prediction_eval_ms=%.3f " + "setup_ms=%.3f " + "omega_control_ms=%.3f pure_search_ms=%.3f total_ms=%.3f", + static_cast(query_seq), + IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, + static_cast(pairwise_dist_cnt), cmps, + collected_gt, + predicted_recall_avg, predicted_recall_at_target, + (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, + should_stop_calls, prediction_calls, + should_stop_calls_with_advance, collected_gt_advance_count, + max_prediction_calls_per_should_stop, + static_cast(should_stop_time_ns) / 1e6, + static_cast(prediction_eval_time_ns) / 1e6, + static_cast(query_setup_time_ns) / 1e6, + static_cast(omega_control_time_ns) / 1e6, + static_cast(pure_search_time_ns) / 1e6, + static_cast(query_total_time_ns) / 1e6); + } else { + LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " + "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " + "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " + "early_stop_hit=%d should_stop_calls=%llu " + "prediction_calls=%llu advance_calls=%llu " + "collected_gt_advance=%llu max_pred_per_stop=%llu " + "should_stop_ms=%.3f prediction_eval_ms=%.3f " + "setup_ms=%.3f core_search_ms=%.3f total_ms=%.3f", + static_cast(query_seq), + IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, + static_cast(pairwise_dist_cnt), cmps, + collected_gt, + predicted_recall_avg, predicted_recall_at_target, + (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, + should_stop_calls, prediction_calls, + should_stop_calls_with_advance, collected_gt_advance_count, + max_prediction_calls_per_should_stop, + static_cast(should_stop_time_ns) / 1e6, + static_cast(prediction_eval_time_ns) / 1e6, + static_cast(query_setup_time_ns) / 1e6, + static_cast(core_search_time_ns) / 1e6, + static_cast(query_total_time_ns) / 1e6); + } } } diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 317961f82..efb5d6fb2 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 317961f823e09b093444b18659af8a06571603ad +Subproject commit efb5d6fb2a854f74a6a22cc64444aee880348b7b From 5df2d5a98c44cea382fe2885e1be904161fe6a95 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 23 Mar 2026 00:39:27 +0800 Subject: [PATCH 028/126] refactor(omega): align omega search path with hnsw core loop --- src/core/algorithm/hnsw/hnsw_algorithm.cc | 96 ++++--- src/core/algorithm/hnsw/hnsw_algorithm.h | 29 +- src/core/algorithm/hnsw/hnsw_searcher.cc | 6 + src/core/algorithm/hnsw/hnsw_searcher.h | 6 +- src/core/algorithm/omega/omega_searcher.cc | 202 ++++---------- src/core/algorithm/omega/omega_streamer.cc | 301 ++++++--------------- 6 files changed, 232 insertions(+), 408 deletions(-) diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.cc b/src/core/algorithm/hnsw/hnsw_algorithm.cc index 9a4125899..c563c0695 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.cc +++ b/src/core/algorithm/hnsw/hnsw_algorithm.cc @@ -14,6 +14,7 @@ #include "hnsw_algorithm.h" #include #include +#include #include namespace zvec { @@ -81,34 +82,46 @@ int HnswAlgorithm::add_node(node_id_t id, level_t level, HnswContext *ctx) { } int HnswAlgorithm::search(HnswContext *ctx) const { - spin_lock_.lock(); - auto maxLevel = entity_.cur_max_level(); - auto entry_point = entity_.entry_point(); - spin_lock_.unlock(); - - if (ailego_unlikely(entry_point == kInvalidNodeId)) { - return 0; - } - - dist_t dist = 
ctx->dist_calculator().dist(entry_point); - for (level_t cur_level = maxLevel; cur_level >= 1; --cur_level) { - select_entry_point(cur_level, &entry_point, &dist, ctx); - } + return search_internal(ctx, true, nullptr, nullptr); +} - auto &topk_heap = ctx->topk_heap(); - topk_heap.clear(); - search_neighbors(0, &entry_point, &dist, topk_heap, ctx); +int HnswAlgorithm::fast_search(HnswContext *ctx) const { + return search_internal(ctx, false, nullptr, nullptr); +} - if (ctx->group_by_search()) { - expand_neighbors_by_group(topk_heap, ctx); - } +int HnswAlgorithm::search_with_hooks(HnswContext *ctx, + const SearchHooks *hooks, + bool *stopped_early) const { + return search_internal(ctx, true, hooks, stopped_early); +} - return 0; +int HnswAlgorithm::fast_search_with_hooks(HnswContext *ctx, + const SearchHooks *hooks, + bool *stopped_early) const { + return search_internal(ctx, false, hooks, stopped_early); } -int HnswAlgorithm::fast_search(HnswContext *ctx) const { - auto max_level = entity_.cur_max_level(); - auto entry_point = entity_.entry_point(); +int HnswAlgorithm::search_internal(HnswContext *ctx, bool use_lock, + const SearchHooks *hooks, + bool *stopped_early) const { + auto load_entry_point = [&]() { + if (use_lock) { + spin_lock_.lock(); + auto max_level = entity_.cur_max_level(); + auto entry_point = entity_.entry_point(); + spin_lock_.unlock(); + return std::make_pair(max_level, entry_point); + } + return std::make_pair(entity_.cur_max_level(), entity_.entry_point()); + }; + + auto [max_level, entry_point] = load_entry_point(); + if (ailego_unlikely(entry_point == kInvalidNodeId)) { + if (stopped_early != nullptr) { + *stopped_early = false; + } + return 0; + } dist_t dist = ctx->dist_calculator().dist(entry_point); for (level_t cur_level = max_level; cur_level >= 1; --cur_level) { @@ -117,10 +130,13 @@ int HnswAlgorithm::fast_search(HnswContext *ctx) const { auto &topk_heap = ctx->topk_heap(); topk_heap.clear(); + bool did_stop_early = + 
search_neighbors(0, &entry_point, &dist, topk_heap, ctx, hooks); + if (stopped_early != nullptr) { + *stopped_early = did_stop_early; + } - search_neighbors(0, &entry_point, &dist, topk_heap, ctx); - - if (ctx->group_by_search()) { + if (!did_stop_early && ctx->group_by_search()) { expand_neighbors_by_group(topk_heap, ctx); } @@ -199,9 +215,10 @@ void HnswAlgorithm::add_neighbors(node_id_t id, level_t level, return; } -void HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, +bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, dist_t *dist, TopkHeap &topk, - HnswContext *ctx) const { + HnswContext *ctx, + const SearchHooks *hooks) const { const auto &entity = ctx->get_entity(); HnswDistCalculator &dc = ctx->dist_calculator(); VisitFilter &visit = ctx->visit_filter(); @@ -214,12 +231,21 @@ void HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, candidates.clear(); visit.clear(); visit.set_visited(*entry_point); - if (!filter(*entry_point)) { + bool entry_inserted_to_topk = !filter(*entry_point); + if (entry_inserted_to_topk) { topk.emplace(*entry_point, *dist); } candidates.emplace(*entry_point, *dist); + if (hooks != nullptr && hooks->on_level0_entry != nullptr) { + hooks->on_level0_entry(*entry_point, *dist, entry_inserted_to_topk, + hooks->user_data); + } while (!candidates.empty() && !ctx->reach_scan_limit()) { + if (hooks != nullptr && hooks->on_hop != nullptr) { + hooks->on_hop(hooks->user_data); + } + auto top = candidates.begin(); node_id_t main_node = top->first; dist_t main_dist = top->second; @@ -281,8 +307,16 @@ void HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, for (uint32_t i = 0; i < size; ++i) { node_id_t node = neighbor_ids[i]; dist_t cur_dist = dists[i]; + bool should_consider_candidate = + (!topk.full()) || cur_dist < topk[0].second; + + if (hooks != nullptr && hooks->on_visit_candidate != nullptr && + hooks->on_visit_candidate(node, cur_dist, 
should_consider_candidate, + hooks->user_data)) { + return true; + } - if ((!topk.full()) || cur_dist < topk[0].second) { + if (should_consider_candidate) { candidates.emplace(node, cur_dist); // update entry_point for next level scan if (cur_dist < *dist) { @@ -296,7 +330,7 @@ void HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, } // end for } // while - return; + return false; } void HnswAlgorithm::expand_neighbors_by_group(TopkHeap &topk, diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.h b/src/core/algorithm/hnsw/hnsw_algorithm.h index b34548438..1937773a8 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.h +++ b/src/core/algorithm/hnsw/hnsw_algorithm.h @@ -27,6 +27,16 @@ class HnswAlgorithm { public: typedef std::unique_ptr UPointer; + struct SearchHooks { + void *user_data{nullptr}; + void (*on_level0_entry)(node_id_t id, dist_t dist, bool inserted_to_topk, + void *user_data){nullptr}; + void (*on_hop)(void *user_data){nullptr}; + bool (*on_visit_candidate)(node_id_t id, dist_t dist, + bool should_consider_candidate, + void *user_data){nullptr}; + }; + public: //! Constructor explicit HnswAlgorithm(HnswEntity &entity); @@ -51,6 +61,14 @@ class HnswAlgorithm { //! return 0 on success, or errCode in failure. results saved in ctx int fast_search(HnswContext *ctx) const; + //! do knn search in graph with optional callbacks inserted on the hot path + int search_with_hooks(HnswContext *ctx, const SearchHooks *hooks, + bool *stopped_early = nullptr) const; + + //! do knn search in graph without lock with optional callbacks + int fast_search_with_hooks(HnswContext *ctx, const SearchHooks *hooks, + bool *stopped_early = nullptr) const; + //! Initiate HnswAlgorithm int init() { level_probas_.clear(); @@ -84,6 +102,10 @@ class HnswAlgorithm { } private: + int search_internal(HnswContext *ctx, bool use_lock, + const SearchHooks *hooks, + bool *stopped_early) const; + //! 
Select in upper layer to get entry point for next layer search void select_entry_point(level_t level, node_id_t *entry_point, dist_t *dist, HnswContext *ctx) const; @@ -95,8 +117,9 @@ class HnswAlgorithm { //! Given a node id and level, search the nearest neighbors in graph //! Note: the nearest neighbors result keeps in topk, and entry_point and //! dist will be updated to current level nearest node id and distance - void search_neighbors(level_t level, node_id_t *entry_point, dist_t *dist, - TopkHeap &topk, HnswContext *ctx) const; + bool search_neighbors(level_t level, node_id_t *entry_point, dist_t *dist, + TopkHeap &topk, HnswContext *ctx, + const SearchHooks *hooks = nullptr) const; //! Update the node's neighbors void update_neighbors(HnswDistCalculator &dc, node_id_t id, level_t level, @@ -131,4 +154,4 @@ class HnswAlgorithm { }; } // namespace core -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/src/core/algorithm/hnsw/hnsw_searcher.cc b/src/core/algorithm/hnsw/hnsw_searcher.cc index 2e066873e..cd6318658 100644 --- a/src/core/algorithm/hnsw/hnsw_searcher.cc +++ b/src/core/algorithm/hnsw/hnsw_searcher.cc @@ -190,6 +190,12 @@ int HnswSearcher::update_context(HnswContext *ctx) const { entity, magic_); } +int HnswSearcher::fast_search_with_hooks( + HnswContext *ctx, const HnswAlgorithm::SearchHooks *hooks, + bool *stopped_early) const { + return alg_->fast_search_with_hooks(ctx, hooks, stopped_early); +} + int HnswSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, Context::Pointer &context) const { if (ailego_unlikely(!query || !context)) { diff --git a/src/core/algorithm/hnsw/hnsw_searcher.h b/src/core/algorithm/hnsw/hnsw_searcher.h index 22477c021..60cef55d6 100644 --- a/src/core/algorithm/hnsw/hnsw_searcher.h +++ b/src/core/algorithm/hnsw/hnsw_searcher.h @@ -112,6 +112,10 @@ class HnswSearcher : public IndexSearcher { int update_context(HnswContext *ctx) const; protected: + int 
fast_search_with_hooks(HnswContext *ctx, + const HnswAlgorithm::SearchHooks *hooks, + bool *stopped_early) const; + enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 }; uint32_t ef_{HnswEntity::kDefaultEf}; @@ -137,4 +141,4 @@ class HnswSearcher : public IndexSearcher { }; } // namespace core -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 0014f9230..1a84b3e0f 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -27,6 +27,38 @@ namespace zvec { namespace core { +namespace { + +struct OmegaHookState { + omega::SearchContext *search_ctx{nullptr}; + bool enable_early_stopping{false}; +}; + +void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, + void *user_data) { + auto &state = *static_cast(user_data); + state.search_ctx->SetDistStart(dist); + state.search_ctx->ReportVisitCandidate(id, dist, true); +} + +void OnOmegaHop(void *user_data) { + auto &state = *static_cast(user_data); + state.search_ctx->ReportHop(); +} + +bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, + bool should_consider_candidate, void *user_data) { + auto &state = *static_cast(user_data); + state.search_ctx->ReportVisitCandidate(id, dist, should_consider_candidate); + if (!state.enable_early_stopping) { + return false; + } + return state.search_ctx->ShouldPredict() && + state.search_ctx->ShouldStopEarly(); +} + +} // namespace + OmegaSearcher::OmegaSearcher(void) : HnswSearcher(), omega_model_(nullptr), @@ -291,168 +323,40 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet query_id, gt_for_query.size()); } - // OmegaContext extends HnswContext, so we can use it directly - // Initialize query in distance calculator - omega_ctx->reset_query(query); - - // Get entity and distance calculator - const auto &entity = omega_ctx->get_entity(); - auto 
&dc = omega_ctx->dist_calculator(); - auto &visit_filter = omega_ctx->visit_filter(); - auto &candidates = omega_ctx->candidates(); - auto &topk_heap = omega_ctx->topk_heap(); - - // Use ef from parent class (now protected, so accessible) - uint32_t ef = ef_; - topk_heap.limit(std::max(ef, count)); - - // Get entry point - auto max_level = entity.cur_max_level(); - auto entry_point = entity.entry_point(); - - if (entry_point == kInvalidNodeId) { - omega_search_destroy(omega_search); - return 0; - } - - // Navigate to layer 0 - dist_t dist = dc.dist(entry_point); - for (level_t cur_level = max_level; cur_level >= 1; --cur_level) { - const Neighbors neighbors = entity.get_neighbors(cur_level, entry_point); - if (neighbors.size() == 0) break; - - std::vector neighbor_vec_blocks; - int ret = entity.get_vector(&neighbors[0], neighbors.size(), neighbor_vec_blocks); - if (ret != 0) break; - - bool find_closer = false; - for (uint32_t i = 0; i < neighbors.size(); ++i) { - const void *neighbor_vec = neighbor_vec_blocks[i].data(); - dist_t cur_dist = dc.dist(neighbor_vec); - if (cur_dist < dist) { - entry_point = neighbors[i]; - dist = cur_dist; - find_closer = true; - } - } - if (!find_closer) break; - } - - // Set dist_start for OMEGA - omega_search_ctx->SetDistStart(dist); - - // Now perform OMEGA-enhanced search on layer 0 - candidates.clear(); - visit_filter.clear(); - topk_heap.clear(); - - // Add entry point to search - visit_filter.set_visited(entry_point); - topk_heap.emplace(entry_point, dist); - candidates.emplace(entry_point, dist); - - // Report initial visit to OMEGA - omega_search_ctx->ReportVisitCandidate(entry_point, dist, true); - - dist_t lowerBound = dist; - - // Main search loop with OMEGA predictions + omega_ctx->clear(); + omega_ctx->resize_results(count); bool early_stop_hit = false; - while (!candidates.empty()) { - auto top = candidates.begin(); - node_id_t current_node = top->first; - dist_t candidate_dist = top->second; - - // Reference semantics: 
count the hop before the stop-condition check. - omega_search_ctx->ReportHop(); - - // Standard HNSW stopping condition - if (candidate_dist > lowerBound && topk_heap.size() >= ef) { - break; - } - - candidates.pop(); - - // Get neighbors of current node - const Neighbors neighbors = entity.get_neighbors(0, current_node); - if (neighbors.size() == 0) continue; - - // Prepare to compute distances - std::vector unvisited_neighbors; - for (uint32_t i = 0; i < neighbors.size(); ++i) { - node_id_t neighbor = neighbors[i]; - if (!visit_filter.visited(neighbor)) { - visit_filter.set_visited(neighbor); - unvisited_neighbors.push_back(neighbor); - } - } - if (unvisited_neighbors.empty()) continue; - - // Get neighbor vectors - std::vector neighbor_vec_blocks; - int ret = entity.get_vector(unvisited_neighbors.data(), - unvisited_neighbors.size(), - neighbor_vec_blocks); - if (ret != 0) break; - - // Compute distances and update candidates - for (size_t i = 0; i < unvisited_neighbors.size(); ++i) { - node_id_t neighbor = unvisited_neighbors[i]; - const void *neighbor_vec = neighbor_vec_blocks[i].data(); - dist_t neighbor_dist = dc.dist(neighbor_vec); - - bool should_consider_candidate = - (topk_heap.size() < ef || neighbor_dist < lowerBound); - omega_search_ctx->ReportVisitCandidate(neighbor, neighbor_dist, - should_consider_candidate); - - if (!training_mode_enabled_ && omega_search_ctx->ShouldPredict()) { - if (omega_search_ctx->ShouldStopEarly()) { - int hops, cmps, collected_gt; - omega_search_ctx->GetStats(&hops, &cmps, &collected_gt); - LOG_DEBUG("OMEGA early stop: cmps=%d, hops=%d, collected_gt=%d", - cmps, hops, collected_gt); - early_stop_hit = true; - break; - } - } - - // Consider this candidate - if (should_consider_candidate) { - candidates.emplace(neighbor, neighbor_dist); - topk_heap.emplace(neighbor, neighbor_dist); - - // Update lowerBound - if (neighbor_dist < lowerBound) { - lowerBound = neighbor_dist; - } - - // Remove excess from topk_heap - while 
(topk_heap.size() > ef) { - topk_heap.pop(); - } - - // Update lowerBound to the worst distance in topk - if (!topk_heap.empty() && topk_heap.size() >= ef) { - lowerBound = topk_heap[0].second; // Max heap, so [0] is the worst - } - } + for (size_t q = 0; q < count; ++q) { + omega_ctx->reset_query(query); + OmegaHookState hook_state; + hook_state.search_ctx = omega_search_ctx; + hook_state.enable_early_stopping = !training_mode_enabled_; + HnswAlgorithm::SearchHooks hooks; + hooks.user_data = &hook_state; + hooks.on_level0_entry = OnOmegaLevel0Entry; + hooks.on_hop = OnOmegaHop; + hooks.on_visit_candidate = OnOmegaVisitCandidate; + + int ret = fast_search_with_hooks(omega_ctx, &hooks, &early_stop_hit); + if (ret != 0) { + omega_search_destroy(omega_search); + LOG_WARN("OMEGA adaptive search failed, falling back to HNSW"); + return HnswSearcher::search_impl(query, qmeta, count, context); } + omega_ctx->topk_to_result(q); if (early_stop_hit) { break; } + query = static_cast(query) + qmeta.element_size(); } - // Convert results to context format - omega_ctx->topk_to_result(); - // Get final statistics int hops, cmps, collected_gt; omega_search_ctx->GetStats(&hops, &cmps, &collected_gt); LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu", - cmps, hops, topk_heap.size()); + cmps, hops, omega_ctx->topk_heap().size()); // Collect training records if in training mode if (training_mode_enabled_) { diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 6862a2bba..71e81af14 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -60,6 +60,65 @@ bool ShouldLogQueryStats(uint64_t query_seq) { return limit == 0 || query_seq < limit; } +struct OmegaHookState { + omega::SearchContext *search_ctx{nullptr}; + bool enable_early_stopping{false}; + bool collect_control_timing{false}; + uint64_t *omega_control_time_ns{nullptr}; +}; + +template +void 
RunOmegaControlHook(const OmegaHookState &state, Fn &&fn) { + if (!state.collect_control_timing) { + fn(); + return; + } + auto control_start = omega::ProfilingTimer::Now(); + fn(); + if (state.omega_control_time_ns != nullptr) { + *state.omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( + control_start, omega::ProfilingTimer::Now()); + } +} + +void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, + void *user_data) { + auto &state = *static_cast(user_data); + RunOmegaControlHook(state, [&]() { + state.search_ctx->SetDistStart(dist); + state.search_ctx->ReportVisitCandidate(id, dist, true); + }); +} + +void OnOmegaHop(void *user_data) { + auto &state = *static_cast(user_data); + RunOmegaControlHook(state, [&]() { state.search_ctx->ReportHop(); }); +} + +bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, + bool should_consider_candidate, void *user_data) { + auto &state = *static_cast(user_data); + RunOmegaControlHook(state, [&]() { + state.search_ctx->ReportVisitCandidate(id, dist, should_consider_candidate); + }); + + if (!state.enable_early_stopping) { + return false; + } + + bool should_predict = false; + RunOmegaControlHook(state, + [&]() { should_predict = state.search_ctx->ShouldPredict(); }); + if (!should_predict) { + return false; + } + + bool should_stop = false; + RunOmegaControlHook( + state, [&]() { should_stop = state.search_ctx->ShouldStopEarly(); }); + return should_stop; +} + } // namespace bool OmegaStreamer::LoadModel(const std::string& model_dir) { @@ -243,234 +302,28 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm // Initialize context for search hnsw_ctx->clear(); + hnsw_ctx->update_dist_caculator_distance(search_distance_, + search_batch_distance_); hnsw_ctx->resize_results(count); hnsw_ctx->check_need_adjuct_ctx(entity_.doc_cnt()); auto query_core_start = omega::ProfilingTimer::Now(); hnsw_ctx->reset_query(query); - - // Get entity and distance calculator from context - 
const auto &entity = hnsw_ctx->get_entity(); - auto &dc = hnsw_ctx->dist_calculator(); - auto &visit_filter = hnsw_ctx->visit_filter(); - auto &candidates = hnsw_ctx->candidates(); - auto &topk_heap = hnsw_ctx->topk_heap(); - - // Get entry point - auto max_level = entity.cur_max_level(); - auto entry_point = entity.entry_point(); - - if (entry_point == kInvalidNodeId) { - omega_search_destroy(omega_search); - return 0; - } - - // Navigate to layer 0 - dist_t dist = dc.dist(entry_point); - - for (level_t cur_level = max_level; cur_level >= 1; --cur_level) { - const Neighbors neighbors = entity.get_neighbors(cur_level, entry_point); - if (neighbors.size() == 0) { - break; - } - - std::vector neighbor_vec_blocks; - int ret = entity.get_vector(&neighbors[0], neighbors.size(), neighbor_vec_blocks); - if (ret != 0) { - break; - } - - bool find_closer = false; - float dists[neighbors.size()]; - const void *neighbor_vecs[neighbors.size()]; - for (uint32_t i = 0; i < neighbors.size(); ++i) { - neighbor_vecs[i] = neighbor_vec_blocks[i].data(); - } - - dc.batch_dist(neighbor_vecs, neighbors.size(), dists); - - for (uint32_t i = 0; i < neighbors.size(); ++i) { - dist_t cur_dist = dists[i]; - if (cur_dist < dist) { - entry_point = neighbors[i]; - dist = cur_dist; - find_closer = true; - } - } - if (!find_closer) { - break; - } - } - - // Set dist_start for OMEGA - if (collect_control_timing) { - auto control_start = omega::ProfilingTimer::Now(); - omega_search_ctx->SetDistStart(dist); - omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( - control_start, omega::ProfilingTimer::Now()); - } else { - omega_search_ctx->SetDistStart(dist); - } - - // Perform HNSW search on layer 0 with OMEGA - candidates.clear(); - visit_filter.clear(); - topk_heap.clear(); - - // Add entry point to search - visit_filter.set_visited(entry_point); - topk_heap.emplace(entry_point, dist); - candidates.emplace(entry_point, dist); - - // Report initial visit to OMEGA - if (collect_control_timing) 
{ - auto control_start = omega::ProfilingTimer::Now(); - omega_search_ctx->ReportVisitCandidate(entry_point, dist, true); - omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( - control_start, omega::ProfilingTimer::Now()); - } else { - omega_search_ctx->ReportVisitCandidate(entry_point, dist, true); - } - - dist_t lowerBound = dist; - - // Main search loop with OMEGA feature collection and early stopping + OmegaHookState hook_state; + hook_state.search_ctx = omega_search_ctx; + hook_state.enable_early_stopping = enable_early_stopping; + hook_state.collect_control_timing = collect_control_timing; + hook_state.omega_control_time_ns = &omega_control_time_ns; + HnswAlgorithm::SearchHooks hooks; + hooks.user_data = &hook_state; + hooks.on_level0_entry = OnOmegaLevel0Entry; + hooks.on_hop = OnOmegaHop; + hooks.on_visit_candidate = OnOmegaVisitCandidate; bool early_stop_hit = false; - while (!candidates.empty()) { - auto top = candidates.begin(); - node_id_t current_node = top->first; - dist_t candidate_dist = top->second; - - // Reference semantics: count the hop as soon as the current candidate is - // examined, before stop-condition evaluation. 
- if (collect_control_timing) { - auto control_start = omega::ProfilingTimer::Now(); - omega_search_ctx->ReportHop(); - omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( - control_start, omega::ProfilingTimer::Now()); - } else { - omega_search_ctx->ReportHop(); - } - - // Standard HNSW stopping condition - if (topk_heap.full() && candidate_dist > lowerBound) { - break; - } - - candidates.pop(); - - // Get neighbors of current node - const Neighbors neighbors = entity.get_neighbors(0, current_node); - ailego_prefetch(neighbors.data); - if (neighbors.size() == 0) continue; - - // Prepare to compute distances - node_id_t neighbor_ids[neighbors.size()]; - uint32_t size = 0; - for (uint32_t i = 0; i < neighbors.size(); ++i) { - node_id_t neighbor = neighbors[i]; - if (!visit_filter.visited(neighbor)) { - visit_filter.set_visited(neighbor); - neighbor_ids[size++] = neighbor; - } - } - - if (size == 0) continue; - - // Get neighbor vectors - std::vector neighbor_vec_blocks; - int ret = entity.get_vector(neighbor_ids, size, neighbor_vec_blocks); - if (ret != 0) { - break; - } - - static constexpr node_id_t BATCH_SIZE = 12; - static constexpr node_id_t PREFETCH_STEP = 2; - for (size_t i = 0; - i < std::min(static_cast(BATCH_SIZE * PREFETCH_STEP), - static_cast(size)); - ++i) { - ailego_prefetch(neighbor_vec_blocks[i].data()); - } - - float dists[size]; - const void *neighbor_vecs[size]; - for (uint32_t i = 0; i < size; ++i) { - neighbor_vecs[i] = neighbor_vec_blocks[i].data(); - } - dc.batch_dist(neighbor_vecs, size, dists); - - // Compute distances and update candidates - for (uint32_t i = 0; i < size; ++i) { - node_id_t neighbor = neighbor_ids[i]; - dist_t neighbor_dist = dists[i]; - - // Reference semantics: - // 1. `should_consider_candidate` is driven by the ef-bounded heap - // 2. OMEGA's top-candidate updates are driven by insertion into the - // result-set-sized top-k structure, not by ef admission alone. 
- bool should_consider_candidate = - (!topk_heap.full() || neighbor_dist < lowerBound); - if (collect_control_timing) { - auto control_start = omega::ProfilingTimer::Now(); - omega_search_ctx->ReportVisitCandidate(neighbor, neighbor_dist, - should_consider_candidate); - omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( - control_start, omega::ProfilingTimer::Now()); - } else { - omega_search_ctx->ReportVisitCandidate(neighbor, neighbor_dist, - should_consider_candidate); - } - - bool should_predict = false; - if (enable_early_stopping) { - if (collect_control_timing) { - auto control_start = omega::ProfilingTimer::Now(); - should_predict = omega_search_ctx->ShouldPredict(); - omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( - control_start, omega::ProfilingTimer::Now()); - } else { - should_predict = omega_search_ctx->ShouldPredict(); - } - } - if (enable_early_stopping && should_predict) { - bool should_stop = false; - if (collect_control_timing) { - auto control_start = omega::ProfilingTimer::Now(); - should_stop = omega_search_ctx->ShouldStopEarly(); - omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( - control_start, omega::ProfilingTimer::Now()); - } else { - should_stop = omega_search_ctx->ShouldStopEarly(); - } - if (should_stop) { - int hops, cmps, collected_gt; - omega_search_ctx->GetStats(&hops, &cmps, &collected_gt); - LOG_DEBUG("OMEGA early stop: cmps=%d, hops=%d, collected_gt=%d", - cmps, hops, collected_gt); - early_stop_hit = true; - break; - } - } - - // Consider this candidate - if (should_consider_candidate) { - candidates.emplace(neighbor, neighbor_dist); - topk_heap.emplace(neighbor, neighbor_dist); - - // Update lowerBound - if (neighbor_dist < lowerBound) { - lowerBound = neighbor_dist; - } - - if (topk_heap.full()) { - lowerBound = topk_heap[0].second; - } - } - } - - if (early_stop_hit) { - break; - } + int ret = alg_->search_with_hooks(hnsw_ctx, &hooks, &early_stop_hit); + if (ret != 0) { + 
omega_search_destroy(omega_search); + LOG_ERROR("OMEGA search failed"); + return ret; } auto query_core_end = omega::ProfilingTimer::Now(); @@ -519,7 +372,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm max_prediction_calls_per_should_stop = omega_search_ctx->GetMaxPredictionCallsPerShouldStop(); LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu, early_stop=%d", - cmps, hops, topk_heap.size(), enable_early_stopping); + cmps, hops, hnsw_ctx->topk_heap().size(), enable_early_stopping); if (enable_early_stopping) { size_t scan_cmps = hnsw_ctx->get_scan_num(); uint64_t pairwise_dist_cnt = hnsw_ctx->get_pairwise_dist_num(); From 40785466c924e7d15023edc892950d5f40fc5969 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 23 Mar 2026 14:35:55 +0800 Subject: [PATCH 029/126] feat(omega): add hooks-only perf switch and ab script --- scripts/perf_ab_search_core.sh | 142 +++++++++++++++++++++ src/core/algorithm/omega/omega_searcher.cc | 17 ++- src/core/algorithm/omega/omega_searcher.h | 6 + src/core/algorithm/omega/omega_streamer.cc | 16 ++- 4 files changed, 175 insertions(+), 6 deletions(-) create mode 100644 scripts/perf_ab_search_core.sh diff --git a/scripts/perf_ab_search_core.sh b/scripts/perf_ab_search_core.sh new file mode 100644 index 000000000..bafdb74a5 --- /dev/null +++ b/scripts/perf_ab_search_core.sh @@ -0,0 +1,142 @@ +#!/usr/bin/env bash +set -euo pipefail + +DATASET="${1:-1m}" +CPU_CORE="${CPU_CORE:-0}" +REPEAT="${PERF_REPEAT:-5}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ZVEC_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" + +CONDA_SH="${CONDA_SH:-/root/miniconda3/etc/profile.d/conda.sh}" +CONDA_ENV="${CONDA_ENV:-bench}" +PYTHON_BIN="${PYTHON_BIN:-python}" + +if [[ -f "${CONDA_SH}" ]]; then + # shellcheck disable=SC1090 + source "${CONDA_SH}" + conda activate "${CONDA_ENV}" +fi + +PERF_EVENTS="cycles,instructions,branches,branch-misses,cache-references,cache-misses,L1-dcache-loads,L1-dcache-load-misses,LLC-loads,LLC-load-misses,dTLB-loads,dTLB-load-misses" + +case "${DATASET}" in + 1m) + CASE_TYPE="Performance768D1M" + HNSW_PATH="${ZVEC_ROOT}/benchmark_results/cohere_1m_hnsw" + OMEGA_PATH="${ZVEC_ROOT}/benchmark_results/cohere_1m_omega" + HNSW_LABEL="16c64g-v0.1" + OMEGA_LABEL="omega-m15-ef180-int8" + HNSW_ARGS=( + zvec + --path "${HNSW_PATH}" + --db-label "${HNSW_LABEL}" + --case-type "${CASE_TYPE}" + --m 15 + --ef-search 180 + --quantize-type int8 + --num-concurrency 16 + --concurrency-duration 30 + --k 100 + --skip-drop-old + --skip-load + --skip-search-concurrent + ) + OMEGA_ARGS=( + zvecomega + --path "${OMEGA_PATH}" + --db-label "${OMEGA_LABEL}" + --case-type "${CASE_TYPE}" + --m 15 + --ef-search 180 + --quantize-type int8 + --min-vector-threshold 100000 + --num-training-queries 4000 + --ef-training 500 + --window-size 100 + --ef-groundtruth 1000 + --target-recall 0.90 + --num-concurrency 16 + --concurrency-duration 30 + --k 100 + --skip-drop-old + --skip-load + --skip-search-concurrent + ) + ;; + 10m) + CASE_TYPE="Performance768D10M" + HNSW_PATH="${ZVEC_ROOT}/benchmark_results/cohere_10m_hnsw" + OMEGA_PATH="${ZVEC_ROOT}/benchmark_results/cohere_10m_omega" + HNSW_LABEL="16c64g-v0.1" + OMEGA_LABEL="omega-m50-ef118-int8-refiner" + HNSW_ARGS=( + zvec + --path "${HNSW_PATH}" + --db-label "${HNSW_LABEL}" + --case-type "${CASE_TYPE}" + --m 50 + --ef-search 118 + --quantize-type int8 + --is-using-refiner + --num-concurrency 12,14,16,18,20 + --concurrency-duration 30 + --k 100 + --skip-drop-old + --skip-load + --skip-search-concurrent + ) + OMEGA_ARGS=( + zvecomega + --path 
"${OMEGA_PATH}" + --db-label "${OMEGA_LABEL}" + --case-type "${CASE_TYPE}" + --m 50 + --ef-search 118 + --quantize-type int8 + --is-using-refiner + --min-vector-threshold 100000 + --num-training-queries 4000 + --ef-training 500 + --window-size 100 + --ef-groundtruth 1000 + --target-recall 0.90 + --num-concurrency 12,14,16,18,20 + --concurrency-duration 30 + --k 100 + --skip-drop-old + --skip-load + --skip-search-concurrent + ) + ;; + *) + echo "Unsupported dataset: ${DATASET}" >&2 + echo "Usage: $0 [1m|10m]" >&2 + exit 1 + ;; +esac + +run_perf() { + local title="$1" + shift + + echo + echo "============================================================" + echo "${title}" + echo "============================================================" + + taskset -c "${CPU_CORE}" numactl --cpunodebind=0 --membind=0 \ + perf stat -r "${REPEAT}" -e "${PERF_EVENTS}" \ + "$@" +} + +cd "${ZVEC_ROOT}" + +run_perf \ + "HNSW core search perf (${DATASET})" \ + "${PYTHON_BIN}" -m vectordb_bench.cli.vectordbbench "${HNSW_ARGS[@]}" + +run_perf \ + "OMEGA hooks-only core search perf (${DATASET})" \ + env ZVEC_OMEGA_DISABLE_MODEL_PREDICTION=1 \ + "${PYTHON_BIN}" -m vectordb_bench.cli.vectordbbench "${OMEGA_ARGS[@]}" diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 1a84b3e0f..f67b97cb6 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -23,12 +23,21 @@ #include #include #include +#include namespace zvec { namespace core { namespace { +bool DisableOmegaModelPrediction() { + const char* value = std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION"); + if (value == nullptr) { + return false; + } + return std::string(value) != "0"; +} + struct OmegaHookState { omega::SearchContext *search_ctx{nullptr}; bool enable_early_stopping{false}; @@ -288,7 +297,10 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet // Match OmegaStreamer/reference behavior: // 
training mode collects features only and must not run model inference. - OmegaModelHandle model_to_use = training_mode_enabled_ ? nullptr : omega_model_; + const bool disable_model_prediction = DisableOmegaModelPrediction(); + OmegaModelHandle model_to_use = + (training_mode_enabled_ || disable_model_prediction) ? nullptr + : omega_model_; OmegaSearchHandle omega_search = omega_search_create_with_params( model_to_use, target_recall, omega_topk, window_size_); @@ -331,7 +343,8 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet omega_ctx->reset_query(query); OmegaHookState hook_state; hook_state.search_ctx = omega_search_ctx; - hook_state.enable_early_stopping = !training_mode_enabled_; + hook_state.enable_early_stopping = + !training_mode_enabled_ && !disable_model_prediction; HnswAlgorithm::SearchHooks hooks; hooks.user_data = &hook_state; hooks.on_level0_entry = OnOmegaLevel0Entry; diff --git a/src/core/algorithm/omega/omega_searcher.h b/src/core/algorithm/omega/omega_searcher.h index 31b2a819e..53760461d 100644 --- a/src/core/algorithm/omega/omega_searcher.h +++ b/src/core/algorithm/omega/omega_searcher.h @@ -18,7 +18,9 @@ #include #include "../hnsw/hnsw_searcher.h" #include +#include #include +#include #include namespace zvec { @@ -179,6 +181,10 @@ class OmegaSearcher : public HnswSearcher { if (training_mode_enabled_) { return true; // Always use adaptive_search in training mode } + if (std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION") != nullptr && + std::string(std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION")) != "0") { + return true; + } return omega_enabled_ && use_omega_mode_ && omega_model_ != nullptr && omega_model_is_loaded(omega_model_); diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 71e81af14..96b29bfb4 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -60,6 +60,14 @@ bool 
ShouldLogQueryStats(uint64_t query_seq) { return limit == 0 || query_seq < limit; } +bool DisableOmegaModelPrediction() { + const char* value = std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION"); + if (value == nullptr) { + return false; + } + return std::string(value) != "0"; +} + struct OmegaHookState { omega::SearchContext *search_ctx{nullptr}; bool enable_early_stopping{false}; @@ -206,16 +214,16 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, Context::Pointer &context) const { // Determine mode: training (no early stopping) vs inference (with early stopping) - bool enable_early_stopping = !training_mode_enabled_ && IsModelLoaded(); + bool enable_early_stopping = + !training_mode_enabled_ && IsModelLoaded() && + !DisableOmegaModelPrediction(); if (training_mode_enabled_) { LOG_DEBUG("OmegaStreamer: training mode, early stopping DISABLED"); } else if (enable_early_stopping) { LOG_DEBUG("OmegaStreamer: inference mode with OMEGA model"); } else { - // No model loaded and not in training mode - use parent HNSW search - LOG_DEBUG("OmegaStreamer: no model loaded, using parent HNSW search"); - return HnswStreamer::search_impl(query, qmeta, count, context); + LOG_DEBUG("OmegaStreamer: OMEGA hooks mode without model prediction"); } return omega_search_impl(query, qmeta, count, context, enable_early_stopping); From acf2a8332c81cefca4b2a94a5e6786070db208d5 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 23 Mar 2026 16:39:51 +0800 Subject: [PATCH 030/126] refactor(omega): split search profiling timings --- scripts/perf_ab_search_core.sh | 130 ++++++++++++++++++++- src/core/algorithm/hnsw/hnsw_algorithm.cc | 36 ++++-- src/core/algorithm/hnsw/hnsw_algorithm.h | 3 + src/core/algorithm/omega/omega_streamer.cc | 66 +++++++---- 4 files changed, 201 insertions(+), 34 deletions(-) diff --git a/scripts/perf_ab_search_core.sh b/scripts/perf_ab_search_core.sh index bafdb74a5..690f1112e 100644 --- a/scripts/perf_ab_search_core.sh 
+++ b/scripts/perf_ab_search_core.sh @@ -4,9 +4,22 @@ set -euo pipefail DATASET="${1:-1m}" CPU_CORE="${CPU_CORE:-0}" REPEAT="${PERF_REPEAT:-5}" +MODE="${PERF_MODE:-all}" +RECORD_FREQ="${PERF_RECORD_FREQ:-999}" +TOPN="${PERF_TOPN:-60}" +CALL_GRAPH_MODE="${PERF_CALL_GRAPH_MODE:-fp}" +PERF_USER_ONLY="${PERF_USER_ONLY:-1}" + +OPENBLAS_THREADS="${OPENBLAS_NUM_THREADS:-1}" +OMP_THREADS="${OMP_NUM_THREADS:-1}" +MKL_THREADS="${MKL_NUM_THREADS:-1}" +NUMEXPR_THREADS="${NUMEXPR_NUM_THREADS:-1}" +GOTO_THREADS="${GOTO_NUM_THREADS:-1}" +VECLIB_THREADS="${VECLIB_MAXIMUM_THREADS:-1}" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ZVEC_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" +OUT_DIR="${PERF_OUT_DIR:-${ZVEC_ROOT}/perf_results/${DATASET}}" CONDA_SH="${CONDA_SH:-/root/miniconda3/etc/profile.d/conda.sh}" CONDA_ENV="${CONDA_ENV:-bench}" @@ -20,6 +33,21 @@ fi PERF_EVENTS="cycles,instructions,branches,branch-misses,cache-references,cache-misses,L1-dcache-loads,L1-dcache-load-misses,LLC-loads,LLC-load-misses,dTLB-loads,dTLB-load-misses" +COMMON_ENV=( + env + OPENBLAS_NUM_THREADS="${OPENBLAS_THREADS}" + OMP_NUM_THREADS="${OMP_THREADS}" + MKL_NUM_THREADS="${MKL_THREADS}" + NUMEXPR_NUM_THREADS="${NUMEXPR_THREADS}" + GOTO_NUM_THREADS="${GOTO_THREADS}" + VECLIB_MAXIMUM_THREADS="${VECLIB_THREADS}" +) + +PERF_RECORD_ARGS=(-F "${RECORD_FREQ}" -g --call-graph "${CALL_GRAPH_MODE}") +if [[ "${PERF_USER_ONLY}" == "1" ]]; then + PERF_RECORD_ARGS+=(--all-user) +fi + case "${DATASET}" in 1m) CASE_TYPE="Performance768D1M" @@ -130,13 +158,105 @@ run_perf() { "$@" } +run_record() { + local title="$1" + local output_prefix="$2" + shift 2 + + local data_file="${OUT_DIR}/${output_prefix}.data" + local report_file="${OUT_DIR}/${output_prefix}.report.txt" + local zvec_report_file="${OUT_DIR}/${output_prefix}.zvec_only.report.txt" + + echo + echo "============================================================" + echo "${title}" + echo "============================================================" + echo 
"perf.data: ${data_file}" + echo "report: ${report_file}" + echo "zvec-only: ${zvec_report_file}" + + taskset -c "${CPU_CORE}" numactl --cpunodebind=0 --membind=0 \ + perf record "${PERF_RECORD_ARGS[@]}" -o "${data_file}" -- \ + "$@" + + perf report --stdio --no-children -i "${data_file}" --percent-limit 0.5 \ + > "${report_file}" + sed -n "1,${TOPN}p" "${report_file}" + + perf report --stdio --no-children -i "${data_file}" \ + --sort dso,symbol --percent-limit 0.1 \ + --dsos _zvec.cpython-311-x86_64-linux-gnu.so \ + > "${zvec_report_file}" + sed -n "1,${TOPN}p" "${zvec_report_file}" +} + cd "${ZVEC_ROOT}" +mkdir -p "${OUT_DIR}" -run_perf \ - "HNSW core search perf (${DATASET})" \ +HNSW_CMD=( + "${COMMON_ENV[@]}" "${PYTHON_BIN}" -m vectordb_bench.cli.vectordbbench "${HNSW_ARGS[@]}" +) -run_perf \ - "OMEGA hooks-only core search perf (${DATASET})" \ - env ZVEC_OMEGA_DISABLE_MODEL_PREDICTION=1 \ +OMEGA_HOOKS_CMD=( + "${COMMON_ENV[@]}" + ZVEC_OMEGA_DISABLE_MODEL_PREDICTION=1 "${PYTHON_BIN}" -m vectordb_bench.cli.vectordbbench "${OMEGA_ARGS[@]}" +) + +echo "Using thread env:" +echo " OPENBLAS_NUM_THREADS=${OPENBLAS_THREADS}" +echo " OMP_NUM_THREADS=${OMP_THREADS}" +echo " MKL_NUM_THREADS=${MKL_THREADS}" +echo " NUMEXPR_NUM_THREADS=${NUMEXPR_THREADS}" +echo " GOTO_NUM_THREADS=${GOTO_THREADS}" +echo " VECLIB_MAXIMUM_THREADS=${VECLIB_THREADS}" +echo " PERF_CALL_GRAPH_MODE=${CALL_GRAPH_MODE}" +echo " PERF_USER_ONLY=${PERF_USER_ONLY}" + +case "${MODE}" in + stat) + run_perf \ + "HNSW core search perf (${DATASET})" \ + "${HNSW_CMD[@]}" + + run_perf \ + "OMEGA hooks-only core search perf (${DATASET})" \ + "${OMEGA_HOOKS_CMD[@]}" + ;; + record) + run_record \ + "HNSW core search hotspots (${DATASET})" \ + "hnsw_core" \ + "${HNSW_CMD[@]}" + + run_record \ + "OMEGA hooks-only core search hotspots (${DATASET})" \ + "omega_hooks_only" \ + "${OMEGA_HOOKS_CMD[@]}" + ;; + all) + run_perf \ + "HNSW core search perf (${DATASET})" \ + "${HNSW_CMD[@]}" + + run_perf \ + "OMEGA hooks-only 
core search perf (${DATASET})" \ + "${OMEGA_HOOKS_CMD[@]}" + + run_record \ + "HNSW core search hotspots (${DATASET})" \ + "hnsw_core" \ + "${HNSW_CMD[@]}" + + run_record \ + "OMEGA hooks-only core search hotspots (${DATASET})" \ + "omega_hooks_only" \ + "${OMEGA_HOOKS_CMD[@]}" + ;; + *) + echo "Unsupported PERF_MODE: ${MODE}" >&2 + echo "Use PERF_MODE=stat|record|all" >&2 + exit 1 + ;; +esac diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.cc b/src/core/algorithm/hnsw/hnsw_algorithm.cc index c563c0695..9d2d71f88 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.cc +++ b/src/core/algorithm/hnsw/hnsw_algorithm.cc @@ -228,6 +228,17 @@ bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); }; } + auto run_timed_hook = [&](auto &&fn) { + if (hooks == nullptr || !hooks->collect_timing || hooks->now_ns == nullptr || + hooks->hook_total_time_ns == nullptr) { + return fn(); + } + uint64_t start_ns = hooks->now_ns(); + auto result = fn(); + *hooks->hook_total_time_ns += (hooks->now_ns() - start_ns); + return result; + }; + candidates.clear(); visit.clear(); visit.set_visited(*entry_point); @@ -238,12 +249,18 @@ bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, candidates.emplace(*entry_point, *dist); if (hooks != nullptr && hooks->on_level0_entry != nullptr) { - hooks->on_level0_entry(*entry_point, *dist, entry_inserted_to_topk, - hooks->user_data); + run_timed_hook([&]() { + hooks->on_level0_entry(*entry_point, *dist, entry_inserted_to_topk, + hooks->user_data); + return 0; + }); } while (!candidates.empty() && !ctx->reach_scan_limit()) { if (hooks != nullptr && hooks->on_hop != nullptr) { - hooks->on_hop(hooks->user_data); + run_timed_hook([&]() { + hooks->on_hop(hooks->user_data); + return 0; + }); } auto top = candidates.begin(); @@ -310,10 +327,15 @@ bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, bool 
should_consider_candidate = (!topk.full()) || cur_dist < topk[0].second; - if (hooks != nullptr && hooks->on_visit_candidate != nullptr && - hooks->on_visit_candidate(node, cur_dist, should_consider_candidate, - hooks->user_data)) { - return true; + if (hooks != nullptr && hooks->on_visit_candidate != nullptr) { + bool should_stop = run_timed_hook([&]() { + return hooks->on_visit_candidate(node, cur_dist, + should_consider_candidate, + hooks->user_data); + }); + if (should_stop) { + return true; + } } if (should_consider_candidate) { diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.h b/src/core/algorithm/hnsw/hnsw_algorithm.h index 1937773a8..2f1052bb2 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.h +++ b/src/core/algorithm/hnsw/hnsw_algorithm.h @@ -29,6 +29,9 @@ class HnswAlgorithm { struct SearchHooks { void *user_data{nullptr}; + bool collect_timing{false}; + uint64_t (*now_ns)(){nullptr}; + uint64_t *hook_total_time_ns{nullptr}; void (*on_level0_entry)(node_id_t id, dist_t dist, bool inserted_to_topk, void *user_data){nullptr}; void (*on_hop)(void *user_data){nullptr}; diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 96b29bfb4..0bc016f95 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -72,7 +72,7 @@ struct OmegaHookState { omega::SearchContext *search_ctx{nullptr}; bool enable_early_stopping{false}; bool collect_control_timing{false}; - uint64_t *omega_control_time_ns{nullptr}; + uint64_t *hook_body_time_ns{nullptr}; }; template @@ -83,8 +83,8 @@ void RunOmegaControlHook(const OmegaHookState &state, Fn &&fn) { } auto control_start = omega::ProfilingTimer::Now(); fn(); - if (state.omega_control_time_ns != nullptr) { - *state.omega_control_time_ns += omega::ProfilingTimer::ElapsedNs( + if (state.hook_body_time_ns != nullptr) { + *state.hook_body_time_ns += omega::ProfilingTimer::ElapsedNs( control_start, omega::ProfilingTimer::Now()); } } 
@@ -234,7 +234,8 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm bool enable_early_stopping) const { auto query_total_start = omega::ProfilingTimer::Now(); const bool collect_control_timing = omega::IsControlTimingEnabled(); - uint64_t omega_control_time_ns = 0; + uint64_t hook_total_time_ns = 0; + uint64_t hook_body_time_ns = 0; // Cast context to OmegaContext to access training_query_id auto *omega_ctx = dynamic_cast(context.get()); @@ -314,27 +315,31 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm search_batch_distance_); hnsw_ctx->resize_results(count); hnsw_ctx->check_need_adjuct_ctx(entity_.doc_cnt()); - auto query_core_start = omega::ProfilingTimer::Now(); + auto query_reset_start = omega::ProfilingTimer::Now(); hnsw_ctx->reset_query(query); + auto query_reset_end = omega::ProfilingTimer::Now(); OmegaHookState hook_state; hook_state.search_ctx = omega_search_ctx; hook_state.enable_early_stopping = enable_early_stopping; hook_state.collect_control_timing = collect_control_timing; - hook_state.omega_control_time_ns = &omega_control_time_ns; + hook_state.hook_body_time_ns = &hook_body_time_ns; HnswAlgorithm::SearchHooks hooks; hooks.user_data = &hook_state; + hooks.collect_timing = collect_control_timing; + hooks.now_ns = []() { return omega::ProfilingTimer::NowNs(); }; + hooks.hook_total_time_ns = &hook_total_time_ns; hooks.on_level0_entry = OnOmegaLevel0Entry; hooks.on_hop = OnOmegaHop; hooks.on_visit_candidate = OnOmegaVisitCandidate; bool early_stop_hit = false; + auto query_search_start = omega::ProfilingTimer::Now(); int ret = alg_->search_with_hooks(hnsw_ctx, &hooks, &early_stop_hit); if (ret != 0) { omega_search_destroy(omega_search); LOG_ERROR("OMEGA search failed"); return ret; } - - auto query_core_end = omega::ProfilingTimer::Now(); + auto query_search_end = omega::ProfilingTimer::Now(); // Get final statistics int hops, cmps, collected_gt; @@ -354,12 +359,15 @@ int 
OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm uint64_t query_total_time_ns = omega::ProfilingTimer::ElapsedNs(query_total_start, omega::ProfilingTimer::Now()); - uint64_t query_core_time_ns = - omega::ProfilingTimer::ElapsedNs(query_core_start, query_core_end); - uint64_t query_setup_time_ns = - query_total_time_ns > query_core_time_ns - ? (query_total_time_ns - query_core_time_ns) - : 0; + uint64_t query_reset_time_ns = + omega::ProfilingTimer::ElapsedNs(query_reset_start, query_reset_end); + uint64_t query_search_time_ns = + omega::ProfilingTimer::ElapsedNs(query_search_start, query_search_end); + uint64_t query_setup_time_ns = 0; + if (query_total_time_ns > (query_reset_time_ns + query_search_time_ns)) { + query_setup_time_ns = + query_total_time_ns - query_reset_time_ns - query_search_time_ns; + } uint64_t query_seq = query_stats_sequence_.fetch_add(1); omega_search_ctx->GetStats(&hops, &cmps, &collected_gt); predicted_recall_avg = omega_search_ctx->GetLastPredictedRecallAvg(); @@ -385,11 +393,15 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm size_t scan_cmps = hnsw_ctx->get_scan_num(); uint64_t pairwise_dist_cnt = hnsw_ctx->get_pairwise_dist_num(); uint64_t pure_search_time_ns = 0; - uint64_t core_search_time_ns = query_core_time_ns; + uint64_t hook_dispatch_time_ns = 0; if (collect_control_timing) { pure_search_time_ns = - query_core_time_ns > omega_control_time_ns - ? (query_core_time_ns - omega_control_time_ns) + query_search_time_ns > hook_total_time_ns + ? (query_search_time_ns - hook_total_time_ns) + : 0; + hook_dispatch_time_ns = + hook_total_time_ns > hook_body_time_ns + ? 
(hook_total_time_ns - hook_body_time_ns) : 0; } bool expected = false; @@ -422,8 +434,10 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm "prediction_calls=%llu advance_calls=%llu " "collected_gt_advance=%llu max_pred_per_stop=%llu " "should_stop_ms=%.3f prediction_eval_ms=%.3f " - "setup_ms=%.3f " - "omega_control_ms=%.3f pure_search_ms=%.3f total_ms=%.3f", + "setup_ms=%.3f reset_query_ms=%.3f " + "core_search_ms=%.3f omega_control_ms=%.3f " + "hook_total_ms=%.3f hook_body_ms=%.3f " + "hook_dispatch_ms=%.3f pure_search_ms=%.3f total_ms=%.3f", static_cast(query_seq), IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, static_cast(pairwise_dist_cnt), cmps, @@ -436,7 +450,12 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static_cast(should_stop_time_ns) / 1e6, static_cast(prediction_eval_time_ns) / 1e6, static_cast(query_setup_time_ns) / 1e6, - static_cast(omega_control_time_ns) / 1e6, + static_cast(query_reset_time_ns) / 1e6, + static_cast(query_search_time_ns) / 1e6, + static_cast(hook_total_time_ns) / 1e6, + static_cast(hook_total_time_ns) / 1e6, + static_cast(hook_body_time_ns) / 1e6, + static_cast(hook_dispatch_time_ns) / 1e6, static_cast(pure_search_time_ns) / 1e6, static_cast(query_total_time_ns) / 1e6); } else { @@ -447,7 +466,8 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm "prediction_calls=%llu advance_calls=%llu " "collected_gt_advance=%llu max_pred_per_stop=%llu " "should_stop_ms=%.3f prediction_eval_ms=%.3f " - "setup_ms=%.3f core_search_ms=%.3f total_ms=%.3f", + "setup_ms=%.3f reset_query_ms=%.3f " + "core_search_ms=%.3f search_with_hooks_ms=%.3f total_ms=%.3f", static_cast(query_seq), IsModelLoaded() ? 
1 : 0, target_recall, scan_cmps, static_cast(pairwise_dist_cnt), cmps, @@ -460,7 +480,9 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static_cast(should_stop_time_ns) / 1e6, static_cast(prediction_eval_time_ns) / 1e6, static_cast(query_setup_time_ns) / 1e6, - static_cast(core_search_time_ns) / 1e6, + static_cast(query_reset_time_ns) / 1e6, + static_cast(query_search_time_ns) / 1e6, + static_cast(query_search_time_ns) / 1e6, static_cast(query_total_time_ns) / 1e6); } } From c03a460c494c99f2c325a009cdbd0a5697a8a650 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 23 Mar 2026 18:44:08 +0800 Subject: [PATCH 031/126] fix(omega): restore profiling timer callback build --- src/core/algorithm/omega/omega_streamer.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 0bc016f95..2efe37dc6 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -68,6 +68,10 @@ bool DisableOmegaModelPrediction() { return std::string(value) != "0"; } +uint64_t OmegaProfilingNowNs() { + return omega::ProfilingTimer::Now(); +} + struct OmegaHookState { omega::SearchContext *search_ctx{nullptr}; bool enable_early_stopping{false}; @@ -326,7 +330,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm HnswAlgorithm::SearchHooks hooks; hooks.user_data = &hook_state; hooks.collect_timing = collect_control_timing; - hooks.now_ns = []() { return omega::ProfilingTimer::NowNs(); }; + hooks.now_ns = &OmegaProfilingNowNs; hooks.hook_total_time_ns = &hook_total_time_ns; hooks.on_level0_entry = OnOmegaLevel0Entry; hooks.on_hop = OnOmegaHop; From 66f653329e572c9ae29a2c4976ba1617d8cb020b Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 23 Mar 2026 19:03:09 +0800 Subject: [PATCH 032/126] fix(omega): correct hook timing units --- src/core/algorithm/hnsw/hnsw_algorithm.cc | 5 
+++-- src/core/algorithm/hnsw/hnsw_algorithm.h | 1 + src/core/algorithm/omega/omega_streamer.cc | 5 +++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.cc b/src/core/algorithm/hnsw/hnsw_algorithm.cc index 9d2d71f88..bad11d7c5 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.cc +++ b/src/core/algorithm/hnsw/hnsw_algorithm.cc @@ -230,12 +230,13 @@ bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, auto run_timed_hook = [&](auto &&fn) { if (hooks == nullptr || !hooks->collect_timing || hooks->now_ns == nullptr || - hooks->hook_total_time_ns == nullptr) { + hooks->elapsed_ns == nullptr || hooks->hook_total_time_ns == nullptr) { return fn(); } uint64_t start_ns = hooks->now_ns(); auto result = fn(); - *hooks->hook_total_time_ns += (hooks->now_ns() - start_ns); + *hooks->hook_total_time_ns += + hooks->elapsed_ns(start_ns, hooks->now_ns()); return result; }; diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.h b/src/core/algorithm/hnsw/hnsw_algorithm.h index 2f1052bb2..902710345 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.h +++ b/src/core/algorithm/hnsw/hnsw_algorithm.h @@ -31,6 +31,7 @@ class HnswAlgorithm { void *user_data{nullptr}; bool collect_timing{false}; uint64_t (*now_ns)(){nullptr}; + uint64_t (*elapsed_ns)(uint64_t start, uint64_t end){nullptr}; uint64_t *hook_total_time_ns{nullptr}; void (*on_level0_entry)(node_id_t id, dist_t dist, bool inserted_to_topk, void *user_data){nullptr}; diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 2efe37dc6..165f0d6ba 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -72,6 +72,10 @@ uint64_t OmegaProfilingNowNs() { return omega::ProfilingTimer::Now(); } +uint64_t OmegaProfilingElapsedNs(uint64_t start, uint64_t end) { + return omega::ProfilingTimer::ElapsedNs(start, end); +} + struct OmegaHookState { omega::SearchContext 
*search_ctx{nullptr}; bool enable_early_stopping{false}; @@ -331,6 +335,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm hooks.user_data = &hook_state; hooks.collect_timing = collect_control_timing; hooks.now_ns = &OmegaProfilingNowNs; + hooks.elapsed_ns = &OmegaProfilingElapsedNs; hooks.hook_total_time_ns = &hook_total_time_ns; hooks.on_level0_entry = OnOmegaLevel0Entry; hooks.on_hop = OnOmegaHop; From 16e2fc486b3669cce9dcb47ba58fffd0ebeecc56 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Tue, 24 Mar 2026 03:07:49 +0800 Subject: [PATCH 033/126] Make omega model training deterministic --- src/db/training/omega_model_trainer.cc | 2 ++ src/db/training/omega_model_trainer.h | 2 ++ thirdparty/omega/OMEGALib | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/db/training/omega_model_trainer.cc b/src/db/training/omega_model_trainer.cc index b8fecd1ef..cdbaedb4d 100644 --- a/src/db/training/omega_model_trainer.cc +++ b/src/db/training/omega_model_trainer.cc @@ -107,6 +107,8 @@ Status OmegaModelTrainer::TrainModelWithGtCmps( trainer_options.num_leaves = options.num_leaves; trainer_options.learning_rate = options.learning_rate; trainer_options.num_threads = options.num_threads; + trainer_options.seed = options.seed; + trainer_options.deterministic = options.deterministic; trainer_options.verbose = options.verbose; trainer_options.topk = gt_cmps_data.topk > 0 ? 
gt_cmps_data.topk : 100; diff --git a/src/db/training/omega_model_trainer.h b/src/db/training/omega_model_trainer.h index 4560ebd9d..4859188d4 100644 --- a/src/db/training/omega_model_trainer.h +++ b/src/db/training/omega_model_trainer.h @@ -45,6 +45,8 @@ struct OmegaModelTrainerOptions { int num_leaves = 31; double learning_rate = 0.1; int num_threads = DefaultOmegaTrainerThreads(); + int seed = 42; + bool deterministic = true; // Enable verbose logging during training bool verbose = false; diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index efb5d6fb2..492e51c0f 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit efb5d6fb2a854f74a6a22cc64444aee880348b7b +Subproject commit 492e51c0fa2d291cee99d4a8a4afdfe512608e7e From 4f66406206c49ab6861b95e6e3e824969450452f Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Tue, 24 Mar 2026 03:08:12 +0800 Subject: [PATCH 034/126] Improve HNSW profiling and timing consistency --- scripts/benchmark_cohere_10m.py | 1 + scripts/benchmark_cohere_1m.py | 1 + src/core/algorithm/hnsw/hnsw_streamer.cc | 24 +++---- src/core/algorithm/omega/omega_streamer.cc | 39 ++++++----- src/core/utility/rdtsc_timer.cc | 80 ++++++++++++++++++++++ src/core/utility/rdtsc_timer.h | 45 ++++++++++++ 6 files changed, 162 insertions(+), 28 deletions(-) create mode 100644 src/core/utility/rdtsc_timer.cc create mode 100644 src/core/utility/rdtsc_timer.h diff --git a/scripts/benchmark_cohere_10m.py b/scripts/benchmark_cohere_10m.py index 8207d536b..f19913e80 100644 --- a/scripts/benchmark_cohere_10m.py +++ b/scripts/benchmark_cohere_10m.py @@ -125,6 +125,7 @@ def build_hnsw_profile(metrics: dict, output: str) -> dict: "profile_avg_end2end_latency_ms": avg_metric(query_records, "latency_ms"), "profile_avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), "profile_avg_scan_cmps": avg_metric(query_records, "cmps"), + "profile_avg_pure_search_ms": avg_metric(query_records, "pure_search_ms"), 
"profile_serial_avg_latency_s": serial_summary.get("avg_latency"), "profile_serial_p99_s": serial_summary.get("p99"), "profile_serial_p95_s": serial_summary.get("p95"), diff --git a/scripts/benchmark_cohere_1m.py b/scripts/benchmark_cohere_1m.py index db6c52915..fa8583e07 100755 --- a/scripts/benchmark_cohere_1m.py +++ b/scripts/benchmark_cohere_1m.py @@ -127,6 +127,7 @@ def build_hnsw_profile(metrics: dict, output: str) -> dict: "profile_avg_end2end_latency_ms": avg_metric(query_records, "latency_ms"), "profile_avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), "profile_avg_scan_cmps": avg_metric(query_records, "cmps"), + "profile_avg_pure_search_ms": avg_metric(query_records, "pure_search_ms"), "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), "profile_serial_p99_s": serial_summary.get("p99"), "profile_serial_p95_s": serial_summary.get("p95"), diff --git a/src/core/algorithm/hnsw/hnsw_streamer.cc b/src/core/algorithm/hnsw/hnsw_streamer.cc index bd5a2e39d..7e990ddac 100644 --- a/src/core/algorithm/hnsw/hnsw_streamer.cc +++ b/src/core/algorithm/hnsw/hnsw_streamer.cc @@ -14,11 +14,11 @@ #include "hnsw_streamer.h" #include #include -#include #include #include #include #include +#include "utility/rdtsc_timer.h" #include "utility/sparse_utility.h" #include "hnsw_algorithm.h" #include "hnsw_context.h" @@ -660,22 +660,25 @@ int HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, ctx->resize_results(count); ctx->check_need_adjuct_ctx(entity_.doc_cnt()); for (size_t q = 0; q < count; ++q) { - auto query_start = std::chrono::steady_clock::now(); + auto query_start = RdtscTimer::Now(); ctx->reset_query(query); + auto query_search_start = RdtscTimer::Now(); ret = alg_->search(ctx); if (ailego_unlikely(ret != 0)) { LOG_ERROR("Hnsw searcher fast search failed"); return ret; } - auto query_latency_ns = - std::chrono::duration_cast( - std::chrono::steady_clock::now() - query_start) - .count(); + auto query_search_end = 
RdtscTimer::Now(); + auto query_search_time_ns = + RdtscTimer::ElapsedNs(query_search_start, query_search_end); + auto query_latency_ns = RdtscTimer::ElapsedNs(query_start, RdtscTimer::Now()); uint64_t query_seq = HnswQueryStatsSequence().fetch_add(1); if (ShouldLogHnswQueryStats(query_seq)) { - LOG_INFO("HNSW query stats: query_seq=%llu cmps=%zu pairwise_dist_cnt=%zu latency_ms=%.3f", + LOG_INFO("HNSW query stats: query_seq=%llu cmps=%zu pairwise_dist_cnt=%zu " + "pure_search_ms=%.3f latency_ms=%.3f", static_cast(query_seq), ctx->get_scan_num(), ctx->get_pairwise_dist_num(), + static_cast(query_search_time_ns) / 1e6, static_cast(query_latency_ns) / 1e6); } ctx->topk_to_result(q); @@ -780,7 +783,7 @@ int HnswStreamer::search_bf_impl( auto &topk = ctx->topk_heap(); for (size_t q = 0; q < count; ++q) { - auto query_start = std::chrono::steady_clock::now(); + auto query_start = RdtscTimer::Now(); ctx->reset_query(query); topk.clear(); for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) { @@ -793,10 +796,7 @@ int HnswStreamer::search_bf_impl( topk.emplace(id, dist); } } - auto query_latency_ns = - std::chrono::duration_cast( - std::chrono::steady_clock::now() - query_start) - .count(); + auto query_latency_ns = RdtscTimer::ElapsedNs(query_start, RdtscTimer::Now()); uint64_t query_seq = HnswQueryStatsSequence().fetch_add(1); if (ShouldLogHnswQueryStats(query_seq)) { LOG_INFO("HNSW query stats: query_seq=%llu cmps=%zu pairwise_dist_cnt=%zu latency_ms=%.3f", diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 165f0d6ba..61a66fe85 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -17,12 +17,12 @@ #include #include #include +#include "utility/rdtsc_timer.h" #include "../hnsw/hnsw_entity.h" #include "../hnsw/hnsw_context.h" #include "omega_context.h" #include "omega_params.h" #include -#include #include #include @@ -68,12 +68,20 @@ bool 
DisableOmegaModelPrediction() { return std::string(value) != "0"; } +bool IsOmegaControlTimingEnabled() { + const char* value = std::getenv("ZVEC_OMEGA_PROFILE_CONTROL_TIMING"); + if (value == nullptr) { + return false; + } + return value[0] != '\0' && value[0] != '0'; +} + uint64_t OmegaProfilingNowNs() { - return omega::ProfilingTimer::Now(); + return RdtscTimer::Now(); } uint64_t OmegaProfilingElapsedNs(uint64_t start, uint64_t end) { - return omega::ProfilingTimer::ElapsedNs(start, end); + return RdtscTimer::ElapsedNs(start, end); } struct OmegaHookState { @@ -89,11 +97,11 @@ void RunOmegaControlHook(const OmegaHookState &state, Fn &&fn) { fn(); return; } - auto control_start = omega::ProfilingTimer::Now(); + auto control_start = RdtscTimer::Now(); fn(); if (state.hook_body_time_ns != nullptr) { - *state.hook_body_time_ns += omega::ProfilingTimer::ElapsedNs( - control_start, omega::ProfilingTimer::Now()); + *state.hook_body_time_ns += RdtscTimer::ElapsedNs( + control_start, RdtscTimer::Now()); } } @@ -240,8 +248,8 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, Context::Pointer &context, bool enable_early_stopping) const { - auto query_total_start = omega::ProfilingTimer::Now(); - const bool collect_control_timing = omega::IsControlTimingEnabled(); + auto query_total_start = RdtscTimer::Now(); + const bool collect_control_timing = IsOmegaControlTimingEnabled(); uint64_t hook_total_time_ns = 0; uint64_t hook_body_time_ns = 0; @@ -323,9 +331,9 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm search_batch_distance_); hnsw_ctx->resize_results(count); hnsw_ctx->check_need_adjuct_ctx(entity_.doc_cnt()); - auto query_reset_start = omega::ProfilingTimer::Now(); + auto query_reset_start = RdtscTimer::Now(); hnsw_ctx->reset_query(query); - auto query_reset_end = omega::ProfilingTimer::Now(); + auto 
query_reset_end = RdtscTimer::Now(); OmegaHookState hook_state; hook_state.search_ctx = omega_search_ctx; hook_state.enable_early_stopping = enable_early_stopping; @@ -341,14 +349,14 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm hooks.on_hop = OnOmegaHop; hooks.on_visit_candidate = OnOmegaVisitCandidate; bool early_stop_hit = false; - auto query_search_start = omega::ProfilingTimer::Now(); + auto query_search_start = RdtscTimer::Now(); int ret = alg_->search_with_hooks(hnsw_ctx, &hooks, &early_stop_hit); if (ret != 0) { omega_search_destroy(omega_search); LOG_ERROR("OMEGA search failed"); return ret; } - auto query_search_end = omega::ProfilingTimer::Now(); + auto query_search_end = RdtscTimer::Now(); // Get final statistics int hops, cmps, collected_gt; @@ -366,12 +374,11 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm unsigned long long should_stop_calls_with_advance = 0; unsigned long long max_prediction_calls_per_should_stop = 0; uint64_t query_total_time_ns = - omega::ProfilingTimer::ElapsedNs(query_total_start, - omega::ProfilingTimer::Now()); + RdtscTimer::ElapsedNs(query_total_start, RdtscTimer::Now()); uint64_t query_reset_time_ns = - omega::ProfilingTimer::ElapsedNs(query_reset_start, query_reset_end); + RdtscTimer::ElapsedNs(query_reset_start, query_reset_end); uint64_t query_search_time_ns = - omega::ProfilingTimer::ElapsedNs(query_search_start, query_search_end); + RdtscTimer::ElapsedNs(query_search_start, query_search_end); uint64_t query_setup_time_ns = 0; if (query_total_time_ns > (query_reset_time_ns + query_search_time_ns)) { query_setup_time_ns = diff --git a/src/core/utility/rdtsc_timer.cc b/src/core/utility/rdtsc_timer.cc new file mode 100644 index 000000000..9a69a59d5 --- /dev/null +++ b/src/core/utility/rdtsc_timer.cc @@ -0,0 +1,80 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use 
this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "utility/rdtsc_timer.h" + +#include + +namespace zvec { +namespace core { + +RdtscTimer::tick_t RdtscTimer::Now() { +#if ZVEC_CORE_HAS_TSC + uint32_t lo = 0; + uint32_t hi = 0; + uint32_t aux = 0; + __asm__ __volatile__("rdtscp" + : "=a"(lo), "=d"(hi), "=c"(aux) + : + :); + return (static_cast(hi) << 32) | lo; +#else + return MonotonicRawNs(); +#endif +} + +uint64_t RdtscTimer::ElapsedNs(tick_t start, tick_t end) { +#if ZVEC_CORE_HAS_TSC + if (end <= start) { + return 0; + } + return static_cast( + static_cast(end - start) * NsPerTick()); +#else + return end > start ? 
(end - start) : 0; +#endif +} + +uint64_t RdtscTimer::MonotonicRawNs() { + struct timespec ts {}; + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + return static_cast(ts.tv_sec) * 1000000000ull + + static_cast(ts.tv_nsec); +} + +double RdtscTimer::NsPerTick() { + static const double ns_per_tick = CalibrateNsPerTick(); + return ns_per_tick; +} + +double RdtscTimer::CalibrateNsPerTick() { + constexpr uint64_t kMinCalibrationNs = 5 * 1000 * 1000; + const uint64_t start_ns = MonotonicRawNs(); + const tick_t start_tick = Now(); + + uint64_t end_ns = start_ns; + while (end_ns - start_ns < kMinCalibrationNs) { + end_ns = MonotonicRawNs(); + } + + const tick_t end_tick = Now(); + if (end_tick <= start_tick) { + return 1.0; + } + return static_cast(end_ns - start_ns) / + static_cast(end_tick - start_tick); +} + +} // namespace core +} // namespace zvec diff --git a/src/core/utility/rdtsc_timer.h b/src/core/utility/rdtsc_timer.h new file mode 100644 index 000000000..4d244af9b --- /dev/null +++ b/src/core/utility/rdtsc_timer.h @@ -0,0 +1,45 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ZVEC_CORE_UTILITY_RDTSC_TIMER_H_ +#define ZVEC_CORE_UTILITY_RDTSC_TIMER_H_ + +#include + +#if defined(__x86_64__) || defined(__i386__) +#define ZVEC_CORE_HAS_TSC 1 +#else +#define ZVEC_CORE_HAS_TSC 0 +#endif + +namespace zvec { +namespace core { + +class RdtscTimer { + public: + using tick_t = uint64_t; + + static tick_t Now(); + static uint64_t ElapsedNs(tick_t start, tick_t end); + + private: + static uint64_t MonotonicRawNs(); + static double NsPerTick(); + static double CalibrateNsPerTick(); +}; + +} // namespace core +} // namespace zvec + +#endif // ZVEC_CORE_UTILITY_RDTSC_TIMER_H_ From c5ce7cdadd8459091c2034dbc5f208e93959f2f6 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Tue, 24 Mar 2026 04:08:48 +0800 Subject: [PATCH 035/126] Add HNSW hooks microbenchmark tooling --- scripts/perf_ab_search_core.sh | 24 ++ src/core/algorithm/hnsw/hnsw_searcher.cc | 4 + src/core/algorithm/hnsw/hnsw_searcher.h | 2 + src/core/algorithm/hnsw/hnsw_streamer.cc | 22 +- tools/core/CMakeLists.txt | 10 + tools/core/hnsw_hooks_microbench.cc | 316 +++++++++++++++++++++++ 6 files changed, 375 insertions(+), 3 deletions(-) create mode 100644 tools/core/hnsw_hooks_microbench.cc diff --git a/scripts/perf_ab_search_core.sh b/scripts/perf_ab_search_core.sh index 690f1112e..3b56cbfcf 100644 --- a/scripts/perf_ab_search_core.sh +++ b/scripts/perf_ab_search_core.sh @@ -198,6 +198,12 @@ HNSW_CMD=( "${PYTHON_BIN}" -m vectordb_bench.cli.vectordbbench "${HNSW_ARGS[@]}" ) +HNSW_EMPTY_HOOKS_CMD=( + "${COMMON_ENV[@]}" + ZVEC_HNSW_ENABLE_EMPTY_HOOKS=1 + "${PYTHON_BIN}" -m vectordb_bench.cli.vectordbbench "${HNSW_ARGS[@]}" +) + OMEGA_HOOKS_CMD=( "${COMMON_ENV[@]}" ZVEC_OMEGA_DISABLE_MODEL_PREDICTION=1 @@ -220,6 +226,10 @@ case "${MODE}" in "HNSW core search perf (${DATASET})" \ "${HNSW_CMD[@]}" + run_perf \ + "HNSW empty-hooks core search perf (${DATASET})" \ + "${HNSW_EMPTY_HOOKS_CMD[@]}" + run_perf \ "OMEGA hooks-only core search perf (${DATASET})" \ "${OMEGA_HOOKS_CMD[@]}" @@ -230,6 
+240,11 @@ case "${MODE}" in "hnsw_core" \ "${HNSW_CMD[@]}" + run_record \ + "HNSW empty-hooks core search hotspots (${DATASET})" \ + "hnsw_empty_hooks" \ + "${HNSW_EMPTY_HOOKS_CMD[@]}" + run_record \ "OMEGA hooks-only core search hotspots (${DATASET})" \ "omega_hooks_only" \ @@ -240,6 +255,10 @@ case "${MODE}" in "HNSW core search perf (${DATASET})" \ "${HNSW_CMD[@]}" + run_perf \ + "HNSW empty-hooks core search perf (${DATASET})" \ + "${HNSW_EMPTY_HOOKS_CMD[@]}" + run_perf \ "OMEGA hooks-only core search perf (${DATASET})" \ "${OMEGA_HOOKS_CMD[@]}" @@ -249,6 +268,11 @@ case "${MODE}" in "hnsw_core" \ "${HNSW_CMD[@]}" + run_record \ + "HNSW empty-hooks core search hotspots (${DATASET})" \ + "hnsw_empty_hooks" \ + "${HNSW_EMPTY_HOOKS_CMD[@]}" + run_record \ "OMEGA hooks-only core search hotspots (${DATASET})" \ "omega_hooks_only" \ diff --git a/src/core/algorithm/hnsw/hnsw_searcher.cc b/src/core/algorithm/hnsw/hnsw_searcher.cc index cd6318658..085533c04 100644 --- a/src/core/algorithm/hnsw/hnsw_searcher.cc +++ b/src/core/algorithm/hnsw/hnsw_searcher.cc @@ -190,6 +190,10 @@ int HnswSearcher::update_context(HnswContext *ctx) const { entity, magic_); } +int HnswSearcher::fast_search(HnswContext *ctx) const { + return alg_->fast_search(ctx); +} + int HnswSearcher::fast_search_with_hooks( HnswContext *ctx, const HnswAlgorithm::SearchHooks *hooks, bool *stopped_early) const { diff --git a/src/core/algorithm/hnsw/hnsw_searcher.h b/src/core/algorithm/hnsw/hnsw_searcher.h index 60cef55d6..4215fc7f1 100644 --- a/src/core/algorithm/hnsw/hnsw_searcher.h +++ b/src/core/algorithm/hnsw/hnsw_searcher.h @@ -112,6 +112,8 @@ class HnswSearcher : public IndexSearcher { int update_context(HnswContext *ctx) const; protected: + int fast_search(HnswContext *ctx) const; + int fast_search_with_hooks(HnswContext *ctx, const HnswAlgorithm::SearchHooks *hooks, bool *stopped_early) const; diff --git a/src/core/algorithm/hnsw/hnsw_streamer.cc b/src/core/algorithm/hnsw/hnsw_streamer.cc index 
7e990ddac..a6f2e8f27 100644 --- a/src/core/algorithm/hnsw/hnsw_streamer.cc +++ b/src/core/algorithm/hnsw/hnsw_streamer.cc @@ -58,6 +58,14 @@ bool ShouldLogHnswQueryStats(uint64_t query_seq) { return query_seq < limit; } +bool UseEmptyHnswHooks() { + const char* value = std::getenv("ZVEC_HNSW_ENABLE_EMPTY_HOOKS"); + if (value == nullptr) { + return false; + } + return std::string(value) != "0"; +} + std::atomic& HnswQueryStatsSequence() { static std::atomic sequence{0}; return sequence; @@ -659,11 +667,18 @@ int HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_); ctx->resize_results(count); ctx->check_need_adjuct_ctx(entity_.doc_cnt()); + const bool use_empty_hooks = UseEmptyHnswHooks(); + HnswAlgorithm::SearchHooks empty_hooks; for (size_t q = 0; q < count; ++q) { auto query_start = RdtscTimer::Now(); ctx->reset_query(query); auto query_search_start = RdtscTimer::Now(); - ret = alg_->search(ctx); + if (use_empty_hooks) { + bool stopped_early = false; + ret = alg_->search_with_hooks(ctx, &empty_hooks, &stopped_early); + } else { + ret = alg_->search(ctx); + } if (ailego_unlikely(ret != 0)) { LOG_ERROR("Hnsw searcher fast search failed"); return ret; @@ -674,9 +689,10 @@ int HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, auto query_latency_ns = RdtscTimer::ElapsedNs(query_start, RdtscTimer::Now()); uint64_t query_seq = HnswQueryStatsSequence().fetch_add(1); if (ShouldLogHnswQueryStats(query_seq)) { - LOG_INFO("HNSW query stats: query_seq=%llu cmps=%zu pairwise_dist_cnt=%zu " - "pure_search_ms=%.3f latency_ms=%.3f", + LOG_INFO("HNSW query stats: query_seq=%llu hook_mode=%s cmps=%zu " + "pairwise_dist_cnt=%zu pure_search_ms=%.3f latency_ms=%.3f", static_cast(query_seq), ctx->get_scan_num(), + use_empty_hooks ? 
"empty" : "none", ctx->get_pairwise_dist_num(), static_cast(query_search_time_ns) / 1e6, static_cast(query_latency_ns) / 1e6); diff --git a/tools/core/CMakeLists.txt b/tools/core/CMakeLists.txt index df34fb454..ecb1c2874 100644 --- a/tools/core/CMakeLists.txt +++ b/tools/core/CMakeLists.txt @@ -41,6 +41,16 @@ cc_binary( LIBS omega ) +if(ZVEC_ENABLE_OMEGA) +cc_binary( + NAME hnsw_hooks_microbench + STRICT PACKED + SRCS hnsw_hooks_microbench.cc + INCS ${PROJECT_ROOT_DIR}/src/core/ ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include + LIBS magic_enum core_framework core_metric core_quantizer core_utility core_knn_hnsw omega core_interface +) +endif() + cc_binary( NAME recall_original STRICT PACKED diff --git a/tools/core/hnsw_hooks_microbench.cc b/tools/core/hnsw_hooks_microbench.cc new file mode 100644 index 000000000..f61ebc732 --- /dev/null +++ b/tools/core/hnsw_hooks_microbench.cc @@ -0,0 +1,316 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zvec/core/framework/index_factory.h" +#include "utility/rdtsc_timer.h" +#include "algorithm/hnsw/hnsw_context.h" +#include "algorithm/hnsw/hnsw_params.h" +#include "algorithm/hnsw/hnsw_searcher.h" +#include "omega/search_context.h" + +namespace zvec { +namespace core { + +namespace { + +struct Options { + std::string index_path; + uint32_t ef_search = 180; + uint32_t topk = 100; + uint32_t query_count = 1000; + uint32_t iterations = 1000; + uint32_t warmup = 100; + uint32_t seed = 12345; + int window_size = 100; + float target_recall = 0.90f; +}; + +void PrintUsage(const char* argv0) { + std::cerr + << "Usage: " << argv0 << " --index-path [options]\n" + << "Options:\n" + << " --ef-search HNSW ef_search\n" + << " --topk Search topk\n" + << " --query-count Number of sampled queries\n" + << " --iterations Number of measured iterations\n" + << " --warmup Number of warmup iterations\n" + << " --seed RNG seed for sampled queries\n" + << " --window-size OMEGA window size for hooks-only mode\n" + << " --target-recall OMEGA target recall for hooks-only mode\n"; +} + +bool ParseArgs(int argc, char** argv, Options* opts) { + for (int i = 1; i < argc; ++i) { + const char* arg = argv[i]; + if (std::strcmp(arg, "--index-path") == 0 && i + 1 < argc) { + opts->index_path = argv[++i]; + } else if (std::strcmp(arg, "--ef-search") == 0 && i + 1 < argc) { + opts->ef_search = static_cast(std::strtoul(argv[++i], nullptr, 10)); + } else if (std::strcmp(arg, "--topk") == 0 && i + 1 < argc) { + opts->topk = static_cast(std::strtoul(argv[++i], nullptr, 10)); + } else if (std::strcmp(arg, "--query-count") == 0 && i + 1 < argc) { + opts->query_count = + static_cast(std::strtoul(argv[++i], nullptr, 10)); + } else if (std::strcmp(arg, "--iterations") == 0 && i + 1 < argc) { + opts->iterations = + static_cast(std::strtoul(argv[++i], nullptr, 10)); + } else if (std::strcmp(arg, 
"--warmup") == 0 && i + 1 < argc) { + opts->warmup = + static_cast(std::strtoul(argv[++i], nullptr, 10)); + } else if (std::strcmp(arg, "--seed") == 0 && i + 1 < argc) { + opts->seed = static_cast(std::strtoul(argv[++i], nullptr, 10)); + } else if (std::strcmp(arg, "--window-size") == 0 && i + 1 < argc) { + opts->window_size = std::atoi(argv[++i]); + } else if (std::strcmp(arg, "--target-recall") == 0 && i + 1 < argc) { + opts->target_recall = std::strtof(argv[++i], nullptr); + } else if (std::strcmp(arg, "--help") == 0 || + std::strcmp(arg, "-h") == 0) { + PrintUsage(argv[0]); + return false; + } else { + std::cerr << "Unknown argument: " << arg << "\n"; + PrintUsage(argv[0]); + return false; + } + } + + if (opts->index_path.empty()) { + PrintUsage(argv[0]); + return false; + } + opts->query_count = std::max(1u, opts->query_count); + opts->iterations = std::max(1u, opts->iterations); + opts->topk = std::max(1u, opts->topk); + opts->window_size = std::max(1, opts->window_size); + return true; +} + +class ExposedHnswSearcher : public HnswSearcher { + public: + int Init(const ailego::Params& params) { return HnswSearcher::init(params); } + + int Load(IndexStorage::Pointer container) { + return HnswSearcher::load(std::move(container), nullptr); + } + + ContextPointer CreateContext() const { return HnswSearcher::create_context(); } + + int FastSearch(HnswContext* ctx) const { return fast_search(ctx); } + + int FastSearchWithHooks(HnswContext* ctx, + const HnswAlgorithm::SearchHooks* hooks, + bool* stopped_early) const { + return fast_search_with_hooks(ctx, hooks, stopped_early); + } + + const IndexMeta& MetaPublic() const { return meta(); } +}; + +struct OmegaHookState { + omega::SearchContext* search_ctx{nullptr}; + bool enable_early_stopping{false}; +}; + +void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, + void* user_data) { + auto& state = *static_cast(user_data); + state.search_ctx->SetDistStart(dist); + 
state.search_ctx->ReportVisitCandidate(id, dist, true); +} + +void OnOmegaHop(void* user_data) { + auto& state = *static_cast(user_data); + state.search_ctx->ReportHop(); +} + +bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, + bool should_consider_candidate, void* user_data) { + auto& state = *static_cast(user_data); + state.search_ctx->ReportVisitCandidate(id, dist, should_consider_candidate); + if (!state.enable_early_stopping) { + return false; + } + return state.search_ctx->ShouldPredict() && + state.search_ctx->ShouldStopEarly(); +} + +struct BenchStats { + double avg_ns{0.0}; + double avg_cmps{0.0}; + double ns_per_cmp{0.0}; + double checksum{0.0}; +}; + +std::vector SampleIndexVectors(HnswContext* ctx, const IndexMeta& meta, + uint32_t count, uint32_t seed) { + const auto& entity = ctx->get_entity(); + const uint32_t doc_cnt = static_cast(entity.doc_cnt()); + std::vector ids(doc_cnt); + std::iota(ids.begin(), ids.end(), 0u); + std::mt19937 rng(seed); + std::shuffle(ids.begin(), ids.end(), rng); + + count = std::min(count, doc_cnt); + std::vector queries; + queries.reserve(count); + for (uint32_t i = 0; i < count; ++i) { + queries.push_back(entity.get_vector(ids[i])); + } + + std::cout << "Using " << queries.size() << " sampled in-index queries" + << " element_size=" << meta.element_size() + << " doc_cnt=" << doc_cnt << "\n"; + return queries; +} + +template +BenchStats RunBench(const std::string& name, HnswContext* ctx, + const std::vector& queries, uint32_t warmup, + uint32_t iterations, Fn&& fn) { + for (uint32_t i = 0; i < warmup; ++i) { + const void* query = queries[i % queries.size()]; + ctx->clear(); + ctx->resize_results(1); + ctx->reset_query(query); + fn(); + } + + uint64_t total_ns = 0; + uint64_t total_cmps = 0; + double checksum = 0.0; + + for (uint32_t i = 0; i < iterations; ++i) { + const void* query = queries[i % queries.size()]; + ctx->clear(); + ctx->resize_results(1); + ctx->reset_query(query); + const auto start = RdtscTimer::Now(); 
+ fn(); + const auto end = RdtscTimer::Now(); + total_ns += RdtscTimer::ElapsedNs(start, end); + total_cmps += ctx->get_pairwise_dist_num(); + if (!ctx->topk_heap().empty()) { + checksum += ctx->topk_heap()[0].second; + } + } + + BenchStats stats; + stats.avg_ns = static_cast(total_ns) / iterations; + stats.avg_cmps = static_cast(total_cmps) / iterations; + stats.ns_per_cmp = + total_cmps == 0 ? 0.0 : static_cast(total_ns) / total_cmps; + stats.checksum = checksum; + + std::cout << std::fixed << std::setprecision(3) + << name << ": avg_ns=" << stats.avg_ns + << " avg_cmps=" << stats.avg_cmps + << " ns_per_cmp=" << stats.ns_per_cmp + << " checksum=" << stats.checksum << "\n"; + return stats; +} + +} // namespace + +} // namespace core +} // namespace zvec + +int main(int argc, char** argv) { + using namespace zvec::core; + + Options opts; + if (!ParseArgs(argc, argv, &opts)) { + return 1; + } + + ailego::Params params; + params.set(PARAM_HNSW_SEARCHER_EF, opts.ef_search); + + ExposedHnswSearcher searcher; + if (searcher.Init(params) != 0) { + std::cerr << "Failed to init HNSW searcher\n"; + return 2; + } + + auto storage = IndexFactory::CreateStorage("MMapFileStorage"); + if (!storage || storage->open(opts.index_path, false) != 0) { + std::cerr << "Failed to open index storage: " << opts.index_path << "\n"; + return 3; + } + + if (searcher.Load(storage) != 0) { + std::cerr << "Failed to load HNSW searcher\n"; + return 4; + } + + auto context = searcher.CreateContext(); + auto* ctx = dynamic_cast(context.get()); + if (ctx == nullptr) { + std::cerr << "Failed to create HNSW context\n"; + return 5; + } + ctx->set_topk(opts.topk); + + auto queries = SampleIndexVectors(ctx, searcher.MetaPublic(), opts.query_count, + opts.seed); + if (queries.empty()) { + std::cerr << "No queries sampled from index\n"; + return 6; + } + + HnswAlgorithm::SearchHooks empty_hooks; + + omega::SearchContext omega_search_ctx( + nullptr, nullptr, opts.target_recall, static_cast(opts.topk), + 
opts.window_size); + OmegaHookState omega_hook_state; + omega_hook_state.search_ctx = &omega_search_ctx; + omega_hook_state.enable_early_stopping = false; + HnswAlgorithm::SearchHooks omega_hooks; + omega_hooks.user_data = &omega_hook_state; + omega_hooks.on_level0_entry = OnOmegaLevel0Entry; + omega_hooks.on_hop = OnOmegaHop; + omega_hooks.on_visit_candidate = OnOmegaVisitCandidate; + + RunBench("alg_fast_search", ctx, queries, opts.warmup, opts.iterations, [&]() { + return searcher.FastSearch(ctx); + }); + + RunBench("alg_fast_search_with_empty_hooks", ctx, queries, opts.warmup, + opts.iterations, [&]() { + bool stopped_early = false; + return searcher.FastSearchWithHooks(ctx, &empty_hooks, + &stopped_early); + }); + + RunBench("alg_fast_search_with_omega_hooks_only", ctx, queries, opts.warmup, + opts.iterations, [&]() { + omega_search_ctx.Reset(); + bool stopped_early = false; + return searcher.FastSearchWithHooks(ctx, &omega_hooks, + &stopped_early); + }); + + return 0; +} From 0f53cf6988c843f1b23c9388f69fbdd5d1f392e6 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Tue, 24 Mar 2026 16:42:12 +0800 Subject: [PATCH 036/126] Add HNSW hooks perf analysis tooling --- scripts/perf_hnsw_hooks_microbench.sh | 114 +++++++++++++++++++++++ tools/core/CMakeLists.txt | 33 +++++-- tools/core/hnsw_hooks_microbench.cc | 127 +++++++++++++++++--------- 3 files changed, 225 insertions(+), 49 deletions(-) create mode 100755 scripts/perf_hnsw_hooks_microbench.sh diff --git a/scripts/perf_hnsw_hooks_microbench.sh b/scripts/perf_hnsw_hooks_microbench.sh new file mode 100755 index 000000000..a3cdb7507 --- /dev/null +++ b/scripts/perf_hnsw_hooks_microbench.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ZVEC_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" + +BIN="${BIN:-${ZVEC_ROOT}/build/bin/hnsw_hooks_microbench}" +INDEX_PATH="${INDEX_PATH:-${ZVEC_ROOT}/benchmark_results/cohere_1m_hnsw/0/dense.qindex.5.proxima}" +OUT_DIR="${OUT_DIR:-${ZVEC_ROOT}/perf_results/hnsw_hooks_microbench}" + +CPU_CORE="${CPU_CORE:-0}" +REPEAT="${REPEAT:-5}" +EVENTS="${EVENTS:-cycles,instructions,branches,branch-misses,cache-references,cache-misses}" +RECORD_FREQ="${RECORD_FREQ:-999}" +CALL_GRAPH_MODE="${CALL_GRAPH_MODE:-fp}" +TOPN="${TOPN:-80}" +MODE_FILTER="${MODE_FILTER:-all}" + +QUERY_COUNT="${QUERY_COUNT:-1000}" +WARMUP="${WARMUP:-200}" +ITERATIONS="${ITERATIONS:-2000}" +EF_SEARCH="${EF_SEARCH:-180}" +TOPK="${TOPK:-100}" +WINDOW_SIZE="${WINDOW_SIZE:-100}" +TARGET_RECALL="${TARGET_RECALL:-0.91}" +SEED="${SEED:-12345}" + +if ! command -v perf >/dev/null 2>&1; then + echo "perf not found in PATH" >&2 + exit 1 +fi + +if [[ ! -x "${BIN}" ]]; then + echo "microbench binary not found: ${BIN}" >&2 + exit 1 +fi + +if [[ ! -f "${INDEX_PATH}" ]]; then + echo "index file not found: ${INDEX_PATH}" >&2 + exit 1 +fi + +mkdir -p "${OUT_DIR}" + +COMMON_ARGS=( + "${BIN}" + --index-path "${INDEX_PATH}" + --ef-search "${EF_SEARCH}" + --topk "${TOPK}" + --query-count "${QUERY_COUNT}" + --warmup "${WARMUP}" + --iterations "${ITERATIONS}" + --window-size "${WINDOW_SIZE}" + --target-recall "${TARGET_RECALL}" + --seed "${SEED}" +) + +run_stat() { + local mode="$1" + echo + echo "============================================================" + echo "perf stat: ${mode}" + echo "============================================================" + taskset -c "${CPU_CORE}" perf stat -r "${REPEAT}" -e "${EVENTS}" \ + "${COMMON_ARGS[@]}" --mode "${mode}" +} + +run_record() { + local mode="$1" + local data_file="${OUT_DIR}/${mode}.data" + local report_file="${OUT_DIR}/${mode}.report.txt" + local zvec_report_file="${OUT_DIR}/${mode}.zvec_only.report.txt" + + echo + echo "============================================================" + echo "perf record: ${mode}" + 
echo "============================================================" + echo "perf.data: ${data_file}" + echo "report: ${report_file}" + echo "zvec-only: ${zvec_report_file}" + + taskset -c "${CPU_CORE}" perf record -F "${RECORD_FREQ}" -g \ + --call-graph "${CALL_GRAPH_MODE}" -o "${data_file}" -- \ + "${COMMON_ARGS[@]}" --mode "${mode}" + + perf report --stdio --no-children -i "${data_file}" --percent-limit 0.3 \ + > "${report_file}" + sed -n "1,${TOPN}p" "${report_file}" + + perf report --stdio --no-children -i "${data_file}" \ + --sort dso,symbol --percent-limit 0.05 > "${zvec_report_file}" + sed -n "1,${TOPN}p" "${zvec_report_file}" +} + +run_mode() { + local mode="$1" + run_stat "${mode}" + run_record "${mode}" +} + +case "${MODE_FILTER}" in + all) + run_mode fast + run_mode empty + run_mode omega + ;; + fast|empty|omega) + run_mode "${MODE_FILTER}" + ;; + *) + echo "Unsupported MODE_FILTER: ${MODE_FILTER}" >&2 + exit 1 + ;; +esac diff --git a/tools/core/CMakeLists.txt b/tools/core/CMakeLists.txt index ecb1c2874..cfcbd550a 100644 --- a/tools/core/CMakeLists.txt +++ b/tools/core/CMakeLists.txt @@ -1,6 +1,25 @@ include(${CMAKE_SOURCE_DIR}/cmake/bazel.cmake) include(${CMAKE_SOURCE_DIR}/cmake/option.cmake) +set(ZVEC_TOOL_CORE_INTERFACE_LIBS + core_framework + core_metric + core_quantizer + core_utility + core_knn_flat + core_knn_flat_sparse + core_knn_hnsw + core_knn_hnsw_sparse + core_knn_cluster + core_knn_ivf + core_interface +) + +set(ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS core_mix_reducer) +if(ZVEC_ENABLE_OMEGA) + list(APPEND ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS core_knn_omega) +endif() + cc_binary( NAME txt2vecs STRICT PACKED @@ -14,7 +33,7 @@ cc_binary( STRICT PACKED SRCS local_builder.cc INCS ${PROJECT_ROOT_DIR}/src/core/ - LIBS gflags yaml-cpp magic_enum core_framework core_metric core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw core_knn_hnsw_sparse core_knn_cluster core_knn_ivf core_interface + LIBS gflags yaml-cpp magic_enum 
${ZVEC_TOOL_CORE_INTERFACE_LIBS} ${ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS} ) cc_binary( @@ -22,7 +41,7 @@ cc_binary( STRICT PACKED SRCS recall.cc INCS ${PROJECT_ROOT_DIR}/src/core/ - LIBS gflags yaml-cpp magic_enum core_framework core_metric core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw core_knn_hnsw_sparse core_knn_cluster core_knn_ivf roaring core_interface + LIBS gflags yaml-cpp magic_enum roaring ${ZVEC_TOOL_CORE_INTERFACE_LIBS} ${ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS} ) cc_binary( @@ -30,7 +49,7 @@ cc_binary( STRICT PACKED SRCS bench.cc INCS ${PROJECT_ROOT_DIR}/src/core/ - LIBS gflags yaml-cpp magic_enum core_framework core_metric core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw core_knn_hnsw_sparse core_knn_cluster core_knn_ivf roaring core_interface + LIBS gflags yaml-cpp magic_enum roaring ${ZVEC_TOOL_CORE_INTERFACE_LIBS} ${ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS} ) cc_binary( @@ -47,7 +66,7 @@ cc_binary( STRICT PACKED SRCS hnsw_hooks_microbench.cc INCS ${PROJECT_ROOT_DIR}/src/core/ ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include - LIBS magic_enum core_framework core_metric core_quantizer core_utility core_knn_hnsw omega core_interface + LIBS magic_enum core_framework core_metric core_quantizer core_utility core_knn_hnsw omega core_interface ${ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS} ) endif() @@ -56,7 +75,7 @@ cc_binary( STRICT PACKED SRCS recall_original.cc flow.cc INCS ${PROJECT_ROOT_DIR}/src/core/ - LIBS gflags yaml-cpp magic_enum core_framework core_metric core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw core_knn_hnsw_sparse core_knn_cluster core_knn_ivf roaring core_interface + LIBS gflags yaml-cpp magic_enum roaring ${ZVEC_TOOL_CORE_INTERFACE_LIBS} ${ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS} ) cc_binary( @@ -64,7 +83,7 @@ cc_binary( STRICT PACKED SRCS bench_original.cc flow.cc INCS ${PROJECT_ROOT_DIR}/src/core/ - LIBS gflags yaml-cpp magic_enum core_framework core_metric 
core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw core_knn_hnsw_sparse core_knn_cluster core_knn_ivf roaring core_interface + LIBS gflags yaml-cpp magic_enum roaring ${ZVEC_TOOL_CORE_INTERFACE_LIBS} ${ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS} ) cc_binary( @@ -72,5 +91,5 @@ cc_binary( STRICT PACKED SRCS local_builder_original.cc INCS ${PROJECT_ROOT_DIR}/src/core/ - LIBS gflags yaml-cpp magic_enum core_framework core_metric core_quantizer core_utility core_knn_flat core_knn_flat_sparse core_knn_hnsw core_knn_hnsw_sparse core_knn_cluster core_knn_ivf core_interface + LIBS gflags yaml-cpp magic_enum ${ZVEC_TOOL_CORE_INTERFACE_LIBS} ${ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS} ) diff --git a/tools/core/hnsw_hooks_microbench.cc b/tools/core/hnsw_hooks_microbench.cc index f61ebc732..719d09fc4 100644 --- a/tools/core/hnsw_hooks_microbench.cc +++ b/tools/core/hnsw_hooks_microbench.cc @@ -23,7 +23,9 @@ #include #include +#include "zvec/ailego/container/params.h" #include "zvec/core/framework/index_factory.h" +#include "zvec/core/framework/index_helper.h" #include "utility/rdtsc_timer.h" #include "algorithm/hnsw/hnsw_context.h" #include "algorithm/hnsw/hnsw_params.h" @@ -36,6 +38,7 @@ namespace core { namespace { struct Options { + std::string mode = "all"; std::string index_path; uint32_t ef_search = 180; uint32_t topk = 100; @@ -51,6 +54,7 @@ void PrintUsage(const char* argv0) { std::cerr << "Usage: " << argv0 << " --index-path [options]\n" << "Options:\n" + << " --mode all|fast|empty|omega\n" << " --ef-search HNSW ef_search\n" << " --topk Search topk\n" << " --query-count Number of sampled queries\n" @@ -66,6 +70,8 @@ bool ParseArgs(int argc, char** argv, Options* opts) { const char* arg = argv[i]; if (std::strcmp(arg, "--index-path") == 0 && i + 1 < argc) { opts->index_path = argv[++i]; + } else if (std::strcmp(arg, "--mode") == 0 && i + 1 < argc) { + opts->mode = argv[++i]; } else if (std::strcmp(arg, "--ef-search") == 0 && i + 1 < argc) { opts->ef_search = 
static_cast(std::strtoul(argv[++i], nullptr, 10)); } else if (std::strcmp(arg, "--topk") == 0 && i + 1 < argc) { @@ -104,19 +110,31 @@ bool ParseArgs(int argc, char** argv, Options* opts) { opts->iterations = std::max(1u, opts->iterations); opts->topk = std::max(1u, opts->topk); opts->window_size = std::max(1, opts->window_size); + if (opts->mode != "all" && opts->mode != "fast" && + opts->mode != "empty" && opts->mode != "omega") { + std::cerr << "Invalid --mode: " << opts->mode << "\n"; + PrintUsage(argv[0]); + return false; + } return true; } class ExposedHnswSearcher : public HnswSearcher { public: - int Init(const ailego::Params& params) { return HnswSearcher::init(params); } + int Init(const ailego::Params& params) { + return HnswSearcher::init(params); + } - int Load(IndexStorage::Pointer container) { - return HnswSearcher::load(std::move(container), nullptr); + int Load(IndexStorage::Pointer storage) { + return HnswSearcher::load(std::move(storage), nullptr); } ContextPointer CreateContext() const { return HnswSearcher::create_context(); } + IndexProvider::Pointer CreateProvider() const { + return HnswSearcher::create_provider(); + } + int FastSearch(HnswContext* ctx) const { return fast_search(ctx); } int FastSearchWithHooks(HnswContext* ctx, @@ -163,25 +181,34 @@ struct BenchStats { double checksum{0.0}; }; -std::vector SampleIndexVectors(HnswContext* ctx, const IndexMeta& meta, +std::vector SampleIndexVectors(const IndexProvider::Pointer& provider, + const IndexMeta& meta, uint32_t count, uint32_t seed) { - const auto& entity = ctx->get_entity(); - const uint32_t doc_cnt = static_cast(entity.doc_cnt()); - std::vector ids(doc_cnt); + const uint32_t doc_cnt = static_cast(provider->count()); + std::vector all_queries; + all_queries.reserve(doc_cnt); + + auto it = provider->create_iterator(); + for (; it && it->is_valid(); it->next()) { + all_queries.push_back(it->data()); + } + + std::vector ids(all_queries.size()); std::iota(ids.begin(), ids.end(), 0u); 
std::mt19937 rng(seed); std::shuffle(ids.begin(), ids.end(), rng); - count = std::min(count, doc_cnt); + count = std::min(count, static_cast(all_queries.size())); std::vector queries; queries.reserve(count); for (uint32_t i = 0; i < count; ++i) { - queries.push_back(entity.get_vector(ids[i])); + queries.push_back(all_queries[ids[i]]); } std::cout << "Using " << queries.size() << " sampled in-index queries" << " element_size=" << meta.element_size() - << " doc_cnt=" << doc_cnt << "\n"; + << " doc_cnt=" << doc_cnt + << " valid_vectors=" << all_queries.size() << "\n"; return queries; } @@ -244,39 +271,50 @@ int main(int argc, char** argv) { return 1; } - ailego::Params params; - params.set(PARAM_HNSW_SEARCHER_EF, opts.ef_search); - - ExposedHnswSearcher searcher; - if (searcher.Init(params) != 0) { - std::cerr << "Failed to init HNSW searcher\n"; + auto storage = IndexFactory::CreateStorage("MMapFileReadStorage"); + if (!storage || storage->open(opts.index_path, false) != 0) { + std::cerr << "Failed to open index storage: " << opts.index_path << "\n"; return 2; } - auto storage = IndexFactory::CreateStorage("MMapFileStorage"); - if (!storage || storage->open(opts.index_path, false) != 0) { - std::cerr << "Failed to open index storage: " << opts.index_path << "\n"; + IndexMeta meta; + if (IndexHelper::DeserializeFromStorage(storage.get(), &meta) != 0) { + std::cerr << "Failed to deserialize index meta from storage\n"; return 3; } + zvec::ailego::Params params = meta.searcher_params(); + params.set(PARAM_HNSW_SEARCHER_EF, opts.ef_search); + + ExposedHnswSearcher searcher; + if (searcher.Init(params) != 0) { + std::cerr << "Failed to init HNSW searcher\n"; + return 4; + } if (searcher.Load(storage) != 0) { std::cerr << "Failed to load HNSW searcher\n"; - return 4; + return 5; } auto context = searcher.CreateContext(); auto* ctx = dynamic_cast(context.get()); if (ctx == nullptr) { std::cerr << "Failed to create HNSW context\n"; - return 5; + return 6; } 
ctx->set_topk(opts.topk); - auto queries = SampleIndexVectors(ctx, searcher.MetaPublic(), opts.query_count, - opts.seed); + auto provider = searcher.CreateProvider(); + if (!provider) { + std::cerr << "Failed to create HNSW provider\n"; + return 7; + } + + auto queries = SampleIndexVectors(provider, searcher.MetaPublic(), + opts.query_count, opts.seed); if (queries.empty()) { std::cerr << "No queries sampled from index\n"; - return 6; + return 8; } HnswAlgorithm::SearchHooks empty_hooks; @@ -293,24 +331,29 @@ int main(int argc, char** argv) { omega_hooks.on_hop = OnOmegaHop; omega_hooks.on_visit_candidate = OnOmegaVisitCandidate; - RunBench("alg_fast_search", ctx, queries, opts.warmup, opts.iterations, [&]() { - return searcher.FastSearch(ctx); - }); - - RunBench("alg_fast_search_with_empty_hooks", ctx, queries, opts.warmup, - opts.iterations, [&]() { - bool stopped_early = false; - return searcher.FastSearchWithHooks(ctx, &empty_hooks, - &stopped_early); - }); - - RunBench("alg_fast_search_with_omega_hooks_only", ctx, queries, opts.warmup, - opts.iterations, [&]() { - omega_search_ctx.Reset(); - bool stopped_early = false; - return searcher.FastSearchWithHooks(ctx, &omega_hooks, - &stopped_early); - }); + if (opts.mode == "all" || opts.mode == "fast") { + RunBench("alg_fast_search", ctx, queries, opts.warmup, opts.iterations, + [&]() { return searcher.FastSearch(ctx); }); + } + + if (opts.mode == "all" || opts.mode == "empty") { + RunBench("alg_fast_search_with_empty_hooks", ctx, queries, opts.warmup, + opts.iterations, [&]() { + bool stopped_early = false; + return searcher.FastSearchWithHooks(ctx, &empty_hooks, + &stopped_early); + }); + } + + if (opts.mode == "all" || opts.mode == "omega") { + RunBench("alg_fast_search_with_omega_hooks_only", ctx, queries, opts.warmup, + opts.iterations, [&]() { + omega_search_ctx.Reset(); + bool stopped_early = false; + return searcher.FastSearchWithHooks(ctx, &omega_hooks, + &stopped_early); + }); + } return 0; } From 
97cd0f6358ceacb383bcd481d409a70eb9519d34 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Tue, 24 Mar 2026 17:31:09 +0800 Subject: [PATCH 037/126] Fix HNSW hooks microbench benchmark loading --- scripts/perf_hnsw_hooks_microbench.sh | 98 ++++++-- src/core/algorithm/hnsw/hnsw_streamer.cc | 10 + src/core/algorithm/hnsw/hnsw_streamer.h | 6 + .../vector_column/vector_column_indexer.h | 4 + tools/core/CMakeLists.txt | 4 +- tools/core/hnsw_hooks_microbench.cc | 212 +++++++++++++----- 6 files changed, 258 insertions(+), 76 deletions(-) diff --git a/scripts/perf_hnsw_hooks_microbench.sh b/scripts/perf_hnsw_hooks_microbench.sh index a3cdb7507..7c2eb8d90 100755 --- a/scripts/perf_hnsw_hooks_microbench.sh +++ b/scripts/perf_hnsw_hooks_microbench.sh @@ -5,7 +5,8 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ZVEC_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" BIN="${BIN:-${ZVEC_ROOT}/build/bin/hnsw_hooks_microbench}" -INDEX_PATH="${INDEX_PATH:-${ZVEC_ROOT}/benchmark_results/cohere_1m_hnsw/0/dense.qindex.5.proxima}" +DEFAULT_INDEX_DIR="${ZVEC_ROOT}/benchmark_results/cohere_1m_hnsw" +INDEX_PATH="${INDEX_PATH:-}" OUT_DIR="${OUT_DIR:-${ZVEC_ROOT}/perf_results/hnsw_hooks_microbench}" CPU_CORE="${CPU_CORE:-0}" @@ -35,25 +36,83 @@ if [[ ! -x "${BIN}" ]]; then exit 1 fi -if [[ ! 
-f "${INDEX_PATH}" ]]; then - echo "index file not found: ${INDEX_PATH}" >&2 - exit 1 -fi - mkdir -p "${OUT_DIR}" -COMMON_ARGS=( - "${BIN}" - --index-path "${INDEX_PATH}" - --ef-search "${EF_SEARCH}" - --topk "${TOPK}" - --query-count "${QUERY_COUNT}" - --warmup "${WARMUP}" - --iterations "${ITERATIONS}" - --window-size "${WINDOW_SIZE}" - --target-recall "${TARGET_RECALL}" - --seed "${SEED}" -) +build_common_args() { + COMMON_ARGS=( + "${BIN}" + --index-path "${INDEX_PATH}" + --ef-search "${EF_SEARCH}" + --topk "${TOPK}" + --query-count "${QUERY_COUNT}" + --warmup "${WARMUP}" + --iterations "${ITERATIONS}" + --window-size "${WINDOW_SIZE}" + --target-recall "${TARGET_RECALL}" + --seed "${SEED}" + ) +} + +preflight_index() { + local candidate="$1" + [[ -d "${candidate}" ]] || return 1 + + local output + if ! output="$("${BIN}" \ + --index-path "${candidate}" \ + --ef-search "${EF_SEARCH}" \ + --topk "${TOPK}" \ + --query-count 8 \ + --warmup 2 \ + --iterations 4 \ + --window-size "${WINDOW_SIZE}" \ + --target-recall "${TARGET_RECALL}" \ + --seed "${SEED}" \ + --mode fast 2>&1)"; then + return 1 + fi + + if [[ "${output}" != *"doc_cnt="* ]] || [[ "${output}" == *"doc_cnt=0"* ]]; then + return 1 + fi + + echo "${output}" + return 0 +} + +detect_index_path() { + if [[ -n "${INDEX_PATH}" ]]; then + if [[ ! -d "${INDEX_PATH}" ]]; then + echo "index dir not found: ${INDEX_PATH}" >&2 + exit 1 + fi + local output + if ! 
output="$(preflight_index "${INDEX_PATH}")"; then + echo "preflight failed for INDEX_PATH=${INDEX_PATH}" >&2 + exit 1 + fi + echo "Using user-provided INDEX_PATH=${INDEX_PATH}" + echo "${output}" + return + fi + + local candidates=( + "${DEFAULT_INDEX_DIR}" + ) + local candidate + for candidate in "${candidates[@]}"; do + local output + if output="$(preflight_index "${candidate}")"; then + INDEX_PATH="${candidate}" + echo "Auto-detected INDEX_PATH=${INDEX_PATH}" + echo "${output}" + return + fi + done + + echo "Failed to auto-detect a valid index file under ${DEFAULT_INDEX_DIR}" >&2 + exit 1 +} run_stat() { local mode="$1" @@ -98,6 +157,9 @@ run_mode() { run_record "${mode}" } +detect_index_path +build_common_args + case "${MODE_FILTER}" in all) run_mode fast diff --git a/src/core/algorithm/hnsw/hnsw_streamer.cc b/src/core/algorithm/hnsw/hnsw_streamer.cc index a6f2e8f27..4bb90013f 100644 --- a/src/core/algorithm/hnsw/hnsw_streamer.cc +++ b/src/core/algorithm/hnsw/hnsw_streamer.cc @@ -81,6 +81,16 @@ HnswStreamer::~HnswStreamer() { } } +int HnswStreamer::FastSearch(HnswContext *ctx) const { + return alg_->fast_search(ctx); +} + +int HnswStreamer::FastSearchWithHooks( + HnswContext *ctx, const HnswAlgorithm::SearchHooks *hooks, + bool *stopped_early) const { + return alg_->fast_search_with_hooks(ctx, hooks, stopped_early); +} + int HnswStreamer::init(const IndexMeta &imeta, const ailego::Params ¶ms) { meta_ = imeta; meta_.set_streamer("HnswStreamer", HnswEntity::kRevision, params); diff --git a/src/core/algorithm/hnsw/hnsw_streamer.h b/src/core/algorithm/hnsw/hnsw_streamer.h index 0ab9c88a7..efa8940dd 100644 --- a/src/core/algorithm/hnsw/hnsw_streamer.h +++ b/src/core/algorithm/hnsw/hnsw_streamer.h @@ -31,6 +31,12 @@ class HnswStreamer : public IndexStreamer { HnswStreamer(const HnswStreamer &streamer) = delete; HnswStreamer &operator=(const HnswStreamer &streamer) = delete; + int FastSearch(HnswContext *ctx) const; + + int FastSearchWithHooks(HnswContext *ctx, + 
const HnswAlgorithm::SearchHooks *hooks, + bool *stopped_early) const; + protected: //! Initialize Streamer virtual int init(const IndexMeta &imeta, diff --git a/src/db/index/column/vector_column/vector_column_indexer.h b/src/db/index/column/vector_column/vector_column_indexer.h index 98f4a365b..e7d6eb590 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.h +++ b/src/db/index/column/vector_column/vector_column_indexer.h @@ -165,6 +165,10 @@ class VectorColumnIndexer { return index_file_path_; } + core_interface::Index::Pointer core_index() const { + return index; + } + size_t doc_count() const { if (index == nullptr) { return -1; diff --git a/tools/core/CMakeLists.txt b/tools/core/CMakeLists.txt index cfcbd550a..4b3a03dc5 100644 --- a/tools/core/CMakeLists.txt +++ b/tools/core/CMakeLists.txt @@ -65,8 +65,8 @@ cc_binary( NAME hnsw_hooks_microbench STRICT PACKED SRCS hnsw_hooks_microbench.cc - INCS ${PROJECT_ROOT_DIR}/src/core/ ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include - LIBS magic_enum core_framework core_metric core_quantizer core_utility core_knn_hnsw omega core_interface ${ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS} + INCS ${PROJECT_ROOT_DIR}/src/core/ ${PROJECT_ROOT_DIR}/src/db/ ${PROJECT_BINARY_DIR}/src/db/ ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include + LIBS magic_enum core_framework core_metric core_quantizer core_utility core_knn_hnsw omega core_interface zvec_index ${ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS} ) endif() diff --git a/tools/core/hnsw_hooks_microbench.cc b/tools/core/hnsw_hooks_microbench.cc index 719d09fc4..2c2c742fc 100644 --- a/tools/core/hnsw_hooks_microbench.cc +++ b/tools/core/hnsw_hooks_microbench.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -24,12 +25,13 @@ #include #include "zvec/ailego/container/params.h" -#include "zvec/core/framework/index_factory.h" -#include "zvec/core/framework/index_helper.h" #include "utility/rdtsc_timer.h" #include 
"algorithm/hnsw/hnsw_context.h" #include "algorithm/hnsw/hnsw_params.h" -#include "algorithm/hnsw/hnsw_searcher.h" +#include "algorithm/hnsw/hnsw_streamer.h" +#include "db/common/file_helper.h" +#include "db/index/column/vector_column/vector_column_indexer.h" +#include "db/index/common/version_manager.h" #include "omega/search_context.h" namespace zvec { @@ -37,9 +39,14 @@ namespace core { namespace { +namespace fs = std::filesystem; + struct Options { std::string mode = "all"; std::string index_path; + uint32_t dimension = 768; + uint32_t m = 15; + uint32_t ef_construction = 500; uint32_t ef_search = 180; uint32_t topk = 100; uint32_t query_count = 1000; @@ -55,6 +62,9 @@ void PrintUsage(const char* argv0) { << "Usage: " << argv0 << " --index-path [options]\n" << "Options:\n" << " --mode all|fast|empty|omega\n" + << " --dimension Vector dimension\n" + << " --m HNSW max neighbors\n" + << " --ef-construction HNSW ef_construction\n" << " --ef-search HNSW ef_search\n" << " --topk Search topk\n" << " --query-count Number of sampled queries\n" @@ -62,7 +72,11 @@ void PrintUsage(const char* argv0) { << " --warmup Number of warmup iterations\n" << " --seed RNG seed for sampled queries\n" << " --window-size OMEGA window size for hooks-only mode\n" - << " --target-recall OMEGA target recall for hooks-only mode\n"; + << " --target-recall OMEGA target recall for hooks-only mode\n" + << "\n" + << "--index-path accepts either the benchmark index directory\n" + << "(for example .../cohere_1m_hnsw) or a file under its segment\n" + << "subdirectory such as .../0/dense.qindex.5.proxima.\n"; } bool ParseArgs(int argc, char** argv, Options* opts) { @@ -72,6 +86,14 @@ bool ParseArgs(int argc, char** argv, Options* opts) { opts->index_path = argv[++i]; } else if (std::strcmp(arg, "--mode") == 0 && i + 1 < argc) { opts->mode = argv[++i]; + } else if (std::strcmp(arg, "--dimension") == 0 && i + 1 < argc) { + opts->dimension = + static_cast(std::strtoul(argv[++i], nullptr, 10)); + } else 
if (std::strcmp(arg, "--m") == 0 && i + 1 < argc) { + opts->m = static_cast(std::strtoul(argv[++i], nullptr, 10)); + } else if (std::strcmp(arg, "--ef-construction") == 0 && i + 1 < argc) { + opts->ef_construction = + static_cast(std::strtoul(argv[++i], nullptr, 10)); } else if (std::strcmp(arg, "--ef-search") == 0 && i + 1 < argc) { opts->ef_search = static_cast(std::strtoul(argv[++i], nullptr, 10)); } else if (std::strcmp(arg, "--topk") == 0 && i + 1 < argc) { @@ -119,33 +141,6 @@ bool ParseArgs(int argc, char** argv, Options* opts) { return true; } -class ExposedHnswSearcher : public HnswSearcher { - public: - int Init(const ailego::Params& params) { - return HnswSearcher::init(params); - } - - int Load(IndexStorage::Pointer storage) { - return HnswSearcher::load(std::move(storage), nullptr); - } - - ContextPointer CreateContext() const { return HnswSearcher::create_context(); } - - IndexProvider::Pointer CreateProvider() const { - return HnswSearcher::create_provider(); - } - - int FastSearch(HnswContext* ctx) const { return fast_search(ctx); } - - int FastSearchWithHooks(HnswContext* ctx, - const HnswAlgorithm::SearchHooks* hooks, - bool* stopped_early) const { - return fast_search_with_hooks(ctx, hooks, stopped_early); - } - - const IndexMeta& MetaPublic() const { return meta(); } -}; - struct OmegaHookState { omega::SearchContext* search_ctx{nullptr}; bool enable_early_stopping{false}; @@ -181,6 +176,37 @@ struct BenchStats { double checksum{0.0}; }; +std::string NormalizeIndexPath(const std::string& input_path) { + if (input_path.empty()) { + return input_path; + } + + fs::path path(input_path); + std::error_code ec; + if (!fs::exists(path, ec)) { + return input_path; + } + + if (fs::is_regular_file(path, ec)) { + auto segment_dir = path.parent_path(); + auto index_root = segment_dir.parent_path(); + if (!segment_dir.empty() && segment_dir.filename() == "0" && + !index_root.empty()) { + return index_root.string(); + } + return segment_dir.string(); + } + + 
if (fs::is_directory(path, ec) && path.filename() == "0") { + auto index_root = path.parent_path(); + if (!index_root.empty()) { + return index_root.string(); + } + } + + return path.string(); +} + std::vector SampleIndexVectors(const IndexProvider::Pointer& provider, const IndexMeta& meta, uint32_t count, uint32_t seed) { @@ -212,6 +238,78 @@ std::vector SampleIndexVectors(const IndexProvider::Pointer& provid return queries; } +bool OpenBenchmarkVectorIndexer(const std::string& benchmark_root, + VectorColumnIndexer::Ptr* out, + std::string* error) { + auto version_manager_result = VersionManager::Recovery(benchmark_root); + if (!version_manager_result.has_value()) { + *error = version_manager_result.error().message(); + return false; + } + + auto version_manager = version_manager_result.value(); + const Version version = version_manager->get_current_version(); + const auto& schema = version.schema(); + const auto vector_fields = schema.vector_fields(); + if (vector_fields.empty()) { + *error = "No vector field found in benchmark root"; + return false; + } + + const FieldSchema* field = vector_fields.front().get(); + if (field == nullptr) { + *error = "Invalid vector field in benchmark root"; + return false; + } + + std::string index_file_path; + uint32_t best_doc_count = 0; + bool found_quantized = false; + + for (const auto& segment_meta : version.persisted_segment_metas()) { + for (const auto& block : segment_meta->persisted_blocks()) { + const bool match_field = block.contain_column(field->name()); + const bool is_quantized = block.type() == BlockType::VECTOR_INDEX_QUANTIZE; + const bool is_plain = block.type() == BlockType::VECTOR_INDEX; + if (!match_field || (!is_quantized && !is_plain)) { + continue; + } + + if (is_quantized && (!found_quantized || block.doc_count() > best_doc_count)) { + index_file_path = FileHelper::MakeQuantizeVectorIndexPath( + benchmark_root, field->name(), segment_meta->id(), block.id()); + best_doc_count = block.doc_count(); + 
found_quantized = true; + continue; + } + + if (!found_quantized && block.doc_count() > best_doc_count) { + index_file_path = FileHelper::MakeVectorIndexPath( + benchmark_root, field->name(), segment_meta->id(), block.id()); + best_doc_count = block.doc_count(); + } + } + } + + if (index_file_path.empty()) { + *error = "No HNSW vector index file found under benchmark root"; + return false; + } + + auto indexer = std::make_shared(index_file_path, *field); + auto status = indexer->Open({true, false, true}); + if (!status.ok()) { + *error = status.message(); + return false; + } + + std::cout << "Opened benchmark root: " << benchmark_root << "\n" + << "Selected vector index file: " << index_file_path << "\n" + << "Vector field: " << field->name() << "\n"; + *out = std::move(indexer); + return true; +} + template BenchStats RunBench(const std::string& name, HnswContext* ctx, const std::vector& queries, uint32_t warmup, @@ -265,52 +363,54 @@ BenchStats RunBench(const std::string& name, HnswContext* ctx, int main(int argc, char** argv) { using namespace zvec::core; + using namespace zvec; Options opts; if (!ParseArgs(argc, argv, &opts)) { return 1; } + opts.index_path = NormalizeIndexPath(opts.index_path); - auto storage = IndexFactory::CreateStorage("MMapFileReadStorage"); - if (!storage || storage->open(opts.index_path, false) != 0) { - std::cerr << "Failed to open index storage: " << opts.index_path << "\n"; + VectorColumnIndexer::Ptr indexer; + std::string open_error; + if (!OpenBenchmarkVectorIndexer(opts.index_path, &indexer, &open_error)) { + std::cerr << "Failed to open benchmark index: " << open_error << "\n"; return 2; } - - IndexMeta meta; - if (IndexHelper::DeserializeFromStorage(storage.get(), &meta) != 0) { - std::cerr << "Failed to deserialize index meta from storage\n"; + auto index = indexer->core_index(); + if (!index) { + std::cerr << "Opened benchmark indexer without underlying core index\n"; return 3; } - zvec::ailego::Params params = 
meta.searcher_params(); - params.set(PARAM_HNSW_SEARCHER_EF, opts.ef_search); - - ExposedHnswSearcher searcher; - if (searcher.Init(params) != 0) { - std::cerr << "Failed to init HNSW searcher\n"; + auto streamer_base = index->index_searcher(); + auto* streamer = dynamic_cast(streamer_base.get()); + if (streamer == nullptr) { + std::cerr << "Failed to get HnswStreamer from opened index\n"; return 4; } - if (searcher.Load(storage) != 0) { - std::cerr << "Failed to load HNSW searcher\n"; - return 5; - } - auto context = searcher.CreateContext(); + auto context = streamer_base->create_context(); auto* ctx = dynamic_cast(context.get()); if (ctx == nullptr) { std::cerr << "Failed to create HNSW context\n"; + return 5; + } + zvec::ailego::Params query_params; + query_params.set(PARAM_HNSW_STREAMER_EF, opts.ef_search); + if (ctx->update(query_params) != 0) { + std::cerr << "Failed to update HNSW query params\n"; return 6; } ctx->set_topk(opts.topk); - auto provider = searcher.CreateProvider(); + auto provider = streamer_base->create_provider(); if (!provider) { std::cerr << "Failed to create HNSW provider\n"; return 7; } - auto queries = SampleIndexVectors(provider, searcher.MetaPublic(), + auto queries = SampleIndexVectors(provider, streamer_base->meta(), opts.query_count, opts.seed); if (queries.empty()) { std::cerr << "No queries sampled from index\n"; @@ -333,15 +433,15 @@ int main(int argc, char** argv) { if (opts.mode == "all" || opts.mode == "fast") { RunBench("alg_fast_search", ctx, queries, opts.warmup, opts.iterations, - [&]() { return searcher.FastSearch(ctx); }); + [&]() { return streamer->FastSearch(ctx); }); } if (opts.mode == "all" || opts.mode == "empty") { RunBench("alg_fast_search_with_empty_hooks", ctx, queries, opts.warmup, opts.iterations, [&]() { bool stopped_early = false; - return searcher.FastSearchWithHooks(ctx, &empty_hooks, - &stopped_early); + return streamer->FastSearchWithHooks(ctx, &empty_hooks, + &stopped_early); }); } @@ -350,8 +450,8 @@ 
int main(int argc, char** argv) { opts.iterations, [&]() { omega_search_ctx.Reset(); bool stopped_early = false; - return searcher.FastSearchWithHooks(ctx, &omega_hooks, - &stopped_early); + return streamer->FastSearchWithHooks(ctx, &omega_hooks, + &stopped_early); }); } From bbd32595edd344f6a7896ba2c2fb42de0af7eebb Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 25 Mar 2026 04:39:55 +0800 Subject: [PATCH 038/126] Reduce omega hook bookkeeping overhead --- src/core/algorithm/hnsw/hnsw_algorithm.cc | 40 +++++++++++++++------- src/core/algorithm/hnsw/hnsw_algorithm.h | 2 +- src/core/algorithm/omega/omega_searcher.cc | 4 +-- src/core/algorithm/omega/omega_streamer.cc | 4 +-- thirdparty/omega/OMEGALib | 2 +- 5 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.cc b/src/core/algorithm/hnsw/hnsw_algorithm.cc index bad11d7c5..dfbd95a3b 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.cc +++ b/src/core/algorithm/hnsw/hnsw_algorithm.cc @@ -240,12 +240,21 @@ bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, return result; }; + const uint32_t result_topk_limit = ctx->topk(); + const bool track_hook_result_topk = + hooks != nullptr && hooks->on_visit_candidate != nullptr && + result_topk_limit > 0; + TopkHeap hook_result_topk(result_topk_limit > 0 ? 
result_topk_limit : 1U); + candidates.clear(); visit.clear(); visit.set_visited(*entry_point); bool entry_inserted_to_topk = !filter(*entry_point); if (entry_inserted_to_topk) { topk.emplace(*entry_point, *dist); + if (track_hook_result_topk) { + hook_result_topk.emplace(*entry_point, *dist); + } } candidates.emplace(*entry_point, *dist); @@ -327,17 +336,7 @@ bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, dist_t cur_dist = dists[i]; bool should_consider_candidate = (!topk.full()) || cur_dist < topk[0].second; - - if (hooks != nullptr && hooks->on_visit_candidate != nullptr) { - bool should_stop = run_timed_hook([&]() { - return hooks->on_visit_candidate(node, cur_dist, - should_consider_candidate, - hooks->user_data); - }); - if (should_stop) { - return true; - } - } + bool inserted_to_topk = false; if (should_consider_candidate) { candidates.emplace(node, cur_dist); @@ -348,8 +347,25 @@ bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, } if (!filter(node)) { topk.emplace(node, cur_dist); + if (track_hook_result_topk) { + inserted_to_topk = + !hook_result_topk.full() || cur_dist < hook_result_topk[0].second; + if (inserted_to_topk) { + hook_result_topk.emplace(node, cur_dist); + } + } } - } // end if + } + + if (hooks != nullptr && hooks->on_visit_candidate != nullptr) { + bool should_stop = run_timed_hook([&]() { + return hooks->on_visit_candidate(node, cur_dist, inserted_to_topk, + hooks->user_data); + }); + if (should_stop) { + return true; + } + } } // end for } // while diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.h b/src/core/algorithm/hnsw/hnsw_algorithm.h index 902710345..f17c35778 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.h +++ b/src/core/algorithm/hnsw/hnsw_algorithm.h @@ -37,7 +37,7 @@ class HnswAlgorithm { void *user_data){nullptr}; void (*on_hop)(void *user_data){nullptr}; bool (*on_visit_candidate)(node_id_t id, dist_t dist, - bool should_consider_candidate, + bool 
inserted_to_topk, void *user_data){nullptr}; }; diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index f67b97cb6..3993ee190 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -56,9 +56,9 @@ void OnOmegaHop(void *user_data) { } bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, - bool should_consider_candidate, void *user_data) { + bool inserted_to_topk, void *user_data) { auto &state = *static_cast(user_data); - state.search_ctx->ReportVisitCandidate(id, dist, should_consider_candidate); + state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); if (!state.enable_early_stopping) { return false; } diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 61a66fe85..5793afaa1 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -120,10 +120,10 @@ void OnOmegaHop(void *user_data) { } bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, - bool should_consider_candidate, void *user_data) { + bool inserted_to_topk, void *user_data) { auto &state = *static_cast(user_data); RunOmegaControlHook(state, [&]() { - state.search_ctx->ReportVisitCandidate(id, dist, should_consider_candidate); + state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); }); if (!state.enable_early_stopping) { diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 492e51c0f..3f98b48fb 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 492e51c0fa2d291cee99d4a8a4afdfe512608e7e +Subproject commit 3f98b48fb190ffcf140b0ad639cebc46bacd6d19 From ca7e2e3049bb675c4f0723245abe4f103291b9a3 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 25 Mar 2026 16:35:14 +0800 Subject: [PATCH 039/126] Update omega traversal window tracking --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 3f98b48fb..ae4952079 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 3f98b48fb190ffcf140b0ad639cebc46bacd6d19 +Subproject commit ae495207941ac62fca8b6a291a5c52cee559e11d From 6d1da957efcf676c593fbd6f634bbd49f4c4b9ec Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 25 Mar 2026 16:58:51 +0800 Subject: [PATCH 040/126] Revert omega traversal window gating --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index ae4952079..a2c316570 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit ae495207941ac62fca8b6a291a5c52cee559e11d +Subproject commit a2c316570442b2a03600b4b649bac4ca0fd6af91 From 255fd53cb9f079d6dbadefc718902316af5ec077 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 25 Mar 2026 17:20:17 +0800 Subject: [PATCH 041/126] Fix HNSW query stats logging format --- src/core/algorithm/hnsw/hnsw_streamer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/algorithm/hnsw/hnsw_streamer.cc b/src/core/algorithm/hnsw/hnsw_streamer.cc index 4bb90013f..670ddb7bd 100644 --- a/src/core/algorithm/hnsw/hnsw_streamer.cc +++ b/src/core/algorithm/hnsw/hnsw_streamer.cc @@ -701,8 +701,8 @@ int HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, if (ShouldLogHnswQueryStats(query_seq)) { LOG_INFO("HNSW query stats: query_seq=%llu hook_mode=%s cmps=%zu " "pairwise_dist_cnt=%zu pure_search_ms=%.3f latency_ms=%.3f", - static_cast(query_seq), ctx->get_scan_num(), - use_empty_hooks ? "empty" : "none", + static_cast(query_seq), + use_empty_hooks ? 
"empty" : "none", ctx->get_scan_num(), ctx->get_pairwise_dist_num(), static_cast(query_search_time_ns) / 1e6, static_cast(query_latency_ns) / 1e6); From 9027bb9743bb44dd92e3aab2f3d4d60be1ff7637 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 25 Mar 2026 17:22:26 +0800 Subject: [PATCH 042/126] Restore benchmark concurrency sweep --- scripts/benchmark_cohere_10m.py | 2 +- scripts/benchmark_cohere_1m.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/benchmark_cohere_10m.py b/scripts/benchmark_cohere_10m.py index f19913e80..10ddb6497 100644 --- a/scripts/benchmark_cohere_10m.py +++ b/scripts/benchmark_cohere_10m.py @@ -523,7 +523,7 @@ def main(): EF_SEARCH = 118 QUANTIZE_TYPE = "int8" USE_REFINER = True - NUM_CONCURRENCY = "16" + NUM_CONCURRENCY = "12,14,16,18,20" CONCURRENCY_DURATION = 30 K = 100 diff --git a/scripts/benchmark_cohere_1m.py b/scripts/benchmark_cohere_1m.py index fa8583e07..187b78070 100755 --- a/scripts/benchmark_cohere_1m.py +++ b/scripts/benchmark_cohere_1m.py @@ -530,7 +530,7 @@ def main(): M = 15 EF_SEARCH = 180 QUANTIZE_TYPE = "int8" - NUM_CONCURRENCY = "16" + NUM_CONCURRENCY = "12,14,16,18,20" CONCURRENCY_DURATION = 30 K = 100 From 6f3d2b4946b1b8b122f63174cb50718c6504bca6 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 25 Mar 2026 17:47:46 +0800 Subject: [PATCH 043/126] Add configurable HNSW vs OMEGA benchmark runner --- scripts/benchmark_hnsw_vs_omega.json | 107 ++++ scripts/benchmark_hnsw_vs_omega.py | 902 +++++++++++++++++++++++++++ 2 files changed, 1009 insertions(+) create mode 100644 scripts/benchmark_hnsw_vs_omega.json create mode 100644 scripts/benchmark_hnsw_vs_omega.py diff --git a/scripts/benchmark_hnsw_vs_omega.json b/scripts/benchmark_hnsw_vs_omega.json new file mode 100644 index 000000000..e8a8b5914 --- /dev/null +++ b/scripts/benchmark_hnsw_vs_omega.json @@ -0,0 +1,107 @@ +{ + "cohere_1m": { + "common": { + "case_type": "Performance768D1M", + "num_concurrency": "12,14,16,18,20", + 
"concurrency_duration": 30, + "k": 100, + "m": 15, + "ef_search": 180, + "quantize_type": "int8" + }, + "hnsw": { + "path": "cohere_1m_hnsw", + "db_label": "16c64g-v0.1", + "args": {} + }, + "omega": { + "path": "cohere_1m_omega", + "db_label": "omega-m15-ef180-int8", + "target_recalls": [ + 0.91 + ], + "args": { + "min_vector_threshold": 100000, + "num_training_queries": 4000, + "ef_training": 500, + "window_size": 100, + "ef_groundtruth": 1000 + } + }, + "profiling": { + "hnsw_query_limit": 2000, + "omega_query_limit": 2000, + "omega_profile_control_timing": true + } + }, + "bioasq_1m": { + "common": { + "case_type": "Performance1024D1M", + "num_concurrency": "12,14,16,18,20", + "concurrency_duration": 30, + "k": 100, + "m": 15, + "ef_search": 180, + "quantize_type": "int8" + }, + "hnsw": { + "path": "bioasq_1m_hnsw", + "db_label": "bioasq-hnsw", + "args": {} + }, + "omega": { + "path": "bioasq_1m_omega", + "db_label": "bioasq-omega", + "target_recalls": [ + 0.91 + ], + "args": { + "min_vector_threshold": 100000, + "num_training_queries": 4000, + "ef_training": 500, + "window_size": 100, + "ef_groundtruth": 1000 + } + }, + "profiling": { + "hnsw_query_limit": 2000, + "omega_query_limit": 2000, + "omega_profile_control_timing": true + } + }, + "cohere_10m": { + "common": { + "case_type": "Performance768D10M", + "num_concurrency": "12,14,16,18,20", + "concurrency_duration": 30, + "k": 100, + "m": 50, + "ef_search": 118, + "quantize_type": "int8" + }, + "hnsw": { + "path": "cohere_10m_hnsw", + "db_label": "16c64g-v0.1", + "args": {} + }, + "omega": { + "path": "cohere_10m_omega", + "db_label": "omega-m50-ef118-refiner-int8", + "target_recalls": [ + 0.91 + ], + "args": { + "min_vector_threshold": 100000, + "num_training_queries": 4000, + "ef_training": 500, + "window_size": 100, + "ef_groundtruth": 1000 + } + }, + "profiling": { + "hnsw_query_limit": 2000, + "omega_query_limit": 2000, + "omega_profile_control_timing": true + } + } +} diff --git 
a/scripts/benchmark_hnsw_vs_omega.py b/scripts/benchmark_hnsw_vs_omega.py new file mode 100644 index 000000000..1e95d8202 --- /dev/null +++ b/scripts/benchmark_hnsw_vs_omega.py @@ -0,0 +1,902 @@ +#!/usr/bin/env python3 +""" +Generic VectorDBBench runner for Zvec HNSW vs Zvec+OMEGA. + +Configuration is loaded from a JSON file so datasets, paths, and all +benchmark/index parameters can be changed without editing the script. +""" + +import argparse +import importlib +import json +import os +import re +import subprocess +import sys +import tempfile +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import Any + + +@dataclass +class BenchmarkResult: + type: str + path: str + success: bool + target_recall: float | None + load_duration: float | None = None + qps: float | None = None + recall: float | None = None + profiling: dict | None = None + + +KV_PATTERN = re.compile(r"([A-Za-z_]+)=([^\s,]+)") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Generic VectorDBBench runner for Zvec HNSW vs OMEGA" + ) + parser.add_argument("--config", required=True, help="Path to benchmark JSON config") + parser.add_argument( + "--dataset", + required=True, + help="Dataset key to run from the top-level JSON config map", + ) + parser.add_argument("--dry-run", action="store_true", help="Print commands without executing") + parser.add_argument("--skip-hnsw", action="store_true", help="Skip HNSW benchmark") + parser.add_argument("--skip-omega", action="store_true", help="Skip OMEGA benchmark") + parser.add_argument("--build-only", action="store_true", help="Only build index, skip search") + parser.add_argument("--search-only", action="store_true", help="Only run search on existing index") + parser.add_argument( + "--retrain-only", + action="store_true", + help="Reuse existing OMEGA index and only retrain the model during the build phase", + ) + parser.add_argument( + "--zvec-root", + 
type=str, + default=None, + help="Path to zvec repo root (default: auto-detect from this script)", + ) + parser.add_argument( + "--vectordbbench-root", + type=str, + default=None, + help="Path to VectorDBBench repo root (default: $VECTORDBBENCH_ROOT or sibling repo)", + ) + parser.add_argument( + "--benchmark-dir", + type=str, + default=None, + help="Directory used to store benchmark artifacts " + "(default: config benchmark_dir, $ZVEC_BENCHMARK_DIR, or /benchmark_results)", + ) + parser.add_argument( + "--results-dir", + type=str, + default=None, + help="Directory containing VectorDBBench JSON result files", + ) + return parser.parse_args() + + +def load_json(path: Path) -> dict[str, Any]: + with open(path) as f: + return json.load(f) + + +def load_dataset_config(path: Path, dataset_name: str) -> dict[str, Any]: + root = load_json(path) + if dataset_name not in root: + available = ", ".join(sorted(root.keys())) + raise ValueError( + f"Dataset '{dataset_name}' not found in {path}. Available datasets: {available}" + ) + dataset_config = root[dataset_name] + if not isinstance(dataset_config, dict): + raise ValueError(f"Dataset config for '{dataset_name}' must be a JSON object") + return dataset_config + + +def resolve_paths( + config: dict[str, Any], + zvec_root_arg: str | None, + vectordbbench_root_arg: str | None, + benchmark_dir_arg: str | None, + results_dir_arg: str | None, +) -> tuple[Path, Path, Path, Path]: + script_path = Path(__file__).resolve() + zvec_root = Path(zvec_root_arg).resolve() if zvec_root_arg else script_path.parent.parent + vectordbbench_root = ( + Path(vectordbbench_root_arg).resolve() + if vectordbbench_root_arg + else Path(os.environ.get("VECTORDBBENCH_ROOT", zvec_root.parent / "VectorDBBench")).resolve() + ) + + config_benchmark_dir = config.get("benchmark_dir") + if benchmark_dir_arg: + benchmark_dir = Path(benchmark_dir_arg).resolve() + elif config_benchmark_dir: + benchmark_dir = Path(config_benchmark_dir).expanduser().resolve() + else: 
+ benchmark_dir = Path(os.environ.get("ZVEC_BENCHMARK_DIR", zvec_root / "benchmark_results")).resolve() + + source_results_dir = vectordbbench_root / "vectordb_bench" / "results" / "Zvec" + if results_dir_arg: + results_dir = Path(results_dir_arg).resolve() + elif config.get("results_dir"): + results_dir = Path(config["results_dir"]).expanduser().resolve() + elif source_results_dir.exists(): + results_dir = source_results_dir + else: + try: + bench_config = importlib.import_module("vectordb_bench").config + results_dir = Path(bench_config.RESULTS_LOCAL_DIR).resolve() / "Zvec" + except Exception: + results_dir = source_results_dir + + return zvec_root, vectordbbench_root, benchmark_dir, results_dir + + +def resolve_vectordbbench_command() -> list[str]: + return [sys.executable, "-m", "vectordb_bench.cli.vectordbbench"] + + +def parse_scalar(value: str) -> Any: + lower = value.lower() + if lower in {"true", "false"}: + return lower == "true" + try: + if any(ch in value for ch in [".", "e", "E"]): + return float(value) + return int(value) + except ValueError: + return value + + +def parse_key_values(line: str) -> dict[str, Any]: + return {key: parse_scalar(value) for key, value in KV_PATTERN.findall(line)} + + +def avg_metric(records: list[dict[str, Any]], key: str) -> float | None: + values = [float(record[key]) for record in records if key in record] + if not values: + return None + return sum(values) / len(values) + + +def parse_serial_runner_summary(output: str) -> dict[str, Any]: + summary = {} + for line in output.splitlines(): + if "search entire test_data:" not in line: + continue + summary = parse_key_values(line) + return summary + + +def parse_query_records(output: str, prefix: str) -> list[dict[str, Any]]: + records = [] + for line in output.splitlines(): + if prefix not in line: + continue + records.append(parse_key_values(line)) + return records + + +def build_hnsw_profile(metrics: dict[str, Any], output: str) -> dict[str, Any]: + query_records = 
parse_query_records(output, "HNSW query stats:") + serial_summary = parse_serial_runner_summary(output) + return { + "benchmark_recall": metrics.get("recall"), + "benchmark_qps": metrics.get("qps"), + "profile_query_count": len(query_records), + "profile_avg_end2end_latency_ms": avg_metric(query_records, "latency_ms"), + "profile_avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), + "profile_avg_scan_cmps": avg_metric(query_records, "cmps"), + "profile_avg_pure_search_ms": avg_metric(query_records, "pure_search_ms"), + "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), + "profile_serial_p99_s": serial_summary.get("p99"), + "profile_serial_p95_s": serial_summary.get("p95"), + "profile_serial_avg_recall": serial_summary.get("avg_recall"), + } + + +def build_omega_profile( + metrics: dict[str, Any], output: str, hnsw_profile: dict[str, Any] | None +) -> dict[str, Any]: + query_records = parse_query_records(output, "OMEGA query stats:") + serial_summary = parse_serial_runner_summary(output) + + avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") + avg_core_search_ms = avg_metric(query_records, "core_search_ms") + avg_pure_search_ms = avg_metric(query_records, "pure_search_ms") + avg_omega_control_ms = avg_metric(query_records, "omega_control_ms") + avg_search_only_ms = ( + avg_pure_search_ms if avg_pure_search_ms is not None else avg_core_search_ms + ) + + cmp_time_ms = None + if avg_pairwise_dist_cnt and avg_pairwise_dist_cnt > 0 and avg_search_only_ms is not None: + cmp_time_ms = avg_search_only_ms / avg_pairwise_dist_cnt + + model_overhead_cmp_equiv = None + if cmp_time_ms and cmp_time_ms > 0 and avg_omega_control_ms is not None: + model_overhead_cmp_equiv = avg_omega_control_ms / cmp_time_ms + + avg_saved_cmps = None + if ( + hnsw_profile + and hnsw_profile.get("profile_avg_cmps") is not None + and avg_pairwise_dist_cnt is not None + ): + avg_saved_cmps = hnsw_profile["profile_avg_cmps"] - avg_pairwise_dist_cnt + + return { 
+ "benchmark_recall": metrics.get("recall"), + "benchmark_qps": metrics.get("qps"), + "profile_query_count": len(query_records), + "profile_avg_end2end_latency_ms": avg_metric(query_records, "total_ms"), + "profile_avg_cmps": avg_pairwise_dist_cnt, + "profile_avg_scan_cmps": avg_metric(query_records, "scan_cmps"), + "profile_avg_omega_cmps": avg_metric(query_records, "omega_cmps"), + "profile_avg_prediction_calls": avg_metric(query_records, "prediction_calls"), + "profile_avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), + "profile_avg_advance_calls": avg_metric(query_records, "advance_calls"), + "profile_avg_model_overhead_ms": avg_omega_control_ms, + "profile_avg_setup_ms": avg_metric(query_records, "setup_ms"), + "profile_avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), + "profile_avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), + "profile_avg_core_search_ms": avg_core_search_ms, + "profile_avg_pure_search_ms": avg_pure_search_ms, + "profile_avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, + "profile_avg_early_stop_saved_cmps": avg_saved_cmps, + "profile_avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), + "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), + "profile_serial_p99_s": serial_summary.get("p99"), + "profile_serial_p95_s": serial_summary.get("p95"), + "profile_serial_avg_recall": serial_summary.get("avg_recall"), + } + + +def profiling_output_path(index_path: Path) -> Path: + return index_path / "online_benchmark_summary.json" + + +def write_profiling_summary(index_path: Path, payload: dict[str, Any]) -> None: + with open(profiling_output_path(index_path), "w") as f: + json.dump(payload, f, indent=2, sort_keys=True) + + +def write_grouped_profiling_summaries(dataset: str, results: list[BenchmarkResult]) -> list[Path]: + written_paths: list[Path] = [] + grouped: dict[str, list[BenchmarkResult]] = {} + for result in results: + 
grouped.setdefault(result.path, []).append(result) + + for path_str, grouped_results in grouped.items(): + index_path = Path(path_str) + write_profiling_summary( + index_path, + { + "generated_at": datetime.now().isoformat(), + "dataset": dataset, + "results": [ + { + "type": result.type, + "target_recall": result.target_recall, + "path": result.path, + "load_duration_s": result.load_duration, + "qps": result.qps, + "recall": result.recall, + "profiling": result.profiling, + } + for result in grouped_results + ], + }, + ) + written_paths.append(profiling_output_path(index_path)) + + return written_paths + + +def get_latest_result(db_label: str, results_dir: Path) -> dict[str, Any]: + if not results_dir.exists(): + return {} + + result_files = sorted( + results_dir.glob("result_*.json"), + key=lambda f: f.stat().st_mtime, + reverse=True, + ) + for result_file in result_files: + try: + with open(result_file) as f: + data = json.load(f) + for result in data.get("results", []): + task_config = result.get("task_config", {}) + db_config = task_config.get("db_config", {}) + if db_config.get("db_label") == db_label: + metrics = result.get("metrics", {}) + return { + "insert_duration": metrics.get("insert_duration"), + "optimize_duration": metrics.get("optimize_duration"), + "load_duration": metrics.get("load_duration"), + "qps": metrics.get("qps"), + "recall": metrics.get("recall"), + } + except Exception: + continue + return {} + + +def snapshot_result_files(results_dir: Path) -> set[str]: + if not results_dir.exists(): + return set() + return {str(p) for p in results_dir.glob("result_*.json")} + + +def extract_result_from_file(result_file: Path, db_label: str) -> dict[str, Any]: + try: + with open(result_file) as f: + data = json.load(f) + for result in data.get("results", []): + task_config = result.get("task_config", {}) + db_config = task_config.get("db_config", {}) + if db_config.get("db_label") == db_label: + metrics = result.get("metrics", {}) + return { + 
"insert_duration": metrics.get("insert_duration"), + "optimize_duration": metrics.get("optimize_duration"), + "load_duration": metrics.get("load_duration"), + "qps": metrics.get("qps"), + "recall": metrics.get("recall"), + } + except Exception: + return {} + return {} + + +def get_run_result(db_label: str, before_files: set[str], results_dir: Path) -> dict[str, Any]: + if not results_dir.exists(): + return {} + + current_files = {str(p) for p in results_dir.glob("result_*.json")} + new_files = sorted( + [Path(p) for p in current_files - before_files], + key=lambda p: p.stat().st_mtime, + reverse=True, + ) + for result_file in new_files: + metrics = extract_result_from_file(result_file, db_label) + if metrics: + return metrics + return get_latest_result(db_label, results_dir) + + +def offline_summary_path(index_path: Path) -> Path: + return index_path / "offline_benchmark_summary.json" + + +def read_json_if_exists(path: Path) -> dict[str, Any]: + if not path.exists(): + return {} + try: + with open(path) as f: + return json.load(f) + except Exception: + return {} + + +def find_omega_model_dir(index_path: Path) -> Path | None: + candidates = sorted(index_path.glob("*/omega_model")) + return candidates[0] if candidates else None + + +def sum_timing_ms(data: dict[str, Any]) -> int: + return sum(v for v in data.values() if isinstance(v, (int, float))) + + +def build_offline_summary( + index_path: Path, + db_label: str, + metrics: dict[str, Any], + retrain_only: bool = False, +) -> dict[str, Any]: + previous_summary = read_json_if_exists(offline_summary_path(index_path)) if retrain_only else {} + previous_offline = previous_summary.get("offline", {}) + previous_omega_training = previous_summary.get("omega_training", {}) + + insert_duration = metrics.get("insert_duration") + optimize_duration = metrics.get("optimize_duration") + load_duration = metrics.get("load_duration") + + omega_model_dir = find_omega_model_dir(index_path) + omega_training = {} + if omega_model_dir is 
not None: + omega_training = { + "collection_timing_ms": read_json_if_exists( + omega_model_dir / "training_collection_timing.json" + ), + "lightgbm_timing_ms": read_json_if_exists( + omega_model_dir / "lightgbm_training_timing.json" + ), + } + + if retrain_only: + insert_duration = previous_offline.get("insert_duration_s") + old_optimize_duration = previous_offline.get("optimize_duration_s") + old_training_s = ( + sum_timing_ms(previous_omega_training.get("collection_timing_ms", {})) + + sum_timing_ms(previous_omega_training.get("lightgbm_timing_ms", {})) + ) / 1000.0 + new_training_s = ( + sum_timing_ms(omega_training.get("collection_timing_ms", {})) + + sum_timing_ms(omega_training.get("lightgbm_timing_ms", {})) + ) / 1000.0 + if old_optimize_duration is not None: + optimize_duration = round(old_optimize_duration - old_training_s + new_training_s, 4) + else: + optimize_duration = metrics.get("optimize_duration") + load_duration = ( + round(insert_duration + optimize_duration, 4) + if insert_duration is not None and optimize_duration is not None + else metrics.get("load_duration") + ) + + summary = { + "db_label": db_label, + "index_path": str(index_path), + "generated_at": datetime.now().isoformat(), + "offline": { + "insert_duration_s": insert_duration, + "optimize_duration_s": optimize_duration, + "load_duration_s": load_duration, + }, + } + if omega_training: + summary["omega_training"] = omega_training + return summary + + +def write_offline_summary( + index_path: Path, + db_label: str, + metrics: dict[str, Any], + retrain_only: bool = False, +) -> None: + summary = build_offline_summary(index_path, db_label, metrics, retrain_only=retrain_only) + with open(offline_summary_path(index_path), "w") as f: + json.dump(summary, f, indent=2, sort_keys=True) + + +def get_offline_load_duration(index_path: Path) -> float | None: + summary = read_json_if_exists(offline_summary_path(index_path)) + return summary.get("offline", {}).get("load_duration_s") + + +def 
run_command( + cmd: list[str], + vectordbbench_root: Path, + dry_run: bool = False, + extra_env: dict[str, str] | None = None, +) -> int: + cmd_str = " \\\n ".join(cmd) + print(f"\n{'=' * 60}") + print(f"Command:\n{cmd_str}") + print(f"{'=' * 60}\n") + if dry_run: + print("[DRY RUN] Command not executed") + return 0 + + cwd = vectordbbench_root if vectordbbench_root.exists() else None + env = os.environ.copy() + if extra_env: + env.update(extra_env) + result = subprocess.run(cmd, cwd=cwd, env=env) + return result.returncode + + +def run_command_capture( + cmd: list[str], + vectordbbench_root: Path, + dry_run: bool = False, + extra_env: dict[str, str] | None = None, +) -> tuple[int, str]: + cmd_str = " \\\n ".join(cmd) + print(f"\n{'=' * 60}") + print(f"Command:\n{cmd_str}") + print(f"{'=' * 60}\n") + + if dry_run: + print("[DRY RUN] Command not executed") + return 0, "" + + cwd = vectordbbench_root if vectordbbench_root.exists() else None + env = os.environ.copy() + if extra_env: + env.update(extra_env) + with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".log") as tmp: + tmp_path = Path(tmp.name) + + try: + with tmp_path.open("w+") as tmp: + result = subprocess.run(cmd, cwd=cwd, env=env, stdout=tmp, stderr=subprocess.STDOUT, text=True) + tmp.flush() + tmp.seek(0) + output = tmp.read() + print(output, end="" if output.endswith("\n") or not output else "\n") + return result.returncode, output + finally: + tmp_path.unlink(missing_ok=True) + + +def must_get(config: dict[str, Any], key: str) -> Any: + if key not in config: + raise ValueError(f"Missing required config key: {key}") + return config[key] + + +def resolve_index_path(benchmark_dir: Path, path_value: str) -> Path: + path = Path(path_value).expanduser() + return path.resolve() if path.is_absolute() else (benchmark_dir / path).resolve() + + +def append_option(cmd: list[str], key: str, value: Any) -> None: + if value is None: + return + flag = f"--{key.replace('_', '-')}" + if isinstance(value, 
list): + cmd.extend([flag, ",".join(str(v) for v in value)]) + else: + cmd.extend([flag, str(value)]) + + +def extend_with_args(cmd: list[str], args_map: dict[str, Any] | None) -> None: + if not args_map: + return + for key, value in args_map.items(): + append_option(cmd, key, value) + + +def extend_with_flags(cmd: list[str], flags: list[str] | None) -> None: + if not flags: + return + for flag in flags: + cmd.append(f"--{flag}") + + +def build_base_command( + vectordbbench_cmd: list[str], + client_name: str, + path: Path, + db_label: str, + case_type: str, + common_args: dict[str, Any], + specific_args: dict[str, Any] | None = None, + extra_flags: list[str] | None = None, +) -> list[str]: + cmd = [ + *vectordbbench_cmd, + client_name, + "--path", + str(path), + "--db-label", + db_label, + "--case-type", + case_type, + ] + extend_with_args(cmd, common_args) + extend_with_args(cmd, specific_args) + extend_with_flags(cmd, extra_flags) + return cmd + + +def validate_profile_output(profile_name: str, ret: int, output: str, prefix: str) -> None: + if ret != 0: + raise RuntimeError(f"{profile_name} profiling pass failed with exit code {ret}") + if not parse_query_records(output, prefix): + raise RuntimeError( + f"{profile_name} profiling pass completed without any '{prefix}' records in stdout" + ) + + +def print_header(title: str) -> None: + print("\n\n" + "#" * 70) + print(f"# {title}") + print("#" * 70) + + +def main() -> int: + args = parse_args() + config_path = Path(args.config).expanduser().resolve() + config = load_dataset_config(config_path, args.dataset) + zvec_root, vectordbbench_root, benchmark_dir, results_dir = resolve_paths( + config, + args.zvec_root, + args.vectordbbench_root, + args.benchmark_dir, + args.results_dir, + ) + vectordbbench_cmd = resolve_vectordbbench_command() + benchmark_dir.mkdir(parents=True, exist_ok=True) + + dataset_name = args.dataset + common = must_get(config, "common") + hnsw_config = must_get(config, "hnsw") + omega_config = 
must_get(config, "omega") + profiling_config = config.get("profiling", {}) + + case_type = must_get(common, "case_type") + hnsw_path = resolve_index_path(benchmark_dir, must_get(hnsw_config, "path")) + omega_path = resolve_index_path(benchmark_dir, must_get(omega_config, "path")) + hnsw_db_label = must_get(hnsw_config, "db_label") + omega_db_label = must_get(omega_config, "db_label") + target_recalls = omega_config.get("target_recalls", []) + if not target_recalls: + raise ValueError("omega.target_recalls must be a non-empty list") + + hnsw_common_args = {k: v for k, v in common.items() if k != "case_type"} + hnsw_specific_args = hnsw_config.get("args", {}) + omega_specific_args = omega_config.get("args", {}) + + print("=" * 70) + print(f"VectorDBBench: Zvec HNSW vs OMEGA ({dataset_name})") + print(f"Config: {config_path}") + print("=" * 70) + print(f"zvec_root: {zvec_root}") + print(f"vectordbbench_root: {vectordbbench_root}") + print(f"vectordbbench_cmd: {' '.join(vectordbbench_cmd)}") + print(f"benchmark_dir: {benchmark_dir}") + print(f"results_dir: {results_dir}") + print(f"hnsw_path: {hnsw_path}") + print(f"omega_path: {omega_path}") + print(f"target_recalls: {target_recalls}") + print( + "build_mode: " + + ("retrain model only (reuse existing index)" if args.retrain_only else "build index + train model") + ) + print("=" * 70) + + results: list[BenchmarkResult] = [] + + if not args.skip_hnsw: + print_header("HNSW Benchmark") + + if not args.search_only: + print("\n[Phase 1] Building HNSW index...") + before_files = snapshot_result_files(results_dir) + cmd = build_base_command( + vectordbbench_cmd, + "zvec", + hnsw_path, + hnsw_db_label, + case_type, + hnsw_common_args, + hnsw_specific_args, + ["skip-search-serial", "skip-search-concurrent"], + ) + ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + if ret != 0 and not args.dry_run: + print("ERROR: HNSW build failed!") + return 1 + if not args.dry_run: + write_offline_summary( + hnsw_path, + 
hnsw_db_label, + get_run_result(hnsw_db_label, before_files, results_dir), + ) + + if not args.build_only: + print("\n[Phase 2] Running HNSW search benchmark...") + before_files = snapshot_result_files(results_dir) + cmd = build_base_command( + vectordbbench_cmd, + "zvec", + hnsw_path, + hnsw_db_label, + case_type, + hnsw_common_args, + hnsw_specific_args, + ["skip-drop-old", "skip-load"], + ) + ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + metrics = get_run_result(hnsw_db_label, before_files, results_dir) if not args.dry_run else {} + load_duration = get_offline_load_duration(hnsw_path) + hnsw_profile = None + if ret == 0 and not args.dry_run: + print("\n[Profiling] Running HNSW serial-only profiling pass...") + profile_cmd = build_base_command( + vectordbbench_cmd, + "zvec", + hnsw_path, + hnsw_db_label, + case_type, + hnsw_common_args, + hnsw_specific_args, + ["skip-drop-old", "skip-load", "skip-search-concurrent"], + ) + profile_ret, profile_output = run_command_capture( + profile_cmd, + vectordbbench_root, + dry_run=False, + extra_env={ + "ZVEC_LOG_LEVEL": "INFO", + "ZVEC_HNSW_LOG_QUERY_STATS": "1", + "ZVEC_HNSW_LOG_QUERY_LIMIT": str(profiling_config.get("hnsw_query_limit", 2000)), + }, + ) + validate_profile_output("HNSW", profile_ret, profile_output, "HNSW query stats:") + hnsw_profile = build_hnsw_profile(metrics, profile_output) + results.append( + BenchmarkResult( + type="HNSW", + path=str(hnsw_path), + success=ret == 0, + target_recall=None, + load_duration=load_duration if load_duration is not None else metrics.get("load_duration"), + qps=metrics.get("qps"), + recall=metrics.get("recall"), + profiling=hnsw_profile, + ) + ) + + if not args.skip_omega: + build_target_recall = target_recalls[0] + print_header("OMEGA Benchmark") + + if not args.search_only: + if args.retrain_only: + print("\n[Phase 1] Retraining OMEGA model only (reusing existing index)...") + else: + print("\n[Phase 1] Building OMEGA index + training model...") + 
print( + f"Build-time target_recall is ignored by training; using first requested value " + f"for CLI compatibility: {build_target_recall}" + ) + before_files = snapshot_result_files(results_dir) + build_flags = ["skip-search-serial", "skip-search-concurrent"] + if args.retrain_only: + build_flags.extend(["skip-drop-old", "skip-load", "retrain-only"]) + cmd = build_base_command( + vectordbbench_cmd, + "zvecomega", + omega_path, + omega_db_label, + case_type, + hnsw_common_args, + {**omega_specific_args, "target_recall": build_target_recall}, + build_flags, + ) + ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + if ret != 0 and not args.dry_run: + print("ERROR: OMEGA build failed!") + return 1 + if not args.dry_run: + write_offline_summary( + omega_path, + omega_db_label, + get_run_result(omega_db_label, before_files, results_dir), + retrain_only=args.retrain_only, + ) + + if not args.build_only: + for target_recall in target_recalls: + print_header(f"OMEGA Search Benchmark (target_recall={target_recall})") + before_files = snapshot_result_files(results_dir) + search_flags = ["skip-drop-old", "skip-load"] + if args.retrain_only: + search_flags.append("retrain-only") + cmd = build_base_command( + vectordbbench_cmd, + "zvecomega", + omega_path, + omega_db_label, + case_type, + hnsw_common_args, + {**omega_specific_args, "target_recall": target_recall}, + search_flags, + ) + ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) + metrics = get_run_result(omega_db_label, before_files, results_dir) if not args.dry_run else {} + load_duration = get_offline_load_duration(omega_path) + omega_profile = None + if ret == 0 and not args.dry_run: + print("\n[Profiling] Running OMEGA serial-only profiling pass...") + profile_flags = ["skip-drop-old", "skip-load", "skip-search-concurrent"] + if args.retrain_only: + profile_flags.append("retrain-only") + profile_cmd = build_base_command( + vectordbbench_cmd, + "zvecomega", + omega_path, + omega_db_label, 
+ case_type, + hnsw_common_args, + {**omega_specific_args, "target_recall": target_recall}, + profile_flags, + ) + profile_env = { + "ZVEC_LOG_LEVEL": "INFO", + "ZVEC_OMEGA_LOG_QUERY_STATS": "1", + "ZVEC_OMEGA_LOG_QUERY_LIMIT": str(profiling_config.get("omega_query_limit", 2000)), + } + if profiling_config.get("omega_profile_control_timing", True): + profile_env["ZVEC_OMEGA_PROFILE_CONTROL_TIMING"] = "1" + profile_ret, profile_output = run_command_capture( + profile_cmd, + vectordbbench_root, + dry_run=False, + extra_env=profile_env, + ) + validate_profile_output("OMEGA", profile_ret, profile_output, "OMEGA query stats:") + baseline_profile = next( + (result.profiling for result in results if result.type == "HNSW" and result.profiling), + None, + ) + omega_profile = build_omega_profile(metrics, profile_output, baseline_profile) + results.append( + BenchmarkResult( + type="OMEGA", + path=str(omega_path), + success=ret == 0, + target_recall=target_recall, + load_duration=load_duration if load_duration is not None else metrics.get("load_duration"), + qps=metrics.get("qps"), + recall=metrics.get("recall"), + profiling=omega_profile, + ) + ) + + if results: + written_summary_paths = write_grouped_profiling_summaries(dataset_name, results) + print("\n\n" + "=" * 70) + print("Benchmark Summary") + print("=" * 70) + print(f"{'Type':<10} {'target_recall':<15} {'load_dur(s)':<12} {'qps':<12} {'recall':<10} {'Status':<10}") + print("-" * 75) + for result in results: + tr = f"{result.target_recall:.2f}" if result.target_recall is not None else "N/A" + status = "OK" if result.success else "FAILED" + ld = f"{result.load_duration:.1f}" if result.load_duration else "N/A" + qps = f"{result.qps:.1f}" if result.qps else "N/A" + recall = f"{result.recall:.4f}" if result.recall else "N/A" + print(f"{result.type:<10} {tr:<15} {ld:<12} {qps:<12} {recall:<10} {status:<10}") + + print("\nProfiling Summary") + print("-" * 75) + print(f"{'Type':<10} {'target_recall':<15} {'avg_lat(ms)':<12} 
{'avg_cmps':<12} {'avg_pred_calls':<16} {'avg_model_ms':<14} {'saved_cmps':<12}") + for result in results: + profile = result.profiling or {} + tr = f"{result.target_recall:.2f}" if result.target_recall is not None else "N/A" + avg_lat = profile.get("profile_avg_end2end_latency_ms") + avg_cmps = profile.get("profile_avg_cmps") + avg_pred_calls = profile.get("profile_avg_prediction_calls") + avg_model_ms = profile.get("profile_avg_model_overhead_ms") + saved_cmps = profile.get("profile_avg_early_stop_saved_cmps") + print( + f"{result.type:<10} " + f"{tr:<15} " + f"{(f'{avg_lat:.3f}' if avg_lat is not None else 'N/A'):<12} " + f"{(f'{avg_cmps:.1f}' if avg_cmps is not None else 'N/A'):<12} " + f"{(f'{avg_pred_calls:.2f}' if avg_pred_calls is not None else 'N/A'):<16} " + f"{(f'{avg_model_ms:.3f}' if avg_model_ms is not None else 'N/A'):<14} " + f"{(f'{saved_cmps:.1f}' if saved_cmps is not None else 'N/A'):<12}" + ) + print() + for path in written_summary_paths: + print(f"Profiling JSON: {path}") + + print("\nTo view results:") + print(" vectordbbench results") + print("\nOr start the web UI:") + print(" vectordbbench start") + print() + + return 0 if all(result.success for result in results) else 1 + + +if __name__ == "__main__": + sys.exit(main()) From 33cea2f01552a805b3221d3efdcd42b5e2bfd896 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 25 Mar 2026 19:58:21 +0800 Subject: [PATCH 044/126] Add fine-grained omega control path profiling - Add timing for ReportVisitCandidate, ShouldPredict, ReportHop - Add sub-timing for UpdateTopCandidates, PushTraversalWindow - Fix duplicate hook_total_ms field in query stats log - New fields only logged when ZVEC_OMEGA_PROFILE_CONTROL_TIMING=1 --- src/core/algorithm/omega/omega_streamer.cc | 27 ++++++++++++++++++---- thirdparty/omega/OMEGALib | 2 +- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 5793afaa1..6e1d0a438 
100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -370,6 +370,11 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm unsigned long long sorted_window_time_ns = 0; unsigned long long average_recall_eval_time_ns = 0; unsigned long long prediction_feature_prep_time_ns = 0; + unsigned long long report_visit_candidate_time_ns = 0; + unsigned long long should_predict_time_ns = 0; + unsigned long long report_hop_time_ns = 0; + unsigned long long update_top_candidates_time_ns = 0; + unsigned long long push_traversal_window_time_ns = 0; unsigned long long collected_gt_advance_count = 0; unsigned long long should_stop_calls_with_advance = 0; unsigned long long max_prediction_calls_per_should_stop = 0; @@ -398,6 +403,14 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm average_recall_eval_time_ns = omega_search_ctx->GetAverageRecallEvalTimeNs(); prediction_feature_prep_time_ns = omega_search_ctx->GetPredictionFeaturePrepTimeNs(); + report_visit_candidate_time_ns = + omega_search_ctx->GetReportVisitCandidateTimeNs(); + should_predict_time_ns = omega_search_ctx->GetShouldPredictTimeNs(); + report_hop_time_ns = omega_search_ctx->GetReportHopTimeNs(); + update_top_candidates_time_ns = + omega_search_ctx->GetUpdateTopCandidatesTimeNs(); + push_traversal_window_time_ns = + omega_search_ctx->GetPushTraversalWindowTimeNs(); collected_gt_advance_count = omega_search_ctx->GetCollectedGtAdvanceCount(); should_stop_calls_with_advance = omega_search_ctx->GetShouldStopCallsWithAdvance(); @@ -451,9 +464,11 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm "collected_gt_advance=%llu max_pred_per_stop=%llu " "should_stop_ms=%.3f prediction_eval_ms=%.3f " "setup_ms=%.3f reset_query_ms=%.3f " - "core_search_ms=%.3f omega_control_ms=%.3f " - "hook_total_ms=%.3f hook_body_ms=%.3f " - "hook_dispatch_ms=%.3f pure_search_ms=%.3f total_ms=%.3f", 
+ "core_search_ms=%.3f hook_total_ms=%.3f hook_body_ms=%.3f " + "hook_dispatch_ms=%.3f pure_search_ms=%.3f " + "report_visit_candidate_ms=%.3f should_predict_ms=%.3f " + "report_hop_ms=%.3f update_top_candidates_ms=%.3f " + "push_traversal_window_ms=%.3f total_ms=%.3f", static_cast(query_seq), IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, static_cast(pairwise_dist_cnt), cmps, @@ -469,10 +484,14 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static_cast(query_reset_time_ns) / 1e6, static_cast(query_search_time_ns) / 1e6, static_cast(hook_total_time_ns) / 1e6, - static_cast(hook_total_time_ns) / 1e6, static_cast(hook_body_time_ns) / 1e6, static_cast(hook_dispatch_time_ns) / 1e6, static_cast(pure_search_time_ns) / 1e6, + static_cast(report_visit_candidate_time_ns) / 1e6, + static_cast(should_predict_time_ns) / 1e6, + static_cast(report_hop_time_ns) / 1e6, + static_cast(update_top_candidates_time_ns) / 1e6, + static_cast(push_traversal_window_time_ns) / 1e6, static_cast(query_total_time_ns) / 1e6); } else { LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index a2c316570..8d19bc129 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit a2c316570442b2a03600b4b649bac4ca0fd6af91 +Subproject commit 8d19bc129d13639e35e73425f0629312169b8c45 From 6a2ca3249e832d727712c3af27dbf5c70a21bab2 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 25 Mar 2026 20:22:17 +0800 Subject: [PATCH 045/126] Add fine-grained timing fields to benchmark script - Parse hook_total_ms, hook_body_ms, hook_dispatch_ms - Parse report_visit_candidate_ms, should_predict_ms, report_hop_ms - Parse update_top_candidates_ms, push_traversal_window_ms - Replace deprecated omega_control_ms with hook_total_ms --- scripts/benchmark_hnsw_vs_omega.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git 
a/scripts/benchmark_hnsw_vs_omega.py b/scripts/benchmark_hnsw_vs_omega.py index 1e95d8202..2a674287a 100644 --- a/scripts/benchmark_hnsw_vs_omega.py +++ b/scripts/benchmark_hnsw_vs_omega.py @@ -213,7 +213,7 @@ def build_omega_profile( avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") avg_core_search_ms = avg_metric(query_records, "core_search_ms") avg_pure_search_ms = avg_metric(query_records, "pure_search_ms") - avg_omega_control_ms = avg_metric(query_records, "omega_control_ms") + avg_hook_total_ms = avg_metric(query_records, "hook_total_ms") avg_search_only_ms = ( avg_pure_search_ms if avg_pure_search_ms is not None else avg_core_search_ms ) @@ -223,8 +223,8 @@ def build_omega_profile( cmp_time_ms = avg_search_only_ms / avg_pairwise_dist_cnt model_overhead_cmp_equiv = None - if cmp_time_ms and cmp_time_ms > 0 and avg_omega_control_ms is not None: - model_overhead_cmp_equiv = avg_omega_control_ms / cmp_time_ms + if cmp_time_ms and cmp_time_ms > 0 and avg_hook_total_ms is not None: + model_overhead_cmp_equiv = avg_hook_total_ms / cmp_time_ms avg_saved_cmps = None if ( @@ -245,12 +245,20 @@ def build_omega_profile( "profile_avg_prediction_calls": avg_metric(query_records, "prediction_calls"), "profile_avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), "profile_avg_advance_calls": avg_metric(query_records, "advance_calls"), - "profile_avg_model_overhead_ms": avg_omega_control_ms, + "profile_avg_model_overhead_ms": avg_hook_total_ms, "profile_avg_setup_ms": avg_metric(query_records, "setup_ms"), "profile_avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), "profile_avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), "profile_avg_core_search_ms": avg_core_search_ms, "profile_avg_pure_search_ms": avg_pure_search_ms, + "profile_avg_hook_total_ms": avg_hook_total_ms, + "profile_avg_hook_body_ms": avg_metric(query_records, "hook_body_ms"), + "profile_avg_hook_dispatch_ms": 
avg_metric(query_records, "hook_dispatch_ms"), + "profile_avg_report_visit_candidate_ms": avg_metric(query_records, "report_visit_candidate_ms"), + "profile_avg_should_predict_ms": avg_metric(query_records, "should_predict_ms"), + "profile_avg_report_hop_ms": avg_metric(query_records, "report_hop_ms"), + "profile_avg_update_top_candidates_ms": avg_metric(query_records, "update_top_candidates_ms"), + "profile_avg_push_traversal_window_ms": avg_metric(query_records, "push_traversal_window_ms"), "profile_avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, "profile_avg_early_stop_saved_cmps": avg_saved_cmps, "profile_avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), From deed4add899022f7719b071c5c032d8258c6bdb5 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 25 Mar 2026 20:44:10 +0800 Subject: [PATCH 046/126] Update OMEGALib: lazy traversal window tracking --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 8d19bc129..085ca8221 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 8d19bc129d13639e35e73425f0629312169b8c45 +Subproject commit 085ca822198914ce5b747d1136699c6a78cb282c From 3eaadd2fe3e18ea122bb7041a88963b1f4fad99e Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 26 Mar 2026 17:40:07 +0800 Subject: [PATCH 047/126] Use fused ReportVisitCandidate return value to eliminate ShouldPredict calls --- src/core/algorithm/omega/omega_searcher.cc | 5 ++--- src/core/algorithm/omega/omega_streamer.cc | 12 +++--------- thirdparty/omega/OMEGALib | 2 +- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 3993ee190..6c5b06414 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -58,12 +58,11 @@ void OnOmegaHop(void *user_data) { bool 
OnOmegaVisitCandidate(node_id_t id, dist_t dist, bool inserted_to_topk, void *user_data) { auto &state = *static_cast(user_data); - state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); + bool should_predict = state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); if (!state.enable_early_stopping) { return false; } - return state.search_ctx->ShouldPredict() && - state.search_ctx->ShouldStopEarly(); + return should_predict && state.search_ctx->ShouldStopEarly(); } } // namespace diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 6e1d0a438..b7262b41e 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -122,18 +122,12 @@ void OnOmegaHop(void *user_data) { bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, bool inserted_to_topk, void *user_data) { auto &state = *static_cast(user_data); + bool should_predict = false; RunOmegaControlHook(state, [&]() { - state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); + should_predict = state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); }); - if (!state.enable_early_stopping) { - return false; - } - - bool should_predict = false; - RunOmegaControlHook(state, - [&]() { should_predict = state.search_ctx->ShouldPredict(); }); - if (!should_predict) { + if (!state.enable_early_stopping || !should_predict) { return false; } diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 085ca8221..823c0c5c6 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 085ca822198914ce5b747d1136699c6a78cb282c +Subproject commit 823c0c5c6a95b911127baa35cd0d9a66bb7892a4 From af7dbacb0f55d75763c62c055244be61df97faad Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 26 Mar 2026 18:07:27 +0800 Subject: [PATCH 048/126] Align omega hooks with fused prediction signal --- src/core/algorithm/omega/omega_streamer.cc | 5 +---- 
thirdparty/omega/OMEGALib | 2 +- tools/core/hnsw_hooks_microbench.cc | 8 ++++---- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index b7262b41e..b5f41cdf9 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -365,7 +365,6 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm unsigned long long average_recall_eval_time_ns = 0; unsigned long long prediction_feature_prep_time_ns = 0; unsigned long long report_visit_candidate_time_ns = 0; - unsigned long long should_predict_time_ns = 0; unsigned long long report_hop_time_ns = 0; unsigned long long update_top_candidates_time_ns = 0; unsigned long long push_traversal_window_time_ns = 0; @@ -399,7 +398,6 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm omega_search_ctx->GetPredictionFeaturePrepTimeNs(); report_visit_candidate_time_ns = omega_search_ctx->GetReportVisitCandidateTimeNs(); - should_predict_time_ns = omega_search_ctx->GetShouldPredictTimeNs(); report_hop_time_ns = omega_search_ctx->GetReportHopTimeNs(); update_top_candidates_time_ns = omega_search_ctx->GetUpdateTopCandidatesTimeNs(); @@ -460,7 +458,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm "setup_ms=%.3f reset_query_ms=%.3f " "core_search_ms=%.3f hook_total_ms=%.3f hook_body_ms=%.3f " "hook_dispatch_ms=%.3f pure_search_ms=%.3f " - "report_visit_candidate_ms=%.3f should_predict_ms=%.3f " + "report_visit_candidate_ms=%.3f " "report_hop_ms=%.3f update_top_candidates_ms=%.3f " "push_traversal_window_ms=%.3f total_ms=%.3f", static_cast(query_seq), @@ -482,7 +480,6 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static_cast(hook_dispatch_time_ns) / 1e6, static_cast(pure_search_time_ns) / 1e6, static_cast(report_visit_candidate_time_ns) / 1e6, - 
static_cast(should_predict_time_ns) / 1e6, static_cast(report_hop_time_ns) / 1e6, static_cast(update_top_candidates_time_ns) / 1e6, static_cast(push_traversal_window_time_ns) / 1e6, diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 823c0c5c6..63e487e22 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 823c0c5c6a95b911127baa35cd0d9a66bb7892a4 +Subproject commit 63e487e22c67cc55dd900d480ea7ce0a76fab0c1 diff --git a/tools/core/hnsw_hooks_microbench.cc b/tools/core/hnsw_hooks_microbench.cc index 2c2c742fc..f0440ee29 100644 --- a/tools/core/hnsw_hooks_microbench.cc +++ b/tools/core/hnsw_hooks_microbench.cc @@ -161,12 +161,12 @@ void OnOmegaHop(void* user_data) { bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, bool should_consider_candidate, void* user_data) { auto& state = *static_cast(user_data); - state.search_ctx->ReportVisitCandidate(id, dist, should_consider_candidate); - if (!state.enable_early_stopping) { + bool should_predict = + state.search_ctx->ReportVisitCandidate(id, dist, should_consider_candidate); + if (!state.enable_early_stopping || !should_predict) { return false; } - return state.search_ctx->ShouldPredict() && - state.search_ctx->ShouldStopEarly(); + return state.search_ctx->ShouldStopEarly(); } struct BenchStats { From 22d2308c75134ba3bb76d311c24252291e8b1cbb Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 26 Mar 2026 18:55:34 +0800 Subject: [PATCH 049/126] Throttle omega hooks by min interval --- src/core/algorithm/omega/omega_searcher.cc | 62 ++++++++++++-- src/core/algorithm/omega/omega_streamer.cc | 95 +++++++++++++++++++--- thirdparty/omega/OMEGALib | 2 +- tools/core/hnsw_hooks_microbench.cc | 68 ++++++++++++++-- 4 files changed, 201 insertions(+), 26 deletions(-) diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 6c5b06414..35d6ef1eb 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ 
b/src/core/algorithm/omega/omega_searcher.cc @@ -41,13 +41,56 @@ bool DisableOmegaModelPrediction() { struct OmegaHookState { omega::SearchContext *search_ctx{nullptr}; bool enable_early_stopping{false}; + bool per_cmp_reporting{false}; + std::vector pending_candidates; + int batch_min_interval{1}; }; +void ResetOmegaHookState(OmegaHookState* state) { + state->pending_candidates.clear(); + if (state->search_ctx != nullptr) { + state->batch_min_interval = state->search_ctx->GetPredictionBatchMinInterval(); + } else { + state->batch_min_interval = 1; + } +} + +bool FlushOmegaPendingCandidates(OmegaHookState* state, int flush_count) { + if (state->search_ctx == nullptr || flush_count <= 0 || + state->pending_candidates.empty()) { + return false; + } + + flush_count = std::min(flush_count, + static_cast(state->pending_candidates.size())); + bool should_predict = state->search_ctx->ReportVisitCandidates( + state->pending_candidates.data(), static_cast(flush_count)); + state->pending_candidates.erase(state->pending_candidates.begin(), + state->pending_candidates.begin() + flush_count); + if (!state->enable_early_stopping || !should_predict) { + return false; + } + return state->search_ctx->ShouldStopEarly(); +} + +bool MaybeFlushOmegaPendingCandidates(OmegaHookState* state) { + if (static_cast(state->pending_candidates.size()) < + state->batch_min_interval) { + return false; + } + return FlushOmegaPendingCandidates(state, state->batch_min_interval); +} + void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, void *user_data) { auto &state = *static_cast(user_data); state.search_ctx->SetDistStart(dist); - state.search_ctx->ReportVisitCandidate(id, dist, true); + if (state.per_cmp_reporting) { + state.search_ctx->ReportVisitCandidate(id, dist, true); + return; + } + state.pending_candidates.push_back({static_cast(id), dist, true}); + MaybeFlushOmegaPendingCandidates(&state); } void OnOmegaHop(void *user_data) { @@ -58,11 +101,17 @@ void 
OnOmegaHop(void *user_data) { bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, bool inserted_to_topk, void *user_data) { auto &state = *static_cast(user_data); - bool should_predict = state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); - if (!state.enable_early_stopping) { - return false; + if (state.per_cmp_reporting) { + bool should_predict = + state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); + if (!state.enable_early_stopping || !should_predict) { + return false; + } + return state.search_ctx->ShouldStopEarly(); } - return should_predict && state.search_ctx->ShouldStopEarly(); + state.pending_candidates.push_back( + {static_cast(id), dist, inserted_to_topk}); + return MaybeFlushOmegaPendingCandidates(&state); } } // namespace @@ -344,6 +393,8 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet hook_state.search_ctx = omega_search_ctx; hook_state.enable_early_stopping = !training_mode_enabled_ && !disable_model_prediction; + hook_state.per_cmp_reporting = training_mode_enabled_; + ResetOmegaHookState(&hook_state); HnswAlgorithm::SearchHooks hooks; hooks.user_data = &hook_state; hooks.on_level0_entry = OnOmegaLevel0Entry; @@ -356,6 +407,7 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet LOG_WARN("OMEGA adaptive search failed, falling back to HNSW"); return HnswSearcher::search_impl(query, qmeta, count, context); } + MaybeFlushOmegaPendingCandidates(&hook_state); omega_ctx->topk_to_result(q); if (early_stop_hit) { diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index b5f41cdf9..d0f20c9b8 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -89,8 +89,49 @@ struct OmegaHookState { bool enable_early_stopping{false}; bool collect_control_timing{false}; uint64_t *hook_body_time_ns{nullptr}; + bool per_cmp_reporting{false}; + std::vector pending_candidates; + 
int batch_min_interval{1}; }; +void ResetOmegaHookState(OmegaHookState *state) { + state->pending_candidates.clear(); + if (state->search_ctx != nullptr) { + state->batch_min_interval = state->search_ctx->GetPredictionBatchMinInterval(); + } else { + state->batch_min_interval = 1; + } +} + +template +bool FlushOmegaPendingCandidates(const OmegaHookState &state, + int flush_count, Fn &&run_control_hook) { + if (state.search_ctx == nullptr || flush_count <= 0 || + state.pending_candidates.empty()) { + return false; + } + + auto &mutable_state = const_cast(state); + flush_count = std::min(flush_count, + static_cast(mutable_state.pending_candidates.size())); + bool should_predict = false; + run_control_hook([&]() { + should_predict = state.search_ctx->ReportVisitCandidates( + mutable_state.pending_candidates.data(), static_cast(flush_count)); + }); + mutable_state.pending_candidates.erase(mutable_state.pending_candidates.begin(), + mutable_state.pending_candidates.begin() + + flush_count); + if (!state.enable_early_stopping || !should_predict) { + return false; + } + + bool should_stop = false; + run_control_hook( + [&]() { should_stop = state.search_ctx->ShouldStopEarly(); }); + return should_stop; +} + template void RunOmegaControlHook(const OmegaHookState &state, Fn &&fn) { if (!state.collect_control_timing) { @@ -105,13 +146,34 @@ void RunOmegaControlHook(const OmegaHookState &state, Fn &&fn) { } } +bool MaybeFlushOmegaPendingCandidates(const OmegaHookState &state) { + auto run_control_hook = [&](auto &&fn) { + RunOmegaControlHook(state, std::forward(fn)); + }; + + if (static_cast(state.pending_candidates.size()) < + state.batch_min_interval) { + return false; + } + return FlushOmegaPendingCandidates(state, state.batch_min_interval, + run_control_hook); +} + void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, void *user_data) { auto &state = *static_cast(user_data); + if (state.per_cmp_reporting) { + RunOmegaControlHook(state, [&]() { + 
state.search_ctx->SetDistStart(dist); + state.search_ctx->ReportVisitCandidate(id, dist, true); + }); + return; + } RunOmegaControlHook(state, [&]() { state.search_ctx->SetDistStart(dist); - state.search_ctx->ReportVisitCandidate(id, dist, true); + state.pending_candidates.push_back({static_cast(id), dist, true}); }); + MaybeFlushOmegaPendingCandidates(state); } void OnOmegaHop(void *user_data) { @@ -122,19 +184,25 @@ void OnOmegaHop(void *user_data) { bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, bool inserted_to_topk, void *user_data) { auto &state = *static_cast(user_data); - bool should_predict = false; + if (state.per_cmp_reporting) { + bool should_predict = false; + RunOmegaControlHook(state, [&]() { + should_predict = + state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); + }); + if (!state.enable_early_stopping || !should_predict) { + return false; + } + bool should_stop = false; + RunOmegaControlHook( + state, [&]() { should_stop = state.search_ctx->ShouldStopEarly(); }); + return should_stop; + } RunOmegaControlHook(state, [&]() { - should_predict = state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); + state.pending_candidates.push_back( + {static_cast(id), dist, inserted_to_topk}); }); - - if (!state.enable_early_stopping || !should_predict) { - return false; - } - - bool should_stop = false; - RunOmegaControlHook( - state, [&]() { should_stop = state.search_ctx->ShouldStopEarly(); }); - return should_stop; + return MaybeFlushOmegaPendingCandidates(state); } } // namespace @@ -333,6 +401,8 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm hook_state.enable_early_stopping = enable_early_stopping; hook_state.collect_control_timing = collect_control_timing; hook_state.hook_body_time_ns = &hook_body_time_ns; + hook_state.per_cmp_reporting = training_mode_enabled_; + ResetOmegaHookState(&hook_state); HnswAlgorithm::SearchHooks hooks; hooks.user_data = &hook_state; hooks.collect_timing = 
collect_control_timing; @@ -350,6 +420,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm LOG_ERROR("OMEGA search failed"); return ret; } + MaybeFlushOmegaPendingCandidates(hook_state); auto query_search_end = RdtscTimer::Now(); // Get final statistics diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 63e487e22..f3a981992 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 63e487e22c67cc55dd900d480ea7ce0a76fab0c1 +Subproject commit f3a9819920ceee185dca0cec1d2a515be0cbc62d diff --git a/tools/core/hnsw_hooks_microbench.cc b/tools/core/hnsw_hooks_microbench.cc index f0440ee29..f6b174e66 100644 --- a/tools/core/hnsw_hooks_microbench.cc +++ b/tools/core/hnsw_hooks_microbench.cc @@ -144,13 +144,56 @@ bool ParseArgs(int argc, char** argv, Options* opts) { struct OmegaHookState { omega::SearchContext* search_ctx{nullptr}; bool enable_early_stopping{false}; + bool per_cmp_reporting{false}; + std::vector pending_candidates; + int batch_min_interval{1}; }; +void ResetOmegaHookState(OmegaHookState* state) { + state->pending_candidates.clear(); + if (state->search_ctx != nullptr) { + state->batch_min_interval = state->search_ctx->GetPredictionBatchMinInterval(); + } else { + state->batch_min_interval = 1; + } +} + +bool FlushOmegaPendingCandidates(OmegaHookState* state, int flush_count) { + if (state->search_ctx == nullptr || flush_count <= 0 || + state->pending_candidates.empty()) { + return false; + } + + flush_count = std::min(flush_count, + static_cast(state->pending_candidates.size())); + bool should_predict = state->search_ctx->ReportVisitCandidates( + state->pending_candidates.data(), static_cast(flush_count)); + state->pending_candidates.erase(state->pending_candidates.begin(), + state->pending_candidates.begin() + flush_count); + if (!state->enable_early_stopping || !should_predict) { + return false; + } + return state->search_ctx->ShouldStopEarly(); +} + +bool 
MaybeFlushOmegaPendingCandidates(OmegaHookState* state) { + if (static_cast(state->pending_candidates.size()) < + state->batch_min_interval) { + return false; + } + return FlushOmegaPendingCandidates(state, state->batch_min_interval); +} + void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, void* user_data) { auto& state = *static_cast(user_data); state.search_ctx->SetDistStart(dist); - state.search_ctx->ReportVisitCandidate(id, dist, true); + if (state.per_cmp_reporting) { + state.search_ctx->ReportVisitCandidate(id, dist, true); + return; + } + state.pending_candidates.push_back({static_cast(id), dist, true}); + MaybeFlushOmegaPendingCandidates(&state); } void OnOmegaHop(void* user_data) { @@ -161,12 +204,17 @@ void OnOmegaHop(void* user_data) { bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, bool should_consider_candidate, void* user_data) { auto& state = *static_cast(user_data); - bool should_predict = - state.search_ctx->ReportVisitCandidate(id, dist, should_consider_candidate); - if (!state.enable_early_stopping || !should_predict) { - return false; + if (state.per_cmp_reporting) { + bool should_predict = + state.search_ctx->ReportVisitCandidate(id, dist, should_consider_candidate); + if (!state.enable_early_stopping || !should_predict) { + return false; + } + return state.search_ctx->ShouldStopEarly(); } - return state.search_ctx->ShouldStopEarly(); + state.pending_candidates.push_back( + {static_cast(id), dist, should_consider_candidate}); + return MaybeFlushOmegaPendingCandidates(&state); } struct BenchStats { @@ -425,6 +473,7 @@ int main(int argc, char** argv) { OmegaHookState omega_hook_state; omega_hook_state.search_ctx = &omega_search_ctx; omega_hook_state.enable_early_stopping = false; + ResetOmegaHookState(&omega_hook_state); HnswAlgorithm::SearchHooks omega_hooks; omega_hooks.user_data = &omega_hook_state; omega_hooks.on_level0_entry = OnOmegaLevel0Entry; @@ -449,9 +498,12 @@ int main(int argc, char** argv) { 
RunBench("alg_fast_search_with_omega_hooks_only", ctx, queries, opts.warmup, opts.iterations, [&]() { omega_search_ctx.Reset(); + ResetOmegaHookState(&omega_hook_state); bool stopped_early = false; - return streamer->FastSearchWithHooks(ctx, &omega_hooks, - &stopped_early); + int ret = streamer->FastSearchWithHooks(ctx, &omega_hooks, + &stopped_early); + MaybeFlushOmegaPendingCandidates(&omega_hook_state); + return ret; }); } From 7ae1d76b786aca5c24897f2749597554c1a44380 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 26 Mar 2026 19:45:05 +0800 Subject: [PATCH 050/126] Bound omega batching by next prediction cmp --- src/core/algorithm/omega/omega_searcher.cc | 68 ++++++++++++++++----- src/core/algorithm/omega/omega_streamer.cc | 69 +++++++++++++++++----- tools/core/hnsw_hooks_microbench.cc | 67 +++++++++++++++++---- 3 files changed, 162 insertions(+), 42 deletions(-) diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 35d6ef1eb..4143c4acd 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -39,34 +39,76 @@ bool DisableOmegaModelPrediction() { } struct OmegaHookState { + struct PendingVisitBuffer { + std::vector storage; + int head{0}; + int count{0}; + + void Reset(int capacity) { + head = 0; + count = 0; + storage.resize(std::max(1, capacity)); + } + + bool Empty() const { return count == 0; } + + int Capacity() const { return static_cast(storage.size()); } + + void Push(const omega::SearchContext::VisitCandidate& candidate) { + storage[(head + count) % Capacity()] = candidate; + ++count; + } + + const omega::SearchContext::VisitCandidate* Data() const { + return storage.data() + head; + } + + void Clear() { + head = 0; + count = 0; + } + }; + omega::SearchContext *search_ctx{nullptr}; bool enable_early_stopping{false}; bool per_cmp_reporting{false}; - std::vector pending_candidates; + PendingVisitBuffer pending_candidates; int batch_min_interval{1}; 
}; void ResetOmegaHookState(OmegaHookState* state) { - state->pending_candidates.clear(); if (state->search_ctx != nullptr) { state->batch_min_interval = state->search_ctx->GetPredictionBatchMinInterval(); } else { state->batch_min_interval = 1; } + state->pending_candidates.Reset(state->batch_min_interval); +} + +bool ShouldFlushOmegaPendingCandidates(const OmegaHookState& state) { + if (state.pending_candidates.Empty()) { + return false; + } + if (state.pending_candidates.count >= state.batch_min_interval) { + return true; + } + if (state.search_ctx == nullptr) { + return false; + } + return state.search_ctx->GetTotalCmps() + state.pending_candidates.count >= + state.search_ctx->GetNextPredictionCmps(); } bool FlushOmegaPendingCandidates(OmegaHookState* state, int flush_count) { if (state->search_ctx == nullptr || flush_count <= 0 || - state->pending_candidates.empty()) { + state->pending_candidates.Empty()) { return false; } - flush_count = std::min(flush_count, - static_cast(state->pending_candidates.size())); + flush_count = std::min(flush_count, state->pending_candidates.count); bool should_predict = state->search_ctx->ReportVisitCandidates( - state->pending_candidates.data(), static_cast(flush_count)); - state->pending_candidates.erase(state->pending_candidates.begin(), - state->pending_candidates.begin() + flush_count); + state->pending_candidates.Data(), static_cast(flush_count)); + state->pending_candidates.Clear(); if (!state->enable_early_stopping || !should_predict) { return false; } @@ -74,11 +116,10 @@ bool FlushOmegaPendingCandidates(OmegaHookState* state, int flush_count) { } bool MaybeFlushOmegaPendingCandidates(OmegaHookState* state) { - if (static_cast(state->pending_candidates.size()) < - state->batch_min_interval) { + if (!ShouldFlushOmegaPendingCandidates(*state)) { return false; } - return FlushOmegaPendingCandidates(state, state->batch_min_interval); + return FlushOmegaPendingCandidates(state, state->pending_candidates.count); } void 
OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, @@ -89,7 +130,7 @@ void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, state.search_ctx->ReportVisitCandidate(id, dist, true); return; } - state.pending_candidates.push_back({static_cast(id), dist, true}); + state.pending_candidates.Push({static_cast(id), dist, true}); MaybeFlushOmegaPendingCandidates(&state); } @@ -109,8 +150,7 @@ bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, } return state.search_ctx->ShouldStopEarly(); } - state.pending_candidates.push_back( - {static_cast(id), dist, inserted_to_topk}); + state.pending_candidates.Push({static_cast(id), dist, inserted_to_topk}); return MaybeFlushOmegaPendingCandidates(&state); } diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index d0f20c9b8..67981b787 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -85,43 +85,84 @@ uint64_t OmegaProfilingElapsedNs(uint64_t start, uint64_t end) { } struct OmegaHookState { + struct PendingVisitBuffer { + std::vector storage; + int head{0}; + int count{0}; + + void Reset(int capacity) { + head = 0; + count = 0; + storage.resize(std::max(1, capacity)); + } + + bool Empty() const { return count == 0; } + + int Capacity() const { return static_cast(storage.size()); } + + void Push(const omega::SearchContext::VisitCandidate& candidate) { + storage[(head + count) % Capacity()] = candidate; + ++count; + } + + const omega::SearchContext::VisitCandidate* Data() const { + return storage.data() + head; + } + + void Clear() { + head = 0; + count = 0; + } + }; + omega::SearchContext *search_ctx{nullptr}; bool enable_early_stopping{false}; bool collect_control_timing{false}; uint64_t *hook_body_time_ns{nullptr}; bool per_cmp_reporting{false}; - std::vector pending_candidates; + PendingVisitBuffer pending_candidates; int batch_min_interval{1}; }; void 
ResetOmegaHookState(OmegaHookState *state) { - state->pending_candidates.clear(); if (state->search_ctx != nullptr) { state->batch_min_interval = state->search_ctx->GetPredictionBatchMinInterval(); } else { state->batch_min_interval = 1; } + state->pending_candidates.Reset(state->batch_min_interval); +} + +bool ShouldFlushOmegaPendingCandidates(const OmegaHookState &state) { + if (state.pending_candidates.Empty()) { + return false; + } + if (state.pending_candidates.count >= state.batch_min_interval) { + return true; + } + if (state.search_ctx == nullptr) { + return false; + } + return state.search_ctx->GetTotalCmps() + state.pending_candidates.count >= + state.search_ctx->GetNextPredictionCmps(); } template bool FlushOmegaPendingCandidates(const OmegaHookState &state, int flush_count, Fn &&run_control_hook) { if (state.search_ctx == nullptr || flush_count <= 0 || - state.pending_candidates.empty()) { + state.pending_candidates.Empty()) { return false; } auto &mutable_state = const_cast(state); - flush_count = std::min(flush_count, - static_cast(mutable_state.pending_candidates.size())); + flush_count = std::min(flush_count, mutable_state.pending_candidates.count); bool should_predict = false; run_control_hook([&]() { should_predict = state.search_ctx->ReportVisitCandidates( - mutable_state.pending_candidates.data(), static_cast(flush_count)); + mutable_state.pending_candidates.Data(), static_cast(flush_count)); }); - mutable_state.pending_candidates.erase(mutable_state.pending_candidates.begin(), - mutable_state.pending_candidates.begin() + - flush_count); + mutable_state.pending_candidates.Clear(); if (!state.enable_early_stopping || !should_predict) { return false; } @@ -151,11 +192,10 @@ bool MaybeFlushOmegaPendingCandidates(const OmegaHookState &state) { RunOmegaControlHook(state, std::forward(fn)); }; - if (static_cast(state.pending_candidates.size()) < - state.batch_min_interval) { + if (!ShouldFlushOmegaPendingCandidates(state)) { return false; } - return 
FlushOmegaPendingCandidates(state, state.batch_min_interval, + return FlushOmegaPendingCandidates(state, state.pending_candidates.count, run_control_hook); } @@ -171,7 +211,7 @@ void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, } RunOmegaControlHook(state, [&]() { state.search_ctx->SetDistStart(dist); - state.pending_candidates.push_back({static_cast(id), dist, true}); + state.pending_candidates.Push({static_cast(id), dist, true}); }); MaybeFlushOmegaPendingCandidates(state); } @@ -199,8 +239,7 @@ bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, return should_stop; } RunOmegaControlHook(state, [&]() { - state.pending_candidates.push_back( - {static_cast(id), dist, inserted_to_topk}); + state.pending_candidates.Push({static_cast(id), dist, inserted_to_topk}); }); return MaybeFlushOmegaPendingCandidates(state); } diff --git a/tools/core/hnsw_hooks_microbench.cc b/tools/core/hnsw_hooks_microbench.cc index f6b174e66..435b4967c 100644 --- a/tools/core/hnsw_hooks_microbench.cc +++ b/tools/core/hnsw_hooks_microbench.cc @@ -142,34 +142,76 @@ bool ParseArgs(int argc, char** argv, Options* opts) { } struct OmegaHookState { + struct PendingVisitBuffer { + std::vector storage; + int head{0}; + int count{0}; + + void Reset(int capacity) { + head = 0; + count = 0; + storage.resize(std::max(1, capacity)); + } + + bool Empty() const { return count == 0; } + + int Capacity() const { return static_cast(storage.size()); } + + void Push(const omega::SearchContext::VisitCandidate& candidate) { + storage[(head + count) % Capacity()] = candidate; + ++count; + } + + const omega::SearchContext::VisitCandidate* Data() const { + return storage.data() + head; + } + + void Clear() { + head = 0; + count = 0; + } + }; + omega::SearchContext* search_ctx{nullptr}; bool enable_early_stopping{false}; bool per_cmp_reporting{false}; - std::vector pending_candidates; + PendingVisitBuffer pending_candidates; int batch_min_interval{1}; }; void 
ResetOmegaHookState(OmegaHookState* state) { - state->pending_candidates.clear(); if (state->search_ctx != nullptr) { state->batch_min_interval = state->search_ctx->GetPredictionBatchMinInterval(); } else { state->batch_min_interval = 1; } + state->pending_candidates.Reset(state->batch_min_interval); +} + +bool ShouldFlushOmegaPendingCandidates(const OmegaHookState& state) { + if (state.pending_candidates.Empty()) { + return false; + } + if (state.pending_candidates.count >= state.batch_min_interval) { + return true; + } + if (state.search_ctx == nullptr) { + return false; + } + return state.search_ctx->GetTotalCmps() + state.pending_candidates.count >= + state.search_ctx->GetNextPredictionCmps(); } bool FlushOmegaPendingCandidates(OmegaHookState* state, int flush_count) { if (state->search_ctx == nullptr || flush_count <= 0 || - state->pending_candidates.empty()) { + state->pending_candidates.Empty()) { return false; } - flush_count = std::min(flush_count, - static_cast(state->pending_candidates.size())); + flush_count = std::min(flush_count, state->pending_candidates.count); bool should_predict = state->search_ctx->ReportVisitCandidates( - state->pending_candidates.data(), static_cast(flush_count)); - state->pending_candidates.erase(state->pending_candidates.begin(), - state->pending_candidates.begin() + flush_count); + state->pending_candidates.Data(), static_cast(flush_count)); + state->pending_candidates.Clear(); if (!state->enable_early_stopping || !should_predict) { return false; } @@ -177,11 +219,10 @@ bool FlushOmegaPendingCandidates(OmegaHookState* state, int flush_count) { } bool MaybeFlushOmegaPendingCandidates(OmegaHookState* state) { - if (static_cast(state->pending_candidates.size()) < - state->batch_min_interval) { + if (!ShouldFlushOmegaPendingCandidates(*state)) { return false; } - return FlushOmegaPendingCandidates(state, state->batch_min_interval); + return FlushOmegaPendingCandidates(state, state->pending_candidates.count); } void 
OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, @@ -192,7 +233,7 @@ void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, state.search_ctx->ReportVisitCandidate(id, dist, true); return; } - state.pending_candidates.push_back({static_cast(id), dist, true}); + state.pending_candidates.Push({static_cast(id), dist, true}); MaybeFlushOmegaPendingCandidates(&state); } @@ -212,7 +253,7 @@ bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, } return state.search_ctx->ShouldStopEarly(); } - state.pending_candidates.push_back( + state.pending_candidates.Push( {static_cast(id), dist, should_consider_candidate}); return MaybeFlushOmegaPendingCandidates(&state); } From 7fc1894eeff4102ef08e6e6c9d9b1b98a1da7371 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 26 Mar 2026 19:59:39 +0800 Subject: [PATCH 051/126] Update OMEGALib weighted BH cache --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index f3a981992..f60a8d04f 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit f3a9819920ceee185dca0cec1d2a515be0cbc62d +Subproject commit f60a8d04fe671f65b52105a01982347de949379a From 5c453ac4ac4b7799330527506453a766584ca9f5 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 26 Mar 2026 20:17:19 +0800 Subject: [PATCH 052/126] Add latency percentiles to HNSW/OMEGA benchmark summary --- scripts/benchmark_hnsw_vs_omega.py | 137 +++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 8 deletions(-) diff --git a/scripts/benchmark_hnsw_vs_omega.py b/scripts/benchmark_hnsw_vs_omega.py index 2a674287a..778cf44cb 100644 --- a/scripts/benchmark_hnsw_vs_omega.py +++ b/scripts/benchmark_hnsw_vs_omega.py @@ -29,6 +29,11 @@ class BenchmarkResult: load_duration: float | None = None qps: float | None = None recall: float | None = None + avg_latency_ms: float | None = None + p50_latency_ms: 
float | None = None + p90_latency_ms: float | None = None + p95_latency_ms: float | None = None + p99_latency_ms: float | None = None profiling: dict | None = None @@ -168,6 +173,22 @@ def avg_metric(records: list[dict[str, Any]], key: str) -> float | None: return sum(values) / len(values) +def percentile_metric(records: list[dict[str, Any]], key: str, percentile: float) -> float | None: + values = sorted(float(record[key]) for record in records if key in record) + if not values: + return None + if len(values) == 1: + return values[0] + + rank = (len(values) - 1) * percentile / 100.0 + lower = int(rank) + upper = min(lower + 1, len(values) - 1) + if lower == upper: + return values[lower] + weight = rank - lower + return values[lower] * (1.0 - weight) + values[upper] * weight + + def parse_serial_runner_summary(output: str) -> dict[str, Any]: summary = {} for line in output.splitlines(): @@ -189,11 +210,20 @@ def parse_query_records(output: str, prefix: str) -> list[dict[str, Any]]: def build_hnsw_profile(metrics: dict[str, Any], output: str) -> dict[str, Any]: query_records = parse_query_records(output, "HNSW query stats:") serial_summary = parse_serial_runner_summary(output) + avg_latency_ms = avg_metric(query_records, "latency_ms") + p50_latency_ms = percentile_metric(query_records, "latency_ms", 50) + p90_latency_ms = percentile_metric(query_records, "latency_ms", 90) + p95_latency_ms = percentile_metric(query_records, "latency_ms", 95) + p99_latency_ms = percentile_metric(query_records, "latency_ms", 99) return { "benchmark_recall": metrics.get("recall"), "benchmark_qps": metrics.get("qps"), "profile_query_count": len(query_records), - "profile_avg_end2end_latency_ms": avg_metric(query_records, "latency_ms"), + "profile_avg_end2end_latency_ms": avg_latency_ms, + "profile_p50_end2end_latency_ms": p50_latency_ms, + "profile_p90_end2end_latency_ms": p90_latency_ms, + "profile_p95_end2end_latency_ms": p95_latency_ms, + "profile_p99_end2end_latency_ms": 
p99_latency_ms, "profile_avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), "profile_avg_scan_cmps": avg_metric(query_records, "cmps"), "profile_avg_pure_search_ms": avg_metric(query_records, "pure_search_ms"), @@ -209,6 +239,11 @@ def build_omega_profile( ) -> dict[str, Any]: query_records = parse_query_records(output, "OMEGA query stats:") serial_summary = parse_serial_runner_summary(output) + avg_latency_ms = avg_metric(query_records, "total_ms") + p50_latency_ms = percentile_metric(query_records, "total_ms", 50) + p90_latency_ms = percentile_metric(query_records, "total_ms", 90) + p95_latency_ms = percentile_metric(query_records, "total_ms", 95) + p99_latency_ms = percentile_metric(query_records, "total_ms", 99) avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") avg_core_search_ms = avg_metric(query_records, "core_search_ms") @@ -238,7 +273,11 @@ def build_omega_profile( "benchmark_recall": metrics.get("recall"), "benchmark_qps": metrics.get("qps"), "profile_query_count": len(query_records), - "profile_avg_end2end_latency_ms": avg_metric(query_records, "total_ms"), + "profile_avg_end2end_latency_ms": avg_latency_ms, + "profile_p50_end2end_latency_ms": p50_latency_ms, + "profile_p90_end2end_latency_ms": p90_latency_ms, + "profile_p95_end2end_latency_ms": p95_latency_ms, + "profile_p99_end2end_latency_ms": p99_latency_ms, "profile_avg_cmps": avg_pairwise_dist_cnt, "profile_avg_scan_cmps": avg_metric(query_records, "scan_cmps"), "profile_avg_omega_cmps": avg_metric(query_records, "omega_cmps"), @@ -298,6 +337,11 @@ def write_grouped_profiling_summaries(dataset: str, results: list[BenchmarkResul "path": result.path, "load_duration_s": result.load_duration, "qps": result.qps, + "avg_latency_ms": result.avg_latency_ms, + "p50_latency_ms": result.p50_latency_ms, + "p90_latency_ms": result.p90_latency_ms, + "p95_latency_ms": result.p95_latency_ms, + "p99_latency_ms": result.p99_latency_ms, "recall": result.recall, "profiling": 
result.profiling, } @@ -333,6 +377,9 @@ def get_latest_result(db_label: str, results_dir: Path) -> dict[str, Any]: "optimize_duration": metrics.get("optimize_duration"), "load_duration": metrics.get("load_duration"), "qps": metrics.get("qps"), + "avg_latency_ms": metrics.get("serial_latency_avg"), + "p95_latency_ms": metrics.get("serial_latency_p95"), + "p99_latency_ms": metrics.get("serial_latency_p99"), "recall": metrics.get("recall"), } except Exception: @@ -340,6 +387,42 @@ def get_latest_result(db_label: str, results_dir: Path) -> dict[str, Any]: return {} +def latency_summary_from_profile(profile: dict[str, Any] | None) -> dict[str, float | None]: + profile = profile or {} + return { + "avg_latency_ms": profile.get("profile_avg_end2end_latency_ms"), + "p50_latency_ms": profile.get("profile_p50_end2end_latency_ms"), + "p90_latency_ms": profile.get("profile_p90_end2end_latency_ms"), + "p95_latency_ms": profile.get("profile_p95_end2end_latency_ms"), + "p99_latency_ms": profile.get("profile_p99_end2end_latency_ms"), + } + + +def merge_omega_detailed_profile( + summary_profile: dict[str, Any], detailed_profile: dict[str, Any] +) -> dict[str, Any]: + merged = dict(summary_profile) + detailed_keys = [ + "profile_avg_model_overhead_ms", + "profile_avg_should_stop_ms", + "profile_avg_prediction_eval_ms", + "profile_avg_core_search_ms", + "profile_avg_pure_search_ms", + "profile_avg_hook_total_ms", + "profile_avg_hook_body_ms", + "profile_avg_hook_dispatch_ms", + "profile_avg_report_visit_candidate_ms", + "profile_avg_should_predict_ms", + "profile_avg_report_hop_ms", + "profile_avg_update_top_candidates_ms", + "profile_avg_push_traversal_window_ms", + "profile_avg_model_overhead_cmp_equiv", + ] + for key in detailed_keys: + merged[key] = detailed_profile.get(key) + return merged + + def snapshot_result_files(results_dir: Path) -> set[str]: if not results_dir.exists(): return set() @@ -738,6 +821,7 @@ def main() -> int: ) validate_profile_output("HNSW", profile_ret, 
profile_output, "HNSW query stats:") hnsw_profile = build_hnsw_profile(metrics, profile_output) + latency_summary = latency_summary_from_profile(hnsw_profile) results.append( BenchmarkResult( type="HNSW", @@ -746,6 +830,11 @@ def main() -> int: target_recall=None, load_duration=load_duration if load_duration is not None else metrics.get("load_duration"), qps=metrics.get("qps"), + avg_latency_ms=latency_summary["avg_latency_ms"], + p50_latency_ms=latency_summary["p50_latency_ms"], + p90_latency_ms=latency_summary["p90_latency_ms"], + p95_latency_ms=latency_summary["p95_latency_ms"], + p99_latency_ms=latency_summary["p99_latency_ms"], recall=metrics.get("recall"), profiling=hnsw_profile, ) @@ -812,7 +901,7 @@ def main() -> int: load_duration = get_offline_load_duration(omega_path) omega_profile = None if ret == 0 and not args.dry_run: - print("\n[Profiling] Running OMEGA serial-only profiling pass...") + print("\n[Profiling] Running OMEGA serial-only latency pass...") profile_flags = ["skip-drop-old", "skip-load", "skip-search-concurrent"] if args.retrain_only: profile_flags.append("retrain-only") @@ -831,8 +920,6 @@ def main() -> int: "ZVEC_OMEGA_LOG_QUERY_STATS": "1", "ZVEC_OMEGA_LOG_QUERY_LIMIT": str(profiling_config.get("omega_query_limit", 2000)), } - if profiling_config.get("omega_profile_control_timing", True): - profile_env["ZVEC_OMEGA_PROFILE_CONTROL_TIMING"] = "1" profile_ret, profile_output = run_command_capture( profile_cmd, vectordbbench_root, @@ -845,6 +932,26 @@ def main() -> int: None, ) omega_profile = build_omega_profile(metrics, profile_output, baseline_profile) + if profiling_config.get("omega_profile_control_timing", True): + print("\n[Profiling] Running OMEGA detailed control-timing pass...") + detailed_env = dict(profile_env) + detailed_env["ZVEC_OMEGA_PROFILE_CONTROL_TIMING"] = "1" + detailed_ret, detailed_output = run_command_capture( + profile_cmd, + vectordbbench_root, + dry_run=False, + extra_env=detailed_env, + ) + 
validate_profile_output( + "OMEGA", detailed_ret, detailed_output, "OMEGA query stats:" + ) + detailed_profile = build_omega_profile( + metrics, detailed_output, baseline_profile + ) + omega_profile = merge_omega_detailed_profile( + omega_profile, detailed_profile + ) + latency_summary = latency_summary_from_profile(omega_profile) results.append( BenchmarkResult( type="OMEGA", @@ -853,6 +960,11 @@ def main() -> int: target_recall=target_recall, load_duration=load_duration if load_duration is not None else metrics.get("load_duration"), qps=metrics.get("qps"), + avg_latency_ms=latency_summary["avg_latency_ms"], + p50_latency_ms=latency_summary["p50_latency_ms"], + p90_latency_ms=latency_summary["p90_latency_ms"], + p95_latency_ms=latency_summary["p95_latency_ms"], + p99_latency_ms=latency_summary["p99_latency_ms"], recall=metrics.get("recall"), profiling=omega_profile, ) @@ -863,15 +975,24 @@ def main() -> int: print("\n\n" + "=" * 70) print("Benchmark Summary") print("=" * 70) - print(f"{'Type':<10} {'target_recall':<15} {'load_dur(s)':<12} {'qps':<12} {'recall':<10} {'Status':<10}") - print("-" * 75) + print( + f"{'Type':<10} {'target_recall':<15} {'load_dur(s)':<12} " + f"{'qps':<8} {'avg_latency(ms)':<16} {'p95_latency(ms)':<16} " + f"{'recall':<10} {'Status':<10}" + ) + print("-" * 100) for result in results: tr = f"{result.target_recall:.2f}" if result.target_recall is not None else "N/A" status = "OK" if result.success else "FAILED" ld = f"{result.load_duration:.1f}" if result.load_duration else "N/A" qps = f"{result.qps:.1f}" if result.qps else "N/A" + avg_latency = f"{result.avg_latency_ms:.3f}" if result.avg_latency_ms is not None else "N/A" + p95_latency = f"{result.p95_latency_ms:.3f}" if result.p95_latency_ms is not None else "N/A" recall = f"{result.recall:.4f}" if result.recall else "N/A" - print(f"{result.type:<10} {tr:<15} {ld:<12} {qps:<12} {recall:<10} {status:<10}") + print( + f"{result.type:<10} {tr:<15} {ld:<12} {qps:<8} " + f"{avg_latency:<16} 
{p95_latency:<16} {recall:<10} {status:<10}" + ) print("\nProfiling Summary") print("-" * 75) From 08bde0c5099acb913e0881c22f1c3415b9a823b0 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 26 Mar 2026 23:41:05 +0800 Subject: [PATCH 053/126] Update OMEGALib conservative avg recall merge --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index f60a8d04f..4df5688a5 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit f60a8d04fe671f65b52105a01982347de949379a +Subproject commit 4df5688a5ce0410ca9581ec2f1251d404b758a9b From 2a4ed6a5991cd797b6a5f1af25346f08d7e56a6b Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 27 Mar 2026 00:43:45 +0800 Subject: [PATCH 054/126] Add OMEGA training and stop diagnostics --- src/core/algorithm/omega/omega_streamer.cc | 11 ++++++++--- thirdparty/omega/OMEGALib | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 67981b787..ffcea1c25 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -466,6 +466,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm int hops, cmps, collected_gt; float predicted_recall_avg = 0.0f; float predicted_recall_at_target = 0.0f; + float true_recall_avg = -1.0f; int omega_early_stop_hit = 0; unsigned long long should_stop_calls = 0; unsigned long long prediction_calls = 0; @@ -497,6 +498,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm predicted_recall_avg = omega_search_ctx->GetLastPredictedRecallAvg(); predicted_recall_at_target = omega_search_ctx->GetLastPredictedRecallAtTarget(); + true_recall_avg = omega_search_ctx->GetLastTrueRecallAvg(); omega_early_stop_hit = omega_search_ctx->EarlyStopHit() ? 
1 : 0; should_stop_calls = omega_search_ctx->GetShouldStopCalls(); prediction_calls = omega_search_ctx->GetPredictionCalls(); @@ -540,7 +542,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm LOG_INFO("OMEGA runtime stats: model_loaded=%d target_recall=%.4f " "scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d " "collected_gt=%d predicted_recall_avg=%.4f " - "predicted_recall_at_target=%.4f early_stop_hit=%d " + "predicted_recall_at_target=%.4f true_recall_avg=%.4f early_stop_hit=%d " "should_stop_calls=%llu prediction_calls=%llu " "advance_calls=%llu collected_gt_advance=%llu " "max_pred_per_stop=%llu should_stop_ms=%.3f " @@ -549,6 +551,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static_cast(pairwise_dist_cnt), cmps, collected_gt, predicted_recall_avg, predicted_recall_at_target, + true_recall_avg, (early_stop_hit || omega_early_stop_hit != 0) ? 1 : 0, should_stop_calls, prediction_calls, should_stop_calls_with_advance, collected_gt_advance_count, @@ -560,7 +563,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm if (collect_control_timing) { LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " - "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " + "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f true_recall_avg=%.4f " "early_stop_hit=%d should_stop_calls=%llu " "prediction_calls=%llu advance_calls=%llu " "collected_gt_advance=%llu max_pred_per_stop=%llu " @@ -576,6 +579,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static_cast(pairwise_dist_cnt), cmps, collected_gt, predicted_recall_avg, predicted_recall_at_target, + true_recall_avg, (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, should_stop_calls, prediction_calls, should_stop_calls_with_advance, collected_gt_advance_count, @@ -597,7 +601,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm } else { LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " - "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " + "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f true_recall_avg=%.4f " "early_stop_hit=%d should_stop_calls=%llu " "prediction_calls=%llu advance_calls=%llu " "collected_gt_advance=%llu max_pred_per_stop=%llu " @@ -609,6 +613,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static_cast(pairwise_dist_cnt), cmps, collected_gt, predicted_recall_avg, predicted_recall_at_target, + true_recall_avg, (early_stop_hit || omega_early_stop_hit != 0) ? 1 : 0, should_stop_calls, prediction_calls, should_stop_calls_with_advance, collected_gt_advance_count, diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 4df5688a5..81da9a795 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 4df5688a5ce0410ca9581ec2f1251d404b758a9b +Subproject commit 81da9a79578559c99328b3808adfdfc87815ba67 From 209cdaef4a52f2d272ec0430f935a75815b0af76 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 27 Mar 2026 00:49:12 +0800 Subject: [PATCH 055/126] Update OMEGALib after diagnostics fix --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 81da9a795..e6e527df1 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 81da9a79578559c99328b3808adfdfc87815ba67 +Subproject commit e6e527df1e9cc0f59ee37f04c50bf225036e2199 From 567ea419ac25b4685428eec947e20b6e64e9de8d Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 27 Mar 
2026 02:02:10 +0800 Subject: [PATCH 056/126] Make OMEGA k_train configurable --- python/zvec/model/param/__init__.pyi | 50 +++++++++++++++++++ .../python/model/param/python_param.cc | 29 +++++++---- src/db/index/common/proto_converter.cc | 4 +- src/db/index/segment/segment.cc | 15 ++++-- src/db/proto/zvec.proto | 3 +- src/include/zvec/db/index_params.h | 21 ++++++-- 6 files changed, 102 insertions(+), 20 deletions(-) diff --git a/python/zvec/model/param/__init__.pyi b/python/zvec/model/param/__init__.pyi index ed32d99b8..b17c08314 100644 --- a/python/zvec/model/param/__init__.pyi +++ b/python/zvec/model/param/__init__.pyi @@ -565,6 +565,12 @@ class OmegaIndexParam(VectorIndexParam): m: typing.SupportsInt = 100, ef_construction: typing.SupportsInt = 500, quantize_type: _zvec.typing.QuantizeType = ..., + min_vector_threshold: typing.SupportsInt = 100000, + num_training_queries: typing.SupportsInt = 1000, + ef_training: typing.SupportsInt = 1000, + window_size: typing.SupportsInt = 100, + ef_groundtruth: typing.SupportsInt = 0, + k_train: typing.SupportsInt = 1, ) -> None: """ Constructs an OmegaIndexParam instance. @@ -576,6 +582,14 @@ class OmegaIndexParam(VectorIndexParam): Defaults to 500. quantize_type (QuantizeType, optional): Vector quantization type. Defaults to QuantizeType.UNDEFINED. + min_vector_threshold (int, optional): Minimum doc count required to + enable OMEGA training. + num_training_queries (int, optional): Number of sampled queries for training. + ef_training (int, optional): Search ef used when collecting training records. + window_size (int, optional): Traversal window size. + ef_groundtruth (int, optional): ef used to compute training ground truth. + k_train (int, optional): Number of top GT results required for a + positive training label. """ def __repr__(self) -> str: ... @@ -597,6 +611,42 @@ class OmegaIndexParam(VectorIndexParam): int: Maximum number of neighbors per node in upper layers. 
""" + @property + def min_vector_threshold(self) -> int: + """ + int: Minimum vectors required to enable OMEGA optimization. + """ + + @property + def num_training_queries(self) -> int: + """ + int: Number of sampled queries used for OMEGA training. + """ + + @property + def ef_training(self) -> int: + """ + int: Search ef used when collecting training records. + """ + + @property + def window_size(self) -> int: + """ + int: Traversal window size used by OMEGA. + """ + + @property + def ef_groundtruth(self) -> int: + """ + int: ef used for ground truth computation (0 means brute force). + """ + + @property + def k_train(self) -> int: + """ + int: Number of top GT results required for a positive training label. + """ + class OmegaQueryParam(HnswQueryParam): """ diff --git a/src/binding/python/model/param/python_param.cc b/src/binding/python/model/param/python_param.cc index 73bb9b5e0..5e5d323c0 100644 --- a/src/binding/python/model/param/python_param.cc +++ b/src/binding/python/model/param/python_param.cc @@ -581,6 +581,9 @@ predict when to stop searching. ground truth for training. If 0, brute force search is used (slower but exact). If > 0, HNSW search with this ef is used (faster but approximate). Default is 0 (brute force). + k_train (int): Number of top ground-truth results that must be present in + the current top-k before a training record is labeled positive. + Default is 1. Examples: >>> from zvec.typing import MetricType, QuantizeType @@ -592,13 +595,14 @@ predict when to stop searching. ... num_training_queries=500, ... ef_training=800, ... window_size=100, - ... ef_groundtruth=2000 # Use HNSW for faster ground truth computation + ... ef_groundtruth=2000, # Use HNSW for faster ground truth computation + ... k_train=10 ... 
) - >>> print(params.ef_groundtruth) - 2000 + >>> print(params.k_train) + 10 )pbdoc"); omega_params - .def(py::init(), + .def(py::init(), py::arg("metric_type") = MetricType::IP, py::arg("m") = core_interface::kDefaultHnswNeighborCnt, py::arg("ef_construction") = @@ -608,7 +612,8 @@ predict when to stop searching. py::arg("num_training_queries") = 1000, py::arg("ef_training") = 1000, py::arg("window_size") = 100, - py::arg("ef_groundtruth") = 0) + py::arg("ef_groundtruth") = 0, + py::arg("k_train") = 1) .def_property_readonly( "m", &OmegaIndexParams::m, "int: Maximum number of neighbors per node in upper layers.") @@ -630,6 +635,9 @@ predict when to stop searching. .def_property_readonly( "ef_groundtruth", &OmegaIndexParams::ef_groundtruth, "int: ef for ground truth computation (0=brute force, >0=HNSW).") + .def_property_readonly( + "k_train", &OmegaIndexParams::k_train, + "int: Number of top GT results required for a positive training label.") .def( "to_dict", [](const OmegaIndexParams &self) -> py::dict { @@ -643,6 +651,7 @@ predict when to stop searching. dict["ef_training"] = self.ef_training(); dict["window_size"] = self.window_size(); dict["ef_groundtruth"] = self.ef_groundtruth(); + dict["k_train"] = self.k_train(); dict["quantize_type"] = quantize_type_to_string(self.quantize_type()); return dict; @@ -666,6 +675,8 @@ predict when to stop searching. std::to_string(self.window_size()) + ", \"ef_groundtruth\":" + std::to_string(self.ef_groundtruth()) + + ", \"k_train\":" + + std::to_string(self.k_train()) + ", \"quantize_type\":" + quantize_type_to_string(self.quantize_type()) + "}"; }) @@ -676,15 +687,15 @@ predict when to stop searching. 
self.min_vector_threshold(), self.num_training_queries(), self.ef_training(), self.window_size(), - self.ef_groundtruth()); + self.ef_groundtruth(), self.k_train()); }, [](py::tuple t) { if (t.size() == 10) { return std::make_shared( t[0].cast(), t[1].cast(), t[2].cast(), t[3].cast(), t[4].cast(), - t[6].cast(), t[7].cast(), t[8].cast(), - t[9].cast()); + t[5].cast(), t[6].cast(), t[7].cast(), + t[8].cast(), t[9].cast()); } if (t.size() != 9) throw std::runtime_error("Invalid state for OmegaIndexParams"); @@ -692,7 +703,7 @@ predict when to stop searching. t[0].cast(), t[1].cast(), t[2].cast(), t[3].cast(), t[4].cast(), t[5].cast(), t[6].cast(), t[7].cast(), - t[8].cast()); + t[8].cast(), 1); })); } diff --git a/src/db/index/common/proto_converter.cc b/src/db/index/common/proto_converter.cc index f09f3d7b3..935b1f6ee 100644 --- a/src/db/index/common/proto_converter.cc +++ b/src/db/index/common/proto_converter.cc @@ -86,7 +86,8 @@ OmegaIndexParams::OPtr ProtoConverter::FromPb( params_pb.num_training_queries(), params_pb.ef_training(), params_pb.window_size(), - params_pb.ef_groundtruth()); + params_pb.ef_groundtruth(), + params_pb.k_train()); return params; } @@ -104,6 +105,7 @@ proto::OmegaIndexParams ProtoConverter::ToPb(const OmegaIndexParams *params) { params_pb.set_ef_training(params->ef_training()); params_pb.set_window_size(params->window_size()); params_pb.set_ef_groundtruth(params->ef_groundtruth()); + params_pb.set_k_train(params->k_train()); return params_pb; } diff --git a/src/db/index/segment/segment.cc b/src/db/index/segment/segment.cc index 6508283bf..1704b724e 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -1769,12 +1769,14 @@ Result SegmentImpl::merge_vector_indexer( size_t num_training_queries = 1000; // default int ef_training = 1000; // default int ef_groundtruth = 0; // default: brute force + int k_train = 1; // default: top-1 label if (auto omega_params = std::dynamic_pointer_cast(field.index_params())) { 
num_training_queries = omega_params->num_training_queries(); ef_training = omega_params->ef_training(); ef_groundtruth = omega_params->ef_groundtruth(); - LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d, ef_groundtruth=%d", - num_training_queries, ef_training, ef_groundtruth); + k_train = omega_params->k_train(); + LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d, ef_groundtruth=%d, k_train=%d", + num_training_queries, ef_training, ef_groundtruth, k_train); } // Collect training data using the current indexer (in-memory graph still exists) @@ -1784,7 +1786,7 @@ Result SegmentImpl::merge_vector_indexer( collector_opts.ef_training = ef_training; collector_opts.ef_groundtruth = ef_groundtruth; collector_opts.topk = 100; - collector_opts.k_train = 1; // Label=1 when top-1 GT found + collector_opts.k_train = k_train; // Use the current vector_indexer which still has the in-memory graph std::vector training_indexers = {vector_indexer}; @@ -2466,6 +2468,7 @@ Status SegmentImpl::auto_train_omega_index_internal( int ef_training = 1000; // default int ef_groundtruth = 0; // default: brute force uint32_t min_vector_threshold = 100000; // default + int k_train = 1; // default: top-1 label auto field = collection_schema_->get_field(field_name); if (field && field->index_params()) { if (auto omega_params = std::dynamic_pointer_cast(field->index_params())) { @@ -2473,8 +2476,9 @@ Status SegmentImpl::auto_train_omega_index_internal( ef_training = omega_params->ef_training(); ef_groundtruth = omega_params->ef_groundtruth(); min_vector_threshold = omega_params->min_vector_threshold(); - LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d, ef_groundtruth=%d, min_vector_threshold=%u", - num_training_queries, ef_training, ef_groundtruth, min_vector_threshold); + k_train = omega_params->k_train(); + LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d, ef_groundtruth=%d, 
min_vector_threshold=%u, k_train=%d", + num_training_queries, ef_training, ef_groundtruth, min_vector_threshold, k_train); } } @@ -2501,6 +2505,7 @@ Status SegmentImpl::auto_train_omega_index_internal( collector_options.ef_training = ef_training; collector_options.ef_groundtruth = ef_groundtruth; collector_options.topk = 100; + collector_options.k_train = k_train; collector_options.noise_scale = 0.01f; std::vector> cached_queries; diff --git a/src/db/proto/zvec.proto b/src/db/proto/zvec.proto index a1de4a404..55a012abc 100644 --- a/src/db/proto/zvec.proto +++ b/src/db/proto/zvec.proto @@ -110,6 +110,7 @@ message OmegaIndexParams { int32 ef_training = 7; int32 window_size = 8; int32 ef_groundtruth = 9; // 0 = brute force, >0 = HNSW with this ef + int32 k_train = 10; } message IndexParams { @@ -186,4 +187,4 @@ message Manifest { uint32 delete_snapshot_path_suffix = 7; uint32 next_segment_id = 8; -}; \ No newline at end of file +}; diff --git a/src/include/zvec/db/index_params.h b/src/include/zvec/db/index_params.h index db781977c..73a93be93 100644 --- a/src/include/zvec/db/index_params.h +++ b/src/include/zvec/db/index_params.h @@ -327,7 +327,8 @@ class OmegaIndexParams : public VectorIndexParams { size_t num_training_queries = 1000, int ef_training = 1000, int window_size = 100, - int ef_groundtruth = 0) // 0 means use brute force, >0 means use HNSW with this ef + int ef_groundtruth = 0, + int k_train = 1) // 0 means use brute force, >0 means use HNSW with this ef : VectorIndexParams(IndexType::OMEGA, metric_type, quantize_type), m_(m), ef_construction_(ef_construction), @@ -335,7 +336,8 @@ class OmegaIndexParams : public VectorIndexParams { num_training_queries_(num_training_queries), ef_training_(ef_training), window_size_(window_size), - ef_groundtruth_(ef_groundtruth) {} + ef_groundtruth_(ef_groundtruth), + k_train_(k_train) {} using OPtr = std::shared_ptr; @@ -344,7 +346,8 @@ class OmegaIndexParams : public VectorIndexParams { return 
std::make_shared(metric_type_, m_, ef_construction_, quantize_type_, min_vector_threshold_, num_training_queries_, ef_training_, - window_size_, ef_groundtruth_); + window_size_, ef_groundtruth_, + k_train_); } std::string to_string() const override { @@ -356,7 +359,8 @@ class OmegaIndexParams : public VectorIndexParams { << ",num_training_queries:" << num_training_queries_ << ",ef_training:" << ef_training_ << ",window_size:" << window_size_ - << ",ef_groundtruth:" << ef_groundtruth_ << "}"; + << ",ef_groundtruth:" << ef_groundtruth_ + << ",k_train:" << k_train_ << "}"; return oss.str(); } @@ -377,6 +381,8 @@ class OmegaIndexParams : public VectorIndexParams { static_cast(other).window_size_ && ef_groundtruth_ == static_cast(other).ef_groundtruth_ && + k_train_ == + static_cast(other).k_train_ && quantize_type() == static_cast(other).quantize_type(); } @@ -423,6 +429,12 @@ class OmegaIndexParams : public VectorIndexParams { int ef_groundtruth() const { return ef_groundtruth_; } + void set_k_train(int k_train) { + k_train_ = k_train; + } + int k_train() const { + return k_train_; + } private: int m_; @@ -432,6 +444,7 @@ class OmegaIndexParams : public VectorIndexParams { int ef_training_; int window_size_; int ef_groundtruth_; // 0 = brute force, >0 = use HNSW with this ef + int k_train_; }; } // namespace zvec From 97c9722a87a7d831b4e5ce74dea50e7c035dbe82 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 27 Mar 2026 02:10:37 +0800 Subject: [PATCH 057/126] Drop extra omega runtime diagnostics --- src/core/algorithm/omega/omega_streamer.cc | 11 +++-------- thirdparty/omega/OMEGALib | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index ffcea1c25..67981b787 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -466,7 +466,6 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta 
&qm int hops, cmps, collected_gt; float predicted_recall_avg = 0.0f; float predicted_recall_at_target = 0.0f; - float true_recall_avg = -1.0f; int omega_early_stop_hit = 0; unsigned long long should_stop_calls = 0; unsigned long long prediction_calls = 0; @@ -498,7 +497,6 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm predicted_recall_avg = omega_search_ctx->GetLastPredictedRecallAvg(); predicted_recall_at_target = omega_search_ctx->GetLastPredictedRecallAtTarget(); - true_recall_avg = omega_search_ctx->GetLastTrueRecallAvg(); omega_early_stop_hit = omega_search_ctx->EarlyStopHit() ? 1 : 0; should_stop_calls = omega_search_ctx->GetShouldStopCalls(); prediction_calls = omega_search_ctx->GetPredictionCalls(); @@ -542,7 +540,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm LOG_INFO("OMEGA runtime stats: model_loaded=%d target_recall=%.4f " "scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d " "collected_gt=%d predicted_recall_avg=%.4f " - "predicted_recall_at_target=%.4f true_recall_avg=%.4f early_stop_hit=%d " + "predicted_recall_at_target=%.4f early_stop_hit=%d " "should_stop_calls=%llu prediction_calls=%llu " "advance_calls=%llu collected_gt_advance=%llu " "max_pred_per_stop=%llu should_stop_ms=%.3f " @@ -551,7 +549,6 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static_cast(pairwise_dist_cnt), cmps, collected_gt, predicted_recall_avg, predicted_recall_at_target, - true_recall_avg, (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, should_stop_calls, prediction_calls, should_stop_calls_with_advance, collected_gt_advance_count, @@ -563,7 +560,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm if (collect_control_timing) { LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " - "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f true_recall_avg=%.4f " + "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " "early_stop_hit=%d should_stop_calls=%llu " "prediction_calls=%llu advance_calls=%llu " "collected_gt_advance=%llu max_pred_per_stop=%llu " @@ -579,7 +576,6 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static_cast(pairwise_dist_cnt), cmps, collected_gt, predicted_recall_avg, predicted_recall_at_target, - true_recall_avg, (early_stop_hit || omega_early_stop_hit != 0) ? 1 : 0, should_stop_calls, prediction_calls, should_stop_calls_with_advance, collected_gt_advance_count, @@ -601,7 +597,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm } else { LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " - "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f true_recall_avg=%.4f " + "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " "early_stop_hit=%d should_stop_calls=%llu " "prediction_calls=%llu advance_calls=%llu " "collected_gt_advance=%llu max_pred_per_stop=%llu " @@ -613,7 +609,6 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm static_cast(pairwise_dist_cnt), cmps, collected_gt, predicted_recall_avg, predicted_recall_at_target, - true_recall_avg, (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, should_stop_calls, prediction_calls, should_stop_calls_with_advance, collected_gt_advance_count, diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index e6e527df1..800d1d645 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit e6e527df1e9cc0f59ee37f04c50bf225036e2199 +Subproject commit 800d1d645cf2da1f233c1b7ee27a328d90b3a67e From cf0a70a1fc21349208bb9dad193ec573402ae234 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 14:07:53 +0800 Subject: [PATCH 058/126] Update benchmark HNSW vs OMEGA config --- scripts/benchmark_hnsw_vs_omega.json | 50 +++++----------------------- scripts/benchmark_hnsw_vs_omega.py | 4 +++ 2 files changed, 12 insertions(+), 42 deletions(-) diff --git a/scripts/benchmark_hnsw_vs_omega.json b/scripts/benchmark_hnsw_vs_omega.json index e8a8b5914..5a72b15b3 100644 --- a/scripts/benchmark_hnsw_vs_omega.json +++ b/scripts/benchmark_hnsw_vs_omega.json @@ -6,52 +6,17 @@ "concurrency_duration": 30, "k": 100, "m": 15, - "ef_search": 180, + "ef_search": 300, "quantize_type": "int8" }, "hnsw": { "path": "cohere_1m_hnsw", - "db_label": "16c64g-v0.1", + "db_label": "16c64g-v0.1-hnsw-m15-ef300", "args": {} }, "omega": { "path": "cohere_1m_omega", - "db_label": "omega-m15-ef180-int8", - "target_recalls": [ - 0.91 - ], - "args": { - "min_vector_threshold": 100000, - "num_training_queries": 4000, - "ef_training": 500, - "window_size": 100, - "ef_groundtruth": 1000 - } - }, - "profiling": { - "hnsw_query_limit": 2000, - "omega_query_limit": 2000, - "omega_profile_control_timing": true - } - }, - "bioasq_1m": { - "common": { - "case_type": "Performance1024D1M", - "num_concurrency": "12,14,16,18,20", - "concurrency_duration": 30, - "k": 100, - "m": 15, - "ef_search": 180, - "quantize_type": "int8" - }, - "hnsw": { - "path": "bioasq_1m_hnsw", - "db_label": "bioasq-hnsw", - "args": {} - }, - "omega": { - "path": "bioasq_1m_omega", - "db_label": "bioasq-omega", + "db_label": 
"16c64g-v0.1-omega-m15-ef300", "target_recalls": [ 0.91 ], @@ -76,17 +41,18 @@ "concurrency_duration": 30, "k": 100, "m": 50, - "ef_search": 118, - "quantize_type": "int8" + "ef_search": 300, + "quantize_type": "int8", + "is_using_refiner": true }, "hnsw": { "path": "cohere_10m_hnsw", - "db_label": "16c64g-v0.1", + "db_label": "16c64g-v0.1-hnsw-m50-ef300", "args": {} }, "omega": { "path": "cohere_10m_omega", - "db_label": "omega-m50-ef118-refiner-int8", + "db_label": "16c64g-v0.1-omega-m50-ef300", "target_recalls": [ 0.91 ], diff --git a/scripts/benchmark_hnsw_vs_omega.py b/scripts/benchmark_hnsw_vs_omega.py index 778cf44cb..b410966b1 100644 --- a/scripts/benchmark_hnsw_vs_omega.py +++ b/scripts/benchmark_hnsw_vs_omega.py @@ -639,6 +639,10 @@ def append_option(cmd: list[str], key: str, value: Any) -> None: if value is None: return flag = f"--{key.replace('_', '-')}" + if isinstance(value, bool): + if value: + cmd.append(flag) + return if isinstance(value, list): cmd.extend([flag, ",".join(str(v) for v in value)]) else: From fecd57ed213a89f0c4adbb2f932a6a93ec079c8d Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 18:46:07 +0800 Subject: [PATCH 059/126] cleanup: clarify omega runtime ownership --- src/core/algorithm/omega/omega_builder.cc | 138 ---------------------- src/core/algorithm/omega/omega_builder.h | 67 ----------- src/core/algorithm/omega/omega_searcher.h | 44 +------ src/core/algorithm/omega/omega_streamer.h | 21 ++-- src/core/interface/indexes/omega_index.cc | 33 +++--- 5 files changed, 28 insertions(+), 275 deletions(-) delete mode 100644 src/core/algorithm/omega/omega_builder.cc delete mode 100644 src/core/algorithm/omega/omega_builder.h diff --git a/src/core/algorithm/omega/omega_builder.cc b/src/core/algorithm/omega/omega_builder.cc deleted file mode 100644 index d44ed0bc3..000000000 --- a/src/core/algorithm/omega/omega_builder.cc +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2025-present the zvec project -// -// Licensed under the 
Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "omega_builder.h" -#include -#include -#include - -namespace zvec { -namespace core { - -OmegaBuilder::OmegaBuilder() : hnsw_builder_(nullptr) {} - -int OmegaBuilder::init(const IndexMeta &meta, const ailego::Params ¶ms) { - if (state_ != BUILD_STATE_INIT) { - LOG_ERROR("OmegaBuilder already initialized"); - return IndexError_Duplicate; - } - - // NOTE: OmegaBuilder is intentionally not implemented. - // OMEGA index building uses OmegaStreamer (which extends HnswStreamer) instead. - // This class exists for potential future use but is not currently needed. 
- LOG_ERROR("OmegaBuilder is not implemented - use OmegaStreamer for index building"); - return IndexError_NotImplemented; - - /* - // Create underlying HNSW builder - hnsw_builder_ = std::make_shared(); - int ret = hnsw_builder_->init(meta, params); - if (ret != 0) { - LOG_ERROR("Failed to initialize HNSW builder"); - return ret; - } - - state_ = BUILD_STATE_INITED; - LOG_INFO("OmegaBuilder initialized"); - return 0; - */ -} - -int OmegaBuilder::cleanup(void) { - if (state_ == BUILD_STATE_INIT) { - return 0; - } - - if (hnsw_builder_ != nullptr) { - hnsw_builder_->cleanup(); - hnsw_builder_.reset(); - } - - state_ = BUILD_STATE_INIT; - return 0; -} - -int OmegaBuilder::train(IndexThreads::Pointer threads, - IndexHolder::Pointer holder) { - if (state_ != BUILD_STATE_INITED) { - LOG_ERROR("OmegaBuilder not initialized"); - return IndexError_NoReady; - } - - int ret = hnsw_builder_->train(threads, holder); - if (ret != 0) { - LOG_ERROR("Failed to train HNSW builder"); - return ret; - } - - state_ = BUILD_STATE_TRAINED; - return 0; -} - -int OmegaBuilder::train(const IndexTrainer::Pointer &trainer) { - if (state_ != BUILD_STATE_INITED) { - LOG_ERROR("OmegaBuilder not initialized"); - return IndexError_NoReady; - } - - int ret = hnsw_builder_->train(trainer); - if (ret != 0) { - LOG_ERROR("Failed to train HNSW builder"); - return ret; - } - - state_ = BUILD_STATE_TRAINED; - return 0; -} - -int OmegaBuilder::build(IndexThreads::Pointer threads, - IndexHolder::Pointer holder) { - if (state_ != BUILD_STATE_TRAINED) { - LOG_ERROR("OmegaBuilder not trained"); - return IndexError_NoReady; - } - - int ret = hnsw_builder_->build(threads, holder); - if (ret != 0) { - LOG_ERROR("Failed to build HNSW index"); - return ret; - } - - state_ = BUILD_STATE_BUILT; - LOG_INFO("OmegaBuilder build completed"); - return 0; -} - -int OmegaBuilder::dump(const IndexDumper::Pointer &dumper) { - if (state_ != BUILD_STATE_BUILT) { - LOG_ERROR("OmegaBuilder not built"); - return 
IndexError_NoReady; - } - - int ret = hnsw_builder_->dump(dumper); - if (ret != 0) { - LOG_ERROR("Failed to dump HNSW index"); - return ret; - } - - LOG_INFO("OmegaBuilder dump completed"); - return 0; -} - -} // namespace core -} // namespace zvec - -// NOTE: OmegaBuilder is not registered because OMEGA index building uses -// OmegaStreamer (which extends HnswStreamer) instead. -// INDEX_FACTORY_REGISTER_BUILDER(zvec::core::OmegaBuilder); diff --git a/src/core/algorithm/omega/omega_builder.h b/src/core/algorithm/omega/omega_builder.h deleted file mode 100644 index 4fc38b18e..000000000 --- a/src/core/algorithm/omega/omega_builder.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2025-present the zvec project -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include -#include "../hnsw/hnsw_builder.h" - -namespace zvec { -namespace core { - -//! OMEGA Index Builder - wraps HNSW builder -class OmegaBuilder : public IndexBuilder { - public: - //! Constructor - OmegaBuilder(); - - //! Initialize the builder - virtual int init(const IndexMeta &meta, - const ailego::Params ¶ms) override; - - //! Cleanup the builder - virtual int cleanup(void) override; - - //! Train the data (delegate to HNSW) - virtual int train(IndexThreads::Pointer threads, - IndexHolder::Pointer holder) override; - - //! Train the data (delegate to HNSW) - virtual int train(const IndexTrainer::Pointer &trainer) override; - - //! 
Build the index (delegate to HNSW) - virtual int build(IndexThreads::Pointer threads, - IndexHolder::Pointer holder) override; - - //! Dump index into storage (delegate to HNSW) - virtual int dump(const IndexDumper::Pointer &dumper) override; - - //! Retrieve statistics (delegate to HNSW) - virtual const Stats &stats(void) const override { - return hnsw_builder_->stats(); - } - - private: - enum BUILD_STATE { - BUILD_STATE_INIT = 0, - BUILD_STATE_INITED = 1, - BUILD_STATE_TRAINED = 2, - BUILD_STATE_BUILT = 3 - }; - - std::shared_ptr hnsw_builder_; - BUILD_STATE state_{BUILD_STATE_INIT}; -}; - -} // namespace core -} // namespace zvec diff --git a/src/core/algorithm/omega/omega_searcher.h b/src/core/algorithm/omega/omega_searcher.h index 53760461d..c4cfd5a7a 100644 --- a/src/core/algorithm/omega/omega_searcher.h +++ b/src/core/algorithm/omega/omega_searcher.h @@ -26,7 +26,10 @@ namespace zvec { namespace core { -//! OMEGA Index Searcher - extends HNSW with adaptive search +// OmegaSearcher owns the loaded OMEGA runtime on the searcher side: +// model loading, mode selection, HNSW-hook wiring, and per-search context +// creation. It reuses the HNSW search loop instead of defining an independent +// graph-search implementation. class OmegaSearcher : public HnswSearcher { public: using ContextPointer = IndexSearcher::Context::Pointer; @@ -133,45 +136,6 @@ class OmegaSearcher : public HnswSearcher { //! Create a searcher context (creates OmegaContext instead of HnswContext) virtual ContextPointer create_context() const override; - // NOTE: The commented-out delegation methods below are intentionally not used. - // OmegaSearcher inherits from HnswSearcher and overrides only the necessary methods. - // The base class implementations are sufficient for the remaining functionality. - /* - //! Fetch vector by key (delegate to HNSW) - virtual const void *get_vector(uint64_t key) const override { - return hnsw_searcher_->get_vector(key); - } - - //! 
Create a searcher context (delegate to HNSW) - virtual ContextPointer create_context() const override { - return hnsw_searcher_->create_context(); - } - - //! Create a new iterator (delegate to HNSW) - virtual IndexProvider::Pointer create_provider(void) const override { - return hnsw_searcher_->create_provider(); - } - - //! Retrieve statistics (delegate to HNSW) - virtual const Stats &stats(void) const override { - return hnsw_searcher_->stats(); - } - - //! Retrieve meta of index (delegate to HNSW) - virtual const IndexMeta &meta(void) const override { - return hnsw_searcher_->meta(); - } - - //! Retrieve params of index - virtual const ailego::Params ¶ms(void) const override { - return params_; - } - - virtual void print_debug_info() override { - hnsw_searcher_->print_debug_info(); - } - */ - private: //! Check if OMEGA mode should be used bool should_use_omega() const { diff --git a/src/core/algorithm/omega/omega_streamer.h b/src/core/algorithm/omega/omega_streamer.h index 86029c57e..b697390c0 100644 --- a/src/core/algorithm/omega/omega_streamer.h +++ b/src/core/algorithm/omega/omega_streamer.h @@ -25,14 +25,15 @@ namespace zvec { namespace core { /** - * @brief OMEGA Index Streamer + * @brief OMEGA-aware HNSW streamer. * - * Inherits from HnswStreamer and overrides dump() to set "OmegaSearcher" - * as the searcher type, ensuring that disk-persisted indices will use - * OmegaSearcher (with training support) when loaded. - * - * Supports both training mode (feature collection) and inference mode - * (adaptive search with learned early stopping). + * Ownership boundary: + * - OmegaStreamer owns persisted-streamer concerns such as open/dump and the + * streamer-side search entry point used by zvec's index framework. + * - It carries training-mode/search-mode configuration that must travel with + * the loaded streamer instance. + * - It does not define the adaptive-stop policy; that lives in OMEGALib + * through OmegaSearcher/OmegaContext. 
*/ class OmegaStreamer : public HnswStreamer { public: @@ -47,7 +48,7 @@ class OmegaStreamer : public HnswStreamer { OmegaStreamer(const OmegaStreamer &streamer) = delete; OmegaStreamer &operator=(const OmegaStreamer &streamer) = delete; - // Training mode support + // Training-mode configuration forwarded into per-search contexts. void EnableTrainingMode(bool enable) { training_mode_enabled_ = enable; } void SetCurrentQueryId(int query_id) { current_query_id_ = query_id; } void SetTrainingGroundTruth(const std::vector>& ground_truth, @@ -56,11 +57,9 @@ class OmegaStreamer : public HnswStreamer { training_k_train_ = k_train; } - // Inference mode support + // Search-mode configuration shared across searches for this streamer. bool LoadModel(const std::string& model_dir); bool IsModelLoaded() const; - void SetTargetRecall(float target_recall) { target_recall_ = target_recall; } - void SetWindowSize(int window_size) { window_size_ = window_size; } protected: /** diff --git a/src/core/interface/indexes/omega_index.cc b/src/core/interface/indexes/omega_index.cc index 4aee616e2..c6afffb6f 100644 --- a/src/core/interface/indexes/omega_index.cc +++ b/src/core/interface/indexes/omega_index.cc @@ -20,43 +20,38 @@ namespace zvec::core_interface { -// OmegaIndex uses OmegaStreamer which provides OMEGA adaptive search +// OmegaIndex owns the framework-facing index lifecycle and delegates OMEGA- +// specific runtime behavior to OmegaStreamer/OmegaSearcher. It is responsible +// for creating the correct streamer and injecting OMEGA query params into the +// search context. It does not own the adaptive-search algorithm itself. int OmegaIndex::CreateAndInitStreamer(const BaseIndexParam ¶m) { - // First call parent to set up all parameters and create basic streamer + // Reuse HNSWIndex setup so the HNSW-compatible on-disk/index metadata is + // initialized consistently before swapping in the OMEGA-aware streamer. 
int ret = HNSWIndex::CreateAndInitStreamer(param); if (ret != core::IndexError_Success) { return ret; } - // NOTE: We intentionally DO NOT create a builder here! - // HNSW works by having data written directly to the streamer during Merge - // (via add_with_id_impl). If we create a builder, the MixedStreamerReducer - // will use add_vec_with_builder() which puts data into the builder instead - // of the streamer, causing doc_count=0 after Merge and subsequent crashes. - - // Now replace the HnswStreamer with OmegaStreamer - // Save the current meta and params before replacing streamer + // OMEGA build/merge still happens through the streamer path. Keeping the + // HNSW builder path untouched avoids changing merge semantics. core::IndexMeta saved_meta = proxima_index_meta_; ailego::Params saved_params = proxima_index_params_; - // Create OmegaStreamer streamer_ = core::IndexFactory::CreateStreamer("OmegaStreamer"); if (ailego_unlikely(!streamer_)) { LOG_ERROR("Failed to create OmegaStreamer"); return core::IndexError_Runtime; } - // Initialize OmegaStreamer with the same parameters if (ailego_unlikely( streamer_->init(saved_meta, saved_params) != 0)) { LOG_ERROR("Failed to init OmegaStreamer"); return core::IndexError_Runtime; } - // CRITICAL: Set "OmegaSearcher" in metadata for disk-persisted indices - // This ensures that when the index is saved and loaded later, - // IndexFlow will create OmegaSearcher instead of HnswSearcher + // Persist the OMEGA-aware searcher type so reopened indices route searches + // through OmegaSearcher instead of the plain HNSW searcher. proxima_index_meta_.set_searcher("OmegaSearcher", 0, ailego::Params()); return core::IndexError_Success; @@ -98,14 +93,14 @@ void OmegaIndex::SetCurrentQueryId(int query_id) { } std::vector OmegaIndex::GetTrainingRecords() const { - // Training records are collected via SearchResult.training_records_ (from OmegaContext), - // not through this method. 
This is kept for ITrainingCapable interface compliance. + // Training records are returned per search through OmegaContext / + // SearchResult.training_records_. OmegaIndex itself does not keep a shared + // training-record buffer. return {}; } void OmegaIndex::ClearTrainingRecords() { - // Training records are managed per-search via OmegaContext, - // no shared state to clear here. + // No-op by design: OmegaIndex does not own per-search training records. } void OmegaIndex::SetTrainingGroundTruth( From 16be71e450bdd15ed360cc42235b36d153e7dad3 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 18:46:38 +0800 Subject: [PATCH 060/126] cleanup: document hnsw hook semantics --- src/core/algorithm/hnsw/hnsw_algorithm.h | 15 +++++++++++++++ src/core/algorithm/omega/omega_streamer.cc | 12 +++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.h b/src/core/algorithm/hnsw/hnsw_algorithm.h index f17c35778..dcea105f1 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.h +++ b/src/core/algorithm/hnsw/hnsw_algorithm.h @@ -27,6 +27,21 @@ class HnswAlgorithm { public: typedef std::unique_ptr UPointer; + // SearchHooks is the integration seam used by OMEGA and a small amount of + // profiling tooling. Callbacks are invoked from the level-0 search loop in + // this order: + // 1. on_level0_entry once after the initial level-0 entry point is accepted + // 2. on_hop once per popped candidate expansion + // 3. on_visit_candidate once per candidate comparison at level 0 + // + // inserted_to_topk is computed by the HNSW loop before callbacks fire and + // tells the callback whether the candidate improved the current result heap. + // + // Returning true from on_visit_candidate requests early termination of the + // level-0 search. This is currently used by OMEGA adaptive stopping. + // + // collect_timing/now_ns/elapsed_ns/hook_total_time_ns are profiling-only + // fields; they do not affect hook semantics. 
struct SearchHooks { void *user_data{nullptr}; bool collect_timing{false}; diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 67981b787..e16ec1502 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -399,7 +399,8 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm return IndexError_Runtime; } - // Enable training mode if active (CRITICAL: must be before search) + // Training state is attached to the OMEGA search context before the shared + // HNSW loop starts so label collection sees the full query trajectory. if (training_mode_enabled_) { std::vector gt_for_query; if (query_id >= 0 && @@ -417,7 +418,8 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm query_id, gt_for_query.size()); } - // CRITICAL: Update context if it was created by another searcher/streamer + // Rebind the context if it originated from a different searcher/streamer + // instance so the HNSW state matches this streamer before search begins. if (hnsw_ctx->magic() != magic_) { int ret = update_context(hnsw_ctx); if (ret != 0) { @@ -723,12 +725,12 @@ int OmegaStreamer::dump(const IndexDumper::Pointer &dumper) { shared_mutex_.lock(); AILEGO_DEFER([&]() { shared_mutex_.unlock(); }); - // Extract OMEGA params from streamer params and pass to searcher - // This ensures OmegaSearcher gets the necessary params when loaded + // Persist the OMEGA searcher params alongside the dumped index metadata so a + // reopened index reconstructs the same searcher-side behavior. ailego::Params searcher_params; const auto& streamer_params = meta_.streamer_params(); - // Copy omega.* params from streamer to searcher + // Copy the omega.* params needed by OmegaSearcher::init(). 
if (streamer_params.has("omega.enabled")) { searcher_params.insert("omega.enabled", streamer_params.get_as_bool("omega.enabled")); From 16b26051d98814edd873aa9eba5c3c12764e9915 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 18:47:43 +0800 Subject: [PATCH 061/126] cleanup: classify omega runtime flags --- OMEGA_RUNTIME_FLAGS.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 OMEGA_RUNTIME_FLAGS.md diff --git a/OMEGA_RUNTIME_FLAGS.md b/OMEGA_RUNTIME_FLAGS.md new file mode 100644 index 000000000..315530dfb --- /dev/null +++ b/OMEGA_RUNTIME_FLAGS.md @@ -0,0 +1,42 @@ +# OMEGA Runtime Flags + +This note classifies the runtime flags currently used by zvec's HNSW/OMEGA +integration. The goal is to distinguish product-path controls from benchmark +and profiling knobs. + +## Production / Safety + +| Flag | Scope | Purpose | +| --- | --- | --- | +| `ZVEC_OMEGA_DISABLE_MODEL_PREDICTION` | OMEGA search path | Forces the OMEGA path to run without model-driven stopping. Useful as a fallback/debug switch while preserving the hook/control path. | + +## Profiling / Per-query stats + +| Flag | Scope | Purpose | +| --- | --- | --- | +| `ZVEC_HNSW_LOG_QUERY_STATS` | HNSW streamer | Enables per-query HNSW stats logging. | +| `ZVEC_HNSW_LOG_QUERY_LIMIT` | HNSW streamer | Caps how many HNSW query-stat lines are emitted. | +| `ZVEC_OMEGA_LOG_QUERY_STATS` | OMEGA streamer | Enables per-query OMEGA stats logging. | +| `ZVEC_OMEGA_LOG_QUERY_LIMIT` | OMEGA streamer | Caps how many OMEGA query-stat lines are emitted. | +| `ZVEC_OMEGA_PROFILE_CONTROL_TIMING` | OMEGA / OMEGALib | Enables fine-grained OMEGA control-path timing. This is profiling-only and should stay off for normal benchmark runs. | + +## Benchmark-only + +| Flag | Scope | Purpose | +| --- | --- | --- | +| `ZVEC_HNSW_ENABLE_EMPTY_HOOKS` | HNSW streamer | Forces HNSW to execute the empty-hook path so hook dispatch overhead can be measured in isolation. 
| + +## Generic logging + +| Flag | Scope | Purpose | +| --- | --- | --- | +| `ZVEC_LOG_LEVEL` | Logging | Controls zvec log verbosity. Benchmark scripts commonly set it to `INFO` so query-stat lines are visible. | + +## Cleanup notes + +- All flags listed above still have active call sites or benchmark usage. +- No remaining runtime env var was removed in this cleanup step because no + clearly dead env-var knob was found in the current branch. +- Previously removed dead surface in this cleanup phase was limited to unused + code/API, not to active runtime flags. + From e6e5113c3535ba64c64c6dfa30163fa7df2f0f65 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 18:54:49 +0800 Subject: [PATCH 062/126] cleanup: extract omega training coordinator --- src/db/index/segment/segment.cc | 334 +++--------------- src/db/training/omega_training_coordinator.cc | 320 +++++++++++++++++ src/db/training/omega_training_coordinator.h | 81 +++++ 3 files changed, 443 insertions(+), 292 deletions(-) create mode 100644 src/db/training/omega_training_coordinator.cc create mode 100644 src/db/training/omega_training_coordinator.h diff --git a/src/db/index/segment/segment.cc b/src/db/index/segment/segment.cc index 1704b724e..47d6ab3bd 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -58,6 +57,7 @@ #include "db/index/storage/wal/wal_file.h" #include "column_merging_reader.h" #include "sql_expr_parser.h" +#include "db/training/omega_training_coordinator.h" #include "db/training/training_data_collector.h" #ifdef ZVEC_ENABLE_OMEGA #include "db/training/omega_model_trainer.h" @@ -67,100 +67,6 @@ namespace zvec { namespace { -constexpr uint32_t kOmegaQueryCacheMagic = 0x4F514359; // OQCY -constexpr uint32_t kOmegaQueryCacheVersion = 1; - -void WriteTimingStatsJson( - const std::string& output_path, - const std::vector>& stats) { - std::ofstream ofs(output_path); - if 
(!ofs.is_open()) { - return; - } - ofs << "{\n"; - for (size_t i = 0; i < stats.size(); ++i) { - ofs << " \"" << stats[i].first << "\": " << stats[i].second; - if (i + 1 < stats.size()) { - ofs << ","; - } - ofs << "\n"; - } - ofs << "}\n"; -} - -std::string OmegaQueryCachePath(const std::string& model_output_dir) { - return model_output_dir + "/training_queries.bin"; -} - -bool SaveOmegaTrainingQueryCache( - const std::string& model_output_dir, - const std::vector>& queries, - const std::vector& query_doc_ids) { - if (queries.empty() || queries.size() != query_doc_ids.size()) { - return false; - } - const uint32_t dim = static_cast(queries[0].size()); - for (const auto& query : queries) { - if (query.size() != dim) { - return false; - } - } - - std::ofstream ofs(OmegaQueryCachePath(model_output_dir), std::ios::binary); - if (!ofs.is_open()) { - return false; - } - - const uint64_t num_queries = queries.size(); - ofs.write(reinterpret_cast(&kOmegaQueryCacheMagic), sizeof(kOmegaQueryCacheMagic)); - ofs.write(reinterpret_cast(&kOmegaQueryCacheVersion), sizeof(kOmegaQueryCacheVersion)); - ofs.write(reinterpret_cast(&num_queries), sizeof(num_queries)); - ofs.write(reinterpret_cast(&dim), sizeof(dim)); - for (size_t i = 0; i < queries.size(); ++i) { - ofs.write(reinterpret_cast(&query_doc_ids[i]), sizeof(query_doc_ids[i])); - ofs.write(reinterpret_cast(queries[i].data()), - static_cast(dim * sizeof(float))); - } - return ofs.good(); -} - -bool LoadOmegaTrainingQueryCache( - const std::string& model_output_dir, - std::vector>* queries, - std::vector* query_doc_ids) { - std::ifstream ifs(OmegaQueryCachePath(model_output_dir), std::ios::binary); - if (!ifs.is_open()) { - return false; - } - - uint32_t magic = 0; - uint32_t version = 0; - uint64_t num_queries = 0; - uint32_t dim = 0; - ifs.read(reinterpret_cast(&magic), sizeof(magic)); - ifs.read(reinterpret_cast(&version), sizeof(version)); - ifs.read(reinterpret_cast(&num_queries), sizeof(num_queries)); - 
ifs.read(reinterpret_cast(&dim), sizeof(dim)); - if (!ifs.good() || magic != kOmegaQueryCacheMagic || version != kOmegaQueryCacheVersion || - num_queries == 0 || dim == 0) { - return false; - } - - queries->assign(num_queries, std::vector(dim)); - query_doc_ids->assign(num_queries, 0); - for (size_t i = 0; i < num_queries; ++i) { - ifs.read(reinterpret_cast(&(*query_doc_ids)[i]), sizeof(uint64_t)); - ifs.read(reinterpret_cast((*queries)[i].data()), - static_cast(dim * sizeof(float))); - if (!ifs.good()) { - queries->clear(); - query_doc_ids->clear(); - return false; - } - } - return true; -} - } // namespace void global_init() { @@ -1734,22 +1640,21 @@ Result SegmentImpl::merge_vector_indexer( auto* training_capable = vector_indexer->GetTrainingCapability(); bool needs_training = false; std::string model_output_dir; + OmegaTrainingParams omega_training_params; if (training_capable != nullptr) { - // Get min_vector_threshold from OMEGA index params - uint32_t min_vector_threshold = 100000; // default - if (auto omega_params = std::dynamic_pointer_cast(field.index_params())) { - min_vector_threshold = omega_params->min_vector_threshold(); - } + omega_training_params = ResolveOmegaTrainingParams(field.index_params()); size_t doc_count = vector_indexer->doc_count(); - if (doc_count >= min_vector_threshold) { + if (doc_count >= omega_training_params.min_vector_threshold) { needs_training = true; LOG_INFO("Trainable index detected after merge for field '%s' in segment %d (doc_count=%zu >= min_vector_threshold=%u)", - column.c_str(), id(), doc_count, min_vector_threshold); + column.c_str(), id(), doc_count, + omega_training_params.min_vector_threshold); } else { LOG_INFO("Skipping OMEGA training for field '%s': doc_count=%zu < min_vector_threshold=%u", - column.c_str(), doc_count, min_vector_threshold); + column.c_str(), doc_count, + omega_training_params.min_vector_threshold); } } @@ -1764,54 +1669,22 @@ Result SegmentImpl::merge_vector_indexer( model_output_dir = 
segment_dir + "/omega_model"; LOG_INFO("Starting OMEGA training data collection for field '%s' (using in-memory graph before flush)", column.c_str()); - - // Get training params from index params - size_t num_training_queries = 1000; // default - int ef_training = 1000; // default - int ef_groundtruth = 0; // default: brute force - int k_train = 1; // default: top-1 label - if (auto omega_params = std::dynamic_pointer_cast(field.index_params())) { - num_training_queries = omega_params->num_training_queries(); - ef_training = omega_params->ef_training(); - ef_groundtruth = omega_params->ef_groundtruth(); - k_train = omega_params->k_train(); - LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d, ef_groundtruth=%d, k_train=%d", - num_training_queries, ef_training, ef_groundtruth, k_train); - } - - // Collect training data using the current indexer (in-memory graph still exists) - TrainingDataCollectorOptions collector_opts; - size_t doc_count = vector_indexer->doc_count(); - collector_opts.num_training_queries = std::min(doc_count, num_training_queries); - collector_opts.ef_training = ef_training; - collector_opts.ef_groundtruth = ef_groundtruth; - collector_opts.topk = 100; - collector_opts.k_train = k_train; - - // Use the current vector_indexer which still has the in-memory graph - std::vector training_indexers = {vector_indexer}; - - auto training_result = TrainingDataCollector::CollectTrainingDataWithGtCmps( - shared_from_this(), column, collector_opts, training_indexers); - + LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d, ef_groundtruth=%d, k_train=%d", + omega_training_params.num_training_queries, + omega_training_params.ef_training, + omega_training_params.ef_groundtruth, + omega_training_params.k_train); + + auto training_result = CollectOmegaTrainingDataBeforeFlush( + shared_from_this(), column, vector_indexer, omega_training_params, + model_output_dir); if (training_result.has_value()) { 
training_result_opt = std::move(training_result.value()); - if (!FileHelper::DirectoryExists(model_output_dir)) { - FileHelper::CreateDirectory(model_output_dir); - } - if (!SaveOmegaTrainingQueryCache( - model_output_dir, - training_result_opt->training_queries, - training_result_opt->query_doc_ids)) { - LOG_WARN("Failed to persist OMEGA training query cache: %s", - OmegaQueryCachePath(model_output_dir).c_str()); - } - WriteTimingStatsJson( - model_output_dir + "/training_collection_timing.json", - TrainingDataCollector::ConsumeTimingStats()); - LOG_INFO("Collected %zu training records (before flush)", training_result_opt->records.size()); + LOG_INFO("Collected %zu training records (before flush)", + training_result_opt->records.size()); } else { - LOG_WARN("Failed to collect training data: %s", training_result.error().message().c_str()); + LOG_WARN("Failed to collect training data: %s", + training_result.error().message().c_str()); } } @@ -1821,35 +1694,9 @@ Result SegmentImpl::merge_vector_indexer( // Train the model using the previously collected data (doesn't need the graph) if (needs_training && training_result_opt.has_value()) { - auto& result = training_result_opt.value(); - - if (result.records.size() >= 100) { -#ifdef ZVEC_ENABLE_OMEGA - // Train the model - OmegaModelTrainerOptions trainer_opts; - trainer_opts.output_dir = model_output_dir; - trainer_opts.verbose = true; - - // Create output directory if it doesn't exist - if (!FileHelper::DirectoryExists(model_output_dir)) { - if (!FileHelper::CreateDirectory(model_output_dir)) { - LOG_WARN("Failed to create model output directory: %s", model_output_dir.c_str()); - } - } - - auto train_status = OmegaModelTrainer::TrainModelWithGtCmps( - result.records, result.gt_cmps_data, trainer_opts); - if (train_status.ok()) { - LOG_INFO("OMEGA model training completed successfully: %s", trainer_opts.output_dir.c_str()); - } else { - LOG_WARN("OMEGA model training failed: %s", train_status.message().c_str()); - } 
-#else - LOG_INFO("OMEGA training skipped (ZVEC_ENABLE_OMEGA not defined)"); -#endif - } else { - LOG_INFO("Skipping model training: only %zu records collected (need >= 100)", result.records.size()); - } + auto s_train = TrainOmegaModelAfterBuild(training_result_opt.value(), + model_output_dir); + CHECK_RETURN_STATUS_EXPECTED(s_train); } @@ -2463,23 +2310,16 @@ Status SegmentImpl::auto_train_omega_index_internal( LOG_WARN("Starting auto-training for OMEGA index on field '%s' in segment %d", field_name.c_str(), id()); - // Get training params from index params - size_t num_training_queries = 1000; // default - int ef_training = 1000; // default - int ef_groundtruth = 0; // default: brute force - uint32_t min_vector_threshold = 100000; // default - int k_train = 1; // default: top-1 label + OmegaTrainingParams omega_training_params; auto field = collection_schema_->get_field(field_name); if (field && field->index_params()) { - if (auto omega_params = std::dynamic_pointer_cast(field->index_params())) { - num_training_queries = omega_params->num_training_queries(); - ef_training = omega_params->ef_training(); - ef_groundtruth = omega_params->ef_groundtruth(); - min_vector_threshold = omega_params->min_vector_threshold(); - k_train = omega_params->k_train(); - LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d, ef_groundtruth=%d, min_vector_threshold=%u, k_train=%d", - num_training_queries, ef_training, ef_groundtruth, min_vector_threshold, k_train); - } + omega_training_params = ResolveOmegaTrainingParams(field->index_params()); + LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d, ef_groundtruth=%d, min_vector_threshold=%u, k_train=%d", + omega_training_params.num_training_queries, + omega_training_params.ef_training, + omega_training_params.ef_groundtruth, + omega_training_params.min_vector_threshold, + omega_training_params.k_train); } // Check if we have enough vectors to justify training @@ -2488,44 +2328,24 @@ 
Status SegmentImpl::auto_train_omega_index_internal( total_doc_count += indexer->doc_count(); } - if (total_doc_count < min_vector_threshold) { + if (total_doc_count < omega_training_params.min_vector_threshold) { LOG_INFO("Skipping OMEGA training for field '%s': doc_count=%zu < min_vector_threshold=%u", - field_name.c_str(), total_doc_count, min_vector_threshold); + field_name.c_str(), total_doc_count, + omega_training_params.min_vector_threshold); return Status::OK(); } LOG_INFO("Proceeding with OMEGA training: doc_count=%zu >= min_vector_threshold=%u", - total_doc_count, min_vector_threshold); + total_doc_count, omega_training_params.min_vector_threshold); // Step 1: Collect training data using the provided indexers LOG_WARN("OMEGA retrain step 1/2: start collecting training data for field '%s' in segment %d", field_name.c_str(), id()); - TrainingDataCollectorOptions collector_options; - collector_options.num_training_queries = num_training_queries; - collector_options.ef_training = ef_training; - collector_options.ef_groundtruth = ef_groundtruth; - collector_options.topk = 100; - collector_options.k_train = k_train; - collector_options.noise_scale = 0.01f; - - std::vector> cached_queries; - std::vector cached_query_doc_ids; const std::string model_output_dir = FileHelper::MakeSegmentPath(path_, id()) + "/omega_model"; - Result training_records_result; - if (LoadOmegaTrainingQueryCache(model_output_dir, &cached_queries, &cached_query_doc_ids)) { - LOG_WARN("Loaded %zu cached held-out queries for OMEGA retraining from %s", - cached_queries.size(), OmegaQueryCachePath(model_output_dir).c_str()); - training_records_result = - TrainingDataCollector::CollectTrainingDataWithGtCmpsFromQueries( - shared_from_this(), field_name, cached_queries, cached_query_doc_ids, - collector_options, indexers); - } else { - LOG_WARN("OMEGA retrain query cache not found, falling back to sampling held-out queries from persisted segment"); - training_records_result = - 
TrainingDataCollector::CollectTrainingDataWithGtCmps( - shared_from_this(), field_name, collector_options, indexers); - } + auto training_records_result = CollectOmegaRetrainingData( + shared_from_this(), field_name, indexers, omega_training_params, + model_output_dir); if (!training_records_result.has_value()) { return Status::InternalError( @@ -2537,81 +2357,11 @@ Status SegmentImpl::auto_train_omega_index_internal( field_name.c_str(), id()); auto& training_result = training_records_result.value(); - auto& training_records = training_result.records; LOG_INFO("Collected %zu training records for segment %d", - training_records.size(), id()); - - if (training_records.empty()) { - LOG_WARN("No training records collected, skipping model training"); - return Status::OK(); - } - - // Check if we have enough positive and negative samples - size_t positive_count = 0; - size_t negative_count = 0; - for (const auto& record : training_records) { - if (record.label == 1) { - positive_count++; - } else { - negative_count++; - } - } - - if (positive_count == 0 || negative_count == 0) { - LOG_WARN("Insufficient training samples: %zu positive, %zu negative. Need both > 0. Skipping training.", - positive_count, negative_count); - return Status::OK(); - } - - // Need at least 50 samples of each class for reasonable training - if (positive_count < 50 || negative_count < 50) { - LOG_WARN("Too few training samples: %zu positive, %zu negative. Need at least 50 of each. 
Skipping training.", - positive_count, negative_count); - return Status::OK(); - } - - LOG_INFO("Training data stats: %zu positive, %zu negative samples", - positive_count, negative_count); - -#ifdef ZVEC_ENABLE_OMEGA - // Step 2: Train OMEGA model with gt_cmps data - LOG_WARN("OMEGA retrain step 2/2: start model training for field '%s' in segment %d", - field_name.c_str(), id()); - OmegaModelTrainerOptions trainer_options; - trainer_options.output_dir = model_output_dir; - trainer_options.verbose = true; - - // Create output directory if it doesn't exist - if (!FileHelper::DirectoryExists(trainer_options.output_dir)) { - if (!FileHelper::CreateDirectory(trainer_options.output_dir)) { - return Status::InternalError( - "Failed to create model output directory: " + - trainer_options.output_dir); - } - } - - WriteTimingStatsJson( - trainer_options.output_dir + "/training_collection_timing.json", - TrainingDataCollector::ConsumeTimingStats()); - - auto train_status = OmegaModelTrainer::TrainModelWithGtCmps( - training_records, training_result.gt_cmps_data, trainer_options); - if (!train_status.ok()) { - return Status::InternalError( - "Failed to train OMEGA model: " + train_status.message()); - } - - LOG_WARN("OMEGA retrain step 2/2: finished model training for segment %d, output: %s", - id(), trainer_options.output_dir.c_str()); -#else - LOG_INFO("OMEGA training skipped (ZVEC_ENABLE_OMEGA not defined)"); -#endif + training_result.records.size(), id()); - // Step 3: Load model into the provided indexers - // TODO: Implement model loading into VectorColumnIndexer - // For now, the model will be loaded when the index is reopened - - return Status::OK(); + return TrainOmegaModelAfterRetrainCollect(training_result, model_output_dir, + id(), field_name); } Status SegmentImpl::retrain_omega_model() { diff --git a/src/db/training/omega_training_coordinator.cc b/src/db/training/omega_training_coordinator.cc new file mode 100644 index 000000000..f3753e08d --- /dev/null +++ 
b/src/db/training/omega_training_coordinator.cc @@ -0,0 +1,320 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "db/training/omega_training_coordinator.h" + +#include +#include +#include +#include +#include +#include "db/common/file_helper.h" +#ifdef ZVEC_ENABLE_OMEGA +#include "db/training/omega_model_trainer.h" +#endif + +namespace zvec { + +namespace { + +constexpr uint32_t kOmegaQueryCacheMagic = 0x4F514359; // OQCY +constexpr uint32_t kOmegaQueryCacheVersion = 1; + +} // namespace + +void WriteOmegaTimingStatsJson( + const std::string& output_path, + const std::vector>& stats) { + std::ofstream ofs(output_path); + if (!ofs.is_open()) { + return; + } + ofs << "{\n"; + for (size_t i = 0; i < stats.size(); ++i) { + ofs << " \"" << stats[i].first << "\": " << stats[i].second; + if (i + 1 < stats.size()) { + ofs << ","; + } + ofs << "\n"; + } + ofs << "}\n"; +} + +std::string OmegaQueryCachePath(const std::string& model_output_dir) { + return model_output_dir + "/training_queries.bin"; +} + +bool SaveOmegaTrainingQueryCache( + const std::string& model_output_dir, + const std::vector>& queries, + const std::vector& query_doc_ids) { + if (queries.empty() || queries.size() != query_doc_ids.size()) { + return false; + } + const uint32_t dim = static_cast(queries[0].size()); + for (const auto& query : queries) { + if (query.size() != dim) { + return false; + } + } + + std::ofstream 
ofs(OmegaQueryCachePath(model_output_dir), std::ios::binary); + if (!ofs.is_open()) { + return false; + } + + const uint64_t num_queries = queries.size(); + ofs.write(reinterpret_cast(&kOmegaQueryCacheMagic), + sizeof(kOmegaQueryCacheMagic)); + ofs.write(reinterpret_cast(&kOmegaQueryCacheVersion), + sizeof(kOmegaQueryCacheVersion)); + ofs.write(reinterpret_cast(&num_queries), sizeof(num_queries)); + ofs.write(reinterpret_cast(&dim), sizeof(dim)); + for (size_t i = 0; i < queries.size(); ++i) { + ofs.write(reinterpret_cast(&query_doc_ids[i]), + sizeof(query_doc_ids[i])); + ofs.write(reinterpret_cast(queries[i].data()), + static_cast(dim * sizeof(float))); + } + return ofs.good(); +} + +bool LoadOmegaTrainingQueryCache( + const std::string& model_output_dir, + std::vector>* queries, + std::vector* query_doc_ids) { + std::ifstream ifs(OmegaQueryCachePath(model_output_dir), std::ios::binary); + if (!ifs.is_open()) { + return false; + } + + uint32_t magic = 0; + uint32_t version = 0; + uint64_t num_queries = 0; + uint32_t dim = 0; + ifs.read(reinterpret_cast(&magic), sizeof(magic)); + ifs.read(reinterpret_cast(&version), sizeof(version)); + ifs.read(reinterpret_cast(&num_queries), sizeof(num_queries)); + ifs.read(reinterpret_cast(&dim), sizeof(dim)); + if (!ifs.good() || magic != kOmegaQueryCacheMagic || + version != kOmegaQueryCacheVersion || num_queries == 0 || dim == 0) { + return false; + } + + queries->assign(num_queries, std::vector(dim)); + query_doc_ids->assign(num_queries, 0); + for (size_t i = 0; i < num_queries; ++i) { + ifs.read(reinterpret_cast(&(*query_doc_ids)[i]), sizeof(uint64_t)); + ifs.read(reinterpret_cast((*queries)[i].data()), + static_cast(dim * sizeof(float))); + if (!ifs.good()) { + queries->clear(); + query_doc_ids->clear(); + return false; + } + } + return true; +} + +OmegaTrainingParams ResolveOmegaTrainingParams( + const IndexParams::Ptr& index_params) { + OmegaTrainingParams params; + auto omega_params = 
std::dynamic_pointer_cast(index_params); + if (!omega_params) { + return params; + } + + params.num_training_queries = omega_params->num_training_queries(); + params.ef_training = omega_params->ef_training(); + params.ef_groundtruth = omega_params->ef_groundtruth(); + params.min_vector_threshold = omega_params->min_vector_threshold(); + params.k_train = omega_params->k_train(); + return params; +} + +Result CollectOmegaTrainingDataBeforeFlush( + const Segment::Ptr& segment, + const std::string& field_name, + const VectorColumnIndexer::Ptr& vector_indexer, + const OmegaTrainingParams& params, + const std::string& model_output_dir) { + TrainingDataCollectorOptions collector_opts; + const size_t doc_count = vector_indexer->doc_count(); + collector_opts.num_training_queries = + std::min(doc_count, params.num_training_queries); + collector_opts.ef_training = params.ef_training; + collector_opts.ef_groundtruth = params.ef_groundtruth; + collector_opts.topk = 100; + collector_opts.k_train = params.k_train; + + std::vector training_indexers = {vector_indexer}; + auto training_result = TrainingDataCollector::CollectTrainingDataWithGtCmps( + segment, field_name, collector_opts, training_indexers); + if (!training_result.has_value()) { + return training_result; + } + + if (!FileHelper::DirectoryExists(model_output_dir)) { + FileHelper::CreateDirectory(model_output_dir); + } + if (!SaveOmegaTrainingQueryCache(model_output_dir, + training_result->training_queries, + training_result->query_doc_ids)) { + LOG_WARN("Failed to persist OMEGA training query cache: %s", + OmegaQueryCachePath(model_output_dir).c_str()); + } + WriteOmegaTimingStatsJson( + model_output_dir + "/training_collection_timing.json", + TrainingDataCollector::ConsumeTimingStats()); + return training_result; +} + +Result CollectOmegaRetrainingData( + const Segment::Ptr& segment, + const std::string& field_name, + const std::vector& indexers, + const OmegaTrainingParams& params, + const std::string& 
model_output_dir) { + TrainingDataCollectorOptions collector_options; + collector_options.num_training_queries = params.num_training_queries; + collector_options.ef_training = params.ef_training; + collector_options.ef_groundtruth = params.ef_groundtruth; + collector_options.topk = 100; + collector_options.k_train = params.k_train; + collector_options.noise_scale = 0.01f; + + std::vector> cached_queries; + std::vector cached_query_doc_ids; + if (LoadOmegaTrainingQueryCache(model_output_dir, &cached_queries, + &cached_query_doc_ids)) { + LOG_WARN("Loaded %zu cached held-out queries for OMEGA retraining from %s", + cached_queries.size(), OmegaQueryCachePath(model_output_dir).c_str()); + return TrainingDataCollector::CollectTrainingDataWithGtCmpsFromQueries( + segment, field_name, cached_queries, cached_query_doc_ids, + collector_options, indexers); + } + + LOG_WARN("OMEGA retrain query cache not found, falling back to sampling held-out queries from persisted segment"); + return TrainingDataCollector::CollectTrainingDataWithGtCmps( + segment, field_name, collector_options, indexers); +} + +Status TrainOmegaModelAfterBuild( + const TrainingDataCollectorResult& training_result, + const std::string& model_output_dir) { + if (training_result.records.size() < 100) { + LOG_INFO("Skipping model training: only %zu records collected (need >= 100)", + training_result.records.size()); + return Status::OK(); + } + +#ifdef ZVEC_ENABLE_OMEGA + OmegaModelTrainerOptions trainer_opts; + trainer_opts.output_dir = model_output_dir; + trainer_opts.verbose = true; + + if (!FileHelper::DirectoryExists(model_output_dir) && + !FileHelper::CreateDirectory(model_output_dir)) { + LOG_WARN("Failed to create model output directory: %s", + model_output_dir.c_str()); + } + + auto train_status = OmegaModelTrainer::TrainModelWithGtCmps( + training_result.records, training_result.gt_cmps_data, trainer_opts); + if (train_status.ok()) { + LOG_INFO("OMEGA model training completed successfully: %s", + 
trainer_opts.output_dir.c_str()); + } else { + LOG_WARN("OMEGA model training failed: %s", + train_status.message().c_str()); + } +#else + LOG_INFO("OMEGA training skipped (ZVEC_ENABLE_OMEGA not defined)"); +#endif + + return Status::OK(); +} + +Status TrainOmegaModelAfterRetrainCollect( + const TrainingDataCollectorResult& training_result, + const std::string& model_output_dir, + SegmentID segment_id, + const std::string& field_name) { + const auto& training_records = training_result.records; + if (training_records.empty()) { + LOG_WARN("No training records collected, skipping model training"); + return Status::OK(); + } + + size_t positive_count = 0; + size_t negative_count = 0; + for (const auto& record : training_records) { + if (record.label == 1) { + positive_count++; + } else { + negative_count++; + } + } + + if (positive_count == 0 || negative_count == 0) { + LOG_WARN("Insufficient training samples: %zu positive, %zu negative. Need both > 0. Skipping training.", + positive_count, negative_count); + return Status::OK(); + } + + if (positive_count < 50 || negative_count < 50) { + LOG_WARN("Too few training samples: %zu positive, %zu negative. Need at least 50 of each. 
Skipping training.", + positive_count, negative_count); + return Status::OK(); + } + + LOG_INFO("Training data stats: %zu positive, %zu negative samples", + positive_count, negative_count); + +#ifdef ZVEC_ENABLE_OMEGA + LOG_WARN("OMEGA retrain step 2/2: start model training for field '%s' in segment %d", + field_name.c_str(), segment_id); + OmegaModelTrainerOptions trainer_options; + trainer_options.output_dir = model_output_dir; + trainer_options.verbose = true; + + if (!FileHelper::DirectoryExists(trainer_options.output_dir) && + !FileHelper::CreateDirectory(trainer_options.output_dir)) { + return Status::InternalError( + "Failed to create model output directory: " + + trainer_options.output_dir); + } + + WriteOmegaTimingStatsJson( + trainer_options.output_dir + "/training_collection_timing.json", + TrainingDataCollector::ConsumeTimingStats()); + + auto train_status = OmegaModelTrainer::TrainModelWithGtCmps( + training_records, training_result.gt_cmps_data, trainer_options); + if (!train_status.ok()) { + return Status::InternalError( + "Failed to train OMEGA model: " + train_status.message()); + } + + LOG_WARN("OMEGA retrain step 2/2: finished model training for segment %d, output: %s", + segment_id, trainer_options.output_dir.c_str()); +#else + LOG_INFO("OMEGA training skipped (ZVEC_ENABLE_OMEGA not defined)"); +#endif + + return Status::OK(); +} + +} // namespace zvec + diff --git a/src/db/training/omega_training_coordinator.h b/src/db/training/omega_training_coordinator.h new file mode 100644 index 000000000..7247b1a2f --- /dev/null +++ b/src/db/training/omega_training_coordinator.h @@ -0,0 +1,81 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include "db/index/column/vector_column/vector_column_indexer.h" +#include "db/index/segment/segment.h" +#include "db/training/training_data_collector.h" + +namespace zvec { + +struct OmegaTrainingParams { + size_t num_training_queries = 1000; + int ef_training = 1000; + int ef_groundtruth = 0; + uint32_t min_vector_threshold = 100000; + int k_train = 1; +}; + +void WriteOmegaTimingStatsJson( + const std::string& output_path, + const std::vector>& stats); + +std::string OmegaQueryCachePath(const std::string& model_output_dir); + +bool SaveOmegaTrainingQueryCache( + const std::string& model_output_dir, + const std::vector>& queries, + const std::vector& query_doc_ids); + +bool LoadOmegaTrainingQueryCache( + const std::string& model_output_dir, + std::vector>* queries, + std::vector* query_doc_ids); + +OmegaTrainingParams ResolveOmegaTrainingParams( + const IndexParams::Ptr& index_params); + +Result CollectOmegaTrainingDataBeforeFlush( + const Segment::Ptr& segment, + const std::string& field_name, + const VectorColumnIndexer::Ptr& vector_indexer, + const OmegaTrainingParams& params, + const std::string& model_output_dir); + +Result CollectOmegaRetrainingData( + const Segment::Ptr& segment, + const std::string& field_name, + const std::vector& indexers, + const OmegaTrainingParams& params, + const std::string& model_output_dir); + +Status TrainOmegaModelAfterBuild( + const TrainingDataCollectorResult& training_result, + const std::string& model_output_dir); + +Status 
TrainOmegaModelAfterRetrainCollect( + const TrainingDataCollectorResult& training_result, + const std::string& model_output_dir, + SegmentID segment_id, + const std::string& field_name); + +} // namespace zvec + From f1caadab82f1d408232c77f5231d2c575318b47d Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 19:00:45 +0800 Subject: [PATCH 063/126] cleanup: share omega hook control path --- src/core/algorithm/omega/omega_hook_utils.h | 189 ++++++++++++++++++++ src/core/algorithm/omega/omega_searcher.cc | 132 +------------- src/core/algorithm/omega/omega_streamer.cc | 171 +----------------- 3 files changed, 192 insertions(+), 300 deletions(-) create mode 100644 src/core/algorithm/omega/omega_hook_utils.h diff --git a/src/core/algorithm/omega/omega_hook_utils.h b/src/core/algorithm/omega/omega_hook_utils.h new file mode 100644 index 000000000..5524ac415 --- /dev/null +++ b/src/core/algorithm/omega/omega_hook_utils.h @@ -0,0 +1,189 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include "utility/rdtsc_timer.h" +#include "../hnsw/hnsw_entity.h" + +namespace zvec::core { + +inline bool DisableOmegaModelPrediction() { + const char* value = std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION"); + if (value == nullptr) { + return false; + } + return std::string(value) != "0"; +} + +struct OmegaHookState { + struct PendingVisitBuffer { + std::vector storage; + int head{0}; + int count{0}; + + void Reset(int capacity) { + head = 0; + count = 0; + storage.resize(std::max(1, capacity)); + } + + bool Empty() const { return count == 0; } + + int Capacity() const { return static_cast(storage.size()); } + + void Push(const omega::SearchContext::VisitCandidate& candidate) { + storage[(head + count) % Capacity()] = candidate; + ++count; + } + + const omega::SearchContext::VisitCandidate* Data() const { + return storage.data() + head; + } + + void Clear() { + head = 0; + count = 0; + } + }; + + omega::SearchContext* search_ctx{nullptr}; + bool enable_early_stopping{false}; + bool collect_control_timing{false}; + uint64_t* hook_body_time_ns{nullptr}; + bool per_cmp_reporting{false}; + PendingVisitBuffer pending_candidates; + int batch_min_interval{1}; +}; + +template +inline void RunOmegaControlHook(const OmegaHookState& state, Fn&& fn) { + if (!state.collect_control_timing) { + fn(); + return; + } + auto control_start = RdtscTimer::Now(); + fn(); + if (state.hook_body_time_ns != nullptr) { + *state.hook_body_time_ns += RdtscTimer::ElapsedNs( + control_start, RdtscTimer::Now()); + } +} + +inline void ResetOmegaHookState(OmegaHookState* state) { + if (state->search_ctx != nullptr) { + state->batch_min_interval = state->search_ctx->GetPredictionBatchMinInterval(); + } else { + state->batch_min_interval = 1; + } + state->pending_candidates.Reset(state->batch_min_interval); +} + +inline bool ShouldFlushOmegaPendingCandidates(const OmegaHookState& state) { + if (state.pending_candidates.Empty()) 
{ + return false; + } + if (state.pending_candidates.count >= state.batch_min_interval) { + return true; + } + if (state.search_ctx == nullptr) { + return false; + } + return state.search_ctx->GetTotalCmps() + state.pending_candidates.count >= + state.search_ctx->GetNextPredictionCmps(); +} + +inline bool FlushOmegaPendingCandidates(OmegaHookState* state, int flush_count) { + if (state->search_ctx == nullptr || flush_count <= 0 || + state->pending_candidates.Empty()) { + return false; + } + + flush_count = std::min(flush_count, state->pending_candidates.count); + bool should_predict = false; + RunOmegaControlHook(*state, [&]() { + should_predict = state->search_ctx->ReportVisitCandidates( + state->pending_candidates.Data(), static_cast(flush_count)); + }); + state->pending_candidates.Clear(); + if (!state->enable_early_stopping || !should_predict) { + return false; + } + + bool should_stop = false; + RunOmegaControlHook( + *state, [&]() { should_stop = state->search_ctx->ShouldStopEarly(); }); + return should_stop; +} + +inline bool MaybeFlushOmegaPendingCandidates(OmegaHookState* state) { + if (!ShouldFlushOmegaPendingCandidates(*state)) { + return false; + } + return FlushOmegaPendingCandidates(state, state->pending_candidates.count); +} + +inline void OnOmegaLevel0Entry(node_id_t id, dist_t dist, + bool /*inserted_to_topk*/, void* user_data) { + auto& state = *static_cast(user_data); + if (state.per_cmp_reporting) { + RunOmegaControlHook(state, [&]() { + state.search_ctx->SetDistStart(dist); + state.search_ctx->ReportVisitCandidate(id, dist, true); + }); + return; + } + RunOmegaControlHook(state, [&]() { + state.search_ctx->SetDistStart(dist); + state.pending_candidates.Push({static_cast(id), dist, true}); + }); + MaybeFlushOmegaPendingCandidates(&state); +} + +inline void OnOmegaHop(void* user_data) { + auto& state = *static_cast(user_data); + RunOmegaControlHook(state, [&]() { state.search_ctx->ReportHop(); }); +} + +inline bool OnOmegaVisitCandidate(node_id_t 
id, dist_t dist, + bool inserted_to_topk, void* user_data) { + auto& state = *static_cast(user_data); + if (state.per_cmp_reporting) { + bool should_predict = false; + RunOmegaControlHook(state, [&]() { + should_predict = + state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); + }); + if (!state.enable_early_stopping || !should_predict) { + return false; + } + bool should_stop = false; + RunOmegaControlHook( + state, [&]() { should_stop = state.search_ctx->ShouldStopEarly(); }); + return should_stop; + } + RunOmegaControlHook(state, [&]() { + state.pending_candidates.Push( + {static_cast(id), dist, inserted_to_topk}); + }); + return MaybeFlushOmegaPendingCandidates(&state); +} + +} // namespace zvec::core + diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 4143c4acd..35cb762e3 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -14,6 +14,7 @@ #include "omega_searcher.h" #include "omega_context.h" +#include "omega_hook_utils.h" #include "omega_params.h" #include #include @@ -21,141 +22,10 @@ #include #include "../hnsw/hnsw_context.h" #include -#include -#include -#include namespace zvec { namespace core { -namespace { - -bool DisableOmegaModelPrediction() { - const char* value = std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION"); - if (value == nullptr) { - return false; - } - return std::string(value) != "0"; -} - -struct OmegaHookState { - struct PendingVisitBuffer { - std::vector storage; - int head{0}; - int count{0}; - - void Reset(int capacity) { - head = 0; - count = 0; - storage.resize(std::max(1, capacity)); - } - - bool Empty() const { return count == 0; } - - int Capacity() const { return static_cast(storage.size()); } - - void Push(const omega::SearchContext::VisitCandidate& candidate) { - storage[(head + count) % Capacity()] = candidate; - ++count; - } - - const omega::SearchContext::VisitCandidate* Data() const { - return 
storage.data() + head; - } - - void Clear() { - head = 0; - count = 0; - } - }; - - omega::SearchContext *search_ctx{nullptr}; - bool enable_early_stopping{false}; - bool per_cmp_reporting{false}; - PendingVisitBuffer pending_candidates; - int batch_min_interval{1}; -}; - -void ResetOmegaHookState(OmegaHookState* state) { - if (state->search_ctx != nullptr) { - state->batch_min_interval = state->search_ctx->GetPredictionBatchMinInterval(); - } else { - state->batch_min_interval = 1; - } - state->pending_candidates.Reset(state->batch_min_interval); -} - -bool ShouldFlushOmegaPendingCandidates(const OmegaHookState& state) { - if (state.pending_candidates.Empty()) { - return false; - } - if (state.pending_candidates.count >= state.batch_min_interval) { - return true; - } - if (state.search_ctx == nullptr) { - return false; - } - return state.search_ctx->GetTotalCmps() + state.pending_candidates.count >= - state.search_ctx->GetNextPredictionCmps(); -} - -bool FlushOmegaPendingCandidates(OmegaHookState* state, int flush_count) { - if (state->search_ctx == nullptr || flush_count <= 0 || - state->pending_candidates.Empty()) { - return false; - } - - flush_count = std::min(flush_count, state->pending_candidates.count); - bool should_predict = state->search_ctx->ReportVisitCandidates( - state->pending_candidates.Data(), static_cast(flush_count)); - state->pending_candidates.Clear(); - if (!state->enable_early_stopping || !should_predict) { - return false; - } - return state->search_ctx->ShouldStopEarly(); -} - -bool MaybeFlushOmegaPendingCandidates(OmegaHookState* state) { - if (!ShouldFlushOmegaPendingCandidates(*state)) { - return false; - } - return FlushOmegaPendingCandidates(state, state->pending_candidates.count); -} - -void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, - void *user_data) { - auto &state = *static_cast(user_data); - state.search_ctx->SetDistStart(dist); - if (state.per_cmp_reporting) { - 
state.search_ctx->ReportVisitCandidate(id, dist, true); - return; - } - state.pending_candidates.Push({static_cast(id), dist, true}); - MaybeFlushOmegaPendingCandidates(&state); -} - -void OnOmegaHop(void *user_data) { - auto &state = *static_cast(user_data); - state.search_ctx->ReportHop(); -} - -bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, - bool inserted_to_topk, void *user_data) { - auto &state = *static_cast(user_data); - if (state.per_cmp_reporting) { - bool should_predict = - state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); - if (!state.enable_early_stopping || !should_predict) { - return false; - } - return state.search_ctx->ShouldStopEarly(); - } - state.pending_candidates.Push({static_cast(id), dist, inserted_to_topk}); - return MaybeFlushOmegaPendingCandidates(&state); -} - -} // namespace - OmegaSearcher::OmegaSearcher(void) : HnswSearcher(), omega_model_(nullptr), diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index e16ec1502..11b3f174d 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -17,6 +17,7 @@ #include #include #include +#include "omega_hook_utils.h" #include "utility/rdtsc_timer.h" #include "../hnsw/hnsw_entity.h" #include "../hnsw/hnsw_context.h" @@ -60,14 +61,6 @@ bool ShouldLogQueryStats(uint64_t query_seq) { return limit == 0 || query_seq < limit; } -bool DisableOmegaModelPrediction() { - const char* value = std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION"); - if (value == nullptr) { - return false; - } - return std::string(value) != "0"; -} - bool IsOmegaControlTimingEnabled() { const char* value = std::getenv("ZVEC_OMEGA_PROFILE_CONTROL_TIMING"); if (value == nullptr) { @@ -84,166 +77,6 @@ uint64_t OmegaProfilingElapsedNs(uint64_t start, uint64_t end) { return RdtscTimer::ElapsedNs(start, end); } -struct OmegaHookState { - struct PendingVisitBuffer { - std::vector storage; - int head{0}; - int 
count{0}; - - void Reset(int capacity) { - head = 0; - count = 0; - storage.resize(std::max(1, capacity)); - } - - bool Empty() const { return count == 0; } - - int Capacity() const { return static_cast(storage.size()); } - - void Push(const omega::SearchContext::VisitCandidate& candidate) { - storage[(head + count) % Capacity()] = candidate; - ++count; - } - - const omega::SearchContext::VisitCandidate* Data() const { - return storage.data() + head; - } - - void Clear() { - head = 0; - count = 0; - } - }; - - omega::SearchContext *search_ctx{nullptr}; - bool enable_early_stopping{false}; - bool collect_control_timing{false}; - uint64_t *hook_body_time_ns{nullptr}; - bool per_cmp_reporting{false}; - PendingVisitBuffer pending_candidates; - int batch_min_interval{1}; -}; - -void ResetOmegaHookState(OmegaHookState *state) { - if (state->search_ctx != nullptr) { - state->batch_min_interval = state->search_ctx->GetPredictionBatchMinInterval(); - } else { - state->batch_min_interval = 1; - } - state->pending_candidates.Reset(state->batch_min_interval); -} - -bool ShouldFlushOmegaPendingCandidates(const OmegaHookState &state) { - if (state.pending_candidates.Empty()) { - return false; - } - if (state.pending_candidates.count >= state.batch_min_interval) { - return true; - } - if (state.search_ctx == nullptr) { - return false; - } - return state.search_ctx->GetTotalCmps() + state.pending_candidates.count >= - state.search_ctx->GetNextPredictionCmps(); -} - -template -bool FlushOmegaPendingCandidates(const OmegaHookState &state, - int flush_count, Fn &&run_control_hook) { - if (state.search_ctx == nullptr || flush_count <= 0 || - state.pending_candidates.Empty()) { - return false; - } - - auto &mutable_state = const_cast(state); - flush_count = std::min(flush_count, mutable_state.pending_candidates.count); - bool should_predict = false; - run_control_hook([&]() { - should_predict = state.search_ctx->ReportVisitCandidates( - mutable_state.pending_candidates.Data(), 
static_cast(flush_count)); - }); - mutable_state.pending_candidates.Clear(); - if (!state.enable_early_stopping || !should_predict) { - return false; - } - - bool should_stop = false; - run_control_hook( - [&]() { should_stop = state.search_ctx->ShouldStopEarly(); }); - return should_stop; -} - -template -void RunOmegaControlHook(const OmegaHookState &state, Fn &&fn) { - if (!state.collect_control_timing) { - fn(); - return; - } - auto control_start = RdtscTimer::Now(); - fn(); - if (state.hook_body_time_ns != nullptr) { - *state.hook_body_time_ns += RdtscTimer::ElapsedNs( - control_start, RdtscTimer::Now()); - } -} - -bool MaybeFlushOmegaPendingCandidates(const OmegaHookState &state) { - auto run_control_hook = [&](auto &&fn) { - RunOmegaControlHook(state, std::forward(fn)); - }; - - if (!ShouldFlushOmegaPendingCandidates(state)) { - return false; - } - return FlushOmegaPendingCandidates(state, state.pending_candidates.count, - run_control_hook); -} - -void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, - void *user_data) { - auto &state = *static_cast(user_data); - if (state.per_cmp_reporting) { - RunOmegaControlHook(state, [&]() { - state.search_ctx->SetDistStart(dist); - state.search_ctx->ReportVisitCandidate(id, dist, true); - }); - return; - } - RunOmegaControlHook(state, [&]() { - state.search_ctx->SetDistStart(dist); - state.pending_candidates.Push({static_cast(id), dist, true}); - }); - MaybeFlushOmegaPendingCandidates(state); -} - -void OnOmegaHop(void *user_data) { - auto &state = *static_cast(user_data); - RunOmegaControlHook(state, [&]() { state.search_ctx->ReportHop(); }); -} - -bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, - bool inserted_to_topk, void *user_data) { - auto &state = *static_cast(user_data); - if (state.per_cmp_reporting) { - bool should_predict = false; - RunOmegaControlHook(state, [&]() { - should_predict = - state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); - }); - if 
(!state.enable_early_stopping || !should_predict) { - return false; - } - bool should_stop = false; - RunOmegaControlHook( - state, [&]() { should_stop = state.search_ctx->ShouldStopEarly(); }); - return should_stop; - } - RunOmegaControlHook(state, [&]() { - state.pending_candidates.Push({static_cast(id), dist, inserted_to_topk}); - }); - return MaybeFlushOmegaPendingCandidates(state); -} - } // namespace bool OmegaStreamer::LoadModel(const std::string& model_dir) { @@ -461,7 +294,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm LOG_ERROR("OMEGA search failed"); return ret; } - MaybeFlushOmegaPendingCandidates(hook_state); + MaybeFlushOmegaPendingCandidates(&hook_state); auto query_search_end = RdtscTimer::Now(); // Get final statistics From cfe6756d9caa4daba92ddf3e40f9c91fc5cbec5c Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 19:08:03 +0800 Subject: [PATCH 064/126] cleanup: extract benchmark runner helpers --- scripts/benchmark_hnsw_vs_omega.py | 675 +---------------------------- scripts/benchmark_lib.py | 671 ++++++++++++++++++++++++++++ 2 files changed, 695 insertions(+), 651 deletions(-) create mode 100644 scripts/benchmark_lib.py diff --git a/scripts/benchmark_hnsw_vs_omega.py b/scripts/benchmark_hnsw_vs_omega.py index b410966b1..73dfe837e 100644 --- a/scripts/benchmark_hnsw_vs_omega.py +++ b/scripts/benchmark_hnsw_vs_omega.py @@ -1,43 +1,31 @@ #!/usr/bin/env python3 -""" -Generic VectorDBBench runner for Zvec HNSW vs Zvec+OMEGA. - -Configuration is loaded from a JSON file so datasets, paths, and all -benchmark/index parameters can be changed without editing the script. 
-""" +"""Generic VectorDBBench runner for Zvec HNSW vs Zvec+OMEGA.""" import argparse -import importlib -import json -import os -import re -import subprocess import sys -import tempfile -from dataclasses import dataclass -from datetime import datetime from pathlib import Path -from typing import Any - - -@dataclass -class BenchmarkResult: - type: str - path: str - success: bool - target_recall: float | None - load_duration: float | None = None - qps: float | None = None - recall: float | None = None - avg_latency_ms: float | None = None - p50_latency_ms: float | None = None - p90_latency_ms: float | None = None - p95_latency_ms: float | None = None - p99_latency_ms: float | None = None - profiling: dict | None = None - - -KV_PATTERN = re.compile(r"([A-Za-z_]+)=([^\s,]+)") +from benchmark_lib import ( + BenchmarkResult, + build_base_command, + build_hnsw_profile, + build_omega_profile, + get_offline_load_duration, + get_run_result, + latency_summary_from_profile, + load_dataset_config, + merge_omega_detailed_profile, + must_get, + print_header, + resolve_index_path, + resolve_paths, + resolve_vectordbbench_command, + run_command, + run_command_capture, + snapshot_result_files, + validate_profile_output, + write_grouped_profiling_summaries, + write_offline_summary, +) def parse_args() -> argparse.Namespace: @@ -88,627 +76,12 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() -def load_json(path: Path) -> dict[str, Any]: - with open(path) as f: - return json.load(f) - - -def load_dataset_config(path: Path, dataset_name: str) -> dict[str, Any]: - root = load_json(path) - if dataset_name not in root: - available = ", ".join(sorted(root.keys())) - raise ValueError( - f"Dataset '{dataset_name}' not found in {path}. 
Available datasets: {available}" - ) - dataset_config = root[dataset_name] - if not isinstance(dataset_config, dict): - raise ValueError(f"Dataset config for '{dataset_name}' must be a JSON object") - return dataset_config - - -def resolve_paths( - config: dict[str, Any], - zvec_root_arg: str | None, - vectordbbench_root_arg: str | None, - benchmark_dir_arg: str | None, - results_dir_arg: str | None, -) -> tuple[Path, Path, Path, Path]: - script_path = Path(__file__).resolve() - zvec_root = Path(zvec_root_arg).resolve() if zvec_root_arg else script_path.parent.parent - vectordbbench_root = ( - Path(vectordbbench_root_arg).resolve() - if vectordbbench_root_arg - else Path(os.environ.get("VECTORDBBENCH_ROOT", zvec_root.parent / "VectorDBBench")).resolve() - ) - - config_benchmark_dir = config.get("benchmark_dir") - if benchmark_dir_arg: - benchmark_dir = Path(benchmark_dir_arg).resolve() - elif config_benchmark_dir: - benchmark_dir = Path(config_benchmark_dir).expanduser().resolve() - else: - benchmark_dir = Path(os.environ.get("ZVEC_BENCHMARK_DIR", zvec_root / "benchmark_results")).resolve() - - source_results_dir = vectordbbench_root / "vectordb_bench" / "results" / "Zvec" - if results_dir_arg: - results_dir = Path(results_dir_arg).resolve() - elif config.get("results_dir"): - results_dir = Path(config["results_dir"]).expanduser().resolve() - elif source_results_dir.exists(): - results_dir = source_results_dir - else: - try: - bench_config = importlib.import_module("vectordb_bench").config - results_dir = Path(bench_config.RESULTS_LOCAL_DIR).resolve() / "Zvec" - except Exception: - results_dir = source_results_dir - - return zvec_root, vectordbbench_root, benchmark_dir, results_dir - - -def resolve_vectordbbench_command() -> list[str]: - return [sys.executable, "-m", "vectordb_bench.cli.vectordbbench"] - - -def parse_scalar(value: str) -> Any: - lower = value.lower() - if lower in {"true", "false"}: - return lower == "true" - try: - if any(ch in value for ch in 
[".", "e", "E"]): - return float(value) - return int(value) - except ValueError: - return value - - -def parse_key_values(line: str) -> dict[str, Any]: - return {key: parse_scalar(value) for key, value in KV_PATTERN.findall(line)} - - -def avg_metric(records: list[dict[str, Any]], key: str) -> float | None: - values = [float(record[key]) for record in records if key in record] - if not values: - return None - return sum(values) / len(values) - - -def percentile_metric(records: list[dict[str, Any]], key: str, percentile: float) -> float | None: - values = sorted(float(record[key]) for record in records if key in record) - if not values: - return None - if len(values) == 1: - return values[0] - - rank = (len(values) - 1) * percentile / 100.0 - lower = int(rank) - upper = min(lower + 1, len(values) - 1) - if lower == upper: - return values[lower] - weight = rank - lower - return values[lower] * (1.0 - weight) + values[upper] * weight - - -def parse_serial_runner_summary(output: str) -> dict[str, Any]: - summary = {} - for line in output.splitlines(): - if "search entire test_data:" not in line: - continue - summary = parse_key_values(line) - return summary - - -def parse_query_records(output: str, prefix: str) -> list[dict[str, Any]]: - records = [] - for line in output.splitlines(): - if prefix not in line: - continue - records.append(parse_key_values(line)) - return records - - -def build_hnsw_profile(metrics: dict[str, Any], output: str) -> dict[str, Any]: - query_records = parse_query_records(output, "HNSW query stats:") - serial_summary = parse_serial_runner_summary(output) - avg_latency_ms = avg_metric(query_records, "latency_ms") - p50_latency_ms = percentile_metric(query_records, "latency_ms", 50) - p90_latency_ms = percentile_metric(query_records, "latency_ms", 90) - p95_latency_ms = percentile_metric(query_records, "latency_ms", 95) - p99_latency_ms = percentile_metric(query_records, "latency_ms", 99) - return { - "benchmark_recall": metrics.get("recall"), - 
"benchmark_qps": metrics.get("qps"), - "profile_query_count": len(query_records), - "profile_avg_end2end_latency_ms": avg_latency_ms, - "profile_p50_end2end_latency_ms": p50_latency_ms, - "profile_p90_end2end_latency_ms": p90_latency_ms, - "profile_p95_end2end_latency_ms": p95_latency_ms, - "profile_p99_end2end_latency_ms": p99_latency_ms, - "profile_avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), - "profile_avg_scan_cmps": avg_metric(query_records, "cmps"), - "profile_avg_pure_search_ms": avg_metric(query_records, "pure_search_ms"), - "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), - "profile_serial_p99_s": serial_summary.get("p99"), - "profile_serial_p95_s": serial_summary.get("p95"), - "profile_serial_avg_recall": serial_summary.get("avg_recall"), - } - - -def build_omega_profile( - metrics: dict[str, Any], output: str, hnsw_profile: dict[str, Any] | None -) -> dict[str, Any]: - query_records = parse_query_records(output, "OMEGA query stats:") - serial_summary = parse_serial_runner_summary(output) - avg_latency_ms = avg_metric(query_records, "total_ms") - p50_latency_ms = percentile_metric(query_records, "total_ms", 50) - p90_latency_ms = percentile_metric(query_records, "total_ms", 90) - p95_latency_ms = percentile_metric(query_records, "total_ms", 95) - p99_latency_ms = percentile_metric(query_records, "total_ms", 99) - - avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") - avg_core_search_ms = avg_metric(query_records, "core_search_ms") - avg_pure_search_ms = avg_metric(query_records, "pure_search_ms") - avg_hook_total_ms = avg_metric(query_records, "hook_total_ms") - avg_search_only_ms = ( - avg_pure_search_ms if avg_pure_search_ms is not None else avg_core_search_ms - ) - - cmp_time_ms = None - if avg_pairwise_dist_cnt and avg_pairwise_dist_cnt > 0 and avg_search_only_ms is not None: - cmp_time_ms = avg_search_only_ms / avg_pairwise_dist_cnt - - model_overhead_cmp_equiv = None - if cmp_time_ms and 
cmp_time_ms > 0 and avg_hook_total_ms is not None: - model_overhead_cmp_equiv = avg_hook_total_ms / cmp_time_ms - - avg_saved_cmps = None - if ( - hnsw_profile - and hnsw_profile.get("profile_avg_cmps") is not None - and avg_pairwise_dist_cnt is not None - ): - avg_saved_cmps = hnsw_profile["profile_avg_cmps"] - avg_pairwise_dist_cnt - - return { - "benchmark_recall": metrics.get("recall"), - "benchmark_qps": metrics.get("qps"), - "profile_query_count": len(query_records), - "profile_avg_end2end_latency_ms": avg_latency_ms, - "profile_p50_end2end_latency_ms": p50_latency_ms, - "profile_p90_end2end_latency_ms": p90_latency_ms, - "profile_p95_end2end_latency_ms": p95_latency_ms, - "profile_p99_end2end_latency_ms": p99_latency_ms, - "profile_avg_cmps": avg_pairwise_dist_cnt, - "profile_avg_scan_cmps": avg_metric(query_records, "scan_cmps"), - "profile_avg_omega_cmps": avg_metric(query_records, "omega_cmps"), - "profile_avg_prediction_calls": avg_metric(query_records, "prediction_calls"), - "profile_avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), - "profile_avg_advance_calls": avg_metric(query_records, "advance_calls"), - "profile_avg_model_overhead_ms": avg_hook_total_ms, - "profile_avg_setup_ms": avg_metric(query_records, "setup_ms"), - "profile_avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), - "profile_avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), - "profile_avg_core_search_ms": avg_core_search_ms, - "profile_avg_pure_search_ms": avg_pure_search_ms, - "profile_avg_hook_total_ms": avg_hook_total_ms, - "profile_avg_hook_body_ms": avg_metric(query_records, "hook_body_ms"), - "profile_avg_hook_dispatch_ms": avg_metric(query_records, "hook_dispatch_ms"), - "profile_avg_report_visit_candidate_ms": avg_metric(query_records, "report_visit_candidate_ms"), - "profile_avg_should_predict_ms": avg_metric(query_records, "should_predict_ms"), - "profile_avg_report_hop_ms": avg_metric(query_records, 
"report_hop_ms"), - "profile_avg_update_top_candidates_ms": avg_metric(query_records, "update_top_candidates_ms"), - "profile_avg_push_traversal_window_ms": avg_metric(query_records, "push_traversal_window_ms"), - "profile_avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, - "profile_avg_early_stop_saved_cmps": avg_saved_cmps, - "profile_avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), - "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), - "profile_serial_p99_s": serial_summary.get("p99"), - "profile_serial_p95_s": serial_summary.get("p95"), - "profile_serial_avg_recall": serial_summary.get("avg_recall"), - } - - -def profiling_output_path(index_path: Path) -> Path: - return index_path / "online_benchmark_summary.json" - - -def write_profiling_summary(index_path: Path, payload: dict[str, Any]) -> None: - with open(profiling_output_path(index_path), "w") as f: - json.dump(payload, f, indent=2, sort_keys=True) - - -def write_grouped_profiling_summaries(dataset: str, results: list[BenchmarkResult]) -> list[Path]: - written_paths: list[Path] = [] - grouped: dict[str, list[BenchmarkResult]] = {} - for result in results: - grouped.setdefault(result.path, []).append(result) - - for path_str, grouped_results in grouped.items(): - index_path = Path(path_str) - write_profiling_summary( - index_path, - { - "generated_at": datetime.now().isoformat(), - "dataset": dataset, - "results": [ - { - "type": result.type, - "target_recall": result.target_recall, - "path": result.path, - "load_duration_s": result.load_duration, - "qps": result.qps, - "avg_latency_ms": result.avg_latency_ms, - "p50_latency_ms": result.p50_latency_ms, - "p90_latency_ms": result.p90_latency_ms, - "p95_latency_ms": result.p95_latency_ms, - "p99_latency_ms": result.p99_latency_ms, - "recall": result.recall, - "profiling": result.profiling, - } - for result in grouped_results - ], - }, - ) - written_paths.append(profiling_output_path(index_path)) - - return 
written_paths - - -def get_latest_result(db_label: str, results_dir: Path) -> dict[str, Any]: - if not results_dir.exists(): - return {} - - result_files = sorted( - results_dir.glob("result_*.json"), - key=lambda f: f.stat().st_mtime, - reverse=True, - ) - for result_file in result_files: - try: - with open(result_file) as f: - data = json.load(f) - for result in data.get("results", []): - task_config = result.get("task_config", {}) - db_config = task_config.get("db_config", {}) - if db_config.get("db_label") == db_label: - metrics = result.get("metrics", {}) - return { - "insert_duration": metrics.get("insert_duration"), - "optimize_duration": metrics.get("optimize_duration"), - "load_duration": metrics.get("load_duration"), - "qps": metrics.get("qps"), - "avg_latency_ms": metrics.get("serial_latency_avg"), - "p95_latency_ms": metrics.get("serial_latency_p95"), - "p99_latency_ms": metrics.get("serial_latency_p99"), - "recall": metrics.get("recall"), - } - except Exception: - continue - return {} - - -def latency_summary_from_profile(profile: dict[str, Any] | None) -> dict[str, float | None]: - profile = profile or {} - return { - "avg_latency_ms": profile.get("profile_avg_end2end_latency_ms"), - "p50_latency_ms": profile.get("profile_p50_end2end_latency_ms"), - "p90_latency_ms": profile.get("profile_p90_end2end_latency_ms"), - "p95_latency_ms": profile.get("profile_p95_end2end_latency_ms"), - "p99_latency_ms": profile.get("profile_p99_end2end_latency_ms"), - } - - -def merge_omega_detailed_profile( - summary_profile: dict[str, Any], detailed_profile: dict[str, Any] -) -> dict[str, Any]: - merged = dict(summary_profile) - detailed_keys = [ - "profile_avg_model_overhead_ms", - "profile_avg_should_stop_ms", - "profile_avg_prediction_eval_ms", - "profile_avg_core_search_ms", - "profile_avg_pure_search_ms", - "profile_avg_hook_total_ms", - "profile_avg_hook_body_ms", - "profile_avg_hook_dispatch_ms", - "profile_avg_report_visit_candidate_ms", - 
"profile_avg_should_predict_ms", - "profile_avg_report_hop_ms", - "profile_avg_update_top_candidates_ms", - "profile_avg_push_traversal_window_ms", - "profile_avg_model_overhead_cmp_equiv", - ] - for key in detailed_keys: - merged[key] = detailed_profile.get(key) - return merged - - -def snapshot_result_files(results_dir: Path) -> set[str]: - if not results_dir.exists(): - return set() - return {str(p) for p in results_dir.glob("result_*.json")} - - -def extract_result_from_file(result_file: Path, db_label: str) -> dict[str, Any]: - try: - with open(result_file) as f: - data = json.load(f) - for result in data.get("results", []): - task_config = result.get("task_config", {}) - db_config = task_config.get("db_config", {}) - if db_config.get("db_label") == db_label: - metrics = result.get("metrics", {}) - return { - "insert_duration": metrics.get("insert_duration"), - "optimize_duration": metrics.get("optimize_duration"), - "load_duration": metrics.get("load_duration"), - "qps": metrics.get("qps"), - "recall": metrics.get("recall"), - } - except Exception: - return {} - return {} - - -def get_run_result(db_label: str, before_files: set[str], results_dir: Path) -> dict[str, Any]: - if not results_dir.exists(): - return {} - - current_files = {str(p) for p in results_dir.glob("result_*.json")} - new_files = sorted( - [Path(p) for p in current_files - before_files], - key=lambda p: p.stat().st_mtime, - reverse=True, - ) - for result_file in new_files: - metrics = extract_result_from_file(result_file, db_label) - if metrics: - return metrics - return get_latest_result(db_label, results_dir) - - -def offline_summary_path(index_path: Path) -> Path: - return index_path / "offline_benchmark_summary.json" - - -def read_json_if_exists(path: Path) -> dict[str, Any]: - if not path.exists(): - return {} - try: - with open(path) as f: - return json.load(f) - except Exception: - return {} - - -def find_omega_model_dir(index_path: Path) -> Path | None: - candidates = 
sorted(index_path.glob("*/omega_model")) - return candidates[0] if candidates else None - - -def sum_timing_ms(data: dict[str, Any]) -> int: - return sum(v for v in data.values() if isinstance(v, (int, float))) - - -def build_offline_summary( - index_path: Path, - db_label: str, - metrics: dict[str, Any], - retrain_only: bool = False, -) -> dict[str, Any]: - previous_summary = read_json_if_exists(offline_summary_path(index_path)) if retrain_only else {} - previous_offline = previous_summary.get("offline", {}) - previous_omega_training = previous_summary.get("omega_training", {}) - - insert_duration = metrics.get("insert_duration") - optimize_duration = metrics.get("optimize_duration") - load_duration = metrics.get("load_duration") - - omega_model_dir = find_omega_model_dir(index_path) - omega_training = {} - if omega_model_dir is not None: - omega_training = { - "collection_timing_ms": read_json_if_exists( - omega_model_dir / "training_collection_timing.json" - ), - "lightgbm_timing_ms": read_json_if_exists( - omega_model_dir / "lightgbm_training_timing.json" - ), - } - - if retrain_only: - insert_duration = previous_offline.get("insert_duration_s") - old_optimize_duration = previous_offline.get("optimize_duration_s") - old_training_s = ( - sum_timing_ms(previous_omega_training.get("collection_timing_ms", {})) - + sum_timing_ms(previous_omega_training.get("lightgbm_timing_ms", {})) - ) / 1000.0 - new_training_s = ( - sum_timing_ms(omega_training.get("collection_timing_ms", {})) - + sum_timing_ms(omega_training.get("lightgbm_timing_ms", {})) - ) / 1000.0 - if old_optimize_duration is not None: - optimize_duration = round(old_optimize_duration - old_training_s + new_training_s, 4) - else: - optimize_duration = metrics.get("optimize_duration") - load_duration = ( - round(insert_duration + optimize_duration, 4) - if insert_duration is not None and optimize_duration is not None - else metrics.get("load_duration") - ) - - summary = { - "db_label": db_label, - 
"index_path": str(index_path), - "generated_at": datetime.now().isoformat(), - "offline": { - "insert_duration_s": insert_duration, - "optimize_duration_s": optimize_duration, - "load_duration_s": load_duration, - }, - } - if omega_training: - summary["omega_training"] = omega_training - return summary - - -def write_offline_summary( - index_path: Path, - db_label: str, - metrics: dict[str, Any], - retrain_only: bool = False, -) -> None: - summary = build_offline_summary(index_path, db_label, metrics, retrain_only=retrain_only) - with open(offline_summary_path(index_path), "w") as f: - json.dump(summary, f, indent=2, sort_keys=True) - - -def get_offline_load_duration(index_path: Path) -> float | None: - summary = read_json_if_exists(offline_summary_path(index_path)) - return summary.get("offline", {}).get("load_duration_s") - - -def run_command( - cmd: list[str], - vectordbbench_root: Path, - dry_run: bool = False, - extra_env: dict[str, str] | None = None, -) -> int: - cmd_str = " \\\n ".join(cmd) - print(f"\n{'=' * 60}") - print(f"Command:\n{cmd_str}") - print(f"{'=' * 60}\n") - if dry_run: - print("[DRY RUN] Command not executed") - return 0 - - cwd = vectordbbench_root if vectordbbench_root.exists() else None - env = os.environ.copy() - if extra_env: - env.update(extra_env) - result = subprocess.run(cmd, cwd=cwd, env=env) - return result.returncode - - -def run_command_capture( - cmd: list[str], - vectordbbench_root: Path, - dry_run: bool = False, - extra_env: dict[str, str] | None = None, -) -> tuple[int, str]: - cmd_str = " \\\n ".join(cmd) - print(f"\n{'=' * 60}") - print(f"Command:\n{cmd_str}") - print(f"{'=' * 60}\n") - - if dry_run: - print("[DRY RUN] Command not executed") - return 0, "" - - cwd = vectordbbench_root if vectordbbench_root.exists() else None - env = os.environ.copy() - if extra_env: - env.update(extra_env) - with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".log") as tmp: - tmp_path = Path(tmp.name) - - try: - with 
tmp_path.open("w+") as tmp: - result = subprocess.run(cmd, cwd=cwd, env=env, stdout=tmp, stderr=subprocess.STDOUT, text=True) - tmp.flush() - tmp.seek(0) - output = tmp.read() - print(output, end="" if output.endswith("\n") or not output else "\n") - return result.returncode, output - finally: - tmp_path.unlink(missing_ok=True) - - -def must_get(config: dict[str, Any], key: str) -> Any: - if key not in config: - raise ValueError(f"Missing required config key: {key}") - return config[key] - - -def resolve_index_path(benchmark_dir: Path, path_value: str) -> Path: - path = Path(path_value).expanduser() - return path.resolve() if path.is_absolute() else (benchmark_dir / path).resolve() - - -def append_option(cmd: list[str], key: str, value: Any) -> None: - if value is None: - return - flag = f"--{key.replace('_', '-')}" - if isinstance(value, bool): - if value: - cmd.append(flag) - return - if isinstance(value, list): - cmd.extend([flag, ",".join(str(v) for v in value)]) - else: - cmd.extend([flag, str(value)]) - - -def extend_with_args(cmd: list[str], args_map: dict[str, Any] | None) -> None: - if not args_map: - return - for key, value in args_map.items(): - append_option(cmd, key, value) - - -def extend_with_flags(cmd: list[str], flags: list[str] | None) -> None: - if not flags: - return - for flag in flags: - cmd.append(f"--{flag}") - - -def build_base_command( - vectordbbench_cmd: list[str], - client_name: str, - path: Path, - db_label: str, - case_type: str, - common_args: dict[str, Any], - specific_args: dict[str, Any] | None = None, - extra_flags: list[str] | None = None, -) -> list[str]: - cmd = [ - *vectordbbench_cmd, - client_name, - "--path", - str(path), - "--db-label", - db_label, - "--case-type", - case_type, - ] - extend_with_args(cmd, common_args) - extend_with_args(cmd, specific_args) - extend_with_flags(cmd, extra_flags) - return cmd - - -def validate_profile_output(profile_name: str, ret: int, output: str, prefix: str) -> None: - if ret != 0: - 
raise RuntimeError(f"{profile_name} profiling pass failed with exit code {ret}") - if not parse_query_records(output, prefix): - raise RuntimeError( - f"{profile_name} profiling pass completed without any '{prefix}' records in stdout" - ) - - -def print_header(title: str) -> None: - print("\n\n" + "#" * 70) - print(f"# {title}") - print("#" * 70) - - def main() -> int: args = parse_args() config_path = Path(args.config).expanduser().resolve() config = load_dataset_config(config_path, args.dataset) zvec_root, vectordbbench_root, benchmark_dir, results_dir = resolve_paths( + Path(__file__).resolve(), config, args.zvec_root, args.vectordbbench_root, diff --git a/scripts/benchmark_lib.py b/scripts/benchmark_lib.py new file mode 100644 index 000000000..b5beb7de0 --- /dev/null +++ b/scripts/benchmark_lib.py @@ -0,0 +1,671 @@ +#!/usr/bin/env python3 + +import importlib +import json +import os +import re +import subprocess +import sys +import tempfile +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import Any + + +@dataclass +class BenchmarkResult: + type: str + path: str + success: bool + target_recall: float | None + load_duration: float | None = None + qps: float | None = None + recall: float | None = None + avg_latency_ms: float | None = None + p50_latency_ms: float | None = None + p90_latency_ms: float | None = None + p95_latency_ms: float | None = None + p99_latency_ms: float | None = None + profiling: dict[str, Any] | None = None + + +KV_PATTERN = re.compile(r"([A-Za-z_]+)=([^\s,]+)") + + +def load_json(path: Path) -> dict[str, Any]: + with open(path) as f: + return json.load(f) + + +def load_dataset_config(path: Path, dataset_name: str) -> dict[str, Any]: + root = load_json(path) + if dataset_name not in root: + available = ", ".join(sorted(root.keys())) + raise ValueError( + f"Dataset '{dataset_name}' not found in {path}. 
Available datasets: {available}" + ) + dataset_config = root[dataset_name] + if not isinstance(dataset_config, dict): + raise ValueError(f"Dataset config for '{dataset_name}' must be a JSON object") + return dataset_config + + +def resolve_paths( + script_path: Path, + config: dict[str, Any], + zvec_root_arg: str | None, + vectordbbench_root_arg: str | None, + benchmark_dir_arg: str | None, + results_dir_arg: str | None, +) -> tuple[Path, Path, Path, Path]: + zvec_root = Path(zvec_root_arg).resolve() if zvec_root_arg else script_path.parent.parent + vectordbbench_root = ( + Path(vectordbbench_root_arg).resolve() + if vectordbbench_root_arg + else Path( + os.environ.get("VECTORDBBENCH_ROOT", zvec_root.parent / "VectorDBBench") + ).resolve() + ) + + config_benchmark_dir = config.get("benchmark_dir") + if benchmark_dir_arg: + benchmark_dir = Path(benchmark_dir_arg).resolve() + elif config_benchmark_dir: + benchmark_dir = Path(config_benchmark_dir).expanduser().resolve() + else: + benchmark_dir = Path( + os.environ.get("ZVEC_BENCHMARK_DIR", zvec_root / "benchmark_results") + ).resolve() + + source_results_dir = vectordbbench_root / "vectordb_bench" / "results" / "Zvec" + if results_dir_arg: + results_dir = Path(results_dir_arg).resolve() + elif config.get("results_dir"): + results_dir = Path(config["results_dir"]).expanduser().resolve() + elif source_results_dir.exists(): + results_dir = source_results_dir + else: + try: + bench_config = importlib.import_module("vectordb_bench").config + results_dir = Path(bench_config.RESULTS_LOCAL_DIR).resolve() / "Zvec" + except Exception: + results_dir = source_results_dir + + return zvec_root, vectordbbench_root, benchmark_dir, results_dir + + +def resolve_vectordbbench_command() -> list[str]: + return [sys.executable, "-m", "vectordb_bench.cli.vectordbbench"] + + +def parse_scalar(value: str) -> Any: + lower = value.lower() + if lower in {"true", "false"}: + return lower == "true" + try: + if any(ch in value for ch in [".", "e", 
"E"]): + return float(value) + return int(value) + except ValueError: + return value + + +def parse_key_values(line: str) -> dict[str, Any]: + return {key: parse_scalar(value) for key, value in KV_PATTERN.findall(line)} + + +def avg_metric(records: list[dict[str, Any]], key: str) -> float | None: + values = [float(record[key]) for record in records if key in record] + if not values: + return None + return sum(values) / len(values) + + +def percentile_metric( + records: list[dict[str, Any]], key: str, percentile: float +) -> float | None: + values = sorted(float(record[key]) for record in records if key in record) + if not values: + return None + if len(values) == 1: + return values[0] + + rank = (len(values) - 1) * percentile / 100.0 + lower = int(rank) + upper = min(lower + 1, len(values) - 1) + if lower == upper: + return values[lower] + weight = rank - lower + return values[lower] * (1.0 - weight) + values[upper] * weight + + +def parse_serial_runner_summary(output: str) -> dict[str, Any]: + summary = {} + for line in output.splitlines(): + if "search entire test_data:" not in line: + continue + summary = parse_key_values(line) + return summary + + +def parse_query_records(output: str, prefix: str) -> list[dict[str, Any]]: + records = [] + for line in output.splitlines(): + if prefix not in line: + continue + records.append(parse_key_values(line)) + return records + + +def build_hnsw_profile(metrics: dict[str, Any], output: str) -> dict[str, Any]: + query_records = parse_query_records(output, "HNSW query stats:") + serial_summary = parse_serial_runner_summary(output) + avg_latency_ms = avg_metric(query_records, "latency_ms") + p50_latency_ms = percentile_metric(query_records, "latency_ms", 50) + p90_latency_ms = percentile_metric(query_records, "latency_ms", 90) + p95_latency_ms = percentile_metric(query_records, "latency_ms", 95) + p99_latency_ms = percentile_metric(query_records, "latency_ms", 99) + return { + "benchmark_recall": metrics.get("recall"), + 
"benchmark_qps": metrics.get("qps"), + "profile_query_count": len(query_records), + "profile_avg_end2end_latency_ms": avg_latency_ms, + "profile_p50_end2end_latency_ms": p50_latency_ms, + "profile_p90_end2end_latency_ms": p90_latency_ms, + "profile_p95_end2end_latency_ms": p95_latency_ms, + "profile_p99_end2end_latency_ms": p99_latency_ms, + "profile_avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), + "profile_avg_scan_cmps": avg_metric(query_records, "cmps"), + "profile_avg_pure_search_ms": avg_metric(query_records, "pure_search_ms"), + "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), + "profile_serial_p99_s": serial_summary.get("p99"), + "profile_serial_p95_s": serial_summary.get("p95"), + "profile_serial_avg_recall": serial_summary.get("avg_recall"), + } + + +def build_omega_profile( + metrics: dict[str, Any], output: str, hnsw_profile: dict[str, Any] | None +) -> dict[str, Any]: + query_records = parse_query_records(output, "OMEGA query stats:") + serial_summary = parse_serial_runner_summary(output) + avg_latency_ms = avg_metric(query_records, "total_ms") + p50_latency_ms = percentile_metric(query_records, "total_ms", 50) + p90_latency_ms = percentile_metric(query_records, "total_ms", 90) + p95_latency_ms = percentile_metric(query_records, "total_ms", 95) + p99_latency_ms = percentile_metric(query_records, "total_ms", 99) + + avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") + avg_core_search_ms = avg_metric(query_records, "core_search_ms") + avg_pure_search_ms = avg_metric(query_records, "pure_search_ms") + avg_hook_total_ms = avg_metric(query_records, "hook_total_ms") + avg_search_only_ms = ( + avg_pure_search_ms if avg_pure_search_ms is not None else avg_core_search_ms + ) + + cmp_time_ms = None + if avg_pairwise_dist_cnt and avg_pairwise_dist_cnt > 0 and avg_search_only_ms is not None: + cmp_time_ms = avg_search_only_ms / avg_pairwise_dist_cnt + + model_overhead_cmp_equiv = None + if cmp_time_ms and 
cmp_time_ms > 0 and avg_hook_total_ms is not None: + model_overhead_cmp_equiv = avg_hook_total_ms / cmp_time_ms + + avg_saved_cmps = None + if ( + hnsw_profile + and hnsw_profile.get("profile_avg_cmps") is not None + and avg_pairwise_dist_cnt is not None + ): + avg_saved_cmps = hnsw_profile["profile_avg_cmps"] - avg_pairwise_dist_cnt + + return { + "benchmark_recall": metrics.get("recall"), + "benchmark_qps": metrics.get("qps"), + "profile_query_count": len(query_records), + "profile_avg_end2end_latency_ms": avg_latency_ms, + "profile_p50_end2end_latency_ms": p50_latency_ms, + "profile_p90_end2end_latency_ms": p90_latency_ms, + "profile_p95_end2end_latency_ms": p95_latency_ms, + "profile_p99_end2end_latency_ms": p99_latency_ms, + "profile_avg_cmps": avg_pairwise_dist_cnt, + "profile_avg_scan_cmps": avg_metric(query_records, "scan_cmps"), + "profile_avg_omega_cmps": avg_metric(query_records, "omega_cmps"), + "profile_avg_prediction_calls": avg_metric(query_records, "prediction_calls"), + "profile_avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), + "profile_avg_advance_calls": avg_metric(query_records, "advance_calls"), + "profile_avg_model_overhead_ms": avg_hook_total_ms, + "profile_avg_setup_ms": avg_metric(query_records, "setup_ms"), + "profile_avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), + "profile_avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), + "profile_avg_core_search_ms": avg_core_search_ms, + "profile_avg_pure_search_ms": avg_pure_search_ms, + "profile_avg_hook_total_ms": avg_hook_total_ms, + "profile_avg_hook_body_ms": avg_metric(query_records, "hook_body_ms"), + "profile_avg_hook_dispatch_ms": avg_metric(query_records, "hook_dispatch_ms"), + "profile_avg_report_visit_candidate_ms": avg_metric( + query_records, "report_visit_candidate_ms" + ), + "profile_avg_should_predict_ms": avg_metric(query_records, "should_predict_ms"), + "profile_avg_report_hop_ms": avg_metric(query_records, 
"report_hop_ms"), + "profile_avg_update_top_candidates_ms": avg_metric( + query_records, "update_top_candidates_ms" + ), + "profile_avg_push_traversal_window_ms": avg_metric( + query_records, "push_traversal_window_ms" + ), + "profile_avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, + "profile_avg_early_stop_saved_cmps": avg_saved_cmps, + "profile_avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), + "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), + "profile_serial_p99_s": serial_summary.get("p99"), + "profile_serial_p95_s": serial_summary.get("p95"), + "profile_serial_avg_recall": serial_summary.get("avg_recall"), + } + + +def profiling_output_path(index_path: Path) -> Path: + return index_path / "online_benchmark_summary.json" + + +def write_profiling_summary(index_path: Path, payload: dict[str, Any]) -> None: + with open(profiling_output_path(index_path), "w") as f: + json.dump(payload, f, indent=2, sort_keys=True) + + +def write_grouped_profiling_summaries( + dataset: str, results: list[BenchmarkResult] +) -> list[Path]: + written_paths: list[Path] = [] + grouped: dict[str, list[BenchmarkResult]] = {} + for result in results: + grouped.setdefault(result.path, []).append(result) + + for path_str, grouped_results in grouped.items(): + index_path = Path(path_str) + write_profiling_summary( + index_path, + { + "generated_at": datetime.now().isoformat(), + "dataset": dataset, + "results": [ + { + "type": result.type, + "target_recall": result.target_recall, + "path": result.path, + "load_duration_s": result.load_duration, + "qps": result.qps, + "avg_latency_ms": result.avg_latency_ms, + "p50_latency_ms": result.p50_latency_ms, + "p90_latency_ms": result.p90_latency_ms, + "p95_latency_ms": result.p95_latency_ms, + "p99_latency_ms": result.p99_latency_ms, + "recall": result.recall, + "profiling": result.profiling, + } + for result in grouped_results + ], + }, + ) + written_paths.append(profiling_output_path(index_path)) + 
+ return written_paths + + +def get_latest_result(db_label: str, results_dir: Path) -> dict[str, Any]: + if not results_dir.exists(): + return {} + + result_files = sorted( + results_dir.glob("result_*.json"), + key=lambda f: f.stat().st_mtime, + reverse=True, + ) + for result_file in result_files: + try: + with open(result_file) as f: + data = json.load(f) + for result in data.get("results", []): + task_config = result.get("task_config", {}) + db_config = task_config.get("db_config", {}) + if db_config.get("db_label") == db_label: + metrics = result.get("metrics", {}) + return { + "insert_duration": metrics.get("insert_duration"), + "optimize_duration": metrics.get("optimize_duration"), + "load_duration": metrics.get("load_duration"), + "qps": metrics.get("qps"), + "avg_latency_ms": metrics.get("serial_latency_avg"), + "p95_latency_ms": metrics.get("serial_latency_p95"), + "p99_latency_ms": metrics.get("serial_latency_p99"), + "recall": metrics.get("recall"), + } + except Exception: + continue + return {} + + +def latency_summary_from_profile(profile: dict[str, Any] | None) -> dict[str, float | None]: + profile = profile or {} + return { + "avg_latency_ms": profile.get("profile_avg_end2end_latency_ms"), + "p50_latency_ms": profile.get("profile_p50_end2end_latency_ms"), + "p90_latency_ms": profile.get("profile_p90_end2end_latency_ms"), + "p95_latency_ms": profile.get("profile_p95_end2end_latency_ms"), + "p99_latency_ms": profile.get("profile_p99_end2end_latency_ms"), + } + + +def merge_omega_detailed_profile( + summary_profile: dict[str, Any], detailed_profile: dict[str, Any] +) -> dict[str, Any]: + merged = dict(summary_profile) + detailed_keys = [ + "profile_avg_model_overhead_ms", + "profile_avg_should_stop_ms", + "profile_avg_prediction_eval_ms", + "profile_avg_core_search_ms", + "profile_avg_pure_search_ms", + "profile_avg_hook_total_ms", + "profile_avg_hook_body_ms", + "profile_avg_hook_dispatch_ms", + "profile_avg_report_visit_candidate_ms", + 
"profile_avg_should_predict_ms", + "profile_avg_report_hop_ms", + "profile_avg_update_top_candidates_ms", + "profile_avg_push_traversal_window_ms", + "profile_avg_model_overhead_cmp_equiv", + ] + for key in detailed_keys: + merged[key] = detailed_profile.get(key) + return merged + + +def snapshot_result_files(results_dir: Path) -> set[str]: + if not results_dir.exists(): + return set() + return {str(p) for p in results_dir.glob("result_*.json")} + + +def extract_result_from_file(result_file: Path, db_label: str) -> dict[str, Any]: + try: + with open(result_file) as f: + data = json.load(f) + for result in data.get("results", []): + task_config = result.get("task_config", {}) + db_config = task_config.get("db_config", {}) + if db_config.get("db_label") == db_label: + metrics = result.get("metrics", {}) + return { + "insert_duration": metrics.get("insert_duration"), + "optimize_duration": metrics.get("optimize_duration"), + "load_duration": metrics.get("load_duration"), + "qps": metrics.get("qps"), + "recall": metrics.get("recall"), + } + except Exception: + return {} + return {} + + +def get_run_result( + db_label: str, before_files: set[str], results_dir: Path +) -> dict[str, Any]: + if not results_dir.exists(): + return {} + + current_files = {str(p) for p in results_dir.glob("result_*.json")} + new_files = sorted( + [Path(p) for p in current_files - before_files], + key=lambda p: p.stat().st_mtime, + reverse=True, + ) + for result_file in new_files: + metrics = extract_result_from_file(result_file, db_label) + if metrics: + return metrics + return get_latest_result(db_label, results_dir) + + +def offline_summary_path(index_path: Path) -> Path: + return index_path / "offline_benchmark_summary.json" + + +def read_json_if_exists(path: Path) -> dict[str, Any]: + if not path.exists(): + return {} + try: + with open(path) as f: + return json.load(f) + except Exception: + return {} + + +def find_omega_model_dir(index_path: Path) -> Path | None: + candidates = 
sorted(index_path.glob("*/omega_model")) + return candidates[0] if candidates else None + + +def sum_timing_ms(data: dict[str, Any]) -> int: + return sum(v for v in data.values() if isinstance(v, (int, float))) + + +def build_offline_summary( + index_path: Path, + db_label: str, + metrics: dict[str, Any], + retrain_only: bool = False, +) -> dict[str, Any]: + previous_summary = ( + read_json_if_exists(offline_summary_path(index_path)) if retrain_only else {} + ) + previous_offline = previous_summary.get("offline", {}) + previous_omega_training = previous_summary.get("omega_training", {}) + + insert_duration = metrics.get("insert_duration") + optimize_duration = metrics.get("optimize_duration") + load_duration = metrics.get("load_duration") + + omega_model_dir = find_omega_model_dir(index_path) + omega_training = {} + if omega_model_dir is not None: + omega_training = { + "collection_timing_ms": read_json_if_exists( + omega_model_dir / "training_collection_timing.json" + ), + "lightgbm_timing_ms": read_json_if_exists( + omega_model_dir / "lightgbm_training_timing.json" + ), + } + + if retrain_only: + insert_duration = previous_offline.get("insert_duration_s") + old_optimize_duration = previous_offline.get("optimize_duration_s") + old_training_s = ( + sum_timing_ms(previous_omega_training.get("collection_timing_ms", {})) + + sum_timing_ms(previous_omega_training.get("lightgbm_timing_ms", {})) + ) / 1000.0 + new_training_s = ( + sum_timing_ms(omega_training.get("collection_timing_ms", {})) + + sum_timing_ms(omega_training.get("lightgbm_timing_ms", {})) + ) / 1000.0 + if old_optimize_duration is not None: + optimize_duration = round( + old_optimize_duration - old_training_s + new_training_s, 4 + ) + else: + optimize_duration = metrics.get("optimize_duration") + load_duration = ( + round(insert_duration + optimize_duration, 4) + if insert_duration is not None and optimize_duration is not None + else metrics.get("load_duration") + ) + + summary = { + "db_label": db_label, 
+ "index_path": str(index_path), + "generated_at": datetime.now().isoformat(), + "offline": { + "insert_duration_s": insert_duration, + "optimize_duration_s": optimize_duration, + "load_duration_s": load_duration, + }, + } + if omega_training: + summary["omega_training"] = omega_training + return summary + + +def write_offline_summary( + index_path: Path, + db_label: str, + metrics: dict[str, Any], + retrain_only: bool = False, +) -> None: + summary = build_offline_summary(index_path, db_label, metrics, retrain_only=retrain_only) + with open(offline_summary_path(index_path), "w") as f: + json.dump(summary, f, indent=2, sort_keys=True) + + +def get_offline_load_duration(index_path: Path) -> float | None: + summary = read_json_if_exists(offline_summary_path(index_path)) + return summary.get("offline", {}).get("load_duration_s") + + +def run_command( + cmd: list[str], + vectordbbench_root: Path, + dry_run: bool = False, + extra_env: dict[str, str] | None = None, +) -> int: + cmd_str = " \\\n ".join(cmd) + print(f"\n{'=' * 60}") + print(f"Command:\n{cmd_str}") + print(f"{'=' * 60}\n") + if dry_run: + print("[DRY RUN] Command not executed") + return 0 + + cwd = vectordbbench_root if vectordbbench_root.exists() else None + env = os.environ.copy() + if extra_env: + env.update(extra_env) + result = subprocess.run(cmd, cwd=cwd, env=env) + return result.returncode + + +def run_command_capture( + cmd: list[str], + vectordbbench_root: Path, + dry_run: bool = False, + extra_env: dict[str, str] | None = None, +) -> tuple[int, str]: + cmd_str = " \\\n ".join(cmd) + print(f"\n{'=' * 60}") + print(f"Command:\n{cmd_str}") + print(f"{'=' * 60}\n") + + if dry_run: + print("[DRY RUN] Command not executed") + return 0, "" + + cwd = vectordbbench_root if vectordbbench_root.exists() else None + env = os.environ.copy() + if extra_env: + env.update(extra_env) + with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".log") as tmp: + tmp_path = Path(tmp.name) + + try: + with 
tmp_path.open("w+") as tmp: + result = subprocess.run( + cmd, cwd=cwd, env=env, stdout=tmp, stderr=subprocess.STDOUT, text=True + ) + tmp.flush() + tmp.seek(0) + output = tmp.read() + print(output, end="" if output.endswith("\n") or not output else "\n") + return result.returncode, output + finally: + tmp_path.unlink(missing_ok=True) + + +def must_get(config: dict[str, Any], key: str) -> Any: + if key not in config: + raise ValueError(f"Missing required config key: {key}") + return config[key] + + +def resolve_index_path(benchmark_dir: Path, path_value: str) -> Path: + path = Path(path_value).expanduser() + return path.resolve() if path.is_absolute() else (benchmark_dir / path).resolve() + + +def append_option(cmd: list[str], key: str, value: Any) -> None: + if value is None: + return + flag = f"--{key.replace('_', '-')}" + if isinstance(value, bool): + if value: + cmd.append(flag) + return + if isinstance(value, list): + cmd.extend([flag, ",".join(str(v) for v in value)]) + else: + cmd.extend([flag, str(value)]) + + +def extend_with_args(cmd: list[str], args_map: dict[str, Any] | None) -> None: + if not args_map: + return + for key, value in args_map.items(): + append_option(cmd, key, value) + + +def extend_with_flags(cmd: list[str], flags: list[str] | None) -> None: + if not flags: + return + for flag in flags: + cmd.append(f"--{flag}") + + +def build_base_command( + vectordbbench_cmd: list[str], + client_name: str, + path: Path, + db_label: str, + case_type: str, + common_args: dict[str, Any], + specific_args: dict[str, Any] | None = None, + extra_flags: list[str] | None = None, +) -> list[str]: + cmd = [ + *vectordbbench_cmd, + client_name, + "--path", + str(path), + "--db-label", + db_label, + "--case-type", + case_type, + ] + extend_with_args(cmd, common_args) + extend_with_args(cmd, specific_args) + extend_with_flags(cmd, extra_flags) + return cmd + + +def validate_profile_output(profile_name: str, ret: int, output: str, prefix: str) -> None: + if ret != 0: 
+ raise RuntimeError(f"{profile_name} profiling pass failed with exit code {ret}") + if not parse_query_records(output, prefix): + raise RuntimeError( + f"{profile_name} profiling pass completed without any '{prefix}' records in stdout" + ) + + +def print_header(title: str) -> None: + print("\n\n" + "#" * 70) + print(f"# {title}") + print("#" * 70) From 0578983044b7edb8975c4dd315eb51afa073816a Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 19:11:15 +0800 Subject: [PATCH 065/126] cleanup: consolidate benchmark entrypoints --- scripts/benchmark_cohere_10m.py | 867 +--------------------------- scripts/benchmark_cohere_1m.py | 894 +---------------------------- scripts/benchmark_hnsw_vs_omega.py | 8 + 3 files changed, 39 insertions(+), 1730 deletions(-) mode change 100755 => 100644 scripts/benchmark_cohere_1m.py diff --git a/scripts/benchmark_cohere_10m.py b/scripts/benchmark_cohere_10m.py index 10ddb6497..3c946ab00 100644 --- a/scripts/benchmark_cohere_10m.py +++ b/scripts/benchmark_cohere_10m.py @@ -1,862 +1,25 @@ #!/usr/bin/env python3 -""" -VectorDBBench: Zvec vs Zvec+OMEGA Comparison on Cohere-10M +"""Compatibility wrapper for the generic JSON-driven HNSW vs OMEGA runner.""" -Based on official zvec.org Cohere-10M benchmark parameters. 
- -Usage: - python benchmark_cohere_10m.py [--dry-run] [--target-recalls 0.90,0.95] -""" - -import argparse -import json -import subprocess import sys -import os -import importlib -import re -import tempfile -from dataclasses import dataclass -from datetime import datetime from pathlib import Path +import benchmark_hnsw_vs_omega -@dataclass -class BenchmarkResult: - type: str - ef_search: int - target_recall: float | None - path: str - success: bool - load_duration: float | None = None - qps: float | None = None - recall: float | None = None - profiling: dict | None = None - - -def resolve_paths( - zvec_root_arg: str | None, - vectordbbench_root_arg: str | None, - benchmark_dir_arg: str | None, - results_dir_arg: str | None, -) -> tuple[Path, Path, Path, Path]: - script_path = Path(__file__).resolve() - zvec_root = Path(zvec_root_arg).resolve() if zvec_root_arg else script_path.parent.parent - vectordbbench_root = ( - Path(vectordbbench_root_arg).resolve() - if vectordbbench_root_arg - else Path(os.environ.get("VECTORDBBENCH_ROOT", zvec_root.parent / "VectorDBBench")).resolve() - ) - benchmark_dir = ( - Path(benchmark_dir_arg).resolve() - if benchmark_dir_arg - else Path(os.environ.get("ZVEC_BENCHMARK_DIR", zvec_root / "benchmark_results")).resolve() - ) - source_results_dir = vectordbbench_root / "vectordb_bench" / "results" / "Zvec" - if results_dir_arg: - results_dir = Path(results_dir_arg).resolve() - elif source_results_dir.exists(): - results_dir = source_results_dir - else: - results_dir = None - try: - config = importlib.import_module("vectordb_bench").config - results_dir = Path(config.RESULTS_LOCAL_DIR).resolve() / "Zvec" - except Exception: - results_dir = vectordbbench_root / "vectordb_bench" / "results" / "Zvec" - return zvec_root, vectordbbench_root, benchmark_dir, results_dir - - -def resolve_vectordbbench_command() -> list[str]: - return [sys.executable, "-m", "vectordb_bench.cli.vectordbbench"] - - -KV_PATTERN = 
re.compile(r"([A-Za-z_]+)=([^\s,]+)") - - -def parse_scalar(value: str): - lower = value.lower() - if lower in {"true", "false"}: - return lower == "true" - try: - if any(ch in value for ch in [".", "e", "E"]): - return float(value) - return int(value) - except ValueError: - return value - - -def parse_key_values(line: str) -> dict: - return {key: parse_scalar(value) for key, value in KV_PATTERN.findall(line)} - - -def avg_metric(records: list[dict], key: str) -> float | None: - values = [float(record[key]) for record in records if key in record] - if not values: - return None - return sum(values) / len(values) - - -def parse_serial_runner_summary(output: str) -> dict: - summary = {} - for line in output.splitlines(): - if "search entire test_data:" not in line: - continue - summary = parse_key_values(line) - return summary - - -def parse_query_records(output: str, prefix: str) -> list[dict]: - records = [] - for line in output.splitlines(): - if prefix not in line: - continue - records.append(parse_key_values(line)) - return records - - -def build_hnsw_profile(metrics: dict, output: str) -> dict: - query_records = parse_query_records(output, "HNSW query stats:") - serial_summary = parse_serial_runner_summary(output) - return { - "benchmark_recall": metrics.get("recall"), - "benchmark_qps": metrics.get("qps"), - "profile_query_count": len(query_records), - "profile_avg_end2end_latency_ms": avg_metric(query_records, "latency_ms"), - "profile_avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), - "profile_avg_scan_cmps": avg_metric(query_records, "cmps"), - "profile_avg_pure_search_ms": avg_metric(query_records, "pure_search_ms"), - "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), - "profile_serial_p99_s": serial_summary.get("p99"), - "profile_serial_p95_s": serial_summary.get("p95"), - "profile_serial_avg_recall": serial_summary.get("avg_recall"), - } - - -def build_omega_profile(metrics: dict, output: str, hnsw_profile: dict | None) -> dict: 
- query_records = parse_query_records(output, "OMEGA query stats:") - serial_summary = parse_serial_runner_summary(output) - - avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") - avg_core_search_ms = avg_metric(query_records, "core_search_ms") - avg_pure_search_ms = avg_metric(query_records, "pure_search_ms") - avg_omega_control_ms = avg_metric(query_records, "omega_control_ms") - avg_search_only_ms = ( - avg_pure_search_ms if avg_pure_search_ms is not None else avg_core_search_ms - ) - - cmp_time_ms = None - if avg_pairwise_dist_cnt and avg_pairwise_dist_cnt > 0 and avg_search_only_ms is not None: - cmp_time_ms = avg_search_only_ms / avg_pairwise_dist_cnt - - model_overhead_cmp_equiv = None - if cmp_time_ms and cmp_time_ms > 0 and avg_omega_control_ms is not None: - model_overhead_cmp_equiv = avg_omega_control_ms / cmp_time_ms - - avg_saved_cmps = None - if hnsw_profile and hnsw_profile.get("profile_avg_cmps") is not None and avg_pairwise_dist_cnt is not None: - avg_saved_cmps = hnsw_profile["profile_avg_cmps"] - avg_pairwise_dist_cnt - - return { - "benchmark_recall": metrics.get("recall"), - "benchmark_qps": metrics.get("qps"), - "profile_query_count": len(query_records), - "profile_avg_end2end_latency_ms": avg_metric(query_records, "total_ms"), - "profile_avg_cmps": avg_pairwise_dist_cnt, - "profile_avg_scan_cmps": avg_metric(query_records, "scan_cmps"), - "profile_avg_omega_cmps": avg_metric(query_records, "omega_cmps"), - "profile_avg_prediction_calls": avg_metric(query_records, "prediction_calls"), - "profile_avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), - "profile_avg_advance_calls": avg_metric(query_records, "advance_calls"), - "profile_avg_model_overhead_ms": avg_omega_control_ms, - "profile_avg_setup_ms": avg_metric(query_records, "setup_ms"), - "profile_avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), - "profile_avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), - 
"profile_avg_core_search_ms": avg_core_search_ms, - "profile_avg_pure_search_ms": avg_pure_search_ms, - "profile_avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, - "profile_avg_early_stop_saved_cmps": avg_saved_cmps, - "profile_avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), - "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), - "profile_serial_p99_s": serial_summary.get("p99"), - "profile_serial_p95_s": serial_summary.get("p95"), - "profile_serial_avg_recall": serial_summary.get("avg_recall"), - } - - -def profiling_output_path(index_path: Path) -> Path: - return index_path / "online_benchmark_summary.json" - - -def write_profiling_summary(index_path: Path, payload: dict) -> None: - with open(profiling_output_path(index_path), "w") as f: - json.dump(payload, f, indent=2, sort_keys=True) - - -def write_grouped_profiling_summaries(dataset: str, results: list[BenchmarkResult]) -> list[Path]: - written_paths: list[Path] = [] - grouped: dict[str, list[BenchmarkResult]] = {} - for result in results: - grouped.setdefault(result.path, []).append(result) - - for path_str, grouped_results in grouped.items(): - index_path = Path(path_str) - write_profiling_summary( - index_path, - { - "generated_at": datetime.now().isoformat(), - "dataset": dataset, - "results": [ - { - "type": result.type, - "target_recall": result.target_recall, - "path": result.path, - "load_duration_s": result.load_duration, - "qps": result.qps, - "recall": result.recall, - "profiling": result.profiling, - } - for result in grouped_results - ], - }, - ) - written_paths.append(profiling_output_path(index_path)) - - return written_paths - - -def get_latest_result(db_label: str, results_dir: Path) -> dict: - if not results_dir.exists(): - return {} - - result_files = sorted( - results_dir.glob("result_*.json"), - key=lambda f: f.stat().st_mtime, - reverse=True, - ) - - for result_file in result_files: - try: - with open(result_file) as f: - data = json.load(f) 
- for result in data.get("results", []): - task_config = result.get("task_config", {}) - db_config = task_config.get("db_config", {}) - if db_config.get("db_label") == db_label: - metrics = result.get("metrics", {}) - return { - "insert_duration": metrics.get("insert_duration"), - "optimize_duration": metrics.get("optimize_duration"), - "load_duration": metrics.get("load_duration"), - "qps": metrics.get("qps"), - "recall": metrics.get("recall"), - } - except Exception: - continue - - return {} - - -def snapshot_result_files(results_dir: Path) -> set[str]: - if not results_dir.exists(): - return set() - return {str(p) for p in results_dir.glob("result_*.json")} - - -def extract_result_from_file(result_file: Path, db_label: str) -> dict: - try: - with open(result_file) as f: - data = json.load(f) - for result in data.get("results", []): - task_config = result.get("task_config", {}) - db_config = task_config.get("db_config", {}) - if db_config.get("db_label") == db_label: - metrics = result.get("metrics", {}) - return { - "insert_duration": metrics.get("insert_duration"), - "optimize_duration": metrics.get("optimize_duration"), - "load_duration": metrics.get("load_duration"), - "qps": metrics.get("qps"), - "recall": metrics.get("recall"), - } - except Exception: - return {} - return {} - - -def get_run_result(db_label: str, before_files: set[str], results_dir: Path) -> dict: - if not results_dir.exists(): - return {} - - current_files = {str(p) for p in results_dir.glob("result_*.json")} - new_files = sorted( - [Path(p) for p in current_files - before_files], - key=lambda p: p.stat().st_mtime, - reverse=True, - ) - - for result_file in new_files: - metrics = extract_result_from_file(result_file, db_label) - if metrics: - return metrics - - return get_latest_result(db_label, results_dir) - - -def offline_summary_path(index_path: Path) -> Path: - return index_path / "offline_benchmark_summary.json" - - -def read_json_if_exists(path: Path) -> dict: - if not 
path.exists(): - return {} - try: - with open(path) as f: - return json.load(f) - except Exception: - return {} - - -def find_omega_model_dir(index_path: Path) -> Path | None: - candidates = sorted(index_path.glob("*/omega_model")) - return candidates[0] if candidates else None - - -def sum_timing_ms(data: dict) -> int: - return sum(v for v in data.values() if isinstance(v, (int, float))) - - -def build_offline_summary( - index_path: Path, - db_label: str, - metrics: dict, - retrain_only: bool = False, -) -> dict: - previous_summary = read_json_if_exists(offline_summary_path(index_path)) if retrain_only else {} - previous_offline = previous_summary.get("offline", {}) - previous_omega_training = previous_summary.get("omega_training", {}) - - insert_duration = metrics.get("insert_duration") - optimize_duration = metrics.get("optimize_duration") - load_duration = metrics.get("load_duration") - - omega_model_dir = find_omega_model_dir(index_path) - omega_training = {} - if omega_model_dir is not None: - omega_training = { - "collection_timing_ms": read_json_if_exists( - omega_model_dir / "training_collection_timing.json" - ), - "lightgbm_timing_ms": read_json_if_exists( - omega_model_dir / "lightgbm_training_timing.json" - ), - } - - if retrain_only: - insert_duration = previous_offline.get("insert_duration_s") - old_optimize_duration = previous_offline.get("optimize_duration_s") - old_training_s = ( - sum_timing_ms(previous_omega_training.get("collection_timing_ms", {})) - + sum_timing_ms(previous_omega_training.get("lightgbm_timing_ms", {})) - ) / 1000.0 - new_training_s = ( - sum_timing_ms(omega_training.get("collection_timing_ms", {})) - + sum_timing_ms(omega_training.get("lightgbm_timing_ms", {})) - ) / 1000.0 - if old_optimize_duration is not None: - optimize_duration = round(old_optimize_duration - old_training_s + new_training_s, 4) - else: - optimize_duration = metrics.get("optimize_duration") - load_duration = ( - round(insert_duration + optimize_duration, 4) 
- if insert_duration is not None and optimize_duration is not None - else metrics.get("load_duration") - ) - - summary = { - "db_label": db_label, - "index_path": str(index_path), - "generated_at": datetime.now().isoformat(), - "offline": { - "insert_duration_s": insert_duration, - "optimize_duration_s": optimize_duration, - "load_duration_s": load_duration, - }, - } - - if omega_training: - summary["omega_training"] = omega_training - - return summary - - -def write_offline_summary( - index_path: Path, - db_label: str, - metrics: dict, - retrain_only: bool = False, -) -> None: - summary = build_offline_summary(index_path, db_label, metrics, retrain_only=retrain_only) - with open(offline_summary_path(index_path), "w") as f: - json.dump(summary, f, indent=2, sort_keys=True) - - -def get_offline_load_duration(index_path: Path) -> float | None: - summary = read_json_if_exists(offline_summary_path(index_path)) - return summary.get("offline", {}).get("load_duration_s") - - -def run_command( - cmd: list[str], - vectordbbench_root: Path, - dry_run: bool = False, - extra_env: dict[str, str] | None = None, -) -> int: - cmd_str = " \\\n ".join(cmd) - print(f"\n{'=' * 60}") - print(f"Command:\n{cmd_str}") - print(f"{'=' * 60}\n") - - if dry_run: - print("[DRY RUN] Command not executed") - return 0 - - cwd = vectordbbench_root if vectordbbench_root.exists() else None - env = os.environ.copy() - if extra_env: - env.update(extra_env) - result = subprocess.run(cmd, cwd=cwd, env=env) - return result.returncode - - -def run_command_capture( - cmd: list[str], - vectordbbench_root: Path, - dry_run: bool = False, - extra_env: dict[str, str] | None = None, -) -> tuple[int, str]: - cmd_str = " \\\n ".join(cmd) - print(f"\n{'=' * 60}") - print(f"Command:\n{cmd_str}") - print(f"{'=' * 60}\n") - - if dry_run: - print("[DRY RUN] Command not executed") - return 0, "" - - cwd = vectordbbench_root if vectordbbench_root.exists() else None - env = os.environ.copy() - if extra_env: - 
env.update(extra_env) - with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".log") as tmp: - tmp_path = Path(tmp.name) - - try: - with tmp_path.open("w+") as tmp: - result = subprocess.run(cmd, cwd=cwd, env=env, stdout=tmp, stderr=subprocess.STDOUT, text=True) - tmp.flush() - tmp.seek(0) - output = tmp.read() - print(output, end="" if output.endswith("\n") or not output else "\n") - return result.returncode, output - finally: - tmp_path.unlink(missing_ok=True) - - -def main(): - parser = argparse.ArgumentParser( - description="Benchmark Zvec HNSW vs OMEGA on Cohere-10M dataset" - ) - parser.add_argument("--dry-run", action="store_true", help="Print commands without executing") - parser.add_argument( - "--target-recalls", - type=str, - default="0.95", - help="Comma-separated target recalls for OMEGA (default: 0.95)", - ) - parser.add_argument("--skip-hnsw", action="store_true", help="Skip HNSW benchmark") - parser.add_argument("--skip-omega", action="store_true", help="Skip OMEGA benchmark") - parser.add_argument("--build-only", action="store_true", help="Only build index, skip search") - parser.add_argument("--search-only", action="store_true", help="Only run search on existing index") - parser.add_argument( - "--retrain-only", - action="store_true", - help="Reuse existing OMEGA index and only retrain the model during the build phase", - ) - parser.add_argument( - "--zvec-root", - type=str, - default=None, - help="Path to the zvec repository root (default: auto-detect from this script)", - ) - parser.add_argument( - "--vectordbbench-root", - type=str, - default=None, - help="Path to the VectorDBBench repository root " - "(default: $VECTORDBBENCH_ROOT or sibling repo next to zvec)", - ) - parser.add_argument( - "--benchmark-dir", - type=str, - default=None, - help="Directory used to store built benchmark artifacts " - "(default: $ZVEC_BENCHMARK_DIR or /benchmark_results)", - ) - parser.add_argument( - "--results-dir", - type=str, - default=None, - 
help="Directory containing VectorDBBench JSON result files " - "(default: runtime vectordb_bench.config.RESULTS_LOCAL_DIR/Zvec)", - ) - args = parser.parse_args() - - zvec_root, vectordbbench_root, benchmark_dir, results_dir = resolve_paths( - args.zvec_root, args.vectordbbench_root, args.benchmark_dir, args.results_dir - ) - vectordbbench_cmd = resolve_vectordbbench_command() - benchmark_dir.mkdir(parents=True, exist_ok=True) - - CASE_TYPE = "Performance768D10M" - M = 50 - EF_SEARCH = 118 - QUANTIZE_TYPE = "int8" - USE_REFINER = True - NUM_CONCURRENCY = "12,14,16,18,20" - CONCURRENCY_DURATION = 30 - K = 100 - - MIN_VECTOR_THRESHOLD = 100000 - NUM_TRAINING_QUERIES = 4000 - EF_TRAINING = 500 - WINDOW_SIZE = 100 - EF_GROUNDTRUTH = 1000 - - target_recalls = [float(x) for x in args.target_recalls.split(",")] - - hnsw_path = benchmark_dir / "cohere_10m_hnsw" - omega_path = benchmark_dir / "cohere_10m_omega" - - print("=" * 70) - print("VectorDBBench: Zvec HNSW vs OMEGA (Cohere-10M)") - print("Based on official zvec.org benchmark parameters") - print("=" * 70) - print() - print("Official HNSW Parameters:") - print(f" M: {M}") - print(f" ef_search: {EF_SEARCH}") - print(f" quantize_type: {QUANTIZE_TYPE}") - print(f" is_using_refiner: {USE_REFINER}") - print(f" num_concurrency: {NUM_CONCURRENCY}") - print() - print("OMEGA Parameters:") - print(f" min_vector_threshold: {MIN_VECTOR_THRESHOLD}") - print(f" num_training_queries: {NUM_TRAINING_QUERIES}") - print(f" ef_training: {EF_TRAINING}") - print(f" window_size: {WINDOW_SIZE}") - print(f" ef_groundtruth: {EF_GROUNDTRUTH}") - print(f" target_recalls: {target_recalls}") - print(f" build_mode: {'retrain model only (reuse existing index)' if args.retrain_only else 'build index + train model'}") - print(f"zvec_root: {zvec_root}") - print(f"vectordbbench_root: {vectordbbench_root}") - print(f"vectordbbench_cmd: {' '.join(vectordbbench_cmd)}") - print(f"benchmark_dir: {benchmark_dir}") - print(f"results_dir: {results_dir}") - 
print("=" * 70) - - results: list[BenchmarkResult] = [] - - if not args.skip_hnsw: - print(f"\n\n{'#' * 70}") - print("# HNSW Benchmark") - print(f"{'#' * 70}") - - hnsw_db_label = "16c64g-v0.1" - - common_hnsw_args = [ - *vectordbbench_cmd, - "zvec", - "--path", - str(hnsw_path), - "--db-label", - hnsw_db_label, - "--case-type", - CASE_TYPE, - "--num-concurrency", - NUM_CONCURRENCY, - "--quantize-type", - QUANTIZE_TYPE, - "--m", - str(M), - "--ef-search", - str(EF_SEARCH), - "--k", - str(K), - "--concurrency-duration", - str(CONCURRENCY_DURATION), - ] - if USE_REFINER: - common_hnsw_args.append("--is-using-refiner") - - if not args.search_only: - print("\n[Phase 1] Building HNSW index...") - before_files = snapshot_result_files(results_dir) - cmd = common_hnsw_args + [ - "--skip-search-serial", - "--skip-search-concurrent", - ] - ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) - if ret != 0 and not args.dry_run: - print("ERROR: HNSW build failed!") - return 1 - if not args.dry_run: - write_offline_summary( - hnsw_path, - hnsw_db_label, - get_run_result(hnsw_db_label, before_files, results_dir), - ) - - if not args.build_only: - print("\n[Phase 2] Running HNSW search benchmark...") - before_files = snapshot_result_files(results_dir) - cmd = common_hnsw_args + [ - "--skip-drop-old", - "--skip-load", - ] - ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) - metrics = get_run_result(hnsw_db_label, before_files, results_dir) if not args.dry_run else {} - load_duration = get_offline_load_duration(hnsw_path) - hnsw_profile = None - if ret == 0 and not args.dry_run: - print("\n[Profiling] Running HNSW serial-only profiling pass...") - profile_cmd = common_hnsw_args + [ - "--skip-drop-old", - "--skip-load", - "--skip-search-concurrent", - ] - _, profile_output = run_command_capture( - profile_cmd, - vectordbbench_root, - dry_run=False, - extra_env={ - "ZVEC_LOG_LEVEL": "INFO", - "ZVEC_HNSW_LOG_QUERY_STATS": "1", - 
"ZVEC_HNSW_LOG_QUERY_LIMIT": "2000", - }, - ) - hnsw_profile = build_hnsw_profile(metrics, profile_output) - results.append( - BenchmarkResult( - type="HNSW", - ef_search=EF_SEARCH, - target_recall=None, - path=str(hnsw_path), - success=ret == 0, - load_duration=load_duration if load_duration is not None else metrics.get("load_duration"), - qps=metrics.get("qps"), - recall=metrics.get("recall"), - profiling=hnsw_profile, - ) - ) - - if not args.skip_omega: - omega_db_label = f"omega-m{M}-ef{EF_SEARCH}-refiner-int8" - build_target_recall = target_recalls[0] - - common_omega_args = [ - *vectordbbench_cmd, - "zvecomega", - "--path", - str(omega_path), - "--db-label", - omega_db_label, - "--case-type", - CASE_TYPE, - "--num-concurrency", - NUM_CONCURRENCY, - "--quantize-type", - QUANTIZE_TYPE, - "--m", - str(M), - "--ef-search", - str(EF_SEARCH), - "--k", - str(K), - "--concurrency-duration", - str(CONCURRENCY_DURATION), - "--min-vector-threshold", - str(MIN_VECTOR_THRESHOLD), - "--num-training-queries", - str(NUM_TRAINING_QUERIES), - "--ef-training", - str(EF_TRAINING), - "--window-size", - str(WINDOW_SIZE), - "--ef-groundtruth", - str(EF_GROUNDTRUTH), - ] - if USE_REFINER: - common_omega_args.append("--is-using-refiner") - - if not args.search_only: - print(f"\n\n{'#' * 70}") - print("# OMEGA Offline Phase") - print(f"{'#' * 70}") - if args.retrain_only: - print("\n[Phase 1] Retraining OMEGA model only (reusing existing index)...") - print( - f"Reusing existing OMEGA path/db_label: " - f"path={omega_path}, db_label={omega_db_label}" - ) - else: - print("\n[Phase 1] Building OMEGA index + training model...") - print( - f"Using shared OMEGA path/db_label for all target recalls: " - f"path={omega_path}, db_label={omega_db_label}" - ) - print( - "Build-time target_recall is ignored by training; " - f"using first requested value for CLI compatibility: {build_target_recall}" - ) - before_files = snapshot_result_files(results_dir) - cmd = common_omega_args + [ - 
"--target-recall", - str(build_target_recall), - "--skip-search-serial", - "--skip-search-concurrent", - ] - if args.retrain_only: - cmd += [ - "--skip-drop-old", - "--skip-load", - "--retrain-only", - ] - ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) - if ret != 0 and not args.dry_run: - print("ERROR: OMEGA build failed!") - return 1 - if not args.dry_run: - write_offline_summary( - omega_path, - omega_db_label, - get_run_result(omega_db_label, before_files, results_dir), - retrain_only=args.retrain_only, - ) - - if not args.build_only: - for target_recall in target_recalls: - print(f"\n\n{'#' * 70}") - print(f"# OMEGA Benchmark (target_recall={target_recall})") - print(f"{'#' * 70}") - print("\n[Phase 2] Running OMEGA search benchmark...") - if args.retrain_only: - print("Search is using the newly retrained model on the existing index.") - before_files = snapshot_result_files(results_dir) - cmd = common_omega_args + [ - "--target-recall", - str(target_recall), - "--skip-drop-old", - "--skip-load", - ] - if args.retrain_only: - cmd.append("--retrain-only") - ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) - metrics = get_run_result(omega_db_label, before_files, results_dir) if not args.dry_run else {} - load_duration = get_offline_load_duration(omega_path) - omega_profile = None - if ret == 0 and not args.dry_run: - print("\n[Profiling] Running OMEGA serial-only profiling pass...") - profile_cmd = common_omega_args + [ - "--target-recall", - str(target_recall), - "--skip-drop-old", - "--skip-load", - "--skip-search-concurrent", - ] - if args.retrain_only: - profile_cmd.append("--retrain-only") - _, profile_output = run_command_capture( - profile_cmd, - vectordbbench_root, - dry_run=False, - extra_env={ - "ZVEC_LOG_LEVEL": "INFO", - "ZVEC_OMEGA_PROFILE_CONTROL_TIMING": "1", - "ZVEC_OMEGA_LOG_QUERY_STATS": "1", - "ZVEC_OMEGA_LOG_QUERY_LIMIT": "2000", - }, - ) - baseline_profile = next( - (result.profiling for result in results if 
result.type == "HNSW" and result.profiling), - None, - ) - omega_profile = build_omega_profile(metrics, profile_output, baseline_profile) - results.append( - BenchmarkResult( - type="OMEGA", - ef_search=EF_SEARCH, - target_recall=target_recall, - path=str(omega_path), - success=ret == 0, - load_duration=load_duration if load_duration is not None else metrics.get("load_duration"), - qps=metrics.get("qps"), - recall=metrics.get("recall"), - profiling=omega_profile, - ) - ) - - if results: - written_summary_paths = write_grouped_profiling_summaries("cohere_10m", results) - print("\n\n" + "=" * 70) - print("Benchmark Summary") - print("=" * 70) - print() - print(f"{'Type':<10} {'target_recall':<15} {'load_dur(s)':<12} {'qps':<12} {'recall':<10} {'Status':<10}") - print("-" * 75) - for r in results: - tr = f"{r.target_recall:.2f}" if r.target_recall else "N/A" - status = "OK" if r.success else "FAILED" - ld = f"{r.load_duration:.1f}" if r.load_duration else "N/A" - qps = f"{r.qps:.1f}" if r.qps else "N/A" - recall = f"{r.recall:.4f}" if r.recall else "N/A" - print(f"{r.type:<10} {tr:<15} {ld:<12} {qps:<12} {recall:<10} {status:<10}") - - print() - print("Profiling Summary") - print("-" * 75) - print(f"{'Type':<10} {'target_recall':<15} {'avg_lat(ms)':<12} {'avg_cmps':<12} {'avg_pred_calls':<16} {'avg_model_ms':<14} {'saved_cmps':<12}") - for r in results: - profile = r.profiling or {} - tr = f"{r.target_recall:.2f}" if r.target_recall else "N/A" - avg_lat = profile.get("profile_avg_end2end_latency_ms") - avg_cmps = profile.get("profile_avg_cmps") - avg_pred_calls = profile.get("profile_avg_prediction_calls") - avg_model_ms = profile.get("profile_avg_model_overhead_ms") - saved_cmps = profile.get("profile_avg_early_stop_saved_cmps") - print( - f"{r.type:<10} " - f"{tr:<15} " - f"{(f'{avg_lat:.3f}' if avg_lat is not None else 'N/A'):<12} " - f"{(f'{avg_cmps:.1f}' if avg_cmps is not None else 'N/A'):<12} " - f"{(f'{avg_pred_calls:.2f}' if avg_pred_calls is not None else 
'N/A'):<16} " - f"{(f'{avg_model_ms:.3f}' if avg_model_ms is not None else 'N/A'):<14} " - f"{(f'{saved_cmps:.1f}' if saved_cmps is not None else 'N/A'):<12}" - ) - print() - for path in written_summary_paths: - print(f"Profiling JSON: {path}") - - print() - print("To view results:") - print(" vectordbbench results") - print() - print("Or start the web UI:") - print(" vectordbbench start") - print() - return 0 if all(r.success for r in results) else 1 +def main() -> int: + script_dir = Path(__file__).resolve().parent + config_path = script_dir / "benchmark_hnsw_vs_omega.json" + sys.argv = [ + str(script_dir / "benchmark_hnsw_vs_omega.py"), + "--config", + str(config_path), + "--dataset", + "cohere_10m", + *sys.argv[1:], + ] + return benchmark_hnsw_vs_omega.main() if __name__ == "__main__": - raise SystemExit(main()) + sys.exit(main()) diff --git a/scripts/benchmark_cohere_1m.py b/scripts/benchmark_cohere_1m.py old mode 100755 new mode 100644 index 187b78070..33979e8d0 --- a/scripts/benchmark_cohere_1m.py +++ b/scripts/benchmark_cohere_1m.py @@ -1,886 +1,24 @@ #!/usr/bin/env python3 -""" -VectorDBBench: Zvec vs Zvec+OMEGA Comparison on Cohere-1M +"""Compatibility wrapper for the generic JSON-driven HNSW vs OMEGA runner.""" -Based on official zvec.org benchmark parameters. 
- -Usage: - python benchmark_cohere_1m.py [--dry-run] [--target-recalls 0.90,0.95,0.98] -""" - -import argparse -import json -import subprocess import sys -import os -import importlib -import re -import tempfile -from datetime import datetime from pathlib import Path -from dataclasses import dataclass - - -@dataclass -class BenchmarkResult: - """Parsed benchmark result from VectorDBBench output.""" - type: str - ef_search: int - target_recall: float | None - path: str - success: bool - load_duration: float | None = None - qps: float | None = None - recall: float | None = None - profiling: dict | None = None - - -def resolve_paths( - zvec_root_arg: str | None, - vectordbbench_root_arg: str | None, - benchmark_dir_arg: str | None, - results_dir_arg: str | None, -) -> tuple[Path, Path, Path, Path]: - script_path = Path(__file__).resolve() - zvec_root = Path(zvec_root_arg).resolve() if zvec_root_arg else script_path.parent.parent - vectordbbench_root = ( - Path(vectordbbench_root_arg).resolve() - if vectordbbench_root_arg - else Path(os.environ.get("VECTORDBBENCH_ROOT", zvec_root.parent / "VectorDBBench")).resolve() - ) - benchmark_dir = ( - Path(benchmark_dir_arg).resolve() - if benchmark_dir_arg - else Path(os.environ.get("ZVEC_BENCHMARK_DIR", zvec_root / "benchmark_results")).resolve() - ) - source_results_dir = vectordbbench_root / "vectordb_bench" / "results" / "Zvec" - - if results_dir_arg: - results_dir = Path(results_dir_arg).resolve() - elif source_results_dir.exists(): - results_dir = source_results_dir - else: - results_dir = None - try: - config = importlib.import_module("vectordb_bench").config - results_dir = Path(config.RESULTS_LOCAL_DIR).resolve() / "Zvec" - except Exception: - results_dir = vectordbbench_root / "vectordb_bench" / "results" / "Zvec" - return zvec_root, vectordbbench_root, benchmark_dir, results_dir - - -def resolve_vectordbbench_command() -> list[str]: - return [sys.executable, "-m", "vectordb_bench.cli.vectordbbench"] - - -KV_PATTERN = 
re.compile(r"([A-Za-z_]+)=([^\s,]+)") - - -def parse_scalar(value: str): - lower = value.lower() - if lower in {"true", "false"}: - return lower == "true" - try: - if any(ch in value for ch in [".", "e", "E"]): - return float(value) - return int(value) - except ValueError: - return value - - -def parse_key_values(line: str) -> dict: - return {key: parse_scalar(value) for key, value in KV_PATTERN.findall(line)} - - -def avg_metric(records: list[dict], key: str) -> float | None: - values = [float(record[key]) for record in records if key in record] - if not values: - return None - return sum(values) / len(values) - - -def parse_serial_runner_summary(output: str) -> dict: - summary = {} - for line in output.splitlines(): - if "search entire test_data:" not in line: - continue - summary = parse_key_values(line) - return summary - - -def parse_query_records(output: str, prefix: str) -> list[dict]: - records = [] - for line in output.splitlines(): - if prefix not in line: - continue - records.append(parse_key_values(line)) - return records - - -def build_hnsw_profile(metrics: dict, output: str) -> dict: - query_records = parse_query_records(output, "HNSW query stats:") - serial_summary = parse_serial_runner_summary(output) - return { - "benchmark_recall": metrics.get("recall"), - "benchmark_qps": metrics.get("qps"), - "profile_query_count": len(query_records), - "profile_avg_end2end_latency_ms": avg_metric(query_records, "latency_ms"), - "profile_avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), - "profile_avg_scan_cmps": avg_metric(query_records, "cmps"), - "profile_avg_pure_search_ms": avg_metric(query_records, "pure_search_ms"), - "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), - "profile_serial_p99_s": serial_summary.get("p99"), - "profile_serial_p95_s": serial_summary.get("p95"), - "profile_serial_avg_recall": serial_summary.get("avg_recall"), - } - - -def build_omega_profile(metrics: dict, output: str, hnsw_profile: dict | None) -> dict: 
- query_records = parse_query_records(output, "OMEGA query stats:") - serial_summary = parse_serial_runner_summary(output) - - avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") - avg_core_search_ms = avg_metric(query_records, "core_search_ms") - avg_pure_search_ms = avg_metric(query_records, "pure_search_ms") - avg_omega_control_ms = avg_metric(query_records, "omega_control_ms") - avg_search_only_ms = ( - avg_pure_search_ms if avg_pure_search_ms is not None else avg_core_search_ms - ) - - cmp_time_ms = None - if avg_pairwise_dist_cnt and avg_pairwise_dist_cnt > 0 and avg_search_only_ms is not None: - cmp_time_ms = avg_search_only_ms / avg_pairwise_dist_cnt - - model_overhead_cmp_equiv = None - if cmp_time_ms and cmp_time_ms > 0 and avg_omega_control_ms is not None: - model_overhead_cmp_equiv = avg_omega_control_ms / cmp_time_ms - - avg_saved_cmps = None - if hnsw_profile and hnsw_profile.get("profile_avg_cmps") is not None and avg_pairwise_dist_cnt is not None: - avg_saved_cmps = hnsw_profile["profile_avg_cmps"] - avg_pairwise_dist_cnt - - return { - "benchmark_recall": metrics.get("recall"), - "benchmark_qps": metrics.get("qps"), - "profile_query_count": len(query_records), - "profile_avg_end2end_latency_ms": avg_metric(query_records, "total_ms"), - "profile_avg_cmps": avg_pairwise_dist_cnt, - "profile_avg_scan_cmps": avg_metric(query_records, "scan_cmps"), - "profile_avg_omega_cmps": avg_metric(query_records, "omega_cmps"), - "profile_avg_prediction_calls": avg_metric(query_records, "prediction_calls"), - "profile_avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), - "profile_avg_advance_calls": avg_metric(query_records, "advance_calls"), - "profile_avg_model_overhead_ms": avg_omega_control_ms, - "profile_avg_setup_ms": avg_metric(query_records, "setup_ms"), - "profile_avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), - "profile_avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), - 
"profile_avg_core_search_ms": avg_core_search_ms, - "profile_avg_pure_search_ms": avg_pure_search_ms, - "profile_avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, - "profile_avg_early_stop_saved_cmps": avg_saved_cmps, - "profile_avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), - "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), - "profile_serial_p99_s": serial_summary.get("p99"), - "profile_serial_p95_s": serial_summary.get("p95"), - "profile_serial_avg_recall": serial_summary.get("avg_recall"), - } - - -def profiling_output_path(index_path: Path) -> Path: - return index_path / "online_benchmark_summary.json" - - -def write_profiling_summary(index_path: Path, payload: dict) -> None: - with open(profiling_output_path(index_path), "w") as f: - json.dump(payload, f, indent=2, sort_keys=True) - - -def write_grouped_profiling_summaries(dataset: str, results: list[BenchmarkResult]) -> list[Path]: - written_paths: list[Path] = [] - grouped: dict[str, list[BenchmarkResult]] = {} - for result in results: - grouped.setdefault(result.path, []).append(result) - - for path_str, grouped_results in grouped.items(): - index_path = Path(path_str) - write_profiling_summary( - index_path, - { - "generated_at": datetime.now().isoformat(), - "dataset": dataset, - "results": [ - { - "type": result.type, - "target_recall": result.target_recall, - "path": result.path, - "load_duration_s": result.load_duration, - "qps": result.qps, - "recall": result.recall, - "profiling": result.profiling, - } - for result in grouped_results - ], - }, - ) - written_paths.append(profiling_output_path(index_path)) - - return written_paths - - -def get_latest_result(db_label: str, results_dir: Path) -> dict: - """Get the latest benchmark result for a given db_label from VectorDBBench.""" - if not results_dir.exists(): - return {} - - # Find all result files, sorted by modification time (newest first) - result_files = sorted( - results_dir.glob("result_*.json"), - 
key=lambda f: f.stat().st_mtime, - reverse=True - ) - - for result_file in result_files: - try: - with open(result_file) as f: - data = json.load(f) - - # Check each result in this file - for result in data.get("results", []): - task_config = result.get("task_config", {}) - db_config = task_config.get("db_config", {}) - if db_config.get("db_label") == db_label: - metrics = result.get("metrics", {}) - return { - 'insert_duration': metrics.get('insert_duration'), - 'optimize_duration': metrics.get('optimize_duration'), - 'load_duration': metrics.get('load_duration'), - 'qps': metrics.get('qps'), - 'recall': metrics.get('recall'), - } - except Exception: - # Skip files that can't be parsed - continue - - return {} - - -def snapshot_result_files(results_dir: Path) -> set[str]: - if not results_dir.exists(): - return set() - return {str(p) for p in results_dir.glob("result_*.json")} - - -def extract_result_from_file(result_file: Path, db_label: str) -> dict: - try: - with open(result_file) as f: - data = json.load(f) - for result in data.get("results", []): - task_config = result.get("task_config", {}) - db_config = task_config.get("db_config", {}) - if db_config.get("db_label") == db_label: - metrics = result.get("metrics", {}) - return { - "insert_duration": metrics.get("insert_duration"), - "optimize_duration": metrics.get("optimize_duration"), - "load_duration": metrics.get("load_duration"), - "qps": metrics.get("qps"), - "recall": metrics.get("recall"), - } - except Exception: - return {} - return {} - - -def get_run_result(db_label: str, before_files: set[str], results_dir: Path) -> dict: - if not results_dir.exists(): - return {} - - current_files = {str(p) for p in results_dir.glob("result_*.json")} - new_files = sorted( - [Path(p) for p in current_files - before_files], - key=lambda p: p.stat().st_mtime, - reverse=True, - ) - - for result_file in new_files: - metrics = extract_result_from_file(result_file, db_label) - if metrics: - return metrics - - return 
get_latest_result(db_label, results_dir) - - -def offline_summary_path(index_path: Path) -> Path: - return index_path / "offline_benchmark_summary.json" - - -def read_json_if_exists(path: Path) -> dict: - if not path.exists(): - return {} - try: - with open(path) as f: - return json.load(f) - except Exception: - return {} - - -def find_omega_model_dir(index_path: Path) -> Path | None: - candidates = sorted(index_path.glob("*/omega_model")) - return candidates[0] if candidates else None - - -def sum_timing_ms(data: dict) -> int: - return sum(v for v in data.values() if isinstance(v, (int, float))) - - -def build_offline_summary( - index_path: Path, - db_label: str, - metrics: dict, - retrain_only: bool = False, -) -> dict: - previous_summary = read_json_if_exists(offline_summary_path(index_path)) if retrain_only else {} - previous_offline = previous_summary.get("offline", {}) - previous_omega_training = previous_summary.get("omega_training", {}) - - insert_duration = metrics.get("insert_duration") - optimize_duration = metrics.get("optimize_duration") - load_duration = metrics.get("load_duration") - - omega_model_dir = find_omega_model_dir(index_path) - omega_training = {} - if omega_model_dir is not None: - omega_training = { - "collection_timing_ms": read_json_if_exists( - omega_model_dir / "training_collection_timing.json" - ), - "lightgbm_timing_ms": read_json_if_exists( - omega_model_dir / "lightgbm_training_timing.json" - ), - } - - if retrain_only: - insert_duration = previous_offline.get("insert_duration_s") - old_optimize_duration = previous_offline.get("optimize_duration_s") - old_training_s = ( - sum_timing_ms(previous_omega_training.get("collection_timing_ms", {})) - + sum_timing_ms(previous_omega_training.get("lightgbm_timing_ms", {})) - ) / 1000.0 - new_training_s = ( - sum_timing_ms(omega_training.get("collection_timing_ms", {})) - + sum_timing_ms(omega_training.get("lightgbm_timing_ms", {})) - ) / 1000.0 - if old_optimize_duration is not None: - 
optimize_duration = round(old_optimize_duration - old_training_s + new_training_s, 4) - else: - optimize_duration = metrics.get("optimize_duration") - load_duration = ( - round(insert_duration + optimize_duration, 4) - if insert_duration is not None and optimize_duration is not None - else metrics.get("load_duration") - ) - - summary = { - "db_label": db_label, - "index_path": str(index_path), - "generated_at": datetime.now().isoformat(), - "offline": { - "insert_duration_s": insert_duration, - "optimize_duration_s": optimize_duration, - "load_duration_s": load_duration, - }, - } - - if omega_training: - summary["omega_training"] = omega_training - - return summary - - -def write_offline_summary( - index_path: Path, - db_label: str, - metrics: dict, - retrain_only: bool = False, -) -> None: - summary = build_offline_summary(index_path, db_label, metrics, retrain_only=retrain_only) - with open(offline_summary_path(index_path), "w") as f: - json.dump(summary, f, indent=2, sort_keys=True) - - -def get_offline_load_duration(index_path: Path) -> float | None: - summary = read_json_if_exists(offline_summary_path(index_path)) - return summary.get("offline", {}).get("load_duration_s") - - -def run_command( - cmd: list[str], - vectordbbench_root: Path, - dry_run: bool = False, - extra_env: dict[str, str] | None = None, -) -> int: - """Run a command and return the exit code.""" - cmd_str = " \\\n ".join(cmd) - print(f"\n{'='*60}") - print(f"Command:\n{cmd_str}") - print(f"{'='*60}\n") - - if dry_run: - print("[DRY RUN] Command not executed") - return 0 - - cwd = vectordbbench_root if vectordbbench_root.exists() else None - env = os.environ.copy() - if extra_env: - env.update(extra_env) - result = subprocess.run(cmd, cwd=cwd, env=env) - return result.returncode - - -def run_command_capture( - cmd: list[str], - vectordbbench_root: Path, - dry_run: bool = False, - extra_env: dict[str, str] | None = None, -) -> tuple[int, str]: - cmd_str = " \\\n ".join(cmd) - 
print(f"\n{'='*60}") - print(f"Command:\n{cmd_str}") - print(f"{'='*60}\n") - - if dry_run: - print("[DRY RUN] Command not executed") - return 0, "" - - cwd = vectordbbench_root if vectordbbench_root.exists() else None - env = os.environ.copy() - if extra_env: - env.update(extra_env) - with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".log") as tmp: - tmp_path = Path(tmp.name) - - try: - with tmp_path.open("w+") as tmp: - result = subprocess.run(cmd, cwd=cwd, env=env, stdout=tmp, stderr=subprocess.STDOUT, text=True) - tmp.flush() - tmp.seek(0) - output = tmp.read() - print(output, end="" if output.endswith("\n") or not output else "\n") - return result.returncode, output - finally: - tmp_path.unlink(missing_ok=True) - - -def main(): - parser = argparse.ArgumentParser( - description="Benchmark Zvec HNSW vs OMEGA on Cohere-1M dataset" - ) - parser.add_argument("--dry-run", action="store_true", help="Print commands without executing") - parser.add_argument("--target-recalls", type=str, default="0.95", - help="Comma-separated target recalls for OMEGA (default: 0.95)") - parser.add_argument("--skip-hnsw", action="store_true", help="Skip HNSW benchmark") - parser.add_argument("--skip-omega", action="store_true", help="Skip OMEGA benchmark") - parser.add_argument("--build-only", action="store_true", help="Only build index, skip search") - parser.add_argument("--search-only", action="store_true", help="Only run search on existing index") - parser.add_argument( - "--retrain-only", - action="store_true", - help="Reuse existing OMEGA index and only retrain the model during the build phase", - ) - parser.add_argument( - "--zvec-root", - type=str, - default=None, - help="Path to the zvec repository root (default: auto-detect from this script)", - ) - parser.add_argument( - "--vectordbbench-root", - type=str, - default=None, - help="Path to the VectorDBBench repository root " - "(default: $VECTORDBBENCH_ROOT or sibling repo next to zvec)", - ) - 
parser.add_argument( - "--benchmark-dir", - type=str, - default=None, - help="Directory used to store built benchmark artifacts " - "(default: $ZVEC_BENCHMARK_DIR or /benchmark_results)", - ) - parser.add_argument( - "--results-dir", - type=str, - default=None, - help="Directory containing VectorDBBench JSON result files " - "(default: runtime vectordb_bench.config.RESULTS_LOCAL_DIR/Zvec)", - ) - - args = parser.parse_args() - - zvec_root, vectordbbench_root, benchmark_dir, results_dir = resolve_paths( - args.zvec_root, args.vectordbbench_root, args.benchmark_dir, args.results_dir - ) - vectordbbench_cmd = resolve_vectordbbench_command() - - # Configuration - based on official zvec.org parameters - benchmark_dir.mkdir(parents=True, exist_ok=True) - - # Official parameters from zvec.org for Cohere-1M - CASE_TYPE = "Performance768D1M" - M = 15 - EF_SEARCH = 180 - QUANTIZE_TYPE = "int8" - NUM_CONCURRENCY = "12,14,16,18,20" - CONCURRENCY_DURATION = 30 - K = 100 - - # OMEGA parameters - MIN_VECTOR_THRESHOLD = 100000 - NUM_TRAINING_QUERIES = 4000 - EF_TRAINING = 500 - WINDOW_SIZE = 100 - EF_GROUNDTRUTH = 1000 - - # Parse target recalls - target_recalls = [float(x) for x in args.target_recalls.split(",")] - - # Paths - hnsw_path = benchmark_dir / "cohere_1m_hnsw" - omega_path = benchmark_dir / "cohere_1m_omega" - - print("=" * 70) - print("VectorDBBench: Zvec HNSW vs OMEGA (Cohere-1M)") - print("Based on official zvec.org benchmark parameters") - print("=" * 70) - print() - print("Official HNSW Parameters:") - print(f" M: {M}") - print(f" ef_search: {EF_SEARCH}") - print(f" quantize_type: {QUANTIZE_TYPE}") - print() - print("OMEGA Parameters:") - print(f" min_vector_threshold: {MIN_VECTOR_THRESHOLD}") - print(f" num_training_queries: {NUM_TRAINING_QUERIES}") - print(f" ef_training: {EF_TRAINING}") - print(f" window_size: {WINDOW_SIZE}") - print(f" ef_groundtruth: {EF_GROUNDTRUTH} (HNSW-based ground truth)") - print(f" target_recalls: {target_recalls}") - print(f" 
build_mode: {'retrain model only (reuse existing index)' if args.retrain_only else 'build index + train model'}") - print() - print(f"Concurrency: {NUM_CONCURRENCY}") - print(f"zvec_root: {zvec_root}") - print(f"vectordbbench_root: {vectordbbench_root}") - print(f"vectordbbench_cmd: {' '.join(vectordbbench_cmd)}") - print(f"benchmark_dir: {benchmark_dir}") - print(f"results_dir: {results_dir}") - print("=" * 70) - - results: list[BenchmarkResult] = [] - - # ============ HNSW Benchmark ============ - if not args.skip_hnsw: - print(f"\n\n{'#'*70}") - print(f"# HNSW Benchmark") - print(f"{'#'*70}") - - hnsw_db_label = "16c64g-v0.1" - - if not args.search_only: - # Phase 1: Build Index - print("\n[Phase 1] Building HNSW index...") - before_files = snapshot_result_files(results_dir) - cmd = [ - *vectordbbench_cmd, "zvec", - "--path", str(hnsw_path), - "--db-label", hnsw_db_label, - "--case-type", CASE_TYPE, - "--m", str(M), - "--ef-search", str(EF_SEARCH), - "--quantize-type", QUANTIZE_TYPE, - "--num-concurrency", NUM_CONCURRENCY, - "--concurrency-duration", str(CONCURRENCY_DURATION), - "--k", str(K), - "--skip-search-serial", - "--skip-search-concurrent", - ] - ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) - if ret != 0 and not args.dry_run: - print("ERROR: HNSW build failed!") - return 1 - if not args.dry_run: - write_offline_summary( - hnsw_path, - hnsw_db_label, - get_run_result(hnsw_db_label, before_files, results_dir), - ) - - if not args.build_only: - # Phase 2: Run Search Benchmark - print("\n[Phase 2] Running HNSW search benchmark...") - before_files = snapshot_result_files(results_dir) - cmd = [ - *vectordbbench_cmd, "zvec", - "--path", str(hnsw_path), - "--db-label", hnsw_db_label, - "--case-type", CASE_TYPE, - "--m", str(M), - "--ef-search", str(EF_SEARCH), - "--quantize-type", QUANTIZE_TYPE, - "--num-concurrency", NUM_CONCURRENCY, - "--concurrency-duration", str(CONCURRENCY_DURATION), - "--k", str(K), - "--skip-drop-old", - "--skip-load", 
- ] - ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) - - # Get results from VectorDBBench - metrics = get_run_result(hnsw_db_label, before_files, results_dir) if not args.dry_run else {} - load_duration = get_offline_load_duration(hnsw_path) - hnsw_profile = None - if ret == 0 and not args.dry_run: - print("\n[Profiling] Running HNSW serial-only profiling pass...") - profile_cmd = [ - *vectordbbench_cmd, "zvec", - "--path", str(hnsw_path), - "--db-label", hnsw_db_label, - "--case-type", CASE_TYPE, - "--m", str(M), - "--ef-search", str(EF_SEARCH), - "--quantize-type", QUANTIZE_TYPE, - "--num-concurrency", NUM_CONCURRENCY, - "--concurrency-duration", str(CONCURRENCY_DURATION), - "--k", str(K), - "--skip-drop-old", - "--skip-load", - "--skip-search-concurrent", - ] - _, profile_output = run_command_capture( - profile_cmd, - vectordbbench_root, - dry_run=False, - extra_env={ - "ZVEC_LOG_LEVEL": "INFO", - "ZVEC_HNSW_LOG_QUERY_STATS": "1", - "ZVEC_HNSW_LOG_QUERY_LIMIT": "2000", - }, - ) - hnsw_profile = build_hnsw_profile(metrics, profile_output) - results.append(BenchmarkResult( - type="HNSW", - ef_search=EF_SEARCH, - target_recall=None, - path=str(hnsw_path), - success=ret == 0, - load_duration=load_duration if load_duration is not None else metrics.get('load_duration'), - qps=metrics.get('qps'), - recall=metrics.get('recall'), - profiling=hnsw_profile, - )) - - # ============ OMEGA Benchmarks ============ - if not args.skip_omega: - omega_db_label = f"omega-m{M}-ef{EF_SEARCH}-int8" - build_target_recall = target_recalls[0] - - if not args.search_only: - print(f"\n\n{'#'*70}") - print("# OMEGA Offline Phase") - print(f"{'#'*70}") - if args.retrain_only: - print("\n[Phase 1] Retraining OMEGA model only (reusing existing index)...") - print( - f"Reusing existing OMEGA path/db_label: path={omega_path}, db_label={omega_db_label}" - ) - else: - print("\n[Phase 1] Building OMEGA index + training model...") - print( - f"Using shared OMEGA path/db_label for 
all target recalls: path={omega_path}, db_label={omega_db_label}" - ) - print( - f"Build-time target_recall is ignored by training; using first requested value for CLI compatibility: {build_target_recall}" - ) - before_files = snapshot_result_files(results_dir) - cmd = [ - *vectordbbench_cmd, "zvecomega", - "--path", str(omega_path), - "--db-label", omega_db_label, - "--case-type", CASE_TYPE, - "--m", str(M), - "--ef-search", str(EF_SEARCH), - "--quantize-type", QUANTIZE_TYPE, - "--min-vector-threshold", str(MIN_VECTOR_THRESHOLD), - "--num-training-queries", str(NUM_TRAINING_QUERIES), - "--ef-training", str(EF_TRAINING), - "--window-size", str(WINDOW_SIZE), - "--ef-groundtruth", str(EF_GROUNDTRUTH), - "--target-recall", str(build_target_recall), - "--num-concurrency", NUM_CONCURRENCY, - "--concurrency-duration", str(CONCURRENCY_DURATION), - "--k", str(K), - "--skip-search-serial", - "--skip-search-concurrent", - ] - if args.retrain_only: - cmd.extend([ - "--skip-drop-old", - "--skip-load", - "--retrain-only", - ]) - ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) - if ret != 0 and not args.dry_run: - print("ERROR: OMEGA build failed!") - return 1 - if not args.dry_run: - write_offline_summary( - omega_path, - omega_db_label, - get_run_result(omega_db_label, before_files, results_dir), - retrain_only=args.retrain_only, - ) - - if not args.build_only: - for target_recall in target_recalls: - print(f"\n\n{'#'*70}") - print(f"# OMEGA Benchmark (target_recall={target_recall})") - print(f"{'#'*70}") - - # Phase 2: Run Search Benchmark - print("\n[Phase 2] Running OMEGA search benchmark...") - if args.retrain_only: - print("Search is using the newly retrained model on the existing index.") - before_files = snapshot_result_files(results_dir) - cmd = [ - *vectordbbench_cmd, "zvecomega", - "--path", str(omega_path), - "--db-label", omega_db_label, - "--case-type", CASE_TYPE, - "--m", str(M), - "--ef-search", str(EF_SEARCH), - "--quantize-type", 
QUANTIZE_TYPE, - "--min-vector-threshold", str(MIN_VECTOR_THRESHOLD), - "--num-training-queries", str(NUM_TRAINING_QUERIES), - "--ef-training", str(EF_TRAINING), - "--window-size", str(WINDOW_SIZE), - "--ef-groundtruth", str(EF_GROUNDTRUTH), - "--target-recall", str(target_recall), - "--num-concurrency", NUM_CONCURRENCY, - "--concurrency-duration", str(CONCURRENCY_DURATION), - "--k", str(K), - "--skip-drop-old", - "--skip-load", - ] - if args.retrain_only: - cmd.append("--retrain-only") - ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) - - metrics = get_run_result(omega_db_label, before_files, results_dir) if not args.dry_run else {} - load_duration = get_offline_load_duration(omega_path) - omega_profile = None - if ret == 0 and not args.dry_run: - print("\n[Profiling] Running OMEGA serial-only profiling pass...") - profile_cmd = [ - *vectordbbench_cmd, "zvecomega", - "--path", str(omega_path), - "--db-label", omega_db_label, - "--case-type", CASE_TYPE, - "--m", str(M), - "--ef-search", str(EF_SEARCH), - "--quantize-type", QUANTIZE_TYPE, - "--min-vector-threshold", str(MIN_VECTOR_THRESHOLD), - "--num-training-queries", str(NUM_TRAINING_QUERIES), - "--ef-training", str(EF_TRAINING), - "--window-size", str(WINDOW_SIZE), - "--ef-groundtruth", str(EF_GROUNDTRUTH), - "--target-recall", str(target_recall), - "--num-concurrency", NUM_CONCURRENCY, - "--concurrency-duration", str(CONCURRENCY_DURATION), - "--k", str(K), - "--skip-drop-old", - "--skip-load", - "--skip-search-concurrent", - ] - if args.retrain_only: - profile_cmd.append("--retrain-only") - _, profile_output = run_command_capture( - profile_cmd, - vectordbbench_root, - dry_run=False, - extra_env={ - "ZVEC_LOG_LEVEL": "INFO", - "ZVEC_OMEGA_PROFILE_CONTROL_TIMING": "1", - "ZVEC_OMEGA_LOG_QUERY_STATS": "1", - "ZVEC_OMEGA_LOG_QUERY_LIMIT": "2000", - }, - ) - baseline_profile = next( - (result.profiling for result in results if result.type == "HNSW" and result.profiling), - None, - ) - omega_profile 
= build_omega_profile(metrics, profile_output, baseline_profile) - results.append(BenchmarkResult( - type="OMEGA", - ef_search=EF_SEARCH, - target_recall=target_recall, - path=str(omega_path), - success=ret == 0, - load_duration=load_duration if load_duration is not None else metrics.get('load_duration'), - qps=metrics.get('qps'), - recall=metrics.get('recall'), - profiling=omega_profile, - )) - - # ============ Summary ============ - if results: - written_summary_paths = write_grouped_profiling_summaries("cohere_1m", results) - print("\n\n" + "=" * 70) - print("Benchmark Summary") - print("=" * 70) - print() - print(f"{'Type':<10} {'target_recall':<15} {'load_dur(s)':<12} {'qps':<12} {'recall':<10} {'Status':<10}") - print("-" * 75) - for r in results: - tr = f"{r.target_recall:.2f}" if r.target_recall else "N/A" - status = "OK" if r.success else "FAILED" - ld = f"{r.load_duration:.1f}" if r.load_duration else "N/A" - qps = f"{r.qps:.1f}" if r.qps else "N/A" - recall = f"{r.recall:.4f}" if r.recall else "N/A" - print(f"{r.type:<10} {tr:<15} {ld:<12} {qps:<12} {recall:<10} {status:<10}") - - print() - print("Profiling Summary") - print("-" * 75) - print(f"{'Type':<10} {'target_recall':<15} {'avg_lat(ms)':<12} {'avg_cmps':<12} {'avg_pred_calls':<16} {'avg_model_ms':<14} {'saved_cmps':<12}") - for r in results: - profile = r.profiling or {} - tr = f"{r.target_recall:.2f}" if r.target_recall else "N/A" - avg_lat = profile.get("profile_avg_end2end_latency_ms") - avg_cmps = profile.get("profile_avg_cmps") - avg_pred_calls = profile.get("profile_avg_prediction_calls") - avg_model_ms = profile.get("profile_avg_model_overhead_ms") - saved_cmps = profile.get("profile_avg_early_stop_saved_cmps") - print( - f"{r.type:<10} " - f"{tr:<15} " - f"{(f'{avg_lat:.3f}' if avg_lat is not None else 'N/A'):<12} " - f"{(f'{avg_cmps:.1f}' if avg_cmps is not None else 'N/A'):<12} " - f"{(f'{avg_pred_calls:.2f}' if avg_pred_calls is not None else 'N/A'):<16} " - f"{(f'{avg_model_ms:.3f}' if 
avg_model_ms is not None else 'N/A'):<14} " - f"{(f'{saved_cmps:.1f}' if saved_cmps is not None else 'N/A'):<12}" - ) - print() - for path in written_summary_paths: - print(f"Profiling JSON: {path}") - - print() - print("To view results:") - print(" vectordbbench results") - print() - print("Or start the web UI:") - print(" vectordbbench start") - print() - return 0 if all(r.success for r in results) else 1 +import benchmark_hnsw_vs_omega + + +def main() -> int: + script_dir = Path(__file__).resolve().parent + config_path = script_dir / "benchmark_hnsw_vs_omega.json" + sys.argv = [ + str(script_dir / "benchmark_hnsw_vs_omega.py"), + "--config", + str(config_path), + "--dataset", + "cohere_1m", + *sys.argv[1:], + ] + return benchmark_hnsw_vs_omega.main() if __name__ == "__main__": diff --git a/scripts/benchmark_hnsw_vs_omega.py b/scripts/benchmark_hnsw_vs_omega.py index 73dfe837e..2531c9f19 100644 --- a/scripts/benchmark_hnsw_vs_omega.py +++ b/scripts/benchmark_hnsw_vs_omega.py @@ -38,6 +38,12 @@ def parse_args() -> argparse.Namespace: required=True, help="Dataset key to run from the top-level JSON config map", ) + parser.add_argument( + "--target-recalls", + type=str, + default=None, + help="Optional comma-separated override for omega.target_recalls in the JSON config", + ) parser.add_argument("--dry-run", action="store_true", help="Print commands without executing") parser.add_argument("--skip-hnsw", action="store_true", help="Skip HNSW benchmark") parser.add_argument("--skip-omega", action="store_true", help="Skip OMEGA benchmark") @@ -103,6 +109,8 @@ def main() -> int: hnsw_db_label = must_get(hnsw_config, "db_label") omega_db_label = must_get(omega_config, "db_label") target_recalls = omega_config.get("target_recalls", []) + if args.target_recalls: + target_recalls = [float(value) for value in args.target_recalls.split(",") if value] if not target_recalls: raise ValueError("omega.target_recalls must be a non-empty list") From 
8f73f0142021119b55cc368d8f9df7cc6cdf9cee Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 19:15:21 +0800 Subject: [PATCH 066/126] cleanup: remove stale omega integration comments --- src/core/algorithm/omega/omega_searcher.cc | 3 ++- src/db/index/column/vector_column/vector_index_results.h | 7 ------- src/db/training/query_generator.h | 4 ++-- src/db/training/training_data_collector.cc | 2 +- thirdparty/omega/OMEGALib | 2 +- 5 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 35cb762e3..54e194dcf 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -274,7 +274,8 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet return HnswSearcher::search_impl(query, qmeta, count, context); } - // Enable training mode if active (CRITICAL: must be before search) + // Attach training state before the HNSW loop starts so labels observe the + // full query trajectory. 
if (training_mode_enabled_) { // Get ground truth for this query if available std::vector gt_for_query; diff --git a/src/db/index/column/vector_column/vector_index_results.h b/src/db/index/column/vector_column/vector_index_results.h index 154ff6e42..f824b651d 100644 --- a/src/db/index/column/vector_column/vector_index_results.h +++ b/src/db/index/column/vector_column/vector_index_results.h @@ -90,13 +90,6 @@ class VectorIndexResults : public IndexResults { friend class VectorIterator; public: - // VectorIndexResults(core::IndexDocumentList &&doc_list) - // : docs_(std::move(doc_list)) {} - // - // VectorIndexResults(core::IndexDocumentList &&doc_list, - // std::vector &&reverted_vector_list) - // : docs_(std::move(doc_list)), - // reverted_vector_list_(std::move(reverted_vector_list)) {} VectorIndexResults(bool is_sparse, core::IndexDocumentList &&doc_list, std::vector &&reverted_vector_list, std::vector &&reverted_sparse_values_list) diff --git a/src/db/training/query_generator.h b/src/db/training/query_generator.h index 723ca1f4e..af7c030b3 100644 --- a/src/db/training/query_generator.h +++ b/src/db/training/query_generator.h @@ -56,7 +56,7 @@ class TrainingQueryGenerator { uint64_t seed = 42); /** - * @brief Sample base vectors from a segment (legacy, without doc_ids) + * @brief Sample base vectors from a segment without doc_ids * * @param segment The segment to sample from (must be persisted) * @param field_name The vector field name to sample @@ -102,7 +102,7 @@ class TrainingQueryGenerator { uint64_t seed = 42); /** - * @brief Generate training queries (sample + noise) - legacy method + * @brief Generate training queries with the sample-and-noise helper * * Combines sampling and noise addition in one step. 
* diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 0c41b1e84..307da137a 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -209,7 +209,7 @@ Result TrainingDataCollector::CollectTrainingDataFr // Persisted OMEGA collections currently do not propagate per-query // training_query_id through the search context reliably. In the - // single-threaded calibration path, fall back to the legacy global + // single-threaded calibration path, fall back to the existing global // query-id setter to preserve correct labels without races. if (actual_threads == 1) { indexers[0]->SetCurrentQueryId(static_cast(query_idx)); diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 800d1d645..ac25139c3 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 800d1d645cf2da1f233c1b7ee27a328d90b3a67e +Subproject commit ac25139c3ea0dbe9dbfe00d05900870c66a36318 From 52e657c47e83fde14d68ef0879c937c78448c918 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 19:18:55 +0800 Subject: [PATCH 067/126] cleanup: classify internal perf tooling --- scripts/README.md | 20 ++++ scripts/perf_ab_search_core.sh | 2 + scripts/perf_hnsw_hooks_microbench.sh | 2 + tools/core/README.md | 162 ++++++-------------------- 4 files changed, 59 insertions(+), 127 deletions(-) diff --git a/scripts/README.md b/scripts/README.md index e69de29bb..e9b9e5437 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -0,0 +1,20 @@ +# Benchmark Scripts + +## Maintained Entry Points + +| Script | Status | Purpose | +| --- | --- | --- | +| `benchmark_hnsw_vs_omega.py` | maintained | Generic JSON-driven HNSW vs OMEGA runner. | +| `benchmark_cohere_1m.py` | compatibility wrapper | Preset wrapper around `benchmark_hnsw_vs_omega.py --dataset cohere_1m`. 
| +| `benchmark_cohere_10m.py` | compatibility wrapper | Preset wrapper around `benchmark_hnsw_vs_omega.py --dataset cohere_10m`. | + +## Internal Perf Helpers + +| Script | Status | Purpose | +| --- | --- | --- | +| `perf_hnsw_hooks_microbench.sh` | internal | Run `perf stat/record` against `hnsw_hooks_microbench`. | +| `perf_ab_search_core.sh` | internal | Compare HNSW, empty-hooks, and hooks-only OMEGA search paths with `perf`. | +| `gcov.sh` | internal | Coverage helper for local development. | + +These helpers assume a prepared benchmark environment and are not part of the +stable user-facing benchmarking interface. diff --git a/scripts/perf_ab_search_core.sh b/scripts/perf_ab_search_core.sh index 3b56cbfcf..649bf6255 100644 --- a/scripts/perf_ab_search_core.sh +++ b/scripts/perf_ab_search_core.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash +# Internal profiling helper for VectorDBBench search-core comparisons. +# Kept for local perf investigations; expects prepared benchmark artifacts. set -euo pipefail DATASET="${1:-1m}" diff --git a/scripts/perf_hnsw_hooks_microbench.sh b/scripts/perf_hnsw_hooks_microbench.sh index 7c2eb8d90..d32aea3b8 100755 --- a/scripts/perf_hnsw_hooks_microbench.sh +++ b/scripts/perf_hnsw_hooks_microbench.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash +# Internal profiling helper for hnsw_hooks_microbench. +# Maintained for OMEGA/HNSW hotspot analysis, not as a public benchmark entrypoint. set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/tools/core/README.md b/tools/core/README.md index 8a25d55ad..37b524a70 100644 --- a/tools/core/README.md +++ b/tools/core/README.md @@ -1,140 +1,48 @@ +# Core Tools -# Benchmarking scripts +This directory mixes product-adjacent command-line tools with internal +benchmark helpers. The table below is the maintenance contract for each group. -This directory contains benchmarking scripts and reproducing steps. 
+## Maintained Tools -## COHERE experiments +These binaries are part of the normal local benchmarking / debugging workflow + and should keep building with the rest of the tree. -### Getting COHERE Data +| Tool | Status | Purpose | +| --- | --- | --- | +| `txt2vecs` | maintained | Convert text vectors into zvec binary format. | +| `local_builder` | maintained | Build an index from YAML config. | +| `recall` | maintained | Offline recall evaluation from YAML config. | +| `bench` | maintained | Throughput / latency benchmarking from YAML config. | -Please download the COHERE 10M dataset to cohere_large_10m as follows: +## Internal Perf Tools -```bash -neighbors_head_1p.parquet -neighbors_tail_1pgit.parquet -neighbors_labels_label_20p.parquet -neighbors_labels_label_50p.parquet -neighbors_labels_label_80p.parquet -neighbors_labels_label_95p.parquet -neighbors.parquet -shuffle_train-00-of-10.parquet -shuffle_train-01-of-10.parquet -shuffle_train-02-of-10.parquet -shuffle_train-03-of-10.parquet -shuffle_train-04-of-10.parquet -shuffle_train-05-of-10.parquet -shuffle_train-06-of-10.parquet -shuffle_train-07-of-10.parquet -shuffle_train-08-of-10.parquet -shuffle_train-09-of-10.parquet -scalar_labels.parquet -test.parquet -``` +These tools exist to answer OMEGA and HNSW integration questions. They are +useful for development, but they are not general product entrypoints. -### Preparing Environment -Clone code and init: -```bash -$ git clone git@github.com:alibaba/zvec.git -$ cd zvec -$ git submodule update --init -``` +| Tool | Status | Purpose | Typical entrypoint | +| --- | --- | --- | --- | +| `hnsw_hooks_microbench` | internal | Compare raw HNSW, empty hooks, and OMEGA hooks on the same search core. | `scripts/perf_hnsw_hooks_microbench.sh` | +| `omega_predict_microbench` | internal | Measure standalone OMEGA prediction cost outside the full search loop. | Invoke binary directly with a saved model. 
| -Make build docker image: -```bash -docker build -t zvec/build-image:latest -f ./.github/workflows/docker/Dockerfile.ubuntu18.10-glibc228 . -``` +`hnsw_hooks_microbench` assumes a persisted HNSW index and benchmark query set. +It is intended for single-machine profiling, not for end-user benchmarking. -Start bulld container: -```bash -docker run -it --net=host -d -e DEBUG_MODE=true --user root --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v /home/zvec/:/home/zvec/ -w /home/zvec --name=build_zvec zvec/build-image:latest bash -``` +## Compatibility / Reference Tools -Turn-off complation option: -``` -option(BUILD_PYTHON_BINDINGS "Build Python bindings using pybind11" ON) -=> -option(BUILD_PYTHON_BINDINGS "Build Python bindings using pybind11" OFF) -``` +These binaries are retained so older YAML-based flows and historical result +reproduction still work, but new work should prefer the maintained entrypoints +above. -Build source code: -``` -$ docker exec -it build_zvec bash -$ cd /home/zvec/workspace/zvec -$ mkdir build -$ cd build -$ cmake -DENABLE_SKYLAKE=ON -DCMAKE_BUILD_TYPE=Release .. -``` - -### Converting Dataset -Export vector data using python script: -```bash -$ mkdir 10m.output -$ python3 convert_cohere_parquet.py -``` - -Convert vector data to binary formatted file. 
-```bash -/home/zvec/workspace/zvec/bin/txt2vecs -input=cohere_train_vector_10m.txt --output=cohere_train_vector_10m.zvec.vecs --dimension=768 -``` - -### Preparing Bench Config -Prepare Build Config - -```yaml -BuilderCommon: - BuilderClass: HnswStreamer - BuildFile: /home/zvec/bench/data/10m/cohere_train_vector_10m.zvec.vecs - NeedTrain: true - TrainFile: /home/zvec/bench/data/10m/cohere_train_vector_10m.zvec.vecs - DumpPath: /home/zvec/bench/config/cohere_train_vector_10m.index - IndexPath: /home/zvec/bench/config/cohere_train_vector_10m.2.index - - ConverterName: CosineInt8Converter - MetricName: Cosine - - ThreadCount: 16 - -BuilderParams: - proxima.general.builder.thread_count: !!int 16 - proxima.hnsw.builder.thread_count: !!int 16 -``` - -Prepare Search Config - -```yaml -SearcherCommon: - SearcherClass: HnswStreamer - IndexPath: /home/zvec/bench/config/cohere_train_vector_10m.2.index - TopK: 1,10,50,100 - QueryFile: /home/zvec/bench/data/10m/cohere_test_vector_1000.new.txt - QueryType: float - QueryFirstSep: ";" - QuerySecondSep: " " - GroundTruthFile: /home/zvec/bench/data/10m/neighbors.txt - RecallThreadCount: 1 - BenchThreadCount: 16 - BenchIterCount: 1000000000 - CompareById: true - -SearcherParams: - proxima.hnsw.streamer.ef: !!int 250 -``` - -### Building Index -Conduct Build -```bash -$ /home/zvec/workspace/zvec/build/bin/local_build_original ./build.yaml -``` - -### Performing Bench -Conduct Recall -```bash -$ /home/zvec/workspace/zvec/build/bin/recall_original ./search.yaml -``` - -Conduct Bench -```bash -$ /home/zvec/workspace/zvec/build/bin/bench_original ./search.yaml -``` +| Tool | Status | Purpose | +| --- | --- | --- | +| `local_builder_original` | compatibility | Reference copy of the legacy builder flow. | +| `recall_original` | compatibility | Reference copy of the legacy recall flow. | +| `bench_original` | compatibility | Reference copy of the legacy bench flow. 
| +| `convert_cohere_parquet.py` | compatibility | Dataset conversion helper for historical Cohere experiments. | +## Notes +- The JSON-driven OMEGA vs HNSW workflow lives under [`scripts/`](../scripts). +- Perf shell wrappers in [`scripts/`](../scripts) are internal-only and assume a + prepared local environment plus existing benchmark artifacts. From d87adfc51d5ec44f8e0c60e8d6636589bccf9c99 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 19:36:28 +0800 Subject: [PATCH 068/126] cleanup: revert uploaded doc changes --- OMEGA_RUNTIME_FLAGS.md | 42 ----------- scripts/README.md | 20 ------ tools/core/README.md | 160 ++++++++++++++++++++++++++++++++--------- 3 files changed, 125 insertions(+), 97 deletions(-) delete mode 100644 OMEGA_RUNTIME_FLAGS.md diff --git a/OMEGA_RUNTIME_FLAGS.md b/OMEGA_RUNTIME_FLAGS.md deleted file mode 100644 index 315530dfb..000000000 --- a/OMEGA_RUNTIME_FLAGS.md +++ /dev/null @@ -1,42 +0,0 @@ -# OMEGA Runtime Flags - -This note classifies the runtime flags currently used by zvec's HNSW/OMEGA -integration. The goal is to distinguish product-path controls from benchmark -and profiling knobs. - -## Production / Safety - -| Flag | Scope | Purpose | -| --- | --- | --- | -| `ZVEC_OMEGA_DISABLE_MODEL_PREDICTION` | OMEGA search path | Forces the OMEGA path to run without model-driven stopping. Useful as a fallback/debug switch while preserving the hook/control path. | - -## Profiling / Per-query stats - -| Flag | Scope | Purpose | -| --- | --- | --- | -| `ZVEC_HNSW_LOG_QUERY_STATS` | HNSW streamer | Enables per-query HNSW stats logging. | -| `ZVEC_HNSW_LOG_QUERY_LIMIT` | HNSW streamer | Caps how many HNSW query-stat lines are emitted. | -| `ZVEC_OMEGA_LOG_QUERY_STATS` | OMEGA streamer | Enables per-query OMEGA stats logging. | -| `ZVEC_OMEGA_LOG_QUERY_LIMIT` | OMEGA streamer | Caps how many OMEGA query-stat lines are emitted. 
| -| `ZVEC_OMEGA_PROFILE_CONTROL_TIMING` | OMEGA / OMEGALib | Enables fine-grained OMEGA control-path timing. This is profiling-only and should stay off for normal benchmark runs. | - -## Benchmark-only - -| Flag | Scope | Purpose | -| --- | --- | --- | -| `ZVEC_HNSW_ENABLE_EMPTY_HOOKS` | HNSW streamer | Forces HNSW to execute the empty-hook path so hook dispatch overhead can be measured in isolation. | - -## Generic logging - -| Flag | Scope | Purpose | -| --- | --- | --- | -| `ZVEC_LOG_LEVEL` | Logging | Controls zvec log verbosity. Benchmark scripts commonly set it to `INFO` so query-stat lines are visible. | - -## Cleanup notes - -- All flags listed above still have active call sites or benchmark usage. -- No remaining runtime env var was removed in this cleanup step because no - clearly dead env-var knob was found in the current branch. -- Previously removed dead surface in this cleanup phase was limited to unused - code/API, not to active runtime flags. - diff --git a/scripts/README.md b/scripts/README.md index e9b9e5437..e69de29bb 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,20 +0,0 @@ -# Benchmark Scripts - -## Maintained Entry Points - -| Script | Status | Purpose | -| --- | --- | --- | -| `benchmark_hnsw_vs_omega.py` | maintained | Generic JSON-driven HNSW vs OMEGA runner. | -| `benchmark_cohere_1m.py` | compatibility wrapper | Preset wrapper around `benchmark_hnsw_vs_omega.py --dataset cohere_1m`. | -| `benchmark_cohere_10m.py` | compatibility wrapper | Preset wrapper around `benchmark_hnsw_vs_omega.py --dataset cohere_10m`. | - -## Internal Perf Helpers - -| Script | Status | Purpose | -| --- | --- | --- | -| `perf_hnsw_hooks_microbench.sh` | internal | Run `perf stat/record` against `hnsw_hooks_microbench`. | -| `perf_ab_search_core.sh` | internal | Compare HNSW, empty-hooks, and hooks-only OMEGA search paths with `perf`. | -| `gcov.sh` | internal | Coverage helper for local development. 
| - -These helpers assume a prepared benchmark environment and are not part of the -stable user-facing benchmarking interface. diff --git a/tools/core/README.md b/tools/core/README.md index 37b524a70..d617cb095 100644 --- a/tools/core/README.md +++ b/tools/core/README.md @@ -1,48 +1,138 @@ -# Core Tools -This directory mixes product-adjacent command-line tools with internal -benchmark helpers. The table below is the maintenance contract for each group. +# Benchmarking scripts -## Maintained Tools +This directory contains benchmarking scripts and reproducing steps. -These binaries are part of the normal local benchmarking / debugging workflow - and should keep building with the rest of the tree. +## COHERE experiments -| Tool | Status | Purpose | -| --- | --- | --- | -| `txt2vecs` | maintained | Convert text vectors into zvec binary format. | -| `local_builder` | maintained | Build an index from YAML config. | -| `recall` | maintained | Offline recall evaluation from YAML config. | -| `bench` | maintained | Throughput / latency benchmarking from YAML config. | +### Getting COHERE Data -## Internal Perf Tools +Please download the COHERE 10M dataset to cohere_large_10m as follows: -These tools exist to answer OMEGA and HNSW integration questions. They are -useful for development, but they are not general product entrypoints. 
+```bash +neighbors_head_1p.parquet +neighbors_tail_1pgit.parquet +neighbors_labels_label_20p.parquet +neighbors_labels_label_50p.parquet +neighbors_labels_label_80p.parquet +neighbors_labels_label_95p.parquet +neighbors.parquet +shuffle_train-00-of-10.parquet +shuffle_train-01-of-10.parquet +shuffle_train-02-of-10.parquet +shuffle_train-03-of-10.parquet +shuffle_train-04-of-10.parquet +shuffle_train-05-of-10.parquet +shuffle_train-06-of-10.parquet +shuffle_train-07-of-10.parquet +shuffle_train-08-of-10.parquet +shuffle_train-09-of-10.parquet +scalar_labels.parquet +test.parquet +``` -| Tool | Status | Purpose | Typical entrypoint | -| --- | --- | --- | --- | -| `hnsw_hooks_microbench` | internal | Compare raw HNSW, empty hooks, and OMEGA hooks on the same search core. | `scripts/perf_hnsw_hooks_microbench.sh` | -| `omega_predict_microbench` | internal | Measure standalone OMEGA prediction cost outside the full search loop. | Invoke binary directly with a saved model. | +### Preparing Environment +Clone code and init: +```bash +$ git clone git@github.com:alibaba/zvec.git +$ cd zvec +$ git submodule update --init +``` -`hnsw_hooks_microbench` assumes a persisted HNSW index and benchmark query set. -It is intended for single-machine profiling, not for end-user benchmarking. +Make build docker image: +```bash +docker build -t zvec/build-image:latest -f ./.github/workflows/docker/Dockerfile.ubuntu18.10-glibc228 . +``` -## Compatibility / Reference Tools +Start bulld container: +```bash +docker run -it --net=host -d -e DEBUG_MODE=true --user root --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v /home/zvec/:/home/zvec/ -w /home/zvec --name=build_zvec zvec/build-image:latest bash +``` -These binaries are retained so older YAML-based flows and historical result -reproduction still work, but new work should prefer the maintained entrypoints -above. 
+Turn-off complation option: +``` +option(BUILD_PYTHON_BINDINGS "Build Python bindings using pybind11" ON) +=> +option(BUILD_PYTHON_BINDINGS "Build Python bindings using pybind11" OFF) +``` -| Tool | Status | Purpose | -| --- | --- | --- | -| `local_builder_original` | compatibility | Reference copy of the legacy builder flow. | -| `recall_original` | compatibility | Reference copy of the legacy recall flow. | -| `bench_original` | compatibility | Reference copy of the legacy bench flow. | -| `convert_cohere_parquet.py` | compatibility | Dataset conversion helper for historical Cohere experiments. | +Build source code: +``` +$ docker exec -it build_zvec bash +$ cd /home/zvec/workspace/zvec +$ mkdir build +$ cd build +$ cmake -DENABLE_SKYLAKE=ON -DCMAKE_BUILD_TYPE=Release .. +``` -## Notes +### Converting Dataset +Export vector data using python script: +```bash +$ mkdir 10m.output +$ python3 convert_cohere_parquet.py +``` -- The JSON-driven OMEGA vs HNSW workflow lives under [`scripts/`](../scripts). -- Perf shell wrappers in [`scripts/`](../scripts) are internal-only and assume a - prepared local environment plus existing benchmark artifacts. +Convert vector data to binary formatted file. 
+```bash +/home/zvec/workspace/zvec/bin/txt2vecs -input=cohere_train_vector_10m.txt --output=cohere_train_vector_10m.zvec.vecs --dimension=768 +``` + +### Preparing Bench Config +Prepare Build Config + +```yaml +BuilderCommon: + BuilderClass: HnswStreamer + BuildFile: /home/zvec/bench/data/10m/cohere_train_vector_10m.zvec.vecs + NeedTrain: true + TrainFile: /home/zvec/bench/data/10m/cohere_train_vector_10m.zvec.vecs + DumpPath: /home/zvec/bench/config/cohere_train_vector_10m.index + IndexPath: /home/zvec/bench/config/cohere_train_vector_10m.2.index + + ConverterName: CosineInt8Converter + MetricName: Cosine + + ThreadCount: 16 + +BuilderParams: + proxima.general.builder.thread_count: !!int 16 + proxima.hnsw.builder.thread_count: !!int 16 +``` + +Prepare Search Config + +```yaml +SearcherCommon: + SearcherClass: HnswStreamer + IndexPath: /home/zvec/bench/config/cohere_train_vector_10m.2.index + TopK: 1,10,50,100 + QueryFile: /home/zvec/bench/data/10m/cohere_test_vector_1000.new.txt + QueryType: float + QueryFirstSep: ";" + QuerySecondSep: " " + GroundTruthFile: /home/zvec/bench/data/10m/neighbors.txt + RecallThreadCount: 1 + BenchThreadCount: 16 + BenchIterCount: 1000000000 + CompareById: true + +SearcherParams: + proxima.hnsw.streamer.ef: !!int 250 +``` + +### Building Index +Conduct Build +```bash +$ /home/zvec/workspace/zvec/build/bin/local_build_original ./build.yaml +``` + +### Performing Bench +Conduct Recall +```bash +$ /home/zvec/workspace/zvec/build/bin/recall_original ./search.yaml +``` + +Conduct Bench +```bash +$ /home/zvec/workspace/zvec/build/bin/bench_original ./search.yaml +``` From b279fa21372980977484b7522c6f1135d79e7fd4 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 19:38:39 +0800 Subject: [PATCH 069/126] cleanup: restore tool readme formatting --- tools/core/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/core/README.md b/tools/core/README.md index d617cb095..ecd8fabdd 100644 --- a/tools/core/README.md +++ 
b/tools/core/README.md @@ -136,3 +136,4 @@ Conduct Bench ```bash $ /home/zvec/workspace/zvec/build/bin/bench_original ./search.yaml ``` + From f2e1dc81601b484a22ee6e30cdb7e46b6cf44c75 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 19:40:16 +0800 Subject: [PATCH 070/126] cleanup: align tool readme with main --- tools/core/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/core/README.md b/tools/core/README.md index ecd8fabdd..8a25d55ad 100644 --- a/tools/core/README.md +++ b/tools/core/README.md @@ -137,3 +137,4 @@ Conduct Bench $ /home/zvec/workspace/zvec/build/bin/bench_original ./search.yaml ``` + From 4f67fb19c4bba901c0ce64546b142850c1ab8ef8 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 19:55:09 +0800 Subject: [PATCH 071/126] cleanup: drop stale omega tests and dead searcher paths --- python/tests/test_collection.py | 382 ---------------- src/core/algorithm/omega/omega_searcher.cc | 126 +----- src/core/algorithm/omega/omega_searcher.h | 86 +--- src/core/interface/indexes/omega_index.cc | 43 +- src/db/training/training_data_collector.cc | 31 -- src/db/training/training_data_collector.h | 22 - src/include/zvec/core/interface/index.h | 10 - tests/core/algorithm/CMakeLists.txt | 1 - tests/core/algorithm/omega/CMakeLists.txt | 14 - .../algorithm/omega/omega_searcher_test.cc | 425 ------------------ 10 files changed, 17 insertions(+), 1123 deletions(-) delete mode 100644 tests/core/algorithm/omega/CMakeLists.txt delete mode 100644 tests/core/algorithm/omega/omega_searcher_test.cc diff --git a/python/tests/test_collection.py b/python/tests/test_collection.py index 7ab49f9f5..7d021d6fd 100644 --- a/python/tests/test_collection.py +++ b/python/tests/test_collection.py @@ -32,9 +32,6 @@ IndexType, VectorQuery, OptimizeOption, - OmegaIndexParam, - OmegaQueryParam, - MetricType, ) # ==================== Common ==================== @@ -1046,382 +1043,3 @@ def test_collection_query_with_weighted_reranker_by_hybrid_vector( 
self, collection_with_multiple_docs: Collection, multiple_docs ): pass - - -# ---------------------------- -# OMEGA Index Test Case -# ---------------------------- - - -class TestOmegaFullWorkflow: - """ - Complete end-to-end test for OMEGA adaptive search. - - This test validates the entire OMEGA workflow: - 1. Collection creation with OmegaIndexParam - 2. Data insertion (100,000 documents to meet min_vector_threshold) - 3. Automatic training triggered by optimize() - 4. Model file generation (LightGBM model + lookup tables) - 5. Search functionality with OMEGA early stopping enabled - 6. Recall validation - """ - - def test_omega_end_to_end_workflow(self, tmp_path_factory): - """Full OMEGA workflow: create → insert 100k docs → train → search with early stopping.""" - import numpy as np - import os - - print("\n" + "="*80) - print("OMEGA End-to-End Workflow Test (100k documents)") - print("="*80) - - # Step 1: Create collection with OMEGA index - print("\n[Step 1/6] Creating OMEGA collection...") - temp_dir = tmp_path_factory.mktemp("omega_e2e") - collection_path = str(temp_dir / "omega_collection") - - schema = zvec.CollectionSchema( - name="omega_e2e_test", - fields=[ - FieldSchema("id", DataType.INT64, nullable=False), - FieldSchema("name", DataType.STRING, nullable=False), - ], - vectors=[ - VectorSchema( - "embedding", - DataType.VECTOR_FP32, - dimension=128, - index_param=OmegaIndexParam( - metric_type=MetricType.L2, - m=16, - ef_construction=200, - min_vector_threshold=100000, # Explicitly set threshold - ), - ), - ], - ) - - collection = zvec.create_and_open( - path=collection_path, - schema=schema, - option=CollectionOption() - ) - - # Verify OMEGA index param - embedding_field = collection.schema.vector("embedding") - assert embedding_field.index_param.type == IndexType.OMEGA - assert embedding_field.index_param.min_vector_threshold == 100000 - print(f" ✓ Collection created with OMEGA index") - print(f" ✓ Index params: m={embedding_field.index_param.m}, 
" - f"ef_construction={embedding_field.index_param.ef_construction}, " - f"min_vector_threshold={embedding_field.index_param.min_vector_threshold}") - - # Step 2: Insert 100,000 documents to exceed threshold - print("\n[Step 2/6] Inserting 100,000 documents...") - docs = [] - np.random.seed(42) - num_docs = 100000 - for i in range(num_docs): - vector = np.random.randn(128).astype(np.float32) - vector = vector / np.linalg.norm(vector) - docs.append( - Doc( - id=f"{i}", - fields={"id": i, "name": f"doc_{i}"}, - vectors={"embedding": vector.tolist()}, - ) - ) - - batch_size = 1000 - for i in range(0, len(docs), batch_size): - batch = docs[i : i + batch_size] - result = collection.insert(batch) - assert all(r.ok() for r in result) - if (i // batch_size) % 10 == 0: - print(f" Progress: {i}/{num_docs} documents inserted...") - - assert collection.stats.doc_count == len(docs) - print(f" ✓ Inserted {len(docs)} documents (exceeds min_vector_threshold)") - - # Step 3: Flush to persist data - print("\n[Step 3/6] Flushing data...") - collection.flush() - print(f" ✓ Data flushed") - - # Step 4: Trigger training via optimize - print("\n[Step 4/6] Triggering training via optimize()...") - collection.optimize(option=OptimizeOption()) - print(f" ✓ Optimize completed (merge + auto-training)") - - # Step 5: Verify model files - print("\n[Step 5/6] Verifying model files...") - model_files_found = False - required_files = [ - "model.txt", - "threshold_table.txt", - "interval_table.txt", - "gt_collected_table.txt", - "gt_cmps_all_table.txt" - ] - - # Search for omega_model directory - for item in os.listdir(collection_path): - item_path = os.path.join(collection_path, item) - if os.path.isdir(item_path): - model_dir = os.path.join(item_path, "omega_model") - if os.path.exists(model_dir): - print(f" Found model directory: {model_dir}") - - all_exist = True - for fname in required_files: - fpath = os.path.join(model_dir, fname) - if os.path.exists(fpath): - size = os.path.getsize(fpath) - 
print(f" ✓ {fname} ({size} bytes)") - else: - print(f" ✗ {fname} MISSING") - all_exist = False - - if all_exist: - model_files_found = True - break - - assert model_files_found, "OMEGA model files not found after training" - print(f" ✅ All required model files generated") - - # Step 6: Test search with OMEGA early stopping (vector count >= threshold) - print("\n[Step 6/6] Testing search with OMEGA early stopping enabled...") - print(f" Note: OMEGA early stopping is ENABLED (doc_count={num_docs} >= threshold=100000)") - print(f" OMEGA target_recall = 0.80") - - n_test_queries = 1000 - topk = 100 - target_recall = 0.80 - - # Generate NEW random query vectors (not from base vectors) - print(f" Generating {n_test_queries} random query vectors...") - np.random.seed(12345) # Different seed from base vectors - query_vectors = [] - for i in range(n_test_queries): - qv = np.random.randn(128).astype(np.float32) - qv = qv / np.linalg.norm(qv) - query_vectors.append(qv.tolist()) - - # Compute ground truth using brute-force - print(f" Computing ground truth (brute-force) for recall evaluation...") - base_vectors = np.array([docs[i].vector("embedding") for i in range(num_docs)]) - query_vectors_np = np.array(query_vectors) - - # Compute all distances (L2) - ground_truth_indices = [] - for i in range(n_test_queries): - qv = query_vectors_np[i] - distances = np.sum((base_vectors - qv) ** 2, axis=1) - gt_indices = np.argsort(distances)[:topk] - ground_truth_indices.append(set(str(idx) for idx in gt_indices)) - - # Run OMEGA search and compute recall - print(f" Running {n_test_queries} OMEGA searches with topk={topk}, target_recall={target_recall}...") - recalls = [] - for i in range(n_test_queries): - results = collection.query( - VectorQuery( - field_name="embedding", - vector=query_vectors[i], - param=OmegaQueryParam(ef=1000, target_recall=target_recall) - ), - topk=topk - ) - - result_ids = {r.id for r in results} - gt_ids = ground_truth_indices[i] - - # Compute recall: 
|intersection| / |ground_truth| - recall = len(result_ids & gt_ids) / len(gt_ids) if gt_ids else 1.0 - recalls.append(recall) - - if (i + 1) % 200 == 0: - print(f" Progress: {i+1}/{n_test_queries} queries completed...") - - avg_recall = np.mean(recalls) - print(f" Average Recall@{topk}: {avg_recall:.4f}") - - # Validate recall meets target - assert avg_recall >= target_recall, f"Recall too low: {avg_recall:.4f} < {target_recall}" - print(f" ✅ Recall meets target ({avg_recall:.4f} >= {target_recall})") - - # Summary - print("\n" + "="*80) - print("✅ OMEGA End-to-End Workflow PASSED") - print(" 1. ✓ Collection created with OMEGA index") - print(f" 2. ✓ {len(docs)} documents inserted (>= min_vector_threshold)") - print(" 3. ✓ Training triggered and completed") - print(" 4. ✓ All model files generated (5 files)") - print(" 5. ✓ OMEGA early stopping ENABLED during search") - print(f" 6. ✓ {n_test_queries} queries, topk={topk}, recall: {avg_recall:.4f} >= {target_recall}") - print("="*80) - - def test_omega_fallback_to_hnsw(self, tmp_path_factory): - """ - Test OMEGA fallback behavior when document count < min_vector_threshold. - - Verifies that: - 1. Training still occurs during optimize() - 2. Search falls back to standard HNSW (OMEGA disabled) - 3. 
Results are identical to pure HNSW search - """ - import numpy as np - import os - - print("\n" + "="*80) - print("OMEGA Fallback to HNSW Test (< min_vector_threshold)") - print("="*80) - - # Step 1: Create OMEGA collection - print("\n[Step 1/5] Creating OMEGA collection...") - temp_dir = tmp_path_factory.mktemp("omega_fallback") - collection_path = str(temp_dir / "omega_fallback_collection") - - schema = zvec.CollectionSchema( - name="omega_fallback_test", - fields=[ - FieldSchema("id", DataType.INT64, nullable=False), - FieldSchema("name", DataType.STRING, nullable=False), - ], - vectors=[ - VectorSchema( - "embedding", - DataType.VECTOR_FP32, - dimension=128, - index_param=OmegaIndexParam( - metric_type=MetricType.L2, - m=16, - ef_construction=200, - min_vector_threshold=100000, # Explicitly set threshold - ), - ), - ], - ) - - collection = zvec.create_and_open( - path=collection_path, - schema=schema, - option=CollectionOption() - ) - - print(f" ✓ Collection created with min_vector_threshold=100000") - - # Step 2: Insert only 1000 documents (< threshold) - print("\n[Step 2/5] Inserting 1,000 documents (< min_vector_threshold)...") - docs = [] - np.random.seed(42) - num_docs = 1000 - for i in range(num_docs): - vector = np.random.randn(128).astype(np.float32) - vector = vector / np.linalg.norm(vector) - docs.append( - Doc( - id=f"{i}", - fields={"id": i, "name": f"doc_{i}"}, - vectors={"embedding": vector.tolist()}, - ) - ) - - batch_size = 100 - for i in range(0, len(docs), batch_size): - batch = docs[i : i + batch_size] - result = collection.insert(batch) - assert all(r.ok() for r in result) - - assert collection.stats.doc_count == len(docs) - print(f" ✓ Inserted {len(docs)} documents") - print(f" ✓ Doc count ({num_docs}) < min_vector_threshold (100000)") - - # Step 3: Flush and optimize (training will occur) - print("\n[Step 3/5] Flushing and optimizing...") - collection.flush() - collection.optimize(option=OptimizeOption()) - print(f" ✓ Optimize completed 
(training executed)") - - # Step 4: Verify model files were generated despite fallback - print("\n[Step 4/5] Verifying model files (training should have occurred)...") - model_files_found = False - for item in os.listdir(collection_path): - item_path = os.path.join(collection_path, item) - if os.path.isdir(item_path): - model_dir = os.path.join(item_path, "omega_model") - if os.path.exists(model_dir) and os.path.exists(os.path.join(model_dir, "model.txt")): - model_files_found = True - print(f" ✓ Model files found (training executed)") - break - - assert model_files_found, "Model files should exist even when fallback occurs" - - # Step 5: Test search with fallback behavior - print("\n[Step 5/5] Testing search with OMEGA disabled (fallback to HNSW)...") - print(f" Note: OMEGA early stopping is DISABLED (doc_count={num_docs} < threshold=100000)") - print(f" Expected: Search uses standard HNSW algorithm") - - n_test_queries = 100 - topk = 10 - target_recall = 0.80 - - # Generate NEW random query vectors - print(f" Generating {n_test_queries} random query vectors...") - np.random.seed(12345) - query_vectors = [] - for i in range(n_test_queries): - qv = np.random.randn(128).astype(np.float32) - qv = qv / np.linalg.norm(qv) - query_vectors.append(qv.tolist()) - - # Compute ground truth using brute-force - print(f" Computing ground truth (brute-force) for recall evaluation...") - base_vectors = np.array([docs[i].vector("embedding") for i in range(num_docs)]) - query_vectors_np = np.array(query_vectors) - - ground_truth_indices = [] - for i in range(n_test_queries): - qv = query_vectors_np[i] - distances = np.sum((base_vectors - qv) ** 2, axis=1) - gt_indices = np.argsort(distances)[:topk] - ground_truth_indices.append(set(str(idx) for idx in gt_indices)) - - # Run search and compute recall - print(f" Running {n_test_queries} HNSW searches with topk={topk}...") - recalls = [] - for i in range(n_test_queries): - # Even with OmegaQueryParam, OMEGA early stopping is disabled 
- # because doc_count < min_vector_threshold - results = collection.query( - VectorQuery( - field_name="embedding", - vector=query_vectors[i], - param=OmegaQueryParam(target_recall=target_recall) - ), - topk=topk - ) - - result_ids = {r.id for r in results} - gt_ids = ground_truth_indices[i] - - recall = len(result_ids & gt_ids) / len(gt_ids) if gt_ids else 1.0 - recalls.append(recall) - - avg_recall = np.mean(recalls) - print(f" Average Recall@{topk}: {avg_recall:.4f}") - - # For fallback to HNSW, recall should still be high (pure HNSW performance) - assert avg_recall >= target_recall, f"Recall too low: {avg_recall:.4f} < {target_recall}" - print(f" ✅ Recall meets target (standard HNSW performance)") - - # Summary - print("\n" + "="*80) - print("✅ OMEGA Fallback Test PASSED") - print(" 1. ✓ Collection created with min_vector_threshold=100000") - print(f" 2. ✓ {num_docs} documents inserted (< threshold)") - print(" 3. ✓ Training executed during optimize()") - print(" 4. ✓ Model files generated") - print(" 5. ✓ Search falls back to HNSW (OMEGA disabled)") - print(f" 6. 
✓ {n_test_queries} queries, topk={topk}, recall: {avg_recall:.4f} >= {target_recall}") - print("="*80) diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 54e194dcf..82c52b6f2 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -34,9 +34,7 @@ OmegaSearcher::OmegaSearcher(void) target_recall_(0.95f), min_vector_threshold_(100000), current_vector_count_(0), - window_size_(100), - training_mode_enabled_(false), - current_query_id_(0) {} + window_size_(100) {} OmegaSearcher::~OmegaSearcher(void) { this->cleanup(); @@ -191,43 +189,6 @@ int OmegaSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta, return adaptive_search(query, qmeta, count, context); } -// Training mode method implementations -zvec::Status OmegaSearcher::EnableTrainingMode(bool enable) { - std::lock_guard lock(training_mutex_); - training_mode_enabled_ = enable; - - if (enable) { - LOG_INFO("OMEGA training mode ENABLED - early stopping will be disabled"); - } else { - LOG_INFO("OMEGA training mode DISABLED"); - } - - return zvec::Status::OK(); -} - -void OmegaSearcher::SetCurrentQueryId(int query_id) { - current_query_id_ = query_id; -} - -std::vector OmegaSearcher::GetTrainingRecords() const { - std::lock_guard lock(training_mutex_); - return collected_records_; // Return a copy -} - -void OmegaSearcher::ClearTrainingRecords() { - std::lock_guard lock(training_mutex_); - collected_records_.clear(); - LOG_INFO("Cleared %zu training records", collected_records_.size()); -} - -void OmegaSearcher::SetTrainingGroundTruth( - const std::vector>& ground_truth, int k_train) { - training_ground_truth_ = ground_truth; - training_k_train_ = k_train; - LOG_INFO("Set training ground truth for %zu queries, k_train=%d", - ground_truth.size(), k_train); -} - int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmeta, uint32_t count, ContextPointer &context) const { @@ 
-238,11 +199,6 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet return IndexError_InvalidArgument; } - int query_id = current_query_id_; - if (omega_ctx->training_query_id() >= 0) { - query_id = omega_ctx->training_query_id(); - } - // Read target_recall from context (per-query parameter) float target_recall = omega_ctx->target_recall(); @@ -253,12 +209,9 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet omega_topk = static_cast(count); } - // Match OmegaStreamer/reference behavior: - // training mode collects features only and must not run model inference. const bool disable_model_prediction = DisableOmegaModelPrediction(); OmegaModelHandle model_to_use = - (training_mode_enabled_ || disable_model_prediction) ? nullptr - : omega_model_; + disable_model_prediction ? nullptr : omega_model_; OmegaSearchHandle omega_search = omega_search_create_with_params( model_to_use, target_recall, omega_topk, window_size_); @@ -274,26 +227,6 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet return HnswSearcher::search_impl(query, qmeta, count, context); } - // Attach training state before the HNSW loop starts so labels observe the - // full query trajectory. 
- if (training_mode_enabled_) { - // Get ground truth for this query if available - std::vector gt_for_query; - if (query_id >= 0 && - static_cast(query_id) < training_ground_truth_.size()) { - const auto& gt = training_ground_truth_[query_id]; - gt_for_query.reserve(gt.size()); - for (uint64_t node_id : gt) { - gt_for_query.push_back(static_cast(node_id)); - } - } - omega_search_enable_training(omega_search, query_id, - gt_for_query.data(), gt_for_query.size(), - training_k_train_); - LOG_DEBUG("Training mode enabled for query_id=%d with %zu GT nodes", - query_id, gt_for_query.size()); - } - omega_ctx->clear(); omega_ctx->resize_results(count); bool early_stop_hit = false; @@ -302,9 +235,8 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet omega_ctx->reset_query(query); OmegaHookState hook_state; hook_state.search_ctx = omega_search_ctx; - hook_state.enable_early_stopping = - !training_mode_enabled_ && !disable_model_prediction; - hook_state.per_cmp_reporting = training_mode_enabled_; + hook_state.enable_early_stopping = !disable_model_prediction; + hook_state.per_cmp_reporting = false; ResetOmegaHookState(&hook_state); HnswAlgorithm::SearchHooks hooks; hooks.user_data = &hook_state; @@ -333,56 +265,6 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu", cmps, hops, omega_ctx->topk_heap().size()); - // Collect training records if in training mode - if (training_mode_enabled_) { - size_t record_count = omega_search_get_training_records_count(omega_search); - if (record_count > 0) { - const void* records_ptr = omega_search_get_training_records(omega_search); - const auto* records_vec = - static_cast*>(records_ptr); - - for (size_t i = 0; i < record_count; ++i) { - const auto& omega_record = (*records_vec)[i]; - core_interface::TrainingRecord record; - record.query_id = omega_record.query_id; - record.hops_visited = omega_record.hops; - 
record.cmps_visited = omega_record.cmps; - record.dist_1st = omega_record.dist_1st; - record.dist_start = omega_record.dist_start; - - // Copy 7 traversal window statistics - if (omega_record.traversal_window_stats.size() == 7) { - std::copy(omega_record.traversal_window_stats.begin(), - omega_record.traversal_window_stats.end(), - record.traversal_window_stats.begin()); - } else { - LOG_WARN("Unexpected traversal_window_stats size: %zu (expected 7)", - omega_record.traversal_window_stats.size()); - } - - // Label is already computed in real-time during search - record.label = omega_record.label; - omega_ctx->add_training_record(std::move(record)); - } - - LOG_DEBUG("Collected %zu training records for query_id=%d", - record_count, query_id); - } - - size_t gt_cmps_count = omega_search_get_gt_cmps_count(omega_search); - if (gt_cmps_count > 0) { - const int* gt_cmps_ptr = omega_search_get_gt_cmps(omega_search); - int total_cmps = omega_search_get_total_cmps(omega_search); - if (gt_cmps_ptr != nullptr) { - std::vector gt_cmps_vec(gt_cmps_ptr, gt_cmps_ptr + gt_cmps_count); - for (auto& v : gt_cmps_vec) { - if (v < 0) v = total_cmps; - } - omega_ctx->set_gt_cmps(gt_cmps_vec, total_cmps); - } - } - } - // Cleanup omega_search_destroy(omega_search); diff --git a/src/core/algorithm/omega/omega_searcher.h b/src/core/algorithm/omega/omega_searcher.h index c4cfd5a7a..7554b57c1 100644 --- a/src/core/algorithm/omega/omega_searcher.h +++ b/src/core/algorithm/omega/omega_searcher.h @@ -14,12 +14,9 @@ #pragma once #include -#include -#include #include "../hnsw/hnsw_searcher.h" #include #include -#include #include #include @@ -38,76 +35,9 @@ class OmegaSearcher : public HnswSearcher { OmegaSearcher(void); ~OmegaSearcher(void); - OmegaSearcher(const OmegaSearcher &) = delete; + OmegaSearcher(const OmegaSearcher &) = delete; OmegaSearcher &operator=(const OmegaSearcher &) = delete; - public: - // OMEGA Training Mode Support - /** - * @brief Enable or disable training mode for 
collecting training features. - * - * When training mode is enabled: - * - Early stopping is disabled (complete HNSW search) - * - Training features are collected for each visited node - * - query_id must be set via SetCurrentQueryId() before each search - * - * @param enable True to enable training mode, false to disable - * @return Status indicating success or failure - */ - zvec::Status EnableTrainingMode(bool enable); - - /** - * @brief Set the query ID for the next search operation. - * - * Must be called before search_impl() when training mode is enabled. - * The query_id will be included in all training records collected - * during that search. - * - * @param query_id Unique identifier for the query - */ - void SetCurrentQueryId(int query_id); - - /** - * @brief Get all collected training records. - * - * Returns a copy of all training records collected since training mode - * was enabled or since the last ClearTrainingRecords() call. - * - * @return Vector of TrainingRecord structures - */ - std::vector GetTrainingRecords() const; - - /** - * @brief Clear all collected training records. - * - * Removes all training records from internal storage. Useful for - * starting a fresh training data collection session. - */ - void ClearTrainingRecords(); - - /** - * @brief Set ground truth for training queries. - * - * Ground truth is used for real-time label computation during training. - * Labels are computed as: label=1 iff top k_train GT nodes are in current topk. - * - * @param ground_truth 2D vector: ground_truth[query_id][rank] = node_id - * @param k_train Number of GT nodes to check for label (typically 1) - */ - void SetTrainingGroundTruth(const std::vector>& ground_truth, - int k_train = 1); - - /** - * @brief Public search method for OmegaStreamer to call - * - * This allows OmegaStreamer to delegate search to OmegaSearcher - * without needing to access protected methods. 
- */ - int search(const void *query, const IndexQueryMeta &qmeta, - uint32_t count, ContextPointer &context) const { - return search_impl(query, qmeta, count, context); - } - protected: //! Initialize Searcher virtual int init(const ailego::Params ¶ms) override; @@ -139,12 +69,6 @@ class OmegaSearcher : public HnswSearcher { private: //! Check if OMEGA mode should be used bool should_use_omega() const { - // Use OMEGA adaptive search if: - // 1. Training mode is enabled (to collect features even without model), OR - // 2. OMEGA is enabled and model is loaded - if (training_mode_enabled_) { - return true; // Always use adaptive_search in training mode - } if (std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION") != nullptr && std::string(std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION")) != "0") { return true; @@ -167,14 +91,6 @@ class OmegaSearcher : public HnswSearcher { uint32_t min_vector_threshold_; size_t current_vector_count_; int window_size_; - - // Training mode support - bool training_mode_enabled_; - int current_query_id_; - mutable std::mutex training_mutex_; - mutable std::vector collected_records_; - std::vector> training_ground_truth_; // [query_id][rank] = node_id - int training_k_train_; // Number of GT nodes to check for label }; } // namespace core diff --git a/src/core/interface/indexes/omega_index.cc b/src/core/interface/indexes/omega_index.cc index c6afffb6f..f48942687 100644 --- a/src/core/interface/indexes/omega_index.cc +++ b/src/core/interface/indexes/omega_index.cc @@ -59,36 +59,19 @@ int OmegaIndex::CreateAndInitStreamer(const BaseIndexParam ¶m) { zvec::Status OmegaIndex::EnableTrainingMode(bool enable) { - LOG_INFO("OmegaIndex::EnableTrainingMode called with enable=%d", enable); - training_mode_enabled_ = enable; - - // Delegate to OmegaStreamer if available - if (streamer_) { - LOG_INFO("OmegaIndex: streamer_ exists, attempting dynamic_cast to OmegaStreamer"); - auto* omega_streamer = dynamic_cast(streamer_.get()); - if (omega_streamer) 
{ - LOG_INFO("OmegaIndex: Successfully cast to OmegaStreamer, calling EnableTrainingMode"); - omega_streamer->EnableTrainingMode(enable); - return zvec::Status::OK(); - } else { - LOG_WARN("OmegaIndex: Failed to cast streamer_ to OmegaStreamer"); - } - } else { - LOG_WARN("OmegaIndex: streamer_ is null"); + if (auto* omega_streamer = + streamer_ ? dynamic_cast(streamer_.get()) + : nullptr) { + omega_streamer->EnableTrainingMode(enable); } - return zvec::Status::OK(); } void OmegaIndex::SetCurrentQueryId(int query_id) { - current_query_id_ = query_id; - - // Delegate to OmegaStreamer if available - if (streamer_) { - auto* omega_streamer = dynamic_cast(streamer_.get()); - if (omega_streamer) { - omega_streamer->SetCurrentQueryId(query_id); - } + if (auto* omega_streamer = + streamer_ ? dynamic_cast(streamer_.get()) + : nullptr) { + omega_streamer->SetCurrentQueryId(query_id); } } @@ -105,12 +88,10 @@ void OmegaIndex::ClearTrainingRecords() { void OmegaIndex::SetTrainingGroundTruth( const std::vector>& ground_truth, int k_train) { - // Delegate to OmegaStreamer if available - if (streamer_) { - auto* omega_streamer = dynamic_cast(streamer_.get()); - if (omega_streamer) { - omega_streamer->SetTrainingGroundTruth(ground_truth, k_train); - } + if (auto* omega_streamer = + streamer_ ? dynamic_cast(streamer_.get()) + : nullptr) { + omega_streamer->SetTrainingGroundTruth(ground_truth, k_train); } } diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 307da137a..9a116b052 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -869,37 +869,6 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( return ground_truth; } -void TrainingDataCollector::FillLabels( - std::vector* records, - const std::vector>& ground_truth, - const std::vector>& search_results, - size_t k_train) { - // NOTE: Labels are now computed in real-time during search. 
- // This function is kept for backward compatibility but only counts existing labels. - (void)ground_truth; - (void)search_results; - (void)k_train; - - if (!records || records->empty()) { - LOG_WARN("No records to fill labels"); - return; - } - - // Count existing labels (already computed in real-time during search) - size_t positive_count = 0; - size_t negative_count = 0; - for (const auto& record : *records) { - if (record.label > 0) { - positive_count++; - } else { - negative_count++; - } - } - - LOG_INFO("Labels already computed in real-time: %zu positive, %zu negative (k_train=%zu)", - positive_count, negative_count, k_train); -} - core_interface::GtCmpsData TrainingDataCollector::ComputeGtCmps( const std::vector& records, const std::vector>& ground_truth, diff --git a/src/db/training/training_data_collector.h b/src/db/training/training_data_collector.h index b456a8d06..104a93fec 100644 --- a/src/db/training/training_data_collector.h +++ b/src/db/training/training_data_collector.h @@ -152,28 +152,6 @@ class TrainingDataCollector { MetricType metric_type = MetricType::IP, const std::vector& indexers = {}); - /** - * @brief Fill labels in training records based on ground truth - * - * Label=1 iff the top K_train ground truth nodes are ALL in collected_node_ids. - * Label=0 otherwise. 
- * - * This follows big-ann-benchmarks labeling strategy: - * - Training records represent search states - * - label=1 means "we've found enough results, can stop now" - * - label=0 means "need to continue searching" - * - * @param records Training records to fill (modified in-place) - * @param ground_truth Ground truth doc IDs per query (sorted by distance) - * @param search_results Search result doc IDs per query (unused but kept for compatibility) - * @param k_train Number of top ground truth results that must be collected - */ - static void FillLabels( - std::vector* records, - const std::vector>& ground_truth, - const std::vector>& search_results, - size_t k_train); - /** * @brief Compute gt_cmps data from training records and ground truth * diff --git a/src/include/zvec/core/interface/index.h b/src/include/zvec/core/interface/index.h index 11b6888e3..63e80cc2c 100644 --- a/src/include/zvec/core/interface/index.h +++ b/src/include/zvec/core/interface/index.h @@ -32,14 +32,9 @@ #include #include #include -#include #include #include -namespace zvec::core { -class OmegaSearcher; // Forward declaration -} - namespace zvec::core_interface { class IndexFactory; @@ -362,11 +357,6 @@ class OmegaIndex : public HNSWIndex, public ITrainingCapable { virtual int _prepare_for_search( const VectorData &query, const BaseIndexQueryParam::Pointer &search_param, core::IndexContext::Pointer &context) override; - - private: - // Training mode state (tracked locally for convenience) - bool training_mode_enabled_{false}; - int current_query_id_{0}; }; diff --git a/tests/core/algorithm/CMakeLists.txt b/tests/core/algorithm/CMakeLists.txt index ca54094e6..0e9aa7259 100644 --- a/tests/core/algorithm/CMakeLists.txt +++ b/tests/core/algorithm/CMakeLists.txt @@ -7,4 +7,3 @@ cc_directories(flat_sparse) cc_directories(ivf) cc_directories(hnsw) cc_directories(hnsw_sparse) -cc_directories(omega) diff --git a/tests/core/algorithm/omega/CMakeLists.txt 
b/tests/core/algorithm/omega/CMakeLists.txt deleted file mode 100644 index fd89e8275..000000000 --- a/tests/core/algorithm/omega/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -include(${CMAKE_SOURCE_DIR}/cmake/bazel.cmake) - -file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc) - -foreach(CC_SRCS ${ALL_TEST_SRCS}) - get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE) - cc_gtest( - NAME ${CC_TARGET} - STRICT - LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_hnsw core_knn_omega - SRCS ${CC_SRCS} - INCS . ${CMAKE_SOURCE_DIR}/src/core ${CMAKE_SOURCE_DIR}/src/core/algorithm/omega ${CMAKE_SOURCE_DIR}/src/core/algorithm/hnsw - ) -endforeach() diff --git a/tests/core/algorithm/omega/omega_searcher_test.cc b/tests/core/algorithm/omega/omega_searcher_test.cc deleted file mode 100644 index 0b450c7a1..000000000 --- a/tests/core/algorithm/omega/omega_searcher_test.cc +++ /dev/null @@ -1,425 +0,0 @@ -// Copyright 2025-present the zvec project -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include -#include -#include -#include -#include -#include "zvec/core/framework/index_builder.h" -#include "zvec/core/framework/index_factory.h" -#include "zvec/core/framework/index_meta.h" - -using namespace std; -using namespace testing; -using namespace zvec::ailego; - -#if defined(__GNUC__) || defined(__GNUG__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-result" -#endif - -namespace zvec { -namespace core { - -constexpr size_t static dim = 16; - -class OmegaSearcherTest : public testing::Test { - protected: - void SetUp(void); - void TearDown(void); - - static std::string _dir; - static shared_ptr _index_meta_ptr; -}; - -std::string OmegaSearcherTest::_dir("OmegaSearcherTest/"); -shared_ptr OmegaSearcherTest::_index_meta_ptr; - -void OmegaSearcherTest::SetUp(void) { - _index_meta_ptr.reset(new (nothrow) - IndexMeta(IndexMeta::DataType::DT_FP32, dim)); - _index_meta_ptr->set_metric("SquaredEuclidean", 0, ailego::Params()); -} - -void OmegaSearcherTest::TearDown(void) { - char cmdBuf[100]; - snprintf(cmdBuf, 100, "rm -rf %s", _dir.c_str()); - system(cmdBuf); -} - -// Test that OmegaSearcher falls back to HNSW when omega is disabled -TEST_F(OmegaSearcherTest, TestFallbackToHnswWhenDisabled) { - // Build index using HnswBuilder - IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder"); - ASSERT_NE(builder, nullptr); - - auto holder = - make_shared>(dim); - size_t doc_cnt = 1000UL; - for (size_t i = 0; i < doc_cnt; i++) { - NumericalVector vec(dim); - for (size_t j = 0; j < dim; ++j) { - vec[j] = i; - } - ASSERT_TRUE(holder->emplace(i, vec)); - } - - ASSERT_EQ(0, builder->init(*_index_meta_ptr, ailego::Params())); - ASSERT_EQ(0, builder->train(holder)); - ASSERT_EQ(0, builder->build(holder)); - - auto dumper = IndexFactory::CreateDumper("FileDumper"); - ASSERT_NE(dumper, nullptr); - string path = _dir + "/TestFallbackToHnswWhenDisabled"; - ASSERT_EQ(0, dumper->create(path)); - ASSERT_EQ(0, 
builder->dump(dumper)); - ASSERT_EQ(0, dumper->close()); - - // Test OmegaSearcher with omega.enabled=false (default) - IndexSearcher::Pointer omega_searcher = - IndexFactory::CreateSearcher("OmegaSearcher"); - ASSERT_TRUE(omega_searcher != nullptr); - - // Initialize without enabling omega (should fallback to HNSW) - ailego::Params params; - params.insert("omega.enabled", false); // Explicitly disable omega - ASSERT_EQ(0, omega_searcher->init(params)); - - auto storage = IndexFactory::CreateStorage("FileReadStorage"); - ASSERT_EQ(0, storage->open(path, false)); - ASSERT_EQ(0, omega_searcher->load(storage, IndexMetric::Pointer())); - auto ctx = omega_searcher->create_context(); - ASSERT_TRUE(!!ctx); - - // Perform search - NumericalVector vec(dim); - for (size_t j = 0; j < dim; ++j) { - vec[j] = 0.0; - } - IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim); - size_t topk = 50; - ctx->set_topk(topk); - ASSERT_EQ(0, omega_searcher->search_impl(vec.data(), qmeta, ctx)); - auto &results = ctx->result(); - ASSERT_EQ(topk, results.size()); - - // Verify results are sorted by distance - for (size_t k = 1; k < results.size(); ++k) { - ASSERT_LE(results[k - 1].score(), results[k].score()); - } -} - -// Test that OmegaSearcher and HnswSearcher produce identical results when omega is disabled -TEST_F(OmegaSearcherTest, TestIdenticalResultsWithHnsw) { - // Build index using HnswBuilder - IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder"); - ASSERT_NE(builder, nullptr); - - auto holder = - make_shared>(dim); - size_t doc_cnt = 500UL; - for (size_t i = 0; i < doc_cnt; i++) { - NumericalVector vec(dim); - for (size_t j = 0; j < dim; ++j) { - vec[j] = static_cast(i + j); - } - ASSERT_TRUE(holder->emplace(i, vec)); - } - - ASSERT_EQ(0, builder->init(*_index_meta_ptr, ailego::Params())); - ASSERT_EQ(0, builder->train(holder)); - ASSERT_EQ(0, builder->build(holder)); - - auto dumper = IndexFactory::CreateDumper("FileDumper"); - ASSERT_NE(dumper, nullptr); 
- string path = _dir + "/TestIdenticalResultsWithHnsw"; - ASSERT_EQ(0, dumper->create(path)); - ASSERT_EQ(0, builder->dump(dumper)); - ASSERT_EQ(0, dumper->close()); - - // Create HnswSearcher - IndexSearcher::Pointer hnsw_searcher = - IndexFactory::CreateSearcher("HnswSearcher"); - ASSERT_TRUE(hnsw_searcher != nullptr); - ASSERT_EQ(0, hnsw_searcher->init(ailego::Params())); - - auto storage1 = IndexFactory::CreateStorage("FileReadStorage"); - ASSERT_EQ(0, storage1->open(path, false)); - ASSERT_EQ(0, hnsw_searcher->load(storage1, IndexMetric::Pointer())); - - // Create OmegaSearcher with omega disabled - IndexSearcher::Pointer omega_searcher = - IndexFactory::CreateSearcher("OmegaSearcher"); - ASSERT_TRUE(omega_searcher != nullptr); - - ailego::Params params; - params.insert("omega.enabled", false); - ASSERT_EQ(0, omega_searcher->init(params)); - - auto storage2 = IndexFactory::CreateStorage("FileReadStorage"); - ASSERT_EQ(0, storage2->open(path, false)); - ASSERT_EQ(0, omega_searcher->load(storage2, IndexMetric::Pointer())); - - // Search with both searchers and compare results - NumericalVector query(dim); - for (size_t j = 0; j < dim; ++j) { - query[j] = 100.0f + j; - } - - IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim); - size_t topk = 20; - - auto hnsw_ctx = hnsw_searcher->create_context(); - hnsw_ctx->set_topk(topk); - ASSERT_EQ(0, hnsw_searcher->search_impl(query.data(), qmeta, hnsw_ctx)); - auto &hnsw_results = hnsw_ctx->result(); - - auto omega_ctx = omega_searcher->create_context(); - omega_ctx->set_topk(topk); - ASSERT_EQ(0, omega_searcher->search_impl(query.data(), qmeta, omega_ctx)); - auto &omega_results = omega_ctx->result(); - - // Results should be identical - ASSERT_EQ(hnsw_results.size(), omega_results.size()); - for (size_t k = 0; k < hnsw_results.size(); ++k) { - ASSERT_EQ(hnsw_results[k].key(), omega_results[k].key()); - ASSERT_FLOAT_EQ(hnsw_results[k].score(), omega_results[k].score()); - } -} - -// Test OmegaSearcher with RNN search 
(radius search) -TEST_F(OmegaSearcherTest, TestRnnSearchFallback) { - IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder"); - ASSERT_NE(builder, nullptr); - - auto holder = - make_shared>(dim); - size_t doc_cnt = 1000UL; - for (size_t i = 0; i < doc_cnt; i++) { - NumericalVector vec(dim); - for (size_t j = 0; j < dim; ++j) { - vec[j] = i; - } - ASSERT_TRUE(holder->emplace(i, vec)); - } - - ASSERT_EQ(0, builder->init(*_index_meta_ptr, ailego::Params())); - ASSERT_EQ(0, builder->train(holder)); - ASSERT_EQ(0, builder->build(holder)); - - auto dumper = IndexFactory::CreateDumper("FileDumper"); - ASSERT_NE(dumper, nullptr); - string path = _dir + "/TestRnnSearchFallback"; - ASSERT_EQ(0, dumper->create(path)); - ASSERT_EQ(0, builder->dump(dumper)); - ASSERT_EQ(0, dumper->close()); - - // Test OmegaSearcher with omega disabled - IndexSearcher::Pointer searcher = - IndexFactory::CreateSearcher("OmegaSearcher"); - ASSERT_TRUE(searcher != nullptr); - - ailego::Params params; - params.insert("omega.enabled", false); - ASSERT_EQ(0, searcher->init(params)); - - auto storage = IndexFactory::CreateStorage("FileReadStorage"); - ASSERT_EQ(0, storage->open(path, false)); - ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer())); - auto ctx = searcher->create_context(); - ASSERT_TRUE(!!ctx); - - NumericalVector vec(dim); - for (size_t j = 0; j < dim; ++j) { - vec[j] = 0.0; - } - IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim); - size_t topk = 50; - ctx->set_topk(topk); - ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx)); - auto &results = ctx->result(); - ASSERT_EQ(topk, results.size()); - - // Test with radius threshold - float radius = results[topk / 2].score(); - ctx->set_threshold(radius); - ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx)); - ASSERT_GT(topk, results.size()); - for (size_t k = 0; k < results.size(); ++k) { - ASSERT_GE(radius, results[k].score()); - } - - // Test Reset Threshold - ctx->reset_threshold(); - 
ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx)); - ASSERT_EQ(topk, results.size()); - ASSERT_LT(radius, results[topk - 1].score()); -} - -// Test OmegaSearcher with InnerProduct metric -TEST_F(OmegaSearcherTest, TestInnerProductFallback) { - IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder"); - ASSERT_NE(builder, nullptr); - - auto holder = - make_shared>(dim); - size_t doc_cnt = 1000UL; - for (size_t i = 0; i < doc_cnt; i++) { - NumericalVector vec(dim); - for (size_t j = 0; j < dim; ++j) { - vec[j] = i; - } - ASSERT_TRUE(holder->emplace(i, vec)); - } - - IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim); - index_meta.set_metric("InnerProduct", 0, ailego::Params()); - - ASSERT_EQ(0, builder->init(index_meta, ailego::Params())); - ASSERT_EQ(0, builder->train(holder)); - ASSERT_EQ(0, builder->build(holder)); - - auto dumper = IndexFactory::CreateDumper("FileDumper"); - ASSERT_NE(dumper, nullptr); - string path = _dir + "/TestInnerProductFallback"; - ASSERT_EQ(0, dumper->create(path)); - ASSERT_EQ(0, builder->dump(dumper)); - ASSERT_EQ(0, dumper->close()); - - // Test OmegaSearcher with omega disabled - IndexSearcher::Pointer searcher = - IndexFactory::CreateSearcher("OmegaSearcher"); - ASSERT_TRUE(searcher != nullptr); - - ailego::Params params; - params.insert("omega.enabled", false); - ASSERT_EQ(0, searcher->init(params)); - - auto storage = IndexFactory::CreateStorage("FileReadStorage"); - ASSERT_EQ(0, storage->open(path, false)); - ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer())); - auto ctx = searcher->create_context(); - ASSERT_TRUE(!!ctx); - - NumericalVector vec(dim); - for (size_t j = 0; j < dim; ++j) { - vec[j] = 1.0; - } - IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim); - size_t topk = 50; - ctx->set_topk(topk); - ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx)); - auto &results = ctx->result(); - ASSERT_EQ(topk, results.size()); - - // Verify results are sorted correctly for 
InnerProduct (descending) - for (size_t k = 1; k < results.size(); ++k) { - ASSERT_GE(results[k - 1].score(), results[k].score()); - } -} - -// Test that omega parameters don't affect HNSW fallback mode -TEST_F(OmegaSearcherTest, TestOmegaParamsIgnoredWhenDisabled) { - IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder"); - ASSERT_NE(builder, nullptr); - - auto holder = - make_shared>(dim); - size_t doc_cnt = 500UL; - for (size_t i = 0; i < doc_cnt; i++) { - NumericalVector vec(dim); - for (size_t j = 0; j < dim; ++j) { - vec[j] = i; - } - ASSERT_TRUE(holder->emplace(i, vec)); - } - - ASSERT_EQ(0, builder->init(*_index_meta_ptr, ailego::Params())); - ASSERT_EQ(0, builder->train(holder)); - ASSERT_EQ(0, builder->build(holder)); - - auto dumper = IndexFactory::CreateDumper("FileDumper"); - ASSERT_NE(dumper, nullptr); - string path = _dir + "/TestOmegaParamsIgnored"; - ASSERT_EQ(0, dumper->create(path)); - ASSERT_EQ(0, builder->dump(dumper)); - ASSERT_EQ(0, dumper->close()); - - // Create two OmegaSearcher instances with different omega params - // but both with omega disabled - IndexSearcher::Pointer searcher1 = - IndexFactory::CreateSearcher("OmegaSearcher"); - ASSERT_TRUE(searcher1 != nullptr); - - ailego::Params params1; - params1.insert("omega.enabled", false); - params1.insert("omega.target_recall", 0.95f); - params1.insert("omega.min_vector_threshold", 10000); - ASSERT_EQ(0, searcher1->init(params1)); - - IndexSearcher::Pointer searcher2 = - IndexFactory::CreateSearcher("OmegaSearcher"); - ASSERT_TRUE(searcher2 != nullptr); - - ailego::Params params2; - params2.insert("omega.enabled", false); - params2.insert("omega.target_recall", 0.85f); - params2.insert("omega.min_vector_threshold", 5000); - ASSERT_EQ(0, searcher2->init(params2)); - - auto storage1 = IndexFactory::CreateStorage("FileReadStorage"); - ASSERT_EQ(0, storage1->open(path, false)); - ASSERT_EQ(0, searcher1->load(storage1, IndexMetric::Pointer())); - - auto storage2 = 
IndexFactory::CreateStorage("FileReadStorage"); - ASSERT_EQ(0, storage2->open(path, false)); - ASSERT_EQ(0, searcher2->load(storage2, IndexMetric::Pointer())); - - // Search with both searchers - results should be identical - // since omega is disabled and both use HNSW - NumericalVector query(dim); - for (size_t j = 0; j < dim; ++j) { - query[j] = 50.0f; - } - - IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim); - size_t topk = 30; - - auto ctx1 = searcher1->create_context(); - ctx1->set_topk(topk); - ASSERT_EQ(0, searcher1->search_impl(query.data(), qmeta, ctx1)); - auto &results1 = ctx1->result(); - - auto ctx2 = searcher2->create_context(); - ctx2->set_topk(topk); - ASSERT_EQ(0, searcher2->search_impl(query.data(), qmeta, ctx2)); - auto &results2 = ctx2->result(); - - // Results should be identical despite different omega params - ASSERT_EQ(results1.size(), results2.size()); - for (size_t k = 0; k < results1.size(); ++k) { - ASSERT_EQ(results1[k].key(), results2[k].key()); - ASSERT_FLOAT_EQ(results1[k].score(), results2[k].score()); - } -} - -} // namespace core -} // namespace zvec - -#if defined(__GNUC__) || defined(__GNUG__) -#pragma GCC diagnostic pop -#endif From 0013fb44e93fbcd43877eb23ec4e1d972463e9f5 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 20:00:04 +0800 Subject: [PATCH 072/126] cleanup: trim unused omega accessors and state --- src/core/algorithm/omega/omega_context.h | 15 --------------- src/core/algorithm/omega/omega_streamer.h | 8 ++++---- .../column/vector_column/vector_column_indexer.cc | 2 -- .../column/vector_column/vector_column_indexer.h | 1 - 4 files changed, 4 insertions(+), 22 deletions(-) diff --git a/src/core/algorithm/omega/omega_context.h b/src/core/algorithm/omega/omega_context.h index 524dfeb9b..cc66c26aa 100644 --- a/src/core/algorithm/omega/omega_context.h +++ b/src/core/algorithm/omega/omega_context.h @@ -52,11 +52,6 @@ class OmegaContext : public HnswContext { return training_query_id_; } - //! 
Get training records collected during this search (no locks needed) - const std::vector& training_records() const { - return training_records_; - } - //! Move training records out (override base class virtual method) std::vector take_training_records() override { return std::move(training_records_); @@ -81,16 +76,6 @@ class OmegaContext : public HnswContext { total_cmps_ = total_cmps; } - //! Get gt_cmps per rank - const std::vector& gt_cmps_per_rank() const { - return gt_cmps_per_rank_; - } - - //! Get total cmps for this search - int total_cmps() const { - return total_cmps_; - } - //! Take gt_cmps data (override base class virtual method) std::vector take_gt_cmps() override { return std::move(gt_cmps_per_rank_); diff --git a/src/core/algorithm/omega/omega_streamer.h b/src/core/algorithm/omega/omega_streamer.h index b697390c0..41a525e1e 100644 --- a/src/core/algorithm/omega/omega_streamer.h +++ b/src/core/algorithm/omega/omega_streamer.h @@ -57,10 +57,6 @@ class OmegaStreamer : public HnswStreamer { training_k_train_ = k_train; } - // Search-mode configuration shared across searches for this streamer. - bool LoadModel(const std::string& model_dir); - bool IsModelLoaded() const; - protected: /** * @brief Override search to use OMEGA adaptive search @@ -91,6 +87,10 @@ class OmegaStreamer : public HnswStreamer { virtual int dump(const IndexDumper::Pointer &dumper) override; private: + // Search-mode configuration shared across searches for this streamer. 
+ bool LoadModel(const std::string& model_dir); + bool IsModelLoaded() const; + // Perform OMEGA adaptive search (shared between training and inference mode) int omega_search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, Context::Pointer &context, diff --git a/src/db/index/column/vector_column/vector_column_indexer.cc b/src/db/index/column/vector_column/vector_column_indexer.cc index b31e5b31f..467027472 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.cc +++ b/src/db/index/column/vector_column/vector_column_indexer.cc @@ -249,8 +249,6 @@ Status VectorColumnIndexer::EnableTrainingMode(bool enable) { } void VectorColumnIndexer::SetCurrentQueryId(int query_id) { - current_query_id_ = query_id; - // Propagate to underlying index if it exists and supports training if (index != nullptr) { if (auto* training_capable = index->GetTrainingCapability()) { diff --git a/src/db/index/column/vector_column/vector_column_indexer.h b/src/db/index/column/vector_column/vector_column_indexer.h index e7d6eb590..123c00d03 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.h +++ b/src/db/index/column/vector_column/vector_column_indexer.h @@ -213,7 +213,6 @@ class VectorColumnIndexer { // Training mode support bool training_mode_enabled_{false}; - int current_query_id_{0}; mutable std::mutex training_mutex_; mutable std::vector collected_records_; // GT cmps data: gt_cmps_map_[query_id] = {gt_cmps_per_rank, total_cmps} From dbba6dcf8e9811ebf9c4aa670a3ddb1de2a884d7 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 20:06:37 +0800 Subject: [PATCH 073/126] cleanup: drop obsolete omega benchmark helpers --- scripts/benchmark_cohere_10m.py | 25 -- scripts/benchmark_cohere_1m.py | 25 -- scripts/perf_ab_search_core.sh | 288 -------------- scripts/perf_hnsw_hooks_microbench.sh | 178 --------- tools/core/CMakeLists.txt | 10 - tools/core/hnsw_hooks_microbench.cc | 552 -------------------------- 6 files changed, 1078 
deletions(-) delete mode 100644 scripts/benchmark_cohere_10m.py delete mode 100644 scripts/benchmark_cohere_1m.py delete mode 100644 scripts/perf_ab_search_core.sh delete mode 100755 scripts/perf_hnsw_hooks_microbench.sh delete mode 100644 tools/core/hnsw_hooks_microbench.cc diff --git a/scripts/benchmark_cohere_10m.py b/scripts/benchmark_cohere_10m.py deleted file mode 100644 index 3c946ab00..000000000 --- a/scripts/benchmark_cohere_10m.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 -"""Compatibility wrapper for the generic JSON-driven HNSW vs OMEGA runner.""" - -import sys -from pathlib import Path - -import benchmark_hnsw_vs_omega - - -def main() -> int: - script_dir = Path(__file__).resolve().parent - config_path = script_dir / "benchmark_hnsw_vs_omega.json" - sys.argv = [ - str(script_dir / "benchmark_hnsw_vs_omega.py"), - "--config", - str(config_path), - "--dataset", - "cohere_10m", - *sys.argv[1:], - ] - return benchmark_hnsw_vs_omega.main() - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/benchmark_cohere_1m.py b/scripts/benchmark_cohere_1m.py deleted file mode 100644 index 33979e8d0..000000000 --- a/scripts/benchmark_cohere_1m.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 -"""Compatibility wrapper for the generic JSON-driven HNSW vs OMEGA runner.""" - -import sys -from pathlib import Path - -import benchmark_hnsw_vs_omega - - -def main() -> int: - script_dir = Path(__file__).resolve().parent - config_path = script_dir / "benchmark_hnsw_vs_omega.json" - sys.argv = [ - str(script_dir / "benchmark_hnsw_vs_omega.py"), - "--config", - str(config_path), - "--dataset", - "cohere_1m", - *sys.argv[1:], - ] - return benchmark_hnsw_vs_omega.main() - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/perf_ab_search_core.sh b/scripts/perf_ab_search_core.sh deleted file mode 100644 index 649bf6255..000000000 --- a/scripts/perf_ab_search_core.sh +++ /dev/null @@ -1,288 +0,0 @@ -#!/usr/bin/env bash -# 
Internal profiling helper for VectorDBBench search-core comparisons. -# Kept for local perf investigations; expects prepared benchmark artifacts. -set -euo pipefail - -DATASET="${1:-1m}" -CPU_CORE="${CPU_CORE:-0}" -REPEAT="${PERF_REPEAT:-5}" -MODE="${PERF_MODE:-all}" -RECORD_FREQ="${PERF_RECORD_FREQ:-999}" -TOPN="${PERF_TOPN:-60}" -CALL_GRAPH_MODE="${PERF_CALL_GRAPH_MODE:-fp}" -PERF_USER_ONLY="${PERF_USER_ONLY:-1}" - -OPENBLAS_THREADS="${OPENBLAS_NUM_THREADS:-1}" -OMP_THREADS="${OMP_NUM_THREADS:-1}" -MKL_THREADS="${MKL_NUM_THREADS:-1}" -NUMEXPR_THREADS="${NUMEXPR_NUM_THREADS:-1}" -GOTO_THREADS="${GOTO_NUM_THREADS:-1}" -VECLIB_THREADS="${VECLIB_MAXIMUM_THREADS:-1}" - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -ZVEC_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" -OUT_DIR="${PERF_OUT_DIR:-${ZVEC_ROOT}/perf_results/${DATASET}}" - -CONDA_SH="${CONDA_SH:-/root/miniconda3/etc/profile.d/conda.sh}" -CONDA_ENV="${CONDA_ENV:-bench}" -PYTHON_BIN="${PYTHON_BIN:-python}" - -if [[ -f "${CONDA_SH}" ]]; then - # shellcheck disable=SC1090 - source "${CONDA_SH}" - conda activate "${CONDA_ENV}" -fi - -PERF_EVENTS="cycles,instructions,branches,branch-misses,cache-references,cache-misses,L1-dcache-loads,L1-dcache-load-misses,LLC-loads,LLC-load-misses,dTLB-loads,dTLB-load-misses" - -COMMON_ENV=( - env - OPENBLAS_NUM_THREADS="${OPENBLAS_THREADS}" - OMP_NUM_THREADS="${OMP_THREADS}" - MKL_NUM_THREADS="${MKL_THREADS}" - NUMEXPR_NUM_THREADS="${NUMEXPR_THREADS}" - GOTO_NUM_THREADS="${GOTO_THREADS}" - VECLIB_MAXIMUM_THREADS="${VECLIB_THREADS}" -) - -PERF_RECORD_ARGS=(-F "${RECORD_FREQ}" -g --call-graph "${CALL_GRAPH_MODE}") -if [[ "${PERF_USER_ONLY}" == "1" ]]; then - PERF_RECORD_ARGS+=(--all-user) -fi - -case "${DATASET}" in - 1m) - CASE_TYPE="Performance768D1M" - HNSW_PATH="${ZVEC_ROOT}/benchmark_results/cohere_1m_hnsw" - OMEGA_PATH="${ZVEC_ROOT}/benchmark_results/cohere_1m_omega" - HNSW_LABEL="16c64g-v0.1" - OMEGA_LABEL="omega-m15-ef180-int8" - HNSW_ARGS=( - zvec - --path 
"${HNSW_PATH}" - --db-label "${HNSW_LABEL}" - --case-type "${CASE_TYPE}" - --m 15 - --ef-search 180 - --quantize-type int8 - --num-concurrency 16 - --concurrency-duration 30 - --k 100 - --skip-drop-old - --skip-load - --skip-search-concurrent - ) - OMEGA_ARGS=( - zvecomega - --path "${OMEGA_PATH}" - --db-label "${OMEGA_LABEL}" - --case-type "${CASE_TYPE}" - --m 15 - --ef-search 180 - --quantize-type int8 - --min-vector-threshold 100000 - --num-training-queries 4000 - --ef-training 500 - --window-size 100 - --ef-groundtruth 1000 - --target-recall 0.90 - --num-concurrency 16 - --concurrency-duration 30 - --k 100 - --skip-drop-old - --skip-load - --skip-search-concurrent - ) - ;; - 10m) - CASE_TYPE="Performance768D10M" - HNSW_PATH="${ZVEC_ROOT}/benchmark_results/cohere_10m_hnsw" - OMEGA_PATH="${ZVEC_ROOT}/benchmark_results/cohere_10m_omega" - HNSW_LABEL="16c64g-v0.1" - OMEGA_LABEL="omega-m50-ef118-int8-refiner" - HNSW_ARGS=( - zvec - --path "${HNSW_PATH}" - --db-label "${HNSW_LABEL}" - --case-type "${CASE_TYPE}" - --m 50 - --ef-search 118 - --quantize-type int8 - --is-using-refiner - --num-concurrency 12,14,16,18,20 - --concurrency-duration 30 - --k 100 - --skip-drop-old - --skip-load - --skip-search-concurrent - ) - OMEGA_ARGS=( - zvecomega - --path "${OMEGA_PATH}" - --db-label "${OMEGA_LABEL}" - --case-type "${CASE_TYPE}" - --m 50 - --ef-search 118 - --quantize-type int8 - --is-using-refiner - --min-vector-threshold 100000 - --num-training-queries 4000 - --ef-training 500 - --window-size 100 - --ef-groundtruth 1000 - --target-recall 0.90 - --num-concurrency 12,14,16,18,20 - --concurrency-duration 30 - --k 100 - --skip-drop-old - --skip-load - --skip-search-concurrent - ) - ;; - *) - echo "Unsupported dataset: ${DATASET}" >&2 - echo "Usage: $0 [1m|10m]" >&2 - exit 1 - ;; -esac - -run_perf() { - local title="$1" - shift - - echo - echo "============================================================" - echo "${title}" - echo 
"============================================================" - - taskset -c "${CPU_CORE}" numactl --cpunodebind=0 --membind=0 \ - perf stat -r "${REPEAT}" -e "${PERF_EVENTS}" \ - "$@" -} - -run_record() { - local title="$1" - local output_prefix="$2" - shift 2 - - local data_file="${OUT_DIR}/${output_prefix}.data" - local report_file="${OUT_DIR}/${output_prefix}.report.txt" - local zvec_report_file="${OUT_DIR}/${output_prefix}.zvec_only.report.txt" - - echo - echo "============================================================" - echo "${title}" - echo "============================================================" - echo "perf.data: ${data_file}" - echo "report: ${report_file}" - echo "zvec-only: ${zvec_report_file}" - - taskset -c "${CPU_CORE}" numactl --cpunodebind=0 --membind=0 \ - perf record "${PERF_RECORD_ARGS[@]}" -o "${data_file}" -- \ - "$@" - - perf report --stdio --no-children -i "${data_file}" --percent-limit 0.5 \ - > "${report_file}" - sed -n "1,${TOPN}p" "${report_file}" - - perf report --stdio --no-children -i "${data_file}" \ - --sort dso,symbol --percent-limit 0.1 \ - --dsos _zvec.cpython-311-x86_64-linux-gnu.so \ - > "${zvec_report_file}" - sed -n "1,${TOPN}p" "${zvec_report_file}" -} - -cd "${ZVEC_ROOT}" -mkdir -p "${OUT_DIR}" - -HNSW_CMD=( - "${COMMON_ENV[@]}" - "${PYTHON_BIN}" -m vectordb_bench.cli.vectordbbench "${HNSW_ARGS[@]}" -) - -HNSW_EMPTY_HOOKS_CMD=( - "${COMMON_ENV[@]}" - ZVEC_HNSW_ENABLE_EMPTY_HOOKS=1 - "${PYTHON_BIN}" -m vectordb_bench.cli.vectordbbench "${HNSW_ARGS[@]}" -) - -OMEGA_HOOKS_CMD=( - "${COMMON_ENV[@]}" - ZVEC_OMEGA_DISABLE_MODEL_PREDICTION=1 - "${PYTHON_BIN}" -m vectordb_bench.cli.vectordbbench "${OMEGA_ARGS[@]}" -) - -echo "Using thread env:" -echo " OPENBLAS_NUM_THREADS=${OPENBLAS_THREADS}" -echo " OMP_NUM_THREADS=${OMP_THREADS}" -echo " MKL_NUM_THREADS=${MKL_THREADS}" -echo " NUMEXPR_NUM_THREADS=${NUMEXPR_THREADS}" -echo " GOTO_NUM_THREADS=${GOTO_THREADS}" -echo " VECLIB_MAXIMUM_THREADS=${VECLIB_THREADS}" -echo " 
PERF_CALL_GRAPH_MODE=${CALL_GRAPH_MODE}" -echo " PERF_USER_ONLY=${PERF_USER_ONLY}" - -case "${MODE}" in - stat) - run_perf \ - "HNSW core search perf (${DATASET})" \ - "${HNSW_CMD[@]}" - - run_perf \ - "HNSW empty-hooks core search perf (${DATASET})" \ - "${HNSW_EMPTY_HOOKS_CMD[@]}" - - run_perf \ - "OMEGA hooks-only core search perf (${DATASET})" \ - "${OMEGA_HOOKS_CMD[@]}" - ;; - record) - run_record \ - "HNSW core search hotspots (${DATASET})" \ - "hnsw_core" \ - "${HNSW_CMD[@]}" - - run_record \ - "HNSW empty-hooks core search hotspots (${DATASET})" \ - "hnsw_empty_hooks" \ - "${HNSW_EMPTY_HOOKS_CMD[@]}" - - run_record \ - "OMEGA hooks-only core search hotspots (${DATASET})" \ - "omega_hooks_only" \ - "${OMEGA_HOOKS_CMD[@]}" - ;; - all) - run_perf \ - "HNSW core search perf (${DATASET})" \ - "${HNSW_CMD[@]}" - - run_perf \ - "HNSW empty-hooks core search perf (${DATASET})" \ - "${HNSW_EMPTY_HOOKS_CMD[@]}" - - run_perf \ - "OMEGA hooks-only core search perf (${DATASET})" \ - "${OMEGA_HOOKS_CMD[@]}" - - run_record \ - "HNSW core search hotspots (${DATASET})" \ - "hnsw_core" \ - "${HNSW_CMD[@]}" - - run_record \ - "HNSW empty-hooks core search hotspots (${DATASET})" \ - "hnsw_empty_hooks" \ - "${HNSW_EMPTY_HOOKS_CMD[@]}" - - run_record \ - "OMEGA hooks-only core search hotspots (${DATASET})" \ - "omega_hooks_only" \ - "${OMEGA_HOOKS_CMD[@]}" - ;; - *) - echo "Unsupported PERF_MODE: ${MODE}" >&2 - echo "Use PERF_MODE=stat|record|all" >&2 - exit 1 - ;; -esac diff --git a/scripts/perf_hnsw_hooks_microbench.sh b/scripts/perf_hnsw_hooks_microbench.sh deleted file mode 100755 index d32aea3b8..000000000 --- a/scripts/perf_hnsw_hooks_microbench.sh +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env bash -# Internal profiling helper for hnsw_hooks_microbench. -# Maintained for OMEGA/HNSW hotspot analysis, not as a public benchmark entrypoint. -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -ZVEC_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" - -BIN="${BIN:-${ZVEC_ROOT}/build/bin/hnsw_hooks_microbench}" -DEFAULT_INDEX_DIR="${ZVEC_ROOT}/benchmark_results/cohere_1m_hnsw" -INDEX_PATH="${INDEX_PATH:-}" -OUT_DIR="${OUT_DIR:-${ZVEC_ROOT}/perf_results/hnsw_hooks_microbench}" - -CPU_CORE="${CPU_CORE:-0}" -REPEAT="${REPEAT:-5}" -EVENTS="${EVENTS:-cycles,instructions,branches,branch-misses,cache-references,cache-misses}" -RECORD_FREQ="${RECORD_FREQ:-999}" -CALL_GRAPH_MODE="${CALL_GRAPH_MODE:-fp}" -TOPN="${TOPN:-80}" -MODE_FILTER="${MODE_FILTER:-all}" - -QUERY_COUNT="${QUERY_COUNT:-1000}" -WARMUP="${WARMUP:-200}" -ITERATIONS="${ITERATIONS:-2000}" -EF_SEARCH="${EF_SEARCH:-180}" -TOPK="${TOPK:-100}" -WINDOW_SIZE="${WINDOW_SIZE:-100}" -TARGET_RECALL="${TARGET_RECALL:-0.91}" -SEED="${SEED:-12345}" - -if ! command -v perf >/dev/null 2>&1; then - echo "perf not found in PATH" >&2 - exit 1 -fi - -if [[ ! -x "${BIN}" ]]; then - echo "microbench binary not found: ${BIN}" >&2 - exit 1 -fi - -mkdir -p "${OUT_DIR}" - -build_common_args() { - COMMON_ARGS=( - "${BIN}" - --index-path "${INDEX_PATH}" - --ef-search "${EF_SEARCH}" - --topk "${TOPK}" - --query-count "${QUERY_COUNT}" - --warmup "${WARMUP}" - --iterations "${ITERATIONS}" - --window-size "${WINDOW_SIZE}" - --target-recall "${TARGET_RECALL}" - --seed "${SEED}" - ) -} - -preflight_index() { - local candidate="$1" - [[ -d "${candidate}" ]] || return 1 - - local output - if ! output="$("${BIN}" \ - --index-path "${candidate}" \ - --ef-search "${EF_SEARCH}" \ - --topk "${TOPK}" \ - --query-count 8 \ - --warmup 2 \ - --iterations 4 \ - --window-size "${WINDOW_SIZE}" \ - --target-recall "${TARGET_RECALL}" \ - --seed "${SEED}" \ - --mode fast 2>&1)"; then - return 1 - fi - - if [[ "${output}" != *"doc_cnt="* ]] || [[ "${output}" == *"doc_cnt=0"* ]]; then - return 1 - fi - - echo "${output}" - return 0 -} - -detect_index_path() { - if [[ -n "${INDEX_PATH}" ]]; then - if [[ ! 
-d "${INDEX_PATH}" ]]; then - echo "index dir not found: ${INDEX_PATH}" >&2 - exit 1 - fi - local output - if ! output="$(preflight_index "${INDEX_PATH}")"; then - echo "preflight failed for INDEX_PATH=${INDEX_PATH}" >&2 - exit 1 - fi - echo "Using user-provided INDEX_PATH=${INDEX_PATH}" - echo "${output}" - return - fi - - local candidates=( - "${DEFAULT_INDEX_DIR}" - ) - local candidate - for candidate in "${candidates[@]}"; do - local output - if output="$(preflight_index "${candidate}")"; then - INDEX_PATH="${candidate}" - echo "Auto-detected INDEX_PATH=${INDEX_PATH}" - echo "${output}" - return - fi - done - - echo "Failed to auto-detect a valid index file under ${DEFAULT_INDEX_DIR}" >&2 - exit 1 -} - -run_stat() { - local mode="$1" - echo - echo "============================================================" - echo "perf stat: ${mode}" - echo "============================================================" - taskset -c "${CPU_CORE}" perf stat -r "${REPEAT}" -e "${EVENTS}" \ - "${COMMON_ARGS[@]}" --mode "${mode}" -} - -run_record() { - local mode="$1" - local data_file="${OUT_DIR}/${mode}.data" - local report_file="${OUT_DIR}/${mode}.report.txt" - local zvec_report_file="${OUT_DIR}/${mode}.zvec_only.report.txt" - - echo - echo "============================================================" - echo "perf record: ${mode}" - echo "============================================================" - echo "perf.data: ${data_file}" - echo "report: ${report_file}" - echo "zvec-only: ${zvec_report_file}" - - taskset -c "${CPU_CORE}" perf record -F "${RECORD_FREQ}" -g \ - --call-graph "${CALL_GRAPH_MODE}" -o "${data_file}" -- \ - "${COMMON_ARGS[@]}" --mode "${mode}" - - perf report --stdio --no-children -i "${data_file}" --percent-limit 0.3 \ - > "${report_file}" - sed -n "1,${TOPN}p" "${report_file}" - - perf report --stdio --no-children -i "${data_file}" \ - --sort dso,symbol --percent-limit 0.05 > "${zvec_report_file}" - sed -n "1,${TOPN}p" "${zvec_report_file}" -} - 
-run_mode() { - local mode="$1" - run_stat "${mode}" - run_record "${mode}" -} - -detect_index_path -build_common_args - -case "${MODE_FILTER}" in - all) - run_mode fast - run_mode empty - run_mode omega - ;; - fast|empty|omega) - run_mode "${MODE_FILTER}" - ;; - *) - echo "Unsupported MODE_FILTER: ${MODE_FILTER}" >&2 - exit 1 - ;; -esac diff --git a/tools/core/CMakeLists.txt b/tools/core/CMakeLists.txt index 4b3a03dc5..a2c649a54 100644 --- a/tools/core/CMakeLists.txt +++ b/tools/core/CMakeLists.txt @@ -60,16 +60,6 @@ cc_binary( LIBS omega ) -if(ZVEC_ENABLE_OMEGA) -cc_binary( - NAME hnsw_hooks_microbench - STRICT PACKED - SRCS hnsw_hooks_microbench.cc - INCS ${PROJECT_ROOT_DIR}/src/core/ ${PROJECT_ROOT_DIR}/src/db/ ${PROJECT_BINARY_DIR}/src/db/ ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include - LIBS magic_enum core_framework core_metric core_quantizer core_utility core_knn_hnsw omega core_interface zvec_index ${ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS} -) -endif() - cc_binary( NAME recall_original STRICT PACKED diff --git a/tools/core/hnsw_hooks_microbench.cc b/tools/core/hnsw_hooks_microbench.cc deleted file mode 100644 index 435b4967c..000000000 --- a/tools/core/hnsw_hooks_microbench.cc +++ /dev/null @@ -1,552 +0,0 @@ -// Copyright 2025-present the zvec project -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "zvec/ailego/container/params.h" -#include "utility/rdtsc_timer.h" -#include "algorithm/hnsw/hnsw_context.h" -#include "algorithm/hnsw/hnsw_params.h" -#include "algorithm/hnsw/hnsw_streamer.h" -#include "db/common/file_helper.h" -#include "db/index/column/vector_column/vector_column_indexer.h" -#include "db/index/common/version_manager.h" -#include "omega/search_context.h" - -namespace zvec { -namespace core { - -namespace { - -namespace fs = std::filesystem; - -struct Options { - std::string mode = "all"; - std::string index_path; - uint32_t dimension = 768; - uint32_t m = 15; - uint32_t ef_construction = 500; - uint32_t ef_search = 180; - uint32_t topk = 100; - uint32_t query_count = 1000; - uint32_t iterations = 1000; - uint32_t warmup = 100; - uint32_t seed = 12345; - int window_size = 100; - float target_recall = 0.90f; -}; - -void PrintUsage(const char* argv0) { - std::cerr - << "Usage: " << argv0 << " --index-path [options]\n" - << "Options:\n" - << " --mode all|fast|empty|omega\n" - << " --dimension Vector dimension\n" - << " --m HNSW max neighbors\n" - << " --ef-construction HNSW ef_construction\n" - << " --ef-search HNSW ef_search\n" - << " --topk Search topk\n" - << " --query-count Number of sampled queries\n" - << " --iterations Number of measured iterations\n" - << " --warmup Number of warmup iterations\n" - << " --seed RNG seed for sampled queries\n" - << " --window-size OMEGA window size for hooks-only mode\n" - << " --target-recall OMEGA target recall for hooks-only mode\n" - << "\n" - << "--index-path accepts either the benchmark index directory\n" - << "(for example .../cohere_1m_hnsw) or a file under its segment\n" - << "subdirectory such as .../0/dense.qindex.5.proxima.\n"; -} - -bool ParseArgs(int argc, char** argv, Options* opts) { - for (int i = 1; i < argc; ++i) { - const char* arg = argv[i]; - if (std::strcmp(arg, 
"--index-path") == 0 && i + 1 < argc) { - opts->index_path = argv[++i]; - } else if (std::strcmp(arg, "--mode") == 0 && i + 1 < argc) { - opts->mode = argv[++i]; - } else if (std::strcmp(arg, "--dimension") == 0 && i + 1 < argc) { - opts->dimension = - static_cast(std::strtoul(argv[++i], nullptr, 10)); - } else if (std::strcmp(arg, "--m") == 0 && i + 1 < argc) { - opts->m = static_cast(std::strtoul(argv[++i], nullptr, 10)); - } else if (std::strcmp(arg, "--ef-construction") == 0 && i + 1 < argc) { - opts->ef_construction = - static_cast(std::strtoul(argv[++i], nullptr, 10)); - } else if (std::strcmp(arg, "--ef-search") == 0 && i + 1 < argc) { - opts->ef_search = static_cast(std::strtoul(argv[++i], nullptr, 10)); - } else if (std::strcmp(arg, "--topk") == 0 && i + 1 < argc) { - opts->topk = static_cast(std::strtoul(argv[++i], nullptr, 10)); - } else if (std::strcmp(arg, "--query-count") == 0 && i + 1 < argc) { - opts->query_count = - static_cast(std::strtoul(argv[++i], nullptr, 10)); - } else if (std::strcmp(arg, "--iterations") == 0 && i + 1 < argc) { - opts->iterations = - static_cast(std::strtoul(argv[++i], nullptr, 10)); - } else if (std::strcmp(arg, "--warmup") == 0 && i + 1 < argc) { - opts->warmup = - static_cast(std::strtoul(argv[++i], nullptr, 10)); - } else if (std::strcmp(arg, "--seed") == 0 && i + 1 < argc) { - opts->seed = static_cast(std::strtoul(argv[++i], nullptr, 10)); - } else if (std::strcmp(arg, "--window-size") == 0 && i + 1 < argc) { - opts->window_size = std::atoi(argv[++i]); - } else if (std::strcmp(arg, "--target-recall") == 0 && i + 1 < argc) { - opts->target_recall = std::strtof(argv[++i], nullptr); - } else if (std::strcmp(arg, "--help") == 0 || - std::strcmp(arg, "-h") == 0) { - PrintUsage(argv[0]); - return false; - } else { - std::cerr << "Unknown argument: " << arg << "\n"; - PrintUsage(argv[0]); - return false; - } - } - - if (opts->index_path.empty()) { - PrintUsage(argv[0]); - return false; - } - opts->query_count = std::max(1u, 
opts->query_count); - opts->iterations = std::max(1u, opts->iterations); - opts->topk = std::max(1u, opts->topk); - opts->window_size = std::max(1, opts->window_size); - if (opts->mode != "all" && opts->mode != "fast" && - opts->mode != "empty" && opts->mode != "omega") { - std::cerr << "Invalid --mode: " << opts->mode << "\n"; - PrintUsage(argv[0]); - return false; - } - return true; -} - -struct OmegaHookState { - struct PendingVisitBuffer { - std::vector storage; - int head{0}; - int count{0}; - - void Reset(int capacity) { - head = 0; - count = 0; - storage.resize(std::max(1, capacity)); - } - - bool Empty() const { return count == 0; } - - int Capacity() const { return static_cast(storage.size()); } - - void Push(const omega::SearchContext::VisitCandidate& candidate) { - storage[(head + count) % Capacity()] = candidate; - ++count; - } - - const omega::SearchContext::VisitCandidate* Data() const { - return storage.data() + head; - } - - void Clear() { - head = 0; - count = 0; - } - }; - - omega::SearchContext* search_ctx{nullptr}; - bool enable_early_stopping{false}; - bool per_cmp_reporting{false}; - PendingVisitBuffer pending_candidates; - int batch_min_interval{1}; -}; - -void ResetOmegaHookState(OmegaHookState* state) { - if (state->search_ctx != nullptr) { - state->batch_min_interval = state->search_ctx->GetPredictionBatchMinInterval(); - } else { - state->batch_min_interval = 1; - } - state->pending_candidates.Reset(state->batch_min_interval); -} - -bool ShouldFlushOmegaPendingCandidates(const OmegaHookState& state) { - if (state.pending_candidates.Empty()) { - return false; - } - if (state.pending_candidates.count >= state.batch_min_interval) { - return true; - } - if (state.search_ctx == nullptr) { - return false; - } - return state.search_ctx->GetTotalCmps() + state.pending_candidates.count >= - state.search_ctx->GetNextPredictionCmps(); -} - -bool FlushOmegaPendingCandidates(OmegaHookState* state, int flush_count) { - if (state->search_ctx == nullptr 
|| flush_count <= 0 || - state->pending_candidates.Empty()) { - return false; - } - - flush_count = std::min(flush_count, state->pending_candidates.count); - bool should_predict = state->search_ctx->ReportVisitCandidates( - state->pending_candidates.Data(), static_cast(flush_count)); - state->pending_candidates.Clear(); - if (!state->enable_early_stopping || !should_predict) { - return false; - } - return state->search_ctx->ShouldStopEarly(); -} - -bool MaybeFlushOmegaPendingCandidates(OmegaHookState* state) { - if (!ShouldFlushOmegaPendingCandidates(*state)) { - return false; - } - return FlushOmegaPendingCandidates(state, state->pending_candidates.count); -} - -void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, - void* user_data) { - auto& state = *static_cast(user_data); - state.search_ctx->SetDistStart(dist); - if (state.per_cmp_reporting) { - state.search_ctx->ReportVisitCandidate(id, dist, true); - return; - } - state.pending_candidates.Push({static_cast(id), dist, true}); - MaybeFlushOmegaPendingCandidates(&state); -} - -void OnOmegaHop(void* user_data) { - auto& state = *static_cast(user_data); - state.search_ctx->ReportHop(); -} - -bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, - bool should_consider_candidate, void* user_data) { - auto& state = *static_cast(user_data); - if (state.per_cmp_reporting) { - bool should_predict = - state.search_ctx->ReportVisitCandidate(id, dist, should_consider_candidate); - if (!state.enable_early_stopping || !should_predict) { - return false; - } - return state.search_ctx->ShouldStopEarly(); - } - state.pending_candidates.Push( - {static_cast(id), dist, should_consider_candidate}); - return MaybeFlushOmegaPendingCandidates(&state); -} - -struct BenchStats { - double avg_ns{0.0}; - double avg_cmps{0.0}; - double ns_per_cmp{0.0}; - double checksum{0.0}; -}; - -std::string NormalizeIndexPath(const std::string& input_path) { - if (input_path.empty()) { - return input_path; - } - - fs::path 
path(input_path); - std::error_code ec; - if (!fs::exists(path, ec)) { - return input_path; - } - - if (fs::is_regular_file(path, ec)) { - auto segment_dir = path.parent_path(); - auto index_root = segment_dir.parent_path(); - if (!segment_dir.empty() && segment_dir.filename() == "0" && - !index_root.empty()) { - return index_root.string(); - } - return segment_dir.string(); - } - - if (fs::is_directory(path, ec) && path.filename() == "0") { - auto index_root = path.parent_path(); - if (!index_root.empty()) { - return index_root.string(); - } - } - - return path.string(); -} - -std::vector SampleIndexVectors(const IndexProvider::Pointer& provider, - const IndexMeta& meta, - uint32_t count, uint32_t seed) { - const uint32_t doc_cnt = static_cast(provider->count()); - std::vector all_queries; - all_queries.reserve(doc_cnt); - - auto it = provider->create_iterator(); - for (; it && it->is_valid(); it->next()) { - all_queries.push_back(it->data()); - } - - std::vector ids(all_queries.size()); - std::iota(ids.begin(), ids.end(), 0u); - std::mt19937 rng(seed); - std::shuffle(ids.begin(), ids.end(), rng); - - count = std::min(count, static_cast(all_queries.size())); - std::vector queries; - queries.reserve(count); - for (uint32_t i = 0; i < count; ++i) { - queries.push_back(all_queries[ids[i]]); - } - - std::cout << "Using " << queries.size() << " sampled in-index queries" - << " element_size=" << meta.element_size() - << " doc_cnt=" << doc_cnt - << " valid_vectors=" << all_queries.size() << "\n"; - return queries; -} - -bool OpenBenchmarkVectorIndexer(const std::string& benchmark_root, - VectorColumnIndexer::Ptr* out, - std::string* error) { - auto version_manager_result = VersionManager::Recovery(benchmark_root); - if (!version_manager_result.has_value()) { - *error = version_manager_result.error().message(); - return false; - } - - auto version_manager = version_manager_result.value(); - const Version version = version_manager->get_current_version(); - const auto& 
schema = version.schema(); - const auto vector_fields = schema.vector_fields(); - if (vector_fields.empty()) { - *error = "No vector field found in benchmark root"; - return false; - } - - const FieldSchema* field = vector_fields.front().get(); - if (field == nullptr) { - *error = "Invalid vector field in benchmark root"; - return false; - } - - std::string index_file_path; - uint32_t best_doc_count = 0; - bool found_quantized = false; - - for (const auto& segment_meta : version.persisted_segment_metas()) { - for (const auto& block : segment_meta->persisted_blocks()) { - const bool match_field = block.contain_column(field->name()); - const bool is_quantized = block.type() == BlockType::VECTOR_INDEX_QUANTIZE; - const bool is_plain = block.type() == BlockType::VECTOR_INDEX; - if (!match_field || (!is_quantized && !is_plain)) { - continue; - } - - if (is_quantized && (!found_quantized || block.doc_count() > best_doc_count)) { - index_file_path = FileHelper::MakeQuantizeVectorIndexPath( - benchmark_root, field->name(), segment_meta->id(), block.id()); - best_doc_count = block.doc_count(); - found_quantized = true; - continue; - } - - if (!found_quantized && block.doc_count() > best_doc_count) { - index_file_path = FileHelper::MakeVectorIndexPath( - benchmark_root, field->name(), segment_meta->id(), block.id()); - best_doc_count = block.doc_count(); - } - } - } - - if (index_file_path.empty()) { - *error = "No HNSW vector index file found under benchmark root"; - return false; - } - - auto indexer = std::make_shared(index_file_path, *field); - auto status = indexer->Open({true, false, true}); - if (!status.ok()) { - *error = status.message(); - return false; - } - - std::cout << "Opened benchmark root: " << benchmark_root << "\n" - << "Selected vector index file: " << index_file_path << "\n" - << "Vector field: " << field->name() << "\n"; - *out = std::move(indexer); - return true; -} - -template -BenchStats RunBench(const std::string& name, HnswContext* ctx, - const 
std::vector& queries, uint32_t warmup, - uint32_t iterations, Fn&& fn) { - for (uint32_t i = 0; i < warmup; ++i) { - const void* query = queries[i % queries.size()]; - ctx->clear(); - ctx->resize_results(1); - ctx->reset_query(query); - fn(); - } - - uint64_t total_ns = 0; - uint64_t total_cmps = 0; - double checksum = 0.0; - - for (uint32_t i = 0; i < iterations; ++i) { - const void* query = queries[i % queries.size()]; - ctx->clear(); - ctx->resize_results(1); - ctx->reset_query(query); - const auto start = RdtscTimer::Now(); - fn(); - const auto end = RdtscTimer::Now(); - total_ns += RdtscTimer::ElapsedNs(start, end); - total_cmps += ctx->get_pairwise_dist_num(); - if (!ctx->topk_heap().empty()) { - checksum += ctx->topk_heap()[0].second; - } - } - - BenchStats stats; - stats.avg_ns = static_cast(total_ns) / iterations; - stats.avg_cmps = static_cast(total_cmps) / iterations; - stats.ns_per_cmp = - total_cmps == 0 ? 0.0 : static_cast(total_ns) / total_cmps; - stats.checksum = checksum; - - std::cout << std::fixed << std::setprecision(3) - << name << ": avg_ns=" << stats.avg_ns - << " avg_cmps=" << stats.avg_cmps - << " ns_per_cmp=" << stats.ns_per_cmp - << " checksum=" << stats.checksum << "\n"; - return stats; -} - -} // namespace - -} // namespace core -} // namespace zvec - -int main(int argc, char** argv) { - using namespace zvec::core; - using namespace zvec; - - Options opts; - if (!ParseArgs(argc, argv, &opts)) { - return 1; - } - opts.index_path = NormalizeIndexPath(opts.index_path); - - VectorColumnIndexer::Ptr indexer; - std::string open_error; - if (!OpenBenchmarkVectorIndexer(opts.index_path, &indexer, &open_error)) { - std::cerr << "Failed to open benchmark index: " << open_error << "\n"; - return 2; - } - auto index = indexer->core_index(); - if (!index) { - std::cerr << "Opened benchmark indexer without underlying core index\n"; - return 3; - } - - auto streamer_base = index->index_searcher(); - auto* streamer = dynamic_cast(streamer_base.get()); 
- if (streamer == nullptr) { - std::cerr << "Failed to get HnswStreamer from opened index\n"; - return 4; - } - - auto context = streamer_base->create_context(); - auto* ctx = dynamic_cast(context.get()); - if (ctx == nullptr) { - std::cerr << "Failed to create HNSW context\n"; - return 5; - } - zvec::ailego::Params query_params; - query_params.set(PARAM_HNSW_STREAMER_EF, opts.ef_search); - if (ctx->update(query_params) != 0) { - std::cerr << "Failed to update HNSW query params\n"; - return 6; - } - ctx->set_topk(opts.topk); - - auto provider = streamer_base->create_provider(); - if (!provider) { - std::cerr << "Failed to create HNSW provider\n"; - return 7; - } - - auto queries = SampleIndexVectors(provider, streamer_base->meta(), - opts.query_count, opts.seed); - if (queries.empty()) { - std::cerr << "No queries sampled from index\n"; - return 8; - } - - HnswAlgorithm::SearchHooks empty_hooks; - - omega::SearchContext omega_search_ctx( - nullptr, nullptr, opts.target_recall, static_cast(opts.topk), - opts.window_size); - OmegaHookState omega_hook_state; - omega_hook_state.search_ctx = &omega_search_ctx; - omega_hook_state.enable_early_stopping = false; - ResetOmegaHookState(&omega_hook_state); - HnswAlgorithm::SearchHooks omega_hooks; - omega_hooks.user_data = &omega_hook_state; - omega_hooks.on_level0_entry = OnOmegaLevel0Entry; - omega_hooks.on_hop = OnOmegaHop; - omega_hooks.on_visit_candidate = OnOmegaVisitCandidate; - - if (opts.mode == "all" || opts.mode == "fast") { - RunBench("alg_fast_search", ctx, queries, opts.warmup, opts.iterations, - [&]() { return streamer->FastSearch(ctx); }); - } - - if (opts.mode == "all" || opts.mode == "empty") { - RunBench("alg_fast_search_with_empty_hooks", ctx, queries, opts.warmup, - opts.iterations, [&]() { - bool stopped_early = false; - return streamer->FastSearchWithHooks(ctx, &empty_hooks, - &stopped_early); - }); - } - - if (opts.mode == "all" || opts.mode == "omega") { - 
RunBench("alg_fast_search_with_omega_hooks_only", ctx, queries, opts.warmup, - opts.iterations, [&]() { - omega_search_ctx.Reset(); - ResetOmegaHookState(&omega_hook_state); - bool stopped_early = false; - int ret = streamer->FastSearchWithHooks(ctx, &omega_hooks, - &stopped_early); - MaybeFlushOmegaPendingCandidates(&omega_hook_state); - return ret; - }); - } - - return 0; -} From 1c53fd57f87263386fed50401e6cea325cfaf0cd Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 20:48:27 +0800 Subject: [PATCH 074/126] cleanup: refactor omega training around sessions --- src/core/algorithm/omega/omega_streamer.cc | 526 +++++++++++------- src/core/interface/indexes/omega_index.cc | 35 +- .../indexes/omega_training_session.cc | 113 ++++ .../indexes/omega_training_session.h | 56 ++ .../vector_column/vector_column_indexer.cc | 134 +---- .../vector_column/vector_column_indexer.h | 67 +-- src/db/training/training_data_collector.cc | 324 ++++------- src/include/zvec/core/interface/index.h | 17 +- .../zvec/core/interface/training_capable.h | 69 +-- .../zvec/core/interface/training_session.h | 60 ++ 10 files changed, 682 insertions(+), 719 deletions(-) create mode 100644 src/core/interface/indexes/omega_training_session.cc create mode 100644 src/core/interface/indexes/omega_training_session.h create mode 100644 src/include/zvec/core/interface/training_session.h diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 11b3f174d..5c0fda3a0 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -77,6 +77,285 @@ uint64_t OmegaProfilingElapsedNs(uint64_t start, uint64_t end) { return RdtscTimer::ElapsedNs(start, end); } +struct OmegaHookSetup { + OmegaHookState state; + HnswAlgorithm::SearchHooks hooks; +}; + +struct OmegaFinalStats { + int hops{0}; + int cmps{0}; + int collected_gt{0}; + float predicted_recall_avg{0.0f}; + float predicted_recall_at_target{0.0f}; + int 
omega_early_stop_hit{0}; + unsigned long long should_stop_calls{0}; + unsigned long long prediction_calls{0}; + unsigned long long should_stop_time_ns{0}; + unsigned long long prediction_eval_time_ns{0}; + unsigned long long sorted_window_time_ns{0}; + unsigned long long average_recall_eval_time_ns{0}; + unsigned long long prediction_feature_prep_time_ns{0}; + unsigned long long report_visit_candidate_time_ns{0}; + unsigned long long report_hop_time_ns{0}; + unsigned long long update_top_candidates_time_ns{0}; + unsigned long long push_traversal_window_time_ns{0}; + unsigned long long collected_gt_advance_count{0}; + unsigned long long should_stop_calls_with_advance{0}; + unsigned long long max_prediction_calls_per_should_stop{0}; +}; + +struct OmegaTimingSummary { + uint64_t query_total_time_ns{0}; + uint64_t query_reset_time_ns{0}; + uint64_t query_search_time_ns{0}; + uint64_t query_setup_time_ns{0}; + uint64_t hook_total_time_ns{0}; + uint64_t hook_body_time_ns{0}; + uint64_t pure_search_time_ns{0}; + uint64_t hook_dispatch_time_ns{0}; +}; + +OmegaHookSetup CreateOmegaHookSetup(omega::SearchContext* omega_search_ctx, + bool enable_early_stopping, + bool collect_control_timing, + bool per_cmp_reporting, + uint64_t* hook_body_time_ns, + uint64_t* hook_total_time_ns) { + OmegaHookSetup setup; + setup.state.search_ctx = omega_search_ctx; + setup.state.enable_early_stopping = enable_early_stopping; + setup.state.collect_control_timing = collect_control_timing; + setup.state.hook_body_time_ns = hook_body_time_ns; + setup.state.per_cmp_reporting = per_cmp_reporting; + ResetOmegaHookState(&setup.state); + + setup.hooks.user_data = &setup.state; + setup.hooks.collect_timing = collect_control_timing; + setup.hooks.now_ns = &OmegaProfilingNowNs; + setup.hooks.elapsed_ns = &OmegaProfilingElapsedNs; + setup.hooks.hook_total_time_ns = hook_total_time_ns; + setup.hooks.on_level0_entry = OnOmegaLevel0Entry; + setup.hooks.on_hop = OnOmegaHop; + setup.hooks.on_visit_candidate = 
OnOmegaVisitCandidate; + return setup; +} + +OmegaFinalStats CollectOmegaFinalStats(omega::SearchContext* omega_search_ctx) { + OmegaFinalStats stats; + omega_search_ctx->GetStats(&stats.hops, &stats.cmps, &stats.collected_gt); + stats.predicted_recall_avg = omega_search_ctx->GetLastPredictedRecallAvg(); + stats.predicted_recall_at_target = + omega_search_ctx->GetLastPredictedRecallAtTarget(); + stats.omega_early_stop_hit = omega_search_ctx->EarlyStopHit() ? 1 : 0; + stats.should_stop_calls = omega_search_ctx->GetShouldStopCalls(); + stats.prediction_calls = omega_search_ctx->GetPredictionCalls(); + stats.should_stop_time_ns = omega_search_ctx->GetShouldStopTimeNs(); + stats.prediction_eval_time_ns = omega_search_ctx->GetPredictionEvalTimeNs(); + stats.sorted_window_time_ns = omega_search_ctx->GetSortedWindowTimeNs(); + stats.average_recall_eval_time_ns = + omega_search_ctx->GetAverageRecallEvalTimeNs(); + stats.prediction_feature_prep_time_ns = + omega_search_ctx->GetPredictionFeaturePrepTimeNs(); + stats.report_visit_candidate_time_ns = + omega_search_ctx->GetReportVisitCandidateTimeNs(); + stats.report_hop_time_ns = omega_search_ctx->GetReportHopTimeNs(); + stats.update_top_candidates_time_ns = + omega_search_ctx->GetUpdateTopCandidatesTimeNs(); + stats.push_traversal_window_time_ns = + omega_search_ctx->GetPushTraversalWindowTimeNs(); + stats.collected_gt_advance_count = + omega_search_ctx->GetCollectedGtAdvanceCount(); + stats.should_stop_calls_with_advance = + omega_search_ctx->GetShouldStopCallsWithAdvance(); + stats.max_prediction_calls_per_should_stop = + omega_search_ctx->GetMaxPredictionCallsPerShouldStop(); + return stats; +} + +void EnableOmegaTrainingIfNeeded(OmegaSearchHandle omega_search, int query_id, + bool training_mode_enabled, + const std::vector>& training_ground_truth, + int training_k_train) { + if (!training_mode_enabled) { + return; + } + + std::vector gt_for_query; + if (query_id >= 0 && + static_cast(query_id) < 
training_ground_truth.size()) { + const auto& gt = training_ground_truth[query_id]; + gt_for_query.reserve(gt.size()); + for (uint64_t node_id : gt) { + gt_for_query.push_back(static_cast(node_id)); + } + } + + omega_search_enable_training(omega_search, query_id, gt_for_query.data(), + gt_for_query.size(), training_k_train); + LOG_DEBUG("Training mode enabled for query_id=%d with %zu GT nodes", + query_id, gt_for_query.size()); +} + +void CollectOmegaTrainingOutputs(OmegaSearchHandle omega_search, + OmegaContext* omega_ctx, int query_id) { + if (omega_ctx == nullptr) { + return; + } + + size_t record_count = omega_search_get_training_records_count(omega_search); + if (record_count > 0) { + const void* records_ptr = omega_search_get_training_records(omega_search); + const auto* records_vec = + static_cast*>(records_ptr); + + for (size_t i = 0; i < record_count; ++i) { + const auto& omega_record = (*records_vec)[i]; + core_interface::TrainingRecord record; + record.query_id = omega_record.query_id; + record.hops_visited = omega_record.hops; + record.cmps_visited = omega_record.cmps; + record.dist_1st = omega_record.dist_1st; + record.dist_start = omega_record.dist_start; + + if (omega_record.traversal_window_stats.size() == 7) { + std::copy(omega_record.traversal_window_stats.begin(), + omega_record.traversal_window_stats.end(), + record.traversal_window_stats.begin()); + } + + record.label = omega_record.label; + omega_ctx->add_training_record(std::move(record)); + } + + LOG_DEBUG("Collected %zu training records for query_id=%d", record_count, + query_id); + } + + size_t gt_cmps_count = omega_search_get_gt_cmps_count(omega_search); + if (gt_cmps_count == 0) { + return; + } + + const int* gt_cmps_ptr = omega_search_get_gt_cmps(omega_search); + int total_cmps = omega_search_get_total_cmps(omega_search); + if (gt_cmps_ptr == nullptr) { + return; + } + + std::vector gt_cmps_vec(gt_cmps_ptr, gt_cmps_ptr + gt_cmps_count); + for (auto& v : gt_cmps_vec) { + if (v < 0) { + v 
= total_cmps; + } + } + omega_ctx->set_gt_cmps(gt_cmps_vec, total_cmps); +} + +void LogOmegaRuntimeStatsOnce(std::atomic* debug_stats_logged, + bool model_loaded, float target_recall, + size_t scan_cmps, uint64_t pairwise_dist_cnt, + const OmegaFinalStats& final_stats, + bool early_stop_hit) { + bool expected = false; + if (!debug_stats_logged->compare_exchange_strong(expected, true)) { + return; + } + + LOG_INFO("OMEGA runtime stats: model_loaded=%d target_recall=%.4f " + "scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d " + "collected_gt=%d predicted_recall_avg=%.4f " + "predicted_recall_at_target=%.4f early_stop_hit=%d " + "should_stop_calls=%llu prediction_calls=%llu " + "advance_calls=%llu collected_gt_advance=%llu " + "max_pred_per_stop=%llu should_stop_ms=%.3f " + "prediction_eval_ms=%.3f", + model_loaded ? 1 : 0, target_recall, scan_cmps, + static_cast(pairwise_dist_cnt), final_stats.cmps, + final_stats.collected_gt, final_stats.predicted_recall_avg, + final_stats.predicted_recall_at_target, early_stop_hit ? 
1 : 0, + final_stats.should_stop_calls, final_stats.prediction_calls, + final_stats.should_stop_calls_with_advance, + final_stats.collected_gt_advance_count, + final_stats.max_prediction_calls_per_should_stop, + static_cast(final_stats.should_stop_time_ns) / 1e6, + static_cast(final_stats.prediction_eval_time_ns) / 1e6); +} + +void LogOmegaQueryStats(uint64_t query_seq, bool model_loaded, + float target_recall, size_t scan_cmps, + uint64_t pairwise_dist_cnt, + const OmegaFinalStats& final_stats, + const OmegaTimingSummary& timing, + bool collect_control_timing, bool early_stop_hit) { + if (collect_control_timing) { + LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " + "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " + "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " + "early_stop_hit=%d should_stop_calls=%llu " + "prediction_calls=%llu advance_calls=%llu " + "collected_gt_advance=%llu max_pred_per_stop=%llu " + "should_stop_ms=%.3f prediction_eval_ms=%.3f " + "setup_ms=%.3f reset_query_ms=%.3f " + "core_search_ms=%.3f hook_total_ms=%.3f hook_body_ms=%.3f " + "hook_dispatch_ms=%.3f pure_search_ms=%.3f " + "report_visit_candidate_ms=%.3f " + "report_hop_ms=%.3f update_top_candidates_ms=%.3f " + "push_traversal_window_ms=%.3f total_ms=%.3f", + static_cast(query_seq), model_loaded ? 1 : 0, + target_recall, scan_cmps, + static_cast(pairwise_dist_cnt), + final_stats.cmps, final_stats.collected_gt, + final_stats.predicted_recall_avg, + final_stats.predicted_recall_at_target, early_stop_hit ? 
1 : 0, + final_stats.should_stop_calls, final_stats.prediction_calls, + final_stats.should_stop_calls_with_advance, + final_stats.collected_gt_advance_count, + final_stats.max_prediction_calls_per_should_stop, + static_cast(final_stats.should_stop_time_ns) / 1e6, + static_cast(final_stats.prediction_eval_time_ns) / 1e6, + static_cast(timing.query_setup_time_ns) / 1e6, + static_cast(timing.query_reset_time_ns) / 1e6, + static_cast(timing.query_search_time_ns) / 1e6, + static_cast(timing.hook_total_time_ns) / 1e6, + static_cast(timing.hook_body_time_ns) / 1e6, + static_cast(timing.hook_dispatch_time_ns) / 1e6, + static_cast(timing.pure_search_time_ns) / 1e6, + static_cast(final_stats.report_visit_candidate_time_ns) / 1e6, + static_cast(final_stats.report_hop_time_ns) / 1e6, + static_cast(final_stats.update_top_candidates_time_ns) / 1e6, + static_cast(final_stats.push_traversal_window_time_ns) / 1e6, + static_cast(timing.query_total_time_ns) / 1e6); + return; + } + + LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " + "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " + "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " + "early_stop_hit=%d should_stop_calls=%llu " + "prediction_calls=%llu advance_calls=%llu " + "collected_gt_advance=%llu max_pred_per_stop=%llu " + "should_stop_ms=%.3f prediction_eval_ms=%.3f " + "setup_ms=%.3f reset_query_ms=%.3f " + "core_search_ms=%.3f search_with_hooks_ms=%.3f total_ms=%.3f", + static_cast(query_seq), model_loaded ? 1 : 0, + target_recall, scan_cmps, + static_cast(pairwise_dist_cnt), final_stats.cmps, + final_stats.collected_gt, final_stats.predicted_recall_avg, + final_stats.predicted_recall_at_target, early_stop_hit ? 
1 : 0, + final_stats.should_stop_calls, final_stats.prediction_calls, + final_stats.should_stop_calls_with_advance, + final_stats.collected_gt_advance_count, + final_stats.max_prediction_calls_per_should_stop, + static_cast(final_stats.should_stop_time_ns) / 1e6, + static_cast(final_stats.prediction_eval_time_ns) / 1e6, + static_cast(timing.query_setup_time_ns) / 1e6, + static_cast(timing.query_reset_time_ns) / 1e6, + static_cast(timing.query_search_time_ns) / 1e6, + static_cast(timing.query_search_time_ns) / 1e6, + static_cast(timing.query_total_time_ns) / 1e6); +} + } // namespace bool OmegaStreamer::LoadModel(const std::string& model_dir) { @@ -232,24 +511,10 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm return IndexError_Runtime; } - // Training state is attached to the OMEGA search context before the shared - // HNSW loop starts so label collection sees the full query trajectory. - if (training_mode_enabled_) { - std::vector gt_for_query; - if (query_id >= 0 && - static_cast(query_id) < training_ground_truth_.size()) { - const auto& gt = training_ground_truth_[query_id]; - gt_for_query.reserve(gt.size()); - for (uint64_t node_id : gt) { - gt_for_query.push_back(static_cast(node_id)); - } - } - omega_search_enable_training(omega_search, query_id, - gt_for_query.data(), gt_for_query.size(), - training_k_train_); - LOG_DEBUG("Training mode enabled for query_id=%d with %zu GT nodes", - query_id, gt_for_query.size()); - } + // Training state is attached before the shared HNSW loop starts so label + // collection sees the full query trajectory. + EnableOmegaTrainingIfNeeded(omega_search, query_id, training_mode_enabled_, + training_ground_truth_, training_k_train_); // Rebind the context if it originated from a different searcher/streamer // instance so the HNSW state matches this streamer before search begins. 
@@ -270,52 +535,22 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm auto query_reset_start = RdtscTimer::Now(); hnsw_ctx->reset_query(query); auto query_reset_end = RdtscTimer::Now(); - OmegaHookState hook_state; - hook_state.search_ctx = omega_search_ctx; - hook_state.enable_early_stopping = enable_early_stopping; - hook_state.collect_control_timing = collect_control_timing; - hook_state.hook_body_time_ns = &hook_body_time_ns; - hook_state.per_cmp_reporting = training_mode_enabled_; - ResetOmegaHookState(&hook_state); - HnswAlgorithm::SearchHooks hooks; - hooks.user_data = &hook_state; - hooks.collect_timing = collect_control_timing; - hooks.now_ns = &OmegaProfilingNowNs; - hooks.elapsed_ns = &OmegaProfilingElapsedNs; - hooks.hook_total_time_ns = &hook_total_time_ns; - hooks.on_level0_entry = OnOmegaLevel0Entry; - hooks.on_hop = OnOmegaHop; - hooks.on_visit_candidate = OnOmegaVisitCandidate; + OmegaHookSetup hook_setup = + CreateOmegaHookSetup(omega_search_ctx, enable_early_stopping, + collect_control_timing, training_mode_enabled_, + &hook_body_time_ns, &hook_total_time_ns); bool early_stop_hit = false; auto query_search_start = RdtscTimer::Now(); - int ret = alg_->search_with_hooks(hnsw_ctx, &hooks, &early_stop_hit); + int ret = + alg_->search_with_hooks(hnsw_ctx, &hook_setup.hooks, &early_stop_hit); if (ret != 0) { omega_search_destroy(omega_search); LOG_ERROR("OMEGA search failed"); return ret; } - MaybeFlushOmegaPendingCandidates(&hook_state); + MaybeFlushOmegaPendingCandidates(&hook_setup.state); auto query_search_end = RdtscTimer::Now(); - // Get final statistics - int hops, cmps, collected_gt; - float predicted_recall_avg = 0.0f; - float predicted_recall_at_target = 0.0f; - int omega_early_stop_hit = 0; - unsigned long long should_stop_calls = 0; - unsigned long long prediction_calls = 0; - unsigned long long should_stop_time_ns = 0; - unsigned long long prediction_eval_time_ns = 0; - unsigned long long 
sorted_window_time_ns = 0; - unsigned long long average_recall_eval_time_ns = 0; - unsigned long long prediction_feature_prep_time_ns = 0; - unsigned long long report_visit_candidate_time_ns = 0; - unsigned long long report_hop_time_ns = 0; - unsigned long long update_top_candidates_time_ns = 0; - unsigned long long push_traversal_window_time_ns = 0; - unsigned long long collected_gt_advance_count = 0; - unsigned long long should_stop_calls_with_advance = 0; - unsigned long long max_prediction_calls_per_should_stop = 0; uint64_t query_total_time_ns = RdtscTimer::ElapsedNs(query_total_start, RdtscTimer::Now()); uint64_t query_reset_time_ns = @@ -328,134 +563,40 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm query_total_time_ns - query_reset_time_ns - query_search_time_ns; } uint64_t query_seq = query_stats_sequence_.fetch_add(1); - omega_search_ctx->GetStats(&hops, &cmps, &collected_gt); - predicted_recall_avg = omega_search_ctx->GetLastPredictedRecallAvg(); - predicted_recall_at_target = - omega_search_ctx->GetLastPredictedRecallAtTarget(); - omega_early_stop_hit = omega_search_ctx->EarlyStopHit() ? 
1 : 0; - should_stop_calls = omega_search_ctx->GetShouldStopCalls(); - prediction_calls = omega_search_ctx->GetPredictionCalls(); - should_stop_time_ns = omega_search_ctx->GetShouldStopTimeNs(); - prediction_eval_time_ns = omega_search_ctx->GetPredictionEvalTimeNs(); - sorted_window_time_ns = omega_search_ctx->GetSortedWindowTimeNs(); - average_recall_eval_time_ns = omega_search_ctx->GetAverageRecallEvalTimeNs(); - prediction_feature_prep_time_ns = - omega_search_ctx->GetPredictionFeaturePrepTimeNs(); - report_visit_candidate_time_ns = - omega_search_ctx->GetReportVisitCandidateTimeNs(); - report_hop_time_ns = omega_search_ctx->GetReportHopTimeNs(); - update_top_candidates_time_ns = - omega_search_ctx->GetUpdateTopCandidatesTimeNs(); - push_traversal_window_time_ns = - omega_search_ctx->GetPushTraversalWindowTimeNs(); - collected_gt_advance_count = omega_search_ctx->GetCollectedGtAdvanceCount(); - should_stop_calls_with_advance = - omega_search_ctx->GetShouldStopCallsWithAdvance(); - max_prediction_calls_per_should_stop = - omega_search_ctx->GetMaxPredictionCallsPerShouldStop(); + const OmegaFinalStats final_stats = CollectOmegaFinalStats(omega_search_ctx); LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu, early_stop=%d", - cmps, hops, hnsw_ctx->topk_heap().size(), enable_early_stopping); + final_stats.cmps, final_stats.hops, hnsw_ctx->topk_heap().size(), + enable_early_stopping); if (enable_early_stopping) { + const bool model_loaded = IsModelLoaded(); size_t scan_cmps = hnsw_ctx->get_scan_num(); uint64_t pairwise_dist_cnt = hnsw_ctx->get_pairwise_dist_num(); - uint64_t pure_search_time_ns = 0; - uint64_t hook_dispatch_time_ns = 0; + OmegaTimingSummary timing; + timing.query_total_time_ns = query_total_time_ns; + timing.query_reset_time_ns = query_reset_time_ns; + timing.query_search_time_ns = query_search_time_ns; + timing.query_setup_time_ns = query_setup_time_ns; + timing.hook_total_time_ns = hook_total_time_ns; + timing.hook_body_time_ns = 
hook_body_time_ns; if (collect_control_timing) { - pure_search_time_ns = + timing.pure_search_time_ns = query_search_time_ns > hook_total_time_ns ? (query_search_time_ns - hook_total_time_ns) : 0; - hook_dispatch_time_ns = + timing.hook_dispatch_time_ns = hook_total_time_ns > hook_body_time_ns ? (hook_total_time_ns - hook_body_time_ns) : 0; } - bool expected = false; - if (debug_stats_logged_.compare_exchange_strong(expected, true)) { - LOG_INFO("OMEGA runtime stats: model_loaded=%d target_recall=%.4f " - "scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d " - "collected_gt=%d predicted_recall_avg=%.4f " - "predicted_recall_at_target=%.4f early_stop_hit=%d " - "should_stop_calls=%llu prediction_calls=%llu " - "advance_calls=%llu collected_gt_advance=%llu " - "max_pred_per_stop=%llu should_stop_ms=%.3f " - "prediction_eval_ms=%.3f", - IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, - static_cast(pairwise_dist_cnt), cmps, - collected_gt, - predicted_recall_avg, predicted_recall_at_target, - (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, - should_stop_calls, prediction_calls, - should_stop_calls_with_advance, collected_gt_advance_count, - max_prediction_calls_per_should_stop, - static_cast(should_stop_time_ns) / 1e6, - static_cast(prediction_eval_time_ns) / 1e6); - } + const bool omega_early_stop_hit = + early_stop_hit || final_stats.omega_early_stop_hit != 0; + LogOmegaRuntimeStatsOnce(&debug_stats_logged_, model_loaded, target_recall, + scan_cmps, pairwise_dist_cnt, final_stats, + omega_early_stop_hit); if (ShouldLogQueryStats(query_seq)) { - if (collect_control_timing) { - LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " - "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " - "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " - "early_stop_hit=%d should_stop_calls=%llu " - "prediction_calls=%llu advance_calls=%llu " - "collected_gt_advance=%llu max_pred_per_stop=%llu " - "should_stop_ms=%.3f prediction_eval_ms=%.3f " - "setup_ms=%.3f reset_query_ms=%.3f " - "core_search_ms=%.3f hook_total_ms=%.3f hook_body_ms=%.3f " - "hook_dispatch_ms=%.3f pure_search_ms=%.3f " - "report_visit_candidate_ms=%.3f " - "report_hop_ms=%.3f update_top_candidates_ms=%.3f " - "push_traversal_window_ms=%.3f total_ms=%.3f", - static_cast(query_seq), - IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, - static_cast(pairwise_dist_cnt), cmps, - collected_gt, - predicted_recall_avg, predicted_recall_at_target, - (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, - should_stop_calls, prediction_calls, - should_stop_calls_with_advance, collected_gt_advance_count, - max_prediction_calls_per_should_stop, - static_cast(should_stop_time_ns) / 1e6, - static_cast(prediction_eval_time_ns) / 1e6, - static_cast(query_setup_time_ns) / 1e6, - static_cast(query_reset_time_ns) / 1e6, - static_cast(query_search_time_ns) / 1e6, - static_cast(hook_total_time_ns) / 1e6, - static_cast(hook_body_time_ns) / 1e6, - static_cast(hook_dispatch_time_ns) / 1e6, - static_cast(pure_search_time_ns) / 1e6, - static_cast(report_visit_candidate_time_ns) / 1e6, - static_cast(report_hop_time_ns) / 1e6, - static_cast(update_top_candidates_time_ns) / 1e6, - static_cast(push_traversal_window_time_ns) / 1e6, - static_cast(query_total_time_ns) / 1e6); - } else { - LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " - "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " - "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " - "early_stop_hit=%d should_stop_calls=%llu " - "prediction_calls=%llu advance_calls=%llu " - "collected_gt_advance=%llu max_pred_per_stop=%llu " - "should_stop_ms=%.3f prediction_eval_ms=%.3f " - "setup_ms=%.3f reset_query_ms=%.3f " - "core_search_ms=%.3f search_with_hooks_ms=%.3f total_ms=%.3f", - static_cast(query_seq), - IsModelLoaded() ? 1 : 0, target_recall, scan_cmps, - static_cast(pairwise_dist_cnt), cmps, - collected_gt, - predicted_recall_avg, predicted_recall_at_target, - (early_stop_hit || omega_early_stop_hit != 0) ? 
1 : 0, - should_stop_calls, prediction_calls, - should_stop_calls_with_advance, collected_gt_advance_count, - max_prediction_calls_per_should_stop, - static_cast(should_stop_time_ns) / 1e6, - static_cast(prediction_eval_time_ns) / 1e6, - static_cast(query_setup_time_ns) / 1e6, - static_cast(query_reset_time_ns) / 1e6, - static_cast(query_search_time_ns) / 1e6, - static_cast(query_search_time_ns) / 1e6, - static_cast(query_total_time_ns) / 1e6); - } + LogOmegaQueryStats(query_seq, model_loaded, target_recall, scan_cmps, + pairwise_dist_cnt, final_stats, timing, + collect_control_timing, omega_early_stop_hit); } } @@ -463,51 +604,8 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm // search-core timer and happens after logging. hnsw_ctx->topk_to_result(); - // Collect training records (only in training mode) if (training_mode_enabled_) { - size_t record_count = omega_search_get_training_records_count(omega_search); - - if (record_count > 0 && omega_ctx != nullptr) { - const void* records_ptr = omega_search_get_training_records(omega_search); - const auto* records_vec = static_cast*>(records_ptr); - - for (size_t i = 0; i < record_count; ++i) { - const auto& omega_record = (*records_vec)[i]; - core_interface::TrainingRecord record; - record.query_id = omega_record.query_id; - record.hops_visited = omega_record.hops; - record.cmps_visited = omega_record.cmps; - record.dist_1st = omega_record.dist_1st; - record.dist_start = omega_record.dist_start; - - if (omega_record.traversal_window_stats.size() == 7) { - std::copy(omega_record.traversal_window_stats.begin(), - omega_record.traversal_window_stats.end(), - record.traversal_window_stats.begin()); - } - - record.label = omega_record.label; - omega_ctx->add_training_record(std::move(record)); - } - - LOG_DEBUG("Collected %zu training records for query_id=%d", record_count, query_id); - } - - // Collect gt_cmps data - if (omega_ctx != nullptr) { - size_t gt_cmps_count = 
omega_search_get_gt_cmps_count(omega_search); - if (gt_cmps_count > 0) { - const int* gt_cmps_ptr = omega_search_get_gt_cmps(omega_search); - int total_cmps = omega_search_get_total_cmps(omega_search); - if (gt_cmps_ptr != nullptr) { - std::vector gt_cmps_vec(gt_cmps_ptr, gt_cmps_ptr + gt_cmps_count); - for (auto& v : gt_cmps_vec) { - if (v < 0) v = total_cmps; - } - omega_ctx->set_gt_cmps(gt_cmps_vec, total_cmps); - } - } - } + CollectOmegaTrainingOutputs(omega_search, omega_ctx, query_id); } omega_search_destroy(omega_search); diff --git a/src/core/interface/indexes/omega_index.cc b/src/core/interface/indexes/omega_index.cc index f48942687..e78e46640 100644 --- a/src/core/interface/indexes/omega_index.cc +++ b/src/core/interface/indexes/omega_index.cc @@ -16,6 +16,7 @@ #include "algorithm/omega/omega_streamer.h" #include "algorithm/omega/omega_params.h" #include "algorithm/hnsw/hnsw_params.h" +#include "omega_training_session.h" #include namespace zvec::core_interface { @@ -58,41 +59,13 @@ int OmegaIndex::CreateAndInitStreamer(const BaseIndexParam ¶m) { } -zvec::Status OmegaIndex::EnableTrainingMode(bool enable) { +ITrainingSession::Pointer OmegaIndex::CreateTrainingSession() { if (auto* omega_streamer = streamer_ ? dynamic_cast(streamer_.get()) : nullptr) { - omega_streamer->EnableTrainingMode(enable); - } - return zvec::Status::OK(); -} - -void OmegaIndex::SetCurrentQueryId(int query_id) { - if (auto* omega_streamer = - streamer_ ? dynamic_cast(streamer_.get()) - : nullptr) { - omega_streamer->SetCurrentQueryId(query_id); - } -} - -std::vector OmegaIndex::GetTrainingRecords() const { - // Training records are returned per search through OmegaContext / - // SearchResult.training_records_. OmegaIndex itself does not keep a shared - // training-record buffer. - return {}; -} - -void OmegaIndex::ClearTrainingRecords() { - // No-op by design: OmegaIndex does not own per-search training records. 
-} - -void OmegaIndex::SetTrainingGroundTruth( - const std::vector>& ground_truth, int k_train) { - if (auto* omega_streamer = - streamer_ ? dynamic_cast(streamer_.get()) - : nullptr) { - omega_streamer->SetTrainingGroundTruth(ground_truth, k_train); + return std::make_shared(omega_streamer); } + return nullptr; } int OmegaIndex::_prepare_for_search( diff --git a/src/core/interface/indexes/omega_training_session.cc b/src/core/interface/indexes/omega_training_session.cc new file mode 100644 index 000000000..f7976e1c5 --- /dev/null +++ b/src/core/interface/indexes/omega_training_session.cc @@ -0,0 +1,113 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "omega_training_session.h" + +#include "algorithm/omega/omega_streamer.h" + +namespace zvec::core_interface { + +zvec::Status OmegaTrainingSession::Start(const TrainingSessionConfig& config) { + std::lock_guard lock(mutex_); + if (streamer_ == nullptr) { + return zvec::Status::InvalidArgument("Omega streamer is not available"); + } + + ResetArtifactsLocked(); + topk_ = config.topk; + num_queries_ = config.ground_truth.size(); + streamer_->SetTrainingGroundTruth(config.ground_truth, config.k_train); + streamer_->EnableTrainingMode(true); + active_ = true; + return zvec::Status::OK(); +} + +void OmegaTrainingSession::BeginQuery(int query_id) { + if (streamer_ != nullptr) { + streamer_->SetCurrentQueryId(query_id); + } +} + +void OmegaTrainingSession::CollectQueryArtifacts(QueryTrainingArtifacts&& artifacts) { + std::lock_guard lock(mutex_); + if (!artifacts.records.empty()) { + records_.insert(records_.end(), + std::make_move_iterator(artifacts.records.begin()), + std::make_move_iterator(artifacts.records.end())); + } + if (!artifacts.gt_cmps_per_rank.empty() && artifacts.training_query_id >= 0) { + gt_cmps_map_[artifacts.training_query_id] = { + std::move(artifacts.gt_cmps_per_rank), artifacts.total_cmps}; + } +} + +TrainingArtifacts OmegaTrainingSession::ConsumeArtifacts() { + std::lock_guard lock(mutex_); + + TrainingArtifacts artifacts; + artifacts.records = std::move(records_); + + if (!gt_cmps_map_.empty()) { + size_t num_queries = num_queries_; + if (num_queries == 0) { + num_queries = static_cast(gt_cmps_map_.rbegin()->first) + 1; + } + size_t topk = topk_; + if (topk == 0) { + for (const auto& entry : gt_cmps_map_) { + if (!entry.second.first.empty()) { + topk = entry.second.first.size(); + break; + } + } + } + + artifacts.gt_cmps_data.num_queries = num_queries; + artifacts.gt_cmps_data.topk = topk; + artifacts.gt_cmps_data.gt_cmps.resize(num_queries); + artifacts.gt_cmps_data.total_cmps.resize(num_queries, 0); + for (size_t q = 0; q < 
num_queries; ++q) { + artifacts.gt_cmps_data.gt_cmps[q].resize(topk, 0); + } + for (const auto& entry : gt_cmps_map_) { + size_t query_id = static_cast(entry.first); + if (query_id >= num_queries) { + continue; + } + const auto& [gt_cmps_per_rank, total_cmps] = entry.second; + artifacts.gt_cmps_data.total_cmps[query_id] = total_cmps; + for (size_t rank = 0; rank < gt_cmps_per_rank.size() && rank < topk; ++rank) { + artifacts.gt_cmps_data.gt_cmps[query_id][rank] = gt_cmps_per_rank[rank]; + } + } + } + + gt_cmps_map_.clear(); + return artifacts; +} + +void OmegaTrainingSession::Finish() { + std::lock_guard lock(mutex_); + if (active_ && streamer_ != nullptr) { + streamer_->EnableTrainingMode(false); + } + active_ = false; +} + +void OmegaTrainingSession::ResetArtifactsLocked() { + records_.clear(); + gt_cmps_map_.clear(); +} + +} // namespace zvec::core_interface diff --git a/src/core/interface/indexes/omega_training_session.h b/src/core/interface/indexes/omega_training_session.h new file mode 100644 index 000000000..c4041488e --- /dev/null +++ b/src/core/interface/indexes/omega_training_session.h @@ -0,0 +1,56 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include + +namespace zvec { +namespace core { +class OmegaStreamer; +} // namespace core +namespace core_interface { + +class OmegaTrainingSession : public ITrainingSession { + public: + explicit OmegaTrainingSession(core::OmegaStreamer* streamer) + : streamer_(streamer) {} + + zvec::Status Start(const TrainingSessionConfig& config) override; + + void BeginQuery(int query_id) override; + + void CollectQueryArtifacts(QueryTrainingArtifacts&& artifacts) override; + + TrainingArtifacts ConsumeArtifacts() override; + + void Finish() override; + + private: + void ResetArtifactsLocked(); + + core::OmegaStreamer* streamer_{nullptr}; + std::mutex mutex_; + size_t topk_{0}; + size_t num_queries_{0}; + bool active_{false}; + std::vector records_; + std::map, int>> gt_cmps_map_; +}; + +} // namespace core_interface +} // namespace zvec diff --git a/src/db/index/column/vector_column/vector_column_indexer.cc b/src/db/index/column/vector_column/vector_column_indexer.cc index 467027472..f0cac5e83 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.cc +++ b/src/db/index/column/vector_column/vector_column_indexer.cc @@ -200,30 +200,26 @@ Result VectorColumnIndexer::Search( Status::InternalError("Failed to search vector")); } - if (training_mode_enabled_) { + core_interface::ITrainingSession::Pointer training_session; + { + std::lock_guard lock(training_mutex_); + training_session = training_session_; + } + + if (training_session != nullptr) { LOG_INFO( "VectorColumnIndexer training search: query_id=%d records=%zu gt_cmps=%zu total_cmps=%d", search_result.training_query_id_, search_result.training_records_.size(), search_result.gt_cmps_per_rank_.size(), search_result.total_cmps_); } - // Collect training records from search result (stored in context during search) - // This is thread-safe because each search has its own context - if (training_mode_enabled_ && !search_result.training_records_.empty()) { - 
std::lock_guard lock(training_mutex_); - collected_records_.insert(collected_records_.end(), - std::make_move_iterator(search_result.training_records_.begin()), - std::make_move_iterator(search_result.training_records_.end())); - } - - // Collect gt_cmps data from search result (for OMEGA training) - if (training_mode_enabled_ && !search_result.gt_cmps_per_rank_.empty() && - search_result.training_query_id_ >= 0) { - std::lock_guard lock(training_mutex_); - gt_cmps_map_[search_result.training_query_id_] = { - std::move(search_result.gt_cmps_per_rank_), - search_result.total_cmps_ - }; + if (training_session != nullptr) { + core_interface::QueryTrainingArtifacts artifacts; + artifacts.records = std::move(search_result.training_records_); + artifacts.gt_cmps_per_rank = std::move(search_result.gt_cmps_per_rank_); + artifacts.total_cmps = search_result.total_cmps_; + artifacts.training_query_id = search_result.training_query_id_; + training_session->CollectQueryArtifacts(std::move(artifacts)); } auto result = std::make_shared( @@ -233,110 +229,32 @@ Result VectorColumnIndexer::Search( return result; } -// Training mode method implementations -Status VectorColumnIndexer::EnableTrainingMode(bool enable) { - std::lock_guard lock(training_mutex_); - training_mode_enabled_ = enable; - - // Propagate to underlying index if it exists and supports training +core_interface::ITrainingCapable* VectorColumnIndexer::GetTrainingCapability() const { if (index != nullptr) { - if (auto* training_capable = index->GetTrainingCapability()) { - return training_capable->EnableTrainingMode(enable); - } + return index->GetTrainingCapability(); } - - return Status::OK(); + return nullptr; } -void VectorColumnIndexer::SetCurrentQueryId(int query_id) { - // Propagate to underlying index if it exists and supports training +core_interface::ITrainingSession::Pointer +VectorColumnIndexer::CreateTrainingSession() const { if (index != nullptr) { if (auto* training_capable = 
index->GetTrainingCapability()) { - training_capable->SetCurrentQueryId(query_id); + return training_capable->CreateTrainingSession(); } } + return nullptr; } -std::vector VectorColumnIndexer::GetTrainingRecords() const { - std::lock_guard lock(training_mutex_); - // All records are already collected in collected_records_ during Search() - // The underlying index records are cleared after each Search to avoid duplication - return collected_records_; -} - -void VectorColumnIndexer::ClearTrainingRecords() { +void VectorColumnIndexer::SetTrainingSession( + const core_interface::ITrainingSession::Pointer& session) { std::lock_guard lock(training_mutex_); - collected_records_.clear(); - gt_cmps_map_.clear(); - - // Propagate to underlying index if it exists and supports training - if (index != nullptr) { - if (auto* training_capable = index->GetTrainingCapability()) { - training_capable->ClearTrainingRecords(); - } - } -} - -void VectorColumnIndexer::SetTrainingGroundTruth( - const std::vector>& ground_truth, int k_train) { - // Propagate to underlying index if it exists and supports training - if (index != nullptr) { - if (auto* training_capable = index->GetTrainingCapability()) { - training_capable->SetTrainingGroundTruth(ground_truth, k_train); - } - } + training_session_ = session; } -core_interface::GtCmpsData VectorColumnIndexer::GetGtCmpsData() const { +void VectorColumnIndexer::ClearTrainingSession() { std::lock_guard lock(training_mutex_); - - core_interface::GtCmpsData result; - if (gt_cmps_map_.empty()) { - return result; - } - - // Find max query_id to determine array size - int max_query_id = gt_cmps_map_.rbegin()->first; - result.num_queries = max_query_id + 1; - - // Determine topk from first non-empty entry - for (const auto& entry : gt_cmps_map_) { - if (!entry.second.first.empty()) { - result.topk = entry.second.first.size(); - break; - } - } - - // Initialize arrays - result.gt_cmps.resize(result.num_queries); - 
result.total_cmps.resize(result.num_queries, 0); - - for (size_t q = 0; q < result.num_queries; ++q) { - result.gt_cmps[q].resize(result.topk, 0); - } - - // Fill in collected data - for (const auto& entry : gt_cmps_map_) { - int query_id = entry.first; - const auto& gt_cmps_vec = entry.second.first; - int total = entry.second.second; - - if (query_id >= 0 && query_id < static_cast(result.num_queries)) { - result.total_cmps[query_id] = total; - for (size_t r = 0; r < gt_cmps_vec.size() && r < result.topk; ++r) { - result.gt_cmps[query_id][r] = gt_cmps_vec[r]; - } - } - } - - return result; -} - -core_interface::ITrainingCapable* VectorColumnIndexer::GetTrainingCapability() const { - if (index != nullptr) { - return index->GetTrainingCapability(); - } - return nullptr; + training_session_.reset(); } } // namespace zvec diff --git a/src/db/index/column/vector_column/vector_column_indexer.h b/src/db/index/column/vector_column/vector_column_indexer.h index 123c00d03..3c48a987a 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.h +++ b/src/db/index/column/vector_column/vector_column_indexer.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include "db/common/constants.h" @@ -100,65 +101,11 @@ class VectorColumnIndexer { */ core_interface::ITrainingCapable* GetTrainingCapability() const; - /** - * @brief Enable or disable training mode for collecting training features. - * - * Propagates the training mode setting to the underlying index. - * When enabled, searches will collect training features. - * - * @param enable True to enable training mode, false to disable - * @return Status indicating success or failure - */ - Status EnableTrainingMode(bool enable); - - /** - * @brief Set the query ID for the next search operation. - * - * Must be called before Search() when training mode is enabled. - * The query_id will be propagated to the underlying index. 
- * - * @param query_id Unique identifier for the query - */ - void SetCurrentQueryId(int query_id); - - /** - * @brief Get all collected training records. - * - * Returns a copy of all training records collected from the - * underlying index since training mode was enabled. - * - * @return Vector of TrainingRecord structures - */ - std::vector GetTrainingRecords() const; + core_interface::ITrainingSession::Pointer CreateTrainingSession() const; - /** - * @brief Clear all collected training records. - * - * Clears training records from both this layer and the underlying index. - */ - void ClearTrainingRecords(); + void SetTrainingSession(const core_interface::ITrainingSession::Pointer& session); - /** - * @brief Set ground truth for training queries. - * - * Ground truth is used for real-time label computation during training. - * Labels are computed as: label=1 iff top k_train GT nodes are in current topk. - * - * @param ground_truth 2D vector: ground_truth[query_id][rank] = node_id - * @param k_train Number of GT nodes to check for label (typically 1) - */ - void SetTrainingGroundTruth(const std::vector>& ground_truth, - int k_train = 1); - - /** - * @brief Get collected gt_cmps data for all queries. - * - * Returns the gt_cmps data collected during training searches. - * The data is indexed by query_id. 
- * - * @return GtCmpsData structure with per-query gt_cmps values - */ - core_interface::GtCmpsData GetGtCmpsData() const; + void ClearTrainingSession(); public: std::string index_file_path() const { @@ -211,12 +158,8 @@ class VectorColumnIndexer { bool is_sparse_{false}; // TODO: eliminate the dynamic flag and make it // static/template/seperate class - // Training mode support - bool training_mode_enabled_{false}; mutable std::mutex training_mutex_; - mutable std::vector collected_records_; - // GT cmps data: gt_cmps_map_[query_id] = {gt_cmps_per_rank, total_cmps} - mutable std::map, int>> gt_cmps_map_; + core_interface::ITrainingSession::Pointer training_session_; }; diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 9a116b052..2896cb017 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -90,6 +90,78 @@ class ScopedTimer { std::string name_; std::chrono::high_resolution_clock::time_point start_; }; + +std::vector ResolveTrainingIndexers( + const Segment::Ptr& segment, const std::string& field_name, + const std::vector& provided_indexers) { + if (!provided_indexers.empty()) { + return provided_indexers; + } + return segment->get_vector_indexer(field_name); +} + +std::vector StartTrainingSessions( + const std::vector& indexers, + const std::vector>& ground_truth, size_t topk, + int k_train) { + std::vector sessions; + sessions.reserve(indexers.size()); + + core_interface::TrainingSessionConfig config; + config.ground_truth = ground_truth; + config.topk = topk; + config.k_train = k_train; + + for (auto& indexer : indexers) { + auto session = indexer->CreateTrainingSession(); + if (session == nullptr) { + LOG_WARN("Indexer does not expose a training session"); + sessions.emplace_back(); + continue; + } + auto status = session->Start(config); + if (!status.ok()) { + LOG_WARN("Failed to start training session on indexer: %s", + status.message().c_str()); + 
sessions.emplace_back(); + continue; + } + indexer->SetTrainingSession(session); + sessions.push_back(std::move(session)); + } + + return sessions; +} + +core_interface::TrainingArtifacts ConsumeTrainingArtifacts( + const std::vector& sessions) { + core_interface::TrainingArtifacts merged; + for (const auto& session : sessions) { + if (session == nullptr) { + continue; + } + auto artifacts = session->ConsumeArtifacts(); + merged.records.insert(merged.records.end(), + std::make_move_iterator(artifacts.records.begin()), + std::make_move_iterator(artifacts.records.end())); + if (merged.gt_cmps_data.gt_cmps.empty() && + !artifacts.gt_cmps_data.gt_cmps.empty()) { + merged.gt_cmps_data = std::move(artifacts.gt_cmps_data); + } + } + return merged; +} + +void FinishTrainingSessions( + const std::vector& indexers, + const std::vector& sessions) { + for (size_t i = 0; i < indexers.size(); ++i) { + if (i < sessions.size() && sessions[i] != nullptr) { + sessions[i]->Finish(); + } + indexers[i]->ClearTrainingSession(); + } +} } // namespace void TrainingDataCollector::ResetTimingStats() { @@ -115,12 +187,8 @@ Result TrainingDataCollector::CollectTrainingDataFr const TrainingDataCollectorOptions& options, const std::vector& query_doc_ids, const std::vector& provided_indexers) { - std::vector indexers; - if (!provided_indexers.empty()) { - indexers = provided_indexers; - } else { - indexers = segment->get_vector_indexer(field_name); - } + std::vector indexers = + ResolveTrainingIndexers(segment, field_name, provided_indexers); if (indexers.empty()) { return tl::make_unexpected( @@ -152,18 +220,13 @@ Result TrainingDataCollector::CollectTrainingDataFr "Failed to obtain ground truth")); } - LOG_INFO("Setting ground truth (%zu queries) and enabling training mode on %zu indexers", + LOG_INFO("Starting training sessions for %zu queries on %zu indexers", ground_truth.size(), indexers.size()); + std::vector training_sessions; { ScopedTimer timer("Step3: EnableTrainingMode"); - for 
(auto& indexer : indexers) { - indexer->SetTrainingGroundTruth(ground_truth, options.k_train); - auto status = indexer->EnableTrainingMode(true); - if (!status.ok()) { - LOG_WARN("Failed to enable training mode on indexer: %s", - status.message().c_str()); - } - } + training_sessions = StartTrainingSessions(indexers, ground_truth, + options.topk, options.k_train); } LOG_INFO("Performing training searches with ef=%d", options.ef_training); @@ -211,8 +274,9 @@ Result TrainingDataCollector::CollectTrainingDataFr // training_query_id through the search context reliably. In the // single-threaded calibration path, fall back to the existing global // query-id setter to preserve correct labels without races. - if (actual_threads == 1) { - indexers[0]->SetCurrentQueryId(static_cast(query_idx)); + if (actual_threads == 1 && !training_sessions.empty() && + training_sessions[0] != nullptr) { + training_sessions[0]->BeginQuery(static_cast(query_idx)); } auto search_result = indexers[0]->Search(vector_data, query_params); @@ -273,16 +337,16 @@ Result TrainingDataCollector::CollectTrainingDataFr } LOG_INFO("Collecting training records from indexers"); - std::vector all_records; + core_interface::TrainingArtifacts training_artifacts; { ScopedTimer timer("Step5: CollectTrainingRecords"); - for (auto& indexer : indexers) { - auto records = indexer->GetTrainingRecords(); - LOG_INFO("Collected %zu records from indexer", records.size()); - all_records.insert(all_records.end(), records.begin(), records.end()); - } + training_artifacts = ConsumeTrainingArtifacts(training_sessions); + LOG_INFO("Collected %zu records from training sessions", + training_artifacts.records.size()); } + auto& all_records = training_artifacts.records; + if (all_records.empty()) { LOG_WARN("No training records collected from any indexer"); } @@ -300,28 +364,22 @@ Result TrainingDataCollector::CollectTrainingDataFr all_records.size(), positive_count, negative_count); LOG_INFO("Collecting gt_cmps data from 
indexers"); - core_interface::GtCmpsData gt_cmps_data; + core_interface::GtCmpsData gt_cmps_data = std::move(training_artifacts.gt_cmps_data); { ScopedTimer timer("Step6: GetGtCmpsData"); - if (!indexers.empty()) { - gt_cmps_data = indexers[0]->GetGtCmpsData(); - if (gt_cmps_data.gt_cmps.empty()) { - LOG_WARN("No actual gt_cmps data collected, falling back to approximation"); - gt_cmps_data = - TrainingDataCollector::ComputeGtCmps(all_records, ground_truth, options.topk); - } else { - LOG_INFO("Got actual gt_cmps data for %zu queries, topk=%zu", - gt_cmps_data.num_queries, gt_cmps_data.topk); - } + if (gt_cmps_data.gt_cmps.empty()) { + LOG_WARN("No actual gt_cmps data collected, falling back to approximation"); + gt_cmps_data = + TrainingDataCollector::ComputeGtCmps(all_records, ground_truth, options.topk); + } else { + LOG_INFO("Got actual gt_cmps data for %zu queries, topk=%zu", + gt_cmps_data.num_queries, gt_cmps_data.topk); } } { ScopedTimer timer("Step7: DisableTrainingMode"); - for (auto& indexer : indexers) { - indexer->EnableTrainingMode(false); - indexer->ClearTrainingRecords(); - } + FinishTrainingSessions(indexers, training_sessions); } TrainingDataCollectorResult result; @@ -339,192 +397,12 @@ TrainingDataCollector::CollectTrainingData( const std::string& field_name, const TrainingDataCollectorOptions& options, const std::vector& provided_indexers) { - // Step 1: Get indexers first (needed for metric type) - std::vector indexers; - if (!provided_indexers.empty()) { - indexers = provided_indexers; - } else { - indexers = segment->get_vector_indexer(field_name); - } - - if (indexers.empty()) { - return tl::make_unexpected( - Status::InternalError("No vector indexers found for field: " + field_name)); - } - - // Get metric type from first indexer - MetricType metric_type = indexers[0]->metric_type(); - - // Step 2: Generate training queries using held-out approach - LOG_INFO("Generating %zu held-out training queries for field '%s'", - 
options.num_training_queries, field_name.c_str()); - - auto sampled = TrainingQueryGenerator::GenerateHeldOutQueries( - segment, field_name, options.num_training_queries, options.seed); - auto training_queries = std::move(sampled.vectors); - auto query_doc_ids = std::move(sampled.doc_ids); - - if (training_queries.empty()) { - return tl::make_unexpected( - Status::InternalError("Failed to generate training queries")); - } - - // Step 3: Compute ground truth (brute force or HNSW search, excluding self-matches) - LOG_INFO("Computing ground truth (topk=%zu, ef_groundtruth=%d, excluding self)", - options.topk, options.ef_groundtruth); - - auto ground_truth = ComputeGroundTruth( - segment, field_name, training_queries, options.topk, options.num_threads, - query_doc_ids, options.ef_groundtruth, metric_type, indexers); - - if (ground_truth.empty()) { - return tl::make_unexpected( - Status::InternalError("Failed to compute ground truth")); - } - - LOG_INFO("Found %zu indexers for field '%s' (will enable training on all, but only training-capable ones will collect)", - indexers.size(), field_name.c_str()); - - // Step 4: Set ground truth and enable training mode on all indexers - LOG_INFO("Setting ground truth (%zu queries) and enabling training mode on %zu indexers", - ground_truth.size(), indexers.size()); - for (auto& indexer : indexers) { - // Set ground truth for real-time label computation - indexer->SetTrainingGroundTruth(ground_truth, options.k_train); - - auto status = indexer->EnableTrainingMode(true); - if (!status.ok()) { - LOG_WARN("Failed to enable training mode on indexer: %s", - status.message().c_str()); - } - } - - // Step 5: Perform searches with large ef and collect training records - LOG_INFO("Performing training searches with ef=%d (parallel)", options.ef_training); - - std::vector> search_results; - - // Determine thread count - size_t actual_threads = options.num_threads; - if (actual_threads == 0) { - actual_threads = 
std::thread::hardware_concurrency(); - } - actual_threads = std::min(actual_threads, training_queries.size()); - - // Pre-allocate search_results for thread-safe access - search_results.resize(training_queries.size()); - - std::atomic completed_searches{0}; - std::mutex progress_mutex; - auto search_start = std::chrono::high_resolution_clock::now(); - - // Worker function for a range of queries - auto worker = [&](size_t start_idx, size_t end_idx) { - for (size_t query_idx = start_idx; query_idx < end_idx; ++query_idx) { - const auto& query_vector = training_queries[query_idx]; - - // Prepare query parameters - vector_column_params::VectorData vector_data; - vector_data.vector = vector_column_params::DenseVector{ - .data = const_cast(static_cast(query_vector.data())) - }; - - vector_column_params::QueryParams query_params; - query_params.topk = options.topk; - query_params.fetch_vector = false; - query_params.filter = segment->get_filter().get(); - - // Create OmegaQueryParams with training_query_id for parallel search - auto omega_params = std::make_shared(); - omega_params->set_ef(options.ef_training); - omega_params->set_training_query_id(static_cast(query_idx)); - query_params.query_params = omega_params; - - if (indexers.size() != 1) { - if (query_idx == start_idx) { - LOG_WARN("Expected 1 indexer but found %zu, using first one only", indexers.size()); - } - } - - auto search_result = indexers[0]->Search(vector_data, query_params); - if (!search_result.has_value()) { - LOG_WARN("Search failed for query %zu: %s", query_idx, - search_result.error().message().c_str()); - ++completed_searches; - continue; - } - - // Extract result doc IDs - auto& results = search_result.value(); - std::vector result_ids; - result_ids.reserve(results->count()); - auto iter = results->create_iterator(); - while (iter->valid()) { - result_ids.push_back(iter->doc_id()); - iter->next(); - } - - search_results[query_idx] = std::move(result_ids); - ++completed_searches; - } - }; - - // 
Launch threads - std::vector threads; - size_t queries_per_thread = (training_queries.size() + actual_threads - 1) / actual_threads; - - for (size_t t = 0; t < actual_threads; ++t) { - size_t start_idx = t * queries_per_thread; - size_t end_idx = std::min(start_idx + queries_per_thread, training_queries.size()); - if (start_idx < end_idx) { - threads.emplace_back(worker, start_idx, end_idx); - } - } - - // Wait for all threads - for (auto& thread : threads) { - thread.join(); + auto result = CollectTrainingDataWithGtCmps(segment, field_name, options, + provided_indexers); + if (!result.has_value()) { + return tl::make_unexpected(result.error()); } - - auto search_end = std::chrono::high_resolution_clock::now(); - auto total_ms = std::chrono::duration_cast(search_end - search_start).count(); - LOG_INFO("Training searches completed in %zu ms (%zu threads)", - total_ms, actual_threads); - - // Step 6: Collect training records from all indexers - LOG_INFO("Collecting training records from indexers"); - - std::vector all_records; - for (auto& indexer : indexers) { - auto records = indexer->GetTrainingRecords(); - LOG_INFO("Collected %zu records from indexer", records.size()); - all_records.insert(all_records.end(), records.begin(), records.end()); - } - - if (all_records.empty()) { - LOG_WARN("No training records collected from any indexer"); - } - - // Labels are now computed in real-time during search (no FillLabels needed) - // Count positive/negative labels for verification - size_t positive_count = 0, negative_count = 0; - for (const auto& record : all_records) { - if (record.label > 0) positive_count++; - else negative_count++; - } - LOG_INFO("Collected %zu records: %zu positive, %zu negative", - all_records.size(), positive_count, negative_count); - - // Step 7: Disable training mode and clear records - for (auto& indexer : indexers) { - indexer->EnableTrainingMode(false); - indexer->ClearTrainingRecords(); - } - - LOG_INFO("Successfully collected %zu training 
records with labels", - all_records.size()); - - return all_records; + return result->records; } std::vector> TrainingDataCollector::ComputeGroundTruth( diff --git a/src/include/zvec/core/interface/index.h b/src/include/zvec/core/interface/index.h index 63e80cc2c..a79062cc4 100644 --- a/src/include/zvec/core/interface/index.h +++ b/src/include/zvec/core/interface/index.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -150,14 +151,6 @@ class Index { * * @return Pointer to ITrainingCapable interface if supported, nullptr otherwise * - * @example - * @code - * if (auto* training = index->GetTrainingCapability()) { - * training->EnableTrainingMode(true); - * // ... perform searches ... - * auto records = training->GetTrainingRecords(); - * } - * @endcode */ virtual class ITrainingCapable* GetTrainingCapability() { return nullptr; // Default: capability not supported @@ -343,13 +336,7 @@ class OmegaIndex : public HNSWIndex, public ITrainingCapable { return this; } - // Implement ITrainingCapable interface - zvec::Status EnableTrainingMode(bool enable) override; - void SetCurrentQueryId(int query_id) override; - std::vector GetTrainingRecords() const override; - void ClearTrainingRecords() override; - void SetTrainingGroundTruth(const std::vector>& ground_truth, - int k_train = 1) override; + ITrainingSession::Pointer CreateTrainingSession() override; protected: virtual int CreateAndInitStreamer(const BaseIndexParam ¶m) override; diff --git a/src/include/zvec/core/interface/training_capable.h b/src/include/zvec/core/interface/training_capable.h index a527dd78c..158cc5930 100644 --- a/src/include/zvec/core/interface/training_capable.h +++ b/src/include/zvec/core/interface/training_capable.h @@ -14,85 +14,22 @@ #pragma once -#include -#include -#include +#include namespace zvec { namespace core_interface { /** - * @brief Training capability interface for indexes that support OMEGA training mode. 
+ * @brief Training capability interface for indexes that support post-build training. * * This interface follows the Capability Pattern, allowing indexes to optionally * provide training functionality without polluting the base Index class. - * - * Example usage: - * @code - * if (auto* training = index->GetTrainingCapability()) { - * training->EnableTrainingMode(true); - * // ... perform searches ... - * auto records = training->GetTrainingRecords(); - * } - * @endcode */ class ITrainingCapable { public: virtual ~ITrainingCapable() = default; - /** - * @brief Enable or disable training mode for collecting training features. - * - * When training mode is enabled: - * - Early stopping is disabled (complete HNSW search) - * - Training features are collected for each visited node - * - query_id must be set via SetCurrentQueryId() before each search - * - * @param enable True to enable training mode, false to disable - * @return Status indicating success or failure - */ - virtual zvec::Status EnableTrainingMode(bool enable) = 0; - - /** - * @brief Set the query ID for the next search operation. - * - * Must be called before search when training mode is enabled. - * The query_id will be included in all training records collected - * during that search. - * - * @param query_id Unique identifier for the query - */ - virtual void SetCurrentQueryId(int query_id) = 0; - - /** - * @brief Get all collected training records. - * - * Returns a copy of all training records collected since training mode - * was enabled or since the last ClearTrainingRecords() call. - * - * @return Vector of TrainingRecord structures - */ - virtual std::vector GetTrainingRecords() const = 0; - - /** - * @brief Clear all collected training records. - * - * Removes all training records from internal storage. Useful for - * starting a fresh training data collection session. - */ - virtual void ClearTrainingRecords() = 0; - - /** - * @brief Set ground truth for training queries. 
- * - * Ground truth is used for real-time label computation during training. - * Labels are computed as: label=1 iff top k_train GT nodes are in current topk. - * - * @param ground_truth 2D vector: ground_truth[query_id][rank] = node_id - * @param k_train Number of GT nodes to check for label (typically 1) - */ - virtual void SetTrainingGroundTruth(const std::vector>& ground_truth, - int k_train = 1) = 0; + virtual ITrainingSession::Pointer CreateTrainingSession() = 0; }; } // namespace core_interface diff --git a/src/include/zvec/core/interface/training_session.h b/src/include/zvec/core/interface/training_session.h new file mode 100644 index 000000000..102c1f5c2 --- /dev/null +++ b/src/include/zvec/core/interface/training_session.h @@ -0,0 +1,60 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include +#include + +namespace zvec::core_interface { + +struct TrainingSessionConfig { + std::vector> ground_truth; + size_t topk = 0; + int k_train = 1; +}; + +struct QueryTrainingArtifacts { + std::vector records; + std::vector gt_cmps_per_rank; + int total_cmps = 0; + int training_query_id = -1; +}; + +struct TrainingArtifacts { + std::vector records; + GtCmpsData gt_cmps_data; +}; + +class ITrainingSession { + public: + using Pointer = std::shared_ptr; + + virtual ~ITrainingSession() = default; + + virtual zvec::Status Start(const TrainingSessionConfig& config) = 0; + + virtual void BeginQuery(int query_id) = 0; + + virtual void CollectQueryArtifacts(QueryTrainingArtifacts&& artifacts) = 0; + + virtual TrainingArtifacts ConsumeArtifacts() = 0; + + virtual void Finish() = 0; +}; + +} // namespace zvec::core_interface From 9d4db08768ba0c56aeaac4362f9b0648ea3ab37d Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 21:16:46 +0800 Subject: [PATCH 075/126] cleanup: trim stale omega training paths --- src/core/algorithm/omega/omega_searcher.cc | 16 ++- src/core/algorithm/omega/omega_searcher.h | 14 +-- src/core/interface/indexes/hnsw_index.cc | 6 - src/core/interface/indexes/omega_index.cc | 8 -- .../column/vector_column/engine_helper.hpp | 10 +- src/db/index/segment/segment.cc | 23 +--- src/db/training/omega_model_trainer.cc | 8 -- src/db/training/omega_model_trainer.h | 12 -- src/db/training/omega_training_coordinator.cc | 10 +- src/db/training/omega_training_coordinator.h | 17 --- src/db/training/query_generator.cc | 75 ------------ src/db/training/query_generator.h | 47 -------- src/db/training/training_data_collector.cc | 107 +----------------- src/db/training/training_data_collector.h | 18 --- 14 files changed, 25 insertions(+), 346 deletions(-) diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 82c52b6f2..c5922e708 100644 --- 
a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -31,7 +31,6 @@ OmegaSearcher::OmegaSearcher(void) omega_model_(nullptr), omega_enabled_(false), use_omega_mode_(false), - target_recall_(0.95f), min_vector_threshold_(100000), current_vector_count_(0), window_size_(100) {} @@ -40,10 +39,17 @@ OmegaSearcher::~OmegaSearcher(void) { this->cleanup(); } +bool OmegaSearcher::should_use_omega() const { + if (DisableOmegaModelPrediction()) { + return true; + } + return omega_enabled_ && use_omega_mode_ && omega_model_ != nullptr && + omega_model_is_loaded(omega_model_); +} + int OmegaSearcher::init(const ailego::Params ¶ms) { // Get OMEGA-specific parameters omega_enabled_ = params.has("omega.enabled") ? params.get_as_bool("omega.enabled") : false; - target_recall_ = params.has("omega.target_recall") ? params.get_as_float("omega.target_recall") : 0.95f; min_vector_threshold_ = params.has("omega.min_vector_threshold") ? params.get_as_uint32("omega.min_vector_threshold") : 100000; window_size_ = params.has("omega.window_size") ? 
params.get_as_int32("omega.window_size") : 100; @@ -54,9 +60,9 @@ int OmegaSearcher::init(const ailego::Params ¶ms) { return ret; } - LOG_INFO("OmegaSearcher initialized (omega_enabled=%d, target_recall=%.2f, " - "min_threshold=%u, window_size=%d)", - omega_enabled_, target_recall_, min_vector_threshold_, window_size_); + LOG_INFO("OmegaSearcher initialized (omega_enabled=%d, min_threshold=%u, " + "window_size=%d)", + omega_enabled_, min_vector_threshold_, window_size_); return 0; } diff --git a/src/core/algorithm/omega/omega_searcher.h b/src/core/algorithm/omega/omega_searcher.h index 7554b57c1..0b3c2ee97 100644 --- a/src/core/algorithm/omega/omega_searcher.h +++ b/src/core/algorithm/omega/omega_searcher.h @@ -16,9 +16,6 @@ #include #include "../hnsw/hnsw_searcher.h" #include -#include -#include -#include namespace zvec { namespace core { @@ -68,15 +65,7 @@ class OmegaSearcher : public HnswSearcher { private: //! Check if OMEGA mode should be used - bool should_use_omega() const { - if (std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION") != nullptr && - std::string(std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION")) != "0") { - return true; - } - return omega_enabled_ && use_omega_mode_ && - omega_model_ != nullptr && - omega_model_is_loaded(omega_model_); - } + bool should_use_omega() const; //! 
Adaptive search with OMEGA predictions int adaptive_search(const void *query, const IndexQueryMeta &qmeta, @@ -87,7 +76,6 @@ class OmegaSearcher : public HnswSearcher { OmegaModelHandle omega_model_; bool omega_enabled_; bool use_omega_mode_; - float target_recall_; uint32_t min_vector_threshold_; size_t current_vector_count_; int window_size_; diff --git a/src/core/interface/indexes/hnsw_index.cc b/src/core/interface/indexes/hnsw_index.cc index 171ad3d8b..53344e30e 100644 --- a/src/core/interface/indexes/hnsw_index.cc +++ b/src/core/interface/indexes/hnsw_index.cc @@ -111,12 +111,6 @@ int HNSWIndex::_prepare_for_search( hnsw_search_param->training_query_id); } - if (const auto& omega_search_param = - std::dynamic_pointer_cast(search_param)) { - params.set(core::PARAM_OMEGA_SEARCHER_TARGET_RECALL, - omega_search_param->target_recall); - } - context->update(params); return 0; } diff --git a/src/core/interface/indexes/omega_index.cc b/src/core/interface/indexes/omega_index.cc index e78e46640..b6e590b7d 100644 --- a/src/core/interface/indexes/omega_index.cc +++ b/src/core/interface/indexes/omega_index.cc @@ -91,14 +91,6 @@ int OmegaIndex::_prepare_for_search( params.set(core::PARAM_OMEGA_SEARCHER_TRAINING_QUERY_ID, omega_search_param->training_query_id); } - } else { - // Fallback: try HNSW params for training_query_id - const auto &hnsw_search_param = - std::dynamic_pointer_cast(search_param); - if (hnsw_search_param && hnsw_search_param->training_query_id >= 0) { - params.set(core::PARAM_OMEGA_SEARCHER_TRAINING_QUERY_ID, - hnsw_search_param->training_query_id); - } } if (!params.empty()) { diff --git a/src/db/index/column/vector_column/engine_helper.hpp b/src/db/index/column/vector_column/engine_helper.hpp index b19c6203f..73f44a16a 100644 --- a/src/db/index/column/vector_column/engine_helper.hpp +++ b/src/db/index/column/vector_column/engine_helper.hpp @@ -164,7 +164,6 @@ class ProximaEngineHelper { } case IndexType::OMEGA: { - // OMEGA uses extended query params 
with target_recall auto omega_query_param_result = _build_common_query_param( query_params); @@ -175,18 +174,11 @@ class ProximaEngineHelper { } auto &omega_query_param = omega_query_param_result.value(); if (query_params.query_params) { - // Try to cast to OmegaQueryParams first if (auto* db_omega_query_params = dynamic_cast( - query_params.query_params.get())) { + query_params.query_params.get())) { omega_query_param->ef_search = db_omega_query_params->ef(); omega_query_param->target_recall = db_omega_query_params->target_recall(); omega_query_param->training_query_id = db_omega_query_params->training_query_id(); - } else if (auto* db_hnsw_query_params = dynamic_cast( - query_params.query_params.get())) { - // Fallback to HnswQueryParams (backward compatibility) - omega_query_param->ef_search = db_hnsw_query_params->ef(); - omega_query_param->training_query_id = db_hnsw_query_params->training_query_id(); - // target_recall will use default value (0.95f) } } return std::move(omega_query_param); diff --git a/src/db/index/segment/segment.cc b/src/db/index/segment/segment.cc index 47d6ab3bd..fdf13ee58 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -1597,27 +1597,16 @@ Status SegmentImpl::create_all_vector_index( Result SegmentImpl::merge_vector_indexer( const std::string &index_file_path, const std::string &column, const FieldSchema &field, int concurrency) { - - LOG_INFO("[TIMING] merge_vector_indexer START for field '%s'", column.c_str()); - auto timing_start = std::chrono::steady_clock::now(); - VectorColumnIndexer::Ptr vector_indexer = std::make_shared(index_file_path, field); vector_column_params::ReadOptions options{options_.enable_mmap_, true}; - LOG_INFO("[TIMING] About to Open (create_new=true)"); - auto open_start = std::chrono::steady_clock::now(); auto s = vector_indexer->Open(options); CHECK_RETURN_STATUS_EXPECTED(s); - auto open_end = std::chrono::steady_clock::now(); - LOG_INFO("[TIMING] Open completed in %ld ms", 
- std::chrono::duration_cast(open_end - open_start).count()); std::vector to_merge_indexers = vector_indexers_[column]; - LOG_INFO("[TIMING] to_merge_indexers count: %zu", to_merge_indexers.size()); - vector_column_params::MergeOptions merge_options; if (concurrency == 0) { @@ -1627,14 +1616,8 @@ Result SegmentImpl::merge_vector_indexer( } else { merge_options.write_concurrency = concurrency; } - LOG_INFO("[TIMING] About to Merge"); - auto merge_start = std::chrono::steady_clock::now(); s = vector_indexer->Merge(to_merge_indexers, filter_, merge_options); CHECK_RETURN_STATUS_EXPECTED(s); - auto merge_end = std::chrono::steady_clock::now(); - LOG_INFO("[TIMING] Merge completed in %ld ms", - std::chrono::duration_cast(merge_end - merge_start).count()); - // Check if this is a trainable index (OMEGA) auto* training_capable = vector_indexer->GetTrainingCapability(); @@ -2509,7 +2492,7 @@ TablePtr SegmentImpl::fetch_normal( const auto &block_offsets = get_persist_block_offsets(BlockType::SCALAR); const auto &block_metas = get_persist_block_metas(BlockType::SCALAR); - // Phase 1: Map each (doc_id, column) to its block and local row + // Map each (doc_id, column) to its block and local row. for (int output_row = 0; output_row < static_cast(indices.size()); ++output_row) { int doc_id = indices[output_row]; @@ -2553,7 +2536,7 @@ TablePtr SegmentImpl::fetch_normal( } } - // Phase 2: Execute batched fetch per block + // Execute batched fetches per block. for (const auto &[block_index, col_to_rows] : block_request_map) { std::vector fetch_columns; std::vector fetch_local_rows; @@ -2613,7 +2596,7 @@ TablePtr SegmentImpl::fetch_normal( } } - // Phase 3: Construct result arrays + // Construct the output arrays. 
std::vector> result_arrays(columns.size()); bool need_local_doc_id = false; diff --git a/src/db/training/omega_model_trainer.cc b/src/db/training/omega_model_trainer.cc index cdbaedb4d..1ef8e94b7 100644 --- a/src/db/training/omega_model_trainer.cc +++ b/src/db/training/omega_model_trainer.cc @@ -51,14 +51,6 @@ omega::GtCmpsData ConvertGtCmpsData(const core_interface::GtCmpsData& src) { } // namespace -Status OmegaModelTrainer::TrainModel( - const std::vector& training_records, - const OmegaModelTrainerOptions& options) { - // Call TrainModelWithGtCmps with empty gt_cmps_data - core_interface::GtCmpsData empty_gt_cmps; - return TrainModelWithGtCmps(training_records, empty_gt_cmps, options); -} - Status OmegaModelTrainer::TrainModelWithGtCmps( const std::vector& training_records, const core_interface::GtCmpsData& gt_cmps_data, diff --git a/src/db/training/omega_model_trainer.h b/src/db/training/omega_model_trainer.h index 4859188d4..ace1775f3 100644 --- a/src/db/training/omega_model_trainer.h +++ b/src/db/training/omega_model_trainer.h @@ -60,17 +60,6 @@ struct OmegaModelTrainerOptions { */ class OmegaModelTrainer { public: - /** - * @brief Train OMEGA model from collected training records - * - * @param training_records Training data collected from searches - * @param options Training configuration - * @return Status indicating success or failure - */ - static Status TrainModel( - const std::vector& training_records, - const OmegaModelTrainerOptions& options); - /** * @brief Train OMEGA model with gt_cmps data for table generation * @@ -86,7 +75,6 @@ class OmegaModelTrainer { const std::vector& training_records, const core_interface::GtCmpsData& gt_cmps_data, const OmegaModelTrainerOptions& options); - }; } // namespace zvec diff --git a/src/db/training/omega_training_coordinator.cc b/src/db/training/omega_training_coordinator.cc index f3753e08d..843a2b67f 100644 --- a/src/db/training/omega_training_coordinator.cc +++ b/src/db/training/omega_training_coordinator.cc 
@@ -33,7 +33,7 @@ constexpr uint32_t kOmegaQueryCacheVersion = 1; } // namespace -void WriteOmegaTimingStatsJson( +static void WriteOmegaTimingStatsJson( const std::string& output_path, const std::vector>& stats) { std::ofstream ofs(output_path); @@ -51,11 +51,11 @@ void WriteOmegaTimingStatsJson( ofs << "}\n"; } -std::string OmegaQueryCachePath(const std::string& model_output_dir) { +static std::string OmegaQueryCachePath(const std::string& model_output_dir) { return model_output_dir + "/training_queries.bin"; } -bool SaveOmegaTrainingQueryCache( +static bool SaveOmegaTrainingQueryCache( const std::string& model_output_dir, const std::vector>& queries, const std::vector& query_doc_ids) { @@ -90,7 +90,7 @@ bool SaveOmegaTrainingQueryCache( return ofs.good(); } -bool LoadOmegaTrainingQueryCache( +static bool LoadOmegaTrainingQueryCache( const std::string& model_output_dir, std::vector>* queries, std::vector* query_doc_ids) { @@ -192,7 +192,6 @@ Result CollectOmegaRetrainingData( collector_options.ef_groundtruth = params.ef_groundtruth; collector_options.topk = 100; collector_options.k_train = params.k_train; - collector_options.noise_scale = 0.01f; std::vector> cached_queries; std::vector cached_query_doc_ids; @@ -317,4 +316,3 @@ Status TrainOmegaModelAfterRetrainCollect( } } // namespace zvec - diff --git a/src/db/training/omega_training_coordinator.h b/src/db/training/omega_training_coordinator.h index 7247b1a2f..d8a8fcc53 100644 --- a/src/db/training/omega_training_coordinator.h +++ b/src/db/training/omega_training_coordinator.h @@ -34,22 +34,6 @@ struct OmegaTrainingParams { int k_train = 1; }; -void WriteOmegaTimingStatsJson( - const std::string& output_path, - const std::vector>& stats); - -std::string OmegaQueryCachePath(const std::string& model_output_dir); - -bool SaveOmegaTrainingQueryCache( - const std::string& model_output_dir, - const std::vector>& queries, - const std::vector& query_doc_ids); - -bool LoadOmegaTrainingQueryCache( - const std::string& 
model_output_dir, - std::vector>* queries, - std::vector* query_doc_ids); - OmegaTrainingParams ResolveOmegaTrainingParams( const IndexParams::Ptr& index_params); @@ -78,4 +62,3 @@ Status TrainOmegaModelAfterRetrainCollect( const std::string& field_name); } // namespace zvec - diff --git a/src/db/training/query_generator.cc b/src/db/training/query_generator.cc index d0edac84d..9b99ee849 100644 --- a/src/db/training/query_generator.cc +++ b/src/db/training/query_generator.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "query_generator.h" -#include #include #include @@ -78,56 +77,6 @@ SampledVectors TrainingQueryGenerator::SampleBaseVectorsWithIds( return result; } -std::vector> TrainingQueryGenerator::SampleBaseVectors( - const Segment::Ptr& segment, - const std::string& field_name, - size_t num_samples, - uint64_t seed) { - // Use the new method and extract just the vectors - auto sampled = SampleBaseVectorsWithIds(segment, field_name, num_samples, seed); - return std::move(sampled.vectors); -} - -std::vector> TrainingQueryGenerator::AddGaussianNoise( - const std::vector>& base_vectors, - float noise_scale, - uint64_t seed) { - if (base_vectors.empty()) { - LOG_WARN("Input base_vectors is empty, returning empty result"); - return {}; - } - - std::vector> noisy_vectors; - noisy_vectors.reserve(base_vectors.size()); - - // Random number generator for Gaussian noise - std::mt19937 rng(seed); - std::normal_distribution gaussian(0.0f, noise_scale); - - for (const auto& base_vector : base_vectors) { - if (base_vector.empty()) { - LOG_WARN("Encountered empty vector, skipping"); - continue; - } - - std::vector noisy_vector; - noisy_vector.reserve(base_vector.size()); - - // Add Gaussian noise to each dimension - for (float base_value : base_vector) { - float noise = gaussian(rng); - noisy_vector.push_back(base_value + noise); - } - - noisy_vectors.push_back(std::move(noisy_vector)); - } - - LOG_INFO("Added Gaussian noise (scale=%.4f) to %zu vectors", - 
noise_scale, noisy_vectors.size()); - - return noisy_vectors; -} - SampledVectors TrainingQueryGenerator::GenerateHeldOutQueries( const Segment::Ptr& segment, const std::string& field_name, @@ -148,28 +97,4 @@ SampledVectors TrainingQueryGenerator::GenerateHeldOutQueries( return result; } -std::vector> TrainingQueryGenerator::GenerateTrainingQueries( - const Segment::Ptr& segment, - const std::string& field_name, - size_t num_queries, - float noise_scale, - uint64_t seed) { - // Step 1: Sample base vectors - auto base_vectors = SampleBaseVectors(segment, field_name, num_queries, seed); - - if (base_vectors.empty()) { - LOG_ERROR("Failed to sample base vectors from segment"); - return {}; - } - - // Step 2: Add Gaussian noise - // Use a different seed for noise generation to avoid correlation - auto training_queries = AddGaussianNoise(base_vectors, noise_scale, seed + 1); - - LOG_INFO("Generated %zu training queries for field '%s'", - training_queries.size(), field_name.c_str()); - - return training_queries; -} - } // namespace zvec diff --git a/src/db/training/query_generator.h b/src/db/training/query_generator.h index af7c030b3..f7ac81b3c 100644 --- a/src/db/training/query_generator.h +++ b/src/db/training/query_generator.h @@ -55,34 +55,6 @@ class TrainingQueryGenerator { size_t num_samples, uint64_t seed = 42); - /** - * @brief Sample base vectors from a segment without doc_ids - * - * @param segment The segment to sample from (must be persisted) - * @param field_name The vector field name to sample - * @param num_samples Number of vectors to sample - * @param seed Random seed for reproducibility - * @return Vector of sampled vectors - */ - static std::vector> SampleBaseVectors( - const Segment::Ptr& segment, - const std::string& field_name, - size_t num_samples, - uint64_t seed = 42); - - /** - * @brief Add Gaussian noise to base vectors - * - * @param base_vectors Input vectors - * @param noise_scale Standard deviation of Gaussian noise - * @param seed Random 
seed for reproducibility - * @return Vectors with added noise - */ - static std::vector> AddGaussianNoise( - const std::vector>& base_vectors, - float noise_scale = 0.01f, - uint64_t seed = 42); - /** * @brief Generate training queries using held-out approach * @@ -100,25 +72,6 @@ class TrainingQueryGenerator { const std::string& field_name, size_t num_queries, uint64_t seed = 42); - - /** - * @brief Generate training queries with the sample-and-noise helper - * - * Combines sampling and noise addition in one step. - * - * @param segment The segment to sample from - * @param field_name The vector field name - * @param num_queries Number of training queries to generate - * @param noise_scale Standard deviation of Gaussian noise - * @param seed Random seed for reproducibility - * @return Training query vectors - */ - static std::vector> GenerateTrainingQueries( - const Segment::Ptr& segment, - const std::string& field_name, - size_t num_queries, - float noise_scale = 0.01f, - uint64_t seed = 42); }; } // namespace zvec diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 2896cb017..105d624e9 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -14,15 +14,12 @@ #include "training_data_collector.h" #include -#include #include -#include -#include -#include -#include -#include #include +#include +#include #include +#include #include #include #include "db/index/column/vector_column/vector_column_params.h" @@ -56,35 +53,15 @@ void RecordTimingStat(const std::string& name, int64_t duration_ms) { } } -static std::ofstream& GetDebugLog() { - static std::ofstream log_file("/tmp/omega_training_debug.log", std::ios::app); - return log_file; -} - -static void DebugLog(const std::string& msg) { - auto now = std::chrono::system_clock::now(); - auto time_t_now = std::chrono::system_clock::to_time_t(now); - auto ms = std::chrono::duration_cast( - now.time_since_epoch()) % 
1000; - - auto& log = GetDebugLog(); - log << std::put_time(std::localtime(&time_t_now), "%Y-%m-%d %H:%M:%S") - << "." << std::setfill('0') << std::setw(3) << ms.count() - << " | " << msg << std::endl; - log.flush(); -} - class ScopedTimer { public: - ScopedTimer(const std::string& name) : name_(name) { + explicit ScopedTimer(const std::string& name) : name_(name) { start_ = std::chrono::high_resolution_clock::now(); - DebugLog("[START] " + name_); } ~ScopedTimer() { auto end = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(end - start_).count(); RecordTimingStat(name_, duration); - DebugLog("[END] " + name_ + " | Duration: " + std::to_string(duration) + " ms"); } private: std::string name_; @@ -244,8 +221,6 @@ Result TrainingDataCollector::CollectTrainingDataFr search_results.resize(training_queries.size()); - std::atomic completed_searches{0}; - std::mutex progress_mutex; auto search_start = std::chrono::high_resolution_clock::now(); auto worker = [&](size_t start_idx, size_t end_idx) { @@ -283,7 +258,6 @@ Result TrainingDataCollector::CollectTrainingDataFr if (!search_result.has_value()) { LOG_WARN("Search failed for query %zu: %s", query_idx, search_result.error().message().c_str()); - ++completed_searches; continue; } @@ -297,19 +271,6 @@ Result TrainingDataCollector::CollectTrainingDataFr } search_results[query_idx] = std::move(result_ids); - - size_t completed = ++completed_searches; - if (completed % 100 == 0 || completed == training_queries.size()) { - std::lock_guard lock(progress_mutex); - auto now = std::chrono::high_resolution_clock::now(); - auto elapsed_ms = - std::chrono::duration_cast(now - search_start) - .count(); - DebugLog(" Training search progress: " + - std::to_string(completed) + "/" + - std::to_string(training_queries.size()) + ", elapsed: " + - std::to_string(elapsed_ms) + " ms"); - } } }; @@ -391,20 +352,6 @@ Result TrainingDataCollector::CollectTrainingDataFr } // ============ END DEBUG TIMING 
UTILITIES ============ -Result> -TrainingDataCollector::CollectTrainingData( - const Segment::Ptr& segment, - const std::string& field_name, - const TrainingDataCollectorOptions& options, - const std::vector& provided_indexers) { - auto result = CollectTrainingDataWithGtCmps(segment, field_name, options, - provided_indexers); - if (!result.has_value()) { - return tl::make_unexpected(result.error()); - } - return result->records; -} - std::vector> TrainingDataCollector::ComputeGroundTruth( const Segment::Ptr& segment, const std::string& field_name, @@ -442,18 +389,14 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Faster for large datasets, approximate results // ============================================================ if (ef_groundtruth > 0) { - DebugLog("[ComputeGroundTruth] Using HNSW search with ef=" + std::to_string(ef_groundtruth)); - // Use provided indexers if available, otherwise get from segment // IMPORTANT: We must use provided_indexers when available because after Flush, // segment->get_vector_indexer() returns stale indexers with cleared in-memory data std::vector indexers; if (!provided_indexers.empty()) { indexers = provided_indexers; - DebugLog("[ComputeGroundTruth] Using provided indexers (count=" + std::to_string(indexers.size()) + ")"); } else { indexers = segment->get_vector_indexer(field_name); - DebugLog("[ComputeGroundTruth] Using indexers from segment (count=" + std::to_string(indexers.size()) + ")"); } if (indexers.empty()) { @@ -469,7 +412,6 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // We use parallel warmup with all threads to load data faster. 
// ======================================================== { - DebugLog("[ComputeGroundTruth] Warming up HNSW index..."); auto warmup_start = std::chrono::high_resolution_clock::now(); // Warmup count: use a fraction of queries spread across threads @@ -480,8 +422,6 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Parallel warmup using std::thread std::vector warmup_threads; - std::atomic warmup_completed{0}; - auto warmup_worker = [&](size_t start_idx, size_t count) { for (size_t i = 0; i < count && (start_idx + i) < queries.size(); ++i) { size_t q_idx = start_idx + i; @@ -500,8 +440,7 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( omega_params->set_training_query_id(-1); // Warmup, don't collect training data query_params.query_params = omega_params; - indexers[0]->Search(vector_data, query_params); - ++warmup_completed; + static_cast(indexers[0]->Search(vector_data, query_params)); } }; @@ -521,9 +460,6 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( auto warmup_end = std::chrono::high_resolution_clock::now(); auto warmup_ms = std::chrono::duration_cast(warmup_end - warmup_start).count(); - DebugLog("[ComputeGroundTruth] Warmup completed in " + std::to_string(warmup_ms) + - " ms (" + std::to_string(warmup_completed.load()) + " queries, " + - std::to_string(warmup_threads.size()) + " threads)"); // Note: If warmup takes very long (>60s), recommend using ef_groundtruth=0 (Eigen brute force) if (warmup_ms > 60000) { @@ -539,9 +475,6 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( size_t actual_threads = num_threads > 0 ? 
num_threads : std::thread::hardware_concurrency(); actual_threads = std::min(actual_threads, queries.size()); - std::atomic completed{0}; - auto search_start = std::chrono::high_resolution_clock::now(); - auto worker = [&](size_t start_idx, size_t end_idx) { for (size_t q = start_idx; q < end_idx; ++q) { // Prepare query parameters (exactly same as training searches) @@ -581,15 +514,6 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( } ground_truth[q] = std::move(result_ids); } - - // Progress logging - size_t done = ++completed; - if (done % 500 == 0 || done == queries.size()) { - auto elapsed = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - search_start).count(); - DebugLog("[ComputeGroundTruth] HNSW progress: " + std::to_string(done) + "/" + - std::to_string(queries.size()) + ", elapsed: " + std::to_string(elapsed) + " ms"); - } } }; @@ -611,7 +535,6 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( auto end_time = std::chrono::high_resolution_clock::now(); auto total_ms = std::chrono::duration_cast(end_time - start_time).count(); - DebugLog("[ComputeGroundTruth] HNSW search completed in " + std::to_string(total_ms) + " ms"); LOG_INFO("Computed ground truth (HNSW ef=%d) for %zu queries in %zu ms", ef_groundtruth, queries.size(), total_ms); return ground_truth; @@ -622,8 +545,6 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Branch 2: Eigen brute force (ef_groundtruth == 0) // Exact results, uses batch matrix multiplication // ============================================================ - DebugLog("[ComputeGroundTruth] Using Eigen brute force search"); - // Convert zvec MetricType to omega MetricType omega::MetricType omega_metric; switch (metric_type) { @@ -640,11 +561,9 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( } // Step 1: Load all base vectors into memory - DebugLog("[ComputeGroundTruth] Loading " + std::to_string(doc_count) + " base vectors..."); auto load_start = 
std::chrono::high_resolution_clock::now(); std::vector base_vectors(doc_count * dim); - std::atomic loaded_count{0}; std::atomic load_error{false}; // Load vectors in parallel @@ -675,14 +594,6 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( } std::memcpy(base_vectors.data() + doc_idx * dim, vec.data(), dim * sizeof(float)); - ++loaded_count; - - // Progress logging - size_t count = loaded_count.load(); - if (count % 100000 == 0) { - DebugLog("[ComputeGroundTruth] Loaded " + std::to_string(count) + "/" + - std::to_string(doc_count) + " vectors"); - } } }; @@ -703,8 +614,6 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( auto load_end = std::chrono::high_resolution_clock::now(); auto load_ms = std::chrono::duration_cast(load_end - load_start).count(); - DebugLog("[ComputeGroundTruth] Loaded " + std::to_string(loaded_count) + - " vectors in " + std::to_string(load_ms) + " ms"); if (load_error) { LOG_ERROR("Failed to load all base vectors, cannot compute ground truth"); @@ -718,7 +627,6 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( } // Step 3: Call OmegaLib's fast ground truth computation (Eigen) - DebugLog("[ComputeGroundTruth] Computing ground truth with Eigen..."); auto compute_start = std::chrono::high_resolution_clock::now(); ground_truth = omega::ComputeGroundTruth( @@ -734,12 +642,9 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( auto compute_end = std::chrono::high_resolution_clock::now(); auto compute_ms = std::chrono::duration_cast(compute_end - compute_start).count(); - DebugLog("[ComputeGroundTruth] Computed ground truth in " + std::to_string(compute_ms) + " ms"); auto total_end = std::chrono::high_resolution_clock::now(); auto total_ms = std::chrono::duration_cast(total_end - start_time).count(); - DebugLog("[ComputeGroundTruth] Total time: " + std::to_string(total_ms) + - " ms (load: " + std::to_string(load_ms) + " ms, compute: " + std::to_string(compute_ms) + " ms)"); LOG_INFO("Computed ground truth (Eigen 
brute force) for %zu queries in %zu ms (load: %zu ms, compute: %zu ms)", queries.size(), total_ms, load_ms, compute_ms); @@ -840,8 +745,6 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( segment, field_name, options.num_training_queries, options.seed); training_queries = std::move(sampled.vectors); query_doc_ids = std::move(sampled.doc_ids); - DebugLog(" Generated " + std::to_string(training_queries.size()) + - " held-out queries (with doc_ids for self-exclusion)"); } return CollectTrainingDataFromQueriesImpl(segment, field_name, diff --git a/src/db/training/training_data_collector.h b/src/db/training/training_data_collector.h index 104a93fec..1b520c209 100644 --- a/src/db/training/training_data_collector.h +++ b/src/db/training/training_data_collector.h @@ -33,9 +33,6 @@ struct TrainingDataCollectorOptions { // Number of training queries to generate size_t num_training_queries = 1000; - // Gaussian noise scale for query generation - float noise_scale = 0.01f; - // ef parameter for training searches (large value for recall ≈ 1) int ef_training = 1000; @@ -85,21 +82,6 @@ class TrainingDataCollector { static TimingStats ConsumeTimingStats(); - /** - * @brief Collect training data from a persisted segment - * - * @param segment The segment to collect data from (must be persisted) - * @param field_name Vector field name to train on - * @param options Collection options - * @param indexers Optional specific indexers to use (if empty, will use segment->get_vector_indexer) - * @return Training records with labels filled - */ - static Result> CollectTrainingData( - const Segment::Ptr& segment, - const std::string& field_name, - const TrainingDataCollectorOptions& options, - const std::vector& indexers = {}); - /** * @brief Collect training data with gt_cmps information for table generation * From 7a9d8e234b4ea2db2955e0d9cf86fd38def07d0e Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 22:05:58 +0800 Subject: [PATCH 076/126] cleanup: add benchmark 
concurrent warmup defaults --- scripts/benchmark_hnsw_vs_omega.json | 14 +++- scripts/benchmark_hnsw_vs_omega.py | 112 +++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 2 deletions(-) diff --git a/scripts/benchmark_hnsw_vs_omega.json b/scripts/benchmark_hnsw_vs_omega.json index 5a72b15b3..232bee693 100644 --- a/scripts/benchmark_hnsw_vs_omega.json +++ b/scripts/benchmark_hnsw_vs_omega.json @@ -1,5 +1,10 @@ { "cohere_1m": { + "warmup": { + "enabled": true, + "duration": 15, + "num_concurrency": "4" + }, "common": { "case_type": "Performance768D1M", "num_concurrency": "12,14,16,18,20", @@ -18,7 +23,7 @@ "path": "cohere_1m_omega", "db_label": "16c64g-v0.1-omega-m15-ef300", "target_recalls": [ - 0.91 + 0.90 ], "args": { "min_vector_threshold": 100000, @@ -35,6 +40,11 @@ } }, "cohere_10m": { + "warmup": { + "enabled": true, + "duration": 15, + "num_concurrency": "4" + }, "common": { "case_type": "Performance768D10M", "num_concurrency": "12,14,16,18,20", @@ -54,7 +64,7 @@ "path": "cohere_10m_omega", "db_label": "16c64g-v0.1-omega-m50-ef300", "target_recalls": [ - 0.91 + 0.95 ], "args": { "min_vector_threshold": 100000, diff --git a/scripts/benchmark_hnsw_vs_omega.py b/scripts/benchmark_hnsw_vs_omega.py index 2531c9f19..39e8a5760 100644 --- a/scripts/benchmark_hnsw_vs_omega.py +++ b/scripts/benchmark_hnsw_vs_omega.py @@ -79,9 +79,73 @@ def parse_args() -> argparse.Namespace: default=None, help="Directory containing VectorDBBench JSON result files", ) + parser.add_argument( + "--concurrent-warmup", + action="store_true", + help="Run a concurrent-only warmup pass before the measured search benchmark", + ) + parser.add_argument( + "--warmup-duration", + type=int, + default=None, + help="Warmup concurrency duration in seconds " + "(default: config warmup.duration or 15)", + ) + parser.add_argument( + "--warmup-num-concurrency", + type=str, + default=None, + help="Warmup concurrency list, e.g. 
'4' or '4,8' " + "(default: config warmup.num_concurrency or the first configured concurrency)", + ) return parser.parse_args() +def resolve_warmup_settings( + args: argparse.Namespace, common: dict[str, object], config: dict[str, object] +) -> tuple[bool, int, str]: + warmup_config = config.get("warmup", {}) + enabled = args.concurrent_warmup or bool(warmup_config.get("enabled", False)) + configured_concurrency = str(common.get("num_concurrency", "1")) + default_num_concurrency = str( + warmup_config.get("num_concurrency", configured_concurrency.split(",")[0]) + ) + num_concurrency = args.warmup_num_concurrency or default_num_concurrency + duration = args.warmup_duration or int(warmup_config.get("duration", 15)) + return enabled, duration, num_concurrency + + +def run_concurrent_warmup( + *, + label: str, + vectordbbench_cmd: list[str], + client_name: str, + path: Path, + db_label: str, + case_type: str, + common_args: dict[str, object], + specific_args: dict[str, object], + vectordbbench_root: Path, + dry_run: bool, + extra_flags: list[str] | None = None, +) -> int: + print(f"\n[Warmup] Running concurrent-only warmup for {label}...") + warmup_flags = ["skip-drop-old", "skip-load", "skip-search-serial"] + if extra_flags: + warmup_flags.extend(extra_flags) + warmup_cmd = build_base_command( + vectordbbench_cmd, + client_name, + path, + db_label, + case_type, + common_args, + specific_args, + warmup_flags, + ) + return run_command(warmup_cmd, vectordbbench_root, dry_run=dry_run) + + def main() -> int: args = parse_args() config_path = Path(args.config).expanduser().resolve() @@ -102,6 +166,9 @@ def main() -> int: hnsw_config = must_get(config, "hnsw") omega_config = must_get(config, "omega") profiling_config = config.get("profiling", {}) + warmup_enabled, warmup_duration, warmup_num_concurrency = resolve_warmup_settings( + args, common, config + ) case_type = must_get(common, "case_type") hnsw_path = resolve_index_path(benchmark_dir, must_get(hnsw_config, "path")) 
@@ -117,6 +184,9 @@ def main() -> int: hnsw_common_args = {k: v for k, v in common.items() if k != "case_type"} hnsw_specific_args = hnsw_config.get("args", {}) omega_specific_args = omega_config.get("args", {}) + warmup_common_args = dict(hnsw_common_args) + warmup_common_args["num_concurrency"] = warmup_num_concurrency + warmup_common_args["concurrency_duration"] = warmup_duration print("=" * 70) print(f"VectorDBBench: Zvec HNSW vs OMEGA ({dataset_name})") @@ -130,6 +200,14 @@ def main() -> int: print(f"hnsw_path: {hnsw_path}") print(f"omega_path: {omega_path}") print(f"target_recalls: {target_recalls}") + print( + "concurrent_warmup: " + + ( + f"enabled (num_concurrency={warmup_num_concurrency}, duration={warmup_duration}s)" + if warmup_enabled + else "disabled" + ) + ) print( "build_mode: " + ("retrain model only (reuse existing index)" if args.retrain_only else "build index + train model") @@ -166,6 +244,22 @@ def main() -> int: ) if not args.build_only: + if warmup_enabled: + warmup_ret = run_concurrent_warmup( + label="HNSW", + vectordbbench_cmd=vectordbbench_cmd, + client_name="zvec", + path=hnsw_path, + db_label=hnsw_db_label, + case_type=case_type, + common_args=warmup_common_args, + specific_args=hnsw_specific_args, + vectordbbench_root=vectordbbench_root, + dry_run=args.dry_run, + ) + if warmup_ret != 0 and not args.dry_run: + print("ERROR: HNSW concurrent warmup failed!") + return 1 print("\n[Phase 2] Running HNSW search benchmark...") before_files = snapshot_result_files(results_dir) cmd = build_base_command( @@ -267,6 +361,24 @@ def main() -> int: if not args.build_only: for target_recall in target_recalls: print_header(f"OMEGA Search Benchmark (target_recall={target_recall})") + if warmup_enabled: + warmup_extra_flags = ["retrain-only"] if args.retrain_only else None + warmup_ret = run_concurrent_warmup( + label=f"OMEGA target_recall={target_recall}", + vectordbbench_cmd=vectordbbench_cmd, + client_name="zvecomega", + path=omega_path, + 
db_label=omega_db_label, + case_type=case_type, + common_args=warmup_common_args, + specific_args={**omega_specific_args, "target_recall": target_recall}, + vectordbbench_root=vectordbbench_root, + dry_run=args.dry_run, + extra_flags=warmup_extra_flags, + ) + if warmup_ret != 0 and not args.dry_run: + print("ERROR: OMEGA concurrent warmup failed!") + return 1 before_files = snapshot_result_files(results_dir) search_flags = ["skip-drop-old", "skip-load"] if args.retrain_only: From c2a82c93f143c4b36706c96f8fa8b7bb10cb10b3 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 29 Mar 2026 22:43:05 +0800 Subject: [PATCH 077/126] cleanup: remove unused omega prediction microbench --- tools/core/CMakeLists.txt | 8 - tools/core/omega_predict_microbench.cc | 267 ------------------------- 2 files changed, 275 deletions(-) delete mode 100644 tools/core/omega_predict_microbench.cc diff --git a/tools/core/CMakeLists.txt b/tools/core/CMakeLists.txt index a2c649a54..4fde32bba 100644 --- a/tools/core/CMakeLists.txt +++ b/tools/core/CMakeLists.txt @@ -52,14 +52,6 @@ cc_binary( LIBS gflags yaml-cpp magic_enum roaring ${ZVEC_TOOL_CORE_INTERFACE_LIBS} ${ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS} ) -cc_binary( - NAME omega_predict_microbench - STRICT PACKED - SRCS omega_predict_microbench.cc - INCS ${PROJECT_ROOT_DIR}/src/core/ ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include - LIBS omega -) - cc_binary( NAME recall_original STRICT PACKED diff --git a/tools/core/omega_predict_microbench.cc b/tools/core/omega_predict_microbench.cc deleted file mode 100644 index 04d913b68..000000000 --- a/tools/core/omega_predict_microbench.cc +++ /dev/null @@ -1,267 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "omega/model_manager.h" - -namespace { - -struct Options { - std::string model_dir; - uint64_t iterations = 1000000; - uint64_t warmup = 10000; - int threads = 1; - size_t feature_pool_size = 
1024; - bool random_features = false; -}; - -struct Stats { - double elapsed_sec = 0.0; - double avg_us_per_call = 0.0; - double qps = 0.0; - double checksum = 0.0; -}; - -void PrintUsage(const char* argv0) { - std::cerr - << "Usage: " << argv0 << " --model-dir [options]\n" - << "Options:\n" - << " --iterations Total measured calls across all threads\n" - << " --warmup Warmup calls per thread\n" - << " --threads Number of benchmark threads\n" - << " --feature-pool-size Number of synthetic feature rows\n" - << " --random-features Use random synthetic features\n"; -} - -bool ParseArgs(int argc, char** argv, Options* opts) { - for (int i = 1; i < argc; ++i) { - const char* arg = argv[i]; - if (std::strcmp(arg, "--model-dir") == 0 && i + 1 < argc) { - opts->model_dir = argv[++i]; - } else if (std::strcmp(arg, "--iterations") == 0 && i + 1 < argc) { - opts->iterations = std::strtoull(argv[++i], nullptr, 10); - } else if (std::strcmp(arg, "--warmup") == 0 && i + 1 < argc) { - opts->warmup = std::strtoull(argv[++i], nullptr, 10); - } else if (std::strcmp(arg, "--threads") == 0 && i + 1 < argc) { - opts->threads = std::max(1, std::atoi(argv[++i])); - } else if (std::strcmp(arg, "--feature-pool-size") == 0 && i + 1 < argc) { - opts->feature_pool_size = - std::max(1, std::strtoull(argv[++i], nullptr, 10)); - } else if (std::strcmp(arg, "--random-features") == 0) { - opts->random_features = true; - } else if (std::strcmp(arg, "--help") == 0 || - std::strcmp(arg, "-h") == 0) { - PrintUsage(argv[0]); - return false; - } else { - std::cerr << "Unknown argument: " << arg << "\n"; - PrintUsage(argv[0]); - return false; - } - } - if (opts->model_dir.empty()) { - PrintUsage(argv[0]); - return false; - } - return true; -} - -std::vector> BuildFeaturePool(const Options& opts) { - std::vector> pool(opts.feature_pool_size); - std::mt19937 rng(12345); - std::uniform_real_distribution frac_dist(0.0f, 1.0f); - std::uniform_int_distribution hops_dist(3, 64); - std::uniform_int_distribution 
cmps_dist(150, 5000); - - for (size_t i = 0; i < pool.size(); ++i) { - auto& f = pool[i]; - if (opts.random_features) { - f[0] = static_cast(hops_dist(rng)); - f[1] = static_cast(cmps_dist(rng)); - f[2] = 0.10f + 0.25f * frac_dist(rng); - f[3] = 0.20f + 0.30f * frac_dist(rng); - f[4] = 0.10f + 0.20f * frac_dist(rng); - f[5] = 0.0005f + 0.02f * frac_dist(rng); - f[6] = 0.01f + 0.08f * frac_dist(rng); - f[7] = 0.20f + 0.30f * frac_dist(rng); - f[8] = 0.11f + 0.20f * frac_dist(rng); - f[9] = 0.10f + 0.18f * frac_dist(rng); - f[10] = 0.13f + 0.24f * frac_dist(rng); - } else { - f = {20.0f + static_cast(i % 7), - 1800.0f + static_cast((i * 37) % 700), - 0.125f + 0.001f * static_cast(i % 11), - 0.337f + 0.001f * static_cast(i % 13), - 0.182f + 0.001f * static_cast(i % 17), - 0.008f + 0.0001f * static_cast(i % 19), - 0.091f + 0.0007f * static_cast(i % 23), - 0.304f + 0.0008f * static_cast(i % 29), - 0.171f + 0.0005f * static_cast(i % 31), - 0.149f + 0.0005f * static_cast(i % 37), - 0.212f + 0.0006f * static_cast(i % 41)}; - } - } - return pool; -} - -float CalibrateProbability(const omega::ModelTables& tables, double probability) { - if (tables.threshold_table.empty()) { - return static_cast(probability); - } - int score_key = static_cast(std::round(probability * 10000.0)); - auto it = tables.threshold_table.upper_bound(score_key); - if (it != tables.threshold_table.begin()) { - --it; - } - return it->second; -} - -template -Stats RunBenchmark(const std::string& name, const Options& opts, Fn fn) { - const uint64_t total_iterations = std::max(1, opts.iterations); - const int thread_count = std::max(1, opts.threads); - const uint64_t base_iters = total_iterations / thread_count; - const uint64_t extra_iters = total_iterations % thread_count; - - std::atomic ready{0}; - std::atomic go{false}; - std::vector workers; - std::vector checksums(thread_count, 0.0); - - auto start = std::chrono::steady_clock::time_point{}; - auto end = std::chrono::steady_clock::time_point{}; - - 
for (int tid = 0; tid < thread_count; ++tid) { - workers.emplace_back([&, tid]() { - const uint64_t iters = base_iters + (static_cast(tid) < extra_iters ? 1 : 0); - double local_sum = 0.0; - for (uint64_t i = 0; i < opts.warmup; ++i) { - local_sum += fn(tid, i); - } - ready.fetch_add(1, std::memory_order_release); - while (!go.load(std::memory_order_acquire)) { - } - for (uint64_t i = 0; i < iters; ++i) { - local_sum += fn(tid, i + opts.warmup); - } - checksums[tid] = local_sum; - }); - } - - while (ready.load(std::memory_order_acquire) != thread_count) { - } - start = std::chrono::steady_clock::now(); - go.store(true, std::memory_order_release); - - for (auto& worker : workers) { - worker.join(); - } - end = std::chrono::steady_clock::now(); - - const double elapsed_sec = - std::chrono::duration_cast>(end - start) - .count(); - double checksum = 0.0; - for (double value : checksums) { - checksum += value; - } - - Stats stats; - stats.elapsed_sec = elapsed_sec; - stats.avg_us_per_call = elapsed_sec * 1e6 / static_cast(total_iterations); - stats.qps = static_cast(total_iterations) / elapsed_sec; - stats.checksum = checksum; - - std::cout << std::fixed << std::setprecision(3) - << name << ": total_calls=" << total_iterations - << " threads=" << thread_count - << " elapsed_s=" << stats.elapsed_sec - << " avg_us_per_call=" << stats.avg_us_per_call - << " qps=" << stats.qps - << " checksum=" << stats.checksum << "\n"; - - return stats; -} - -} // namespace - -int main(int argc, char** argv) { - Options opts; - if (!ParseArgs(argc, argv, &opts)) { - return 1; - } - - omega::ModelManager manager; - if (!manager.LoadModel(opts.model_dir)) { - std::cerr << "Failed to load model from " << opts.model_dir << "\n"; - return 2; - } - - const omega::GBDTModel* model = manager.GetModel(); - const omega::ModelTables* tables = manager.GetTables(); - if (model == nullptr || tables == nullptr || !model->IsLoaded()) { - std::cerr << "Model manager did not return a loaded model\n"; - 
return 3; - } - - auto feature_pool = BuildFeaturePool(opts); - std::vector> feature_pool_double(feature_pool.size()); - for (size_t i = 0; i < feature_pool.size(); ++i) { - for (size_t j = 0; j < feature_pool[i].size(); ++j) { - feature_pool_double[i][j] = static_cast(feature_pool[i][j]); - } - } - - std::cout << "OMEGA prediction microbenchmark\n"; - std::cout << "model_dir=" << opts.model_dir - << " iterations=" << opts.iterations - << " warmup=" << opts.warmup - << " threads=" << opts.threads - << " feature_pool_size=" << opts.feature_pool_size - << " random_features=" << (opts.random_features ? 1 : 0) << "\n"; - - RunBenchmark("pack_only", opts, [&](int tid, uint64_t iter) -> double { - const auto& src = feature_pool[(iter + static_cast(tid)) % feature_pool.size()]; - std::array dst{}; - for (size_t j = 0; j < src.size(); ++j) { - dst[j] = static_cast(src[j]); - } - return dst[0] + dst[10]; - }); - - RunBenchmark("predict_raw_prebuilt", opts, [&](int tid, uint64_t iter) -> double { - const auto& features = - feature_pool_double[(iter + static_cast(tid)) % feature_pool_double.size()]; - return model->PredictRaw(features.data(), static_cast(features.size())); - }); - - RunBenchmark("predict_prob_prebuilt", opts, [&](int tid, uint64_t iter) -> double { - const auto& features = - feature_pool_double[(iter + static_cast(tid)) % feature_pool_double.size()]; - return model->Predict(features.data(), static_cast(features.size())); - }); - - RunBenchmark("predict_calibrated_pack", opts, [&](int tid, uint64_t iter) -> double { - const auto& src = feature_pool[(iter + static_cast(tid)) % feature_pool.size()]; - std::array dst{}; - for (size_t j = 0; j < src.size(); ++j) { - dst[j] = static_cast(src[j]); - } - double raw_score = - model->PredictRaw(dst.data(), static_cast(dst.size())); - double probability = 1.0 / (1.0 + std::exp(-raw_score)); - return CalibrateProbability(*tables, probability); - }); - - return 0; -} From 1d3d89ea01b212703dbf5f39a8d398e36e996a27 Mon Sep 
17 00:00:00 2001 From: Yifan Peng Date: Mon, 30 Mar 2026 00:06:11 +0800 Subject: [PATCH 078/126] cleanup: switch benchmark flow to core tools --- scripts/benchmark_hnsw_vs_omega.py | 691 +++++++-------- scripts/benchmark_lib.py | 1264 ++++++++++++++++++++------- src/core/interface/index_factory.cc | 81 ++ 3 files changed, 1331 insertions(+), 705 deletions(-) diff --git a/scripts/benchmark_hnsw_vs_omega.py b/scripts/benchmark_hnsw_vs_omega.py index 39e8a5760..ace78fa10 100644 --- a/scripts/benchmark_hnsw_vs_omega.py +++ b/scripts/benchmark_hnsw_vs_omega.py @@ -1,27 +1,31 @@ #!/usr/bin/env python3 -"""Generic VectorDBBench runner for Zvec HNSW vs Zvec+OMEGA.""" +"""Benchmark Zvec HNSW vs OMEGA without VectorDBBench.""" + +from __future__ import annotations import argparse import sys from pathlib import Path + from benchmark_lib import ( BenchmarkResult, - build_base_command, build_hnsw_profile, + build_index, build_omega_profile, + compute_recall_with_zvec, + discover_index_files, get_offline_load_duration, - get_run_result, - latency_summary_from_profile, load_dataset_config, merge_omega_detailed_profile, must_get, + prepare_dataset_artifacts, print_header, + resolve_core_tools, + resolve_dataset_spec, resolve_index_path, resolve_paths, - resolve_vectordbbench_command, - run_command, - run_command_capture, - snapshot_result_files, + run_concurrency_benchmark, + run_profile_benchmark, validate_profile_output, write_grouped_profiling_summaries, write_offline_summary, @@ -29,9 +33,7 @@ def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser( - description="Generic VectorDBBench runner for Zvec HNSW vs OMEGA" - ) + parser = argparse.ArgumentParser(description="Benchmark Zvec HNSW vs OMEGA") parser.add_argument("--config", required=True, help="Path to benchmark JSON config") parser.add_argument( "--dataset", @@ -44,7 +46,7 @@ def parse_args() -> argparse.Namespace: default=None, help="Optional comma-separated override for omega.target_recalls in the 
JSON config", ) - parser.add_argument("--dry-run", action="store_true", help="Print commands without executing") + parser.add_argument("--dry-run", action="store_true", help="Print actions without executing") parser.add_argument("--skip-hnsw", action="store_true", help="Skip HNSW benchmark") parser.add_argument("--skip-omega", action="store_true", help="Skip OMEGA benchmark") parser.add_argument("--build-only", action="store_true", help="Only build index, skip search") @@ -60,105 +62,287 @@ def parse_args() -> argparse.Namespace: default=None, help="Path to zvec repo root (default: auto-detect from this script)", ) - parser.add_argument( - "--vectordbbench-root", - type=str, - default=None, - help="Path to VectorDBBench repo root (default: $VECTORDBBENCH_ROOT or sibling repo)", - ) parser.add_argument( "--benchmark-dir", type=str, default=None, - help="Directory used to store benchmark artifacts " - "(default: config benchmark_dir, $ZVEC_BENCHMARK_DIR, or /benchmark_results)", + help="Directory used to store benchmark artifacts", ) parser.add_argument( - "--results-dir", + "--dataset-root", type=str, default=None, - help="Directory containing VectorDBBench JSON result files", - ) - parser.add_argument( - "--concurrent-warmup", - action="store_true", - help="Run a concurrent-only warmup pass before the measured search benchmark", + help="Root directory containing the raw dataset files", ) - parser.add_argument( - "--warmup-duration", - type=int, - default=None, - help="Warmup concurrency duration in seconds " - "(default: config warmup.duration or 15)", + return parser.parse_args() + + +def run_hnsw( + *, + args: argparse.Namespace, + dataset_name: str, + dataset_spec: dict[str, object], + dataset_artifacts: dict[str, object], + bench_bin: Path, + hnsw_path: Path, + hnsw_db_label: str, + common: dict[str, object], + hnsw_config: dict[str, object], + profiling_config: dict[str, object], +) -> BenchmarkResult: + print_header("HNSW Benchmark") + + hnsw_specific_args = 
hnsw_config.get("args", {}) + if not args.search_only: + print("\n[Phase 1] Building HNSW index...") + offline_metrics = build_index( + index_kind="HNSW", + index_path=hnsw_path, + dataset_spec=dataset_spec, + dataset_artifacts=dataset_artifacts, + common_args=common, + specific_args=hnsw_specific_args, + retrain_only=False, + dry_run=args.dry_run, + ) + if not args.dry_run: + write_offline_summary(hnsw_path, hnsw_db_label, offline_metrics) + + if args.build_only: + return BenchmarkResult( + type="HNSW", + path=str(hnsw_path), + success=True, + target_recall=None, + load_duration=get_offline_load_duration(hnsw_path), + ) + + index_files = discover_index_files(hnsw_path) + recall = compute_recall_with_zvec( + index_kind="HNSW", + index_path=hnsw_path, + dataset_artifacts=dataset_artifacts, + common_args=common, + target_recall=None, + dry_run=args.dry_run, ) - parser.add_argument( - "--warmup-num-concurrency", - type=str, - default=None, - help="Warmup concurrency list, e.g. '4' or '4,8' " - "(default: config warmup.num_concurrency or the first configured concurrency)", + + benchmark = run_concurrency_benchmark( + bench_bin=bench_bin, + index_files=index_files, + dataset_artifacts=dataset_artifacts, + dataset_spec=dataset_spec, + common_args=common, + target_recall=None, + dry_run=args.dry_run, ) - return parser.parse_args() + online = benchmark["summary"] + success = online.get("retcode", 0) == 0 + hnsw_profile = None + if success and not args.dry_run: + print("\n[Profiling] Running HNSW single-thread profiling pass...") + profile_ret, profile_output, profile_bench = run_profile_benchmark( + bench_bin=bench_bin, + index_files=index_files, + dataset_artifacts=dataset_artifacts, + dataset_spec=dataset_spec, + common_args=common, + target_recall=None, + dry_run=False, + extra_env={ + "ZVEC_LOG_LEVEL": "INFO", + "ZVEC_HNSW_LOG_QUERY_STATS": "1", + "ZVEC_HNSW_LOG_QUERY_LIMIT": str(profiling_config.get("hnsw_query_limit", 2000)), + }, + ) + 
validate_profile_output("HNSW", profile_ret, profile_output, "HNSW query stats:") + hnsw_profile = build_hnsw_profile( + {"qps": online.get("qps"), "recall": recall}, + profile_output, + profile_bench, + ) -def resolve_warmup_settings( - args: argparse.Namespace, common: dict[str, object], config: dict[str, object] -) -> tuple[bool, int, str]: - warmup_config = config.get("warmup", {}) - enabled = args.concurrent_warmup or bool(warmup_config.get("enabled", False)) - configured_concurrency = str(common.get("num_concurrency", "1")) - default_num_concurrency = str( - warmup_config.get("num_concurrency", configured_concurrency.split(",")[0]) + return BenchmarkResult( + type="HNSW", + path=str(hnsw_path), + success=success, + target_recall=None, + load_duration=get_offline_load_duration(hnsw_path), + qps=online.get("qps"), + avg_latency_ms=online.get("avg_latency_ms"), + p50_latency_ms=online.get("p50_latency_ms"), + p90_latency_ms=online.get("p90_latency_ms"), + p95_latency_ms=online.get("p95_latency_ms"), + p99_latency_ms=online.get("p99_latency_ms"), + recall=recall, + profiling=hnsw_profile, ) - num_concurrency = args.warmup_num_concurrency or default_num_concurrency - duration = args.warmup_duration or int(warmup_config.get("duration", 15)) - return enabled, duration, num_concurrency -def run_concurrent_warmup( +def run_omega( *, - label: str, - vectordbbench_cmd: list[str], - client_name: str, - path: Path, - db_label: str, - case_type: str, - common_args: dict[str, object], - specific_args: dict[str, object], - vectordbbench_root: Path, - dry_run: bool, - extra_flags: list[str] | None = None, -) -> int: - print(f"\n[Warmup] Running concurrent-only warmup for {label}...") - warmup_flags = ["skip-drop-old", "skip-load", "skip-search-serial"] - if extra_flags: - warmup_flags.extend(extra_flags) - warmup_cmd = build_base_command( - vectordbbench_cmd, - client_name, - path, - db_label, - case_type, - common_args, - specific_args, - warmup_flags, - ) - return 
run_command(warmup_cmd, vectordbbench_root, dry_run=dry_run) + args: argparse.Namespace, + dataset_spec: dict[str, object], + dataset_artifacts: dict[str, object], + bench_bin: Path, + omega_path: Path, + omega_db_label: str, + common: dict[str, object], + omega_config: dict[str, object], + profiling_config: dict[str, object], + hnsw_profile: dict[str, object] | None, + target_recalls: list[float], +) -> list[BenchmarkResult]: + print_header("OMEGA Benchmark") + + omega_specific_args = omega_config.get("args", {}) + if not args.search_only: + if args.retrain_only: + print("\n[Phase 1] Retraining OMEGA model only (reusing existing index)...") + else: + print("\n[Phase 1] Building OMEGA index + training model...") + offline_metrics = build_index( + index_kind="OMEGA", + index_path=omega_path, + dataset_spec=dataset_spec, + dataset_artifacts=dataset_artifacts, + common_args=common, + specific_args=omega_specific_args, + retrain_only=args.retrain_only, + dry_run=args.dry_run, + ) + if not args.dry_run: + write_offline_summary( + omega_path, + omega_db_label, + offline_metrics, + retrain_only=args.retrain_only, + ) + + if args.build_only: + return [ + BenchmarkResult( + type="OMEGA", + path=str(omega_path), + success=True, + target_recall=target_recall, + load_duration=get_offline_load_duration(omega_path), + ) + for target_recall in target_recalls + ] + + results: list[BenchmarkResult] = [] + index_files = discover_index_files(omega_path) + omega_common = dict(common) + omega_common["k"] = int(common["k"]) + + for target_recall in target_recalls: + print_header(f"OMEGA Search Benchmark (target_recall={target_recall})") + recall = compute_recall_with_zvec( + index_kind="OMEGA", + index_path=omega_path, + dataset_artifacts=dataset_artifacts, + common_args=common, + target_recall=target_recall, + dry_run=args.dry_run, + ) + benchmark = run_concurrency_benchmark( + bench_bin=bench_bin, + index_files=index_files, + dataset_artifacts=dataset_artifacts, + 
dataset_spec=dataset_spec, + common_args=omega_common, + target_recall=target_recall, + dry_run=args.dry_run, + ) + online = benchmark["summary"] + success = online.get("retcode", 0) == 0 + + omega_profile = None + if success and not args.dry_run: + print("\n[Profiling] Running OMEGA single-thread profiling pass...") + profile_env = { + "ZVEC_LOG_LEVEL": "INFO", + "ZVEC_OMEGA_LOG_QUERY_STATS": "1", + "ZVEC_OMEGA_LOG_QUERY_LIMIT": str(profiling_config.get("omega_query_limit", 2000)), + } + profile_ret, profile_output, profile_bench = run_profile_benchmark( + bench_bin=bench_bin, + index_files=index_files, + dataset_artifacts=dataset_artifacts, + dataset_spec=dataset_spec, + common_args=omega_common, + target_recall=target_recall, + dry_run=False, + extra_env=profile_env, + ) + validate_profile_output("OMEGA", profile_ret, profile_output, "OMEGA query stats:") + omega_profile = build_omega_profile( + {"qps": online.get("qps"), "recall": recall}, + profile_output, + profile_bench, + hnsw_profile, + ) + if profiling_config.get("omega_profile_control_timing", True): + print("\n[Profiling] Running OMEGA control-timing pass...") + detailed_ret, detailed_output, detailed_bench = run_profile_benchmark( + bench_bin=bench_bin, + index_files=index_files, + dataset_artifacts=dataset_artifacts, + dataset_spec=dataset_spec, + common_args=omega_common, + target_recall=target_recall, + dry_run=False, + extra_env={**profile_env, "ZVEC_OMEGA_PROFILE_CONTROL_TIMING": "1"}, + ) + validate_profile_output( + "OMEGA", detailed_ret, detailed_output, "OMEGA query stats:" + ) + detailed_profile = build_omega_profile( + {"qps": online.get("qps"), "recall": recall}, + detailed_output, + detailed_bench, + hnsw_profile, + ) + omega_profile = merge_omega_detailed_profile(omega_profile, detailed_profile) + + results.append( + BenchmarkResult( + type="OMEGA", + path=str(omega_path), + success=success, + target_recall=target_recall, + load_duration=get_offline_load_duration(omega_path), + 
qps=online.get("qps"), + avg_latency_ms=online.get("avg_latency_ms"), + p50_latency_ms=online.get("p50_latency_ms"), + p90_latency_ms=online.get("p90_latency_ms"), + p95_latency_ms=online.get("p95_latency_ms"), + p99_latency_ms=online.get("p99_latency_ms"), + recall=recall, + profiling=omega_profile, + ) + ) + + return results def main() -> int: args = parse_args() config_path = Path(args.config).expanduser().resolve() config = load_dataset_config(config_path, args.dataset) - zvec_root, vectordbbench_root, benchmark_dir, results_dir = resolve_paths( + zvec_root, benchmark_dir = resolve_paths( Path(__file__).resolve(), config, args.zvec_root, - args.vectordbbench_root, args.benchmark_dir, - args.results_dir, ) - vectordbbench_cmd = resolve_vectordbbench_command() + dataset_spec = resolve_dataset_spec(args.dataset, config, args.dataset_root) + dataset_artifacts = prepare_dataset_artifacts( + args.dataset, dataset_spec, benchmark_dir, dry_run=args.dry_run + ) + bench_bin, recall_bin = resolve_core_tools(zvec_root) benchmark_dir.mkdir(parents=True, exist_ok=True) dataset_name = args.dataset @@ -166,11 +350,7 @@ def main() -> int: hnsw_config = must_get(config, "hnsw") omega_config = must_get(config, "omega") profiling_config = config.get("profiling", {}) - warmup_enabled, warmup_duration, warmup_num_concurrency = resolve_warmup_settings( - args, common, config - ) - case_type = must_get(common, "case_type") hnsw_path = resolve_index_path(benchmark_dir, must_get(hnsw_config, "path")) omega_path = resolve_index_path(benchmark_dir, must_get(omega_config, "path")) hnsw_db_label = must_get(hnsw_config, "db_label") @@ -181,294 +361,62 @@ def main() -> int: if not target_recalls: raise ValueError("omega.target_recalls must be a non-empty list") - hnsw_common_args = {k: v for k, v in common.items() if k != "case_type"} - hnsw_specific_args = hnsw_config.get("args", {}) - omega_specific_args = omega_config.get("args", {}) - warmup_common_args = dict(hnsw_common_args) - 
warmup_common_args["num_concurrency"] = warmup_num_concurrency - warmup_common_args["concurrency_duration"] = warmup_duration - print("=" * 70) - print(f"VectorDBBench: Zvec HNSW vs OMEGA ({dataset_name})") + print(f"Zvec HNSW vs OMEGA ({dataset_name})") print(f"Config: {config_path}") print("=" * 70) print(f"zvec_root: {zvec_root}") - print(f"vectordbbench_root: {vectordbbench_root}") - print(f"vectordbbench_cmd: {' '.join(vectordbbench_cmd)}") print(f"benchmark_dir: {benchmark_dir}") - print(f"results_dir: {results_dir}") + print(f"dataset_dir: {dataset_spec['dataset_dir']}") + print(f"bench_bin: {bench_bin}") + print(f"recall_bin: {recall_bin}") print(f"hnsw_path: {hnsw_path}") print(f"omega_path: {omega_path}") print(f"target_recalls: {target_recalls}") - print( - "concurrent_warmup: " - + ( - f"enabled (num_concurrency={warmup_num_concurrency}, duration={warmup_duration}s)" - if warmup_enabled - else "disabled" - ) - ) - print( - "build_mode: " - + ("retrain model only (reuse existing index)" if args.retrain_only else "build index + train model") - ) print("=" * 70) results: list[BenchmarkResult] = [] + hnsw_profile = None if not args.skip_hnsw: - print_header("HNSW Benchmark") - - if not args.search_only: - print("\n[Phase 1] Building HNSW index...") - before_files = snapshot_result_files(results_dir) - cmd = build_base_command( - vectordbbench_cmd, - "zvec", - hnsw_path, - hnsw_db_label, - case_type, - hnsw_common_args, - hnsw_specific_args, - ["skip-search-serial", "skip-search-concurrent"], - ) - ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) - if ret != 0 and not args.dry_run: - print("ERROR: HNSW build failed!") - return 1 - if not args.dry_run: - write_offline_summary( - hnsw_path, - hnsw_db_label, - get_run_result(hnsw_db_label, before_files, results_dir), - ) - - if not args.build_only: - if warmup_enabled: - warmup_ret = run_concurrent_warmup( - label="HNSW", - vectordbbench_cmd=vectordbbench_cmd, - client_name="zvec", - 
path=hnsw_path, - db_label=hnsw_db_label, - case_type=case_type, - common_args=warmup_common_args, - specific_args=hnsw_specific_args, - vectordbbench_root=vectordbbench_root, - dry_run=args.dry_run, - ) - if warmup_ret != 0 and not args.dry_run: - print("ERROR: HNSW concurrent warmup failed!") - return 1 - print("\n[Phase 2] Running HNSW search benchmark...") - before_files = snapshot_result_files(results_dir) - cmd = build_base_command( - vectordbbench_cmd, - "zvec", - hnsw_path, - hnsw_db_label, - case_type, - hnsw_common_args, - hnsw_specific_args, - ["skip-drop-old", "skip-load"], - ) - ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) - metrics = get_run_result(hnsw_db_label, before_files, results_dir) if not args.dry_run else {} - load_duration = get_offline_load_duration(hnsw_path) - hnsw_profile = None - if ret == 0 and not args.dry_run: - print("\n[Profiling] Running HNSW serial-only profiling pass...") - profile_cmd = build_base_command( - vectordbbench_cmd, - "zvec", - hnsw_path, - hnsw_db_label, - case_type, - hnsw_common_args, - hnsw_specific_args, - ["skip-drop-old", "skip-load", "skip-search-concurrent"], - ) - profile_ret, profile_output = run_command_capture( - profile_cmd, - vectordbbench_root, - dry_run=False, - extra_env={ - "ZVEC_LOG_LEVEL": "INFO", - "ZVEC_HNSW_LOG_QUERY_STATS": "1", - "ZVEC_HNSW_LOG_QUERY_LIMIT": str(profiling_config.get("hnsw_query_limit", 2000)), - }, - ) - validate_profile_output("HNSW", profile_ret, profile_output, "HNSW query stats:") - hnsw_profile = build_hnsw_profile(metrics, profile_output) - latency_summary = latency_summary_from_profile(hnsw_profile) - results.append( - BenchmarkResult( - type="HNSW", - path=str(hnsw_path), - success=ret == 0, - target_recall=None, - load_duration=load_duration if load_duration is not None else metrics.get("load_duration"), - qps=metrics.get("qps"), - avg_latency_ms=latency_summary["avg_latency_ms"], - p50_latency_ms=latency_summary["p50_latency_ms"], - 
p90_latency_ms=latency_summary["p90_latency_ms"], - p95_latency_ms=latency_summary["p95_latency_ms"], - p99_latency_ms=latency_summary["p99_latency_ms"], - recall=metrics.get("recall"), - profiling=hnsw_profile, - ) - ) + hnsw_result = run_hnsw( + args=args, + dataset_name=dataset_name, + dataset_spec=dataset_spec, + dataset_artifacts=dataset_artifacts, + bench_bin=bench_bin, + hnsw_path=hnsw_path, + hnsw_db_label=hnsw_db_label, + common=common, + hnsw_config=hnsw_config, + profiling_config=profiling_config, + ) + results.append(hnsw_result) + hnsw_profile = hnsw_result.profiling if not args.skip_omega: - build_target_recall = target_recalls[0] - print_header("OMEGA Benchmark") - - if not args.search_only: - if args.retrain_only: - print("\n[Phase 1] Retraining OMEGA model only (reusing existing index)...") - else: - print("\n[Phase 1] Building OMEGA index + training model...") - print( - f"Build-time target_recall is ignored by training; using first requested value " - f"for CLI compatibility: {build_target_recall}" + results.extend( + run_omega( + args=args, + dataset_spec=dataset_spec, + dataset_artifacts=dataset_artifacts, + bench_bin=bench_bin, + omega_path=omega_path, + omega_db_label=omega_db_label, + common=common, + omega_config=omega_config, + profiling_config=profiling_config, + hnsw_profile=hnsw_profile, + target_recalls=target_recalls, ) - before_files = snapshot_result_files(results_dir) - build_flags = ["skip-search-serial", "skip-search-concurrent"] - if args.retrain_only: - build_flags.extend(["skip-drop-old", "skip-load", "retrain-only"]) - cmd = build_base_command( - vectordbbench_cmd, - "zvecomega", - omega_path, - omega_db_label, - case_type, - hnsw_common_args, - {**omega_specific_args, "target_recall": build_target_recall}, - build_flags, - ) - ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) - if ret != 0 and not args.dry_run: - print("ERROR: OMEGA build failed!") - return 1 - if not args.dry_run: - write_offline_summary( - 
omega_path, - omega_db_label, - get_run_result(omega_db_label, before_files, results_dir), - retrain_only=args.retrain_only, - ) - - if not args.build_only: - for target_recall in target_recalls: - print_header(f"OMEGA Search Benchmark (target_recall={target_recall})") - if warmup_enabled: - warmup_extra_flags = ["retrain-only"] if args.retrain_only else None - warmup_ret = run_concurrent_warmup( - label=f"OMEGA target_recall={target_recall}", - vectordbbench_cmd=vectordbbench_cmd, - client_name="zvecomega", - path=omega_path, - db_label=omega_db_label, - case_type=case_type, - common_args=warmup_common_args, - specific_args={**omega_specific_args, "target_recall": target_recall}, - vectordbbench_root=vectordbbench_root, - dry_run=args.dry_run, - extra_flags=warmup_extra_flags, - ) - if warmup_ret != 0 and not args.dry_run: - print("ERROR: OMEGA concurrent warmup failed!") - return 1 - before_files = snapshot_result_files(results_dir) - search_flags = ["skip-drop-old", "skip-load"] - if args.retrain_only: - search_flags.append("retrain-only") - cmd = build_base_command( - vectordbbench_cmd, - "zvecomega", - omega_path, - omega_db_label, - case_type, - hnsw_common_args, - {**omega_specific_args, "target_recall": target_recall}, - search_flags, - ) - ret = run_command(cmd, vectordbbench_root, dry_run=args.dry_run) - metrics = get_run_result(omega_db_label, before_files, results_dir) if not args.dry_run else {} - load_duration = get_offline_load_duration(omega_path) - omega_profile = None - if ret == 0 and not args.dry_run: - print("\n[Profiling] Running OMEGA serial-only latency pass...") - profile_flags = ["skip-drop-old", "skip-load", "skip-search-concurrent"] - if args.retrain_only: - profile_flags.append("retrain-only") - profile_cmd = build_base_command( - vectordbbench_cmd, - "zvecomega", - omega_path, - omega_db_label, - case_type, - hnsw_common_args, - {**omega_specific_args, "target_recall": target_recall}, - profile_flags, - ) - profile_env = { - 
"ZVEC_LOG_LEVEL": "INFO", - "ZVEC_OMEGA_LOG_QUERY_STATS": "1", - "ZVEC_OMEGA_LOG_QUERY_LIMIT": str(profiling_config.get("omega_query_limit", 2000)), - } - profile_ret, profile_output = run_command_capture( - profile_cmd, - vectordbbench_root, - dry_run=False, - extra_env=profile_env, - ) - validate_profile_output("OMEGA", profile_ret, profile_output, "OMEGA query stats:") - baseline_profile = next( - (result.profiling for result in results if result.type == "HNSW" and result.profiling), - None, - ) - omega_profile = build_omega_profile(metrics, profile_output, baseline_profile) - if profiling_config.get("omega_profile_control_timing", True): - print("\n[Profiling] Running OMEGA detailed control-timing pass...") - detailed_env = dict(profile_env) - detailed_env["ZVEC_OMEGA_PROFILE_CONTROL_TIMING"] = "1" - detailed_ret, detailed_output = run_command_capture( - profile_cmd, - vectordbbench_root, - dry_run=False, - extra_env=detailed_env, - ) - validate_profile_output( - "OMEGA", detailed_ret, detailed_output, "OMEGA query stats:" - ) - detailed_profile = build_omega_profile( - metrics, detailed_output, baseline_profile - ) - omega_profile = merge_omega_detailed_profile( - omega_profile, detailed_profile - ) - latency_summary = latency_summary_from_profile(omega_profile) - results.append( - BenchmarkResult( - type="OMEGA", - path=str(omega_path), - success=ret == 0, - target_recall=target_recall, - load_duration=load_duration if load_duration is not None else metrics.get("load_duration"), - qps=metrics.get("qps"), - avg_latency_ms=latency_summary["avg_latency_ms"], - p50_latency_ms=latency_summary["p50_latency_ms"], - p90_latency_ms=latency_summary["p90_latency_ms"], - p95_latency_ms=latency_summary["p95_latency_ms"], - p99_latency_ms=latency_summary["p99_latency_ms"], - recall=metrics.get("recall"), - profiling=omega_profile, - ) - ) + ) if results: - written_summary_paths = write_grouped_profiling_summaries(dataset_name, results) + written_summary_paths = ( + 
write_grouped_profiling_summaries(dataset_name, results) + if not args.dry_run + else [] + ) print("\n\n" + "=" * 70) print("Benchmark Summary") print("=" * 70) @@ -481,11 +429,15 @@ def main() -> int: for result in results: tr = f"{result.target_recall:.2f}" if result.target_recall is not None else "N/A" status = "OK" if result.success else "FAILED" - ld = f"{result.load_duration:.1f}" if result.load_duration else "N/A" - qps = f"{result.qps:.1f}" if result.qps else "N/A" - avg_latency = f"{result.avg_latency_ms:.3f}" if result.avg_latency_ms is not None else "N/A" - p95_latency = f"{result.p95_latency_ms:.3f}" if result.p95_latency_ms is not None else "N/A" - recall = f"{result.recall:.4f}" if result.recall else "N/A" + ld = f"{result.load_duration:.1f}" if result.load_duration is not None else "N/A" + qps = f"{result.qps:.1f}" if result.qps is not None else "N/A" + avg_latency = ( + f"{result.avg_latency_ms:.3f}" if result.avg_latency_ms is not None else "N/A" + ) + p95_latency = ( + f"{result.p95_latency_ms:.3f}" if result.p95_latency_ms is not None else "N/A" + ) + recall = f"{result.recall:.4f}" if result.recall is not None else "N/A" print( f"{result.type:<10} {tr:<15} {ld:<12} {qps:<8} " f"{avg_latency:<16} {p95_latency:<16} {recall:<10} {status:<10}" @@ -493,7 +445,11 @@ def main() -> int: print("\nProfiling Summary") print("-" * 75) - print(f"{'Type':<10} {'target_recall':<15} {'avg_lat(ms)':<12} {'avg_cmps':<12} {'avg_pred_calls':<16} {'avg_model_ms':<14} {'saved_cmps':<12}") + print( + f"{'Type':<10} {'target_recall':<15} {'avg_lat(ms)':<12} " + f"{'avg_cmps':<12} {'avg_pred_calls':<16} {'avg_model_ms':<14} " + f"{'saved_cmps':<12}" + ) for result in results: profile = result.profiling or {} tr = f"{result.target_recall:.2f}" if result.target_recall is not None else "N/A" @@ -511,15 +467,10 @@ def main() -> int: f"{(f'{avg_model_ms:.3f}' if avg_model_ms is not None else 'N/A'):<14} " f"{(f'{saved_cmps:.1f}' if saved_cmps is not None else 'N/A'):<12}" ) 
+ print() for path in written_summary_paths: - print(f"Profiling JSON: {path}") - - print("\nTo view results:") - print(" vectordbbench results") - print("\nOr start the web UI:") - print(" vectordbbench start") - print() + print(f"Summary JSON: {path}") return 0 if all(result.success for result in results) else 1 diff --git a/scripts/benchmark_lib.py b/scripts/benchmark_lib.py index b5beb7de0..6388c8ce2 100644 --- a/scripts/benchmark_lib.py +++ b/scripts/benchmark_lib.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 -import importlib +from __future__ import annotations + import json import os import re +import shutil import subprocess -import sys -import tempfile +import time from dataclasses import dataclass from datetime import datetime from pathlib import Path @@ -31,6 +32,27 @@ class BenchmarkResult: KV_PATTERN = re.compile(r"([A-Za-z_]+)=([^\s,]+)") +AVG_LINE_PATTERN = re.compile(r"Avg latency:\s*([0-9.]+)ms qps:\s*([0-9.]+)") +PERCENTILE_PATTERN = re.compile(r"(\d+)\s+Percentile:\s*([0-9.]+)\s+ms") +PROCESS_LINE_PATTERN = re.compile( + r"Process query:\s*(\d+), total process time:\s*(\d+)ms, duration:\s*(\d+)ms" +) +RECALL_PATTERN = re.compile(r"Recall@(\d+):\s*([0-9.]+)") + +_ZVEC_INITIALIZED = False + +DATASET_SPECS: dict[str, dict[str, Any]] = { + "cohere_1m": { + "dataset_dirname": "cohere/cohere_medium_1m", + "dimension": 768, + "metric_type": "COSINE", + }, + "cohere_10m": { + "dataset_dirname": "cohere/cohere_large_10m", + "dimension": 768, + "metric_type": "COSINE", + }, +} def load_json(path: Path) -> dict[str, Any]: @@ -51,22 +73,25 @@ def load_dataset_config(path: Path, dataset_name: str) -> dict[str, Any]: return dataset_config +def must_get(config: dict[str, Any], key: str) -> Any: + if key not in config: + raise KeyError(f"Missing required config key: {key}") + return config[key] + + +def print_header(title: str) -> None: + print("\n" + "=" * 70) + print(title) + print("=" * 70) + + def resolve_paths( script_path: Path, config: dict[str, Any], 
zvec_root_arg: str | None, - vectordbbench_root_arg: str | None, benchmark_dir_arg: str | None, - results_dir_arg: str | None, -) -> tuple[Path, Path, Path, Path]: +) -> tuple[Path, Path]: zvec_root = Path(zvec_root_arg).resolve() if zvec_root_arg else script_path.parent.parent - vectordbbench_root = ( - Path(vectordbbench_root_arg).resolve() - if vectordbbench_root_arg - else Path( - os.environ.get("VECTORDBBENCH_ROOT", zvec_root.parent / "VectorDBBench") - ).resolve() - ) config_benchmark_dir = config.get("benchmark_dir") if benchmark_dir_arg: @@ -74,29 +99,14 @@ def resolve_paths( elif config_benchmark_dir: benchmark_dir = Path(config_benchmark_dir).expanduser().resolve() else: - benchmark_dir = Path( - os.environ.get("ZVEC_BENCHMARK_DIR", zvec_root / "benchmark_results") - ).resolve() - - source_results_dir = vectordbbench_root / "vectordb_bench" / "results" / "Zvec" - if results_dir_arg: - results_dir = Path(results_dir_arg).resolve() - elif config.get("results_dir"): - results_dir = Path(config["results_dir"]).expanduser().resolve() - elif source_results_dir.exists(): - results_dir = source_results_dir - else: - try: - bench_config = importlib.import_module("vectordb_bench").config - results_dir = Path(bench_config.RESULTS_LOCAL_DIR).resolve() / "Zvec" - except Exception: - results_dir = source_results_dir + benchmark_dir = (zvec_root / "benchmark_results").resolve() - return zvec_root, vectordbbench_root, benchmark_dir, results_dir + return zvec_root, benchmark_dir -def resolve_vectordbbench_command() -> list[str]: - return [sys.executable, "-m", "vectordb_bench.cli.vectordbbench"] +def resolve_index_path(benchmark_dir: Path, configured_path: str) -> Path: + path = Path(configured_path).expanduser() + return path.resolve() if path.is_absolute() else (benchmark_dir / path).resolve() def parse_scalar(value: str) -> Any: @@ -122,15 +132,12 @@ def avg_metric(records: list[dict[str, Any]], key: str) -> float | None: return sum(values) / len(values) -def 
percentile_metric( - records: list[dict[str, Any]], key: str, percentile: float -) -> float | None: +def percentile_metric(records: list[dict[str, Any]], key: str, percentile: float) -> float | None: values = sorted(float(record[key]) for record in records if key in record) if not values: return None if len(values) == 1: return values[0] - rank = (len(values) - 1) * percentile / 100.0 lower = int(rank) upper = min(lower + 1, len(values) - 1) @@ -140,69 +147,90 @@ def percentile_metric( return values[lower] * (1.0 - weight) + values[upper] * weight -def parse_serial_runner_summary(output: str) -> dict[str, Any]: - summary = {} - for line in output.splitlines(): - if "search entire test_data:" not in line: - continue - summary = parse_key_values(line) - return summary - - def parse_query_records(output: str, prefix: str) -> list[dict[str, Any]]: records = [] for line in output.splitlines(): - if prefix not in line: - continue - records.append(parse_key_values(line)) + if prefix in line: + records.append(parse_key_values(line)) return records -def build_hnsw_profile(metrics: dict[str, Any], output: str) -> dict[str, Any]: +def parse_bench_output(output: str) -> dict[str, Any]: + metrics: dict[str, Any] = {} + for line in output.splitlines(): + if (match := PROCESS_LINE_PATTERN.search(line)) is not None: + metrics["process_query_count"] = int(match.group(1)) + metrics["total_process_time_ms"] = int(match.group(2)) + metrics["duration_ms"] = int(match.group(3)) + elif (match := AVG_LINE_PATTERN.search(line)) is not None: + metrics["avg_latency_ms"] = float(match.group(1)) + metrics["qps"] = float(match.group(2)) + elif (match := PERCENTILE_PATTERN.search(line)) is not None: + metrics[f"p{match.group(1)}_latency_ms"] = float(match.group(2)) + return metrics + + +def parse_recall_output(output: str, topk: int) -> float | None: + recall_by_k: dict[int, float] = {} + for line in output.splitlines(): + match = RECALL_PATTERN.search(line) + if match is not None: + 
recall_by_k[int(match.group(1))] = float(match.group(2)) + if topk in recall_by_k: + return recall_by_k[topk] + if recall_by_k: + return recall_by_k[max(recall_by_k)] + return None + + +def build_hnsw_profile( + metrics: dict[str, Any], output: str, bench_summary: dict[str, Any] +) -> dict[str, Any]: query_records = parse_query_records(output, "HNSW query stats:") - serial_summary = parse_serial_runner_summary(output) - avg_latency_ms = avg_metric(query_records, "latency_ms") - p50_latency_ms = percentile_metric(query_records, "latency_ms", 50) - p90_latency_ms = percentile_metric(query_records, "latency_ms", 90) - p95_latency_ms = percentile_metric(query_records, "latency_ms", 95) - p99_latency_ms = percentile_metric(query_records, "latency_ms", 99) return { "benchmark_recall": metrics.get("recall"), "benchmark_qps": metrics.get("qps"), "profile_query_count": len(query_records), - "profile_avg_end2end_latency_ms": avg_latency_ms, - "profile_p50_end2end_latency_ms": p50_latency_ms, - "profile_p90_end2end_latency_ms": p90_latency_ms, - "profile_p95_end2end_latency_ms": p95_latency_ms, - "profile_p99_end2end_latency_ms": p99_latency_ms, + "profile_avg_end2end_latency_ms": bench_summary.get("avg_latency_ms"), + "profile_p50_end2end_latency_ms": bench_summary.get("p50_latency_ms"), + "profile_p90_end2end_latency_ms": bench_summary.get("p90_latency_ms"), + "profile_p95_end2end_latency_ms": bench_summary.get("p95_latency_ms"), + "profile_p99_end2end_latency_ms": bench_summary.get("p99_latency_ms"), "profile_avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), "profile_avg_scan_cmps": avg_metric(query_records, "cmps"), "profile_avg_pure_search_ms": avg_metric(query_records, "pure_search_ms"), - "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), - "profile_serial_p99_s": serial_summary.get("p99"), - "profile_serial_p95_s": serial_summary.get("p95"), - "profile_serial_avg_recall": serial_summary.get("avg_recall"), + "profile_serial_avg_latency_s": ( + 
bench_summary["avg_latency_ms"] / 1000.0 + if bench_summary.get("avg_latency_ms") is not None + else None + ), + "profile_serial_p99_s": ( + bench_summary["p99_latency_ms"] / 1000.0 + if bench_summary.get("p99_latency_ms") is not None + else None + ), + "profile_serial_p95_s": ( + bench_summary["p95_latency_ms"] / 1000.0 + if bench_summary.get("p95_latency_ms") is not None + else None + ), + "profile_serial_avg_recall": metrics.get("recall"), } def build_omega_profile( - metrics: dict[str, Any], output: str, hnsw_profile: dict[str, Any] | None + metrics: dict[str, Any], + output: str, + bench_summary: dict[str, Any], + hnsw_profile: dict[str, Any] | None, ) -> dict[str, Any]: query_records = parse_query_records(output, "OMEGA query stats:") - serial_summary = parse_serial_runner_summary(output) - avg_latency_ms = avg_metric(query_records, "total_ms") - p50_latency_ms = percentile_metric(query_records, "total_ms", 50) - p90_latency_ms = percentile_metric(query_records, "total_ms", 90) - p95_latency_ms = percentile_metric(query_records, "total_ms", 95) - p99_latency_ms = percentile_metric(query_records, "total_ms", 99) avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") avg_core_search_ms = avg_metric(query_records, "core_search_ms") avg_pure_search_ms = avg_metric(query_records, "pure_search_ms") avg_hook_total_ms = avg_metric(query_records, "hook_total_ms") - avg_search_only_ms = ( - avg_pure_search_ms if avg_pure_search_ms is not None else avg_core_search_ms - ) + avg_search_only_ms = avg_pure_search_ms if avg_pure_search_ms is not None else avg_core_search_ms cmp_time_ms = None if avg_pairwise_dist_cnt and avg_pairwise_dist_cnt > 0 and avg_search_only_ms is not None: @@ -224,11 +252,11 @@ def build_omega_profile( "benchmark_recall": metrics.get("recall"), "benchmark_qps": metrics.get("qps"), "profile_query_count": len(query_records), - "profile_avg_end2end_latency_ms": avg_latency_ms, - "profile_p50_end2end_latency_ms": p50_latency_ms, - 
"profile_p90_end2end_latency_ms": p90_latency_ms, - "profile_p95_end2end_latency_ms": p95_latency_ms, - "profile_p99_end2end_latency_ms": p99_latency_ms, + "profile_avg_end2end_latency_ms": bench_summary.get("avg_latency_ms"), + "profile_p50_end2end_latency_ms": bench_summary.get("p50_latency_ms"), + "profile_p90_end2end_latency_ms": bench_summary.get("p90_latency_ms"), + "profile_p95_end2end_latency_ms": bench_summary.get("p95_latency_ms"), + "profile_p99_end2end_latency_ms": bench_summary.get("p99_latency_ms"), "profile_avg_cmps": avg_pairwise_dist_cnt, "profile_avg_scan_cmps": avg_metric(query_records, "scan_cmps"), "profile_avg_omega_cmps": avg_metric(query_records, "omega_cmps"), @@ -244,27 +272,58 @@ def build_omega_profile( "profile_avg_hook_total_ms": avg_hook_total_ms, "profile_avg_hook_body_ms": avg_metric(query_records, "hook_body_ms"), "profile_avg_hook_dispatch_ms": avg_metric(query_records, "hook_dispatch_ms"), - "profile_avg_report_visit_candidate_ms": avg_metric( - query_records, "report_visit_candidate_ms" - ), + "profile_avg_report_visit_candidate_ms": avg_metric(query_records, "report_visit_candidate_ms"), "profile_avg_should_predict_ms": avg_metric(query_records, "should_predict_ms"), "profile_avg_report_hop_ms": avg_metric(query_records, "report_hop_ms"), - "profile_avg_update_top_candidates_ms": avg_metric( - query_records, "update_top_candidates_ms" - ), - "profile_avg_push_traversal_window_ms": avg_metric( - query_records, "push_traversal_window_ms" - ), + "profile_avg_update_top_candidates_ms": avg_metric(query_records, "update_top_candidates_ms"), + "profile_avg_push_traversal_window_ms": avg_metric(query_records, "push_traversal_window_ms"), "profile_avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, "profile_avg_early_stop_saved_cmps": avg_saved_cmps, "profile_avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), - "profile_serial_avg_latency_s": serial_summary.get("avg_latency"), - "profile_serial_p99_s": 
serial_summary.get("p99"), - "profile_serial_p95_s": serial_summary.get("p95"), - "profile_serial_avg_recall": serial_summary.get("avg_recall"), + "profile_serial_avg_latency_s": ( + bench_summary["avg_latency_ms"] / 1000.0 + if bench_summary.get("avg_latency_ms") is not None + else None + ), + "profile_serial_p99_s": ( + bench_summary["p99_latency_ms"] / 1000.0 + if bench_summary.get("p99_latency_ms") is not None + else None + ), + "profile_serial_p95_s": ( + bench_summary["p95_latency_ms"] / 1000.0 + if bench_summary.get("p95_latency_ms") is not None + else None + ), + "profile_serial_avg_recall": metrics.get("recall"), } +def merge_omega_detailed_profile( + summary_profile: dict[str, Any], detailed_profile: dict[str, Any] +) -> dict[str, Any]: + merged = dict(summary_profile) + detailed_keys = [ + "profile_avg_model_overhead_ms", + "profile_avg_should_stop_ms", + "profile_avg_prediction_eval_ms", + "profile_avg_core_search_ms", + "profile_avg_pure_search_ms", + "profile_avg_hook_total_ms", + "profile_avg_hook_body_ms", + "profile_avg_hook_dispatch_ms", + "profile_avg_report_visit_candidate_ms", + "profile_avg_should_predict_ms", + "profile_avg_report_hop_ms", + "profile_avg_update_top_candidates_ms", + "profile_avg_push_traversal_window_ms", + "profile_avg_model_overhead_cmp_equiv", + ] + for key in detailed_keys: + merged[key] = detailed_profile.get(key) + return merged + + def profiling_output_path(index_path: Path) -> Path: return index_path / "online_benchmark_summary.json" @@ -274,9 +333,7 @@ def write_profiling_summary(index_path: Path, payload: dict[str, Any]) -> None: json.dump(payload, f, indent=2, sort_keys=True) -def write_grouped_profiling_summaries( - dataset: str, results: list[BenchmarkResult] -) -> list[Path]: +def write_grouped_profiling_summaries(dataset: str, results: list[BenchmarkResult]) -> list[Path]: written_paths: list[Path] = [] grouped: dict[str, list[BenchmarkResult]] = {} for result in results: @@ -313,121 +370,6 @@ def 
write_grouped_profiling_summaries( return written_paths -def get_latest_result(db_label: str, results_dir: Path) -> dict[str, Any]: - if not results_dir.exists(): - return {} - - result_files = sorted( - results_dir.glob("result_*.json"), - key=lambda f: f.stat().st_mtime, - reverse=True, - ) - for result_file in result_files: - try: - with open(result_file) as f: - data = json.load(f) - for result in data.get("results", []): - task_config = result.get("task_config", {}) - db_config = task_config.get("db_config", {}) - if db_config.get("db_label") == db_label: - metrics = result.get("metrics", {}) - return { - "insert_duration": metrics.get("insert_duration"), - "optimize_duration": metrics.get("optimize_duration"), - "load_duration": metrics.get("load_duration"), - "qps": metrics.get("qps"), - "avg_latency_ms": metrics.get("serial_latency_avg"), - "p95_latency_ms": metrics.get("serial_latency_p95"), - "p99_latency_ms": metrics.get("serial_latency_p99"), - "recall": metrics.get("recall"), - } - except Exception: - continue - return {} - - -def latency_summary_from_profile(profile: dict[str, Any] | None) -> dict[str, float | None]: - profile = profile or {} - return { - "avg_latency_ms": profile.get("profile_avg_end2end_latency_ms"), - "p50_latency_ms": profile.get("profile_p50_end2end_latency_ms"), - "p90_latency_ms": profile.get("profile_p90_end2end_latency_ms"), - "p95_latency_ms": profile.get("profile_p95_end2end_latency_ms"), - "p99_latency_ms": profile.get("profile_p99_end2end_latency_ms"), - } - - -def merge_omega_detailed_profile( - summary_profile: dict[str, Any], detailed_profile: dict[str, Any] -) -> dict[str, Any]: - merged = dict(summary_profile) - detailed_keys = [ - "profile_avg_model_overhead_ms", - "profile_avg_should_stop_ms", - "profile_avg_prediction_eval_ms", - "profile_avg_core_search_ms", - "profile_avg_pure_search_ms", - "profile_avg_hook_total_ms", - "profile_avg_hook_body_ms", - "profile_avg_hook_dispatch_ms", - 
"profile_avg_report_visit_candidate_ms", - "profile_avg_should_predict_ms", - "profile_avg_report_hop_ms", - "profile_avg_update_top_candidates_ms", - "profile_avg_push_traversal_window_ms", - "profile_avg_model_overhead_cmp_equiv", - ] - for key in detailed_keys: - merged[key] = detailed_profile.get(key) - return merged - - -def snapshot_result_files(results_dir: Path) -> set[str]: - if not results_dir.exists(): - return set() - return {str(p) for p in results_dir.glob("result_*.json")} - - -def extract_result_from_file(result_file: Path, db_label: str) -> dict[str, Any]: - try: - with open(result_file) as f: - data = json.load(f) - for result in data.get("results", []): - task_config = result.get("task_config", {}) - db_config = task_config.get("db_config", {}) - if db_config.get("db_label") == db_label: - metrics = result.get("metrics", {}) - return { - "insert_duration": metrics.get("insert_duration"), - "optimize_duration": metrics.get("optimize_duration"), - "load_duration": metrics.get("load_duration"), - "qps": metrics.get("qps"), - "recall": metrics.get("recall"), - } - except Exception: - return {} - return {} - - -def get_run_result( - db_label: str, before_files: set[str], results_dir: Path -) -> dict[str, Any]: - if not results_dir.exists(): - return {} - - current_files = {str(p) for p in results_dir.glob("result_*.json")} - new_files = sorted( - [Path(p) for p in current_files - before_files], - key=lambda p: p.stat().st_mtime, - reverse=True, - ) - for result_file in new_files: - metrics = extract_result_from_file(result_file, db_label) - if metrics: - return metrics - return get_latest_result(db_label, results_dir) - - def offline_summary_path(index_path: Path) -> Path: return index_path / "offline_benchmark_summary.json" @@ -457,9 +399,7 @@ def build_offline_summary( metrics: dict[str, Any], retrain_only: bool = False, ) -> dict[str, Any]: - previous_summary = ( - read_json_if_exists(offline_summary_path(index_path)) if retrain_only else {} - ) + 
previous_summary = read_json_if_exists(offline_summary_path(index_path)) if retrain_only else {} previous_offline = previous_summary.get("offline", {}) previous_omega_training = previous_summary.get("omega_training", {}) @@ -477,6 +417,9 @@ def build_offline_summary( "lightgbm_timing_ms": read_json_if_exists( omega_model_dir / "lightgbm_training_timing.json" ), + "lightgbm_training_metrics": read_json_if_exists( + omega_model_dir / "lightgbm_training_metrics.json" + ), } if retrain_only: @@ -491,15 +434,11 @@ def build_offline_summary( + sum_timing_ms(omega_training.get("lightgbm_timing_ms", {})) ) / 1000.0 if old_optimize_duration is not None: - optimize_duration = round( - old_optimize_duration - old_training_s + new_training_s, 4 - ) - else: - optimize_duration = metrics.get("optimize_duration") + optimize_duration = round(old_optimize_duration - old_training_s + new_training_s, 4) load_duration = ( round(insert_duration + optimize_duration, 4) if insert_duration is not None and optimize_duration is not None - else metrics.get("load_duration") + else None ) summary = { @@ -518,154 +457,809 @@ def build_offline_summary( def write_offline_summary( - index_path: Path, - db_label: str, - metrics: dict[str, Any], - retrain_only: bool = False, -) -> None: + index_path: Path, db_label: str, metrics: dict[str, Any], retrain_only: bool = False +) -> Path: summary = build_offline_summary(index_path, db_label, metrics, retrain_only=retrain_only) - with open(offline_summary_path(index_path), "w") as f: + path = offline_summary_path(index_path) + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: json.dump(summary, f, indent=2, sort_keys=True) + return path def get_offline_load_duration(index_path: Path) -> float | None: - summary = read_json_if_exists(offline_summary_path(index_path)) - return summary.get("offline", {}).get("load_duration_s") + return read_json_if_exists(offline_summary_path(index_path)).get("offline", {}).get( + "load_duration_s" + 
) -def run_command( - cmd: list[str], - vectordbbench_root: Path, - dry_run: bool = False, - extra_env: dict[str, str] | None = None, -) -> int: - cmd_str = " \\\n ".join(cmd) - print(f"\n{'=' * 60}") - print(f"Command:\n{cmd_str}") - print(f"{'=' * 60}\n") - if dry_run: - print("[DRY RUN] Command not executed") - return 0 +def latency_summary_from_profile(profile: dict[str, Any] | None) -> dict[str, float | None]: + profile = profile or {} + return { + "avg_latency_ms": profile.get("profile_avg_end2end_latency_ms"), + "p50_latency_ms": profile.get("profile_p50_end2end_latency_ms"), + "p90_latency_ms": profile.get("profile_p90_end2end_latency_ms"), + "p95_latency_ms": profile.get("profile_p95_end2end_latency_ms"), + "p99_latency_ms": profile.get("profile_p99_end2end_latency_ms"), + } - cwd = vectordbbench_root if vectordbbench_root.exists() else None - env = os.environ.copy() - if extra_env: - env.update(extra_env) - result = subprocess.run(cmd, cwd=cwd, env=env) - return result.returncode +def resolve_dataset_spec( + dataset_name: str, config: dict[str, Any], dataset_root_arg: str | None +) -> dict[str, Any]: + default = DATASET_SPECS.get(dataset_name, {}) + dataset_root = None + if dataset_root_arg: + dataset_root = Path(dataset_root_arg).expanduser().resolve() + elif config.get("dataset_root"): + dataset_root = Path(config["dataset_root"]).expanduser().resolve() + elif os.environ.get("DATASET_LOCAL_DIR"): + dataset_root = Path(os.environ["DATASET_LOCAL_DIR"]).expanduser().resolve() + + dataset_dirname = config.get("dataset_dirname", default.get("dataset_dirname")) + if dataset_root is None or not dataset_dirname: + raise ValueError( + "Dataset root is not configured. Set --dataset-root, config.dataset_root, " + "or DATASET_LOCAL_DIR." 
+ ) -def run_command_capture( - cmd: list[str], - vectordbbench_root: Path, - dry_run: bool = False, - extra_env: dict[str, str] | None = None, -) -> tuple[int, str]: - cmd_str = " \\\n ".join(cmd) - print(f"\n{'=' * 60}") - print(f"Command:\n{cmd_str}") - print(f"{'=' * 60}\n") + dimension = int(config.get("dimension", default.get("dimension", 0))) + metric_type = str(config.get("metric_type", default.get("metric_type", "COSINE"))).upper() + if dimension <= 0: + raise ValueError(f"Missing dataset dimension for {dataset_name}") - if dry_run: - print("[DRY RUN] Command not executed") - return 0, "" + dataset_dir = (dataset_root / dataset_dirname).resolve() + return { + "dataset_root": dataset_root, + "dataset_dir": dataset_dir, + "dimension": dimension, + "metric_type": metric_type, + } - cwd = vectordbbench_root if vectordbbench_root.exists() else None - env = os.environ.copy() - if extra_env: - env.update(extra_env) - with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".log") as tmp: - tmp_path = Path(tmp.name) +def _require_polars(): try: - with tmp_path.open("w+") as tmp: - result = subprocess.run( - cmd, cwd=cwd, env=env, stdout=tmp, stderr=subprocess.STDOUT, text=True - ) - tmp.flush() - tmp.seek(0) - output = tmp.read() - print(output, end="" if output.endswith("\n") or not output else "\n") - return result.returncode, output - finally: - tmp_path.unlink(missing_ok=True) + import polars as pl + except ImportError as exc: + raise RuntimeError( + "This script requires polars in the active Python environment." 
+ ) from exc + return pl + + +def _sorted_train_files(dataset_dir: Path) -> list[Path]: + candidates: list[Path] = [] + for pattern in [ + "shuffle_train-*.parquet", + "train-*.parquet", + "shuffle_train.parquet", + "train.parquet", + ]: + candidates.extend(sorted(dataset_dir.glob(pattern))) + unique: list[Path] = [] + seen: set[Path] = set() + for path in candidates: + if path not in seen: + seen.add(path) + unique.append(path) + return unique + + +def prepare_dataset_artifacts( + dataset_name: str, + dataset_spec: dict[str, Any], + benchmark_dir: Path, + dry_run: bool = False, +) -> dict[str, Path]: + dataset_dir = dataset_spec["dataset_dir"] + query_parquet = dataset_dir / "test.parquet" + gt_parquet = dataset_dir / "neighbors.parquet" + train_files = _sorted_train_files(dataset_dir) + if not dry_run: + if not dataset_dir.exists(): + raise FileNotFoundError(f"Dataset directory not found: {dataset_dir}") + if not query_parquet.exists(): + raise FileNotFoundError(f"Missing query parquet: {query_parquet}") + if not gt_parquet.exists(): + raise FileNotFoundError(f"Missing ground-truth parquet: {gt_parquet}") + if not train_files: + raise FileNotFoundError(f"No train parquet files found under: {dataset_dir}") + + cache_dir = (benchmark_dir / "_dataset_cache" / dataset_name).resolve() + query_txt = cache_dir / "query.txt" + gt_txt = cache_dir / "groundtruth.txt" + cache_dir.mkdir(parents=True, exist_ok=True) + + if not dry_run: + refresh_query = (not query_txt.exists()) or query_txt.stat().st_mtime < query_parquet.stat().st_mtime + refresh_gt = (not gt_txt.exists()) or gt_txt.stat().st_mtime < gt_parquet.stat().st_mtime + if refresh_query: + _write_query_text(query_parquet, query_txt) + if refresh_gt: + _write_groundtruth_text(gt_parquet, gt_txt) + return { + "dataset_dir": dataset_dir, + "query_parquet": query_parquet, + "gt_parquet": gt_parquet, + "query_txt": query_txt, + "groundtruth_txt": gt_txt, + "train_files": train_files, + } -def must_get(config: dict[str, 
Any], key: str) -> Any: - if key not in config: - raise ValueError(f"Missing required config key: {key}") - return config[key] +def _write_query_text(query_parquet: Path, output_path: Path) -> None: + pl = _require_polars() + frame = pl.read_parquet(query_parquet).sort("id") + with open(output_path, "w") as f: + for row in frame.iter_rows(named=True): + vector = row["emb"] + vector_text = " ".join(str(round(float(v), 16)) for v in vector) + f.write(f"{int(row['id'])};{vector_text};\n") -def resolve_index_path(benchmark_dir: Path, path_value: str) -> Path: - path = Path(path_value).expanduser() - return path.resolve() if path.is_absolute() else (benchmark_dir / path).resolve() +def _write_groundtruth_text(gt_parquet: Path, output_path: Path) -> None: + pl = _require_polars() + frame = pl.read_parquet(gt_parquet).sort("id") + with open(output_path, "w") as f: + for row in frame.iter_rows(named=True): + neighbors = " ".join(str(int(v)) for v in row["neighbors_id"]) + f.write(f"{int(row['id'])};{neighbors}\n") + + +def _ensure_zvec_initialized() -> None: + global _ZVEC_INITIALIZED + if _ZVEC_INITIALIZED: + return + import zvec + + zvec.init(log_level=zvec.LogLevel.WARN) + _ZVEC_INITIALIZED = True + + +def _quantize_type_from_name(name: str): + import zvec + + normalized = str(name).upper() + mapping = { + "": zvec.QuantizeType.UNDEFINED, + "UNDEFINED": zvec.QuantizeType.UNDEFINED, + "FP16": zvec.QuantizeType.FP16, + "INT8": zvec.QuantizeType.INT8, + "INT4": zvec.QuantizeType.INT4, + } + if normalized not in mapping: + raise ValueError(f"Unsupported quantize type: {name}") + return mapping[normalized] + + +def _metric_type_from_name(name: str): + import zvec + + normalized = str(name).upper() + mapping = { + "COSINE": zvec.MetricType.COSINE, + "IP": zvec.MetricType.IP, + "L2": zvec.MetricType.L2, + } + if normalized not in mapping: + raise ValueError(f"Unsupported metric type: {name}") + return mapping[normalized] -def append_option(cmd: list[str], key: str, value: Any) 
-> None: - if value is None: + +def _maybe_destroy_collection(path: Path) -> None: + import zvec + + if not path.exists(): return - flag = f"--{key.replace('_', '-')}" - if isinstance(value, bool): - if value: - cmd.append(flag) + try: + zvec.open(str(path)).destroy() return - if isinstance(value, list): - cmd.extend([flag, ",".join(str(v) for v in value)]) + except Exception: + pass + shutil.rmtree(path, ignore_errors=True) + + +def _build_schema( + index_kind: str, + dimension: int, + metric_type: str, + common_args: dict[str, Any], + specific_args: dict[str, Any], +): + import zvec + + quantize_type = _quantize_type_from_name(common_args.get("quantize_type", "")) + metric = _metric_type_from_name(metric_type) + if index_kind == "OMEGA": + index_param = zvec.OmegaIndexParam( + metric_type=metric, + m=int(common_args["m"]), + ef_construction=int(specific_args.get("ef_construction", 500)), + quantize_type=quantize_type, + min_vector_threshold=int(specific_args["min_vector_threshold"]), + num_training_queries=int(specific_args["num_training_queries"]), + ef_training=int(specific_args["ef_training"]), + window_size=int(specific_args["window_size"]), + ef_groundtruth=int(specific_args["ef_groundtruth"]), + k_train=int(specific_args.get("k_train", 1)), + ) else: - cmd.extend([flag, str(value)]) + index_param = zvec.HnswIndexParam( + metric_type=metric, + m=int(common_args["m"]), + ef_construction=int(specific_args.get("ef_construction", 500)), + quantize_type=quantize_type, + ) + + return zvec.CollectionSchema( + name=f"{index_kind.lower()}_benchmark", + fields=[ + zvec.FieldSchema( + "id", + zvec.DataType.INT64, + nullable=False, + index_param=zvec.InvertIndexParam(enable_range_optimization=True), + ) + ], + vectors=[ + zvec.VectorSchema( + "dense", + zvec.DataType.VECTOR_FP32, + dimension=dimension, + index_param=index_param, + ) + ], + ) -def extend_with_args(cmd: list[str], args_map: dict[str, Any] | None) -> None: - if not args_map: - return - for key, value in 
args_map.items(): - append_option(cmd, key, value) +def build_index( + *, + index_kind: str, + index_path: Path, + dataset_spec: dict[str, Any], + dataset_artifacts: dict[str, Any], + common_args: dict[str, Any], + specific_args: dict[str, Any], + retrain_only: bool, + dry_run: bool, +) -> dict[str, Any]: + if dry_run: + print(f"[Dry-run] Build {index_kind} at {index_path}") + return {"insert_duration": None, "optimize_duration": None, "load_duration": None} + _ensure_zvec_initialized() + import zvec -def extend_with_flags(cmd: list[str], flags: list[str] | None) -> None: - if not flags: - return - for flag in flags: - cmd.append(f"--{flag}") + if retrain_only: + collection = zvec.open( + str(index_path), zvec.CollectionOption(read_only=False, enable_mmap=True) + ) + insert_duration = None + else: + _maybe_destroy_collection(index_path) + schema = _build_schema( + index_kind, + dataset_spec["dimension"], + dataset_spec["metric_type"], + common_args, + specific_args, + ) + collection = zvec.create_and_open( + str(index_path), + schema, + zvec.CollectionOption(read_only=False, enable_mmap=True), + ) + insert_duration = _insert_training_data(collection, dataset_artifacts["train_files"]) + optimize_start = time.perf_counter() + collection.optimize(option=zvec.OptimizeOption(retrain_only=retrain_only)) + optimize_duration = time.perf_counter() - optimize_start + try: + collection.flush() + except Exception: + pass + del collection -def build_base_command( - vectordbbench_cmd: list[str], - client_name: str, - path: Path, - db_label: str, - case_type: str, + load_duration = None + if insert_duration is not None: + load_duration = insert_duration + optimize_duration + elif optimize_duration is not None: + load_duration = optimize_duration + + return { + "insert_duration": round(insert_duration, 4) if insert_duration is not None else None, + "optimize_duration": round(optimize_duration, 4) if optimize_duration is not None else None, + "load_duration": round(load_duration, 
4) if load_duration is not None else None, + } + + +def _insert_training_data(collection, train_files: list[Path], batch_size: int = 1000) -> float: + import zvec + + pl = _require_polars() + start = time.perf_counter() + for train_file in train_files: + frame = pl.read_parquet(train_file) + for offset in range(0, frame.height, batch_size): + batch = frame.slice(offset, batch_size) + ids = batch["id"].to_list() + vectors = batch["emb"].to_list() + docs = [ + zvec.Doc( + id=str(int(doc_id)), + fields={"id": int(doc_id)}, + vectors={"dense": vector}, + ) + for doc_id, vector in zip(ids, vectors, strict=True) + ] + collection.insert(docs) + return time.perf_counter() - start + + +def compute_recall_with_zvec( + *, + index_kind: str, + index_path: Path, + dataset_artifacts: dict[str, Any], common_args: dict[str, Any], - specific_args: dict[str, Any] | None = None, - extra_flags: list[str] | None = None, -) -> list[str]: - cmd = [ - *vectordbbench_cmd, - client_name, - "--path", - str(path), - "--db-label", - db_label, - "--case-type", - case_type, + target_recall: float | None, + dry_run: bool, +) -> float | None: + if dry_run: + return None + + _ensure_zvec_initialized() + import zvec + + pl = _require_polars() + query_frame = pl.read_parquet(dataset_artifacts["query_parquet"]).sort("id") + gt_frame = pl.read_parquet(dataset_artifacts["gt_parquet"]).sort("id") + gt_map = { + int(row["id"]): [int(value) for value in row["neighbors_id"][: int(common_args["k"])]] + for row in gt_frame.iter_rows(named=True) + } + + option = zvec.CollectionOption(read_only=True, enable_mmap=True) + collection = zvec.open(str(index_path), option) + use_refiner = bool(common_args.get("is_using_refiner", False)) + if index_kind == "OMEGA": + query_param = zvec.OmegaQueryParam( + ef=int(common_args["ef_search"]), + target_recall=float(target_recall), + is_using_refiner=use_refiner, + ) + else: + query_param = zvec.HnswQueryParam( + ef=int(common_args["ef_search"]), + 
is_using_refiner=use_refiner, + ) + + recall_sum = 0.0 + query_count = 0 + topk = int(common_args["k"]) + for row in query_frame.iter_rows(named=True): + query_id = int(row["id"]) + gt = gt_map.get(query_id) + if not gt: + continue + results = collection.query( + vectors=zvec.VectorQuery(field_name="dense", vector=row["emb"], param=query_param), + topk=topk, + output_fields=[], + ) + pred = [int(doc.id) for doc in results[:topk]] + recall_sum += len(set(pred) & set(gt)) / float(topk) + query_count += 1 + + del collection + if query_count == 0: + return None + return recall_sum / query_count + + +def resolve_core_tools(zvec_root: Path) -> tuple[Path, Path]: + bench_bin = (zvec_root / "build/bin/bench").resolve() + recall_bin = (zvec_root / "build/bin/recall").resolve() + if not bench_bin.exists(): + raise FileNotFoundError(f"bench binary not found: {bench_bin}") + if not recall_bin.exists(): + raise FileNotFoundError(f"recall binary not found: {recall_bin}") + return bench_bin, recall_bin + + +def _metric_type_name_for_core(metric_type: str) -> str: + mapping = { + "COSINE": "kCosine", + "IP": "kInnerProduct", + "L2": "kL2sq", + } + normalized = str(metric_type).upper() + if normalized not in mapping: + raise ValueError(f"Unsupported metric type: {metric_type}") + return mapping[normalized] + + +def _quantizer_json(quantize_type: str) -> dict[str, Any] | None: + normalized = str(quantize_type).upper() + if normalized in {"", "UNDEFINED"}: + return None + mapping = { + "FP16": "kFP16", + "INT8": "kInt8", + "INT4": "kInt4", + } + if normalized not in mapping: + raise ValueError(f"Unsupported quantize type: {quantize_type}") + return {"type": mapping[normalized]} + + +def build_core_index_config_json( + *, + index_type: str, + metric_type: str, + dimension: int, + m: int, + ef_construction: int, + quantize_type: str, +) -> str: + payload: dict[str, Any] = { + "index_type": index_type, + "metric_type": _metric_type_name_for_core(metric_type), + "dimension": 
int(dimension), + "version": 0, + "is_sparse": False, + "data_type": "DT_FP32", + "use_id_map": False, + "is_huge_page": False, + "m": int(m), + "ef_construction": int(ef_construction), + } + quantizer = _quantizer_json(quantize_type) + if quantizer is not None: + payload["quantizer_param"] = quantizer + return json.dumps(payload, separators=(",", ":")) + + +def build_core_query_param_json( + *, + index_type: str, + ef_search: int, + topk: int, + target_recall: float | None = None, +) -> str: + payload: dict[str, Any] = { + "index_type": index_type, + "topk": int(topk), + "fetch_vector": False, + "radius": 0.0, + "is_linear": False, + "ef_search": int(ef_search), + } + if target_recall is not None: + payload["target_recall"] = float(target_recall) + return json.dumps(payload, separators=(",", ":")) + + +def discover_index_files(index_path: Path) -> dict[str, Path | None]: + coarse_candidates = sorted(index_path.glob("*/dense.qindex.*.proxima")) + full_candidates = sorted(index_path.glob("*/dense.index.*.proxima")) + primary = coarse_candidates[0] if coarse_candidates else (full_candidates[0] if full_candidates else None) + reference = full_candidates[0] if full_candidates else None + if primary is None: + raise FileNotFoundError(f"No core index file found under {index_path}") + return {"primary": primary, "reference": reference} + + +def _yaml_quote(value: str) -> str: + return "'" + value.replace("'", "''") + "'" + + +def _write_core_config( + *, + path: Path, + index_path: Path, + index_config_json: str, + query_param_json: str, + query_file: Path, + topk: int, + use_refiner: bool, + reference_index_path: Path | None, + metric_type: str, + dimension: int, + m: int, + ef_construction: int, + bench_thread_count: int | None = None, + bench_secs: int | None = None, + recall_thread_count: int | None = None, + groundtruth_file: Path | None = None, +) -> None: + lines = [ + "IndexCommon:", + f" IndexPath: {_yaml_quote(str(index_path))}", + f" IndexConfig: 
{_yaml_quote(index_config_json)}", + f" TopK: {_yaml_quote(str(topk))}", + f" QueryFile: {_yaml_quote(str(query_file))}", + " QueryType: 'float'", + " QueryFirstSep: ';'", + " QuerySecondSep: ' '", ] - extend_with_args(cmd, common_args) - extend_with_args(cmd, specific_args) - extend_with_flags(cmd, extra_flags) - return cmd + if bench_thread_count is not None: + lines.append(f" BenchThreadCount: {bench_thread_count}") + if bench_secs is not None: + lines.append(f" BenchSecs: {bench_secs}") + lines.append(" BenchIterCount: 1000000000") + if recall_thread_count is not None: + lines.append(f" RecallThreadCount: {recall_thread_count}") + lines.append(f" RecallGTCount: {topk}") + lines.append(" CompareById: true") + if groundtruth_file is not None: + lines.append(f" GroundTruthFile: {_yaml_quote(str(groundtruth_file))}") + lines.append(" GroundTruthFirstSep: ';'") + lines.append(" GroundTruthSecondSep: ' '") + + lines.extend( + [ + "QueryConfig:", + f" QueryParam: {_yaml_quote(query_param_json)}", + ] + ) + if use_refiner: + if reference_index_path is None: + raise ValueError("Refiner requested but reference index is missing") + reference_config = build_core_index_config_json( + index_type="kHNSW", + metric_type=metric_type, + dimension=dimension, + m=m, + ef_construction=ef_construction, + quantize_type="UNDEFINED", + ) + lines.extend( + [ + " RefinerConfig:", + " ScaleFactor: 2", + " ReferenceIndex:", + f" Config: {_yaml_quote(reference_config)}", + f" Path: {_yaml_quote(str(reference_index_path))}", + ] + ) -def validate_profile_output(profile_name: str, ret: int, output: str, prefix: str) -> None: - if ret != 0: - raise RuntimeError(f"{profile_name} profiling pass failed with exit code {ret}") - if not parse_query_records(output, prefix): - raise RuntimeError( - f"{profile_name} profiling pass completed without any '{prefix}' records in stdout" + path.write_text("\n".join(lines) + "\n") + + +def run_command_capture( + cmd: list[str], + *, + cwd: Path | None = None, 
+ dry_run: bool = False, + extra_env: dict[str, str] | None = None, +) -> tuple[int, str]: + printable = " ".join(str(token) for token in cmd) + print(printable) + if dry_run: + return 0, "" + + env = os.environ.copy() + if extra_env: + env.update(extra_env) + completed = subprocess.run( + cmd, + cwd=str(cwd) if cwd else None, + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + check=False, + ) + return completed.returncode, completed.stdout + + +def _temporary_config_path(prefix: str, parent: Path) -> Path: + parent.mkdir(parents=True, exist_ok=True) + return parent / f"{prefix}_{int(time.time() * 1000)}.yaml" + + +def run_bench( + *, + bench_bin: Path, + index_file: Path, + query_file: Path, + metric_type: str, + dimension: int, + m: int, + ef_construction: int, + quantize_type: str, + ef_search: int, + topk: int, + bench_thread_count: int, + bench_secs: int, + use_refiner: bool, + reference_index_path: Path | None, + target_recall: float | None = None, + dry_run: bool = False, + extra_env: dict[str, str] | None = None, +) -> tuple[int, str, dict[str, Any]]: + config_path = _temporary_config_path("bench", index_file.parent) + try: + _write_core_config( + path=config_path, + index_path=index_file, + index_config_json=build_core_index_config_json( + index_type="kOMEGA" if target_recall is not None else "kHNSW", + metric_type=metric_type, + dimension=dimension, + m=m, + ef_construction=ef_construction, + quantize_type=quantize_type, + ), + query_param_json=build_core_query_param_json( + index_type="kOMEGA" if target_recall is not None else "kHNSW", + ef_search=ef_search, + topk=topk, + target_recall=target_recall, + ), + query_file=query_file, + topk=topk, + use_refiner=use_refiner, + reference_index_path=reference_index_path, + metric_type=metric_type, + dimension=dimension, + m=m, + ef_construction=ef_construction, + bench_thread_count=bench_thread_count, + bench_secs=bench_secs, + ) + ret, output = run_command_capture( + 
[str(bench_bin), str(config_path)], + dry_run=dry_run, + extra_env=extra_env, ) + return ret, output, parse_bench_output(output) + finally: + if config_path.exists(): + config_path.unlink() + + +def run_recall( + *, + recall_bin: Path, + index_file: Path, + query_file: Path, + groundtruth_file: Path, + metric_type: str, + dimension: int, + m: int, + ef_construction: int, + quantize_type: str, + ef_search: int, + topk: int, + use_refiner: bool, + reference_index_path: Path | None, + target_recall: float | None = None, + dry_run: bool = False, +) -> tuple[int, str, float | None]: + config_path = _temporary_config_path("recall", index_file.parent) + try: + _write_core_config( + path=config_path, + index_path=index_file, + index_config_json=build_core_index_config_json( + index_type="kOMEGA" if target_recall is not None else "kHNSW", + metric_type=metric_type, + dimension=dimension, + m=m, + ef_construction=ef_construction, + quantize_type=quantize_type, + ), + query_param_json=build_core_query_param_json( + index_type="kOMEGA" if target_recall is not None else "kHNSW", + ef_search=ef_search, + topk=topk, + target_recall=target_recall, + ), + query_file=query_file, + topk=topk, + use_refiner=use_refiner, + reference_index_path=reference_index_path, + metric_type=metric_type, + dimension=dimension, + m=m, + ef_construction=ef_construction, + recall_thread_count=1, + groundtruth_file=groundtruth_file, + ) + ret, output = run_command_capture([str(recall_bin), str(config_path)], dry_run=dry_run) + return ret, output, parse_recall_output(output, topk) + finally: + if config_path.exists(): + config_path.unlink() -def print_header(title: str) -> None: - print("\n\n" + "#" * 70) - print(f"# {title}") - print("#" * 70) +def run_concurrency_benchmark( + *, + bench_bin: Path, + index_files: dict[str, Path | None], + dataset_artifacts: dict[str, Any], + dataset_spec: dict[str, Any], + common_args: dict[str, Any], + target_recall: float | None, + dry_run: bool, +) -> dict[str, 
Any]: + ef_search = int(common_args["ef_search"]) + topk = int(common_args["k"]) + m = int(common_args["m"]) + ef_construction = int(common_args.get("ef_construction", 500)) + quantize_type = str(common_args.get("quantize_type", "UNDEFINED")) + use_refiner = bool(common_args.get("is_using_refiner", False)) + duration = int(common_args["concurrency_duration"]) + thread_counts = [int(value) for value in str(common_args["num_concurrency"]).split(",") if value] + + best_summary: dict[str, Any] | None = None + best_output = "" + for thread_count in thread_counts: + ret, output, summary = run_bench( + bench_bin=bench_bin, + index_file=index_files["primary"], + query_file=dataset_artifacts["query_txt"], + metric_type=dataset_spec["metric_type"], + dimension=dataset_spec["dimension"], + m=m, + ef_construction=ef_construction, + quantize_type=quantize_type, + ef_search=ef_search, + topk=topk, + bench_thread_count=thread_count, + bench_secs=duration, + use_refiner=use_refiner, + reference_index_path=index_files["reference"], + target_recall=target_recall, + dry_run=dry_run, + ) + summary["thread_count"] = thread_count + summary["retcode"] = ret + if best_summary is None or (summary.get("qps") or 0.0) > (best_summary.get("qps") or 0.0): + best_summary = summary + best_output = output + + return {"summary": best_summary or {}, "output": best_output} + + +def run_profile_benchmark( + *, + bench_bin: Path, + index_files: dict[str, Path | None], + dataset_artifacts: dict[str, Any], + dataset_spec: dict[str, Any], + common_args: dict[str, Any], + target_recall: float | None, + dry_run: bool, + extra_env: dict[str, str] | None, +) -> tuple[int, str, dict[str, Any]]: + return run_bench( + bench_bin=bench_bin, + index_file=index_files["primary"], + query_file=dataset_artifacts["query_txt"], + metric_type=dataset_spec["metric_type"], + dimension=dataset_spec["dimension"], + m=int(common_args["m"]), + ef_construction=int(common_args.get("ef_construction", 500)), + 
quantize_type=str(common_args.get("quantize_type", "UNDEFINED")), + ef_search=int(common_args["ef_search"]), + topk=int(common_args["k"]), + bench_thread_count=1, + bench_secs=max(1, int(common_args.get("profiling_duration", 1))), + use_refiner=bool(common_args.get("is_using_refiner", False)), + reference_index_path=index_files["reference"], + target_recall=target_recall, + dry_run=dry_run, + extra_env=extra_env, + ) + + +def validate_profile_output(label: str, retcode: int, output: str, expected_prefix: str) -> None: + if retcode != 0: + raise RuntimeError(f"{label} profiling command failed with exit code {retcode}") + if expected_prefix not in output: + raise RuntimeError(f"{label} profiling output does not contain '{expected_prefix}'") diff --git a/src/core/interface/index_factory.cc b/src/core/interface/index_factory.cc index 50c0f973b..6e5c882e3 100644 --- a/src/core/interface/index_factory.cc +++ b/src/core/interface/index_factory.cc @@ -98,6 +98,48 @@ BaseIndexParam::Pointer IndexFactory::DeserializeIndexParamFromJson( } return param; } + case IndexType::kOMEGA: { + HNSWIndexParam::Pointer param = std::make_shared(); + auto deserialize_quantizer = [&](const ailego::JsonObject &obj) -> bool { + ailego::JsonValue quantizer_json_value; + if (obj.has("quantizer_param")) { + if (obj.get("quantizer_param", &quantizer_json_value); + quantizer_json_value.is_object()) { + if (!param->quantizer_param.DeserializeFromJson( + quantizer_json_value.as_json_string().as_stl_string())) { + return false; + } + } + } + return true; + }; + + if (!extract_enum_from_json(json_obj, "metric_type", + param->metric_type, + tmp_json_value) || + !extract_enum_from_json(json_obj, "data_type", + param->data_type, + tmp_json_value) || + !extract_value_from_json(json_obj, "dimension", param->dimension, + tmp_json_value) || + !extract_value_from_json(json_obj, "version", param->version, + tmp_json_value) || + !extract_value_from_json(json_obj, "is_sparse", param->is_sparse, + tmp_json_value) 
|| + !extract_value_from_json(json_obj, "use_id_map", param->use_id_map, + tmp_json_value) || + !extract_value_from_json(json_obj, "is_huge_page", + param->is_huge_page, tmp_json_value) || + !extract_value_from_json(json_obj, "m", param->m, tmp_json_value) || + !extract_value_from_json(json_obj, "ef_construction", + param->ef_construction, tmp_json_value) || + !deserialize_quantizer(json_obj)) { + LOG_ERROR("Failed to deserialize omega index param"); + return nullptr; + } + param->index_type = IndexType::kOMEGA; + return param; + } case IndexType::kIVF: { IVFIndexParam::Pointer param = std::make_shared(); if (!param->DeserializeFromJson(json_str)) { @@ -144,6 +186,14 @@ std::string IndexFactory::QueryParamSerializeToJson(const QueryParamType ¶m, json_obj.set("ef_search", ailego::JsonValue(param.ef_search)); } index_type = IndexType::kHNSW; + } else if constexpr (std::is_same_v) { + if (!omit_empty_value || param.ef_search != 0) { + json_obj.set("ef_search", ailego::JsonValue(param.ef_search)); + } + if (!omit_empty_value || param.target_recall != 0.0f) { + json_obj.set("target_recall", ailego::JsonValue(param.target_recall)); + } + index_type = IndexType::kOMEGA; } else if constexpr (std::is_same_v) { if (!omit_empty_value || param.nprobe != 0) { json_obj.set("nprobe", ailego::JsonValue(param.nprobe)); @@ -168,6 +218,8 @@ template std::string IndexFactory::QueryParamSerializeToJson( const FlatQueryParam ¶m, bool omit_empty_value); template std::string IndexFactory::QueryParamSerializeToJson( const HNSWQueryParam ¶m, bool omit_empty_value); +template std::string IndexFactory::QueryParamSerializeToJson( + const OmegaQueryParam ¶m, bool omit_empty_value); template std::string IndexFactory::QueryParamSerializeToJson( const IVFQueryParam ¶m, bool omit_empty_value); @@ -236,6 +288,22 @@ typename QueryParamType::Pointer IndexFactory::QueryParamDeserializeFromJson( return nullptr; } return param; + } else if (index_type == IndexType::kOMEGA) { + auto param = 
std::make_shared(); + if (!parse_common_fields(param)) { + return nullptr; + } + if (!extract_value_from_json(json_obj, "ef_search", param->ef_search, + tmp_json_value)) { + LOG_ERROR("Failed to deserialize ef_search"); + return nullptr; + } + if (!extract_value_from_json(json_obj, "target_recall", + param->target_recall, tmp_json_value)) { + LOG_ERROR("Failed to deserialize target_recall"); + return nullptr; + } + return param; } else if (index_type == IndexType::kIVF) { auto param = std::make_shared(); if (!parse_common_fields(param)) { @@ -264,6 +332,17 @@ typename QueryParamType::Pointer IndexFactory::QueryParamDeserializeFromJson( LOG_ERROR("Failed to deserialize ef_search"); return nullptr; } + } else if constexpr (std::is_same_v) { + if (!extract_value_from_json(json_obj, "ef_search", param->ef_search, + tmp_json_value)) { + LOG_ERROR("Failed to deserialize ef_search"); + return nullptr; + } + if (!extract_value_from_json(json_obj, "target_recall", + param->target_recall, tmp_json_value)) { + LOG_ERROR("Failed to deserialize target_recall"); + return nullptr; + } } else if constexpr (std::is_same_v) { if (!extract_value_from_json(json_obj, "nprobe", param->nprobe, tmp_json_value)) { @@ -286,6 +365,8 @@ template FlatQueryParam::Pointer IndexFactory::QueryParamDeserializeFromJson< FlatQueryParam>(const std::string &json_str); template HNSWQueryParam::Pointer IndexFactory::QueryParamDeserializeFromJson< HNSWQueryParam>(const std::string &json_str); +template OmegaQueryParam::Pointer IndexFactory::QueryParamDeserializeFromJson< + OmegaQueryParam>(const std::string &json_str); template IVFQueryParam::Pointer IndexFactory::QueryParamDeserializeFromJson< IVFQueryParam>(const std::string &json_str); From b51bde8f95b39a148737658cfcdecb59a9e7f2b2 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 30 Mar 2026 00:38:17 +0800 Subject: [PATCH 079/126] cleanup: add dataset auto-download for benchmark scripts --- scripts/benchmark_hnsw_vs_omega.py | 1 - 
scripts/benchmark_lib.py | 81 ++++++++++++++++++++++++++++-- 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/scripts/benchmark_hnsw_vs_omega.py b/scripts/benchmark_hnsw_vs_omega.py index ace78fa10..d8bf082d5 100644 --- a/scripts/benchmark_hnsw_vs_omega.py +++ b/scripts/benchmark_hnsw_vs_omega.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -"""Benchmark Zvec HNSW vs OMEGA without VectorDBBench.""" from __future__ import annotations diff --git a/scripts/benchmark_lib.py b/scripts/benchmark_lib.py index 6388c8ce2..7f00c9ef3 100644 --- a/scripts/benchmark_lib.py +++ b/scripts/benchmark_lib.py @@ -8,6 +8,7 @@ import shutil import subprocess import time +import urllib.request from dataclasses import dataclass from datetime import datetime from pathlib import Path @@ -44,16 +45,25 @@ class BenchmarkResult: DATASET_SPECS: dict[str, dict[str, Any]] = { "cohere_1m": { "dataset_dirname": "cohere/cohere_medium_1m", + "remote_dirname": "cohere_medium_1m", "dimension": 768, "metric_type": "COSINE", + "train_files": ["shuffle_train.parquet"], }, "cohere_10m": { "dataset_dirname": "cohere/cohere_large_10m", + "remote_dirname": "cohere_large_10m", "dimension": 768, "metric_type": "COSINE", + "train_files": [f"shuffle_train-{idx:02d}-of-10.parquet" for idx in range(10)], }, } +DATASET_DOWNLOAD_BASE_URLS = { + "S3": "https://assets.zilliz.com/benchmark", + "ALIYUNOSS": "https://assets.zilliz.com.cn/benchmark", +} + def load_json(path: Path) -> dict[str, Any]: with open(path) as f: @@ -495,16 +505,29 @@ def resolve_dataset_spec( dataset_root = Path(config["dataset_root"]).expanduser().resolve() elif os.environ.get("DATASET_LOCAL_DIR"): dataset_root = Path(os.environ["DATASET_LOCAL_DIR"]).expanduser().resolve() + else: + dataset_root = Path("/tmp/zvec/dataset").resolve() dataset_dirname = config.get("dataset_dirname", default.get("dataset_dirname")) - if dataset_root is None or not dataset_dirname: + if not dataset_dirname: raise ValueError( - "Dataset root is not configured. 
Set --dataset-root, config.dataset_root, " - "or DATASET_LOCAL_DIR." + f"Dataset directory name is not configured for {dataset_name}." ) dimension = int(config.get("dimension", default.get("dimension", 0))) metric_type = str(config.get("metric_type", default.get("metric_type", "COSINE"))).upper() + remote_dirname = str(config.get("remote_dirname", default.get("remote_dirname", ""))) + train_files = list(config.get("train_files", default.get("train_files", []))) + dataset_source = str(config.get("dataset_source", os.environ.get("ZVEC_DATASET_SOURCE", "S3"))) + download_base_url = str( + config.get( + "dataset_base_url", + os.environ.get( + "ZVEC_DATASET_BASE_URL", + DATASET_DOWNLOAD_BASE_URLS.get(dataset_source.upper(), DATASET_DOWNLOAD_BASE_URLS["S3"]), + ), + ) + ) if dimension <= 0: raise ValueError(f"Missing dataset dimension for {dataset_name}") @@ -514,6 +537,9 @@ def resolve_dataset_spec( "dataset_dir": dataset_dir, "dimension": dimension, "metric_type": metric_type, + "remote_dirname": remote_dirname, + "train_files": train_files, + "download_base_url": download_base_url.rstrip("/"), } @@ -545,6 +571,54 @@ def _sorted_train_files(dataset_dir: Path) -> list[Path]: return unique +def _dataset_required_files(dataset_name: str, dataset_spec: dict[str, Any]) -> list[str]: + required = list(dataset_spec.get("train_files", [])) + if not required: + raise ValueError( + f"Dataset {dataset_name} does not define train_files for auto-download" + ) + required.extend(["test.parquet", "neighbors.parquet"]) + return required + + +def _download_file(url: str, output_path: Path) -> None: + output_path.parent.mkdir(parents=True, exist_ok=True) + tmp_path = output_path.with_suffix(output_path.suffix + ".tmp") + try: + with urllib.request.urlopen(url) as response, open(tmp_path, "wb") as out: + shutil.copyfileobj(response, out) + tmp_path.replace(output_path) + finally: + tmp_path.unlink(missing_ok=True) + + +def ensure_dataset_available(dataset_name: str, dataset_spec: 
dict[str, Any], dry_run: bool) -> None: + dataset_dir = dataset_spec["dataset_dir"] + required_files = _dataset_required_files(dataset_name, dataset_spec) + missing_files = [name for name in required_files if not (dataset_dir / name).exists()] + if not missing_files: + return + + remote_dirname = dataset_spec.get("remote_dirname") + if not remote_dirname: + raise FileNotFoundError( + f"Dataset directory is incomplete and auto-download is not configured: {dataset_dir}" + ) + + base_url = dataset_spec["download_base_url"] + print(f"Dataset files missing under {dataset_dir}, downloading from {base_url}/{remote_dirname} ...") + if dry_run: + for name in missing_files: + print(f"[Dry-run] download {base_url}/{remote_dirname}/{name} -> {dataset_dir / name}") + return + + for name in missing_files: + url = f"{base_url}/{remote_dirname}/{name}" + output_path = dataset_dir / name + print(f"Downloading {url}") + _download_file(url, output_path) + + def prepare_dataset_artifacts( dataset_name: str, dataset_spec: dict[str, Any], @@ -554,6 +628,7 @@ def prepare_dataset_artifacts( dataset_dir = dataset_spec["dataset_dir"] query_parquet = dataset_dir / "test.parquet" gt_parquet = dataset_dir / "neighbors.parquet" + ensure_dataset_available(dataset_name, dataset_spec, dry_run) train_files = _sorted_train_files(dataset_dir) if not dry_run: if not dataset_dir.exists(): From 22bbd40475ca077a7371262092e4c895fd3ddab6 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 30 Mar 2026 00:49:00 +0800 Subject: [PATCH 080/126] cleanup: surface benchmark script failure output --- scripts/benchmark_lib.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/benchmark_lib.py b/scripts/benchmark_lib.py index 7f00c9ef3..cf5633b01 100644 --- a/scripts/benchmark_lib.py +++ b/scripts/benchmark_lib.py @@ -1128,6 +1128,8 @@ def run_command_capture( text=True, check=False, ) + if completed.returncode != 0 and completed.stdout: + print(completed.stdout, end="" if 
completed.stdout.endswith("\n") else "\n") return completed.returncode, completed.stdout @@ -1337,4 +1339,8 @@ def validate_profile_output(label: str, retcode: int, output: str, expected_pref if retcode != 0: raise RuntimeError(f"{label} profiling command failed with exit code {retcode}") if expected_prefix not in output: - raise RuntimeError(f"{label} profiling output does not contain '{expected_prefix}'") + tail = "\n".join(output.splitlines()[-40:]) if output else "" + raise RuntimeError( + f"{label} profiling output does not contain '{expected_prefix}'. " + f"Last output lines:\n{tail}" + ) From 6dd828929cc7c68ca5e06833972bf0b09634feb4 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 30 Mar 2026 00:53:19 +0800 Subject: [PATCH 081/126] cleanup: drop benchmark refiner and warmup config --- scripts/benchmark_hnsw_vs_omega.json | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/scripts/benchmark_hnsw_vs_omega.json b/scripts/benchmark_hnsw_vs_omega.json index 232bee693..0eb604172 100644 --- a/scripts/benchmark_hnsw_vs_omega.json +++ b/scripts/benchmark_hnsw_vs_omega.json @@ -1,10 +1,5 @@ { "cohere_1m": { - "warmup": { - "enabled": true, - "duration": 15, - "num_concurrency": "4" - }, "common": { "case_type": "Performance768D1M", "num_concurrency": "12,14,16,18,20", @@ -40,11 +35,6 @@ } }, "cohere_10m": { - "warmup": { - "enabled": true, - "duration": 15, - "num_concurrency": "4" - }, "common": { "case_type": "Performance768D10M", "num_concurrency": "12,14,16,18,20", @@ -52,8 +42,7 @@ "k": 100, "m": 50, "ef_search": 300, - "quantize_type": "int8", - "is_using_refiner": true + "quantize_type": "int8" }, "hnsw": { "path": "cohere_10m_hnsw", From e898606327ca3daf7bce4004d63581a8de32a5cd Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 30 Mar 2026 22:59:30 +0800 Subject: [PATCH 082/126] cleanup: sync omega library cleanup --- src/core/algorithm/omega/omega_streamer.cc | 4 ++-- thirdparty/omega/OMEGALib | 2 +- 2 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 5c0fda3a0..c6567a078 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -213,8 +213,8 @@ void CollectOmegaTrainingOutputs(OmegaSearchHandle omega_search, const auto& omega_record = (*records_vec)[i]; core_interface::TrainingRecord record; record.query_id = omega_record.query_id; - record.hops_visited = omega_record.hops; - record.cmps_visited = omega_record.cmps; + record.hops_visited = omega_record.hops_visited; + record.cmps_visited = omega_record.cmps_visited; record.dist_1st = omega_record.dist_1st; record.dist_start = omega_record.dist_start; diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index ac25139c3..4fdc04945 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit ac25139c3ea0dbe9dbfe00d05900870c66a36318 +Subproject commit 4fdc04945ec56343d6954b0bb6e26228dbf8fc1d From 9bae4531ed81dfe647749c76a608abb0ec386435 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 30 Mar 2026 23:10:57 +0800 Subject: [PATCH 083/126] cleanup: sync omega feature extractor removal --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 4fdc04945..ea4022e01 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 4fdc04945ec56343d6954b0bb6e26228dbf8fc1d +Subproject commit ea4022e0175971552adf3660a5f3fa58c5d77e6a From 546a838a2b88665ba2698021afeafc78880a1923 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Mon, 30 Mar 2026 23:15:30 +0800 Subject: [PATCH 084/126] cleanup: sync omega trainer cleanup --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index ea4022e01..d99f00547 160000 --- 
a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit ea4022e0175971552adf3660a5f3fa58c5d77e6a +Subproject commit d99f00547a9f8984232f55a74565f7aa13dd74fd From 43ca6ec1b5a0d52c24af5fc23d739715735faef7 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Tue, 31 Mar 2026 03:42:12 +0800 Subject: [PATCH 085/126] update benchmark config to m64 recall092 --- scripts/benchmark_hnsw_vs_omega.json | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/benchmark_hnsw_vs_omega.json b/scripts/benchmark_hnsw_vs_omega.json index 0eb604172..d3f22ddb7 100644 --- a/scripts/benchmark_hnsw_vs_omega.json +++ b/scripts/benchmark_hnsw_vs_omega.json @@ -5,20 +5,20 @@ "num_concurrency": "12,14,16,18,20", "concurrency_duration": 30, "k": 100, - "m": 15, + "m": 64, "ef_search": 300, "quantize_type": "int8" }, "hnsw": { "path": "cohere_1m_hnsw", - "db_label": "16c64g-v0.1-hnsw-m15-ef300", + "db_label": "16c64g-v0.1-hnsw-m64-ef300", "args": {} }, "omega": { "path": "cohere_1m_omega", - "db_label": "16c64g-v0.1-omega-m15-ef300", + "db_label": "16c64g-v0.1-omega-m64-ef300", "target_recalls": [ - 0.90 + 0.92 ], "args": { "min_vector_threshold": 100000, @@ -40,20 +40,20 @@ "num_concurrency": "12,14,16,18,20", "concurrency_duration": 30, "k": 100, - "m": 50, + "m": 64, "ef_search": 300, "quantize_type": "int8" }, "hnsw": { "path": "cohere_10m_hnsw", - "db_label": "16c64g-v0.1-hnsw-m50-ef300", + "db_label": "16c64g-v0.1-hnsw-m64-ef300", "args": {} }, "omega": { "path": "cohere_10m_omega", - "db_label": "16c64g-v0.1-omega-m50-ef300", + "db_label": "16c64g-v0.1-omega-m64-ef300", "target_recalls": [ - 0.95 + 0.92 ], "args": { "min_vector_threshold": 100000, From 61dc836c32ed42ac8c56c103cbbaebfec9482ac4 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Tue, 31 Mar 2026 19:17:22 +0800 Subject: [PATCH 086/126] update cohere10m benchmark params --- scripts/benchmark_hnsw_vs_omega.json | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/scripts/benchmark_hnsw_vs_omega.json b/scripts/benchmark_hnsw_vs_omega.json index d3f22ddb7..a0c2acd15 100644 --- a/scripts/benchmark_hnsw_vs_omega.json +++ b/scripts/benchmark_hnsw_vs_omega.json @@ -41,7 +41,7 @@ "concurrency_duration": 30, "k": 100, "m": 64, - "ef_search": 300, + "ef_search": 600, "quantize_type": "int8" }, "hnsw": { @@ -58,9 +58,9 @@ "args": { "min_vector_threshold": 100000, "num_training_queries": 4000, - "ef_training": 500, + "ef_training": 1000, "window_size": 100, - "ef_groundtruth": 1000 + "ef_groundtruth": 2000 } }, "profiling": { From f086f4f69e918e02c2bdfd8a4f2e7cb7cd87a087 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 1 Apr 2026 14:44:47 +0800 Subject: [PATCH 087/126] Remove OMEGA profiling path --- scripts/benchmark_hnsw_vs_omega.json | 10 - scripts/benchmark_hnsw_vs_omega.py | 117 +------- scripts/benchmark_lib.py | 209 +------------- src/core/algorithm/hnsw/hnsw_algorithm.cc | 30 +- src/core/algorithm/hnsw/hnsw_algorithm.h | 11 +- src/core/algorithm/omega/omega_hook_utils.h | 56 +--- src/core/algorithm/omega/omega_streamer.cc | 295 +------------------- src/core/algorithm/omega/omega_streamer.h | 3 - thirdparty/omega/OMEGALib | 2 +- 9 files changed, 37 insertions(+), 696 deletions(-) diff --git a/scripts/benchmark_hnsw_vs_omega.json b/scripts/benchmark_hnsw_vs_omega.json index a0c2acd15..217c526d7 100644 --- a/scripts/benchmark_hnsw_vs_omega.json +++ b/scripts/benchmark_hnsw_vs_omega.json @@ -27,11 +27,6 @@ "window_size": 100, "ef_groundtruth": 1000 } - }, - "profiling": { - "hnsw_query_limit": 2000, - "omega_query_limit": 2000, - "omega_profile_control_timing": true } }, "cohere_10m": { @@ -62,11 +57,6 @@ "window_size": 100, "ef_groundtruth": 2000 } - }, - "profiling": { - "hnsw_query_limit": 2000, - "omega_query_limit": 2000, - "omega_profile_control_timing": true } } } diff --git a/scripts/benchmark_hnsw_vs_omega.py b/scripts/benchmark_hnsw_vs_omega.py index d8bf082d5..54932a539 100644 --- 
a/scripts/benchmark_hnsw_vs_omega.py +++ b/scripts/benchmark_hnsw_vs_omega.py @@ -8,14 +8,11 @@ from benchmark_lib import ( BenchmarkResult, - build_hnsw_profile, build_index, - build_omega_profile, compute_recall_with_zvec, discover_index_files, get_offline_load_duration, load_dataset_config, - merge_omega_detailed_profile, must_get, prepare_dataset_artifacts, print_header, @@ -24,9 +21,7 @@ resolve_index_path, resolve_paths, run_concurrency_benchmark, - run_profile_benchmark, - validate_profile_output, - write_grouped_profiling_summaries, + write_grouped_online_summaries, write_offline_summary, ) @@ -87,7 +82,6 @@ def run_hnsw( hnsw_db_label: str, common: dict[str, object], hnsw_config: dict[str, object], - profiling_config: dict[str, object], ) -> BenchmarkResult: print_header("HNSW Benchmark") @@ -138,30 +132,6 @@ def run_hnsw( online = benchmark["summary"] success = online.get("retcode", 0) == 0 - hnsw_profile = None - if success and not args.dry_run: - print("\n[Profiling] Running HNSW single-thread profiling pass...") - profile_ret, profile_output, profile_bench = run_profile_benchmark( - bench_bin=bench_bin, - index_files=index_files, - dataset_artifacts=dataset_artifacts, - dataset_spec=dataset_spec, - common_args=common, - target_recall=None, - dry_run=False, - extra_env={ - "ZVEC_LOG_LEVEL": "INFO", - "ZVEC_HNSW_LOG_QUERY_STATS": "1", - "ZVEC_HNSW_LOG_QUERY_LIMIT": str(profiling_config.get("hnsw_query_limit", 2000)), - }, - ) - validate_profile_output("HNSW", profile_ret, profile_output, "HNSW query stats:") - hnsw_profile = build_hnsw_profile( - {"qps": online.get("qps"), "recall": recall}, - profile_output, - profile_bench, - ) - return BenchmarkResult( type="HNSW", path=str(hnsw_path), @@ -175,7 +145,6 @@ def run_hnsw( p95_latency_ms=online.get("p95_latency_ms"), p99_latency_ms=online.get("p99_latency_ms"), recall=recall, - profiling=hnsw_profile, ) @@ -189,8 +158,6 @@ def run_omega( omega_db_label: str, common: dict[str, object], omega_config: 
dict[str, object], - profiling_config: dict[str, object], - hnsw_profile: dict[str, object] | None, target_recalls: list[float], ) -> list[BenchmarkResult]: print_header("OMEGA Benchmark") @@ -258,54 +225,6 @@ def run_omega( online = benchmark["summary"] success = online.get("retcode", 0) == 0 - omega_profile = None - if success and not args.dry_run: - print("\n[Profiling] Running OMEGA single-thread profiling pass...") - profile_env = { - "ZVEC_LOG_LEVEL": "INFO", - "ZVEC_OMEGA_LOG_QUERY_STATS": "1", - "ZVEC_OMEGA_LOG_QUERY_LIMIT": str(profiling_config.get("omega_query_limit", 2000)), - } - profile_ret, profile_output, profile_bench = run_profile_benchmark( - bench_bin=bench_bin, - index_files=index_files, - dataset_artifacts=dataset_artifacts, - dataset_spec=dataset_spec, - common_args=omega_common, - target_recall=target_recall, - dry_run=False, - extra_env=profile_env, - ) - validate_profile_output("OMEGA", profile_ret, profile_output, "OMEGA query stats:") - omega_profile = build_omega_profile( - {"qps": online.get("qps"), "recall": recall}, - profile_output, - profile_bench, - hnsw_profile, - ) - if profiling_config.get("omega_profile_control_timing", True): - print("\n[Profiling] Running OMEGA control-timing pass...") - detailed_ret, detailed_output, detailed_bench = run_profile_benchmark( - bench_bin=bench_bin, - index_files=index_files, - dataset_artifacts=dataset_artifacts, - dataset_spec=dataset_spec, - common_args=omega_common, - target_recall=target_recall, - dry_run=False, - extra_env={**profile_env, "ZVEC_OMEGA_PROFILE_CONTROL_TIMING": "1"}, - ) - validate_profile_output( - "OMEGA", detailed_ret, detailed_output, "OMEGA query stats:" - ) - detailed_profile = build_omega_profile( - {"qps": online.get("qps"), "recall": recall}, - detailed_output, - detailed_bench, - hnsw_profile, - ) - omega_profile = merge_omega_detailed_profile(omega_profile, detailed_profile) - results.append( BenchmarkResult( type="OMEGA", @@ -320,7 +239,6 @@ def run_omega( 
p95_latency_ms=online.get("p95_latency_ms"), p99_latency_ms=online.get("p99_latency_ms"), recall=recall, - profiling=omega_profile, ) ) @@ -348,7 +266,6 @@ def main() -> int: common = must_get(config, "common") hnsw_config = must_get(config, "hnsw") omega_config = must_get(config, "omega") - profiling_config = config.get("profiling", {}) hnsw_path = resolve_index_path(benchmark_dir, must_get(hnsw_config, "path")) omega_path = resolve_index_path(benchmark_dir, must_get(omega_config, "path")) @@ -375,7 +292,6 @@ def main() -> int: print("=" * 70) results: list[BenchmarkResult] = [] - hnsw_profile = None if not args.skip_hnsw: hnsw_result = run_hnsw( @@ -388,10 +304,8 @@ def main() -> int: hnsw_db_label=hnsw_db_label, common=common, hnsw_config=hnsw_config, - profiling_config=profiling_config, ) results.append(hnsw_result) - hnsw_profile = hnsw_result.profiling if not args.skip_omega: results.extend( @@ -404,15 +318,13 @@ def main() -> int: omega_db_label=omega_db_label, common=common, omega_config=omega_config, - profiling_config=profiling_config, - hnsw_profile=hnsw_profile, target_recalls=target_recalls, ) ) if results: written_summary_paths = ( - write_grouped_profiling_summaries(dataset_name, results) + write_grouped_online_summaries(dataset_name, results) if not args.dry_run else [] ) @@ -442,31 +354,6 @@ def main() -> int: f"{avg_latency:<16} {p95_latency:<16} {recall:<10} {status:<10}" ) - print("\nProfiling Summary") - print("-" * 75) - print( - f"{'Type':<10} {'target_recall':<15} {'avg_lat(ms)':<12} " - f"{'avg_cmps':<12} {'avg_pred_calls':<16} {'avg_model_ms':<14} " - f"{'saved_cmps':<12}" - ) - for result in results: - profile = result.profiling or {} - tr = f"{result.target_recall:.2f}" if result.target_recall is not None else "N/A" - avg_lat = profile.get("profile_avg_end2end_latency_ms") - avg_cmps = profile.get("profile_avg_cmps") - avg_pred_calls = profile.get("profile_avg_prediction_calls") - avg_model_ms = 
profile.get("profile_avg_model_overhead_ms") - saved_cmps = profile.get("profile_avg_early_stop_saved_cmps") - print( - f"{result.type:<10} " - f"{tr:<15} " - f"{(f'{avg_lat:.3f}' if avg_lat is not None else 'N/A'):<12} " - f"{(f'{avg_cmps:.1f}' if avg_cmps is not None else 'N/A'):<12} " - f"{(f'{avg_pred_calls:.2f}' if avg_pred_calls is not None else 'N/A'):<16} " - f"{(f'{avg_model_ms:.3f}' if avg_model_ms is not None else 'N/A'):<14} " - f"{(f'{saved_cmps:.1f}' if saved_cmps is not None else 'N/A'):<12}" - ) - print() for path in written_summary_paths: print(f"Summary JSON: {path}") diff --git a/scripts/benchmark_lib.py b/scripts/benchmark_lib.py index cf5633b01..541fdb084 100644 --- a/scripts/benchmark_lib.py +++ b/scripts/benchmark_lib.py @@ -29,7 +29,6 @@ class BenchmarkResult: p90_latency_ms: float | None = None p95_latency_ms: float | None = None p99_latency_ms: float | None = None - profiling: dict[str, Any] | None = None KV_PATTERN = re.compile(r"([A-Za-z_]+)=([^\s,]+)") @@ -193,157 +192,16 @@ def parse_recall_output(output: str, topk: int) -> float | None: return None -def build_hnsw_profile( - metrics: dict[str, Any], output: str, bench_summary: dict[str, Any] -) -> dict[str, Any]: - query_records = parse_query_records(output, "HNSW query stats:") - return { - "benchmark_recall": metrics.get("recall"), - "benchmark_qps": metrics.get("qps"), - "profile_query_count": len(query_records), - "profile_avg_end2end_latency_ms": bench_summary.get("avg_latency_ms"), - "profile_p50_end2end_latency_ms": bench_summary.get("p50_latency_ms"), - "profile_p90_end2end_latency_ms": bench_summary.get("p90_latency_ms"), - "profile_p95_end2end_latency_ms": bench_summary.get("p95_latency_ms"), - "profile_p99_end2end_latency_ms": bench_summary.get("p99_latency_ms"), - "profile_avg_cmps": avg_metric(query_records, "pairwise_dist_cnt"), - "profile_avg_scan_cmps": avg_metric(query_records, "cmps"), - "profile_avg_pure_search_ms": avg_metric(query_records, "pure_search_ms"), - 
"profile_serial_avg_latency_s": ( - bench_summary["avg_latency_ms"] / 1000.0 - if bench_summary.get("avg_latency_ms") is not None - else None - ), - "profile_serial_p99_s": ( - bench_summary["p99_latency_ms"] / 1000.0 - if bench_summary.get("p99_latency_ms") is not None - else None - ), - "profile_serial_p95_s": ( - bench_summary["p95_latency_ms"] / 1000.0 - if bench_summary.get("p95_latency_ms") is not None - else None - ), - "profile_serial_avg_recall": metrics.get("recall"), - } - - -def build_omega_profile( - metrics: dict[str, Any], - output: str, - bench_summary: dict[str, Any], - hnsw_profile: dict[str, Any] | None, -) -> dict[str, Any]: - query_records = parse_query_records(output, "OMEGA query stats:") - - avg_pairwise_dist_cnt = avg_metric(query_records, "pairwise_dist_cnt") - avg_core_search_ms = avg_metric(query_records, "core_search_ms") - avg_pure_search_ms = avg_metric(query_records, "pure_search_ms") - avg_hook_total_ms = avg_metric(query_records, "hook_total_ms") - avg_search_only_ms = avg_pure_search_ms if avg_pure_search_ms is not None else avg_core_search_ms - - cmp_time_ms = None - if avg_pairwise_dist_cnt and avg_pairwise_dist_cnt > 0 and avg_search_only_ms is not None: - cmp_time_ms = avg_search_only_ms / avg_pairwise_dist_cnt - - model_overhead_cmp_equiv = None - if cmp_time_ms and cmp_time_ms > 0 and avg_hook_total_ms is not None: - model_overhead_cmp_equiv = avg_hook_total_ms / cmp_time_ms - - avg_saved_cmps = None - if ( - hnsw_profile - and hnsw_profile.get("profile_avg_cmps") is not None - and avg_pairwise_dist_cnt is not None - ): - avg_saved_cmps = hnsw_profile["profile_avg_cmps"] - avg_pairwise_dist_cnt - - return { - "benchmark_recall": metrics.get("recall"), - "benchmark_qps": metrics.get("qps"), - "profile_query_count": len(query_records), - "profile_avg_end2end_latency_ms": bench_summary.get("avg_latency_ms"), - "profile_p50_end2end_latency_ms": bench_summary.get("p50_latency_ms"), - "profile_p90_end2end_latency_ms": 
bench_summary.get("p90_latency_ms"), - "profile_p95_end2end_latency_ms": bench_summary.get("p95_latency_ms"), - "profile_p99_end2end_latency_ms": bench_summary.get("p99_latency_ms"), - "profile_avg_cmps": avg_pairwise_dist_cnt, - "profile_avg_scan_cmps": avg_metric(query_records, "scan_cmps"), - "profile_avg_omega_cmps": avg_metric(query_records, "omega_cmps"), - "profile_avg_prediction_calls": avg_metric(query_records, "prediction_calls"), - "profile_avg_should_stop_calls": avg_metric(query_records, "should_stop_calls"), - "profile_avg_advance_calls": avg_metric(query_records, "advance_calls"), - "profile_avg_model_overhead_ms": avg_hook_total_ms, - "profile_avg_setup_ms": avg_metric(query_records, "setup_ms"), - "profile_avg_should_stop_ms": avg_metric(query_records, "should_stop_ms"), - "profile_avg_prediction_eval_ms": avg_metric(query_records, "prediction_eval_ms"), - "profile_avg_core_search_ms": avg_core_search_ms, - "profile_avg_pure_search_ms": avg_pure_search_ms, - "profile_avg_hook_total_ms": avg_hook_total_ms, - "profile_avg_hook_body_ms": avg_metric(query_records, "hook_body_ms"), - "profile_avg_hook_dispatch_ms": avg_metric(query_records, "hook_dispatch_ms"), - "profile_avg_report_visit_candidate_ms": avg_metric(query_records, "report_visit_candidate_ms"), - "profile_avg_should_predict_ms": avg_metric(query_records, "should_predict_ms"), - "profile_avg_report_hop_ms": avg_metric(query_records, "report_hop_ms"), - "profile_avg_update_top_candidates_ms": avg_metric(query_records, "update_top_candidates_ms"), - "profile_avg_push_traversal_window_ms": avg_metric(query_records, "push_traversal_window_ms"), - "profile_avg_model_overhead_cmp_equiv": model_overhead_cmp_equiv, - "profile_avg_early_stop_saved_cmps": avg_saved_cmps, - "profile_avg_early_stop_hit_rate": avg_metric(query_records, "early_stop_hit"), - "profile_serial_avg_latency_s": ( - bench_summary["avg_latency_ms"] / 1000.0 - if bench_summary.get("avg_latency_ms") is not None - else None - ), - 
"profile_serial_p99_s": ( - bench_summary["p99_latency_ms"] / 1000.0 - if bench_summary.get("p99_latency_ms") is not None - else None - ), - "profile_serial_p95_s": ( - bench_summary["p95_latency_ms"] / 1000.0 - if bench_summary.get("p95_latency_ms") is not None - else None - ), - "profile_serial_avg_recall": metrics.get("recall"), - } - - -def merge_omega_detailed_profile( - summary_profile: dict[str, Any], detailed_profile: dict[str, Any] -) -> dict[str, Any]: - merged = dict(summary_profile) - detailed_keys = [ - "profile_avg_model_overhead_ms", - "profile_avg_should_stop_ms", - "profile_avg_prediction_eval_ms", - "profile_avg_core_search_ms", - "profile_avg_pure_search_ms", - "profile_avg_hook_total_ms", - "profile_avg_hook_body_ms", - "profile_avg_hook_dispatch_ms", - "profile_avg_report_visit_candidate_ms", - "profile_avg_should_predict_ms", - "profile_avg_report_hop_ms", - "profile_avg_update_top_candidates_ms", - "profile_avg_push_traversal_window_ms", - "profile_avg_model_overhead_cmp_equiv", - ] - for key in detailed_keys: - merged[key] = detailed_profile.get(key) - return merged - - -def profiling_output_path(index_path: Path) -> Path: +def online_summary_path(index_path: Path) -> Path: return index_path / "online_benchmark_summary.json" -def write_profiling_summary(index_path: Path, payload: dict[str, Any]) -> None: - with open(profiling_output_path(index_path), "w") as f: +def write_online_summary(index_path: Path, payload: dict[str, Any]) -> None: + with open(online_summary_path(index_path), "w") as f: json.dump(payload, f, indent=2, sort_keys=True) -def write_grouped_profiling_summaries(dataset: str, results: list[BenchmarkResult]) -> list[Path]: +def write_grouped_online_summaries(dataset: str, results: list[BenchmarkResult]) -> list[Path]: written_paths: list[Path] = [] grouped: dict[str, list[BenchmarkResult]] = {} for result in results: @@ -351,7 +209,7 @@ def write_grouped_profiling_summaries(dataset: str, results: list[BenchmarkResul for 
path_str, grouped_results in grouped.items(): index_path = Path(path_str) - write_profiling_summary( + write_online_summary( index_path, { "generated_at": datetime.now().isoformat(), @@ -369,13 +227,12 @@ def write_grouped_profiling_summaries(dataset: str, results: list[BenchmarkResul "p95_latency_ms": result.p95_latency_ms, "p99_latency_ms": result.p99_latency_ms, "recall": result.recall, - "profiling": result.profiling, } for result in grouped_results ], }, ) - written_paths.append(profiling_output_path(index_path)) + written_paths.append(online_summary_path(index_path)) return written_paths @@ -482,18 +339,6 @@ def get_offline_load_duration(index_path: Path) -> float | None: "load_duration_s" ) - -def latency_summary_from_profile(profile: dict[str, Any] | None) -> dict[str, float | None]: - profile = profile or {} - return { - "avg_latency_ms": profile.get("profile_avg_end2end_latency_ms"), - "p50_latency_ms": profile.get("profile_p50_end2end_latency_ms"), - "p90_latency_ms": profile.get("profile_p90_end2end_latency_ms"), - "p95_latency_ms": profile.get("profile_p95_end2end_latency_ms"), - "p99_latency_ms": profile.get("profile_p99_end2end_latency_ms"), - } - - def resolve_dataset_spec( dataset_name: str, config: dict[str, Any], dataset_root_arg: str | None ) -> dict[str, Any]: @@ -1302,45 +1147,3 @@ def run_concurrency_benchmark( return {"summary": best_summary or {}, "output": best_output} - -def run_profile_benchmark( - *, - bench_bin: Path, - index_files: dict[str, Path | None], - dataset_artifacts: dict[str, Any], - dataset_spec: dict[str, Any], - common_args: dict[str, Any], - target_recall: float | None, - dry_run: bool, - extra_env: dict[str, str] | None, -) -> tuple[int, str, dict[str, Any]]: - return run_bench( - bench_bin=bench_bin, - index_file=index_files["primary"], - query_file=dataset_artifacts["query_txt"], - metric_type=dataset_spec["metric_type"], - dimension=dataset_spec["dimension"], - m=int(common_args["m"]), - 
ef_construction=int(common_args.get("ef_construction", 500)), - quantize_type=str(common_args.get("quantize_type", "UNDEFINED")), - ef_search=int(common_args["ef_search"]), - topk=int(common_args["k"]), - bench_thread_count=1, - bench_secs=max(1, int(common_args.get("profiling_duration", 1))), - use_refiner=bool(common_args.get("is_using_refiner", False)), - reference_index_path=index_files["reference"], - target_recall=target_recall, - dry_run=dry_run, - extra_env=extra_env, - ) - - -def validate_profile_output(label: str, retcode: int, output: str, expected_prefix: str) -> None: - if retcode != 0: - raise RuntimeError(f"{label} profiling command failed with exit code {retcode}") - if expected_prefix not in output: - tail = "\n".join(output.splitlines()[-40:]) if output else "" - raise RuntimeError( - f"{label} profiling output does not contain '{expected_prefix}'. " - f"Last output lines:\n{tail}" - ) diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.cc b/src/core/algorithm/hnsw/hnsw_algorithm.cc index dfbd95a3b..0c1551493 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.cc +++ b/src/core/algorithm/hnsw/hnsw_algorithm.cc @@ -228,18 +228,6 @@ bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); }; } - auto run_timed_hook = [&](auto &&fn) { - if (hooks == nullptr || !hooks->collect_timing || hooks->now_ns == nullptr || - hooks->elapsed_ns == nullptr || hooks->hook_total_time_ns == nullptr) { - return fn(); - } - uint64_t start_ns = hooks->now_ns(); - auto result = fn(); - *hooks->hook_total_time_ns += - hooks->elapsed_ns(start_ns, hooks->now_ns()); - return result; - }; - const uint32_t result_topk_limit = ctx->topk(); const bool track_hook_result_topk = hooks != nullptr && hooks->on_visit_candidate != nullptr && @@ -259,18 +247,12 @@ bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, candidates.emplace(*entry_point, *dist); if (hooks != 
nullptr && hooks->on_level0_entry != nullptr) { - run_timed_hook([&]() { - hooks->on_level0_entry(*entry_point, *dist, entry_inserted_to_topk, - hooks->user_data); - return 0; - }); + hooks->on_level0_entry(*entry_point, *dist, entry_inserted_to_topk, + hooks->user_data); } while (!candidates.empty() && !ctx->reach_scan_limit()) { if (hooks != nullptr && hooks->on_hop != nullptr) { - run_timed_hook([&]() { - hooks->on_hop(hooks->user_data); - return 0; - }); + hooks->on_hop(hooks->user_data); } auto top = candidates.begin(); @@ -358,10 +340,8 @@ bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, } if (hooks != nullptr && hooks->on_visit_candidate != nullptr) { - bool should_stop = run_timed_hook([&]() { - return hooks->on_visit_candidate(node, cur_dist, inserted_to_topk, - hooks->user_data); - }); + bool should_stop = hooks->on_visit_candidate( + node, cur_dist, inserted_to_topk, hooks->user_data); if (should_stop) { return true; } diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.h b/src/core/algorithm/hnsw/hnsw_algorithm.h index dcea105f1..ee3c87789 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.h +++ b/src/core/algorithm/hnsw/hnsw_algorithm.h @@ -27,9 +27,8 @@ class HnswAlgorithm { public: typedef std::unique_ptr UPointer; - // SearchHooks is the integration seam used by OMEGA and a small amount of - // profiling tooling. Callbacks are invoked from the level-0 search loop in - // this order: + // SearchHooks is the integration seam used by OMEGA. Callbacks are invoked + // from the level-0 search loop in this order: // 1. on_level0_entry once after the initial level-0 entry point is accepted // 2. on_hop once per popped candidate expansion // 3. on_visit_candidate once per candidate comparison at level 0 @@ -40,14 +39,8 @@ class HnswAlgorithm { // Returning true from on_visit_candidate requests early termination of the // level-0 search. This is currently used by OMEGA adaptive stopping. 
// - // collect_timing/now_ns/elapsed_ns/hook_total_time_ns are profiling-only - // fields; they do not affect hook semantics. struct SearchHooks { void *user_data{nullptr}; - bool collect_timing{false}; - uint64_t (*now_ns)(){nullptr}; - uint64_t (*elapsed_ns)(uint64_t start, uint64_t end){nullptr}; - uint64_t *hook_total_time_ns{nullptr}; void (*on_level0_entry)(node_id_t id, dist_t dist, bool inserted_to_topk, void *user_data){nullptr}; void (*on_hop)(void *user_data){nullptr}; diff --git a/src/core/algorithm/omega/omega_hook_utils.h b/src/core/algorithm/omega/omega_hook_utils.h index 5524ac415..d59031d3a 100644 --- a/src/core/algorithm/omega/omega_hook_utils.h +++ b/src/core/algorithm/omega/omega_hook_utils.h @@ -19,7 +19,6 @@ #include #include #include -#include "utility/rdtsc_timer.h" #include "../hnsw/hnsw_entity.h" namespace zvec::core { @@ -65,27 +64,11 @@ struct OmegaHookState { omega::SearchContext* search_ctx{nullptr}; bool enable_early_stopping{false}; - bool collect_control_timing{false}; - uint64_t* hook_body_time_ns{nullptr}; bool per_cmp_reporting{false}; PendingVisitBuffer pending_candidates; int batch_min_interval{1}; }; -template -inline void RunOmegaControlHook(const OmegaHookState& state, Fn&& fn) { - if (!state.collect_control_timing) { - fn(); - return; - } - auto control_start = RdtscTimer::Now(); - fn(); - if (state.hook_body_time_ns != nullptr) { - *state.hook_body_time_ns += RdtscTimer::ElapsedNs( - control_start, RdtscTimer::Now()); - } -} - inline void ResetOmegaHookState(OmegaHookState* state) { if (state->search_ctx != nullptr) { state->batch_min_interval = state->search_ctx->GetPredictionBatchMinInterval(); @@ -117,18 +100,15 @@ inline bool FlushOmegaPendingCandidates(OmegaHookState* state, int flush_count) flush_count = std::min(flush_count, state->pending_candidates.count); bool should_predict = false; - RunOmegaControlHook(*state, [&]() { - should_predict = state->search_ctx->ReportVisitCandidates( - 
state->pending_candidates.Data(), static_cast(flush_count)); - }); + should_predict = state->search_ctx->ReportVisitCandidates( + state->pending_candidates.Data(), static_cast(flush_count)); state->pending_candidates.Clear(); if (!state->enable_early_stopping || !should_predict) { return false; } bool should_stop = false; - RunOmegaControlHook( - *state, [&]() { should_stop = state->search_ctx->ShouldStopEarly(); }); + should_stop = state->search_ctx->ShouldStopEarly(); return should_stop; } @@ -143,22 +123,18 @@ inline void OnOmegaLevel0Entry(node_id_t id, dist_t dist, bool /*inserted_to_topk*/, void* user_data) { auto& state = *static_cast(user_data); if (state.per_cmp_reporting) { - RunOmegaControlHook(state, [&]() { - state.search_ctx->SetDistStart(dist); - state.search_ctx->ReportVisitCandidate(id, dist, true); - }); + state.search_ctx->SetDistStart(dist); + state.search_ctx->ReportVisitCandidate(id, dist, true); return; } - RunOmegaControlHook(state, [&]() { - state.search_ctx->SetDistStart(dist); - state.pending_candidates.Push({static_cast(id), dist, true}); - }); + state.search_ctx->SetDistStart(dist); + state.pending_candidates.Push({static_cast(id), dist, true}); MaybeFlushOmegaPendingCandidates(&state); } inline void OnOmegaHop(void* user_data) { auto& state = *static_cast(user_data); - RunOmegaControlHook(state, [&]() { state.search_ctx->ReportHop(); }); + state.search_ctx->ReportHop(); } inline bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, @@ -166,24 +142,18 @@ inline bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, auto& state = *static_cast(user_data); if (state.per_cmp_reporting) { bool should_predict = false; - RunOmegaControlHook(state, [&]() { - should_predict = - state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); - }); + should_predict = + state.search_ctx->ReportVisitCandidate(id, dist, inserted_to_topk); if (!state.enable_early_stopping || !should_predict) { return false; } bool should_stop = false; - 
RunOmegaControlHook( - state, [&]() { should_stop = state.search_ctx->ShouldStopEarly(); }); + should_stop = state.search_ctx->ShouldStopEarly(); return should_stop; } - RunOmegaControlHook(state, [&]() { - state.pending_candidates.Push( - {static_cast(id), dist, inserted_to_topk}); - }); + state.pending_candidates.Push( + {static_cast(id), dist, inserted_to_topk}); return MaybeFlushOmegaPendingCandidates(&state); } } // namespace zvec::core - diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index c6567a078..7a16a6a2f 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -18,161 +18,39 @@ #include #include #include "omega_hook_utils.h" -#include "utility/rdtsc_timer.h" #include "../hnsw/hnsw_entity.h" #include "../hnsw/hnsw_context.h" #include "omega_context.h" #include "omega_params.h" #include #include -#include namespace zvec { namespace core { namespace { -bool ShouldLogEveryQueryStats() { - const char* value = std::getenv("ZVEC_OMEGA_LOG_QUERY_STATS"); - if (value == nullptr) { - return false; - } - return std::string(value) != "0"; -} - -uint64_t GetQueryStatsLimit() { - const char* value = std::getenv("ZVEC_OMEGA_LOG_QUERY_LIMIT"); - if (value == nullptr || *value == '\0') { - return 0; - } - char* end = nullptr; - unsigned long long parsed = std::strtoull(value, &end, 10); - if (end == value) { - return 0; - } - return static_cast(parsed); -} - -bool ShouldLogQueryStats(uint64_t query_seq) { - if (!ShouldLogEveryQueryStats()) { - return false; - } - uint64_t limit = GetQueryStatsLimit(); - return limit == 0 || query_seq < limit; -} - -bool IsOmegaControlTimingEnabled() { - const char* value = std::getenv("ZVEC_OMEGA_PROFILE_CONTROL_TIMING"); - if (value == nullptr) { - return false; - } - return value[0] != '\0' && value[0] != '0'; -} - -uint64_t OmegaProfilingNowNs() { - return RdtscTimer::Now(); -} - -uint64_t OmegaProfilingElapsedNs(uint64_t start, 
uint64_t end) { - return RdtscTimer::ElapsedNs(start, end); -} - struct OmegaHookSetup { OmegaHookState state; HnswAlgorithm::SearchHooks hooks; }; -struct OmegaFinalStats { - int hops{0}; - int cmps{0}; - int collected_gt{0}; - float predicted_recall_avg{0.0f}; - float predicted_recall_at_target{0.0f}; - int omega_early_stop_hit{0}; - unsigned long long should_stop_calls{0}; - unsigned long long prediction_calls{0}; - unsigned long long should_stop_time_ns{0}; - unsigned long long prediction_eval_time_ns{0}; - unsigned long long sorted_window_time_ns{0}; - unsigned long long average_recall_eval_time_ns{0}; - unsigned long long prediction_feature_prep_time_ns{0}; - unsigned long long report_visit_candidate_time_ns{0}; - unsigned long long report_hop_time_ns{0}; - unsigned long long update_top_candidates_time_ns{0}; - unsigned long long push_traversal_window_time_ns{0}; - unsigned long long collected_gt_advance_count{0}; - unsigned long long should_stop_calls_with_advance{0}; - unsigned long long max_prediction_calls_per_should_stop{0}; -}; - -struct OmegaTimingSummary { - uint64_t query_total_time_ns{0}; - uint64_t query_reset_time_ns{0}; - uint64_t query_search_time_ns{0}; - uint64_t query_setup_time_ns{0}; - uint64_t hook_total_time_ns{0}; - uint64_t hook_body_time_ns{0}; - uint64_t pure_search_time_ns{0}; - uint64_t hook_dispatch_time_ns{0}; -}; - OmegaHookSetup CreateOmegaHookSetup(omega::SearchContext* omega_search_ctx, bool enable_early_stopping, - bool collect_control_timing, - bool per_cmp_reporting, - uint64_t* hook_body_time_ns, - uint64_t* hook_total_time_ns) { + bool per_cmp_reporting) { OmegaHookSetup setup; setup.state.search_ctx = omega_search_ctx; setup.state.enable_early_stopping = enable_early_stopping; - setup.state.collect_control_timing = collect_control_timing; - setup.state.hook_body_time_ns = hook_body_time_ns; setup.state.per_cmp_reporting = per_cmp_reporting; ResetOmegaHookState(&setup.state); setup.hooks.user_data = &setup.state; - 
setup.hooks.collect_timing = collect_control_timing; - setup.hooks.now_ns = &OmegaProfilingNowNs; - setup.hooks.elapsed_ns = &OmegaProfilingElapsedNs; - setup.hooks.hook_total_time_ns = hook_total_time_ns; setup.hooks.on_level0_entry = OnOmegaLevel0Entry; setup.hooks.on_hop = OnOmegaHop; setup.hooks.on_visit_candidate = OnOmegaVisitCandidate; return setup; } -OmegaFinalStats CollectOmegaFinalStats(omega::SearchContext* omega_search_ctx) { - OmegaFinalStats stats; - omega_search_ctx->GetStats(&stats.hops, &stats.cmps, &stats.collected_gt); - stats.predicted_recall_avg = omega_search_ctx->GetLastPredictedRecallAvg(); - stats.predicted_recall_at_target = - omega_search_ctx->GetLastPredictedRecallAtTarget(); - stats.omega_early_stop_hit = omega_search_ctx->EarlyStopHit() ? 1 : 0; - stats.should_stop_calls = omega_search_ctx->GetShouldStopCalls(); - stats.prediction_calls = omega_search_ctx->GetPredictionCalls(); - stats.should_stop_time_ns = omega_search_ctx->GetShouldStopTimeNs(); - stats.prediction_eval_time_ns = omega_search_ctx->GetPredictionEvalTimeNs(); - stats.sorted_window_time_ns = omega_search_ctx->GetSortedWindowTimeNs(); - stats.average_recall_eval_time_ns = - omega_search_ctx->GetAverageRecallEvalTimeNs(); - stats.prediction_feature_prep_time_ns = - omega_search_ctx->GetPredictionFeaturePrepTimeNs(); - stats.report_visit_candidate_time_ns = - omega_search_ctx->GetReportVisitCandidateTimeNs(); - stats.report_hop_time_ns = omega_search_ctx->GetReportHopTimeNs(); - stats.update_top_candidates_time_ns = - omega_search_ctx->GetUpdateTopCandidatesTimeNs(); - stats.push_traversal_window_time_ns = - omega_search_ctx->GetPushTraversalWindowTimeNs(); - stats.collected_gt_advance_count = - omega_search_ctx->GetCollectedGtAdvanceCount(); - stats.should_stop_calls_with_advance = - omega_search_ctx->GetShouldStopCallsWithAdvance(); - stats.max_prediction_calls_per_should_stop = - omega_search_ctx->GetMaxPredictionCallsPerShouldStop(); - return stats; -} - void 
EnableOmegaTrainingIfNeeded(OmegaSearchHandle omega_search, int query_id, bool training_mode_enabled, const std::vector>& training_ground_truth, @@ -252,110 +130,6 @@ void CollectOmegaTrainingOutputs(OmegaSearchHandle omega_search, omega_ctx->set_gt_cmps(gt_cmps_vec, total_cmps); } -void LogOmegaRuntimeStatsOnce(std::atomic* debug_stats_logged, - bool model_loaded, float target_recall, - size_t scan_cmps, uint64_t pairwise_dist_cnt, - const OmegaFinalStats& final_stats, - bool early_stop_hit) { - bool expected = false; - if (!debug_stats_logged->compare_exchange_strong(expected, true)) { - return; - } - - LOG_INFO("OMEGA runtime stats: model_loaded=%d target_recall=%.4f " - "scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d " - "collected_gt=%d predicted_recall_avg=%.4f " - "predicted_recall_at_target=%.4f early_stop_hit=%d " - "should_stop_calls=%llu prediction_calls=%llu " - "advance_calls=%llu collected_gt_advance=%llu " - "max_pred_per_stop=%llu should_stop_ms=%.3f " - "prediction_eval_ms=%.3f", - model_loaded ? 1 : 0, target_recall, scan_cmps, - static_cast(pairwise_dist_cnt), final_stats.cmps, - final_stats.collected_gt, final_stats.predicted_recall_avg, - final_stats.predicted_recall_at_target, early_stop_hit ? 
1 : 0, - final_stats.should_stop_calls, final_stats.prediction_calls, - final_stats.should_stop_calls_with_advance, - final_stats.collected_gt_advance_count, - final_stats.max_prediction_calls_per_should_stop, - static_cast(final_stats.should_stop_time_ns) / 1e6, - static_cast(final_stats.prediction_eval_time_ns) / 1e6); -} - -void LogOmegaQueryStats(uint64_t query_seq, bool model_loaded, - float target_recall, size_t scan_cmps, - uint64_t pairwise_dist_cnt, - const OmegaFinalStats& final_stats, - const OmegaTimingSummary& timing, - bool collect_control_timing, bool early_stop_hit) { - if (collect_control_timing) { - LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " - "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " - "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " - "early_stop_hit=%d should_stop_calls=%llu " - "prediction_calls=%llu advance_calls=%llu " - "collected_gt_advance=%llu max_pred_per_stop=%llu " - "should_stop_ms=%.3f prediction_eval_ms=%.3f " - "setup_ms=%.3f reset_query_ms=%.3f " - "core_search_ms=%.3f hook_total_ms=%.3f hook_body_ms=%.3f " - "hook_dispatch_ms=%.3f pure_search_ms=%.3f " - "report_visit_candidate_ms=%.3f " - "report_hop_ms=%.3f update_top_candidates_ms=%.3f " - "push_traversal_window_ms=%.3f total_ms=%.3f", - static_cast(query_seq), model_loaded ? 1 : 0, - target_recall, scan_cmps, - static_cast(pairwise_dist_cnt), - final_stats.cmps, final_stats.collected_gt, - final_stats.predicted_recall_avg, - final_stats.predicted_recall_at_target, early_stop_hit ? 
1 : 0, - final_stats.should_stop_calls, final_stats.prediction_calls, - final_stats.should_stop_calls_with_advance, - final_stats.collected_gt_advance_count, - final_stats.max_prediction_calls_per_should_stop, - static_cast(final_stats.should_stop_time_ns) / 1e6, - static_cast(final_stats.prediction_eval_time_ns) / 1e6, - static_cast(timing.query_setup_time_ns) / 1e6, - static_cast(timing.query_reset_time_ns) / 1e6, - static_cast(timing.query_search_time_ns) / 1e6, - static_cast(timing.hook_total_time_ns) / 1e6, - static_cast(timing.hook_body_time_ns) / 1e6, - static_cast(timing.hook_dispatch_time_ns) / 1e6, - static_cast(timing.pure_search_time_ns) / 1e6, - static_cast(final_stats.report_visit_candidate_time_ns) / 1e6, - static_cast(final_stats.report_hop_time_ns) / 1e6, - static_cast(final_stats.update_top_candidates_time_ns) / 1e6, - static_cast(final_stats.push_traversal_window_time_ns) / 1e6, - static_cast(timing.query_total_time_ns) / 1e6); - return; - } - - LOG_INFO("OMEGA query stats: query_seq=%llu model_loaded=%d " - "target_recall=%.4f scan_cmps=%zu pairwise_dist_cnt=%llu omega_cmps=%d collected_gt=%d " - "predicted_recall_avg=%.4f predicted_recall_at_target=%.4f " - "early_stop_hit=%d should_stop_calls=%llu " - "prediction_calls=%llu advance_calls=%llu " - "collected_gt_advance=%llu max_pred_per_stop=%llu " - "should_stop_ms=%.3f prediction_eval_ms=%.3f " - "setup_ms=%.3f reset_query_ms=%.3f " - "core_search_ms=%.3f search_with_hooks_ms=%.3f total_ms=%.3f", - static_cast(query_seq), model_loaded ? 1 : 0, - target_recall, scan_cmps, - static_cast(pairwise_dist_cnt), final_stats.cmps, - final_stats.collected_gt, final_stats.predicted_recall_avg, - final_stats.predicted_recall_at_target, early_stop_hit ? 
1 : 0, - final_stats.should_stop_calls, final_stats.prediction_calls, - final_stats.should_stop_calls_with_advance, - final_stats.collected_gt_advance_count, - final_stats.max_prediction_calls_per_should_stop, - static_cast(final_stats.should_stop_time_ns) / 1e6, - static_cast(final_stats.prediction_eval_time_ns) / 1e6, - static_cast(timing.query_setup_time_ns) / 1e6, - static_cast(timing.query_reset_time_ns) / 1e6, - static_cast(timing.query_search_time_ns) / 1e6, - static_cast(timing.query_search_time_ns) / 1e6, - static_cast(timing.query_total_time_ns) / 1e6); -} - } // namespace bool OmegaStreamer::LoadModel(const std::string& model_dir) { @@ -390,8 +164,6 @@ bool OmegaStreamer::IsModelLoaded() const { int OmegaStreamer::open(IndexStorage::Pointer stg) { std::string index_path = stg ? stg->file_path() : ""; - debug_stats_logged_.store(false); - query_stats_sequence_.store(0); int ret = HnswStreamer::open(std::move(stg)); if (ret != 0) { @@ -461,10 +233,7 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, Context::Pointer &context, bool enable_early_stopping) const { - auto query_total_start = RdtscTimer::Now(); - const bool collect_control_timing = IsOmegaControlTimingEnabled(); - uint64_t hook_total_time_ns = 0; - uint64_t hook_body_time_ns = 0; + (void)qmeta; // Cast context to OmegaContext to access training_query_id auto *omega_ctx = dynamic_cast(context.get()); @@ -532,15 +301,11 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm search_batch_distance_); hnsw_ctx->resize_results(count); hnsw_ctx->check_need_adjuct_ctx(entity_.doc_cnt()); - auto query_reset_start = RdtscTimer::Now(); hnsw_ctx->reset_query(query); - auto query_reset_end = RdtscTimer::Now(); OmegaHookSetup hook_setup = CreateOmegaHookSetup(omega_search_ctx, enable_early_stopping, - collect_control_timing, training_mode_enabled_, - 
&hook_body_time_ns, &hook_total_time_ns); + training_mode_enabled_); bool early_stop_hit = false; - auto query_search_start = RdtscTimer::Now(); int ret = alg_->search_with_hooks(hnsw_ctx, &hook_setup.hooks, &early_stop_hit); if (ret != 0) { @@ -549,56 +314,12 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm return ret; } MaybeFlushOmegaPendingCandidates(&hook_setup.state); - auto query_search_end = RdtscTimer::Now(); - - uint64_t query_total_time_ns = - RdtscTimer::ElapsedNs(query_total_start, RdtscTimer::Now()); - uint64_t query_reset_time_ns = - RdtscTimer::ElapsedNs(query_reset_start, query_reset_end); - uint64_t query_search_time_ns = - RdtscTimer::ElapsedNs(query_search_start, query_search_end); - uint64_t query_setup_time_ns = 0; - if (query_total_time_ns > (query_reset_time_ns + query_search_time_ns)) { - query_setup_time_ns = - query_total_time_ns - query_reset_time_ns - query_search_time_ns; - } - uint64_t query_seq = query_stats_sequence_.fetch_add(1); - const OmegaFinalStats final_stats = CollectOmegaFinalStats(omega_search_ctx); + int hops = 0; + int cmps = 0; + omega_search_ctx->GetStats(&hops, &cmps, nullptr); LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu, early_stop=%d", - final_stats.cmps, final_stats.hops, hnsw_ctx->topk_heap().size(), - enable_early_stopping); - if (enable_early_stopping) { - const bool model_loaded = IsModelLoaded(); - size_t scan_cmps = hnsw_ctx->get_scan_num(); - uint64_t pairwise_dist_cnt = hnsw_ctx->get_pairwise_dist_num(); - OmegaTimingSummary timing; - timing.query_total_time_ns = query_total_time_ns; - timing.query_reset_time_ns = query_reset_time_ns; - timing.query_search_time_ns = query_search_time_ns; - timing.query_setup_time_ns = query_setup_time_ns; - timing.hook_total_time_ns = hook_total_time_ns; - timing.hook_body_time_ns = hook_body_time_ns; - if (collect_control_timing) { - timing.pure_search_time_ns = - query_search_time_ns > hook_total_time_ns - ? 
(query_search_time_ns - hook_total_time_ns) - : 0; - timing.hook_dispatch_time_ns = - hook_total_time_ns > hook_body_time_ns - ? (hook_total_time_ns - hook_body_time_ns) - : 0; - } - const bool omega_early_stop_hit = - early_stop_hit || final_stats.omega_early_stop_hit != 0; - LogOmegaRuntimeStatsOnce(&debug_stats_logged_, model_loaded, target_recall, - scan_cmps, pairwise_dist_cnt, final_stats, - omega_early_stop_hit); - if (ShouldLogQueryStats(query_seq)) { - LogOmegaQueryStats(query_seq, model_loaded, target_recall, scan_cmps, - pairwise_dist_cnt, final_stats, timing, - collect_control_timing, omega_early_stop_hit); - } - } + cmps, hops, hnsw_ctx->topk_heap().size(), + (early_stop_hit || omega_search_ctx->EarlyStopHit()) ? 1 : 0); // Match HNSW timing semantics: result materialization is outside the // search-core timer and happens after logging. diff --git a/src/core/algorithm/omega/omega_streamer.h b/src/core/algorithm/omega/omega_streamer.h index 41a525e1e..31534f35b 100644 --- a/src/core/algorithm/omega/omega_streamer.h +++ b/src/core/algorithm/omega/omega_streamer.h @@ -17,7 +17,6 @@ #include "omega_context.h" #include #include -#include #include #include @@ -105,8 +104,6 @@ class OmegaStreamer : public HnswStreamer { // Inference mode state mutable OmegaModelHandle omega_model_{nullptr}; mutable std::mutex model_mutex_; - mutable std::atomic debug_stats_logged_{false}; - mutable std::atomic query_stats_sequence_{0}; float target_recall_{0.95f}; int window_size_{100}; }; diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index d99f00547..eb65afd99 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit d99f00547a9f8984232f55a74565f7aa13dd74fd +Subproject commit eb65afd99522570a277a7dae6d43d9dc43b98104 From 5a30ab4d9f5d24a09d78d05e1aef50fbdbf8983f Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 1 Apr 2026 19:04:27 +0800 Subject: [PATCH 088/126] Update OMEGA tests and integration code --- 
python/tests/test_collection.py | 98 +++++++++++++ python/tests/test_params.py | 138 ++++++++++++++++++ .../mixed_reducer/mixed_streamer_reducer.cc | 20 ++- src/include/zvec/db/query_params.h | 6 +- tests/core/algorithm/CMakeLists.txt | 3 + tests/core/algorithm/omega/CMakeLists.txt | 16 ++ .../omega/omega_search_context_test.cc | 106 ++++++++++++++ tests/core/interface/CMakeLists.txt | 29 +++- .../core/interface/omega_query_param_test.cc | 79 ++++++++++ .../interface/omega_training_session_test.cc | 90 ++++++++++++ thirdparty/omega/OMEGALib | 2 +- 11 files changed, 573 insertions(+), 14 deletions(-) create mode 100644 tests/core/algorithm/omega/CMakeLists.txt create mode 100644 tests/core/algorithm/omega/omega_search_context_test.cc create mode 100644 tests/core/interface/omega_query_param_test.cc create mode 100644 tests/core/interface/omega_training_session_test.cc diff --git a/python/tests/test_collection.py b/python/tests/test_collection.py index 7d021d6fd..eb5ddc305 100644 --- a/python/tests/test_collection.py +++ b/python/tests/test_collection.py @@ -23,7 +23,10 @@ Doc, FieldSchema, HnswIndexParam, + OmegaIndexParam, + OmegaQueryParam, InvertIndexParam, + MetricType, LogLevel, LogType, VectorSchema, @@ -129,6 +132,75 @@ def test_collection( print(f"Warning: failed to destroy collection: {e}") +@pytest.fixture(scope="session") +def omega_collection_schema(): + return zvec.CollectionSchema( + name="omega_test_collection", + fields=[ + FieldSchema( + "id", + DataType.INT64, + nullable=False, + index_param=InvertIndexParam(enable_range_optimization=True), + ), + FieldSchema("name", DataType.STRING, nullable=False), + ], + vectors=[ + VectorSchema( + "dense", + DataType.VECTOR_FP32, + dimension=128, + index_param=OmegaIndexParam( + metric_type=MetricType.L2, + min_vector_threshold=100000, + num_training_queries=16, + ef_training=64, + window_size=32, + ), + ) + ], + ) + + +@pytest.fixture(scope="function") +def omega_test_collection( + tmp_path_factory, 
omega_collection_schema, collection_option +) -> Collection: + temp_dir = tmp_path_factory.mktemp("zvec_omega") + collection_path = temp_dir / "omega_test_collection" + + coll = zvec.create_and_open( + path=str(collection_path), + schema=omega_collection_schema, + option=collection_option, + ) + + assert coll is not None + assert coll.path == str(collection_path) + assert coll.schema.vectors[0].index_param.type == IndexType.OMEGA + + try: + yield coll + finally: + if hasattr(coll, "destroy") and coll is not None: + try: + coll.destroy() + except Exception as e: + print(f"Warning: failed to destroy omega collection: {e}") + + +@pytest.fixture +def omega_multiple_docs(): + return [ + Doc( + id=f"{id}", + fields={"id": id, "name": f"doc-{id}"}, + vectors={"dense": [id + 0.1] * 128}, + ) + for id in range(1, 65) + ] + + @pytest.fixture def collection_with_single_doc(test_collection: Collection, single_doc) -> Collection: # Setup: insert single doc @@ -969,6 +1041,32 @@ def test_collection_query_by_id( ) assert len(result) == 10 + def test_omega_collection_schema_uses_omega_index( + self, omega_test_collection: Collection + ): + vector_schema = omega_test_collection.schema.vector("dense") + assert vector_schema is not None + assert vector_schema.index_param.type == IndexType.OMEGA + + def test_omega_collection_query_by_id_with_omega_param( + self, omega_test_collection: Collection, omega_multiple_docs + ): + result = omega_test_collection.insert(omega_multiple_docs) + assert len(result) == len(omega_multiple_docs) + for item in result: + assert item.ok() + + query_result = omega_test_collection.query( + VectorQuery( + field_name="dense", + vector=omega_multiple_docs[0].vector("dense"), + param=OmegaQueryParam(ef=128, target_recall=0.91), + ), + topk=5, + ) + assert len(query_result) > 0 + assert query_result[0].id == omega_multiple_docs[0].id + def test_collection_query_multi_vector_with_same_field( self, collection_with_multiple_docs: Collection, multiple_docs ): diff 
--git a/python/tests/test_params.py b/python/tests/test_params.py index 0a85a7a38..f7d3cb6ff 100644 --- a/python/tests/test_params.py +++ b/python/tests/test_params.py @@ -13,6 +13,7 @@ # limitations under the License. from __future__ import annotations +import pickle import sys import time @@ -25,11 +26,13 @@ CollectionOption, FlatIndexParam, HnswIndexParam, + OmegaIndexParam, IndexOption, InvertIndexParam, IVFIndexParam, OptimizeOption, HnswQueryParam, + OmegaQueryParam, IVFQueryParam, VectorQuery, IndexType, @@ -177,6 +180,69 @@ def test_readonly_attributes(self, attr): setattr(param, attr, getattr(param, attr)) +# ---------------------------- +# OMEGA Index Param Test Case +# ---------------------------- +class TestOmegaIndexParam: + def test_default(self): + param = OmegaIndexParam() + assert param.type == IndexType.OMEGA + assert param.metric_type == MetricType.IP + assert param.m == 50 + assert param.ef_construction == 500 + assert param.quantize_type == QuantizeType.UNDEFINED + assert param.min_vector_threshold == 100000 + assert param.num_training_queries == 1000 + assert param.ef_training == 1000 + assert param.window_size == 100 + assert param.ef_groundtruth == 0 + assert param.k_train == 1 + + def test_custom(self): + param = OmegaIndexParam( + metric_type=MetricType.COSINE, + m=32, + ef_construction=700, + quantize_type=QuantizeType.INT8, + min_vector_threshold=1234, + num_training_queries=567, + ef_training=890, + window_size=42, + ef_groundtruth=321, + k_train=3, + ) + assert param.metric_type == MetricType.COSINE + assert param.m == 32 + assert param.ef_construction == 700 + assert param.quantize_type == QuantizeType.INT8 + assert param.min_vector_threshold == 1234 + assert param.num_training_queries == 567 + assert param.ef_training == 890 + assert param.window_size == 42 + assert param.ef_groundtruth == 321 + assert param.k_train == 3 + + def test_pickle_round_trip(self): + param = OmegaIndexParam( + metric_type=MetricType.COSINE, + m=24, + 
ef_construction=320, + quantize_type=QuantizeType.INT8, + min_vector_threshold=2048, + num_training_queries=256, + ef_training=640, + window_size=48, + ef_groundtruth=96, + k_train=2, + ) + restored = pickle.loads(pickle.dumps(param)) + assert restored.type == IndexType.OMEGA + assert restored.metric_type == MetricType.COSINE + assert restored.m == 24 + assert restored.ef_training == 640 + assert restored.k_train == 2 + + # ---------------------------- # CollectionOption Test Case # ---------------------------- @@ -345,6 +411,51 @@ def test_readonly_attributes(self): param.is_linear = True +# ---------------------------- +# OMEGA Query Param Test Case +# ---------------------------- +class TestOmegaQueryParam: + def test_default(self): + param = OmegaQueryParam() + assert param.type == IndexType.OMEGA + assert param.ef == 300 + assert param.target_recall == pytest.approx(0.95) + assert param.radius == pytest.approx(0.0) + assert param.is_linear is False + assert param.is_using_refiner is False + + def test_custom(self): + param = OmegaQueryParam( + ef=480, + target_recall=0.92, + radius=1.5, + is_linear=True, + is_using_refiner=True, + ) + assert param.type == IndexType.OMEGA + assert param.ef == 480 + assert param.target_recall == pytest.approx(0.92) + assert param.radius == pytest.approx(1.5) + assert param.is_linear is True + assert param.is_using_refiner is True + + def test_pickle_round_trip(self): + param = OmegaQueryParam( + ef=384, + target_recall=0.91, + radius=0.25, + is_linear=True, + is_using_refiner=True, + ) + restored = pickle.loads(pickle.dumps(param)) + assert restored.type == IndexType.OMEGA + assert restored.ef == 384 + assert restored.target_recall == pytest.approx(0.91) + assert restored.radius == pytest.approx(0.25) + assert restored.is_linear is True + assert restored.is_using_refiner is True + + # # ---------------------------- # # IVFQueryParam Test Case # # ---------------------------- @@ -389,6 +500,15 @@ def 
test_init_with_valid_vector(self): assert vq.vector == vec assert vq.param == param + def test_init_with_valid_omega_param(self): + vec = [0.1, 0.2, 0.3] + param = OmegaQueryParam(ef=256, target_recall=0.91) + vq = VectorQuery(field_name="embedding", vector=vec, param=param) + assert vq.field_name == "embedding" + assert vq.vector == vec + assert vq.param == param + assert vq.param.target_recall == pytest.approx(0.91) + def test_init_both_id_and_vector_raises_error(self): with pytest.raises(ValueError): VectorQuery(field_name="embedding", id="doc123", vector=[0.1])._validate() @@ -413,3 +533,21 @@ def test_validate_fails_on_both_id_and_vector(self): vq = VectorQuery(field_name="test", id="doc123", vector=[0.1]) with pytest.raises(ValueError): vq._validate() + + +class TestVectorSchemaWithOmega: + def test_accepts_omega_index_param(self): + schema = VectorSchema( + name="dense", + data_type=DataType.VECTOR_FP32, + dimension=8, + index_param=OmegaIndexParam( + metric_type=MetricType.COSINE, + m=16, + ef_construction=300, + window_size=64, + ), + ) + assert schema.index_param.type == IndexType.OMEGA + assert schema.index_param.metric_type == MetricType.COSINE + assert schema.index_param.window_size == 64 diff --git a/src/core/mixed_reducer/mixed_streamer_reducer.cc b/src/core/mixed_reducer/mixed_streamer_reducer.cc index c59abc8d7..46757b168 100644 --- a/src/core/mixed_reducer/mixed_streamer_reducer.cc +++ b/src/core/mixed_reducer/mixed_streamer_reducer.cc @@ -199,18 +199,24 @@ int MixedStreamerReducer::reduce(const IndexFilter &filter) { if (target_builder_ != nullptr) { IndexBuild(); - // CRITICAL FIX: After IndexBuild(), the builder's entity has the graph data (1500 docs), - // but the streamer's entity is still empty (0 docs). They are separate objects! - // Solution: Dump builder to storage, then close and reopen streamer to reload the data. + // Best-effort persistence hook for builder-backed indexes. 
Some newer + // flows want the built graph persisted immediately after reduce(), but + // legacy paths such as IVF already perform their own dump/reopen later in + // the merge flow. Missing storage context must not break those existing + // paths. if (target_storage_ == nullptr) { - LOG_ERROR("target_storage_ is null, cannot dump/reload"); - return IndexError_Runtime; + LOG_WARN("target_storage_ is null, skip dump/reload hook"); + LOG_INFO("End brute force reduce. cost time: [%zu]s", + (size_t)timer.seconds()); + return 0; } if (target_file_path_.empty()) { - LOG_ERROR("target_file_path_ is empty, cannot dump/reload"); - return IndexError_Runtime; + LOG_WARN("target_file_path_ is empty, skip dump/reload hook"); + LOG_INFO("End brute force reduce. cost time: [%zu]s", + (size_t)timer.seconds()); + return 0; } diff --git a/src/include/zvec/db/query_params.h b/src/include/zvec/db/query_params.h index a8f7b23a9..7466860fd 100644 --- a/src/include/zvec/db/query_params.h +++ b/src/include/zvec/db/query_params.h @@ -111,7 +111,9 @@ class OmegaQueryParams : public HnswQueryParams { float radius = 0.0f, bool is_linear = false, bool is_using_refiner = false) : HnswQueryParams(ef, radius, is_linear, is_using_refiner), - target_recall_(target_recall) {} + target_recall_(target_recall) { + set_type(IndexType::OMEGA); + } virtual ~OmegaQueryParams() = default; @@ -206,4 +208,4 @@ class FlatQueryParams : public QueryParams { float scale_factor_{10}; }; -} // namespace zvec \ No newline at end of file +} // namespace zvec diff --git a/tests/core/algorithm/CMakeLists.txt b/tests/core/algorithm/CMakeLists.txt index 9ef1ec2a0..c5f19e6c3 100644 --- a/tests/core/algorithm/CMakeLists.txt +++ b/tests/core/algorithm/CMakeLists.txt @@ -10,3 +10,6 @@ cc_directories(hnsw_sparse) if(RABITQ_SUPPORTED) cc_directories(hnsw_rabitq) endif() +if(ZVEC_ENABLE_OMEGA) +cc_directories(omega) +endif() diff --git a/tests/core/algorithm/omega/CMakeLists.txt b/tests/core/algorithm/omega/CMakeLists.txt new 
file mode 100644 index 000000000..72f47635e --- /dev/null +++ b/tests/core/algorithm/omega/CMakeLists.txt @@ -0,0 +1,16 @@ +include(${CMAKE_SOURCE_DIR}/cmake/bazel.cmake) + +file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc) + +foreach(CC_SRCS ${ALL_TEST_SRCS}) + get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE) + cc_gtest( + NAME ${CC_TARGET} + STRICT + LIBS zvec_ailego core_framework core_utility core_metric core_quantizer + core_knn_hnsw core_knn_flat core_knn_omega core_interface omega + SRCS ${CC_SRCS} + INCS . ${CMAKE_SOURCE_DIR}/src/core ${CMAKE_SOURCE_DIR}/src/core/algorithm + ${CMAKE_SOURCE_DIR}/thirdparty/omega/OMEGALib/include + ) +endforeach() diff --git a/tests/core/algorithm/omega/omega_search_context_test.cc b/tests/core/algorithm/omega/omega_search_context_test.cc new file mode 100644 index 000000000..eb73c0735 --- /dev/null +++ b/tests/core/algorithm/omega/omega_search_context_test.cc @@ -0,0 +1,106 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include + +namespace omega { + +TEST(OmegaSearchContextTest, DefaultStateWithoutModelDoesNotEarlyStop) { + SearchContext ctx(nullptr, nullptr, 0.95f, 3, 5); + + int hops = -1; + int comparisons = -1; + int collected_gt = -1; + ctx.GetStats(&hops, &comparisons, &collected_gt); + + EXPECT_EQ(hops, 0); + EXPECT_EQ(comparisons, 0); + EXPECT_EQ(collected_gt, 0); + EXPECT_EQ(ctx.GetK(), 3); + EXPECT_EQ(ctx.GetNextPredictionCmps(), 50); + EXPECT_EQ(ctx.GetPredictionBatchMinInterval(), 10); + EXPECT_FALSE(ctx.ShouldTrackTraversalWindow()); + EXPECT_FALSE(ctx.ShouldStopEarly()); + EXPECT_FALSE(ctx.EarlyStopHit()); +} + +TEST(OmegaSearchContextTest, TrainingModeCollectsRecordsAndGtCmps) { + SearchContext ctx(nullptr, nullptr, 0.95f, 2, 4); + ctx.EnableTrainingMode(7, std::vector{11, 22}, 2); + ctx.SetDistStart(0.5f); + + EXPECT_TRUE(ctx.ShouldTrackTraversalWindow()); + + ctx.ReportHop(); + EXPECT_FALSE(ctx.ReportVisitCandidate(11, 0.1f, true)); + ctx.ReportHop(); + EXPECT_FALSE(ctx.ReportVisitCandidate(22, 0.2f, true)); + + const auto& records = ctx.GetTrainingRecords(); + ASSERT_EQ(records.size(), 2U); + EXPECT_EQ(records[0].query_id, 7); + EXPECT_EQ(records[0].cmps_visited, 1); + EXPECT_EQ(records[0].hops_visited, 1); + EXPECT_EQ(records[0].label, 0); + ASSERT_EQ(records[0].traversal_window_stats.size(), 7U); + + EXPECT_EQ(records[1].query_id, 7); + EXPECT_EQ(records[1].cmps_visited, 2); + EXPECT_EQ(records[1].hops_visited, 2); + EXPECT_EQ(records[1].label, 1); + ASSERT_EQ(records[1].traversal_window_stats.size(), 7U); + + const auto& gt_cmps = ctx.GetGtCmpsPerRank(); + ASSERT_EQ(gt_cmps.size(), 2U); + EXPECT_EQ(gt_cmps[0], 1); + EXPECT_EQ(gt_cmps[1], 2); + EXPECT_EQ(ctx.GetTotalCmps(), 2); +} + +TEST(OmegaSearchContextTest, ReportVisitCandidatesReturnsPredictionPointAtBoundary) { + SearchContext ctx(nullptr, nullptr, 0.95f, 2, 3); + + std::vector warmup; + warmup.reserve(49); + for (int i = 0; i < 49; ++i) { + 
warmup.push_back({100 + i, 10.0f + static_cast(i), i < 2}); + } + + EXPECT_FALSE(ctx.ReportVisitCandidates(warmup.data(), warmup.size())); + EXPECT_EQ(ctx.GetTotalCmps(), 49); + EXPECT_EQ(ctx.GetTopCandidateCountForHook(), 2); + + EXPECT_TRUE(ctx.ReportVisitCandidate(999, 0.01f, true)); + EXPECT_EQ(ctx.GetTotalCmps(), 50); + EXPECT_EQ(ctx.GetTopCandidateCountForHook(), 2); + + ctx.Reset(); + EXPECT_EQ(ctx.GetTotalCmps(), 0); + EXPECT_EQ(ctx.GetTopCandidateCountForHook(), 0); + EXPECT_EQ(ctx.GetNextPredictionCmps(), 50); +} + +TEST(OmegaModelManagerTest, MissingModelFileFailsClearly) { + ModelManager manager; + EXPECT_FALSE(manager.LoadModel("this/path/should/not/exist")); + EXPECT_FALSE(manager.IsLoaded()); + EXPECT_EQ(manager.GetModel(), nullptr); +} + +} // namespace omega diff --git a/tests/core/interface/CMakeLists.txt b/tests/core/interface/CMakeLists.txt index 62e7bc933..ad2f36c0a 100644 --- a/tests/core/interface/CMakeLists.txt +++ b/tests/core/interface/CMakeLists.txt @@ -2,14 +2,35 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc) +set(ZVEC_TEST_CORE_INTERFACE_LIBS + zvec_ailego + core_framework + core_metric + core_interface + core_knn_flat + core_utility + core_quantizer + sparsehash + core_knn_hnsw + core_mix_reducer + core_knn_flat_sparse + core_knn_hnsw_sparse + core_knn_ivf + core_knn_hnsw_rabitq +) + +if(ZVEC_ENABLE_OMEGA) + list(APPEND ZVEC_TEST_CORE_INTERFACE_LIBS core_knn_omega omega) +endif() + foreach(CC_SRCS ${ALL_TEST_SRCS}) get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE) cc_gtest( NAME ${CC_TARGET} STRICT - LIBS zvec_ailego core_framework core_metric core_interface core_knn_flat core_utility core_quantizer sparsehash core_knn_hnsw core_mix_reducer - core_knn_flat_sparse core_knn_hnsw_sparse core_knn_ivf core_knn_hnsw_rabitq + LIBS ${ZVEC_TEST_CORE_INTERFACE_LIBS} SRCS ${CC_SRCS} - INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + INCS . 
${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include ) -endforeach() \ No newline at end of file +endforeach() diff --git a/tests/core/interface/omega_query_param_test.cc b/tests/core/interface/omega_query_param_test.cc new file mode 100644 index 000000000..58807eb85 --- /dev/null +++ b/tests/core/interface/omega_query_param_test.cc @@ -0,0 +1,79 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include "zvec/core/interface/index_factory.h" +#include "zvec/core/interface/index_param.h" + +namespace zvec::core_interface { + +TEST(OmegaQueryParamTest, ClonePreservesOmegaFields) { + OmegaQueryParam param; + param.topk = 20; + param.fetch_vector = true; + param.radius = 1.25f; + param.is_linear = true; + param.ef_search = 432; + param.training_query_id = 7; + param.target_recall = 0.91f; + + auto cloned_base = param.Clone(); + auto cloned = std::dynamic_pointer_cast(cloned_base); + ASSERT_NE(cloned, nullptr); + EXPECT_EQ(cloned->topk, 20U); + EXPECT_TRUE(cloned->fetch_vector); + EXPECT_FLOAT_EQ(cloned->radius, 1.25f); + EXPECT_TRUE(cloned->is_linear); + EXPECT_EQ(cloned->ef_search, 432U); + EXPECT_EQ(cloned->training_query_id, 7); + EXPECT_FLOAT_EQ(cloned->target_recall, 0.91f); +} + +TEST(OmegaQueryParamTest, JsonRoundTripPreservesTargetRecall) { + OmegaQueryParam param; + param.topk = 15; + param.fetch_vector = true; + param.ef_search = 512; + param.target_recall = 0.92f; + + const std::string json = + IndexFactory::QueryParamSerializeToJson(param, false); + auto restored = + IndexFactory::QueryParamDeserializeFromJson(json); + + ASSERT_NE(restored, nullptr); + EXPECT_EQ(restored->topk, 15U); + EXPECT_TRUE(restored->fetch_vector); + EXPECT_EQ(restored->ef_search, 512U); + EXPECT_FLOAT_EQ(restored->target_recall, 0.92f); +} + +TEST(OmegaQueryParamTest, BaseDeserializerReturnsOmegaType) { + OmegaQueryParam param; + param.ef_search = 256; + param.target_recall = 0.9f; + + const std::string json = + IndexFactory::QueryParamSerializeToJson(param, false); + auto restored = + IndexFactory::QueryParamDeserializeFromJson(json); + + ASSERT_NE(restored, nullptr); + auto* omega = dynamic_cast(restored.get()); + ASSERT_NE(omega, nullptr); + EXPECT_EQ(omega->ef_search, 256U); + EXPECT_FLOAT_EQ(omega->target_recall, 0.9f); +} + +} // namespace zvec::core_interface diff --git a/tests/core/interface/omega_training_session_test.cc 
b/tests/core/interface/omega_training_session_test.cc new file mode 100644 index 000000000..450c4532a --- /dev/null +++ b/tests/core/interface/omega_training_session_test.cc @@ -0,0 +1,90 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "core/interface/indexes/omega_training_session.h" + +namespace zvec::core_interface { + +TEST(OmegaTrainingSessionTest, StartFailsWithoutStreamer) { + OmegaTrainingSession session(nullptr); + TrainingSessionConfig config; + config.topk = 3; + config.ground_truth = {{1, 2, 3}}; + + auto status = session.Start(config); + EXPECT_FALSE(status.ok()); +} + +TEST(OmegaTrainingSessionTest, ConsumeArtifactsAggregatesRecordsAndGtCmps) { + OmegaTrainingSession session(nullptr); + + QueryTrainingArtifacts first; + first.training_query_id = 0; + first.total_cmps = 13; + first.gt_cmps_per_rank = {3, 7, 11}; + first.records.push_back( + TrainingRecord{0, 1, 3, 0.1f, 0.2f, std::vector(7, 1.0f), 1}); + + QueryTrainingArtifacts second; + second.training_query_id = 2; + second.total_cmps = 21; + second.gt_cmps_per_rank = {5, 9, 15}; + second.records.push_back( + TrainingRecord{2, 4, 8, 0.3f, 0.4f, std::vector(7, 2.0f), 0}); + + session.CollectQueryArtifacts(std::move(first)); + session.CollectQueryArtifacts(std::move(second)); + + TrainingArtifacts artifacts = session.ConsumeArtifacts(); + ASSERT_EQ(artifacts.records.size(), 2U); + EXPECT_EQ(artifacts.records[0].query_id, 0); 
+ EXPECT_EQ(artifacts.records[1].query_id, 2); + + ASSERT_EQ(artifacts.gt_cmps_data.topk, 3U); + ASSERT_EQ(artifacts.gt_cmps_data.num_queries, 3U); + ASSERT_EQ(artifacts.gt_cmps_data.gt_cmps.size(), 3U); + ASSERT_EQ(artifacts.gt_cmps_data.total_cmps.size(), 3U); + + EXPECT_EQ(artifacts.gt_cmps_data.gt_cmps[0][0], 3); + EXPECT_EQ(artifacts.gt_cmps_data.gt_cmps[0][2], 11); + EXPECT_EQ(artifacts.gt_cmps_data.total_cmps[0], 13); + + EXPECT_EQ(artifacts.gt_cmps_data.gt_cmps[1][0], 0); + EXPECT_EQ(artifacts.gt_cmps_data.total_cmps[1], 0); + + EXPECT_EQ(artifacts.gt_cmps_data.gt_cmps[2][1], 9); + EXPECT_EQ(artifacts.gt_cmps_data.total_cmps[2], 21); + + TrainingArtifacts drained = session.ConsumeArtifacts(); + EXPECT_TRUE(drained.records.empty()); + EXPECT_TRUE(drained.gt_cmps_data.gt_cmps.empty()); +} + +TEST(OmegaTrainingSessionTest, ConsumeArtifactsUsesConfiguredShapeWhenAvailable) { + OmegaTrainingSession session(nullptr); + + QueryTrainingArtifacts only; + only.training_query_id = 1; + only.total_cmps = 8; + only.gt_cmps_per_rank = {2, 4}; + session.CollectQueryArtifacts(std::move(only)); + + TrainingArtifacts inferred = session.ConsumeArtifacts(); + ASSERT_EQ(inferred.gt_cmps_data.num_queries, 2U); + ASSERT_EQ(inferred.gt_cmps_data.topk, 2U); + EXPECT_EQ(inferred.gt_cmps_data.total_cmps[1], 8); +} + +} // namespace zvec::core_interface diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index eb65afd99..92d0989ea 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit eb65afd99522570a277a7dae6d43d9dc43b98104 +Subproject commit 92d0989ead6d5aa19f979bc67a99f3fe28dc7db9 From d98b153c0a7f25df182b11d9db3a6011df351747 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 1 Apr 2026 19:25:42 +0800 Subject: [PATCH 089/126] Simplify OMEGA build integration --- CMakeLists.txt | 6 +----- pyproject.toml | 1 - src/core/CMakeLists.txt | 11 +---------- src/core/algorithm/CMakeLists.txt | 5 +---- src/db/collection.cc | 2 -- 
src/db/index/segment/segment.cc | 2 -- src/db/training/omega_model_trainer.cc | 4 ---- src/db/training/omega_model_trainer.h | 4 ---- src/db/training/omega_training_coordinator.cc | 10 ---------- tests/core/algorithm/CMakeLists.txt | 2 -- tests/core/interface/CMakeLists.txt | 6 ++---- thirdparty/CMakeLists.txt | 10 ++-------- tools/core/CMakeLists.txt | 5 +---- 13 files changed, 8 insertions(+), 60 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d75e66a6..f8e987237 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,11 +65,7 @@ message(STATUS "BUILD_C_BINDINGS:${BUILD_C_BINDINGS}") option(BUILD_TOOLS "Build tools" ON) message(STATUS "BUILD_TOOLS:${BUILD_TOOLS}") -option(ZVEC_ENABLE_OMEGA "Enable OMEGA support with LightGBM (requires LightGBM library)" OFF) -message(STATUS "ZVEC_ENABLE_OMEGA:${ZVEC_ENABLE_OMEGA}") -if(ZVEC_ENABLE_OMEGA) - add_definitions(-DZVEC_ENABLE_OMEGA) -endif() +message(STATUS "OMEGA support: always enabled") option(RABITQ_ENABLE_AVX512 "Compile RaBitQ with AVX-512 support" OFF) if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64" AND NOT ANDROID) diff --git a/pyproject.toml b/pyproject.toml index f6c598d40..5d167ef19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -124,7 +124,6 @@ sdist.include = [ [tool.scikit-build.cmake.define] BUILD_TOOLS = "OFF" BUILD_PYTHON_BINDINGS = "ON" -ZVEC_ENABLE_OMEGA = "OFF" #CMAKE_VERBOSE_MAKEFILE = "ON" # Setuptools config for test pypi diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index d09d65581..856f3e388 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -32,12 +32,6 @@ cc_directory(mixed_reducer) git_version(GIT_SRCS_VER ${CMAKE_CURRENT_SOURCE_DIR}) file(GLOB_RECURSE ALL_CORE_SRCS *.cc *.c *.h) -# Exclude omega algorithm files when OMEGA is disabled -if(NOT ZVEC_ENABLE_OMEGA) - list(FILTER ALL_CORE_SRCS EXCLUDE REGEX ".*/algorithm/omega/.*") - list(FILTER ALL_CORE_SRCS EXCLUDE REGEX 
".*/omega_index\\.cc$") -endif() - # Remove algorithm/hnsw_rabitq implementation files if not supported. # interface/indexes/hnsw_rabitq_index.cc is kept because it provides the vtable # for HNSWRabitqIndex and guards rabitqlib usage with #if RABITQ_SUPPORTED. @@ -45,10 +39,7 @@ if(NOT RABITQ_SUPPORTED) list(FILTER ALL_CORE_SRCS EXCLUDE REGEX ".*/algorithm/hnsw_rabitq/.*") endif() -set(CORE_LIBS zvec_ailego zvec_turbo sparsehash magic_enum rabitqlib) -if(ZVEC_ENABLE_OMEGA) - list(APPEND CORE_LIBS omega) -endif() +set(CORE_LIBS zvec_ailego zvec_turbo sparsehash magic_enum rabitqlib omega) cc_library( NAME zvec_core STATIC STRICT PACKED diff --git a/src/core/algorithm/CMakeLists.txt b/src/core/algorithm/CMakeLists.txt index 5a5fa8917..6bf6c4390 100644 --- a/src/core/algorithm/CMakeLists.txt +++ b/src/core/algorithm/CMakeLists.txt @@ -7,10 +7,7 @@ cc_directory(flat_sparse) cc_directory(ivf) cc_directory(hnsw) cc_directory(hnsw_sparse) -# Only include omega when ZVEC_ENABLE_OMEGA is ON -if(ZVEC_ENABLE_OMEGA) - cc_directory(omega) -endif() +cc_directory(omega) if(RABITQ_SUPPORTED) message(STATUS "BUILD RABITQ") cc_directory(hnsw_rabitq) diff --git a/src/db/collection.cc b/src/db/collection.cc index 43ac86b57..bcb56b5e1 100644 --- a/src/db/collection.cc +++ b/src/db/collection.cc @@ -44,10 +44,8 @@ #include "db/index/segment/segment_helper.h" #include "db/index/segment/segment_manager.h" #include "db/sqlengine/sqlengine.h" -#ifdef ZVEC_ENABLE_OMEGA #include "db/training/omega_model_trainer.h" #include "db/training/training_data_collector.h" -#endif namespace zvec { diff --git a/src/db/index/segment/segment.cc b/src/db/index/segment/segment.cc index e8b577c80..fbc59dfdb 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -68,9 +68,7 @@ #include "sql_expr_parser.h" #include "db/training/omega_training_coordinator.h" #include "db/training/training_data_collector.h" -#ifdef ZVEC_ENABLE_OMEGA #include "db/training/omega_model_trainer.h" 
-#endif namespace zvec { diff --git a/src/db/training/omega_model_trainer.cc b/src/db/training/omega_model_trainer.cc index 1ef8e94b7..e9d20c542 100644 --- a/src/db/training/omega_model_trainer.cc +++ b/src/db/training/omega_model_trainer.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifdef ZVEC_ENABLE_OMEGA - #include "omega_model_trainer.h" #include #include @@ -122,5 +120,3 @@ Status OmegaModelTrainer::TrainModelWithGtCmps( } } // namespace zvec - -#endif // ZVEC_ENABLE_OMEGA diff --git a/src/db/training/omega_model_trainer.h b/src/db/training/omega_model_trainer.h index ace1775f3..7e7a726de 100644 --- a/src/db/training/omega_model_trainer.h +++ b/src/db/training/omega_model_trainer.h @@ -14,8 +14,6 @@ #pragma once -#ifdef ZVEC_ENABLE_OMEGA - #include #include #include @@ -78,5 +76,3 @@ class OmegaModelTrainer { }; } // namespace zvec - -#endif // ZVEC_ENABLE_OMEGA diff --git a/src/db/training/omega_training_coordinator.cc b/src/db/training/omega_training_coordinator.cc index 843a2b67f..836046835 100644 --- a/src/db/training/omega_training_coordinator.cc +++ b/src/db/training/omega_training_coordinator.cc @@ -20,9 +20,7 @@ #include #include #include "db/common/file_helper.h" -#ifdef ZVEC_ENABLE_OMEGA #include "db/training/omega_model_trainer.h" -#endif namespace zvec { @@ -218,7 +216,6 @@ Status TrainOmegaModelAfterBuild( return Status::OK(); } -#ifdef ZVEC_ENABLE_OMEGA OmegaModelTrainerOptions trainer_opts; trainer_opts.output_dir = model_output_dir; trainer_opts.verbose = true; @@ -238,9 +235,6 @@ Status TrainOmegaModelAfterBuild( LOG_WARN("OMEGA model training failed: %s", train_status.message().c_str()); } -#else - LOG_INFO("OMEGA training skipped (ZVEC_ENABLE_OMEGA not defined)"); -#endif return Status::OK(); } @@ -281,7 +275,6 @@ Status TrainOmegaModelAfterRetrainCollect( LOG_INFO("Training data stats: %zu positive, %zu negative samples", positive_count, negative_count); 
-#ifdef ZVEC_ENABLE_OMEGA LOG_WARN("OMEGA retrain step 2/2: start model training for field '%s' in segment %d", field_name.c_str(), segment_id); OmegaModelTrainerOptions trainer_options; @@ -308,9 +301,6 @@ Status TrainOmegaModelAfterRetrainCollect( LOG_WARN("OMEGA retrain step 2/2: finished model training for segment %d, output: %s", segment_id, trainer_options.output_dir.c_str()); -#else - LOG_INFO("OMEGA training skipped (ZVEC_ENABLE_OMEGA not defined)"); -#endif return Status::OK(); } diff --git a/tests/core/algorithm/CMakeLists.txt b/tests/core/algorithm/CMakeLists.txt index c5f19e6c3..5e317baeb 100644 --- a/tests/core/algorithm/CMakeLists.txt +++ b/tests/core/algorithm/CMakeLists.txt @@ -10,6 +10,4 @@ cc_directories(hnsw_sparse) if(RABITQ_SUPPORTED) cc_directories(hnsw_rabitq) endif() -if(ZVEC_ENABLE_OMEGA) cc_directories(omega) -endif() diff --git a/tests/core/interface/CMakeLists.txt b/tests/core/interface/CMakeLists.txt index ad2f36c0a..829ee0172 100644 --- a/tests/core/interface/CMakeLists.txt +++ b/tests/core/interface/CMakeLists.txt @@ -17,12 +17,10 @@ set(ZVEC_TEST_CORE_INTERFACE_LIBS core_knn_hnsw_sparse core_knn_ivf core_knn_hnsw_rabitq + core_knn_omega + omega ) -if(ZVEC_ENABLE_OMEGA) - list(APPEND ZVEC_TEST_CORE_INTERFACE_LIBS core_knn_omega omega) -endif() - foreach(CC_SRCS ${ALL_TEST_SRCS}) get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE) cc_gtest( diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index 730e08468..fcf590447 100644 --- a/thirdparty/CMakeLists.txt +++ b/thirdparty/CMakeLists.txt @@ -26,11 +26,5 @@ add_subdirectory(CRoaring CRoaring EXCLUDE_FROM_ALL) add_subdirectory(arrow arrow EXCLUDE_FROM_ALL) add_subdirectory(magic_enum magic_enum EXCLUDE_FROM_ALL) add_subdirectory(RaBitQ-Library RaBitQ-Library EXCLUDE_FROM_ALL) - -# omega is only built when ZVEC_ENABLE_OMEGA is ON -if(ZVEC_ENABLE_OMEGA) - message(STATUS "ZVEC: Building omega library with LightGBM support") - add_subdirectory(omega omega EXCLUDE_FROM_ALL) 
-else() - message(STATUS "ZVEC: Skipping omega library (ZVEC_ENABLE_OMEGA=OFF)") -endif() +message(STATUS "ZVEC: Building omega library with LightGBM support") +add_subdirectory(omega omega EXCLUDE_FROM_ALL) diff --git a/tools/core/CMakeLists.txt b/tools/core/CMakeLists.txt index 9c932760d..7c0d81ba2 100644 --- a/tools/core/CMakeLists.txt +++ b/tools/core/CMakeLists.txt @@ -16,10 +16,7 @@ set(ZVEC_TOOL_CORE_INTERFACE_LIBS core_interface ) -set(ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS core_mix_reducer) -if(ZVEC_ENABLE_OMEGA) - list(APPEND ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS core_knn_omega) -endif() +set(ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS core_mix_reducer core_knn_omega) cc_binary( NAME txt2vecs From 758895f0644fc44a5f5fd839e920e033dce191a9 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 1 Apr 2026 19:32:12 +0800 Subject: [PATCH 090/126] Fix OMEGA context filter naming --- src/core/algorithm/omega/omega_searcher.cc | 2 +- src/core/algorithm/omega/omega_streamer.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index c5922e708..280b6824c 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -169,7 +169,7 @@ IndexSearcher::Context::Pointer OmegaSearcher::create_context() const { uint32_t filter_mode = bf_enabled_ ? 
VisitFilter::BloomFilter : VisitFilter::ByteMap; ctx->set_filter_mode(filter_mode); - ctx->set_filter_negative_probility(bf_negative_probility_); + ctx->set_filter_negative_probability(bf_negative_probability_); ctx->set_magic(magic_); ctx->set_force_padding_topk(force_padding_topk_enabled_); ctx->set_bruteforce_threshold(bruteforce_threshold_); diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index 7a16a6a2f..aad20972d 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -358,7 +358,7 @@ IndexStreamer::Context::Pointer OmegaStreamer::create_context(void) const { ctx->set_max_scan_ratio(max_scan_ratio_); ctx->set_filter_mode(bf_enabled_ ? VisitFilter::BloomFilter : VisitFilter::ByteMap); - ctx->set_filter_negative_probility(bf_negative_prob_); + ctx->set_filter_negative_probability(bf_negative_prob_); ctx->set_magic(magic_); ctx->set_force_padding_topk(force_padding_topk_enabled_); ctx->set_bruteforce_threshold(bruteforce_threshold_); From a0c59c15cae25e4227643a7d3f579ac0897d77d2 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 1 Apr 2026 20:16:50 +0800 Subject: [PATCH 091/126] Add OMEGA workflow Python tests --- python/tests/test_collection.py | 165 ++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) diff --git a/python/tests/test_collection.py b/python/tests/test_collection.py index eb5ddc305..45a61765e 100644 --- a/python/tests/test_collection.py +++ b/python/tests/test_collection.py @@ -13,16 +13,19 @@ # limitations under the License. 
from __future__ import annotations +from pathlib import Path import pytest import zvec from zvec import ( Collection, + CollectionSchema, CollectionOption, DataType, Doc, FieldSchema, HnswIndexParam, + HnswQueryParam, OmegaIndexParam, OmegaQueryParam, InvertIndexParam, @@ -201,6 +204,65 @@ def omega_multiple_docs(): ] +@pytest.fixture +def omega_workflow_docs(): + return [ + Doc( + id=f"{id}", + fields={"id": id, "name": f"workflow-doc-{id}"}, + vectors={"dense": [float(id) + 0.1] * 128}, + ) + for id in range(1, 129) + ] + + +def _create_vector_collection( + tmp_path_factory, + collection_option: CollectionOption, + collection_name: str, + vector_index_param, +) -> Collection: + temp_dir = tmp_path_factory.mktemp(collection_name) + collection_path = temp_dir / collection_name + schema = CollectionSchema( + name=collection_name, + fields=[ + FieldSchema( + "id", + DataType.INT64, + nullable=False, + index_param=InvertIndexParam(enable_range_optimization=True), + ), + FieldSchema("name", DataType.STRING, nullable=False), + ], + vectors=[ + VectorSchema( + "dense", + DataType.VECTOR_FP32, + dimension=128, + index_param=vector_index_param, + ) + ], + ) + coll = zvec.create_and_open( + path=str(collection_path), schema=schema, option=collection_option + ) + assert coll is not None + return coll + + +def _omega_model_files(collection: Collection) -> set[str]: + return { + path.name + for path in Path(collection.path).rglob("*") + if path.is_file() and path.parent.name == "omega_model" + } + + +def _result_ids(result) -> list[str]: + return [doc.id for doc in result] + + @pytest.fixture def collection_with_single_doc(test_collection: Collection, single_doc) -> Collection: # Setup: insert single doc @@ -1067,6 +1129,109 @@ def test_omega_collection_query_by_id_with_omega_param( assert len(query_result) > 0 assert query_result[0].id == omega_multiple_docs[0].id + def test_omega_workflow_optimize_trains_model_and_query_runs( + self, tmp_path_factory, collection_option, 
omega_workflow_docs + ): + omega_collection = _create_vector_collection( + tmp_path_factory, + collection_option, + "omega_workflow_active", + OmegaIndexParam( + metric_type=MetricType.L2, + min_vector_threshold=32, + num_training_queries=16, + ef_training=64, + ef_groundtruth=128, + window_size=32, + ), + ) + try: + result = omega_collection.insert(omega_workflow_docs) + assert len(result) == len(omega_workflow_docs) + for item in result: + assert item.ok() + + omega_collection.optimize(option=OptimizeOption(concurrency=1)) + + model_files = _omega_model_files(omega_collection) + assert { + "model.txt", + "threshold_table.txt", + "interval_table.txt", + "gt_collected_table.txt", + "gt_cmps_all_table.txt", + }.issubset(model_files) + + query_result = omega_collection.query( + VectorQuery( + field_name="dense", + vector=omega_workflow_docs[31].vector("dense"), + param=OmegaQueryParam(ef=128, target_recall=0.91), + ), + topk=10, + ) + + assert len(query_result) == 10 + assert query_result[0].id == omega_workflow_docs[31].id + finally: + omega_collection.destroy() + + def test_omega_query_falls_back_to_hnsw_when_model_not_trained( + self, tmp_path_factory, collection_option, omega_workflow_docs + ): + hnsw_collection = _create_vector_collection( + tmp_path_factory, + collection_option, + "hnsw_workflow_baseline", + HnswIndexParam(metric_type=MetricType.L2), + ) + omega_collection = _create_vector_collection( + tmp_path_factory, + collection_option, + "omega_workflow_fallback", + OmegaIndexParam( + metric_type=MetricType.L2, + min_vector_threshold=100000, + num_training_queries=16, + ef_training=64, + ef_groundtruth=128, + window_size=32, + ), + ) + try: + hnsw_insert_result = hnsw_collection.insert(omega_workflow_docs) + omega_insert_result = omega_collection.insert(omega_workflow_docs) + assert len(hnsw_insert_result) == len(omega_workflow_docs) + assert len(omega_insert_result) == len(omega_workflow_docs) + + 
hnsw_collection.optimize(option=OptimizeOption(concurrency=1)) + omega_collection.optimize(option=OptimizeOption(concurrency=1)) + + assert "model.txt" not in _omega_model_files(omega_collection) + + query_vector = omega_workflow_docs[63].vector("dense") + hnsw_result = hnsw_collection.query( + VectorQuery( + field_name="dense", + vector=query_vector, + param=HnswQueryParam(ef=128), + ), + topk=10, + ) + omega_result = omega_collection.query( + VectorQuery( + field_name="dense", + vector=query_vector, + param=OmegaQueryParam(ef=128, target_recall=0.91), + ), + topk=10, + ) + + assert _result_ids(omega_result) == _result_ids(hnsw_result) + finally: + omega_collection.destroy() + hnsw_collection.destroy() + def test_collection_query_multi_vector_with_same_field( self, collection_with_multiple_docs: Collection, multiple_docs ): From ec74517c6764f6d591bbd981aba350650bd49dfa Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 1 Apr 2026 20:40:10 +0800 Subject: [PATCH 092/126] Stabilize OMEGA fallback test --- python/tests/test_collection.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/tests/test_collection.py b/python/tests/test_collection.py index 45a61765e..01b84a641 100644 --- a/python/tests/test_collection.py +++ b/python/tests/test_collection.py @@ -1209,7 +1209,9 @@ def test_omega_query_falls_back_to_hnsw_when_model_not_trained( assert "model.txt" not in _omega_model_files(omega_collection) - query_vector = omega_workflow_docs[63].vector("dense") + # Use a non-document query vector to avoid equal-distance ties, + # so fallback-to-HNSW can be checked via exact result equality. 
+ query_vector = [64.3] * 128 hnsw_result = hnsw_collection.query( VectorQuery( field_name="dense", From 7d1c191abfe3845f58efb542b08de490641df359 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 1 Apr 2026 21:30:56 +0800 Subject: [PATCH 093/126] Update OMEGALib submodule --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 92d0989ea..be0c2016a 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 92d0989ead6d5aa19f979bc67a99f3fe28dc7db9 +Subproject commit be0c2016ad5617b525d9243d951e19ca4aa18b5d From 0968728869d555b99b504a205b80539048430a86 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Wed, 1 Apr 2026 21:48:06 +0800 Subject: [PATCH 094/126] Update OMEGALib submodule --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index be0c2016a..34856eee7 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit be0c2016ad5617b525d9243d951e19ca4aa18b5d +Subproject commit 34856eee71eca823b04adfeb0b6adf13f20d3656 From 1976fc971e17dc557c5234459cdb9558ef6f08ef Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 15:00:07 +0800 Subject: [PATCH 095/126] Fix benchmark script lint issues --- python/zvec/__init__.py | 22 +-- scripts/benchmark_hnsw_vs_omega.py | 247 +++++++++++++++++++---------- scripts/benchmark_lib.py | 195 ++++++++++++----------- 3 files changed, 264 insertions(+), 200 deletions(-) diff --git a/python/zvec/__init__.py b/python/zvec/__init__.py index e8f995d5b..817988bf8 100644 --- a/python/zvec/__init__.py +++ b/python/zvec/__init__.py @@ -21,13 +21,7 @@ from importlib.metadata import PackageNotFoundError -# ============================== -# Public API — grouped by category -# ============================== - from . 
import model as model - -# —— Extensions —— from .extension import ( BM25EmbeddingFunction, DefaultLocalDenseEmbedding, @@ -46,16 +40,10 @@ SparseEmbeddingFunction, WeightedReRanker, ) - -# —— Typing —— from .model import param as param from .model import schema as schema - -# —— Core data structures —— from .model.collection import Collection from .model.doc import Doc - -# —— Query & index parameters —— from .model.param import ( AddColumnOption, AlterColumnOption, @@ -74,11 +62,7 @@ OptimizeOption, ) from .model.param.vector_query import VectorQuery - -# —— Schema & field definitions —— from .model.schema import CollectionSchema, CollectionStats, FieldSchema, VectorSchema - -# —— tools —— from .tool import require_module from .typing import ( DataType, @@ -89,12 +73,8 @@ StatusCode, ) from .typing.enum import LogLevel, LogType - -# —— lifecycle —— from .zvec import create_and_open, init, open -# ============================== -# Public interface declaration -# ============================== + __all__ = [ # Zvec functions "create_and_open", diff --git a/scripts/benchmark_hnsw_vs_omega.py b/scripts/benchmark_hnsw_vs_omega.py index 54932a539..8879ad994 100644 --- a/scripts/benchmark_hnsw_vs_omega.py +++ b/scripts/benchmark_hnsw_vs_omega.py @@ -1,16 +1,16 @@ -#!/usr/bin/env python3 - from __future__ import annotations import argparse import sys from pathlib import Path +from typing import Any from benchmark_lib import ( BenchmarkResult, build_index, compute_recall_with_zvec, discover_index_files, + emit, get_offline_load_duration, load_dataset_config, must_get, @@ -74,7 +74,6 @@ def parse_args() -> argparse.Namespace: def run_hnsw( *, args: argparse.Namespace, - dataset_name: str, dataset_spec: dict[str, object], dataset_artifacts: dict[str, object], bench_bin: Path, @@ -87,7 +86,7 @@ def run_hnsw( hnsw_specific_args = hnsw_config.get("args", {}) if not args.search_only: - print("\n[Phase 1] Building HNSW index...") + emit("\n[Phase 1] Building HNSW index...") 
offline_metrics = build_index( index_kind="HNSW", index_path=hnsw_path, @@ -164,10 +163,12 @@ def run_omega( omega_specific_args = omega_config.get("args", {}) if not args.search_only: - if args.retrain_only: - print("\n[Phase 1] Retraining OMEGA model only (reusing existing index)...") - else: - print("\n[Phase 1] Building OMEGA index + training model...") + phase_message = ( + "\n[Phase 1] Retraining OMEGA model only (reusing existing index)..." + if args.retrain_only + else "\n[Phase 1] Building OMEGA index + training model..." + ) + emit(phase_message) offline_metrics = build_index( index_kind="OMEGA", index_path=omega_path, @@ -198,6 +199,27 @@ def run_omega( for target_recall in target_recalls ] + return _run_omega_searches( + args=args, + dataset_spec=dataset_spec, + dataset_artifacts=dataset_artifacts, + bench_bin=bench_bin, + omega_path=omega_path, + common=common, + target_recalls=target_recalls, + ) + + +def _run_omega_searches( + *, + args: argparse.Namespace, + dataset_spec: dict[str, object], + dataset_artifacts: dict[str, object], + bench_bin: Path, + omega_path: Path, + common: dict[str, object], + target_recalls: list[float], +) -> list[BenchmarkResult]: results: list[BenchmarkResult] = [] index_files = discover_index_files(omega_path) omega_common = dict(common) @@ -223,13 +245,11 @@ def run_omega( dry_run=args.dry_run, ) online = benchmark["summary"] - success = online.get("retcode", 0) == 0 - results.append( BenchmarkResult( type="OMEGA", path=str(omega_path), - success=success, + success=online.get("retcode", 0) == 0, target_recall=target_recall, load_duration=get_offline_load_duration(omega_path), qps=online.get("qps"), @@ -245,8 +265,78 @@ def run_omega( return results -def main() -> int: - args = parse_args() +def _parse_target_recalls( + args: argparse.Namespace, omega_config: dict[str, object] +) -> list[float]: + target_recalls = omega_config.get("target_recalls", []) + if args.target_recalls: + target_recalls = [float(value) for value in 
args.target_recalls.split(",") if value] + if not target_recalls: + raise ValueError("omega.target_recalls must be a non-empty list") + return list(target_recalls) + + +def _emit_run_header( + *, + dataset_name: str, + config_path: Path, + zvec_root: Path, + benchmark_dir: Path, + dataset_spec: dict[str, object], + bench_bin: Path, + recall_bin: Path, + hnsw_path: Path, + omega_path: Path, + target_recalls: list[float], +) -> None: + emit("=" * 70) + emit(f"Zvec HNSW vs OMEGA ({dataset_name})") + emit(f"Config: {config_path}") + emit("=" * 70) + emit(f"zvec_root: {zvec_root}") + emit(f"benchmark_dir: {benchmark_dir}") + emit(f"dataset_dir: {dataset_spec['dataset_dir']}") + emit(f"bench_bin: {bench_bin}") + emit(f"recall_bin: {recall_bin}") + emit(f"hnsw_path: {hnsw_path}") + emit(f"omega_path: {omega_path}") + emit(f"target_recalls: {target_recalls}") + emit("=" * 70) + + +def _format_summary_row(result: BenchmarkResult) -> str: + tr = f"{result.target_recall:.2f}" if result.target_recall is not None else "N/A" + status = "OK" if result.success else "FAILED" + ld = f"{result.load_duration:.1f}" if result.load_duration is not None else "N/A" + qps = f"{result.qps:.1f}" if result.qps is not None else "N/A" + avg_latency = f"{result.avg_latency_ms:.3f}" if result.avg_latency_ms is not None else "N/A" + p95_latency = f"{result.p95_latency_ms:.3f}" if result.p95_latency_ms is not None else "N/A" + recall = f"{result.recall:.4f}" if result.recall is not None else "N/A" + return ( + f"{result.type:<10} {tr:<15} {ld:<12} {qps:<8} " + f"{avg_latency:<16} {p95_latency:<16} {recall:<10} {status:<10}" + ) + + +def _emit_result_summary(results: list[BenchmarkResult], summary_paths: list[Path]) -> None: + emit(f"\n\n{'=' * 70}") + emit("Benchmark Summary") + emit("=" * 70) + emit( + f"{'Type':<10} {'target_recall':<15} {'load_dur(s)':<12} " + f"{'qps':<8} {'avg_latency(ms)':<16} {'p95_latency(ms)':<16} " + f"{'recall':<10} {'Status':<10}" + ) + emit("-" * 100) + for result in 
results: + emit(_format_summary_row(result)) + + emit() + for path in summary_paths: + emit(f"Summary JSON: {path}") + + +def _resolve_run_context(args: argparse.Namespace) -> dict[str, Any]: config_path = Path(args.config).expanduser().resolve() config = load_dataset_config(config_path, args.dataset) zvec_root, benchmark_dir = resolve_paths( @@ -262,101 +352,82 @@ def main() -> int: bench_bin, recall_bin = resolve_core_tools(zvec_root) benchmark_dir.mkdir(parents=True, exist_ok=True) - dataset_name = args.dataset - common = must_get(config, "common") hnsw_config = must_get(config, "hnsw") omega_config = must_get(config, "omega") - hnsw_path = resolve_index_path(benchmark_dir, must_get(hnsw_config, "path")) - omega_path = resolve_index_path(benchmark_dir, must_get(omega_config, "path")) - hnsw_db_label = must_get(hnsw_config, "db_label") - omega_db_label = must_get(omega_config, "db_label") - target_recalls = omega_config.get("target_recalls", []) - if args.target_recalls: - target_recalls = [float(value) for value in args.target_recalls.split(",") if value] - if not target_recalls: - raise ValueError("omega.target_recalls must be a non-empty list") + return { + "config_path": config_path, + "dataset_name": args.dataset, + "dataset_spec": dataset_spec, + "dataset_artifacts": dataset_artifacts, + "zvec_root": zvec_root, + "benchmark_dir": benchmark_dir, + "bench_bin": bench_bin, + "recall_bin": recall_bin, + "common": must_get(config, "common"), + "hnsw_config": hnsw_config, + "omega_config": omega_config, + "hnsw_path": resolve_index_path(benchmark_dir, must_get(hnsw_config, "path")), + "omega_path": resolve_index_path(benchmark_dir, must_get(omega_config, "path")), + "hnsw_db_label": must_get(hnsw_config, "db_label"), + "omega_db_label": must_get(omega_config, "db_label"), + "target_recalls": _parse_target_recalls(args, omega_config), + } - print("=" * 70) - print(f"Zvec HNSW vs OMEGA ({dataset_name})") - print(f"Config: {config_path}") - print("=" * 70) - 
print(f"zvec_root: {zvec_root}") - print(f"benchmark_dir: {benchmark_dir}") - print(f"dataset_dir: {dataset_spec['dataset_dir']}") - print(f"bench_bin: {bench_bin}") - print(f"recall_bin: {recall_bin}") - print(f"hnsw_path: {hnsw_path}") - print(f"omega_path: {omega_path}") - print(f"target_recalls: {target_recalls}") - print("=" * 70) - results: list[BenchmarkResult] = [] +def main() -> int: + args = parse_args() + context = _resolve_run_context(args) + _emit_run_header( + dataset_name=context["dataset_name"], + config_path=context["config_path"], + zvec_root=context["zvec_root"], + benchmark_dir=context["benchmark_dir"], + dataset_spec=context["dataset_spec"], + bench_bin=context["bench_bin"], + recall_bin=context["recall_bin"], + hnsw_path=context["hnsw_path"], + omega_path=context["omega_path"], + target_recalls=context["target_recalls"], + ) + results: list[BenchmarkResult] = [] if not args.skip_hnsw: - hnsw_result = run_hnsw( - args=args, - dataset_name=dataset_name, - dataset_spec=dataset_spec, - dataset_artifacts=dataset_artifacts, - bench_bin=bench_bin, - hnsw_path=hnsw_path, - hnsw_db_label=hnsw_db_label, - common=common, - hnsw_config=hnsw_config, + results.append( + run_hnsw( + args=args, + dataset_spec=context["dataset_spec"], + dataset_artifacts=context["dataset_artifacts"], + bench_bin=context["bench_bin"], + hnsw_path=context["hnsw_path"], + hnsw_db_label=context["hnsw_db_label"], + common=context["common"], + hnsw_config=context["hnsw_config"], + ) ) - results.append(hnsw_result) if not args.skip_omega: results.extend( run_omega( args=args, - dataset_spec=dataset_spec, - dataset_artifacts=dataset_artifacts, - bench_bin=bench_bin, - omega_path=omega_path, - omega_db_label=omega_db_label, - common=common, - omega_config=omega_config, - target_recalls=target_recalls, + dataset_spec=context["dataset_spec"], + dataset_artifacts=context["dataset_artifacts"], + bench_bin=context["bench_bin"], + omega_path=context["omega_path"], + 
omega_db_label=context["omega_db_label"], + common=context["common"], + omega_config=context["omega_config"], + target_recalls=context["target_recalls"], ) ) if results: - written_summary_paths = ( - write_grouped_online_summaries(dataset_name, results) + summary_paths = ( + write_grouped_online_summaries(context["dataset_name"], results) if not args.dry_run else [] ) - print("\n\n" + "=" * 70) - print("Benchmark Summary") - print("=" * 70) - print( - f"{'Type':<10} {'target_recall':<15} {'load_dur(s)':<12} " - f"{'qps':<8} {'avg_latency(ms)':<16} {'p95_latency(ms)':<16} " - f"{'recall':<10} {'Status':<10}" - ) - print("-" * 100) - for result in results: - tr = f"{result.target_recall:.2f}" if result.target_recall is not None else "N/A" - status = "OK" if result.success else "FAILED" - ld = f"{result.load_duration:.1f}" if result.load_duration is not None else "N/A" - qps = f"{result.qps:.1f}" if result.qps is not None else "N/A" - avg_latency = ( - f"{result.avg_latency_ms:.3f}" if result.avg_latency_ms is not None else "N/A" - ) - p95_latency = ( - f"{result.p95_latency_ms:.3f}" if result.p95_latency_ms is not None else "N/A" - ) - recall = f"{result.recall:.4f}" if result.recall is not None else "N/A" - print( - f"{result.type:<10} {tr:<15} {ld:<12} {qps:<8} " - f"{avg_latency:<16} {p95_latency:<16} {recall:<10} {status:<10}" - ) - - print() - for path in written_summary_paths: - print(f"Summary JSON: {path}") + _emit_result_summary(results, summary_paths) return 0 if all(result.success for result in results) else 1 diff --git a/scripts/benchmark_lib.py b/scripts/benchmark_lib.py index 541fdb084..680f8c279 100644 --- a/scripts/benchmark_lib.py +++ b/scripts/benchmark_lib.py @@ -1,19 +1,30 @@ -#!/usr/bin/env python3 - from __future__ import annotations +import contextlib import json import os import re import shutil import subprocess +import sys import time import urllib.request from dataclasses import dataclass from datetime import datetime +from functools 
import lru_cache from pathlib import Path from typing import Any +try: + import polars as pl +except ImportError: + pl = None + +try: + import zvec +except ImportError: + zvec = None + @dataclass class BenchmarkResult: @@ -39,8 +50,6 @@ class BenchmarkResult: ) RECALL_PATTERN = re.compile(r"Recall@(\d+):\s*([0-9.]+)") -_ZVEC_INITIALIZED = False - DATASET_SPECS: dict[str, dict[str, Any]] = { "cohere_1m": { "dataset_dirname": "cohere/cohere_medium_1m", @@ -65,7 +74,7 @@ class BenchmarkResult: def load_json(path: Path) -> dict[str, Any]: - with open(path) as f: + with path.open() as f: return json.load(f) @@ -89,9 +98,13 @@ def must_get(config: dict[str, Any], key: str) -> Any: def print_header(title: str) -> None: - print("\n" + "=" * 70) - print(title) - print("=" * 70) + emit(f"\n{'=' * 70}") + emit(title) + emit("=" * 70) + + +def emit(message: str = "") -> None: + sys.stdout.write(f"{message}\n") def resolve_paths( @@ -197,7 +210,7 @@ def online_summary_path(index_path: Path) -> Path: def write_online_summary(index_path: Path, payload: dict[str, Any]) -> None: - with open(online_summary_path(index_path), "w") as f: + with online_summary_path(index_path).open("w") as f: json.dump(payload, f, indent=2, sort_keys=True) @@ -245,7 +258,7 @@ def read_json_if_exists(path: Path) -> dict[str, Any]: if not path.exists(): return {} try: - with open(path) as f: + with path.open() as f: return json.load(f) except Exception: return {} @@ -329,7 +342,7 @@ def write_offline_summary( summary = build_offline_summary(index_path, db_label, metrics, retrain_only=retrain_only) path = offline_summary_path(index_path) path.parent.mkdir(parents=True, exist_ok=True) - with open(path, "w") as f: + with path.open("w") as f: json.dump(summary, f, indent=2, sort_keys=True) return path @@ -339,6 +352,7 @@ def get_offline_load_duration(index_path: Path) -> float | None: "load_duration_s" ) + def resolve_dataset_spec( dataset_name: str, config: dict[str, Any], dataset_root_arg: str | None ) -> 
dict[str, Any]: @@ -363,13 +377,17 @@ def resolve_dataset_spec( metric_type = str(config.get("metric_type", default.get("metric_type", "COSINE"))).upper() remote_dirname = str(config.get("remote_dirname", default.get("remote_dirname", ""))) train_files = list(config.get("train_files", default.get("train_files", []))) - dataset_source = str(config.get("dataset_source", os.environ.get("ZVEC_DATASET_SOURCE", "S3"))) + dataset_source = str( + config.get("dataset_source", os.environ.get("ZVEC_DATASET_SOURCE", "S3")) + ) download_base_url = str( config.get( "dataset_base_url", os.environ.get( "ZVEC_DATASET_BASE_URL", - DATASET_DOWNLOAD_BASE_URLS.get(dataset_source.upper(), DATASET_DOWNLOAD_BASE_URLS["S3"]), + DATASET_DOWNLOAD_BASE_URLS.get( + dataset_source.upper(), DATASET_DOWNLOAD_BASE_URLS["S3"] + ), ), ) ) @@ -389,15 +407,19 @@ def resolve_dataset_spec( def _require_polars(): - try: - import polars as pl - except ImportError as exc: + if pl is None: raise RuntimeError( "This script requires polars in the active Python environment." 
- ) from exc + ) return pl +def _require_zvec(): + if zvec is None: + raise RuntimeError("This script requires zvec in the active Python environment.") + return zvec + + def _sorted_train_files(dataset_dir: Path) -> list[Path]: candidates: list[Path] = [] for pattern in [ @@ -430,7 +452,7 @@ def _download_file(url: str, output_path: Path) -> None: output_path.parent.mkdir(parents=True, exist_ok=True) tmp_path = output_path.with_suffix(output_path.suffix + ".tmp") try: - with urllib.request.urlopen(url) as response, open(tmp_path, "wb") as out: + with urllib.request.urlopen(url) as response, tmp_path.open("wb") as out: shutil.copyfileobj(response, out) tmp_path.replace(output_path) finally: @@ -451,16 +473,16 @@ def ensure_dataset_available(dataset_name: str, dataset_spec: dict[str, Any], dr ) base_url = dataset_spec["download_base_url"] - print(f"Dataset files missing under {dataset_dir}, downloading from {base_url}/{remote_dirname} ...") + emit(f"Dataset files missing under {dataset_dir}, downloading from {base_url}/{remote_dirname} ...") if dry_run: for name in missing_files: - print(f"[Dry-run] download {base_url}/{remote_dirname}/{name} -> {dataset_dir / name}") + emit(f"[Dry-run] download {base_url}/{remote_dirname}/{name} -> {dataset_dir / name}") return for name in missing_files: url = f"{base_url}/{remote_dirname}/{name}" output_path = dataset_dir / name - print(f"Downloading {url}") + emit(f"Downloading {url}") _download_file(url, output_path) @@ -491,7 +513,10 @@ def prepare_dataset_artifacts( cache_dir.mkdir(parents=True, exist_ok=True) if not dry_run: - refresh_query = (not query_txt.exists()) or query_txt.stat().st_mtime < query_parquet.stat().st_mtime + refresh_query = ( + not query_txt.exists() + or query_txt.stat().st_mtime < query_parquet.stat().st_mtime + ) refresh_gt = (not gt_txt.exists()) or gt_txt.stat().st_mtime < gt_parquet.stat().st_mtime if refresh_query: _write_query_text(query_parquet, query_txt) @@ -509,9 +534,9 @@ def 
prepare_dataset_artifacts( def _write_query_text(query_parquet: Path, output_path: Path) -> None: - pl = _require_polars() - frame = pl.read_parquet(query_parquet).sort("id") - with open(output_path, "w") as f: + polars = _require_polars() + frame = polars.read_parquet(query_parquet).sort("id") + with output_path.open("w") as f: for row in frame.iter_rows(named=True): vector = row["emb"] vector_text = " ".join(str(round(float(v), 16)) for v in vector) @@ -519,34 +544,30 @@ def _write_query_text(query_parquet: Path, output_path: Path) -> None: def _write_groundtruth_text(gt_parquet: Path, output_path: Path) -> None: - pl = _require_polars() - frame = pl.read_parquet(gt_parquet).sort("id") - with open(output_path, "w") as f: + polars = _require_polars() + frame = polars.read_parquet(gt_parquet).sort("id") + with output_path.open("w") as f: for row in frame.iter_rows(named=True): neighbors = " ".join(str(int(v)) for v in row["neighbors_id"]) f.write(f"{int(row['id'])};{neighbors}\n") -def _ensure_zvec_initialized() -> None: - global _ZVEC_INITIALIZED - if _ZVEC_INITIALIZED: - return - import zvec - - zvec.init(log_level=zvec.LogLevel.WARN) - _ZVEC_INITIALIZED = True +@lru_cache(maxsize=1) +def _initialized_zvec(): + module = _require_zvec() + module.init(log_level=module.LogLevel.WARN) + return module def _quantize_type_from_name(name: str): - import zvec - + module = _initialized_zvec() normalized = str(name).upper() mapping = { - "": zvec.QuantizeType.UNDEFINED, - "UNDEFINED": zvec.QuantizeType.UNDEFINED, - "FP16": zvec.QuantizeType.FP16, - "INT8": zvec.QuantizeType.INT8, - "INT4": zvec.QuantizeType.INT4, + "": module.QuantizeType.UNDEFINED, + "UNDEFINED": module.QuantizeType.UNDEFINED, + "FP16": module.QuantizeType.FP16, + "INT8": module.QuantizeType.INT8, + "INT4": module.QuantizeType.INT4, } if normalized not in mapping: raise ValueError(f"Unsupported quantize type: {name}") @@ -554,13 +575,12 @@ def _quantize_type_from_name(name: str): def 
_metric_type_from_name(name: str): - import zvec - + module = _initialized_zvec() normalized = str(name).upper() mapping = { - "COSINE": zvec.MetricType.COSINE, - "IP": zvec.MetricType.IP, - "L2": zvec.MetricType.L2, + "COSINE": module.MetricType.COSINE, + "IP": module.MetricType.IP, + "L2": module.MetricType.L2, } if normalized not in mapping: raise ValueError(f"Unsupported metric type: {name}") @@ -568,12 +588,11 @@ def _metric_type_from_name(name: str): def _maybe_destroy_collection(path: Path) -> None: - import zvec - + module = _initialized_zvec() if not path.exists(): return try: - zvec.open(str(path)).destroy() + module.open(str(path)).destroy() return except Exception: pass @@ -587,12 +606,11 @@ def _build_schema( common_args: dict[str, Any], specific_args: dict[str, Any], ): - import zvec - + module = _initialized_zvec() quantize_type = _quantize_type_from_name(common_args.get("quantize_type", "")) metric = _metric_type_from_name(metric_type) if index_kind == "OMEGA": - index_param = zvec.OmegaIndexParam( + index_param = module.OmegaIndexParam( metric_type=metric, m=int(common_args["m"]), ef_construction=int(specific_args.get("ef_construction", 500)), @@ -605,27 +623,27 @@ def _build_schema( k_train=int(specific_args.get("k_train", 1)), ) else: - index_param = zvec.HnswIndexParam( + index_param = module.HnswIndexParam( metric_type=metric, m=int(common_args["m"]), ef_construction=int(specific_args.get("ef_construction", 500)), quantize_type=quantize_type, ) - return zvec.CollectionSchema( + return module.CollectionSchema( name=f"{index_kind.lower()}_benchmark", fields=[ - zvec.FieldSchema( + module.FieldSchema( "id", - zvec.DataType.INT64, + module.DataType.INT64, nullable=False, - index_param=zvec.InvertIndexParam(enable_range_optimization=True), + index_param=module.InvertIndexParam(enable_range_optimization=True), ) ], vectors=[ - zvec.VectorSchema( + module.VectorSchema( "dense", - zvec.DataType.VECTOR_FP32, + module.DataType.VECTOR_FP32, 
dimension=dimension, index_param=index_param, ) @@ -645,15 +663,14 @@ def build_index( dry_run: bool, ) -> dict[str, Any]: if dry_run: - print(f"[Dry-run] Build {index_kind} at {index_path}") + emit(f"[Dry-run] Build {index_kind} at {index_path}") return {"insert_duration": None, "optimize_duration": None, "load_duration": None} - _ensure_zvec_initialized() - import zvec + module = _initialized_zvec() if retrain_only: - collection = zvec.open( - str(index_path), zvec.CollectionOption(read_only=False, enable_mmap=True) + collection = module.open( + str(index_path), module.CollectionOption(read_only=False, enable_mmap=True) ) insert_duration = None else: @@ -665,20 +682,18 @@ def build_index( common_args, specific_args, ) - collection = zvec.create_and_open( + collection = module.create_and_open( str(index_path), schema, - zvec.CollectionOption(read_only=False, enable_mmap=True), + module.CollectionOption(read_only=False, enable_mmap=True), ) insert_duration = _insert_training_data(collection, dataset_artifacts["train_files"]) optimize_start = time.perf_counter() - collection.optimize(option=zvec.OptimizeOption(retrain_only=retrain_only)) + collection.optimize(option=module.OptimizeOption(retrain_only=retrain_only)) optimize_duration = time.perf_counter() - optimize_start - try: + with contextlib.suppress(Exception): collection.flush() - except Exception: - pass del collection load_duration = None @@ -695,18 +710,17 @@ def build_index( def _insert_training_data(collection, train_files: list[Path], batch_size: int = 1000) -> float: - import zvec - - pl = _require_polars() + module = _initialized_zvec() + polars = _require_polars() start = time.perf_counter() for train_file in train_files: - frame = pl.read_parquet(train_file) + frame = polars.read_parquet(train_file) for offset in range(0, frame.height, batch_size): batch = frame.slice(offset, batch_size) ids = batch["id"].to_list() vectors = batch["emb"].to_list() docs = [ - zvec.Doc( + module.Doc( 
id=str(int(doc_id)), fields={"id": int(doc_id)}, vectors={"dense": vector}, @@ -729,28 +743,26 @@ def compute_recall_with_zvec( if dry_run: return None - _ensure_zvec_initialized() - import zvec - - pl = _require_polars() - query_frame = pl.read_parquet(dataset_artifacts["query_parquet"]).sort("id") - gt_frame = pl.read_parquet(dataset_artifacts["gt_parquet"]).sort("id") + module = _initialized_zvec() + polars = _require_polars() + query_frame = polars.read_parquet(dataset_artifacts["query_parquet"]).sort("id") + gt_frame = polars.read_parquet(dataset_artifacts["gt_parquet"]).sort("id") gt_map = { int(row["id"]): [int(value) for value in row["neighbors_id"][: int(common_args["k"])]] for row in gt_frame.iter_rows(named=True) } - option = zvec.CollectionOption(read_only=True, enable_mmap=True) - collection = zvec.open(str(index_path), option) + option = module.CollectionOption(read_only=True, enable_mmap=True) + collection = module.open(str(index_path), option) use_refiner = bool(common_args.get("is_using_refiner", False)) if index_kind == "OMEGA": - query_param = zvec.OmegaQueryParam( + query_param = module.OmegaQueryParam( ef=int(common_args["ef_search"]), target_recall=float(target_recall), is_using_refiner=use_refiner, ) else: - query_param = zvec.HnswQueryParam( + query_param = module.HnswQueryParam( ef=int(common_args["ef_search"]), is_using_refiner=use_refiner, ) @@ -764,7 +776,7 @@ def compute_recall_with_zvec( if not gt: continue results = collection.query( - vectors=zvec.VectorQuery(field_name="dense", vector=row["emb"], param=query_param), + vectors=module.VectorQuery(field_name="dense", vector=row["emb"], param=query_param), topk=topk, output_fields=[], ) @@ -957,7 +969,7 @@ def run_command_capture( extra_env: dict[str, str] | None = None, ) -> tuple[int, str]: printable = " ".join(str(token) for token in cmd) - print(printable) + emit(printable) if dry_run: return 0, "" @@ -974,7 +986,9 @@ def run_command_capture( check=False, ) if completed.returncode 
!= 0 and completed.stdout: - print(completed.stdout, end="" if completed.stdout.endswith("\n") else "\n") + sys.stdout.write(completed.stdout) + if not completed.stdout.endswith("\n"): + sys.stdout.write("\n") return completed.returncode, completed.stdout @@ -1146,4 +1160,3 @@ def run_concurrency_benchmark( best_output = output return {"summary": best_summary or {}, "output": best_output} - From 2c30539b26d5d9127ddcf5c1d248f2776157cd8b Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 15:06:05 +0800 Subject: [PATCH 096/126] Format benchmark Python code --- python/zvec/model/schema/field_schema.py | 4 +- scripts/benchmark_hnsw_vs_omega.py | 32 ++++-- scripts/benchmark_lib.py | 118 +++++++++++++++++------ 3 files changed, 116 insertions(+), 38 deletions(-) diff --git a/python/zvec/model/schema/field_schema.py b/python/zvec/model/schema/field_schema.py index dad2a3a94..1233d1599 100644 --- a/python/zvec/model/schema/field_schema.py +++ b/python/zvec/model/schema/field_schema.py @@ -276,7 +276,9 @@ def dimension(self) -> int: return self._cpp_obj.dimension @property - def index_param(self) -> Union[HnswIndexParam, IVFIndexParam, FlatIndexParam, OmegaIndexParam]: + def index_param( + self, + ) -> Union[HnswIndexParam, IVFIndexParam, FlatIndexParam, OmegaIndexParam]: """Union[HnswIndexParam, IVFIndexParam, FlatIndexParam, OmegaIndexParam]: Index configuration for the vector.""" return self._cpp_obj.index_param diff --git a/scripts/benchmark_hnsw_vs_omega.py b/scripts/benchmark_hnsw_vs_omega.py index 8879ad994..31b9aee04 100644 --- a/scripts/benchmark_hnsw_vs_omega.py +++ b/scripts/benchmark_hnsw_vs_omega.py @@ -40,11 +40,19 @@ def parse_args() -> argparse.Namespace: default=None, help="Optional comma-separated override for omega.target_recalls in the JSON config", ) - parser.add_argument("--dry-run", action="store_true", help="Print actions without executing") + parser.add_argument( + "--dry-run", action="store_true", help="Print actions without executing" 
+ ) parser.add_argument("--skip-hnsw", action="store_true", help="Skip HNSW benchmark") - parser.add_argument("--skip-omega", action="store_true", help="Skip OMEGA benchmark") - parser.add_argument("--build-only", action="store_true", help="Only build index, skip search") - parser.add_argument("--search-only", action="store_true", help="Only run search on existing index") + parser.add_argument( + "--skip-omega", action="store_true", help="Skip OMEGA benchmark" + ) + parser.add_argument( + "--build-only", action="store_true", help="Only build index, skip search" + ) + parser.add_argument( + "--search-only", action="store_true", help="Only run search on existing index" + ) parser.add_argument( "--retrain-only", action="store_true", @@ -270,7 +278,9 @@ def _parse_target_recalls( ) -> list[float]: target_recalls = omega_config.get("target_recalls", []) if args.target_recalls: - target_recalls = [float(value) for value in args.target_recalls.split(",") if value] + target_recalls = [ + float(value) for value in args.target_recalls.split(",") if value + ] if not target_recalls: raise ValueError("omega.target_recalls must be a non-empty list") return list(target_recalls) @@ -309,8 +319,12 @@ def _format_summary_row(result: BenchmarkResult) -> str: status = "OK" if result.success else "FAILED" ld = f"{result.load_duration:.1f}" if result.load_duration is not None else "N/A" qps = f"{result.qps:.1f}" if result.qps is not None else "N/A" - avg_latency = f"{result.avg_latency_ms:.3f}" if result.avg_latency_ms is not None else "N/A" - p95_latency = f"{result.p95_latency_ms:.3f}" if result.p95_latency_ms is not None else "N/A" + avg_latency = ( + f"{result.avg_latency_ms:.3f}" if result.avg_latency_ms is not None else "N/A" + ) + p95_latency = ( + f"{result.p95_latency_ms:.3f}" if result.p95_latency_ms is not None else "N/A" + ) recall = f"{result.recall:.4f}" if result.recall is not None else "N/A" return ( f"{result.type:<10} {tr:<15} {ld:<12} {qps:<8} " @@ -318,7 +332,9 @@ 
def _format_summary_row(result: BenchmarkResult) -> str: ) -def _emit_result_summary(results: list[BenchmarkResult], summary_paths: list[Path]) -> None: +def _emit_result_summary( + results: list[BenchmarkResult], summary_paths: list[Path] +) -> None: emit(f"\n\n{'=' * 70}") emit("Benchmark Summary") emit("=" * 70) diff --git a/scripts/benchmark_lib.py b/scripts/benchmark_lib.py index 680f8c279..2499b7002 100644 --- a/scripts/benchmark_lib.py +++ b/scripts/benchmark_lib.py @@ -113,7 +113,9 @@ def resolve_paths( zvec_root_arg: str | None, benchmark_dir_arg: str | None, ) -> tuple[Path, Path]: - zvec_root = Path(zvec_root_arg).resolve() if zvec_root_arg else script_path.parent.parent + zvec_root = ( + Path(zvec_root_arg).resolve() if zvec_root_arg else script_path.parent.parent + ) config_benchmark_dir = config.get("benchmark_dir") if benchmark_dir_arg: @@ -154,7 +156,9 @@ def avg_metric(records: list[dict[str, Any]], key: str) -> float | None: return sum(values) / len(values) -def percentile_metric(records: list[dict[str, Any]], key: str, percentile: float) -> float | None: +def percentile_metric( + records: list[dict[str, Any]], key: str, percentile: float +) -> float | None: values = sorted(float(record[key]) for record in records if key in record) if not values: return None @@ -214,7 +218,9 @@ def write_online_summary(index_path: Path, payload: dict[str, Any]) -> None: json.dump(payload, f, indent=2, sort_keys=True) -def write_grouped_online_summaries(dataset: str, results: list[BenchmarkResult]) -> list[Path]: +def write_grouped_online_summaries( + dataset: str, results: list[BenchmarkResult] +) -> list[Path]: written_paths: list[Path] = [] grouped: dict[str, list[BenchmarkResult]] = {} for result in results: @@ -279,7 +285,9 @@ def build_offline_summary( metrics: dict[str, Any], retrain_only: bool = False, ) -> dict[str, Any]: - previous_summary = read_json_if_exists(offline_summary_path(index_path)) if retrain_only else {} + previous_summary = ( + 
read_json_if_exists(offline_summary_path(index_path)) if retrain_only else {} + ) previous_offline = previous_summary.get("offline", {}) previous_omega_training = previous_summary.get("omega_training", {}) @@ -314,7 +322,9 @@ def build_offline_summary( + sum_timing_ms(omega_training.get("lightgbm_timing_ms", {})) ) / 1000.0 if old_optimize_duration is not None: - optimize_duration = round(old_optimize_duration - old_training_s + new_training_s, 4) + optimize_duration = round( + old_optimize_duration - old_training_s + new_training_s, 4 + ) load_duration = ( round(insert_duration + optimize_duration, 4) if insert_duration is not None and optimize_duration is not None @@ -339,7 +349,9 @@ def build_offline_summary( def write_offline_summary( index_path: Path, db_label: str, metrics: dict[str, Any], retrain_only: bool = False ) -> Path: - summary = build_offline_summary(index_path, db_label, metrics, retrain_only=retrain_only) + summary = build_offline_summary( + index_path, db_label, metrics, retrain_only=retrain_only + ) path = offline_summary_path(index_path) path.parent.mkdir(parents=True, exist_ok=True) with path.open("w") as f: @@ -348,8 +360,10 @@ def write_offline_summary( def get_offline_load_duration(index_path: Path) -> float | None: - return read_json_if_exists(offline_summary_path(index_path)).get("offline", {}).get( - "load_duration_s" + return ( + read_json_if_exists(offline_summary_path(index_path)) + .get("offline", {}) + .get("load_duration_s") ) @@ -374,8 +388,12 @@ def resolve_dataset_spec( ) dimension = int(config.get("dimension", default.get("dimension", 0))) - metric_type = str(config.get("metric_type", default.get("metric_type", "COSINE"))).upper() - remote_dirname = str(config.get("remote_dirname", default.get("remote_dirname", ""))) + metric_type = str( + config.get("metric_type", default.get("metric_type", "COSINE")) + ).upper() + remote_dirname = str( + config.get("remote_dirname", default.get("remote_dirname", "")) + ) train_files = 
list(config.get("train_files", default.get("train_files", []))) dataset_source = str( config.get("dataset_source", os.environ.get("ZVEC_DATASET_SOURCE", "S3")) @@ -416,7 +434,9 @@ def _require_polars(): def _require_zvec(): if zvec is None: - raise RuntimeError("This script requires zvec in the active Python environment.") + raise RuntimeError( + "This script requires zvec in the active Python environment." + ) return zvec @@ -438,7 +458,9 @@ def _sorted_train_files(dataset_dir: Path) -> list[Path]: return unique -def _dataset_required_files(dataset_name: str, dataset_spec: dict[str, Any]) -> list[str]: +def _dataset_required_files( + dataset_name: str, dataset_spec: dict[str, Any] +) -> list[str]: required = list(dataset_spec.get("train_files", [])) if not required: raise ValueError( @@ -459,10 +481,14 @@ def _download_file(url: str, output_path: Path) -> None: tmp_path.unlink(missing_ok=True) -def ensure_dataset_available(dataset_name: str, dataset_spec: dict[str, Any], dry_run: bool) -> None: +def ensure_dataset_available( + dataset_name: str, dataset_spec: dict[str, Any], dry_run: bool +) -> None: dataset_dir = dataset_spec["dataset_dir"] required_files = _dataset_required_files(dataset_name, dataset_spec) - missing_files = [name for name in required_files if not (dataset_dir / name).exists()] + missing_files = [ + name for name in required_files if not (dataset_dir / name).exists() + ] if not missing_files: return @@ -473,10 +499,14 @@ def ensure_dataset_available(dataset_name: str, dataset_spec: dict[str, Any], dr ) base_url = dataset_spec["download_base_url"] - emit(f"Dataset files missing under {dataset_dir}, downloading from {base_url}/{remote_dirname} ...") + emit( + f"Dataset files missing under {dataset_dir}, downloading from {base_url}/{remote_dirname} ..." 
+ ) if dry_run: for name in missing_files: - emit(f"[Dry-run] download {base_url}/{remote_dirname}/{name} -> {dataset_dir / name}") + emit( + f"[Dry-run] download {base_url}/{remote_dirname}/{name} -> {dataset_dir / name}" + ) return for name in missing_files: @@ -505,7 +535,9 @@ def prepare_dataset_artifacts( if not gt_parquet.exists(): raise FileNotFoundError(f"Missing ground-truth parquet: {gt_parquet}") if not train_files: - raise FileNotFoundError(f"No train parquet files found under: {dataset_dir}") + raise FileNotFoundError( + f"No train parquet files found under: {dataset_dir}" + ) cache_dir = (benchmark_dir / "_dataset_cache" / dataset_name).resolve() query_txt = cache_dir / "query.txt" @@ -517,7 +549,9 @@ def prepare_dataset_artifacts( not query_txt.exists() or query_txt.stat().st_mtime < query_parquet.stat().st_mtime ) - refresh_gt = (not gt_txt.exists()) or gt_txt.stat().st_mtime < gt_parquet.stat().st_mtime + refresh_gt = ( + not gt_txt.exists() + ) or gt_txt.stat().st_mtime < gt_parquet.stat().st_mtime if refresh_query: _write_query_text(query_parquet, query_txt) if refresh_gt: @@ -664,7 +698,11 @@ def build_index( ) -> dict[str, Any]: if dry_run: emit(f"[Dry-run] Build {index_kind} at {index_path}") - return {"insert_duration": None, "optimize_duration": None, "load_duration": None} + return { + "insert_duration": None, + "optimize_duration": None, + "load_duration": None, + } module = _initialized_zvec() @@ -687,7 +725,9 @@ def build_index( schema, module.CollectionOption(read_only=False, enable_mmap=True), ) - insert_duration = _insert_training_data(collection, dataset_artifacts["train_files"]) + insert_duration = _insert_training_data( + collection, dataset_artifacts["train_files"] + ) optimize_start = time.perf_counter() collection.optimize(option=module.OptimizeOption(retrain_only=retrain_only)) @@ -703,13 +743,19 @@ def build_index( load_duration = optimize_duration return { - "insert_duration": round(insert_duration, 4) if insert_duration is 
not None else None, - "optimize_duration": round(optimize_duration, 4) if optimize_duration is not None else None, + "insert_duration": round(insert_duration, 4) + if insert_duration is not None + else None, + "optimize_duration": round(optimize_duration, 4) + if optimize_duration is not None + else None, "load_duration": round(load_duration, 4) if load_duration is not None else None, } -def _insert_training_data(collection, train_files: list[Path], batch_size: int = 1000) -> float: +def _insert_training_data( + collection, train_files: list[Path], batch_size: int = 1000 +) -> float: module = _initialized_zvec() polars = _require_polars() start = time.perf_counter() @@ -748,7 +794,9 @@ def compute_recall_with_zvec( query_frame = polars.read_parquet(dataset_artifacts["query_parquet"]).sort("id") gt_frame = polars.read_parquet(dataset_artifacts["gt_parquet"]).sort("id") gt_map = { - int(row["id"]): [int(value) for value in row["neighbors_id"][: int(common_args["k"])]] + int(row["id"]): [ + int(value) for value in row["neighbors_id"][: int(common_args["k"])] + ] for row in gt_frame.iter_rows(named=True) } @@ -776,7 +824,9 @@ def compute_recall_with_zvec( if not gt: continue results = collection.query( - vectors=module.VectorQuery(field_name="dense", vector=row["emb"], param=query_param), + vectors=module.VectorQuery( + field_name="dense", vector=row["emb"], param=query_param + ), topk=topk, output_fields=[], ) @@ -876,7 +926,11 @@ def build_core_query_param_json( def discover_index_files(index_path: Path) -> dict[str, Path | None]: coarse_candidates = sorted(index_path.glob("*/dense.qindex.*.proxima")) full_candidates = sorted(index_path.glob("*/dense.index.*.proxima")) - primary = coarse_candidates[0] if coarse_candidates else (full_candidates[0] if full_candidates else None) + primary = ( + coarse_candidates[0] + if coarse_candidates + else (full_candidates[0] if full_candidates else None) + ) reference = full_candidates[0] if full_candidates else None if primary is 
None: raise FileNotFoundError(f"No core index file found under {index_path}") @@ -1106,7 +1160,9 @@ def run_recall( recall_thread_count=1, groundtruth_file=groundtruth_file, ) - ret, output = run_command_capture([str(recall_bin), str(config_path)], dry_run=dry_run) + ret, output = run_command_capture( + [str(recall_bin), str(config_path)], dry_run=dry_run + ) return ret, output, parse_recall_output(output, topk) finally: if config_path.exists(): @@ -1130,7 +1186,9 @@ def run_concurrency_benchmark( quantize_type = str(common_args.get("quantize_type", "UNDEFINED")) use_refiner = bool(common_args.get("is_using_refiner", False)) duration = int(common_args["concurrency_duration"]) - thread_counts = [int(value) for value in str(common_args["num_concurrency"]).split(",") if value] + thread_counts = [ + int(value) for value in str(common_args["num_concurrency"]).split(",") if value + ] best_summary: dict[str, Any] | None = None best_output = "" @@ -1155,7 +1213,9 @@ def run_concurrency_benchmark( ) summary["thread_count"] = thread_count summary["retcode"] = ret - if best_summary is None or (summary.get("qps") or 0.0) > (best_summary.get("qps") or 0.0): + if best_summary is None or (summary.get("qps") or 0.0) > ( + best_summary.get("qps") or 0.0 + ): best_summary = summary best_output = output From 6a53e637f62d6eb4c181758e120dd3b3ec0472aa Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 15:12:54 +0800 Subject: [PATCH 097/126] Format C++ code with clang-format --- src/binding/python/binding.cc | 2 +- .../python/model/param/python_param.cc | 54 ++-- src/core/algorithm/hnsw/hnsw_algorithm.cc | 13 +- src/core/algorithm/hnsw/hnsw_algorithm.h | 6 +- src/core/algorithm/hnsw/hnsw_context.h | 12 +- .../algorithm/hnsw/hnsw_dist_calculator.h | 2 +- src/core/algorithm/hnsw/hnsw_streamer.cc | 49 +-- src/core/algorithm/omega/omega_context.h | 21 +- src/core/algorithm/omega/omega_hook_utils.h | 43 +-- src/core/algorithm/omega/omega_searcher.cc | 52 +-- 
src/core/algorithm/omega/omega_searcher.h | 4 +- src/core/algorithm/omega/omega_streamer.cc | 123 ++++---- src/core/algorithm/omega/omega_streamer.h | 24 +- src/core/interface/index.cc | 11 +- src/core/interface/index_factory.cc | 20 +- src/core/interface/indexes/hnsw_index.cc | 2 +- src/core/interface/indexes/omega_index.cc | 14 +- .../indexes/omega_training_session.cc | 15 +- .../indexes/omega_training_session.h | 8 +- .../mixed_reducer/mixed_streamer_reducer.cc | 10 +- .../mixed_reducer/mixed_streamer_reducer.h | 3 +- src/core/utility/buffer_storage.cc | 1 + src/core/utility/rdtsc_timer.cc | 9 +- src/db/collection.cc | 13 +- .../column/vector_column/engine_helper.hpp | 14 +- .../vector_column/vector_column_indexer.cc | 13 +- .../vector_column/vector_column_indexer.h | 15 +- src/db/index/common/proto_converter.cc | 9 +- src/db/index/common/proto_converter.h | 3 +- src/db/index/common/schema.cc | 4 +- src/db/index/segment/segment.cc | 139 ++++---- src/db/training/omega_model_trainer.cc | 57 ++-- src/db/training/omega_model_trainer.h | 8 +- src/db/training/omega_training_coordinator.cc | 126 ++++---- src/db/training/omega_training_coordinator.h | 29 +- src/db/training/query_generator.cc | 23 +- src/db/training/query_generator.h | 21 +- src/db/training/training_data_collector.cc | 296 ++++++++++-------- src/db/training/training_data_collector.h | 74 +++-- src/include/zvec/core/interface/index.h | 20 +- src/include/zvec/core/interface/index_param.h | 3 +- src/include/zvec/core/interface/training.h | 5 +- .../zvec/core/interface/training_capable.h | 3 +- .../zvec/core/interface/training_session.h | 4 +- src/include/zvec/db/index_params.h | 36 +-- src/include/zvec/db/query_params.h | 5 +- 46 files changed, 763 insertions(+), 655 deletions(-) diff --git a/src/binding/python/binding.cc b/src/binding/python/binding.cc index 091878152..7a4d8acf6 100644 --- a/src/binding/python/binding.cc +++ b/src/binding/python/binding.cc @@ -12,13 +12,13 @@ // See the License for the 
specific language governing permissions and // limitations under the License. +#include #include "python_collection.h" #include "python_config.h" #include "python_doc.h" #include "python_param.h" #include "python_schema.h" #include "python_type.h" -#include namespace zvec { PYBIND11_MODULE(_zvec, m) { diff --git a/src/binding/python/model/param/python_param.cc b/src/binding/python/model/param/python_param.cc index 94b3be9c7..b413814a4 100644 --- a/src/binding/python/model/param/python_param.cc +++ b/src/binding/python/model/param/python_param.cc @@ -705,7 +705,8 @@ predict when to stop searching. 10 )pbdoc"); omega_params - .def(py::init(), + .def(py::init(), py::arg("metric_type") = MetricType::IP, py::arg("m") = core_interface::kDefaultHnswNeighborCnt, py::arg("ef_construction") = @@ -713,10 +714,8 @@ predict when to stop searching. py::arg("quantize_type") = QuantizeType::UNDEFINED, py::arg("min_vector_threshold") = 100000, py::arg("num_training_queries") = 1000, - py::arg("ef_training") = 1000, - py::arg("window_size") = 100, - py::arg("ef_groundtruth") = 0, - py::arg("k_train") = 1) + py::arg("ef_training") = 1000, py::arg("window_size") = 100, + py::arg("ef_groundtruth") = 0, py::arg("k_train") = 1) .def_property_readonly( "m", &OmegaIndexParams::m, "int: Maximum number of neighbors per node in upper layers.") @@ -738,9 +737,9 @@ predict when to stop searching. .def_property_readonly( "ef_groundtruth", &OmegaIndexParams::ef_groundtruth, "int: ef for ground truth computation (0=brute force, >0=HNSW).") - .def_property_readonly( - "k_train", &OmegaIndexParams::k_train, - "int: Number of top GT results required for a positive training label.") + .def_property_readonly("k_train", &OmegaIndexParams::k_train, + "int: Number of top GT results required for a " + "positive training label.") .def( "to_dict", [](const OmegaIndexParams &self) -> py::dict { @@ -772,25 +771,21 @@ predict when to stop searching. 
std::to_string(self.min_vector_threshold()) + ", \"num_training_queries\":" + std::to_string(self.num_training_queries()) + - ", \"ef_training\":" + - std::to_string(self.ef_training()) + - ", \"window_size\":" + - std::to_string(self.window_size()) + + ", \"ef_training\":" + std::to_string(self.ef_training()) + + ", \"window_size\":" + std::to_string(self.window_size()) + ", \"ef_groundtruth\":" + std::to_string(self.ef_groundtruth()) + - ", \"k_train\":" + - std::to_string(self.k_train()) + + ", \"k_train\":" + std::to_string(self.k_train()) + ", \"quantize_type\":" + quantize_type_to_string(self.quantize_type()) + "}"; }) .def(py::pickle( [](const OmegaIndexParams &self) { - return py::make_tuple(self.metric_type(), self.m(), - self.ef_construction(), self.quantize_type(), - self.min_vector_threshold(), - self.num_training_queries(), - self.ef_training(), self.window_size(), - self.ef_groundtruth(), self.k_train()); + return py::make_tuple( + self.metric_type(), self.m(), self.ef_construction(), + self.quantize_type(), self.min_vector_threshold(), + self.num_training_queries(), self.ef_training(), + self.window_size(), self.ef_groundtruth(), self.k_train()); }, [](py::tuple t) { if (t.size() == 10) { @@ -926,7 +921,8 @@ Constructs an HnswQueryParam instance. })); // binding omega query params - py::class_> + py::class_> omega_query_params(m, "OmegaQueryParam", R"pbdoc( Query parameters for OMEGA index with adaptive early stopping. @@ -953,9 +949,8 @@ dynamically adjust search effort to meet a target recall. omega_query_params .def(py::init(), py::arg("ef") = core_interface::kDefaultHnswEfSearch, - py::arg("target_recall") = 0.95f, - py::arg("radius") = 0.0f, py::arg("is_linear") = false, - py::arg("is_using_refiner") = false, + py::arg("target_recall") = 0.95f, py::arg("radius") = 0.0f, + py::arg("is_linear") = false, py::arg("is_using_refiner") = false, R"pbdoc( Constructs an OmegaQueryParam instance. @@ -970,7 +965,9 @@ Constructs an OmegaQueryParam instance. 
)pbdoc") .def_property_readonly( "target_recall", - [](const OmegaQueryParams &self) -> float { return self.target_recall(); }, + [](const OmegaQueryParams &self) -> float { + return self.target_recall(); + }, "float: Target recall for OMEGA early stopping (0.0 to 1.0).") .def("__repr__", [](const OmegaQueryParams &self) -> std::string { @@ -978,7 +975,8 @@ Constructs an OmegaQueryParam instance. "\"type\":" + index_type_to_string(self.type()) + ", \"ef\":" + std::to_string(self.ef()) + - ", \"target_recall\":" + std::to_string(self.target_recall()) + + ", \"target_recall\":" + + std::to_string(self.target_recall()) + ", \"radius\":" + std::to_string(self.radius()) + ", \"is_linear\":" + std::to_string(self.is_linear()) + ", \"is_using_refiner\":" + @@ -993,8 +991,8 @@ Constructs an OmegaQueryParam instance. [](py::tuple t) { if (t.size() != 5) throw std::runtime_error("Invalid state for OmegaQueryParams"); - auto obj = std::make_shared( - t[0].cast(), t[1].cast()); + auto obj = std::make_shared(t[0].cast(), + t[1].cast()); obj->set_radius(t[2].cast()); obj->set_is_linear(t[3].cast()); obj->set_is_using_refiner(t[4].cast()); diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.cc b/src/core/algorithm/hnsw/hnsw_algorithm.cc index a6f7ed830..ce7c6ace6 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.cc +++ b/src/core/algorithm/hnsw/hnsw_algorithm.cc @@ -90,8 +90,7 @@ int HnswAlgorithm::fast_search(HnswContext *ctx) const { return search_internal(ctx, false, nullptr, nullptr); } -int HnswAlgorithm::search_with_hooks(HnswContext *ctx, - const SearchHooks *hooks, +int HnswAlgorithm::search_with_hooks(HnswContext *ctx, const SearchHooks *hooks, bool *stopped_early) const { return search_internal(ctx, true, hooks, stopped_early); } @@ -228,9 +227,9 @@ bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, } const uint32_t result_topk_limit = ctx->topk(); - const bool track_hook_result_topk = - hooks != nullptr && hooks->on_visit_candidate != nullptr 
&& - result_topk_limit > 0; + const bool track_hook_result_topk = hooks != nullptr && + hooks->on_visit_candidate != nullptr && + result_topk_limit > 0; TopkHeap hook_result_topk(result_topk_limit > 0 ? result_topk_limit : 1U); candidates.clear(); @@ -329,8 +328,8 @@ bool HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point, if (!filter(node)) { topk.emplace(node, cur_dist); if (track_hook_result_topk) { - inserted_to_topk = - !hook_result_topk.full() || cur_dist < hook_result_topk[0].second; + inserted_to_topk = !hook_result_topk.full() || + cur_dist < hook_result_topk[0].second; if (inserted_to_topk) { hook_result_topk.emplace(node, cur_dist); } diff --git a/src/core/algorithm/hnsw/hnsw_algorithm.h b/src/core/algorithm/hnsw/hnsw_algorithm.h index 58d02d8df..b7127bb48 100644 --- a/src/core/algorithm/hnsw/hnsw_algorithm.h +++ b/src/core/algorithm/hnsw/hnsw_algorithm.h @@ -44,8 +44,7 @@ class HnswAlgorithm { void (*on_level0_entry)(node_id_t id, dist_t dist, bool inserted_to_topk, void *user_data){nullptr}; void (*on_hop)(void *user_data){nullptr}; - bool (*on_visit_candidate)(node_id_t id, dist_t dist, - bool inserted_to_topk, + bool (*on_visit_candidate)(node_id_t id, dist_t dist, bool inserted_to_topk, void *user_data){nullptr}; }; @@ -113,8 +112,7 @@ class HnswAlgorithm { } private: - int search_internal(HnswContext *ctx, bool use_lock, - const SearchHooks *hooks, + int search_internal(HnswContext *ctx, bool use_lock, const SearchHooks *hooks, bool *stopped_early) const; //! Select in upper layer to get entry point for next layer search diff --git a/src/core/algorithm/hnsw/hnsw_context.h b/src/core/algorithm/hnsw/hnsw_context.h index f9bbd955f..81d4852d7 100644 --- a/src/core/algorithm/hnsw/hnsw_context.h +++ b/src/core/algorithm/hnsw/hnsw_context.h @@ -96,12 +96,12 @@ class HnswContext : public IndexContext { //! 
Retrieve string of debug virtual std::string debug_string(void) const override { char buf[4096]; - size_t size = snprintf( - buf, sizeof(buf), - "scan_cnt=%zu,pairwise_dist_cnt=%zu,get_vector_cnt=%u,get_neighbors_cnt=%u,dup_node=%u", - get_scan_num(), get_pairwise_dist_num(), - stats_get_vector_cnt_, stats_get_neighbors_cnt_, - stats_visit_dup_cnt_); + size_t size = + snprintf(buf, sizeof(buf), + "scan_cnt=%zu,pairwise_dist_cnt=%zu,get_vector_cnt=%u,get_" + "neighbors_cnt=%u,dup_node=%u", + get_scan_num(), get_pairwise_dist_num(), stats_get_vector_cnt_, + stats_get_neighbors_cnt_, stats_visit_dup_cnt_); return std::string(buf, size); } diff --git a/src/core/algorithm/hnsw/hnsw_dist_calculator.h b/src/core/algorithm/hnsw/hnsw_dist_calculator.h index e7461ef20..6c410026c 100644 --- a/src/core/algorithm/hnsw/hnsw_dist_calculator.h +++ b/src/core/algorithm/hnsw/hnsw_dist_calculator.h @@ -223,7 +223,7 @@ class HnswDistCalculator { const void *query_; uint32_t dim_; - uint32_t compare_cnt_; // record distance compute times + uint32_t compare_cnt_; // record distance compute times uint64_t pairwise_dist_cnt_; // record actual pairwise distance work // uint32_t compare_cnt_batch_; // record batch distance compute time bool error_{false}; diff --git a/src/core/algorithm/hnsw/hnsw_streamer.cc b/src/core/algorithm/hnsw/hnsw_streamer.cc index ba751827b..0bfd24c51 100644 --- a/src/core/algorithm/hnsw/hnsw_streamer.cc +++ b/src/core/algorithm/hnsw/hnsw_streamer.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "hnsw_streamer.h" -#include #include #include +#include #include #include #include @@ -32,7 +32,7 @@ namespace { bool ShouldLogHnswQueryStats(uint64_t query_seq) { static const bool enabled = []() { - const char* value = std::getenv("ZVEC_HNSW_LOG_QUERY_STATS"); + const char *value = std::getenv("ZVEC_HNSW_LOG_QUERY_STATS"); if (value == nullptr) { return false; } @@ -43,11 +43,11 @@ bool ShouldLogHnswQueryStats(uint64_t query_seq) { } static const uint64_t limit = []() -> uint64_t { - const char* value = std::getenv("ZVEC_HNSW_LOG_QUERY_LIMIT"); + const char *value = std::getenv("ZVEC_HNSW_LOG_QUERY_LIMIT"); if (value == nullptr || *value == '\0') { return std::numeric_limits::max(); } - char* end = nullptr; + char *end = nullptr; unsigned long long parsed = std::strtoull(value, &end, 10); if (end == value) { return std::numeric_limits::max(); @@ -59,14 +59,14 @@ bool ShouldLogHnswQueryStats(uint64_t query_seq) { } bool UseEmptyHnswHooks() { - const char* value = std::getenv("ZVEC_HNSW_ENABLE_EMPTY_HOOKS"); + const char *value = std::getenv("ZVEC_HNSW_ENABLE_EMPTY_HOOKS"); if (value == nullptr) { return false; } return std::string(value) != "0"; } -std::atomic& HnswQueryStatsSequence() { +std::atomic &HnswQueryStatsSequence() { static std::atomic sequence{0}; return sequence; } @@ -85,9 +85,9 @@ int HnswStreamer::FastSearch(HnswContext *ctx) const { return alg_->fast_search(ctx); } -int HnswStreamer::FastSearchWithHooks( - HnswContext *ctx, const HnswAlgorithm::SearchHooks *hooks, - bool *stopped_early) const { +int HnswStreamer::FastSearchWithHooks(HnswContext *ctx, + const HnswAlgorithm::SearchHooks *hooks, + bool *stopped_early) const { return alg_->fast_search_with_hooks(ctx, hooks, stopped_early); } @@ -696,16 +696,18 @@ int HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, auto query_search_end = RdtscTimer::Now(); auto query_search_time_ns = RdtscTimer::ElapsedNs(query_search_start, query_search_end); - auto query_latency_ns 
= RdtscTimer::ElapsedNs(query_start, RdtscTimer::Now()); + auto query_latency_ns = + RdtscTimer::ElapsedNs(query_start, RdtscTimer::Now()); uint64_t query_seq = HnswQueryStatsSequence().fetch_add(1); if (ShouldLogHnswQueryStats(query_seq)) { - LOG_INFO("HNSW query stats: query_seq=%llu hook_mode=%s cmps=%zu " - "pairwise_dist_cnt=%zu pure_search_ms=%.3f latency_ms=%.3f", - static_cast(query_seq), - use_empty_hooks ? "empty" : "none", ctx->get_scan_num(), - ctx->get_pairwise_dist_num(), - static_cast(query_search_time_ns) / 1e6, - static_cast(query_latency_ns) / 1e6); + LOG_INFO( + "HNSW query stats: query_seq=%llu hook_mode=%s cmps=%zu " + "pairwise_dist_cnt=%zu pure_search_ms=%.3f latency_ms=%.3f", + static_cast(query_seq), + use_empty_hooks ? "empty" : "none", ctx->get_scan_num(), + ctx->get_pairwise_dist_num(), + static_cast(query_search_time_ns) / 1e6, + static_cast(query_latency_ns) / 1e6); } ctx->topk_to_result(q); query = static_cast(query) + qmeta.element_size(); @@ -822,13 +824,16 @@ int HnswStreamer::search_bf_impl( topk.emplace(id, dist); } } - auto query_latency_ns = RdtscTimer::ElapsedNs(query_start, RdtscTimer::Now()); + auto query_latency_ns = + RdtscTimer::ElapsedNs(query_start, RdtscTimer::Now()); uint64_t query_seq = HnswQueryStatsSequence().fetch_add(1); if (ShouldLogHnswQueryStats(query_seq)) { - LOG_INFO("HNSW query stats: query_seq=%llu cmps=%zu pairwise_dist_cnt=%zu latency_ms=%.3f", - static_cast(query_seq), ctx->get_scan_num(), - ctx->get_pairwise_dist_num(), - static_cast(query_latency_ns) / 1e6); + LOG_INFO( + "HNSW query stats: query_seq=%llu cmps=%zu pairwise_dist_cnt=%zu " + "latency_ms=%.3f", + static_cast(query_seq), ctx->get_scan_num(), + ctx->get_pairwise_dist_num(), + static_cast(query_latency_ns) / 1e6); } ctx->topk_to_result(q); query = static_cast(query) + qmeta.element_size(); diff --git a/src/core/algorithm/omega/omega_context.h b/src/core/algorithm/omega/omega_context.h index cc66c26aa..943096c1b 100644 --- 
a/src/core/algorithm/omega/omega_context.h +++ b/src/core/algorithm/omega/omega_context.h @@ -14,9 +14,9 @@ #pragma once -#include "../hnsw/hnsw_context.h" -#include "omega_params.h" #include +#include "omega_params.h" +#include "../hnsw/hnsw_context.h" namespace zvec { namespace core { @@ -32,12 +32,16 @@ class OmegaContext : public HnswContext { //! Constructor OmegaContext(size_t dimension, const IndexMetric::Pointer &metric, const HnswEntity::Pointer &entity) - : HnswContext(dimension, metric, entity), target_recall_(0.95f), training_query_id_(-1) {} + : HnswContext(dimension, metric, entity), + target_recall_(0.95f), + training_query_id_(-1) {} //! Constructor OmegaContext(const IndexMetric::Pointer &metric, const HnswEntity::Pointer &entity) - : HnswContext(metric, entity), target_recall_(0.95f), training_query_id_(-1) {} + : HnswContext(metric, entity), + target_recall_(0.95f), + training_query_id_(-1) {} //! Destructor virtual ~OmegaContext() = default; @@ -71,7 +75,7 @@ class OmegaContext : public HnswContext { } //! 
Set gt_cmps data for this query - void set_gt_cmps(const std::vector& gt_cmps, int total_cmps) { + void set_gt_cmps(const std::vector >_cmps, int total_cmps) { gt_cmps_per_rank_ = gt_cmps; total_cmps_ = total_cmps; } @@ -111,11 +115,12 @@ class OmegaContext : public HnswContext { } private: - float target_recall_; // Per-query target recall + float target_recall_; // Per-query target recall int training_query_id_; // Per-query training query ID for parallel training - std::vector training_records_; // Per-query training records + std::vector + training_records_; // Per-query training records std::vector gt_cmps_per_rank_; // cmps value when each GT rank was found - int total_cmps_ = 0; // Total cmps for this search + int total_cmps_ = 0; // Total cmps for this search }; } // namespace core diff --git a/src/core/algorithm/omega/omega_hook_utils.h b/src/core/algorithm/omega/omega_hook_utils.h index d59031d3a..16fd11e12 100644 --- a/src/core/algorithm/omega/omega_hook_utils.h +++ b/src/core/algorithm/omega/omega_hook_utils.h @@ -24,7 +24,7 @@ namespace zvec::core { inline bool DisableOmegaModelPrediction() { - const char* value = std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION"); + const char *value = std::getenv("ZVEC_OMEGA_DISABLE_MODEL_PREDICTION"); if (value == nullptr) { return false; } @@ -43,16 +43,20 @@ struct OmegaHookState { storage.resize(std::max(1, capacity)); } - bool Empty() const { return count == 0; } + bool Empty() const { + return count == 0; + } - int Capacity() const { return static_cast(storage.size()); } + int Capacity() const { + return static_cast(storage.size()); + } - void Push(const omega::SearchContext::VisitCandidate& candidate) { + void Push(const omega::SearchContext::VisitCandidate &candidate) { storage[(head + count) % Capacity()] = candidate; ++count; } - const omega::SearchContext::VisitCandidate* Data() const { + const omega::SearchContext::VisitCandidate *Data() const { return storage.data() + head; } @@ -62,23 +66,24 @@ struct 
OmegaHookState { } }; - omega::SearchContext* search_ctx{nullptr}; + omega::SearchContext *search_ctx{nullptr}; bool enable_early_stopping{false}; bool per_cmp_reporting{false}; PendingVisitBuffer pending_candidates; int batch_min_interval{1}; }; -inline void ResetOmegaHookState(OmegaHookState* state) { +inline void ResetOmegaHookState(OmegaHookState *state) { if (state->search_ctx != nullptr) { - state->batch_min_interval = state->search_ctx->GetPredictionBatchMinInterval(); + state->batch_min_interval = + state->search_ctx->GetPredictionBatchMinInterval(); } else { state->batch_min_interval = 1; } state->pending_candidates.Reset(state->batch_min_interval); } -inline bool ShouldFlushOmegaPendingCandidates(const OmegaHookState& state) { +inline bool ShouldFlushOmegaPendingCandidates(const OmegaHookState &state) { if (state.pending_candidates.Empty()) { return false; } @@ -92,7 +97,8 @@ inline bool ShouldFlushOmegaPendingCandidates(const OmegaHookState& state) { state.search_ctx->GetNextPredictionCmps(); } -inline bool FlushOmegaPendingCandidates(OmegaHookState* state, int flush_count) { +inline bool FlushOmegaPendingCandidates(OmegaHookState *state, + int flush_count) { if (state->search_ctx == nullptr || flush_count <= 0 || state->pending_candidates.Empty()) { return false; @@ -112,7 +118,7 @@ inline bool FlushOmegaPendingCandidates(OmegaHookState* state, int flush_count) return should_stop; } -inline bool MaybeFlushOmegaPendingCandidates(OmegaHookState* state) { +inline bool MaybeFlushOmegaPendingCandidates(OmegaHookState *state) { if (!ShouldFlushOmegaPendingCandidates(*state)) { return false; } @@ -120,8 +126,8 @@ inline bool MaybeFlushOmegaPendingCandidates(OmegaHookState* state) { } inline void OnOmegaLevel0Entry(node_id_t id, dist_t dist, - bool /*inserted_to_topk*/, void* user_data) { - auto& state = *static_cast(user_data); + bool /*inserted_to_topk*/, void *user_data) { + auto &state = *static_cast(user_data); if (state.per_cmp_reporting) { 
state.search_ctx->SetDistStart(dist); state.search_ctx->ReportVisitCandidate(id, dist, true); @@ -132,14 +138,14 @@ inline void OnOmegaLevel0Entry(node_id_t id, dist_t dist, MaybeFlushOmegaPendingCandidates(&state); } -inline void OnOmegaHop(void* user_data) { - auto& state = *static_cast(user_data); +inline void OnOmegaHop(void *user_data) { + auto &state = *static_cast(user_data); state.search_ctx->ReportHop(); } inline bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, - bool inserted_to_topk, void* user_data) { - auto& state = *static_cast(user_data); + bool inserted_to_topk, void *user_data) { + auto &state = *static_cast(user_data); if (state.per_cmp_reporting) { bool should_predict = false; should_predict = @@ -151,8 +157,7 @@ inline bool OnOmegaVisitCandidate(node_id_t id, dist_t dist, should_stop = state.search_ctx->ShouldStopEarly(); return should_stop; } - state.pending_candidates.Push( - {static_cast(id), dist, inserted_to_topk}); + state.pending_candidates.Push({static_cast(id), dist, inserted_to_topk}); return MaybeFlushOmegaPendingCandidates(&state); } diff --git a/src/core/algorithm/omega/omega_searcher.cc b/src/core/algorithm/omega/omega_searcher.cc index 280b6824c..a942c5aff 100644 --- a/src/core/algorithm/omega/omega_searcher.cc +++ b/src/core/algorithm/omega/omega_searcher.cc @@ -13,15 +13,15 @@ // limitations under the License. #include "omega_searcher.h" -#include "omega_context.h" -#include "omega_hook_utils.h" -#include "omega_params.h" +#include +#include #include #include #include -#include +#include "omega_context.h" +#include "omega_hook_utils.h" +#include "omega_params.h" #include "../hnsw/hnsw_context.h" -#include namespace zvec { namespace core { @@ -49,9 +49,15 @@ bool OmegaSearcher::should_use_omega() const { int OmegaSearcher::init(const ailego::Params ¶ms) { // Get OMEGA-specific parameters - omega_enabled_ = params.has("omega.enabled") ? 
params.get_as_bool("omega.enabled") : false; - min_vector_threshold_ = params.has("omega.min_vector_threshold") ? params.get_as_uint32("omega.min_vector_threshold") : 100000; - window_size_ = params.has("omega.window_size") ? params.get_as_int32("omega.window_size") : 100; + omega_enabled_ = + params.has("omega.enabled") ? params.get_as_bool("omega.enabled") : false; + min_vector_threshold_ = + params.has("omega.min_vector_threshold") + ? params.get_as_uint32("omega.min_vector_threshold") + : 100000; + window_size_ = params.has("omega.window_size") + ? params.get_as_int32("omega.window_size") + : 100; // Call parent class init int ret = HnswSearcher::init(params); @@ -60,9 +66,10 @@ int OmegaSearcher::init(const ailego::Params ¶ms) { return ret; } - LOG_INFO("OmegaSearcher initialized (omega_enabled=%d, min_threshold=%u, " - "window_size=%d)", - omega_enabled_, min_vector_threshold_, window_size_); + LOG_INFO( + "OmegaSearcher initialized (omega_enabled=%d, min_threshold=%u, " + "window_size=%d)", + omega_enabled_, min_vector_threshold_, window_size_); return 0; } @@ -111,7 +118,8 @@ int OmegaSearcher::load(IndexStorage::Pointer container, ret = omega_model_load(omega_model_, effective_model_dir.c_str()); if (ret == 0 && omega_model_is_loaded(omega_model_)) { use_omega_mode_ = true; - LOG_INFO("OMEGA model loaded successfully from %s", effective_model_dir.c_str()); + LOG_INFO("OMEGA model loaded successfully from %s", + effective_model_dir.c_str()); } else { LOG_WARN("Failed to load OMEGA model from %s, falling back to HNSW", effective_model_dir.c_str()); @@ -120,7 +128,9 @@ int OmegaSearcher::load(IndexStorage::Pointer container, } } } else { - LOG_WARN("OMEGA enabled but cannot derive omega_model path from index storage, falling back to HNSW"); + LOG_WARN( + "OMEGA enabled but cannot derive omega_model path from index " + "storage, falling back to HNSW"); } } else { if (omega_enabled_) { @@ -184,8 +194,7 @@ IndexSearcher::Context::Pointer 
OmegaSearcher::create_context() const { } int OmegaSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta, - uint32_t count, - ContextPointer &context) const { + uint32_t count, ContextPointer &context) const { // If OMEGA mode is not active, delegate to parent HNSW if (!should_use_omega()) { return HnswSearcher::search_impl(query, qmeta, count, context); @@ -195,11 +204,11 @@ int OmegaSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta, return adaptive_search(query, qmeta, count, context); } -int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmeta, - uint32_t count, +int OmegaSearcher::adaptive_search(const void *query, + const IndexQueryMeta &qmeta, uint32_t count, ContextPointer &context) const { // Cast context to OmegaContext to access OMEGA-specific features - auto *omega_ctx = dynamic_cast(context.get()); + auto *omega_ctx = dynamic_cast(context.get()); if (omega_ctx == nullptr) { LOG_ERROR("Context is not OmegaContext"); return IndexError_InvalidArgument; @@ -226,7 +235,8 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet LOG_WARN("Failed to create OMEGA search context, falling back to HNSW"); return HnswSearcher::search_impl(query, qmeta, count, context); } - omega::SearchContext* omega_search_ctx = omega_search_get_cpp_context(omega_search); + omega::SearchContext *omega_search_ctx = + omega_search_get_cpp_context(omega_search); if (omega_search_ctx == nullptr) { omega_search_destroy(omega_search); LOG_WARN("Failed to get OMEGA search context, falling back to HNSW"); @@ -268,8 +278,8 @@ int OmegaSearcher::adaptive_search(const void *query, const IndexQueryMeta &qmet // Get final statistics int hops, cmps, collected_gt; omega_search_ctx->GetStats(&hops, &cmps, &collected_gt); - LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu", - cmps, hops, omega_ctx->topk_heap().size()); + LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu", cmps, hops, 
+ omega_ctx->topk_heap().size()); // Cleanup omega_search_destroy(omega_search); diff --git a/src/core/algorithm/omega/omega_searcher.h b/src/core/algorithm/omega/omega_searcher.h index 0b3c2ee97..29cbb93cf 100644 --- a/src/core/algorithm/omega/omega_searcher.h +++ b/src/core/algorithm/omega/omega_searcher.h @@ -13,9 +13,9 @@ // limitations under the License. #pragma once +#include #include #include "../hnsw/hnsw_searcher.h" -#include namespace zvec { namespace core { @@ -32,7 +32,7 @@ class OmegaSearcher : public HnswSearcher { OmegaSearcher(void); ~OmegaSearcher(void); - OmegaSearcher(const OmegaSearcher &) = delete; + OmegaSearcher(const OmegaSearcher &) = delete; OmegaSearcher &operator=(const OmegaSearcher &) = delete; protected: diff --git a/src/core/algorithm/omega/omega_streamer.cc b/src/core/algorithm/omega/omega_streamer.cc index aad20972d..9d2ad6087 100644 --- a/src/core/algorithm/omega/omega_streamer.cc +++ b/src/core/algorithm/omega/omega_streamer.cc @@ -13,17 +13,17 @@ // limitations under the License. 
#include "omega_streamer.h" +#include +#include +#include #include #include #include -#include -#include "omega_hook_utils.h" -#include "../hnsw/hnsw_entity.h" -#include "../hnsw/hnsw_context.h" #include "omega_context.h" +#include "omega_hook_utils.h" #include "omega_params.h" -#include -#include +#include "../hnsw/hnsw_context.h" +#include "../hnsw/hnsw_entity.h" namespace zvec { namespace core { @@ -35,7 +35,7 @@ struct OmegaHookSetup { HnswAlgorithm::SearchHooks hooks; }; -OmegaHookSetup CreateOmegaHookSetup(omega::SearchContext* omega_search_ctx, +OmegaHookSetup CreateOmegaHookSetup(omega::SearchContext *omega_search_ctx, bool enable_early_stopping, bool per_cmp_reporting) { OmegaHookSetup setup; @@ -51,10 +51,10 @@ OmegaHookSetup CreateOmegaHookSetup(omega::SearchContext* omega_search_ctx, return setup; } -void EnableOmegaTrainingIfNeeded(OmegaSearchHandle omega_search, int query_id, - bool training_mode_enabled, - const std::vector>& training_ground_truth, - int training_k_train) { +void EnableOmegaTrainingIfNeeded( + OmegaSearchHandle omega_search, int query_id, bool training_mode_enabled, + const std::vector> &training_ground_truth, + int training_k_train) { if (!training_mode_enabled) { return; } @@ -62,7 +62,7 @@ void EnableOmegaTrainingIfNeeded(OmegaSearchHandle omega_search, int query_id, std::vector gt_for_query; if (query_id >= 0 && static_cast(query_id) < training_ground_truth.size()) { - const auto& gt = training_ground_truth[query_id]; + const auto > = training_ground_truth[query_id]; gt_for_query.reserve(gt.size()); for (uint64_t node_id : gt) { gt_for_query.push_back(static_cast(node_id)); @@ -71,24 +71,24 @@ void EnableOmegaTrainingIfNeeded(OmegaSearchHandle omega_search, int query_id, omega_search_enable_training(omega_search, query_id, gt_for_query.data(), gt_for_query.size(), training_k_train); - LOG_DEBUG("Training mode enabled for query_id=%d with %zu GT nodes", - query_id, gt_for_query.size()); + LOG_DEBUG("Training mode enabled for 
query_id=%d with %zu GT nodes", query_id, + gt_for_query.size()); } void CollectOmegaTrainingOutputs(OmegaSearchHandle omega_search, - OmegaContext* omega_ctx, int query_id) { + OmegaContext *omega_ctx, int query_id) { if (omega_ctx == nullptr) { return; } size_t record_count = omega_search_get_training_records_count(omega_search); if (record_count > 0) { - const void* records_ptr = omega_search_get_training_records(omega_search); - const auto* records_vec = - static_cast*>(records_ptr); + const void *records_ptr = omega_search_get_training_records(omega_search); + const auto *records_vec = + static_cast *>(records_ptr); for (size_t i = 0; i < record_count; ++i) { - const auto& omega_record = (*records_vec)[i]; + const auto &omega_record = (*records_vec)[i]; core_interface::TrainingRecord record; record.query_id = omega_record.query_id; record.hops_visited = omega_record.hops_visited; @@ -115,14 +115,14 @@ void CollectOmegaTrainingOutputs(OmegaSearchHandle omega_search, return; } - const int* gt_cmps_ptr = omega_search_get_gt_cmps(omega_search); + const int *gt_cmps_ptr = omega_search_get_gt_cmps(omega_search); int total_cmps = omega_search_get_total_cmps(omega_search); if (gt_cmps_ptr == nullptr) { return; } std::vector gt_cmps_vec(gt_cmps_ptr, gt_cmps_ptr + gt_cmps_count); - for (auto& v : gt_cmps_vec) { + for (auto &v : gt_cmps_vec) { if (v < 0) { v = total_cmps; } @@ -132,7 +132,7 @@ void CollectOmegaTrainingOutputs(OmegaSearchHandle omega_search, } // namespace -bool OmegaStreamer::LoadModel(const std::string& model_dir) { +bool OmegaStreamer::LoadModel(const std::string &model_dir) { std::lock_guard lock(model_mutex_); if (omega_model_ != nullptr) { @@ -179,28 +179,33 @@ int OmegaStreamer::open(IndexStorage::Pointer stg) { } if (index_path.empty()) { - LOG_WARN("OmegaStreamer open: storage file path is empty, using HNSW fallback"); + LOG_WARN( + "OmegaStreamer open: storage file path is empty, using HNSW fallback"); return 0; } size_t last_slash = 
index_path.rfind('/'); if (last_slash == std::string::npos) { - LOG_WARN("OmegaStreamer open: cannot derive omega_model path from index path %s", - index_path.c_str()); + LOG_WARN( + "OmegaStreamer open: cannot derive omega_model path from index path %s", + index_path.c_str()); return 0; } std::string model_dir = index_path.substr(0, last_slash) + "/omega_model"; std::string model_path = model_dir + "/model.txt"; if (!ailego::File::IsExist(model_path)) { - LOG_INFO("OmegaStreamer open: no OMEGA model found at %s, using HNSW fallback", - model_dir.c_str()); + LOG_INFO( + "OmegaStreamer open: no OMEGA model found at %s, using HNSW fallback", + model_dir.c_str()); return 0; } if (!LoadModel(model_dir)) { - LOG_WARN("OmegaStreamer open: failed to load OMEGA model from %s, using HNSW fallback", - model_dir.c_str()); + LOG_WARN( + "OmegaStreamer open: failed to load OMEGA model from %s, using HNSW " + "fallback", + model_dir.c_str()); } return 0; @@ -214,10 +219,10 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, uint32_t count, Context::Pointer &context) const { - // Determine mode: training (no early stopping) vs inference (with early stopping) - bool enable_early_stopping = - !training_mode_enabled_ && IsModelLoaded() && - !DisableOmegaModelPrediction(); + // Determine mode: training (no early stopping) vs inference (with early + // stopping) + bool enable_early_stopping = !training_mode_enabled_ && IsModelLoaded() && + !DisableOmegaModelPrediction(); if (training_mode_enabled_) { LOG_DEBUG("OmegaStreamer: training mode, early stopping DISABLED"); @@ -230,13 +235,14 @@ int OmegaStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, return omega_search_impl(query, qmeta, count, context, enable_early_stopping); } -int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qmeta, +int OmegaStreamer::omega_search_impl(const void 
*query, + const IndexQueryMeta &qmeta, uint32_t count, Context::Pointer &context, bool enable_early_stopping) const { (void)qmeta; // Cast context to OmegaContext to access training_query_id - auto *omega_ctx = dynamic_cast(context.get()); + auto *omega_ctx = dynamic_cast(context.get()); int query_id = current_query_id_; // Default to member variable if (omega_ctx != nullptr && omega_ctx->training_query_id() >= 0) { query_id = omega_ctx->training_query_id(); @@ -249,7 +255,7 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm } // Cast context to HnswContext to access HNSW-specific features - auto *hnsw_ctx = dynamic_cast(context.get()); + auto *hnsw_ctx = dynamic_cast(context.get()); if (hnsw_ctx == nullptr) { LOG_ERROR("Context is not HnswContext"); return IndexError_InvalidArgument; @@ -264,7 +270,8 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm // In training mode: model=nullptr (collect features only) // In inference mode: model=omega_model_ (use for early stopping) - OmegaModelHandle model_to_use = enable_early_stopping ? omega_model_ : nullptr; + OmegaModelHandle model_to_use = + enable_early_stopping ? 
omega_model_ : nullptr; OmegaSearchHandle omega_search = omega_search_create_with_params( model_to_use, target_recall, omega_topk, window_size_); @@ -273,7 +280,8 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm LOG_ERROR("Failed to create OMEGA search context"); return IndexError_Runtime; } - omega::SearchContext* omega_search_ctx = omega_search_get_cpp_context(omega_search); + omega::SearchContext *omega_search_ctx = + omega_search_get_cpp_context(omega_search); if (omega_search_ctx == nullptr) { omega_search_destroy(omega_search); LOG_ERROR("Failed to get OMEGA search context"); @@ -302,9 +310,8 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm hnsw_ctx->resize_results(count); hnsw_ctx->check_need_adjuct_ctx(entity_.doc_cnt()); hnsw_ctx->reset_query(query); - OmegaHookSetup hook_setup = - CreateOmegaHookSetup(omega_search_ctx, enable_early_stopping, - training_mode_enabled_); + OmegaHookSetup hook_setup = CreateOmegaHookSetup( + omega_search_ctx, enable_early_stopping, training_mode_enabled_); bool early_stop_hit = false; int ret = alg_->search_with_hooks(hnsw_ctx, &hook_setup.hooks, &early_stop_hit); @@ -317,9 +324,10 @@ int OmegaStreamer::omega_search_impl(const void *query, const IndexQueryMeta &qm int hops = 0; int cmps = 0; omega_search_ctx->GetStats(&hops, &cmps, nullptr); - LOG_DEBUG("OMEGA search completed: cmps=%d, hops=%d, results=%zu, early_stop=%d", - cmps, hops, hnsw_ctx->topk_heap().size(), - (early_stop_hit || omega_search_ctx->EarlyStopHit()) ? 1 : 0); + LOG_DEBUG( + "OMEGA search completed: cmps=%d, hops=%d, results=%zu, early_stop=%d", + cmps, hops, hnsw_ctx->topk_heap().size(), + (early_stop_hit || omega_search_ctx->EarlyStopHit()) ? 1 : 0); // Match HNSW timing semantics: result materialization is outside the // search-core timer and happens after logging. 
@@ -380,7 +388,7 @@ int OmegaStreamer::dump(const IndexDumper::Pointer &dumper) { // Persist the OMEGA searcher params alongside the dumped index metadata so a // reopened index reconstructs the same searcher-side behavior. ailego::Params searcher_params; - const auto& streamer_params = meta_.streamer_params(); + const auto &streamer_params = meta_.streamer_params(); // Copy the omega.* params needed by OmegaSearcher::init(). if (streamer_params.has("omega.enabled")) { @@ -388,22 +396,27 @@ int OmegaStreamer::dump(const IndexDumper::Pointer &dumper) { streamer_params.get_as_bool("omega.enabled")); } if (streamer_params.has("omega.min_vector_threshold")) { - searcher_params.insert("omega.min_vector_threshold", - streamer_params.get_as_uint32("omega.min_vector_threshold")); + searcher_params.insert( + "omega.min_vector_threshold", + streamer_params.get_as_uint32("omega.min_vector_threshold")); } if (streamer_params.has("omega.window_size")) { searcher_params.insert("omega.window_size", streamer_params.get_as_int32("omega.window_size")); } - LOG_INFO("OmegaStreamer::dump: passing omega params to searcher " - "(enabled=%d, min_threshold=%u, window_size=%d)", - searcher_params.has("omega.enabled") ? - searcher_params.get_as_bool("omega.enabled") : false, - searcher_params.has("omega.min_vector_threshold") ? - searcher_params.get_as_uint32("omega.min_vector_threshold") : 0, - searcher_params.has("omega.window_size") ? - searcher_params.get_as_int32("omega.window_size") : 0); + LOG_INFO( + "OmegaStreamer::dump: passing omega params to searcher " + "(enabled=%d, min_threshold=%u, window_size=%d)", + searcher_params.has("omega.enabled") + ? searcher_params.get_as_bool("omega.enabled") + : false, + searcher_params.has("omega.min_vector_threshold") + ? searcher_params.get_as_uint32("omega.min_vector_threshold") + : 0, + searcher_params.has("omega.window_size") + ? 
searcher_params.get_as_int32("omega.window_size") + : 0); meta_.set_searcher("OmegaSearcher", HnswEntity::kRevision, searcher_params); diff --git a/src/core/algorithm/omega/omega_streamer.h b/src/core/algorithm/omega/omega_streamer.h index 31534f35b..4c4775b0c 100644 --- a/src/core/algorithm/omega/omega_streamer.h +++ b/src/core/algorithm/omega/omega_streamer.h @@ -13,12 +13,12 @@ // limitations under the License. #pragma once -#include "../hnsw/hnsw_streamer.h" -#include "omega_context.h" -#include -#include -#include #include +#include +#include +#include +#include "omega_context.h" +#include "../hnsw/hnsw_streamer.h" namespace zvec { namespace core { @@ -48,10 +48,14 @@ class OmegaStreamer : public HnswStreamer { OmegaStreamer &operator=(const OmegaStreamer &streamer) = delete; // Training-mode configuration forwarded into per-search contexts. - void EnableTrainingMode(bool enable) { training_mode_enabled_ = enable; } - void SetCurrentQueryId(int query_id) { current_query_id_ = query_id; } - void SetTrainingGroundTruth(const std::vector>& ground_truth, - int k_train = 1) { + void EnableTrainingMode(bool enable) { + training_mode_enabled_ = enable; + } + void SetCurrentQueryId(int query_id) { + current_query_id_ = query_id; + } + void SetTrainingGroundTruth( + const std::vector> &ground_truth, int k_train = 1) { training_ground_truth_ = ground_truth; training_k_train_ = k_train; } @@ -87,7 +91,7 @@ class OmegaStreamer : public HnswStreamer { private: // Search-mode configuration shared across searches for this streamer. 
- bool LoadModel(const std::string& model_dir); + bool LoadModel(const std::string &model_dir); bool IsModelLoaded() const; // Perform OMEGA adaptive search (shared between training and inference mode) diff --git a/src/core/interface/index.cc b/src/core/interface/index.cc index 9a440d548..f21e202f7 100644 --- a/src/core/interface/index.cc +++ b/src/core/interface/index.cc @@ -16,8 +16,8 @@ #include #include #include -#include "../mixed_reducer/mixed_streamer_reducer.h" #include "../mixed_reducer/mixed_reducer_params.h" +#include "../mixed_reducer/mixed_streamer_reducer.h" namespace zvec::core_interface { @@ -815,7 +815,8 @@ int Index::Merge(const std::vector &indexes, // Set storage and file path for dump/reload operations - auto* mixed_reducer = dynamic_cast(reducer.get()); + auto *mixed_reducer = + dynamic_cast(reducer.get()); if (mixed_reducer != nullptr) { mixed_reducer->set_storage(storage_, file_path_); } @@ -842,9 +843,11 @@ int Index::Merge(const std::vector &indexes, // Generic training support: Check if this index supports training capability // The actual training orchestration happens at the db layer (Segment level) - auto* training_capable = this->GetTrainingCapability(); + auto *training_capable = this->GetTrainingCapability(); if (training_capable != nullptr) { - LOG_INFO("Index merge completed for trainable index, training can now be performed"); + LOG_INFO( + "Index merge completed for trainable index, training can now be " + "performed"); } return 0; diff --git a/src/core/interface/index_factory.cc b/src/core/interface/index_factory.cc index 32e2c569e..b4d0893f3 100644 --- a/src/core/interface/index_factory.cc +++ b/src/core/interface/index_factory.cc @@ -116,25 +116,23 @@ BaseIndexParam::Pointer IndexFactory::DeserializeIndexParamFromJson( return true; }; - if (!extract_enum_from_json(json_obj, "metric_type", - param->metric_type, - tmp_json_value) || + if (!extract_enum_from_json( + json_obj, "metric_type", param->metric_type, tmp_json_value) || 
!extract_enum_from_json(json_obj, "data_type", - param->data_type, - tmp_json_value) || + param->data_type, tmp_json_value) || !extract_value_from_json(json_obj, "dimension", param->dimension, - tmp_json_value) || + tmp_json_value) || !extract_value_from_json(json_obj, "version", param->version, - tmp_json_value) || + tmp_json_value) || !extract_value_from_json(json_obj, "is_sparse", param->is_sparse, - tmp_json_value) || + tmp_json_value) || !extract_value_from_json(json_obj, "use_id_map", param->use_id_map, - tmp_json_value) || + tmp_json_value) || !extract_value_from_json(json_obj, "is_huge_page", - param->is_huge_page, tmp_json_value) || + param->is_huge_page, tmp_json_value) || !extract_value_from_json(json_obj, "m", param->m, tmp_json_value) || !extract_value_from_json(json_obj, "ef_construction", - param->ef_construction, tmp_json_value) || + param->ef_construction, tmp_json_value) || !deserialize_quantizer(json_obj)) { LOG_ERROR("Failed to deserialize omega index param"); return nullptr; diff --git a/src/core/interface/indexes/hnsw_index.cc b/src/core/interface/indexes/hnsw_index.cc index 53344e30e..76a33fe72 100644 --- a/src/core/interface/indexes/hnsw_index.cc +++ b/src/core/interface/indexes/hnsw_index.cc @@ -15,9 +15,9 @@ #include #include #include -#include "algorithm/omega/omega_params.h" #include "algorithm/hnsw/hnsw_params.h" #include "algorithm/hnsw_sparse/hnsw_sparse_params.h" +#include "algorithm/omega/omega_params.h" namespace zvec::core_interface { diff --git a/src/core/interface/indexes/omega_index.cc b/src/core/interface/indexes/omega_index.cc index b6e590b7d..3ec172065 100644 --- a/src/core/interface/indexes/omega_index.cc +++ b/src/core/interface/indexes/omega_index.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include -#include "algorithm/omega/omega_streamer.h" -#include "algorithm/omega/omega_params.h" #include "algorithm/hnsw/hnsw_params.h" +#include "algorithm/omega/omega_params.h" +#include "algorithm/omega/omega_streamer.h" #include "omega_training_session.h" -#include namespace zvec::core_interface { @@ -26,7 +26,6 @@ namespace zvec::core_interface { // for creating the correct streamer and injecting OMEGA query params into the // search context. It does not own the adaptive-search algorithm itself. int OmegaIndex::CreateAndInitStreamer(const BaseIndexParam ¶m) { - // Reuse HNSWIndex setup so the HNSW-compatible on-disk/index metadata is // initialized consistently before swapping in the OMEGA-aware streamer. int ret = HNSWIndex::CreateAndInitStreamer(param); @@ -45,8 +44,7 @@ int OmegaIndex::CreateAndInitStreamer(const BaseIndexParam ¶m) { return core::IndexError_Runtime; } - if (ailego_unlikely( - streamer_->init(saved_meta, saved_params) != 0)) { + if (ailego_unlikely(streamer_->init(saved_meta, saved_params) != 0)) { LOG_ERROR("Failed to init OmegaStreamer"); return core::IndexError_Runtime; } @@ -60,8 +58,8 @@ int OmegaIndex::CreateAndInitStreamer(const BaseIndexParam ¶m) { ITrainingSession::Pointer OmegaIndex::CreateTrainingSession() { - if (auto* omega_streamer = - streamer_ ? dynamic_cast(streamer_.get()) + if (auto *omega_streamer = + streamer_ ? dynamic_cast(streamer_.get()) : nullptr) { return std::make_shared(omega_streamer); } diff --git a/src/core/interface/indexes/omega_training_session.cc b/src/core/interface/indexes/omega_training_session.cc index f7976e1c5..f2ec7297f 100644 --- a/src/core/interface/indexes/omega_training_session.cc +++ b/src/core/interface/indexes/omega_training_session.cc @@ -13,12 +13,11 @@ // limitations under the License. 
#include "omega_training_session.h" - #include "algorithm/omega/omega_streamer.h" namespace zvec::core_interface { -zvec::Status OmegaTrainingSession::Start(const TrainingSessionConfig& config) { +zvec::Status OmegaTrainingSession::Start(const TrainingSessionConfig &config) { std::lock_guard lock(mutex_); if (streamer_ == nullptr) { return zvec::Status::InvalidArgument("Omega streamer is not available"); @@ -39,7 +38,8 @@ void OmegaTrainingSession::BeginQuery(int query_id) { } } -void OmegaTrainingSession::CollectQueryArtifacts(QueryTrainingArtifacts&& artifacts) { +void OmegaTrainingSession::CollectQueryArtifacts( + QueryTrainingArtifacts &&artifacts) { std::lock_guard lock(mutex_); if (!artifacts.records.empty()) { records_.insert(records_.end(), @@ -65,7 +65,7 @@ TrainingArtifacts OmegaTrainingSession::ConsumeArtifacts() { } size_t topk = topk_; if (topk == 0) { - for (const auto& entry : gt_cmps_map_) { + for (const auto &entry : gt_cmps_map_) { if (!entry.second.first.empty()) { topk = entry.second.first.size(); break; @@ -80,14 +80,15 @@ TrainingArtifacts OmegaTrainingSession::ConsumeArtifacts() { for (size_t q = 0; q < num_queries; ++q) { artifacts.gt_cmps_data.gt_cmps[q].resize(topk, 0); } - for (const auto& entry : gt_cmps_map_) { + for (const auto &entry : gt_cmps_map_) { size_t query_id = static_cast(entry.first); if (query_id >= num_queries) { continue; } - const auto& [gt_cmps_per_rank, total_cmps] = entry.second; + const auto &[gt_cmps_per_rank, total_cmps] = entry.second; artifacts.gt_cmps_data.total_cmps[query_id] = total_cmps; - for (size_t rank = 0; rank < gt_cmps_per_rank.size() && rank < topk; ++rank) { + for (size_t rank = 0; rank < gt_cmps_per_rank.size() && rank < topk; + ++rank) { artifacts.gt_cmps_data.gt_cmps[query_id][rank] = gt_cmps_per_rank[rank]; } } diff --git a/src/core/interface/indexes/omega_training_session.h b/src/core/interface/indexes/omega_training_session.h index c4041488e..8915ad551 100644 --- 
a/src/core/interface/indexes/omega_training_session.h +++ b/src/core/interface/indexes/omega_training_session.h @@ -27,14 +27,14 @@ namespace core_interface { class OmegaTrainingSession : public ITrainingSession { public: - explicit OmegaTrainingSession(core::OmegaStreamer* streamer) + explicit OmegaTrainingSession(core::OmegaStreamer *streamer) : streamer_(streamer) {} - zvec::Status Start(const TrainingSessionConfig& config) override; + zvec::Status Start(const TrainingSessionConfig &config) override; void BeginQuery(int query_id) override; - void CollectQueryArtifacts(QueryTrainingArtifacts&& artifacts) override; + void CollectQueryArtifacts(QueryTrainingArtifacts &&artifacts) override; TrainingArtifacts ConsumeArtifacts() override; @@ -43,7 +43,7 @@ class OmegaTrainingSession : public ITrainingSession { private: void ResetArtifactsLocked(); - core::OmegaStreamer* streamer_{nullptr}; + core::OmegaStreamer *streamer_{nullptr}; std::mutex mutex_; size_t topk_{0}; size_t num_queries_{0}; diff --git a/src/core/mixed_reducer/mixed_streamer_reducer.cc b/src/core/mixed_reducer/mixed_streamer_reducer.cc index 46757b168..1f2b58ea4 100644 --- a/src/core/mixed_reducer/mixed_streamer_reducer.cc +++ b/src/core/mixed_reducer/mixed_streamer_reducer.cc @@ -230,7 +230,8 @@ int MixedStreamerReducer::reduce(const IndexFilter &filter) { // Initialize the dumper with the file path int ret = dumper->create(target_file_path_); if (ret != 0) { - LOG_ERROR("Failed to create dumper at path=%s, ret=%d", target_file_path_.c_str(), ret); + LOG_ERROR("Failed to create dumper at path=%s, ret=%d", + target_file_path_.c_str(), ret); return ret; } @@ -249,9 +250,10 @@ int MixedStreamerReducer::reduce(const IndexFilter &filter) { } - // NOTE: We cannot safely reload the streamer here (close/open causes crashes). - // The streamer will properly load data when the collection is reopened. - // For now, auto-training will need to handle the case where streamer doc_count=0. 
+ // NOTE: We cannot safely reload the streamer here (close/open causes + // crashes). The streamer will properly load data when the collection is + // reopened. For now, auto-training will need to handle the case where + // streamer doc_count=0. } else { } diff --git a/src/core/mixed_reducer/mixed_streamer_reducer.h b/src/core/mixed_reducer/mixed_streamer_reducer.h index 0a203cfd0..aba9e5c14 100644 --- a/src/core/mixed_reducer/mixed_streamer_reducer.h +++ b/src/core/mixed_reducer/mixed_streamer_reducer.h @@ -53,7 +53,8 @@ class MixedStreamerReducer : public IndexStreamerReducer { const IndexQueryMeta &original_query_meta) override; // Set the storage and file path for dump/reload operations - void set_storage(const IndexStorage::Pointer &storage, const std::string &file_path) { + void set_storage(const IndexStorage::Pointer &storage, + const std::string &file_path) { target_storage_ = storage; target_file_path_ = file_path; } diff --git a/src/core/utility/buffer_storage.cc b/src/core/utility/buffer_storage.cc index 8025cd729..ba82bc764 100644 --- a/src/core/utility/buffer_storage.cc +++ b/src/core/utility/buffer_storage.cc @@ -421,6 +421,7 @@ class BufferStorage : public IndexStorage { uint32_t get_context_offset() { return header_.content_offset; } + protected: //! Initialize index version segment int init_version_segment(void) { diff --git a/src/core/utility/rdtsc_timer.cc b/src/core/utility/rdtsc_timer.cc index 9a69a59d5..d4e54c8a8 100644 --- a/src/core/utility/rdtsc_timer.cc +++ b/src/core/utility/rdtsc_timer.cc @@ -13,7 +13,6 @@ // limitations under the License. 
#include "utility/rdtsc_timer.h" - #include namespace zvec { @@ -24,10 +23,7 @@ RdtscTimer::tick_t RdtscTimer::Now() { uint32_t lo = 0; uint32_t hi = 0; uint32_t aux = 0; - __asm__ __volatile__("rdtscp" - : "=a"(lo), "=d"(hi), "=c"(aux) - : - :); + __asm__ __volatile__("rdtscp" : "=a"(lo), "=d"(hi), "=c"(aux) : :); return (static_cast(hi) << 32) | lo; #else return MonotonicRawNs(); @@ -39,8 +35,7 @@ uint64_t RdtscTimer::ElapsedNs(tick_t start, tick_t end) { if (end <= start) { return 0; } - return static_cast( - static_cast(end - start) * NsPerTick()); + return static_cast(static_cast(end - start) * NsPerTick()); #else return end > start ? (end - start) : 0; #endif diff --git a/src/db/collection.cc b/src/db/collection.cc index bcb56b5e1..98162c83f 100644 --- a/src/db/collection.cc +++ b/src/db/collection.cc @@ -824,9 +824,10 @@ Status CollectionImpl::Optimize(const OptimizeOptions &options) { } if (options.retrain_only_) { - LOG_WARN("Optimize running in OMEGA retrain-only mode on %zu persisted segments", - persist_segments.size()); - for (auto& segment : persist_segments) { + LOG_WARN( + "Optimize running in OMEGA retrain-only mode on %zu persisted segments", + persist_segments.size()); + for (auto &segment : persist_segments) { auto s = segment->retrain_omega_model(); CHECK_RETURN_STATUS(s); } @@ -834,7 +835,8 @@ Status CollectionImpl::Optimize(const OptimizeOptions &options) { } // Step 1: Build vector indexes if not ready - // This ensures indexes are built even for single segments that won't be compacted + // This ensures indexes are built even for single segments that won't be + // compacted std::vector index_build_tasks; for (auto &segment : persist_segments) { if (!segment->all_vector_index_ready()) { @@ -845,7 +847,8 @@ Status CollectionImpl::Optimize(const OptimizeOptions &options) { } if (!index_build_tasks.empty()) { - LOG_INFO("Building vector indexes for %zu segments", index_build_tasks.size()); + LOG_INFO("Building vector indexes for %zu segments", + 
index_build_tasks.size()); auto s = execute_tasks(index_build_tasks); CHECK_RETURN_STATUS(s); diff --git a/src/db/index/column/vector_column/engine_helper.hpp b/src/db/index/column/vector_column/engine_helper.hpp index fa23c1321..09063b32b 100644 --- a/src/db/index/column/vector_column/engine_helper.hpp +++ b/src/db/index/column/vector_column/engine_helper.hpp @@ -162,7 +162,8 @@ class ProximaEngineHelper { auto db_hnsw_query_params = dynamic_cast( query_params.query_params.get()); hnsw_query_param->ef_search = db_hnsw_query_params->ef(); - hnsw_query_param->training_query_id = db_hnsw_query_params->training_query_id(); + hnsw_query_param->training_query_id = + db_hnsw_query_params->training_query_id(); } return std::move(hnsw_query_param); } @@ -178,11 +179,14 @@ class ProximaEngineHelper { } auto &omega_query_param = omega_query_param_result.value(); if (query_params.query_params) { - if (auto* db_omega_query_params = dynamic_cast( - query_params.query_params.get())) { + if (auto *db_omega_query_params = + dynamic_cast( + query_params.query_params.get())) { omega_query_param->ef_search = db_omega_query_params->ef(); - omega_query_param->target_recall = db_omega_query_params->target_recall(); - omega_query_param->training_query_id = db_omega_query_params->training_query_id(); + omega_query_param->target_recall = + db_omega_query_params->target_recall(); + omega_query_param->training_query_id = + db_omega_query_params->training_query_id(); } } return std::move(omega_query_param); diff --git a/src/db/index/column/vector_column/vector_column_indexer.cc b/src/db/index/column/vector_column/vector_column_indexer.cc index f0cac5e83..7d284a262 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.cc +++ b/src/db/index/column/vector_column/vector_column_indexer.cc @@ -208,8 +208,10 @@ Result VectorColumnIndexer::Search( if (training_session != nullptr) { LOG_INFO( - "VectorColumnIndexer training search: query_id=%d records=%zu gt_cmps=%zu total_cmps=%d", - 
search_result.training_query_id_, search_result.training_records_.size(), + "VectorColumnIndexer training search: query_id=%d records=%zu " + "gt_cmps=%zu total_cmps=%d", + search_result.training_query_id_, + search_result.training_records_.size(), search_result.gt_cmps_per_rank_.size(), search_result.total_cmps_); } @@ -229,7 +231,8 @@ Result VectorColumnIndexer::Search( return result; } -core_interface::ITrainingCapable* VectorColumnIndexer::GetTrainingCapability() const { +core_interface::ITrainingCapable *VectorColumnIndexer::GetTrainingCapability() + const { if (index != nullptr) { return index->GetTrainingCapability(); } @@ -239,7 +242,7 @@ core_interface::ITrainingCapable* VectorColumnIndexer::GetTrainingCapability() c core_interface::ITrainingSession::Pointer VectorColumnIndexer::CreateTrainingSession() const { if (index != nullptr) { - if (auto* training_capable = index->GetTrainingCapability()) { + if (auto *training_capable = index->GetTrainingCapability()) { return training_capable->CreateTrainingSession(); } } @@ -247,7 +250,7 @@ VectorColumnIndexer::CreateTrainingSession() const { } void VectorColumnIndexer::SetTrainingSession( - const core_interface::ITrainingSession::Pointer& session) { + const core_interface::ITrainingSession::Pointer &session) { std::lock_guard lock(training_mutex_); training_session_ = session; } diff --git a/src/db/index/column/vector_column/vector_column_indexer.h b/src/db/index/column/vector_column/vector_column_indexer.h index f2c7aa16e..9ec2ebab9 100644 --- a/src/db/index/column/vector_column/vector_column_indexer.h +++ b/src/db/index/column/vector_column/vector_column_indexer.h @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once +#include +#include #include #include #include -#include -#include #include #include #include @@ -98,13 +98,15 @@ class VectorColumnIndexer { /** * @brief Check if the underlying index supports training capability. 
* - * @return Pointer to ITrainingCapable interface if supported, nullptr otherwise + * @return Pointer to ITrainingCapable interface if supported, nullptr + * otherwise */ - core_interface::ITrainingCapable* GetTrainingCapability() const; + core_interface::ITrainingCapable *GetTrainingCapability() const; core_interface::ITrainingSession::Pointer CreateTrainingSession() const; - void SetTrainingSession(const core_interface::ITrainingSession::Pointer& session); + void SetTrainingSession( + const core_interface::ITrainingSession::Pointer &session); void ClearTrainingSession(); @@ -131,7 +133,8 @@ class VectorColumnIndexer { MetricType metric_type() const { auto index_params = field_schema_.index_params(); if (index_params) { - auto vector_params = std::dynamic_pointer_cast(index_params); + auto vector_params = + std::dynamic_pointer_cast(index_params); if (vector_params) { return vector_params->metric_type(); } diff --git a/src/db/index/common/proto_converter.cc b/src/db/index/common/proto_converter.cc index 6109d33b6..b6ac9c100 100644 --- a/src/db/index/common/proto_converter.cc +++ b/src/db/index/common/proto_converter.cc @@ -108,12 +108,9 @@ OmegaIndexParams::OPtr ProtoConverter::FromPb( MetricTypeCodeBook::Get(params_pb.base().metric_type()), params_pb.m(), params_pb.ef_construction(), QuantizeTypeCodeBook::Get(params_pb.base().quantize_type()), - params_pb.min_vector_threshold(), - params_pb.num_training_queries(), - params_pb.ef_training(), - params_pb.window_size(), - params_pb.ef_groundtruth(), - params_pb.k_train()); + params_pb.min_vector_threshold(), params_pb.num_training_queries(), + params_pb.ef_training(), params_pb.window_size(), + params_pb.ef_groundtruth(), params_pb.k_train()); return params; } diff --git a/src/db/index/common/proto_converter.h b/src/db/index/common/proto_converter.h index ba95a53c4..f6859487d 100644 --- a/src/db/index/common/proto_converter.h +++ b/src/db/index/common/proto_converter.h @@ -39,7 +39,8 @@ struct ProtoConverter { 
static proto::IVFIndexParams ToPb(const IVFIndexParams *params); // OmegaIndexParams - static OmegaIndexParams::OPtr FromPb(const proto::OmegaIndexParams ¶ms_pb); + static OmegaIndexParams::OPtr FromPb( + const proto::OmegaIndexParams ¶ms_pb); static proto::OmegaIndexParams ToPb(const OmegaIndexParams *params); // InvertIndexParams diff --git a/src/db/index/common/schema.cc b/src/db/index/common/schema.cc index 3c980c2b1..c43b8964e 100644 --- a/src/db/index/common/schema.cc +++ b/src/db/index/common/schema.cc @@ -54,8 +54,8 @@ std::unordered_set support_sparse_vector_type = { }; std::unordered_set support_dense_vector_index = { - IndexType::FLAT, IndexType::HNSW, IndexType::HNSW_RABITQ, - IndexType::IVF, IndexType::OMEGA}; + IndexType::FLAT, IndexType::HNSW, IndexType::HNSW_RABITQ, IndexType::IVF, + IndexType::OMEGA}; std::unordered_set support_sparse_vector_index = {IndexType::FLAT, IndexType::HNSW}; diff --git a/src/db/index/segment/segment.cc b/src/db/index/segment/segment.cc index fbc59dfdb..00a2c56ed 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -59,6 +59,9 @@ #include "db/index/storage/mmap_forward_store.h" #include "db/index/storage/store_helper.h" #include "db/index/storage/wal/wal_file.h" +#include "db/training/omega_model_trainer.h" +#include "db/training/omega_training_coordinator.h" +#include "db/training/training_data_collector.h" #include "zvec/ailego/container/params.h" #include "zvec/core/framework/index_factory.h" #include "zvec/core/framework/index_meta.h" @@ -66,15 +69,10 @@ #include "zvec/core/framework/index_reformer.h" #include "column_merging_reader.h" #include "sql_expr_parser.h" -#include "db/training/omega_training_coordinator.h" -#include "db/training/training_data_collector.h" -#include "db/training/omega_model_trainer.h" namespace zvec { -namespace { - -} // namespace +namespace {} // namespace void global_init() { static std::once_flag once; @@ -294,8 +292,8 @@ class SegmentImpl : public Segment, 
// Auto-training for OMEGA index (called after Merge completes) Status auto_train_omega_index_internal( - const std::string& field_name, - const std::vector& indexers); + const std::string &field_name, + const std::vector &indexers); Status recover(); Status open_wal_file(); @@ -1584,7 +1582,7 @@ Status SegmentImpl::create_all_vector_index( new_segment_meta->set_indexed_vector_fields(vector_field_names); *segment_meta = new_segment_meta; - for (const auto& field_name : vector_field_names) { + for (const auto &field_name : vector_field_names) { } // Note: OMEGA training is now performed in merge_vector_indexer() immediately @@ -1621,7 +1619,7 @@ Result SegmentImpl::merge_vector_indexer( CHECK_RETURN_STATUS_EXPECTED(s); // Check if this is a trainable index (OMEGA) - auto* training_capable = vector_indexer->GetTrainingCapability(); + auto *training_capable = vector_indexer->GetTrainingCapability(); bool needs_training = false; std::string model_output_dir; OmegaTrainingParams omega_training_params; @@ -1632,32 +1630,42 @@ Result SegmentImpl::merge_vector_indexer( size_t doc_count = vector_indexer->doc_count(); if (doc_count >= omega_training_params.min_vector_threshold) { needs_training = true; - LOG_INFO("Trainable index detected after merge for field '%s' in segment %d (doc_count=%zu >= min_vector_threshold=%u)", - column.c_str(), id(), doc_count, - omega_training_params.min_vector_threshold); + LOG_INFO( + "Trainable index detected after merge for field '%s' in segment %d " + "(doc_count=%zu >= min_vector_threshold=%u)", + column.c_str(), id(), doc_count, + omega_training_params.min_vector_threshold); } else { - LOG_INFO("Skipping OMEGA training for field '%s': doc_count=%zu < min_vector_threshold=%u", - column.c_str(), doc_count, - omega_training_params.min_vector_threshold); + LOG_INFO( + "Skipping OMEGA training for field '%s': doc_count=%zu < " + "min_vector_threshold=%u", + column.c_str(), doc_count, + omega_training_params.min_vector_threshold); } } - // 
OPTIMIZATION: Collect training data BEFORE Flush() while the in-memory graph still exists. - // This avoids the expensive disk reload (~2 minutes for 1M vectors) that was previously needed. - // The model training itself doesn't need the graph, only the collected records. + // OPTIMIZATION: Collect training data BEFORE Flush() while the in-memory + // graph still exists. This avoids the expensive disk reload (~2 minutes for + // 1M vectors) that was previously needed. The model training itself doesn't + // need the graph, only the collected records. std::optional training_result_opt; if (needs_training) { // Compute model output directory - std::string segment_dir = index_file_path.substr(0, index_file_path.rfind('/')); + std::string segment_dir = + index_file_path.substr(0, index_file_path.rfind('/')); model_output_dir = segment_dir + "/omega_model"; - LOG_INFO("Starting OMEGA training data collection for field '%s' (using in-memory graph before flush)", column.c_str()); - LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d, ef_groundtruth=%d, k_train=%d", - omega_training_params.num_training_queries, - omega_training_params.ef_training, - omega_training_params.ef_groundtruth, - omega_training_params.k_train); + LOG_INFO( + "Starting OMEGA training data collection for field '%s' (using " + "in-memory graph before flush)", + column.c_str()); + LOG_INFO( + "Using OMEGA index params: num_training_queries=%zu, ef_training=%d, " + "ef_groundtruth=%d, k_train=%d", + omega_training_params.num_training_queries, + omega_training_params.ef_training, omega_training_params.ef_groundtruth, + omega_training_params.k_train); auto training_result = CollectOmegaTrainingDataBeforeFlush( shared_from_this(), column, vector_indexer, omega_training_params, @@ -1676,7 +1684,8 @@ Result SegmentImpl::merge_vector_indexer( s = vector_indexer->Flush(); CHECK_RETURN_STATUS_EXPECTED(s); - // Train the model using the previously collected data (doesn't need the graph) 
+ // Train the model using the previously collected data (doesn't need the + // graph) if (needs_training && training_result_opt.has_value()) { auto s_train = TrainOmegaModelAfterBuild(training_result_opt.value(), model_output_dir); @@ -2404,8 +2413,8 @@ Status SegmentImpl::cleanup() { } Status SegmentImpl::auto_train_omega_index_internal( - const std::string& field_name, - const std::vector& indexers) { + const std::string &field_name, + const std::vector &indexers) { LOG_WARN("Starting auto-training for OMEGA index on field '%s' in segment %d", field_name.c_str(), id()); @@ -2413,49 +2422,57 @@ Status SegmentImpl::auto_train_omega_index_internal( auto field = collection_schema_->get_field(field_name); if (field && field->index_params()) { omega_training_params = ResolveOmegaTrainingParams(field->index_params()); - LOG_INFO("Using OMEGA index params: num_training_queries=%zu, ef_training=%d, ef_groundtruth=%d, min_vector_threshold=%u, k_train=%d", - omega_training_params.num_training_queries, - omega_training_params.ef_training, - omega_training_params.ef_groundtruth, - omega_training_params.min_vector_threshold, - omega_training_params.k_train); + LOG_INFO( + "Using OMEGA index params: num_training_queries=%zu, ef_training=%d, " + "ef_groundtruth=%d, min_vector_threshold=%u, k_train=%d", + omega_training_params.num_training_queries, + omega_training_params.ef_training, omega_training_params.ef_groundtruth, + omega_training_params.min_vector_threshold, + omega_training_params.k_train); } // Check if we have enough vectors to justify training size_t total_doc_count = 0; - for (const auto& indexer : indexers) { + for (const auto &indexer : indexers) { total_doc_count += indexer->doc_count(); } if (total_doc_count < omega_training_params.min_vector_threshold) { - LOG_INFO("Skipping OMEGA training for field '%s': doc_count=%zu < min_vector_threshold=%u", - field_name.c_str(), total_doc_count, - omega_training_params.min_vector_threshold); + LOG_INFO( + "Skipping OMEGA 
training for field '%s': doc_count=%zu < " + "min_vector_threshold=%u", + field_name.c_str(), total_doc_count, + omega_training_params.min_vector_threshold); return Status::OK(); } - LOG_INFO("Proceeding with OMEGA training: doc_count=%zu >= min_vector_threshold=%u", - total_doc_count, omega_training_params.min_vector_threshold); + LOG_INFO( + "Proceeding with OMEGA training: doc_count=%zu >= " + "min_vector_threshold=%u", + total_doc_count, omega_training_params.min_vector_threshold); // Step 1: Collect training data using the provided indexers - LOG_WARN("OMEGA retrain step 1/2: start collecting training data for field '%s' in segment %d", - field_name.c_str(), id()); + LOG_WARN( + "OMEGA retrain step 1/2: start collecting training data for field '%s' " + "in segment %d", + field_name.c_str(), id()); const std::string model_output_dir = FileHelper::MakeSegmentPath(path_, id()) + "/omega_model"; - auto training_records_result = CollectOmegaRetrainingData( - shared_from_this(), field_name, indexers, omega_training_params, - model_output_dir); + auto training_records_result = + CollectOmegaRetrainingData(shared_from_this(), field_name, indexers, + omega_training_params, model_output_dir); if (!training_records_result.has_value()) { - return Status::InternalError( - "Failed to collect training data: " + - training_records_result.error().message()); + return Status::InternalError("Failed to collect training data: " + + training_records_result.error().message()); } - LOG_WARN("OMEGA retrain step 1/2: finished collecting training data for field '%s' in segment %d", - field_name.c_str(), id()); + LOG_WARN( + "OMEGA retrain step 1/2: finished collecting training data for field " + "'%s' in segment %d", + field_name.c_str(), id()); - auto& training_result = training_records_result.value(); + auto &training_result = training_records_result.value(); LOG_INFO("Collected %zu training records for segment %d", training_result.records.size(), id()); @@ -2464,7 +2481,7 @@ Status 
SegmentImpl::auto_train_omega_index_internal( } Status SegmentImpl::retrain_omega_model() { - for (const auto& field : collection_schema_->vector_fields()) { + for (const auto &field : collection_schema_->vector_fields()) { if (!field->index_params()) { continue; } @@ -2476,13 +2493,17 @@ Status SegmentImpl::retrain_omega_model() { auto indexers = get_vector_indexer(field->name()); if (indexers.empty()) { - LOG_INFO("Skipping OMEGA retraining for field '%s' in segment %d: no vector indexers loaded", - field->name().c_str(), id()); + LOG_INFO( + "Skipping OMEGA retraining for field '%s' in segment %d: no vector " + "indexers loaded", + field->name().c_str(), id()); continue; } - LOG_WARN("Retraining OMEGA model for field '%s' in segment %d using existing index", - field->name().c_str(), id()); + LOG_WARN( + "Retraining OMEGA model for field '%s' in segment %d using existing " + "index", + field->name().c_str(), id()); auto s = auto_train_omega_index_internal(field->name(), indexers); CHECK_RETURN_STATUS(s); } @@ -4161,10 +4182,8 @@ Status SegmentImpl::load_scalar_index_blocks(bool create) { } Status SegmentImpl::load_vector_index_blocks() { - int block_index = 0; for (const auto &block : segment_meta_->persisted_blocks()) { - if (block.type() == BlockType::VECTOR_INDEX || block.type() == BlockType::VECTOR_INDEX_QUANTIZE) { // vector block only contained 1 column @@ -4186,7 +4205,7 @@ Status SegmentImpl::load_vector_index_blocks() { MakeDefaultVectorIndexParams(vector_index_params->metric_type())); } else { } - } else{ + } else { if (!segment_meta_->vector_indexed(column)) { new_field_params.set_index_params(MakeDefaultQuantVectorIndexParams( vector_index_params->metric_type(), diff --git a/src/db/training/omega_model_trainer.cc b/src/db/training/omega_model_trainer.cc index e9d20c542..176050deb 100644 --- a/src/db/training/omega_model_trainer.cc +++ b/src/db/training/omega_model_trainer.cc @@ -13,9 +13,9 @@ // limitations under the License. 
#include "omega_model_trainer.h" -#include #include #include +#include #include namespace zvec { @@ -23,7 +23,7 @@ namespace zvec { namespace { // Convert zvec TrainingRecord to omega TrainingRecord -omega::TrainingRecord ConvertRecord(const core_interface::TrainingRecord& src) { +omega::TrainingRecord ConvertRecord(const core_interface::TrainingRecord &src) { omega::TrainingRecord dst; dst.query_id = src.query_id; dst.hops_visited = src.hops_visited; @@ -32,13 +32,13 @@ omega::TrainingRecord ConvertRecord(const core_interface::TrainingRecord& src) { dst.dist_start = src.dist_start; // Convert std::array to std::vector dst.traversal_window_stats.assign(src.traversal_window_stats.begin(), - src.traversal_window_stats.end()); + src.traversal_window_stats.end()); dst.label = src.label; // Already computed in real-time during search return dst; } // Convert zvec GtCmpsData to omega GtCmpsData -omega::GtCmpsData ConvertGtCmpsData(const core_interface::GtCmpsData& src) { +omega::GtCmpsData ConvertGtCmpsData(const core_interface::GtCmpsData &src) { omega::GtCmpsData dst; dst.num_queries = src.num_queries; dst.topk = src.topk; @@ -50,9 +50,9 @@ omega::GtCmpsData ConvertGtCmpsData(const core_interface::GtCmpsData& src) { } // namespace Status OmegaModelTrainer::TrainModelWithGtCmps( - const std::vector& training_records, - const core_interface::GtCmpsData& gt_cmps_data, - const OmegaModelTrainerOptions& options) { + const std::vector &training_records, + const core_interface::GtCmpsData >_cmps_data, + const OmegaModelTrainerOptions &options) { if (training_records.empty()) { return Status::InvalidArgument("Training records are empty"); } @@ -69,23 +69,23 @@ Status OmegaModelTrainer::TrainModelWithGtCmps( // Convert training records std::vector omega_records; omega_records.reserve(training_records.size()); - for (const auto& r : training_records) { + for (const auto &r : training_records) { omega_records.push_back(ConvertRecord(r)); } - std::sort(omega_records.begin(), 
omega_records.end(), - [](const omega::TrainingRecord& lhs, - const omega::TrainingRecord& rhs) { - if (lhs.query_id != rhs.query_id) { - return lhs.query_id < rhs.query_id; - } - if (lhs.cmps_visited != rhs.cmps_visited) { - return lhs.cmps_visited < rhs.cmps_visited; - } - if (lhs.hops_visited != rhs.hops_visited) { - return lhs.hops_visited < rhs.hops_visited; - } - return lhs.label < rhs.label; - }); + std::sort( + omega_records.begin(), omega_records.end(), + [](const omega::TrainingRecord &lhs, const omega::TrainingRecord &rhs) { + if (lhs.query_id != rhs.query_id) { + return lhs.query_id < rhs.query_id; + } + if (lhs.cmps_visited != rhs.cmps_visited) { + return lhs.cmps_visited < rhs.cmps_visited; + } + if (lhs.hops_visited != rhs.hops_visited) { + return lhs.hops_visited < rhs.hops_visited; + } + return lhs.label < rhs.label; + }); // Convert gt_cmps data omega::GtCmpsData omega_gt_cmps = ConvertGtCmpsData(gt_cmps_data); @@ -103,18 +103,23 @@ Status OmegaModelTrainer::TrainModelWithGtCmps( trainer_options.topk = gt_cmps_data.topk > 0 ? 
gt_cmps_data.topk : 100; // Train model - int ret = omega::OmegaTrainer::TrainModel(omega_records, omega_gt_cmps, trainer_options); + int ret = omega::OmegaTrainer::TrainModel(omega_records, omega_gt_cmps, + trainer_options); auto total_end = std::chrono::high_resolution_clock::now(); - auto total_ms = std::chrono::duration_cast(total_end - total_start).count(); + auto total_ms = std::chrono::duration_cast( + total_end - total_start) + .count(); if (ret != 0) { LOG_ERROR("OMEGA model training failed (return code: %d)", ret); return Status::InternalError("OMEGA model training failed"); } - LOG_INFO("[TIMING] TrainModelWithGtCmps (C++ LightGBM) TOTAL: %ld ms", total_ms); - LOG_INFO("Successfully trained OMEGA model, output: %s", options.output_dir.c_str()); + LOG_INFO("[TIMING] TrainModelWithGtCmps (C++ LightGBM) TOTAL: %ld ms", + total_ms); + LOG_INFO("Successfully trained OMEGA model, output: %s", + options.output_dir.c_str()); return Status::OK(); } diff --git a/src/db/training/omega_model_trainer.h b/src/db/training/omega_model_trainer.h index 7e7a726de..266e00b06 100644 --- a/src/db/training/omega_model_trainer.h +++ b/src/db/training/omega_model_trainer.h @@ -15,8 +15,8 @@ #pragma once #include -#include #include +#include #include #include #include @@ -70,9 +70,9 @@ class OmegaModelTrainer { * @return Status indicating success or failure */ static Status TrainModelWithGtCmps( - const std::vector& training_records, - const core_interface::GtCmpsData& gt_cmps_data, - const OmegaModelTrainerOptions& options); + const std::vector &training_records, + const core_interface::GtCmpsData >_cmps_data, + const OmegaModelTrainerOptions &options); }; } // namespace zvec diff --git a/src/db/training/omega_training_coordinator.cc b/src/db/training/omega_training_coordinator.cc index 836046835..2a0aad3dc 100644 --- a/src/db/training/omega_training_coordinator.cc +++ b/src/db/training/omega_training_coordinator.cc @@ -13,7 +13,6 @@ // limitations under the License. 
#include "db/training/omega_training_coordinator.h" - #include #include #include @@ -32,8 +31,8 @@ constexpr uint32_t kOmegaQueryCacheVersion = 1; } // namespace static void WriteOmegaTimingStatsJson( - const std::string& output_path, - const std::vector>& stats) { + const std::string &output_path, + const std::vector> &stats) { std::ofstream ofs(output_path); if (!ofs.is_open()) { return; @@ -49,19 +48,19 @@ static void WriteOmegaTimingStatsJson( ofs << "}\n"; } -static std::string OmegaQueryCachePath(const std::string& model_output_dir) { +static std::string OmegaQueryCachePath(const std::string &model_output_dir) { return model_output_dir + "/training_queries.bin"; } static bool SaveOmegaTrainingQueryCache( - const std::string& model_output_dir, - const std::vector>& queries, - const std::vector& query_doc_ids) { + const std::string &model_output_dir, + const std::vector> &queries, + const std::vector &query_doc_ids) { if (queries.empty() || queries.size() != query_doc_ids.size()) { return false; } const uint32_t dim = static_cast(queries[0].size()); - for (const auto& query : queries) { + for (const auto &query : queries) { if (query.size() != dim) { return false; } @@ -73,25 +72,25 @@ static bool SaveOmegaTrainingQueryCache( } const uint64_t num_queries = queries.size(); - ofs.write(reinterpret_cast(&kOmegaQueryCacheMagic), + ofs.write(reinterpret_cast(&kOmegaQueryCacheMagic), sizeof(kOmegaQueryCacheMagic)); - ofs.write(reinterpret_cast(&kOmegaQueryCacheVersion), + ofs.write(reinterpret_cast(&kOmegaQueryCacheVersion), sizeof(kOmegaQueryCacheVersion)); - ofs.write(reinterpret_cast(&num_queries), sizeof(num_queries)); - ofs.write(reinterpret_cast(&dim), sizeof(dim)); + ofs.write(reinterpret_cast(&num_queries), sizeof(num_queries)); + ofs.write(reinterpret_cast(&dim), sizeof(dim)); for (size_t i = 0; i < queries.size(); ++i) { - ofs.write(reinterpret_cast(&query_doc_ids[i]), + ofs.write(reinterpret_cast(&query_doc_ids[i]), sizeof(query_doc_ids[i])); - 
ofs.write(reinterpret_cast(queries[i].data()), + ofs.write(reinterpret_cast(queries[i].data()), static_cast(dim * sizeof(float))); } return ofs.good(); } static bool LoadOmegaTrainingQueryCache( - const std::string& model_output_dir, - std::vector>* queries, - std::vector* query_doc_ids) { + const std::string &model_output_dir, + std::vector> *queries, + std::vector *query_doc_ids) { std::ifstream ifs(OmegaQueryCachePath(model_output_dir), std::ios::binary); if (!ifs.is_open()) { return false; @@ -101,10 +100,10 @@ static bool LoadOmegaTrainingQueryCache( uint32_t version = 0; uint64_t num_queries = 0; uint32_t dim = 0; - ifs.read(reinterpret_cast(&magic), sizeof(magic)); - ifs.read(reinterpret_cast(&version), sizeof(version)); - ifs.read(reinterpret_cast(&num_queries), sizeof(num_queries)); - ifs.read(reinterpret_cast(&dim), sizeof(dim)); + ifs.read(reinterpret_cast(&magic), sizeof(magic)); + ifs.read(reinterpret_cast(&version), sizeof(version)); + ifs.read(reinterpret_cast(&num_queries), sizeof(num_queries)); + ifs.read(reinterpret_cast(&dim), sizeof(dim)); if (!ifs.good() || magic != kOmegaQueryCacheMagic || version != kOmegaQueryCacheVersion || num_queries == 0 || dim == 0) { return false; @@ -113,8 +112,8 @@ static bool LoadOmegaTrainingQueryCache( queries->assign(num_queries, std::vector(dim)); query_doc_ids->assign(num_queries, 0); for (size_t i = 0; i < num_queries; ++i) { - ifs.read(reinterpret_cast(&(*query_doc_ids)[i]), sizeof(uint64_t)); - ifs.read(reinterpret_cast((*queries)[i].data()), + ifs.read(reinterpret_cast(&(*query_doc_ids)[i]), sizeof(uint64_t)); + ifs.read(reinterpret_cast((*queries)[i].data()), static_cast(dim * sizeof(float))); if (!ifs.good()) { queries->clear(); @@ -126,7 +125,7 @@ static bool LoadOmegaTrainingQueryCache( } OmegaTrainingParams ResolveOmegaTrainingParams( - const IndexParams::Ptr& index_params) { + const IndexParams::Ptr &index_params) { OmegaTrainingParams params; auto omega_params = 
std::dynamic_pointer_cast(index_params); if (!omega_params) { @@ -142,11 +141,9 @@ OmegaTrainingParams ResolveOmegaTrainingParams( } Result CollectOmegaTrainingDataBeforeFlush( - const Segment::Ptr& segment, - const std::string& field_name, - const VectorColumnIndexer::Ptr& vector_indexer, - const OmegaTrainingParams& params, - const std::string& model_output_dir) { + const Segment::Ptr &segment, const std::string &field_name, + const VectorColumnIndexer::Ptr &vector_indexer, + const OmegaTrainingParams ¶ms, const std::string &model_output_dir) { TrainingDataCollectorOptions collector_opts; const size_t doc_count = vector_indexer->doc_count(); collector_opts.num_training_queries = @@ -179,11 +176,9 @@ Result CollectOmegaTrainingDataBeforeFlush( } Result CollectOmegaRetrainingData( - const Segment::Ptr& segment, - const std::string& field_name, - const std::vector& indexers, - const OmegaTrainingParams& params, - const std::string& model_output_dir) { + const Segment::Ptr &segment, const std::string &field_name, + const std::vector &indexers, + const OmegaTrainingParams ¶ms, const std::string &model_output_dir) { TrainingDataCollectorOptions collector_options; collector_options.num_training_queries = params.num_training_queries; collector_options.ef_training = params.ef_training; @@ -196,23 +191,27 @@ Result CollectOmegaRetrainingData( if (LoadOmegaTrainingQueryCache(model_output_dir, &cached_queries, &cached_query_doc_ids)) { LOG_WARN("Loaded %zu cached held-out queries for OMEGA retraining from %s", - cached_queries.size(), OmegaQueryCachePath(model_output_dir).c_str()); + cached_queries.size(), + OmegaQueryCachePath(model_output_dir).c_str()); return TrainingDataCollector::CollectTrainingDataWithGtCmpsFromQueries( segment, field_name, cached_queries, cached_query_doc_ids, collector_options, indexers); } - LOG_WARN("OMEGA retrain query cache not found, falling back to sampling held-out queries from persisted segment"); + LOG_WARN( + "OMEGA retrain query cache not 
found, falling back to sampling held-out " + "queries from persisted segment"); return TrainingDataCollector::CollectTrainingDataWithGtCmps( segment, field_name, collector_options, indexers); } Status TrainOmegaModelAfterBuild( - const TrainingDataCollectorResult& training_result, - const std::string& model_output_dir) { + const TrainingDataCollectorResult &training_result, + const std::string &model_output_dir) { if (training_result.records.size() < 100) { - LOG_INFO("Skipping model training: only %zu records collected (need >= 100)", - training_result.records.size()); + LOG_INFO( + "Skipping model training: only %zu records collected (need >= 100)", + training_result.records.size()); return Status::OK(); } @@ -232,19 +231,17 @@ Status TrainOmegaModelAfterBuild( LOG_INFO("OMEGA model training completed successfully: %s", trainer_opts.output_dir.c_str()); } else { - LOG_WARN("OMEGA model training failed: %s", - train_status.message().c_str()); + LOG_WARN("OMEGA model training failed: %s", train_status.message().c_str()); } return Status::OK(); } Status TrainOmegaModelAfterRetrainCollect( - const TrainingDataCollectorResult& training_result, - const std::string& model_output_dir, - SegmentID segment_id, - const std::string& field_name) { - const auto& training_records = training_result.records; + const TrainingDataCollectorResult &training_result, + const std::string &model_output_dir, SegmentID segment_id, + const std::string &field_name) { + const auto &training_records = training_result.records; if (training_records.empty()) { LOG_WARN("No training records collected, skipping model training"); return Status::OK(); @@ -252,7 +249,7 @@ Status TrainOmegaModelAfterRetrainCollect( size_t positive_count = 0; size_t negative_count = 0; - for (const auto& record : training_records) { + for (const auto &record : training_records) { if (record.label == 1) { positive_count++; } else { @@ -261,31 +258,36 @@ Status TrainOmegaModelAfterRetrainCollect( } if (positive_count == 0 
|| negative_count == 0) { - LOG_WARN("Insufficient training samples: %zu positive, %zu negative. Need both > 0. Skipping training.", - positive_count, negative_count); + LOG_WARN( + "Insufficient training samples: %zu positive, %zu negative. Need both " + "> 0. Skipping training.", + positive_count, negative_count); return Status::OK(); } if (positive_count < 50 || negative_count < 50) { - LOG_WARN("Too few training samples: %zu positive, %zu negative. Need at least 50 of each. Skipping training.", - positive_count, negative_count); + LOG_WARN( + "Too few training samples: %zu positive, %zu negative. Need at least " + "50 of each. Skipping training.", + positive_count, negative_count); return Status::OK(); } LOG_INFO("Training data stats: %zu positive, %zu negative samples", positive_count, negative_count); - LOG_WARN("OMEGA retrain step 2/2: start model training for field '%s' in segment %d", - field_name.c_str(), segment_id); + LOG_WARN( + "OMEGA retrain step 2/2: start model training for field '%s' in segment " + "%d", + field_name.c_str(), segment_id); OmegaModelTrainerOptions trainer_options; trainer_options.output_dir = model_output_dir; trainer_options.verbose = true; if (!FileHelper::DirectoryExists(trainer_options.output_dir) && !FileHelper::CreateDirectory(trainer_options.output_dir)) { - return Status::InternalError( - "Failed to create model output directory: " + - trainer_options.output_dir); + return Status::InternalError("Failed to create model output directory: " + + trainer_options.output_dir); } WriteOmegaTimingStatsJson( @@ -295,12 +297,14 @@ Status TrainOmegaModelAfterRetrainCollect( auto train_status = OmegaModelTrainer::TrainModelWithGtCmps( training_records, training_result.gt_cmps_data, trainer_options); if (!train_status.ok()) { - return Status::InternalError( - "Failed to train OMEGA model: " + train_status.message()); + return Status::InternalError("Failed to train OMEGA model: " + + train_status.message()); } - LOG_WARN("OMEGA retrain 
step 2/2: finished model training for segment %d, output: %s", - segment_id, trainer_options.output_dir.c_str()); + LOG_WARN( + "OMEGA retrain step 2/2: finished model training for segment %d, output: " + "%s", + segment_id, trainer_options.output_dir.c_str()); return Status::OK(); } diff --git a/src/db/training/omega_training_coordinator.h b/src/db/training/omega_training_coordinator.h index d8a8fcc53..e5f59b341 100644 --- a/src/db/training/omega_training_coordinator.h +++ b/src/db/training/omega_training_coordinator.h @@ -35,30 +35,25 @@ struct OmegaTrainingParams { }; OmegaTrainingParams ResolveOmegaTrainingParams( - const IndexParams::Ptr& index_params); + const IndexParams::Ptr &index_params); Result CollectOmegaTrainingDataBeforeFlush( - const Segment::Ptr& segment, - const std::string& field_name, - const VectorColumnIndexer::Ptr& vector_indexer, - const OmegaTrainingParams& params, - const std::string& model_output_dir); + const Segment::Ptr &segment, const std::string &field_name, + const VectorColumnIndexer::Ptr &vector_indexer, + const OmegaTrainingParams ¶ms, const std::string &model_output_dir); Result CollectOmegaRetrainingData( - const Segment::Ptr& segment, - const std::string& field_name, - const std::vector& indexers, - const OmegaTrainingParams& params, - const std::string& model_output_dir); + const Segment::Ptr &segment, const std::string &field_name, + const std::vector &indexers, + const OmegaTrainingParams ¶ms, const std::string &model_output_dir); Status TrainOmegaModelAfterBuild( - const TrainingDataCollectorResult& training_result, - const std::string& model_output_dir); + const TrainingDataCollectorResult &training_result, + const std::string &model_output_dir); Status TrainOmegaModelAfterRetrainCollect( - const TrainingDataCollectorResult& training_result, - const std::string& model_output_dir, - SegmentID segment_id, - const std::string& field_name); + const TrainingDataCollectorResult &training_result, + const std::string 
&model_output_dir, SegmentID segment_id, + const std::string &field_name); } // namespace zvec diff --git a/src/db/training/query_generator.cc b/src/db/training/query_generator.cc index 9b99ee849..99c5f2263 100644 --- a/src/db/training/query_generator.cc +++ b/src/db/training/query_generator.cc @@ -19,10 +19,8 @@ namespace zvec { SampledVectors TrainingQueryGenerator::SampleBaseVectorsWithIds( - const Segment::Ptr& segment, - const std::string& field_name, - size_t num_samples, - uint64_t seed) { + const Segment::Ptr &segment, const std::string &field_name, + size_t num_samples, uint64_t seed) { SampledVectors result; // Get total document count @@ -78,21 +76,22 @@ SampledVectors TrainingQueryGenerator::SampleBaseVectorsWithIds( } SampledVectors TrainingQueryGenerator::GenerateHeldOutQueries( - const Segment::Ptr& segment, - const std::string& field_name, - size_t num_queries, - uint64_t seed) { + const Segment::Ptr &segment, const std::string &field_name, + size_t num_queries, uint64_t seed) { // Sample vectors directly from the index - no noise added - // These vectors will be used as queries, with their doc_ids excluded from ground truth - auto result = SampleBaseVectorsWithIds(segment, field_name, num_queries, seed); + // These vectors will be used as queries, with their doc_ids excluded from + // ground truth + auto result = + SampleBaseVectorsWithIds(segment, field_name, num_queries, seed); if (result.vectors.empty()) { LOG_ERROR("Failed to sample vectors from segment for held-out queries"); return result; } - LOG_INFO("Generated %zu held-out queries (vectors sampled directly from index)", - result.vectors.size()); + LOG_INFO( + "Generated %zu held-out queries (vectors sampled directly from index)", + result.vectors.size()); return result; } diff --git a/src/db/training/query_generator.h b/src/db/training/query_generator.h index f7ac81b3c..077020529 100644 --- a/src/db/training/query_generator.h +++ b/src/db/training/query_generator.h @@ -27,7 +27,8 @@ 
namespace zvec { */ struct SampledVectors { std::vector> vectors; - std::vector doc_ids; // doc_id of each sampled vector (for exclusion in GT) + std::vector + doc_ids; // doc_id of each sampled vector (for exclusion in GT) }; /** @@ -49,11 +50,10 @@ class TrainingQueryGenerator { * @param seed Random seed for reproducibility * @return SampledVectors with vectors and their doc_ids */ - static SampledVectors SampleBaseVectorsWithIds( - const Segment::Ptr& segment, - const std::string& field_name, - size_t num_samples, - uint64_t seed = 42); + static SampledVectors SampleBaseVectorsWithIds(const Segment::Ptr &segment, + const std::string &field_name, + size_t num_samples, + uint64_t seed = 42); /** * @brief Generate training queries using held-out approach @@ -67,11 +67,10 @@ class TrainingQueryGenerator { * @param seed Random seed for reproducibility * @return SampledVectors with query vectors and their doc_ids */ - static SampledVectors GenerateHeldOutQueries( - const Segment::Ptr& segment, - const std::string& field_name, - size_t num_queries, - uint64_t seed = 42); + static SampledVectors GenerateHeldOutQueries(const Segment::Ptr &segment, + const std::string &field_name, + size_t num_queries, + uint64_t seed = 42); }; } // namespace zvec diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 105d624e9..939deb504 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -14,17 +14,17 @@ #include "training_data_collector.h" #include -#include #include +#include #include #include #include #include +#include #include #include #include "db/index/column/vector_column/vector_column_params.h" #include "query_generator.h" -#include namespace zvec { @@ -36,13 +36,13 @@ struct TimingStatsState { std::unordered_map index_by_name; }; -TimingStatsState& GetTimingStatsState() { +TimingStatsState &GetTimingStatsState() { static TimingStatsState state; return state; } -void 
RecordTimingStat(const std::string& name, int64_t duration_ms) { - auto& state = GetTimingStatsState(); +void RecordTimingStat(const std::string &name, int64_t duration_ms) { + auto &state = GetTimingStatsState(); std::lock_guard lock(state.mu); auto it = state.index_by_name.find(name); if (it == state.index_by_name.end()) { @@ -55,22 +55,25 @@ void RecordTimingStat(const std::string& name, int64_t duration_ms) { class ScopedTimer { public: - explicit ScopedTimer(const std::string& name) : name_(name) { + explicit ScopedTimer(const std::string &name) : name_(name) { start_ = std::chrono::high_resolution_clock::now(); } ~ScopedTimer() { auto end = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(end - start_).count(); + auto duration = + std::chrono::duration_cast(end - start_) + .count(); RecordTimingStat(name_, duration); } + private: std::string name_; std::chrono::high_resolution_clock::time_point start_; }; std::vector ResolveTrainingIndexers( - const Segment::Ptr& segment, const std::string& field_name, - const std::vector& provided_indexers) { + const Segment::Ptr &segment, const std::string &field_name, + const std::vector &provided_indexers) { if (!provided_indexers.empty()) { return provided_indexers; } @@ -78,8 +81,8 @@ std::vector ResolveTrainingIndexers( } std::vector StartTrainingSessions( - const std::vector& indexers, - const std::vector>& ground_truth, size_t topk, + const std::vector &indexers, + const std::vector> &ground_truth, size_t topk, int k_train) { std::vector sessions; sessions.reserve(indexers.size()); @@ -89,7 +92,7 @@ std::vector StartTrainingSessions( config.topk = topk; config.k_train = k_train; - for (auto& indexer : indexers) { + for (auto &indexer : indexers) { auto session = indexer->CreateTrainingSession(); if (session == nullptr) { LOG_WARN("Indexer does not expose a training session"); @@ -111,9 +114,9 @@ std::vector StartTrainingSessions( } core_interface::TrainingArtifacts 
ConsumeTrainingArtifacts( - const std::vector& sessions) { + const std::vector &sessions) { core_interface::TrainingArtifacts merged; - for (const auto& session : sessions) { + for (const auto &session : sessions) { if (session == nullptr) { continue; } @@ -130,8 +133,8 @@ core_interface::TrainingArtifacts ConsumeTrainingArtifacts( } void FinishTrainingSessions( - const std::vector& indexers, - const std::vector& sessions) { + const std::vector &indexers, + const std::vector &sessions) { for (size_t i = 0; i < indexers.size(); ++i) { if (i < sessions.size() && sessions[i] != nullptr) { sessions[i]->Finish(); @@ -142,14 +145,14 @@ void FinishTrainingSessions( } // namespace void TrainingDataCollector::ResetTimingStats() { - auto& state = GetTimingStatsState(); + auto &state = GetTimingStatsState(); std::lock_guard lock(state.mu); state.ordered_stats.clear(); state.index_by_name.clear(); } TrainingDataCollector::TimingStats TrainingDataCollector::ConsumeTimingStats() { - auto& state = GetTimingStatsState(); + auto &state = GetTimingStatsState(); std::lock_guard lock(state.mu); TimingStats timings = std::move(state.ordered_stats); state.ordered_stats.clear(); @@ -157,19 +160,20 @@ TrainingDataCollector::TimingStats TrainingDataCollector::ConsumeTimingStats() { return timings; } -Result TrainingDataCollector::CollectTrainingDataFromQueriesImpl( - const Segment::Ptr& segment, const std::string& field_name, - const std::vector>& training_queries, - const std::vector>& provided_ground_truth, - const TrainingDataCollectorOptions& options, - const std::vector& query_doc_ids, - const std::vector& provided_indexers) { +Result +TrainingDataCollector::CollectTrainingDataFromQueriesImpl( + const Segment::Ptr &segment, const std::string &field_name, + const std::vector> &training_queries, + const std::vector> &provided_ground_truth, + const TrainingDataCollectorOptions &options, + const std::vector &query_doc_ids, + const std::vector &provided_indexers) { std::vector indexers = 
ResolveTrainingIndexers(segment, field_name, provided_indexers); if (indexers.empty()) { - return tl::make_unexpected( - Status::InternalError("No vector indexers found for field: " + field_name)); + return tl::make_unexpected(Status::InternalError( + "No vector indexers found for field: " + field_name)); } if (training_queries.empty()) { @@ -185,16 +189,17 @@ Result TrainingDataCollector::CollectTrainingDataFr options.topk, options.ef_groundtruth); ScopedTimer timer("Step2: ComputeGroundTruth"); ground_truth = TrainingDataCollector::ComputeGroundTruth( - segment, field_name, training_queries, options.topk, options.num_threads, - query_doc_ids, options.ef_groundtruth, metric_type, indexers); + segment, field_name, training_queries, options.topk, + options.num_threads, query_doc_ids, options.ef_groundtruth, metric_type, + indexers); } else if (ground_truth.size() != training_queries.size()) { return tl::make_unexpected(Status::InvalidArgument( "Ground truth size does not match query count")); } if (ground_truth.empty()) { - return tl::make_unexpected(Status::InternalError( - "Failed to obtain ground truth")); + return tl::make_unexpected( + Status::InternalError("Failed to obtain ground truth")); } LOG_INFO("Starting training sessions for %zu queries on %zu indexers", @@ -225,11 +230,12 @@ Result TrainingDataCollector::CollectTrainingDataFr auto worker = [&](size_t start_idx, size_t end_idx) { for (size_t query_idx = start_idx; query_idx < end_idx; ++query_idx) { - const auto& query_vector = training_queries[query_idx]; + const auto &query_vector = training_queries[query_idx]; vector_column_params::VectorData vector_data; vector_data.vector = vector_column_params::DenseVector{ - .data = const_cast(static_cast(query_vector.data()))}; + .data = const_cast( + static_cast(query_vector.data()))}; vector_column_params::QueryParams query_params; query_params.topk = options.topk; @@ -242,7 +248,8 @@ Result TrainingDataCollector::CollectTrainingDataFr 
query_params.query_params = omega_params; if (indexers.size() != 1 && query_idx == start_idx) { - LOG_WARN("Expected 1 indexer but found %zu, using first one only", indexers.size()); + LOG_WARN("Expected 1 indexer but found %zu, using first one only", + indexers.size()); } // Persisted OMEGA collections currently do not propagate per-query @@ -261,7 +268,7 @@ Result TrainingDataCollector::CollectTrainingDataFr continue; } - auto& results = search_result.value(); + auto &results = search_result.value(); std::vector result_ids; result_ids.reserve(results->count()); auto iter = results->create_iterator(); @@ -279,22 +286,23 @@ Result TrainingDataCollector::CollectTrainingDataFr (training_queries.size() + actual_threads - 1) / actual_threads; for (size_t t = 0; t < actual_threads; ++t) { size_t start_idx = t * queries_per_thread; - size_t end_idx = std::min(start_idx + queries_per_thread, training_queries.size()); + size_t end_idx = + std::min(start_idx + queries_per_thread, training_queries.size()); if (start_idx < end_idx) { threads.emplace_back(worker, start_idx, end_idx); } } - for (auto& thread : threads) { + for (auto &thread : threads) { thread.join(); } auto search_end = std::chrono::high_resolution_clock::now(); - auto total_ms = - std::chrono::duration_cast(search_end - search_start) - .count(); - LOG_INFO("Training searches completed in %zu ms (%zu threads)", - total_ms, actual_threads); + auto total_ms = std::chrono::duration_cast( + search_end - search_start) + .count(); + LOG_INFO("Training searches completed in %zu ms (%zu threads)", total_ms, + actual_threads); } LOG_INFO("Collecting training records from indexers"); @@ -306,7 +314,7 @@ Result TrainingDataCollector::CollectTrainingDataFr training_artifacts.records.size()); } - auto& all_records = training_artifacts.records; + auto &all_records = training_artifacts.records; if (all_records.empty()) { LOG_WARN("No training records collected from any indexer"); @@ -314,24 +322,28 @@ Result 
TrainingDataCollector::CollectTrainingDataFr size_t positive_count = 0; size_t negative_count = 0; - for (const auto& record : all_records) { + for (const auto &record : all_records) { if (record.label > 0) { ++positive_count; } else { ++negative_count; } } - LOG_INFO("Collected %zu records: %zu positive, %zu negative (labels computed in real-time)", - all_records.size(), positive_count, negative_count); + LOG_INFO( + "Collected %zu records: %zu positive, %zu negative (labels computed in " + "real-time)", + all_records.size(), positive_count, negative_count); LOG_INFO("Collecting gt_cmps data from indexers"); - core_interface::GtCmpsData gt_cmps_data = std::move(training_artifacts.gt_cmps_data); + core_interface::GtCmpsData gt_cmps_data = + std::move(training_artifacts.gt_cmps_data); { ScopedTimer timer("Step6: GetGtCmpsData"); if (gt_cmps_data.gt_cmps.empty()) { - LOG_WARN("No actual gt_cmps data collected, falling back to approximation"); - gt_cmps_data = - TrainingDataCollector::ComputeGtCmps(all_records, ground_truth, options.topk); + LOG_WARN( + "No actual gt_cmps data collected, falling back to approximation"); + gt_cmps_data = TrainingDataCollector::ComputeGtCmps( + all_records, ground_truth, options.topk); } else { LOG_INFO("Got actual gt_cmps data for %zu queries, topk=%zu", gt_cmps_data.num_queries, gt_cmps_data.topk); @@ -353,15 +365,11 @@ Result TrainingDataCollector::CollectTrainingDataFr // ============ END DEBUG TIMING UTILITIES ============ std::vector> TrainingDataCollector::ComputeGroundTruth( - const Segment::Ptr& segment, - const std::string& field_name, - const std::vector>& queries, - size_t topk, - size_t num_threads, - const std::vector& query_doc_ids, - int ef_groundtruth, - MetricType metric_type, - const std::vector& provided_indexers) { + const Segment::Ptr &segment, const std::string &field_name, + const std::vector> &queries, size_t topk, + size_t num_threads, const std::vector &query_doc_ids, + int ef_groundtruth, MetricType 
metric_type, + const std::vector &provided_indexers) { std::vector> ground_truth(queries.size()); if (queries.empty()) { @@ -369,18 +377,22 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( } // Check if we have query doc_ids for self-exclusion (held-out mode) - bool held_out_mode = !query_doc_ids.empty() && query_doc_ids.size() == queries.size(); + bool held_out_mode = + !query_doc_ids.empty() && query_doc_ids.size() == queries.size(); if (held_out_mode) { - LOG_INFO("Computing ground truth in held-out mode (excluding self-matches)"); + LOG_INFO( + "Computing ground truth in held-out mode (excluding self-matches)"); } // Get total document count uint64_t doc_count = segment->doc_count(); size_t dim = queries[0].size(); - LOG_INFO("Computing ground truth: %zu queries, %zu base vectors, dim=%zu, topk=%zu, metric=%s, ef_groundtruth=%d", - queries.size(), static_cast(doc_count), dim, topk, - MetricTypeCodeBook::AsString(metric_type).c_str(), ef_groundtruth); + LOG_INFO( + "Computing ground truth: %zu queries, %zu base vectors, dim=%zu, " + "topk=%zu, metric=%s, ef_groundtruth=%d", + queries.size(), static_cast(doc_count), dim, topk, + MetricTypeCodeBook::AsString(metric_type).c_str(), ef_groundtruth); auto start_time = std::chrono::high_resolution_clock::now(); @@ -390,8 +402,9 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // ============================================================ if (ef_groundtruth > 0) { // Use provided indexers if available, otherwise get from segment - // IMPORTANT: We must use provided_indexers when available because after Flush, - // segment->get_vector_indexer() returns stale indexers with cleared in-memory data + // IMPORTANT: We must use provided_indexers when available because after + // Flush, segment->get_vector_indexer() returns stale indexers with cleared + // in-memory data std::vector indexers; if (!provided_indexers.empty()) { indexers = provided_indexers; @@ -400,7 +413,10 @@ std::vector> 
TrainingDataCollector::ComputeGroundTruth( } if (indexers.empty()) { - LOG_ERROR("No vector indexers found for field '%s', falling back to brute force", field_name.c_str()); + LOG_ERROR( + "No vector indexers found for field '%s', falling back to brute " + "force", + field_name.c_str()); ef_groundtruth = 0; // Fall back to brute force } else { // For held-out mode, we need topk+1 to exclude self @@ -416,19 +432,22 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Warmup count: use a fraction of queries spread across threads // This helps load different parts of the index in parallel - size_t actual_threads = num_threads > 0 ? num_threads : std::thread::hardware_concurrency(); + size_t actual_threads = + num_threads > 0 ? num_threads : std::thread::hardware_concurrency(); size_t warmup_per_thread = 5; // Each thread does 5 warmup queries - size_t warmup_total = std::min(actual_threads * warmup_per_thread, queries.size()); + size_t warmup_total = + std::min(actual_threads * warmup_per_thread, queries.size()); // Parallel warmup using std::thread std::vector warmup_threads; auto warmup_worker = [&](size_t start_idx, size_t count) { - for (size_t i = 0; i < count && (start_idx + i) < queries.size(); ++i) { + for (size_t i = 0; i < count && (start_idx + i) < queries.size(); + ++i) { size_t q_idx = start_idx + i; vector_column_params::VectorData vector_data; vector_data.vector = vector_column_params::DenseVector{ - .data = const_cast(static_cast(queries[q_idx].data())) - }; + .data = const_cast( + static_cast(queries[q_idx].data()))}; vector_column_params::QueryParams query_params; query_params.topk = actual_topk; @@ -437,7 +456,8 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( auto omega_params = std::make_shared(); omega_params->set_ef(ef_groundtruth); - omega_params->set_training_query_id(-1); // Warmup, don't collect training data + omega_params->set_training_query_id( + -1); // Warmup, don't collect training data query_params.query_params = 
omega_params; static_cast(indexers[0]->Search(vector_data, query_params)); @@ -446,25 +466,33 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Launch warmup threads size_t queries_per_warmup_thread = warmup_total / actual_threads; - for (size_t t = 0; t < actual_threads && t * queries_per_warmup_thread < warmup_total; ++t) { + for (size_t t = 0; + t < actual_threads && t * queries_per_warmup_thread < warmup_total; + ++t) { size_t start = t * queries_per_warmup_thread; - size_t count = std::min(queries_per_warmup_thread, warmup_total - start); + size_t count = + std::min(queries_per_warmup_thread, warmup_total - start); if (count > 0) { warmup_threads.emplace_back(warmup_worker, start, count); } } - for (auto& t : warmup_threads) { + for (auto &t : warmup_threads) { t.join(); } auto warmup_end = std::chrono::high_resolution_clock::now(); - auto warmup_ms = std::chrono::duration_cast(warmup_end - warmup_start).count(); + auto warmup_ms = std::chrono::duration_cast( + warmup_end - warmup_start) + .count(); - // Note: If warmup takes very long (>60s), recommend using ef_groundtruth=0 (Eigen brute force) + // Note: If warmup takes very long (>60s), recommend using + // ef_groundtruth=0 (Eigen brute force) if (warmup_ms > 60000) { - LOG_INFO("HNSW warmup took %zu ms. For cold indexes, consider using ef_groundtruth=0 (Eigen brute force)", - warmup_ms); + LOG_INFO( + "HNSW warmup took %zu ms. For cold indexes, consider using " + "ef_groundtruth=0 (Eigen brute force)", + warmup_ms); } } @@ -472,7 +500,8 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( ground_truth.resize(queries.size()); // Use std::thread instead of OpenMP (same as training searches) - size_t actual_threads = num_threads > 0 ? num_threads : std::thread::hardware_concurrency(); + size_t actual_threads = + num_threads > 0 ? 
num_threads : std::thread::hardware_concurrency(); actual_threads = std::min(actual_threads, queries.size()); auto worker = [&](size_t start_idx, size_t end_idx) { @@ -480,8 +509,8 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Prepare query parameters (exactly same as training searches) vector_column_params::VectorData vector_data; vector_data.vector = vector_column_params::DenseVector{ - .data = const_cast(static_cast(queries[q].data())) - }; + .data = const_cast( + static_cast(queries[q].data()))}; vector_column_params::QueryParams query_params; query_params.topk = actual_topk; @@ -497,7 +526,7 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Search on first indexer (same as training searches) auto search_result = indexers[0]->Search(vector_data, query_params); if (search_result.has_value()) { - auto& results = search_result.value(); + auto &results = search_result.value(); std::vector result_ids; result_ids.reserve(results->count()); auto iter = results->create_iterator(); @@ -519,22 +548,26 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Launch threads std::vector threads; - size_t queries_per_thread = (queries.size() + actual_threads - 1) / actual_threads; + size_t queries_per_thread = + (queries.size() + actual_threads - 1) / actual_threads; for (size_t t = 0; t < actual_threads; ++t) { size_t start_idx = t * queries_per_thread; - size_t end_idx = std::min(start_idx + queries_per_thread, queries.size()); + size_t end_idx = + std::min(start_idx + queries_per_thread, queries.size()); if (start_idx < end_idx) { threads.emplace_back(worker, start_idx, end_idx); } } // Wait for all threads - for (auto& thread : threads) { + for (auto &thread : threads) { thread.join(); } auto end_time = std::chrono::high_resolution_clock::now(); - auto total_ms = std::chrono::duration_cast(end_time - start_time).count(); + auto total_ms = std::chrono::duration_cast( + end_time - start_time) + .count(); LOG_INFO("Computed ground truth 
(HNSW ef=%d) for %zu queries in %zu ms", ef_groundtruth, queries.size(), total_ms); return ground_truth; @@ -567,11 +600,13 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( std::atomic load_error{false}; // Load vectors in parallel - size_t actual_threads = num_threads > 0 ? num_threads : std::thread::hardware_concurrency(); + size_t actual_threads = + num_threads > 0 ? num_threads : std::thread::hardware_concurrency(); actual_threads = std::min(actual_threads, static_cast(doc_count)); auto load_worker = [&](size_t start_idx, size_t end_idx) { - for (size_t doc_idx = start_idx; doc_idx < end_idx && !load_error; ++doc_idx) { + for (size_t doc_idx = start_idx; doc_idx < end_idx && !load_error; + ++doc_idx) { auto doc = segment->Fetch(doc_idx); if (!doc) { LOG_WARN("Failed to fetch document at index %zu", doc_idx); @@ -581,19 +616,22 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( auto vector_opt = doc->get>(field_name); if (!vector_opt.has_value()) { - LOG_WARN("Document at index %zu does not have field '%s'", doc_idx, field_name.c_str()); + LOG_WARN("Document at index %zu does not have field '%s'", doc_idx, + field_name.c_str()); load_error = true; continue; } - const auto& vec = vector_opt.value(); + const auto &vec = vector_opt.value(); if (vec.size() != dim) { - LOG_WARN("Vector at index %zu has wrong dimension: %zu vs %zu", doc_idx, vec.size(), dim); + LOG_WARN("Vector at index %zu has wrong dimension: %zu vs %zu", doc_idx, + vec.size(), dim); load_error = true; continue; } - std::memcpy(base_vectors.data() + doc_idx * dim, vec.data(), dim * sizeof(float)); + std::memcpy(base_vectors.data() + doc_idx * dim, vec.data(), + dim * sizeof(float)); } }; @@ -602,18 +640,21 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( for (size_t t = 0; t < actual_threads; ++t) { size_t start_idx = t * docs_per_thread; - size_t end_idx = std::min(start_idx + docs_per_thread, static_cast(doc_count)); + size_t end_idx = + std::min(start_idx + 
docs_per_thread, static_cast(doc_count)); if (start_idx < end_idx) { load_threads.emplace_back(load_worker, start_idx, end_idx); } } - for (auto& thread : load_threads) { + for (auto &thread : load_threads) { thread.join(); } auto load_end = std::chrono::high_resolution_clock::now(); - auto load_ms = std::chrono::duration_cast(load_end - load_start).count(); + auto load_ms = std::chrono::duration_cast( + load_end - load_start) + .count(); if (load_error) { LOG_ERROR("Failed to load all base vectors, cannot compute ground truth"); @@ -623,39 +664,39 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Step 2: Flatten query vectors std::vector query_flat(queries.size() * dim); for (size_t i = 0; i < queries.size(); ++i) { - std::memcpy(query_flat.data() + i * dim, queries[i].data(), dim * sizeof(float)); + std::memcpy(query_flat.data() + i * dim, queries[i].data(), + dim * sizeof(float)); } // Step 3: Call OmegaLib's fast ground truth computation (Eigen) auto compute_start = std::chrono::high_resolution_clock::now(); ground_truth = omega::ComputeGroundTruth( - base_vectors.data(), - query_flat.data(), - doc_count, - queries.size(), - dim, - topk, - omega_metric, - held_out_mode, + base_vectors.data(), query_flat.data(), doc_count, queries.size(), dim, + topk, omega_metric, held_out_mode, query_doc_ids); // Pass query-to-base mapping for correct self-exclusion auto compute_end = std::chrono::high_resolution_clock::now(); - auto compute_ms = std::chrono::duration_cast(compute_end - compute_start).count(); + auto compute_ms = std::chrono::duration_cast( + compute_end - compute_start) + .count(); auto total_end = std::chrono::high_resolution_clock::now(); - auto total_ms = std::chrono::duration_cast(total_end - start_time).count(); + auto total_ms = std::chrono::duration_cast( + total_end - start_time) + .count(); - LOG_INFO("Computed ground truth (Eigen brute force) for %zu queries in %zu ms (load: %zu ms, compute: %zu ms)", - queries.size(), total_ms, load_ms, 
compute_ms); + LOG_INFO( + "Computed ground truth (Eigen brute force) for %zu queries in %zu ms " + "(load: %zu ms, compute: %zu ms)", + queries.size(), total_ms, load_ms, compute_ms); return ground_truth; } core_interface::GtCmpsData TrainingDataCollector::ComputeGtCmps( - const std::vector& records, - const std::vector>& ground_truth, - size_t topk) { + const std::vector &records, + const std::vector> &ground_truth, size_t topk) { // NOTE: This is a FALLBACK approximation method. // The preferred method is to collect actual gt_cmps during search via // VectorColumnIndexer::GetGtCmpsData(), which tracks the exact cmps value @@ -687,14 +728,14 @@ core_interface::GtCmpsData TrainingDataCollector::ComputeGtCmps( std::unordered_map query_max_cmps; std::unordered_map query_first_found_cmps; - for (const auto& record : records) { + for (const auto &record : records) { int query_id = record.query_id; if (query_id < 0 || query_id >= static_cast(ground_truth.size())) { continue; } // Track max cmps for each query - auto& max_cmps = query_max_cmps[query_id]; + auto &max_cmps = query_max_cmps[query_id]; max_cmps = std::max(max_cmps, record.cmps_visited); // Track first cmps where label became 1 @@ -715,23 +756,25 @@ core_interface::GtCmpsData TrainingDataCollector::ComputeGtCmps( // Use first_found_cmps if available, otherwise use total_cmps auto it = query_first_found_cmps.find(query_id); - int gt_cmps_value = (it != query_first_found_cmps.end()) ? it->second : result.total_cmps[q]; + int gt_cmps_value = (it != query_first_found_cmps.end()) + ? 
it->second + : result.total_cmps[q]; for (size_t r = 0; r < result.gt_cmps[q].size(); ++r) { result.gt_cmps[q][r] = gt_cmps_value; } } - LOG_INFO("Computed gt_cmps (approximation) for %zu queries, topk=%zu", result.num_queries, result.topk); + LOG_INFO("Computed gt_cmps (approximation) for %zu queries, topk=%zu", + result.num_queries, result.topk); return result; } Result TrainingDataCollector::CollectTrainingDataWithGtCmps( - const Segment::Ptr& segment, - const std::string& field_name, - const TrainingDataCollectorOptions& options, - const std::vector& provided_indexers) { + const Segment::Ptr &segment, const std::string &field_name, + const TrainingDataCollectorOptions &options, + const std::vector &provided_indexers) { ResetTimingStats(); ScopedTimer total_timer("CollectTrainingDataWithGtCmps [TOTAL]"); LOG_INFO("Generating %zu held-out training queries for field '%s'", @@ -754,12 +797,11 @@ TrainingDataCollector::CollectTrainingDataWithGtCmps( Result TrainingDataCollector::CollectTrainingDataWithGtCmpsFromQueries( - const Segment::Ptr& segment, - const std::string& field_name, - const std::vector>& training_queries, - const std::vector& query_doc_ids, - const TrainingDataCollectorOptions& options, - const std::vector& provided_indexers) { + const Segment::Ptr &segment, const std::string &field_name, + const std::vector> &training_queries, + const std::vector &query_doc_ids, + const TrainingDataCollectorOptions &options, + const std::vector &provided_indexers) { ResetTimingStats(); ScopedTimer total_timer("CollectTrainingDataWithGtCmps [TOTAL]"); LOG_INFO("Reusing %zu cached held-out training queries for field '%s'", diff --git a/src/db/training/training_data_collector.h b/src/db/training/training_data_collector.h index 1b520c209..3ef3c205c 100644 --- a/src/db/training/training_data_collector.h +++ b/src/db/training/training_data_collector.h @@ -36,16 +36,18 @@ struct TrainingDataCollectorOptions { // ef parameter for training searches (large value for recall ≈ 
1) int ef_training = 1000; - // ef parameter for ground truth computation (0 = brute force, >0 = HNSW with this ef) - // Using HNSW with large ef is much faster than brute force while maintaining high accuracy + // ef parameter for ground truth computation (0 = brute force, >0 = HNSW with + // this ef) Using HNSW with large ef is much faster than brute force while + // maintaining high accuracy int ef_groundtruth = 0; // Top-K results to retrieve per query size_t topk = 100; // K_train: number of ground truth results that must be collected for label=1 - // Label=1 iff the top K_train ground truth nodes are all in collected_node_ids - // Typically set to 1 (i.e., label=1 when the 1st ground truth is found) + // Label=1 iff the top K_train ground truth nodes are all in + // collected_node_ids Typically set to 1 (i.e., label=1 when the 1st ground + // truth is found) size_t k_train = 1; // Random seed for reproducibility @@ -95,18 +97,17 @@ class TrainingDataCollector { * @return TrainingDataCollectorResult with records and gt_cmps_data */ static Result CollectTrainingDataWithGtCmps( - const Segment::Ptr& segment, - const std::string& field_name, - const TrainingDataCollectorOptions& options, - const std::vector& indexers = {}); - - static Result CollectTrainingDataWithGtCmpsFromQueries( - const Segment::Ptr& segment, - const std::string& field_name, - const std::vector>& training_queries, - const std::vector& query_doc_ids, - const TrainingDataCollectorOptions& options, - const std::vector& indexers = {}); + const Segment::Ptr &segment, const std::string &field_name, + const TrainingDataCollectorOptions &options, + const std::vector &indexers = {}); + + static Result + CollectTrainingDataWithGtCmpsFromQueries( + const Segment::Ptr &segment, const std::string &field_name, + const std::vector> &training_queries, + const std::vector &query_doc_ids, + const TrainingDataCollectorOptions &options, + const std::vector &indexers = {}); private: /** @@ -117,28 +118,27 @@ 
class TrainingDataCollector { * @param queries Training query vectors * @param topk Number of top results to retrieve * @param num_threads Number of threads (0 = hardware_concurrency) - * @param query_doc_ids Optional doc_ids of query vectors (for self-exclusion in held-out mode) + * @param query_doc_ids Optional doc_ids of query vectors (for self-exclusion + * in held-out mode) * @param ef_groundtruth ef value for HNSW search (0 = brute force, >0 = HNSW) * @param metric_type Distance metric type (L2, IP, COSINE) - * @param indexers Optional pre-opened indexers (for HNSW GT, avoids using stale indexers from segment) + * @param indexers Optional pre-opened indexers (for HNSW GT, avoids using + * stale indexers from segment) * @return Ground truth doc IDs for each query */ static std::vector> ComputeGroundTruth( - const Segment::Ptr& segment, - const std::string& field_name, - const std::vector>& queries, - size_t topk, - size_t num_threads, - const std::vector& query_doc_ids = {}, - int ef_groundtruth = 0, - MetricType metric_type = MetricType::IP, - const std::vector& indexers = {}); + const Segment::Ptr &segment, const std::string &field_name, + const std::vector> &queries, size_t topk, + size_t num_threads, const std::vector &query_doc_ids = {}, + int ef_groundtruth = 0, MetricType metric_type = MetricType::IP, + const std::vector &indexers = {}); /** * @brief Compute gt_cmps data from training records and ground truth * * For each query and each GT rank, find the cmps value when that GT was first - * collected. This data is used to generate gt_collected_table and gt_cmps_all_table. + * collected. This data is used to generate gt_collected_table and + * gt_cmps_all_table. 
* * @param records Training records (must be sorted by query_id, then by cmps) * @param ground_truth Ground truth doc IDs per query @@ -146,18 +146,16 @@ class TrainingDataCollector { * @return GtCmpsData structure with computed gt_cmps */ static core_interface::GtCmpsData ComputeGtCmps( - const std::vector& records, - const std::vector>& ground_truth, - size_t topk); + const std::vector &records, + const std::vector> &ground_truth, size_t topk); static Result CollectTrainingDataFromQueriesImpl( - const Segment::Ptr& segment, - const std::string& field_name, - const std::vector>& training_queries, - const std::vector>& provided_ground_truth, - const TrainingDataCollectorOptions& options, - const std::vector& query_doc_ids, - const std::vector& provided_indexers); + const Segment::Ptr &segment, const std::string &field_name, + const std::vector> &training_queries, + const std::vector> &provided_ground_truth, + const TrainingDataCollectorOptions &options, + const std::vector &query_doc_ids, + const std::vector &provided_indexers); }; } // namespace zvec diff --git a/src/include/zvec/core/interface/index.h b/src/include/zvec/core/interface/index.h index 5ad1dc005..ac484872c 100644 --- a/src/include/zvec/core/interface/index.h +++ b/src/include/zvec/core/interface/index.h @@ -28,12 +28,12 @@ #include #include #include -#include -#include #include #include #include +#include #include +#include #include #include "zvec/core/framework/index_provider.h" @@ -104,9 +104,10 @@ struct SearchResult { // Training records collected during search (for OMEGA training mode) std::vector training_records_{}; // GT cmps data: cmps value when each GT rank was found (for OMEGA training) - // gt_cmps_per_rank_[rank] = cmps when GT[rank] first entered topk (-1 if not found) + // gt_cmps_per_rank_[rank] = cmps when GT[rank] first entered topk (-1 if not + // found) std::vector gt_cmps_per_rank_{}; - int total_cmps_{0}; // Total comparisons in this search + int total_cmps_{0}; // Total 
comparisons in this search int training_query_id_{-1}; // Query ID for this search (-1 if not training) }; @@ -150,10 +151,11 @@ class Index { * This method allows indexes to optionally provide training functionality * without polluting the base Index class. Follows the Capability Pattern. * - * @return Pointer to ITrainingCapable interface if supported, nullptr otherwise + * @return Pointer to ITrainingCapable interface if supported, nullptr + * otherwise * */ - virtual class ITrainingCapable* GetTrainingCapability() { + virtual class ITrainingCapable *GetTrainingCapability() { return nullptr; // Default: capability not supported } @@ -348,15 +350,15 @@ class HNSWRabitqIndex : public Index { * OmegaIndex is a specialized HNSW index that supports training mode for * collecting features to train the OMEGA early stopping model. * - * It implements the ITrainingCapable interface to provide training functionality - * without modifying the generic HNSWIndex class. + * It implements the ITrainingCapable interface to provide training + * functionality without modifying the generic HNSWIndex class. 
*/ class OmegaIndex : public HNSWIndex, public ITrainingCapable { public: OmegaIndex() = default; // Override GetTrainingCapability to return this - ITrainingCapable* GetTrainingCapability() override { + ITrainingCapable *GetTrainingCapability() override { return this; } diff --git a/src/include/zvec/core/interface/index_param.h b/src/include/zvec/core/interface/index_param.h index 6be1712fa..4fc4246f5 100644 --- a/src/include/zvec/core/interface/index_param.h +++ b/src/include/zvec/core/interface/index_param.h @@ -184,7 +184,8 @@ struct HNSWQueryParam : public BaseIndexQueryParam { using Pointer = std::shared_ptr; uint32_t ef_search = kDefaultHnswEfSearch; - int training_query_id = -1; // For parallel training searches, -1 means use global + int training_query_id = + -1; // For parallel training searches, -1 means use global BaseIndexQueryParam::Pointer Clone() const override { return std::make_shared(*this); diff --git a/src/include/zvec/core/interface/training.h b/src/include/zvec/core/interface/training.h index d7f357a6c..0ac939142 100644 --- a/src/include/zvec/core/interface/training.h +++ b/src/include/zvec/core/interface/training.h @@ -59,8 +59,9 @@ struct TrainingRecord { /** * @brief Ground truth cmps data for OMEGA table generation. * - * For each query, stores the cmps value when each ground truth result was found. - * This data is used to generate gt_collected_table and gt_cmps_all_table. + * For each query, stores the cmps value when each ground truth result was + * found. This data is used to generate gt_collected_table and + * gt_cmps_all_table. 
* * gt_cmps[query_id][rank] = cmps value when GT[rank] was collected * = total_cmps if GT[rank] was never found diff --git a/src/include/zvec/core/interface/training_capable.h b/src/include/zvec/core/interface/training_capable.h index 158cc5930..dc7452fe7 100644 --- a/src/include/zvec/core/interface/training_capable.h +++ b/src/include/zvec/core/interface/training_capable.h @@ -20,7 +20,8 @@ namespace zvec { namespace core_interface { /** - * @brief Training capability interface for indexes that support post-build training. + * @brief Training capability interface for indexes that support post-build + * training. * * This interface follows the Capability Pattern, allowing indexes to optionally * provide training functionality without polluting the base Index class. diff --git a/src/include/zvec/core/interface/training_session.h b/src/include/zvec/core/interface/training_session.h index 102c1f5c2..6fb31e9bd 100644 --- a/src/include/zvec/core/interface/training_session.h +++ b/src/include/zvec/core/interface/training_session.h @@ -46,11 +46,11 @@ class ITrainingSession { virtual ~ITrainingSession() = default; - virtual zvec::Status Start(const TrainingSessionConfig& config) = 0; + virtual zvec::Status Start(const TrainingSessionConfig &config) = 0; virtual void BeginQuery(int query_id) = 0; - virtual void CollectQueryArtifacts(QueryTrainingArtifacts&& artifacts) = 0; + virtual void CollectQueryArtifacts(QueryTrainingArtifacts &&artifacts) = 0; virtual TrainingArtifacts ConsumeArtifacts() = 0; diff --git a/src/include/zvec/db/index_params.h b/src/include/zvec/db/index_params.h index fee75a1e2..61a37238d 100644 --- a/src/include/zvec/db/index_params.h +++ b/src/include/zvec/db/index_params.h @@ -439,11 +439,10 @@ class OmegaIndexParams : public VectorIndexParams { int ef_construction = core_interface::kDefaultHnswEfConstruction, QuantizeType quantize_type = QuantizeType::UNDEFINED, uint32_t min_vector_threshold = 100000, - size_t num_training_queries = 1000, - int 
ef_training = 1000, - int window_size = 100, - int ef_groundtruth = 0, - int k_train = 1) // 0 means use brute force, >0 means use HNSW with this ef + size_t num_training_queries = 1000, int ef_training = 1000, + int window_size = 100, int ef_groundtruth = 0, + int k_train = + 1) // 0 means use brute force, >0 means use HNSW with this ef : VectorIndexParams(IndexType::OMEGA, metric_type, quantize_type), m_(m), ef_construction_(ef_construction), @@ -458,11 +457,10 @@ class OmegaIndexParams : public VectorIndexParams { public: Ptr clone() const override { - return std::make_shared(metric_type_, m_, ef_construction_, - quantize_type_, min_vector_threshold_, - num_training_queries_, ef_training_, - window_size_, ef_groundtruth_, - k_train_); + return std::make_shared( + metric_type_, m_, ef_construction_, quantize_type_, + min_vector_threshold_, num_training_queries_, ef_training_, + window_size_, ef_groundtruth_, k_train_); } std::string to_string() const override { @@ -472,10 +470,9 @@ class OmegaIndexParams : public VectorIndexParams { oss << base_str << ",m:" << m_ << ",ef_construction:" << ef_construction_ << ",min_vector_threshold:" << min_vector_threshold_ << ",num_training_queries:" << num_training_queries_ - << ",ef_training:" << ef_training_ - << ",window_size:" << window_size_ - << ",ef_groundtruth:" << ef_groundtruth_ - << ",k_train:" << k_train_ << "}"; + << ",ef_training:" << ef_training_ << ",window_size:" << window_size_ + << ",ef_groundtruth:" << ef_groundtruth_ << ",k_train:" << k_train_ + << "}"; return oss.str(); } @@ -486,18 +483,17 @@ class OmegaIndexParams : public VectorIndexParams { m_ == static_cast(other).m_ && ef_construction_ == static_cast(other).ef_construction_ && - min_vector_threshold_ == - static_cast(other).min_vector_threshold_ && - num_training_queries_ == - static_cast(other).num_training_queries_ && + min_vector_threshold_ == static_cast(other) + .min_vector_threshold_ && + num_training_queries_ == static_cast(other) + 
.num_training_queries_ && ef_training_ == static_cast(other).ef_training_ && window_size_ == static_cast(other).window_size_ && ef_groundtruth_ == static_cast(other).ef_groundtruth_ && - k_train_ == - static_cast(other).k_train_ && + k_train_ == static_cast(other).k_train_ && quantize_type() == static_cast(other).quantize_type(); } diff --git a/src/include/zvec/db/query_params.h b/src/include/zvec/db/query_params.h index 7466860fd..03b38ed68 100644 --- a/src/include/zvec/db/query_params.h +++ b/src/include/zvec/db/query_params.h @@ -107,9 +107,8 @@ class HnswQueryParams : public QueryParams { class OmegaQueryParams : public HnswQueryParams { public: OmegaQueryParams(int ef = core_interface::kDefaultHnswEfSearch, - float target_recall = 0.95f, - float radius = 0.0f, bool is_linear = false, - bool is_using_refiner = false) + float target_recall = 0.95f, float radius = 0.0f, + bool is_linear = false, bool is_using_refiner = false) : HnswQueryParams(ef, radius, is_linear, is_using_refiner), target_recall_(target_recall) { set_type(IndexType::OMEGA); From 07980c7ed7b2c878fb5256c1237c895d12183242 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 15:33:14 +0800 Subject: [PATCH 098/126] Update OMEGALib submodule --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 34856eee7..9dbecbce5 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 34856eee71eca823b04adfeb0b6adf13f20d3656 +Subproject commit 9dbecbce557c540b1945293d7afcc8e1c40b358f From 90ffba505617539942a91698a148dd5316a035a3 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 15:44:05 +0800 Subject: [PATCH 099/126] Fix recursive submodule init in CI --- .github/workflows/04-android-build.yml | 4 +++- .github/workflows/scripts/run_vdb.sh | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git 
a/.github/workflows/04-android-build.yml b/.github/workflows/04-android-build.yml index 5ee6fe345..63c70a5b5 100644 --- a/.github/workflows/04-android-build.yml +++ b/.github/workflows/04-android-build.yml @@ -20,6 +20,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@v6 + with: + submodules: recursive - name: Cache dependencies uses: actions/cache@v5 @@ -60,7 +62,7 @@ jobs: - name: Use host env to compile protoc shell: bash run: | - git submodule update --init + git submodule update --init --recursive if [ ! -d "build-host" ]; then export CCACHE_BASEDIR="$GITHUB_WORKSPACE" export CCACHE_NOHASHDIR=1 diff --git a/.github/workflows/scripts/run_vdb.sh b/.github/workflows/scripts/run_vdb.sh index f153a598f..b944fc5b2 100644 --- a/.github/workflows/scripts/run_vdb.sh +++ b/.github/workflows/scripts/run_vdb.sh @@ -15,7 +15,7 @@ echo "workspace: $GITHUB_WORKSPACE" DB_LABEL_PREFIX="Zvec16c64g-$COMMIT_ID" # install zvec -git submodule update --init +git submodule update --init --recursive # for debug #cd .. 
@@ -85,4 +85,4 @@ EOF cat prom_metrics.txt curl --data-binary @prom_metrics.txt "http://47.93.34.27:9091/metrics/job/benchmarks-${CASE_TYPE}/case_type/${CASE_TYPE}/quantize_type/${QUANTIZE_TYPE}" -v done -done \ No newline at end of file +done From 0b7309c501f83b3e06923ff7d9c87b93aa9b9538 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 15:49:01 +0800 Subject: [PATCH 100/126] Update OMEGALib for Windows UTF-8 fix --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 9dbecbce5..d4ffc7f88 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 9dbecbce557c540b1945293d7afcc8e1c40b358f +Subproject commit d4ffc7f88bfe378f87d5f5b9254caf7c5a2b3409 From 0c6f8c495d4e527fef54d92353707101eac837d1 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 16:15:13 +0800 Subject: [PATCH 101/126] Update OMEGALib for Android network build fix --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index d4ffc7f88..6b8975a2d 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit d4ffc7f88bfe378f87d5f5b9254caf7c5a2b3409 +Subproject commit 6b8975a2d5defbf5acd42e1e78c3fdbc558c62cd From 43b3afd2850e79cfd6b6ce283f6ee1ec9d89fc5d Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 16:21:58 +0800 Subject: [PATCH 102/126] Fix omega unit test link dependencies --- tests/core/algorithm/omega/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/core/algorithm/omega/CMakeLists.txt b/tests/core/algorithm/omega/CMakeLists.txt index 72f47635e..33fcbb712 100644 --- a/tests/core/algorithm/omega/CMakeLists.txt +++ b/tests/core/algorithm/omega/CMakeLists.txt @@ -8,7 +8,8 @@ foreach(CC_SRCS ${ALL_TEST_SRCS}) NAME ${CC_TARGET} STRICT LIBS zvec_ailego 
core_framework core_utility core_metric core_quantizer - core_knn_hnsw core_knn_flat core_knn_omega core_interface omega + core_knn_hnsw core_knn_flat core_knn_omega core_interface + core_mix_reducer omega SRCS ${CC_SRCS} INCS . ${CMAKE_SOURCE_DIR}/src/core ${CMAKE_SOURCE_DIR}/src/core/algorithm ${CMAKE_SOURCE_DIR}/thirdparty/omega/OMEGALib/include From be84830e11a1b733dfbef93254b037ffe624e9df Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 17:09:21 +0800 Subject: [PATCH 103/126] Update OMEGALib for LightGBM stub linkage --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 6b8975a2d..1b6dcc8ac 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 6b8975a2d5defbf5acd42e1e78c3fdbc558c62cd +Subproject commit 1b6dcc8ac729788829480b20ae892976cf825c0a From 6607743523a94adcb14c51575fa363524ded8edf Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 17:11:24 +0800 Subject: [PATCH 104/126] Fix HNSW core utility linkage on macOS --- src/core/algorithm/hnsw/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/algorithm/hnsw/CMakeLists.txt b/src/core/algorithm/hnsw/CMakeLists.txt index f4a105402..ed6b8d96d 100644 --- a/src/core/algorithm/hnsw/CMakeLists.txt +++ b/src/core/algorithm/hnsw/CMakeLists.txt @@ -5,7 +5,7 @@ cc_library( NAME core_knn_hnsw STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc - LIBS core_framework sparsehash + LIBS core_framework core_utility sparsehash INCS . 
${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm VERSION "${PROXIMA_ZVEC_VERSION}" ) From 0c728dd41fa162c34bf5861e83083e89f4f0bafd Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 17:13:57 +0800 Subject: [PATCH 105/126] Tighten core target link dependencies --- src/core/algorithm/hnsw_rabitq/CMakeLists.txt | 4 ++-- src/core/algorithm/hnsw_sparse/CMakeLists.txt | 2 +- src/core/interface/CMakeLists.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/algorithm/hnsw_rabitq/CMakeLists.txt b/src/core/algorithm/hnsw_rabitq/CMakeLists.txt index ed547dc76..cb333ad84 100644 --- a/src/core/algorithm/hnsw_rabitq/CMakeLists.txt +++ b/src/core/algorithm/hnsw_rabitq/CMakeLists.txt @@ -15,7 +15,7 @@ cc_library( NAME core_knn_hnsw_rabitq STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc - LIBS core_framework rabitqlib sparsehash + LIBS core_framework core_utility rabitqlib sparsehash INCS . ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm VERSION "${PROXIMA_ZVEC_VERSION}" - ) \ No newline at end of file + ) diff --git a/src/core/algorithm/hnsw_sparse/CMakeLists.txt b/src/core/algorithm/hnsw_sparse/CMakeLists.txt index fe26d10e1..38ba102a6 100644 --- a/src/core/algorithm/hnsw_sparse/CMakeLists.txt +++ b/src/core/algorithm/hnsw_sparse/CMakeLists.txt @@ -5,7 +5,7 @@ cc_library( NAME core_knn_hnsw_sparse STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc - LIBS core_framework sparsehash + LIBS core_framework core_utility sparsehash INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/interface/CMakeLists.txt b/src/core/interface/CMakeLists.txt index 778f55cb3..049a702e1 100644 --- a/src/core/interface/CMakeLists.txt +++ b/src/core/interface/CMakeLists.txt @@ -5,6 +5,6 @@ cc_library( NAME core_interface STATIC STRICT ALWAYS_LINK SRCS *.cc indexes/*.cc INCS . 
${PROJECT_ROOT_DIR}/src/include ${PROJECT_ROOT_DIR}/src/ ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include - LIBS zvec_ailego core_framework sparsehash magic_enum rabitqlib + LIBS zvec_ailego core_framework core_mix_reducer sparsehash magic_enum rabitqlib VERSION "${PROXIMA_ZVEC_VERSION}" ) From 77f56b86474523e60b310aae6fdf9e2b410af97b Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 17:18:05 +0800 Subject: [PATCH 106/126] Update OMEGALib for stub compile flags --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 1b6dcc8ac..36882a53c 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 1b6dcc8ac729788829480b20ae892976cf825c0a +Subproject commit 36882a53cddd583e642dacb2829d638223d7c6d8 From 527dc705d6d8e869c3e7399c41385d44880fb7c8 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 17:23:06 +0800 Subject: [PATCH 107/126] Update OMEGALib for USE_SOCKET stub fix --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 36882a53c..8d158e4ee 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 36882a53cddd583e642dacb2829d638223d7c6d8 +Subproject commit 8d158e4eeb3d37589c3c86dfd6ba30b2e38140c8 From 013795db6c8d48c60866e2555ff7b4033df83c60 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 17:27:32 +0800 Subject: [PATCH 108/126] Update OMEGALib for Linkers stub emission --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index 8d158e4ee..b7a05115b 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit 8d158e4eeb3d37589c3c86dfd6ba30b2e38140c8 +Subproject commit 
b7a05115b4cd82f75a7159634b5abbb9f05f8bb1 From 9258982e19c0d87657e6c731352eeb22d023126b Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 17:59:47 +0800 Subject: [PATCH 109/126] Revert OMEGALib LightGBM stub workaround --- thirdparty/omega/OMEGALib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index b7a05115b..d4ffc7f88 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit b7a05115b4cd82f75a7159634b5abbb9f05f8bb1 +Subproject commit d4ffc7f88bfe378f87d5f5b9254caf7c5a2b3409 From fceebbade6cdefbf50cb616661790db40f02eb31 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Thu, 2 Apr 2026 20:00:26 +0800 Subject: [PATCH 110/126] Fix MSVC UTF-8 and core interface link deps --- cmake/bazel.cmake | 6 +++--- src/core/interface/CMakeLists.txt | 2 +- tests/core/algorithm/omega/CMakeLists.txt | 2 +- thirdparty/omega/OMEGALib | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/bazel.cmake b/cmake/bazel.cmake index 2910cc166..457669a4a 100644 --- a/cmake/bazel.cmake +++ b/cmake/bazel.cmake @@ -453,7 +453,7 @@ if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) "$<$:-Wall;-Wextra;-Wshadow>" "$<$:-Wall;-Wextra;-Wshadow>" "$<$:-Wall;-Wextra;-Wshadow-local;-Wno-misleading-indentation>" - "$<$:/W4>" + "$<$:/W4;/utf-8>" ${BAZEL_CC_ASAN_COMPILE_FLAGS} ${BAZEL_CC_COVERAGE_COMPILE_FLAGS} ) @@ -463,7 +463,7 @@ else() "$<$:-Wall;-Wextra;-Wshadow>" "$<$:-Wall;-Wextra;-Wshadow>" "$<$:-Wall;-Wextra;-Wshadow;-Wno-misleading-indentation>" - "$<$:/W4>" + "$<$:/W4;/utf-8>" ${BAZEL_CC_ASAN_COMPILE_FLAGS} ${BAZEL_CC_COVERAGE_COMPILE_FLAGS} ) @@ -484,7 +484,7 @@ set( "$<$:-Wall>" "$<$:-Wall>" "$<$:-Wall>" - "$<$:/W3>" + "$<$:/W3;/utf-8>" ${BAZEL_CC_ASAN_COMPILE_FLAGS} ${BAZEL_CC_COVERAGE_COMPILE_FLAGS} ) diff --git a/src/core/interface/CMakeLists.txt b/src/core/interface/CMakeLists.txt index 049a702e1..a6e568d6a 100644 --- 
a/src/core/interface/CMakeLists.txt +++ b/src/core/interface/CMakeLists.txt @@ -5,6 +5,6 @@ cc_library( NAME core_interface STATIC STRICT ALWAYS_LINK SRCS *.cc indexes/*.cc INCS . ${PROJECT_ROOT_DIR}/src/include ${PROJECT_ROOT_DIR}/src/ ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include - LIBS zvec_ailego core_framework core_mix_reducer sparsehash magic_enum rabitqlib + LIBS zvec_ailego core_framework core_mix_reducer core_knn_omega core_knn_hnsw_rabitq sparsehash magic_enum rabitqlib VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/tests/core/algorithm/omega/CMakeLists.txt b/tests/core/algorithm/omega/CMakeLists.txt index 33fcbb712..53d5756e2 100644 --- a/tests/core/algorithm/omega/CMakeLists.txt +++ b/tests/core/algorithm/omega/CMakeLists.txt @@ -8,7 +8,7 @@ foreach(CC_SRCS ${ALL_TEST_SRCS}) NAME ${CC_TARGET} STRICT LIBS zvec_ailego core_framework core_utility core_metric core_quantizer - core_knn_hnsw core_knn_flat core_knn_omega core_interface + core_knn_hnsw core_knn_hnsw_rabitq core_knn_flat core_knn_omega core_interface core_mix_reducer omega SRCS ${CC_SRCS} INCS . 
${CMAKE_SOURCE_DIR}/src/core ${CMAKE_SOURCE_DIR}/src/core/algorithm diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index d4ffc7f88..e0c5d0f7d 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit d4ffc7f88bfe378f87d5f5b9254caf7c5a2b3409 +Subproject commit e0c5d0f7d75ca9b7396ed046fd37893872b65b9b From 5f36f3cd16bb35f3b8cb341fa01c7499d36c5fb0 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 3 Apr 2026 00:02:59 +0800 Subject: [PATCH 111/126] Fix omega build and optimize regressions --- src/core/algorithm/hnsw_rabitq/CMakeLists.txt | 2 +- .../mixed_reducer/mixed_streamer_reducer.cc | 47 ++++++++++----- src/db/collection.cc | 58 ++----------------- src/db/index/CMakeLists.txt | 10 +++- .../interface/omega_training_session_test.cc | 24 ++++++-- tests/db/sqlengine/mock_segment.h | 4 ++ thirdparty/omega/OMEGALib | 2 +- 7 files changed, 72 insertions(+), 75 deletions(-) diff --git a/src/core/algorithm/hnsw_rabitq/CMakeLists.txt b/src/core/algorithm/hnsw_rabitq/CMakeLists.txt index cb333ad84..e3da57dbd 100644 --- a/src/core/algorithm/hnsw_rabitq/CMakeLists.txt +++ b/src/core/algorithm/hnsw_rabitq/CMakeLists.txt @@ -15,7 +15,7 @@ cc_library( NAME core_knn_hnsw_rabitq STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc - LIBS core_framework core_utility rabitqlib sparsehash + LIBS core_framework core_utility core_knn_cluster rabitqlib sparsehash INCS . ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/mixed_reducer/mixed_streamer_reducer.cc b/src/core/mixed_reducer/mixed_streamer_reducer.cc index 1f2b58ea4..5c2c07776 100644 --- a/src/core/mixed_reducer/mixed_streamer_reducer.cc +++ b/src/core/mixed_reducer/mixed_streamer_reducer.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "mixed_streamer_reducer.h" +#include #include #include #include @@ -141,7 +142,8 @@ int MixedStreamerReducer::reduce(const IndexFilter &filter) { ailego::ElapsedTime timer; - std::vector add_results(num_of_add_threads_, -1); + const size_t add_thread_count = enable_pk_rewrite_ ? 1 : num_of_add_threads_; + std::vector add_results(add_thread_count, -1); auto add_group = thread_pool_->make_group(); std::vector read_results(streamers_.size(), -1); @@ -149,7 +151,7 @@ int MixedStreamerReducer::reduce(const IndexFilter &filter) { uint32_t id_offset = 0, next_id = 0; if (is_sparse_) { - for (size_t i = 0; i < num_of_add_threads_; i++) { + for (size_t i = 0; i < add_thread_count; i++) { add_group->submit(ailego::Closure::New( this, &MixedStreamerReducer::add_sparse_vec, &add_results[i])); } @@ -162,7 +164,7 @@ int MixedStreamerReducer::reduce(const IndexFilter &filter) { sparse_mt_list_.done(); } else { - for (size_t i = 0; i < num_of_add_threads_; i++) { + for (size_t i = 0; i < add_thread_count; i++) { add_group->submit(ailego::Closure::New( this, &MixedStreamerReducer::add_vec, &add_results[i])); // add_vec(&add_results[i]); @@ -304,6 +306,7 @@ int MixedStreamerReducer::read_vec(size_t source_streamer_index, IndexProvider::Pointer provider = streamer->create_provider(); IndexProvider::Iterator::Pointer iterator = provider->create_iterator(); + std::vector>> pending_items; while (iterator->is_valid()) { if (stop_flag_ != nullptr && stop_flag_->load(std::memory_order_relaxed)) { @@ -332,13 +335,19 @@ int MixedStreamerReducer::read_vec(size_t source_streamer_index, memcpy(bytes.data(), iterator->data(), bytes.size()); } - // TODO: use id instead of key - if (!mt_list_.produce(VectorItem((*next_id)++, std::move(bytes)))) { - LOG_ERROR("Produce vector to queue failed. 
key[%lu]", - (size_t)iterator->key()); + pending_items.emplace_back(iterator->key() + id_offset, std::move(bytes)); + iterator->next(); + } + + std::sort(pending_items.begin(), pending_items.end(), + [](const auto &lhs, const auto &rhs) { + return lhs.first < rhs.first; + }); + for (auto &item : pending_items) { + if (!mt_list_.produce(VectorItem((*next_id)++, std::move(item.second)))) { + LOG_ERROR("Produce vector to queue failed. key[%u]", item.first); return IndexError_Runtime; } - iterator->next(); } return 0; } @@ -508,6 +517,7 @@ int MixedStreamerReducer::read_sparse_vec(size_t source_streamer_index, streamer->create_sparse_provider(); IndexStreamer::SparseProvider::Iterator::Pointer iterator = provider->create_iterator(); + std::vector pending_items; while (iterator->is_valid()) { if (stop_flag_ != nullptr && stop_flag_->load(std::memory_order_relaxed)) { @@ -547,15 +557,24 @@ int MixedStreamerReducer::read_sparse_vec(size_t source_streamer_index, memcpy(sparse_indices.data(), iterator->sparse_indices(), sparse_indices.size() * sizeof(uint32_t)); - // TODO: use id instead of key - if (!sparse_mt_list_.produce(SparseVectorItem((*next_id)++, - std::move(sparse_indices), - std::move(sparse_values)))) { + pending_items.emplace_back(iterator->key() + id_offset, + std::move(sparse_indices), + std::move(sparse_values)); + iterator->next(); + } + + std::sort(pending_items.begin(), pending_items.end(), + [](const SparseVectorItem &lhs, const SparseVectorItem &rhs) { + return lhs.pkey_ < rhs.pkey_; + }); + for (auto &item : pending_items) { + if (!sparse_mt_list_.produce(SparseVectorItem( + (*next_id)++, std::move(item.sparse_indices_), + std::move(item.sparse_values_)))) { LOG_ERROR("Produce vector to queue failed. 
key[%lu]", - (size_t)iterator->key()); + static_cast(item.pkey_)); return IndexError_Runtime; } - iterator->next(); } return 0; } diff --git a/src/db/collection.cc b/src/db/collection.cc index 98162c83f..792c1cda6 100644 --- a/src/db/collection.cc +++ b/src/db/collection.cc @@ -834,60 +834,10 @@ Status CollectionImpl::Optimize(const OptimizeOptions &options) { return Status::OK(); } - // Step 1: Build vector indexes if not ready - // This ensures indexes are built even for single segments that won't be - // compacted - std::vector index_build_tasks; - for (auto &segment : persist_segments) { - if (!segment->all_vector_index_ready()) { - // Build all vector indexes for this segment - index_build_tasks.push_back(SegmentTask::CreateCreateVectorIndexTask( - CreateVectorIndexTask{segment, "", nullptr, options.concurrency_})); - } - } - - if (!index_build_tasks.empty()) { - LOG_INFO("Building vector indexes for %zu segments", - index_build_tasks.size()); - auto s = execute_tasks(index_build_tasks); - CHECK_RETURN_STATUS(s); - - // Update segment metadata - std::lock_guard write_lock(write_mtx_); - Version new_version = version_manager_->get_current_version(); - - for (auto &task : index_build_tasks) { - auto task_info = task->task_info(); - if (std::holds_alternative(task_info)) { - auto create_index_task = std::get(task_info); - s = new_version.update_persisted_segment_meta( - create_index_task.output_segment_meta_); - CHECK_RETURN_STATUS(s); - } - } - - s = version_manager_->apply(new_version); - CHECK_RETURN_STATUS(s); - s = version_manager_->flush(); - CHECK_RETURN_STATUS(s); - - // Reload indexes in segments - for (auto &task : index_build_tasks) { - auto task_info = task->task_info(); - if (std::holds_alternative(task_info)) { - auto create_index_task = std::get(task_info); - s = create_index_task.input_segment_->reload_vector_index( - *schema_, create_index_task.output_segment_meta_, - create_index_task.output_vector_indexers_, - 
create_index_task.output_quant_vector_indexers_); - CHECK_RETURN_STATUS(s); - } - } - - LOG_INFO("Completed building vector indexes"); - } - - // Step 2: build segment compact task + // Build optimize tasks once so compacted segments are merged directly from + // their current per-segment sources. Pre-building filtered vector indexes for + // every persisted segment would shift source row ids before compaction and + // break alignment with scalar row-id filters. auto delete_store_clone = delete_store_->clone(); auto tasks = build_compact_task(schema_, persist_segments, options.concurrency_, diff --git a/src/db/index/CMakeLists.txt b/src/db/index/CMakeLists.txt index 4420050e6..01e5a0661 100644 --- a/src/db/index/CMakeLists.txt +++ b/src/db/index/CMakeLists.txt @@ -3,11 +3,19 @@ include(${PROJECT_ROOT_DIR}/cmake/option.cmake) cc_library( NAME zvec_index STATIC STRICT - SRCS *.cc segment/*.cc column/vector_column/*.cc column/inverted_column/*.cc storage/*.cc storage/wal/*.cc common/*.cc + SRCS *.cc + segment/*.cc + column/vector_column/*.cc + column/inverted_column/*.cc + storage/*.cc + storage/wal/*.cc + common/*.cc + ../training/*.cc LIBS zvec_common zvec_proto rocksdb core_interface + omega Arrow::arrow_static Arrow::arrow_compute Arrow::arrow_dataset diff --git a/tests/core/interface/omega_training_session_test.cc b/tests/core/interface/omega_training_session_test.cc index 450c4532a..b75c57df5 100644 --- a/tests/core/interface/omega_training_session_test.cc +++ b/tests/core/interface/omega_training_session_test.cc @@ -34,15 +34,31 @@ TEST(OmegaTrainingSessionTest, ConsumeArtifactsAggregatesRecordsAndGtCmps) { first.training_query_id = 0; first.total_cmps = 13; first.gt_cmps_per_rank = {3, 7, 11}; - first.records.push_back( - TrainingRecord{0, 1, 3, 0.1f, 0.2f, std::vector(7, 1.0f), 1}); + TrainingRecord first_record; + first_record.query_id = 0; + first_record.hops_visited = 1; + first_record.cmps_visited = 3; + first_record.dist_1st = 0.1f; + 
first_record.dist_start = 0.2f; + first_record.traversal_window_stats = {1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f}; + first_record.label = 1; + first.records.push_back(first_record); QueryTrainingArtifacts second; second.training_query_id = 2; second.total_cmps = 21; second.gt_cmps_per_rank = {5, 9, 15}; - second.records.push_back( - TrainingRecord{2, 4, 8, 0.3f, 0.4f, std::vector(7, 2.0f), 0}); + TrainingRecord second_record; + second_record.query_id = 2; + second_record.hops_visited = 4; + second_record.cmps_visited = 8; + second_record.dist_1st = 0.3f; + second_record.dist_start = 0.4f; + second_record.traversal_window_stats = {2.0f, 2.0f, 2.0f, 2.0f, + 2.0f, 2.0f, 2.0f}; + second_record.label = 0; + second.records.push_back(second_record); session.CollectQueryArtifacts(std::move(first)); session.CollectQueryArtifacts(std::move(second)); diff --git a/tests/db/sqlengine/mock_segment.h b/tests/db/sqlengine/mock_segment.h index 6b46e2385..87e5368ab 100644 --- a/tests/db/sqlengine/mock_segment.h +++ b/tests/db/sqlengine/mock_segment.h @@ -504,6 +504,10 @@ class MockSegment : public Segment { return Status::OK(); } + Status retrain_omega_model() override { + return Status::OK(); + } + Status destroy() override { return Status::OK(); } diff --git a/thirdparty/omega/OMEGALib b/thirdparty/omega/OMEGALib index e0c5d0f7d..4c530aa16 160000 --- a/thirdparty/omega/OMEGALib +++ b/thirdparty/omega/OMEGALib @@ -1 +1 @@ -Subproject commit e0c5d0f7d75ca9b7396ed046fd37893872b65b9b +Subproject commit 4c530aa16fc232ed9c52cb9c85ffeb67b30b6c87 From 7dfd196652eb4eb49a65cc0ed44852e567dfe86a Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 3 Apr 2026 00:21:58 +0800 Subject: [PATCH 112/126] Format omega integration fixes --- src/core/mixed_reducer/mixed_streamer_reducer.cc | 13 ++++++------- tests/core/interface/omega_training_session_test.cc | 5 +++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/core/mixed_reducer/mixed_streamer_reducer.cc 
b/src/core/mixed_reducer/mixed_streamer_reducer.cc index 5c2c07776..19b9e3fa2 100644 --- a/src/core/mixed_reducer/mixed_streamer_reducer.cc +++ b/src/core/mixed_reducer/mixed_streamer_reducer.cc @@ -339,10 +339,9 @@ int MixedStreamerReducer::read_vec(size_t source_streamer_index, iterator->next(); } - std::sort(pending_items.begin(), pending_items.end(), - [](const auto &lhs, const auto &rhs) { - return lhs.first < rhs.first; - }); + std::sort( + pending_items.begin(), pending_items.end(), + [](const auto &lhs, const auto &rhs) { return lhs.first < rhs.first; }); for (auto &item : pending_items) { if (!mt_list_.produce(VectorItem((*next_id)++, std::move(item.second)))) { LOG_ERROR("Produce vector to queue failed. key[%u]", item.first); @@ -568,9 +567,9 @@ int MixedStreamerReducer::read_sparse_vec(size_t source_streamer_index, return lhs.pkey_ < rhs.pkey_; }); for (auto &item : pending_items) { - if (!sparse_mt_list_.produce(SparseVectorItem( - (*next_id)++, std::move(item.sparse_indices_), - std::move(item.sparse_values_)))) { + if (!sparse_mt_list_.produce( + SparseVectorItem((*next_id)++, std::move(item.sparse_indices_), + std::move(item.sparse_values_)))) { LOG_ERROR("Produce vector to queue failed. key[%lu]", static_cast(item.pkey_)); return IndexError_Runtime; diff --git a/tests/core/interface/omega_training_session_test.cc b/tests/core/interface/omega_training_session_test.cc index b75c57df5..fceaa2889 100644 --- a/tests/core/interface/omega_training_session_test.cc +++ b/tests/core/interface/omega_training_session_test.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include #include "core/interface/indexes/omega_training_session.h" +#include namespace zvec::core_interface { @@ -88,7 +88,8 @@ TEST(OmegaTrainingSessionTest, ConsumeArtifactsAggregatesRecordsAndGtCmps) { EXPECT_TRUE(drained.gt_cmps_data.gt_cmps.empty()); } -TEST(OmegaTrainingSessionTest, ConsumeArtifactsUsesConfiguredShapeWhenAvailable) { +TEST(OmegaTrainingSessionTest, + ConsumeArtifactsUsesConfiguredShapeWhenAvailable) { OmegaTrainingSession session(nullptr); QueryTrainingArtifacts only; From 63bc93ba6d5901c9be364747ea3fa8442773a3d6 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 3 Apr 2026 01:24:27 +0800 Subject: [PATCH 113/126] Fix example linking and rename steady timer --- examples/c++/CMakeLists.txt | 16 ++++ examples/c/CMakeLists.txt | 14 +++- src/core/algorithm/hnsw/hnsw_streamer.cc | 16 ++-- src/core/utility/rdtsc_timer.cc | 75 ------------------- src/core/utility/steady_clock_timer.cc | 31 ++++++++ .../{rdtsc_timer.h => steady_clock_timer.h} | 20 ++--- 6 files changed, 73 insertions(+), 99 deletions(-) delete mode 100644 src/core/utility/rdtsc_timer.cc create mode 100644 src/core/utility/steady_clock_timer.cc rename src/core/utility/{rdtsc_timer.h => steady_clock_timer.h} (68%) diff --git a/examples/c++/CMakeLists.txt b/examples/c++/CMakeLists.txt index 13b8d7c59..bcb8f74f8 100644 --- a/examples/c++/CMakeLists.txt +++ b/examples/c++/CMakeLists.txt @@ -33,10 +33,18 @@ endif() # --- Dependency groups --- find_package(Threads REQUIRED) +find_package(OpenMP QUIET) +set(zvec_openmp_deps) +if(OpenMP_FOUND) + list(APPEND zvec_openmp_deps OpenMP::OpenMP_CXX) +endif() set(zvec_core_deps zvec_turbo + omega + _lightgbm + ${zvec_openmp_deps} ) if (NOT WIN32) @@ -52,6 +60,8 @@ if (NOT WIN32) set(zvec_db_deps roaring rocksdb + omega + _lightgbm arrow arrow_acero arrow_bundled_dependencies @@ -63,6 +73,7 @@ if (NOT WIN32) ${GFLAGS_LIB} ${PROTOBUF_LIB} lz4 + ${zvec_openmp_deps} ) else () # Windows static libraries use different naming conventions @@ -77,6 +88,8 
@@ else () set(zvec_db_deps roaring rocksdb + omega + _lightgbm arrow_static arrow_acero_static arrow_bundled_dependencies @@ -90,6 +103,7 @@ else () lz4 rpcrt4 shlwapi + ${zvec_openmp_deps} ) endif () @@ -124,6 +138,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux") elseif(APPLE) target_link_libraries(zvec-core INTERFACE -Wl,-force_load ${ZVEC_LIB_DIR}/libzvec_core.a + -Wl,-force_load ${ZVEC_LIB_DIR}/libomega.a zvec-ailego ${zvec_core_deps} ) @@ -161,6 +176,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux") ) elseif(APPLE) target_link_libraries(zvec-db INTERFACE + -Wl,-force_load ${ZVEC_LIB_DIR}/libzvec_db.a zvec_db zvec-core zvec-ailego diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index 5edb881a6..9c82576e4 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -45,6 +45,12 @@ link_directories(${ZVEC_LIB_DIR} ${ZVEC_DEPENDENCY_LIB_DIR}) # Find required packages find_package(Threads REQUIRED) +find_package(OpenMP QUIET) + +set(zvec_openmp_deps) +if(OpenMP_FOUND) + list(APPEND zvec_openmp_deps OpenMP::OpenMP_CXX) +endif() # --- Determine debug/release library names --- if(CMAKE_BUILD_TYPE STREQUAL "Debug") @@ -62,6 +68,8 @@ if(NOT WIN32) set(zvec_c_api_deps roaring rocksdb + omega + _lightgbm arrow arrow_acero arrow_bundled_dependencies @@ -73,6 +81,7 @@ if(NOT WIN32) ${GFLAGS_LIB} ${PROTOBUF_LIB} lz4 + ${zvec_openmp_deps} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS} ) @@ -82,6 +91,8 @@ else() set(zvec_c_api_deps roaring rocksdb + omega + _lightgbm arrow_static arrow_acero_static arrow_bundled_dependencies @@ -93,6 +104,7 @@ else() ${GFLAGS_LIB} ${PROTOBUF_LIB} lz4 + ${zvec_openmp_deps} ${CMAKE_THREAD_LIBS_INIT} rpcrt4 shlwapi @@ -206,4 +218,4 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release" AND ANDROID) set_property(TARGET c_api_basic_example c_api_collection_schema_example c_api_doc_example c_api_index_example c_api_field_schema_example c_api_optimized_example PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) -endif() \ No newline at end of file 
+endif() diff --git a/src/core/algorithm/hnsw/hnsw_streamer.cc b/src/core/algorithm/hnsw/hnsw_streamer.cc index 0bfd24c51..35ac4a6ab 100644 --- a/src/core/algorithm/hnsw/hnsw_streamer.cc +++ b/src/core/algorithm/hnsw/hnsw_streamer.cc @@ -18,8 +18,8 @@ #include #include #include -#include "utility/rdtsc_timer.h" #include "utility/sparse_utility.h" +#include "utility/steady_clock_timer.h" #include "hnsw_algorithm.h" #include "hnsw_context.h" #include "hnsw_dist_calculator.h" @@ -680,9 +680,9 @@ int HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, const bool use_empty_hooks = UseEmptyHnswHooks(); HnswAlgorithm::SearchHooks empty_hooks; for (size_t q = 0; q < count; ++q) { - auto query_start = RdtscTimer::Now(); + auto query_start = SteadyClockTimer::Now(); ctx->reset_query(query); - auto query_search_start = RdtscTimer::Now(); + auto query_search_start = SteadyClockTimer::Now(); if (use_empty_hooks) { bool stopped_early = false; ret = alg_->search_with_hooks(ctx, &empty_hooks, &stopped_early); @@ -693,11 +693,11 @@ int HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, LOG_ERROR("Hnsw searcher fast search failed"); return ret; } - auto query_search_end = RdtscTimer::Now(); + auto query_search_end = SteadyClockTimer::Now(); auto query_search_time_ns = - RdtscTimer::ElapsedNs(query_search_start, query_search_end); + SteadyClockTimer::ElapsedNs(query_search_start, query_search_end); auto query_latency_ns = - RdtscTimer::ElapsedNs(query_start, RdtscTimer::Now()); + SteadyClockTimer::ElapsedNs(query_start, SteadyClockTimer::Now()); uint64_t query_seq = HnswQueryStatsSequence().fetch_add(1); if (ShouldLogHnswQueryStats(query_seq)) { LOG_INFO( @@ -811,7 +811,7 @@ int HnswStreamer::search_bf_impl( auto &topk = ctx->topk_heap(); for (size_t q = 0; q < count; ++q) { - auto query_start = RdtscTimer::Now(); + auto query_start = SteadyClockTimer::Now(); ctx->reset_query(query); topk.clear(); for (node_id_t id = 0; id < 
entity_.doc_cnt(); ++id) { @@ -825,7 +825,7 @@ int HnswStreamer::search_bf_impl( } } auto query_latency_ns = - RdtscTimer::ElapsedNs(query_start, RdtscTimer::Now()); + SteadyClockTimer::ElapsedNs(query_start, SteadyClockTimer::Now()); uint64_t query_seq = HnswQueryStatsSequence().fetch_add(1); if (ShouldLogHnswQueryStats(query_seq)) { LOG_INFO( diff --git a/src/core/utility/rdtsc_timer.cc b/src/core/utility/rdtsc_timer.cc deleted file mode 100644 index d4e54c8a8..000000000 --- a/src/core/utility/rdtsc_timer.cc +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2025-present the zvec project -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "utility/rdtsc_timer.h" -#include - -namespace zvec { -namespace core { - -RdtscTimer::tick_t RdtscTimer::Now() { -#if ZVEC_CORE_HAS_TSC - uint32_t lo = 0; - uint32_t hi = 0; - uint32_t aux = 0; - __asm__ __volatile__("rdtscp" : "=a"(lo), "=d"(hi), "=c"(aux) : :); - return (static_cast(hi) << 32) | lo; -#else - return MonotonicRawNs(); -#endif -} - -uint64_t RdtscTimer::ElapsedNs(tick_t start, tick_t end) { -#if ZVEC_CORE_HAS_TSC - if (end <= start) { - return 0; - } - return static_cast(static_cast(end - start) * NsPerTick()); -#else - return end > start ? 
(end - start) : 0; -#endif -} - -uint64_t RdtscTimer::MonotonicRawNs() { - struct timespec ts {}; - clock_gettime(CLOCK_MONOTONIC_RAW, &ts); - return static_cast(ts.tv_sec) * 1000000000ull + - static_cast(ts.tv_nsec); -} - -double RdtscTimer::NsPerTick() { - static const double ns_per_tick = CalibrateNsPerTick(); - return ns_per_tick; -} - -double RdtscTimer::CalibrateNsPerTick() { - constexpr uint64_t kMinCalibrationNs = 5 * 1000 * 1000; - const uint64_t start_ns = MonotonicRawNs(); - const tick_t start_tick = Now(); - - uint64_t end_ns = start_ns; - while (end_ns - start_ns < kMinCalibrationNs) { - end_ns = MonotonicRawNs(); - } - - const tick_t end_tick = Now(); - if (end_tick <= start_tick) { - return 1.0; - } - return static_cast(end_ns - start_ns) / - static_cast(end_tick - start_tick); -} - -} // namespace core -} // namespace zvec diff --git a/src/core/utility/steady_clock_timer.cc b/src/core/utility/steady_clock_timer.cc new file mode 100644 index 000000000..7232e6166 --- /dev/null +++ b/src/core/utility/steady_clock_timer.cc @@ -0,0 +1,31 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "utility/steady_clock_timer.h" + +namespace zvec { +namespace core { + +SteadyClockTimer::tick_t SteadyClockTimer::Now() { + const auto now = std::chrono::steady_clock::now().time_since_epoch(); + return static_cast( + std::chrono::duration_cast(now).count()); +} + +uint64_t SteadyClockTimer::ElapsedNs(tick_t start, tick_t end) { + return end > start ? (end - start) : 0; +} + +} // namespace core +} // namespace zvec diff --git a/src/core/utility/rdtsc_timer.h b/src/core/utility/steady_clock_timer.h similarity index 68% rename from src/core/utility/rdtsc_timer.h rename to src/core/utility/steady_clock_timer.h index 4d244af9b..96da46b0b 100644 --- a/src/core/utility/rdtsc_timer.h +++ b/src/core/utility/steady_clock_timer.h @@ -12,34 +12,24 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef ZVEC_CORE_UTILITY_RDTSC_TIMER_H_ -#define ZVEC_CORE_UTILITY_RDTSC_TIMER_H_ +#ifndef ZVEC_CORE_UTILITY_STEADY_CLOCK_TIMER_H_ +#define ZVEC_CORE_UTILITY_STEADY_CLOCK_TIMER_H_ +#include #include -#if defined(__x86_64__) || defined(__i386__) -#define ZVEC_CORE_HAS_TSC 1 -#else -#define ZVEC_CORE_HAS_TSC 0 -#endif - namespace zvec { namespace core { -class RdtscTimer { +class SteadyClockTimer { public: using tick_t = uint64_t; static tick_t Now(); static uint64_t ElapsedNs(tick_t start, tick_t end); - - private: - static uint64_t MonotonicRawNs(); - static double NsPerTick(); - static double CalibrateNsPerTick(); }; } // namespace core } // namespace zvec -#endif // ZVEC_CORE_UTILITY_RDTSC_TIMER_H_ +#endif // ZVEC_CORE_UTILITY_STEADY_CLOCK_TIMER_H_ From c045298c7515597cb04945ad6508e8d33f0cfe08 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 3 Apr 2026 02:44:53 +0800 Subject: [PATCH 114/126] Fix macOS examples and C++17 training init --- examples/c++/CMakeLists.txt | 2 +- src/db/training/training_data_collector.cc | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git 
a/examples/c++/CMakeLists.txt b/examples/c++/CMakeLists.txt index bcb8f74f8..27ab0ed67 100644 --- a/examples/c++/CMakeLists.txt +++ b/examples/c++/CMakeLists.txt @@ -138,7 +138,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux") elseif(APPLE) target_link_libraries(zvec-core INTERFACE -Wl,-force_load ${ZVEC_LIB_DIR}/libzvec_core.a - -Wl,-force_load ${ZVEC_LIB_DIR}/libomega.a + -Wl,-force_load ${ZVEC_DEPENDENCY_LIB_DIR}/libomega.a zvec-ailego ${zvec_core_deps} ) diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 939deb504..883b56de2 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -234,8 +234,7 @@ TrainingDataCollector::CollectTrainingDataFromQueriesImpl( vector_column_params::VectorData vector_data; vector_data.vector = vector_column_params::DenseVector{ - .data = const_cast( - static_cast(query_vector.data()))}; + const_cast(static_cast(query_vector.data()))}; vector_column_params::QueryParams query_params; query_params.topk = options.topk; @@ -445,8 +444,8 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( ++i) { size_t q_idx = start_idx + i; vector_column_params::VectorData vector_data; - vector_data.vector = vector_column_params::DenseVector{ - .data = const_cast( + vector_data.vector = + vector_column_params::DenseVector{const_cast( static_cast(queries[q_idx].data()))}; vector_column_params::QueryParams query_params; @@ -509,8 +508,7 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Prepare query parameters (exactly same as training searches) vector_column_params::VectorData vector_data; vector_data.vector = vector_column_params::DenseVector{ - .data = const_cast( - static_cast(queries[q].data()))}; + const_cast(static_cast(queries[q].data()))}; vector_column_params::QueryParams query_params; query_params.topk = actual_topk; From 02246e38a09ae06bf16265d39a83106c78e4ecca Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 3 Apr 2026 
13:16:15 +0800 Subject: [PATCH 115/126] Keep omega binding registration linked --- src/binding/python/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/binding/python/CMakeLists.txt b/src/binding/python/CMakeLists.txt index 7e5169176..f0136902e 100644 --- a/src/binding/python/CMakeLists.txt +++ b/src/binding/python/CMakeLists.txt @@ -21,6 +21,7 @@ pybind11_add_module(_zvec ${SRC_LISTS}) if (CMAKE_SYSTEM_NAME STREQUAL "Linux") target_link_libraries(_zvec PRIVATE -Wl,--whole-archive + $ $ $ $ @@ -40,6 +41,7 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux") ) elseif (APPLE) target_link_libraries(_zvec PRIVATE + -Wl,-force_load,$ -Wl,-force_load,$ -Wl,-force_load,$ -Wl,-force_load,$ @@ -58,6 +60,7 @@ elseif (APPLE) ) elseif (MSVC) set(_zvec_whole_archive_libs + core_knn_omega_static core_knn_flat_static core_knn_flat_sparse_static core_knn_hnsw_static @@ -80,4 +83,4 @@ elseif (MSVC) endforeach() endif () -target_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/binding/python/include) \ No newline at end of file +target_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/binding/python/include) From 346d8b61ac91ca0c0ead6db66aee7508f54d822e Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 3 Apr 2026 18:07:20 +0800 Subject: [PATCH 116/126] Gate omega build paths and tests by platform/config --- CMakeLists.txt | 9 +- examples/c++/CMakeLists.txt | 35 ++++++-- examples/c/CMakeLists.txt | 24 +++++- python/tests/test_collection.py | 22 +++++ python/tests/test_params.py | 17 ++++ src/binding/python/CMakeLists.txt | 16 +++- src/core/CMakeLists.txt | 10 ++- src/core/algorithm/CMakeLists.txt | 18 +++- src/core/interface/CMakeLists.txt | 33 +++++++- src/core/interface/index_factory.cc | 5 ++ src/core/interface/indexes/hnsw_index.cc | 4 + src/db/CMakeLists.txt | 11 ++- src/db/index/CMakeLists.txt | 39 ++++++--- 
src/db/index/common/schema.cc | 6 ++ src/db/training/omega_training_coordinator.h | 38 +++++++++ src/db/training/training_data_collector.cc | 86 ++++++++++++++++++++ tests/core/algorithm/CMakeLists.txt | 2 + tests/core/interface/CMakeLists.txt | 21 ++++- thirdparty/CMakeLists.txt | 8 +- tools/core/CMakeLists.txt | 5 +- 20 files changed, 368 insertions(+), 41 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f8e987237..a368854d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,7 +65,14 @@ message(STATUS "BUILD_C_BINDINGS:${BUILD_C_BINDINGS}") option(BUILD_TOOLS "Build tools" ON) message(STATUS "BUILD_TOOLS:${BUILD_TOOLS}") -message(STATUS "OMEGA support: always enabled") +if(ANDROID) + option(ZVEC_ENABLE_OMEGA "Build OMEGA support" OFF) +else() + option(ZVEC_ENABLE_OMEGA "Build OMEGA support" ON) +endif() +message(STATUS "ZVEC_ENABLE_OMEGA:${ZVEC_ENABLE_OMEGA}") +add_compile_definitions(ZVEC_ENABLE_OMEGA=$) + option(RABITQ_ENABLE_AVX512 "Compile RaBitQ with AVX-512 support" OFF) if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64" AND NOT ANDROID) diff --git a/examples/c++/CMakeLists.txt b/examples/c++/CMakeLists.txt index 27ab0ed67..63a3db85e 100644 --- a/examples/c++/CMakeLists.txt +++ b/examples/c++/CMakeLists.txt @@ -16,6 +16,20 @@ set(ZVEC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../src/include) set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) +set(_zvec_host_omega_default OFF) +if(WIN32) + if(EXISTS "${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib/Release/omega.lib" AND + EXISTS "${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib/Release/lib_lightgbm.lib") + set(_zvec_host_omega_default ON) + endif() +else() + if(EXISTS "${ZVEC_DEPENDENCY_LIB_DIR}/libomega.a" AND + EXISTS "${ZVEC_DEPENDENCY_LIB_DIR}/lib_lightgbm.a") + 
set(_zvec_host_omega_default ON) + endif() +endif() +option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" ${_zvec_host_omega_default}) + # Add include and library search paths include_directories(${ZVEC_INCLUDE_DIR}) link_directories(${ZVEC_LIB_DIR} ${ZVEC_DEPENDENCY_LIB_DIR}) @@ -42,10 +56,11 @@ endif() set(zvec_core_deps zvec_turbo - omega - _lightgbm ${zvec_openmp_deps} ) +if(ZVEC_ENABLE_OMEGA) + list(APPEND zvec_core_deps omega _lightgbm) +endif() if (NOT WIN32) set(zvec_ailego_deps @@ -60,8 +75,6 @@ if (NOT WIN32) set(zvec_db_deps roaring rocksdb - omega - _lightgbm arrow arrow_acero arrow_bundled_dependencies @@ -75,6 +88,9 @@ if (NOT WIN32) lz4 ${zvec_openmp_deps} ) + if(ZVEC_ENABLE_OMEGA) + list(APPEND zvec_db_deps omega _lightgbm) + endif() else () # Windows static libraries use different naming conventions set(PROTOBUF_LIB libprotobuf) @@ -88,8 +104,6 @@ else () set(zvec_db_deps roaring rocksdb - omega - _lightgbm arrow_static arrow_acero_static arrow_bundled_dependencies @@ -105,6 +119,9 @@ else () shlwapi ${zvec_openmp_deps} ) + if(ZVEC_ENABLE_OMEGA) + list(APPEND zvec_db_deps omega _lightgbm) + endif() endif () # --- Create INTERFACE targets for Zvec components --- @@ -138,10 +155,14 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux") elseif(APPLE) target_link_libraries(zvec-core INTERFACE -Wl,-force_load ${ZVEC_LIB_DIR}/libzvec_core.a - -Wl,-force_load ${ZVEC_DEPENDENCY_LIB_DIR}/libomega.a zvec-ailego ${zvec_core_deps} ) + if(ZVEC_ENABLE_OMEGA) + target_link_libraries(zvec-core INTERFACE + -Wl,-force_load ${ZVEC_DEPENDENCY_LIB_DIR}/libomega.a + ) + endif() elseif(ANDROID) target_link_libraries(zvec-core INTERFACE -Wl,--whole-archive diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index 9c82576e4..eb4a35d7d 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -39,6 +39,20 @@ else() set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) endif() 
+set(_zvec_host_omega_default OFF) +if(WIN32) + if(EXISTS "${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib/Release/omega.lib" AND + EXISTS "${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib/Release/lib_lightgbm.lib") + set(_zvec_host_omega_default ON) + endif() +else() + if(EXISTS "${ZVEC_DEPENDENCY_LIB_DIR}/libomega.a" AND + EXISTS "${ZVEC_DEPENDENCY_LIB_DIR}/lib_lightgbm.a") + set(_zvec_host_omega_default ON) + endif() +endif() +option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" ${_zvec_host_omega_default}) + # Add include and library search paths include_directories(${ZVEC_INCLUDE_DIR} ${ZVEC_GENERATED_INCLUDE_DIR}) link_directories(${ZVEC_LIB_DIR} ${ZVEC_DEPENDENCY_LIB_DIR}) @@ -68,8 +82,6 @@ if(NOT WIN32) set(zvec_c_api_deps roaring rocksdb - omega - _lightgbm arrow arrow_acero arrow_bundled_dependencies @@ -85,14 +97,15 @@ if(NOT WIN32) ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS} ) + if(ZVEC_ENABLE_OMEGA) + list(APPEND zvec_c_api_deps omega _lightgbm) + endif() else() # Windows static libraries use different naming conventions set(PROTOBUF_LIB libprotobuf) set(zvec_c_api_deps roaring rocksdb - omega - _lightgbm arrow_static arrow_acero_static arrow_bundled_dependencies @@ -109,6 +122,9 @@ else() rpcrt4 shlwapi ) + if(ZVEC_ENABLE_OMEGA) + list(APPEND zvec_c_api_deps omega _lightgbm) + endif() endif() # Create INTERFACE target for zvec_c_api with platform-specific linking diff --git a/python/tests/test_collection.py b/python/tests/test_collection.py index 01b84a641..5113bf6a5 100644 --- a/python/tests/test_collection.py +++ b/python/tests/test_collection.py @@ -13,6 +13,8 @@ # limitations under the License. 
from __future__ import annotations +import os +import sys from pathlib import Path import pytest @@ -40,6 +42,18 @@ OptimizeOption, ) +IS_ANDROID = hasattr(sys, "getandroidapilevel") or "ANDROID_ROOT" in os.environ +OMEGA_ENABLED = os.environ.get("ZVEC_ENABLE_OMEGA", "1").lower() not in { + "0", + "off", + "false", + "no", +} +OMEGA_AVAILABLE = OMEGA_ENABLED and not IS_ANDROID +OMEGA_ANDROID_SKIP = pytest.mark.skipif( + not OMEGA_AVAILABLE, reason="OMEGA is disabled on this build/platform" +) + # ==================== Common ==================== @@ -135,6 +149,7 @@ def test_collection( print(f"Warning: failed to destroy collection: {e}") +@OMEGA_ANDROID_SKIP @pytest.fixture(scope="session") def omega_collection_schema(): return zvec.CollectionSchema( @@ -165,6 +180,7 @@ def omega_collection_schema(): ) +@OMEGA_ANDROID_SKIP @pytest.fixture(scope="function") def omega_test_collection( tmp_path_factory, omega_collection_schema, collection_option @@ -192,6 +208,7 @@ def omega_test_collection( print(f"Warning: failed to destroy omega collection: {e}") +@OMEGA_ANDROID_SKIP @pytest.fixture def omega_multiple_docs(): return [ @@ -204,6 +221,7 @@ def omega_multiple_docs(): ] +@OMEGA_ANDROID_SKIP @pytest.fixture def omega_workflow_docs(): return [ @@ -1103,6 +1121,7 @@ def test_collection_query_by_id( ) assert len(result) == 10 + @OMEGA_ANDROID_SKIP def test_omega_collection_schema_uses_omega_index( self, omega_test_collection: Collection ): @@ -1110,6 +1129,7 @@ def test_omega_collection_schema_uses_omega_index( assert vector_schema is not None assert vector_schema.index_param.type == IndexType.OMEGA + @OMEGA_ANDROID_SKIP def test_omega_collection_query_by_id_with_omega_param( self, omega_test_collection: Collection, omega_multiple_docs ): @@ -1129,6 +1149,7 @@ def test_omega_collection_query_by_id_with_omega_param( assert len(query_result) > 0 assert query_result[0].id == omega_multiple_docs[0].id + @OMEGA_ANDROID_SKIP def 
test_omega_workflow_optimize_trains_model_and_query_runs( self, tmp_path_factory, collection_option, omega_workflow_docs ): @@ -1176,6 +1197,7 @@ def test_omega_workflow_optimize_trains_model_and_query_runs( finally: omega_collection.destroy() + @OMEGA_ANDROID_SKIP def test_omega_query_falls_back_to_hnsw_when_model_not_trained( self, tmp_path_factory, collection_option, omega_workflow_docs ): diff --git a/python/tests/test_params.py b/python/tests/test_params.py index f7d3cb6ff..24be0b21d 100644 --- a/python/tests/test_params.py +++ b/python/tests/test_params.py @@ -13,6 +13,7 @@ # limitations under the License. from __future__ import annotations +import os import pickle import sys import time @@ -44,6 +45,18 @@ from _zvec.param import _VectorQuery +IS_ANDROID = hasattr(sys, "getandroidapilevel") or "ANDROID_ROOT" in os.environ +OMEGA_ENABLED = os.environ.get("ZVEC_ENABLE_OMEGA", "1").lower() not in { + "0", + "off", + "false", + "no", +} +OMEGA_AVAILABLE = OMEGA_ENABLED and not IS_ANDROID +OMEGA_ANDROID_SKIP = pytest.mark.skipif( + not OMEGA_AVAILABLE, reason="OMEGA is disabled on this build/platform" +) + # ---------------------------- # Invert Index Param Test Case # ---------------------------- @@ -183,6 +196,7 @@ def test_readonly_attributes(self, attr): # ---------------------------- # OMEGA Index Param Test Case # ---------------------------- +@OMEGA_ANDROID_SKIP class TestOmegaIndexParam: def test_default(self): param = OmegaIndexParam() @@ -414,6 +428,7 @@ def test_readonly_attributes(self): # ---------------------------- # OMEGA Query Param Test Case # ---------------------------- +@OMEGA_ANDROID_SKIP class TestOmegaQueryParam: def test_default(self): param = OmegaQueryParam() @@ -500,6 +515,7 @@ def test_init_with_valid_vector(self): assert vq.vector == vec assert vq.param == param + @OMEGA_ANDROID_SKIP def test_init_with_valid_omega_param(self): vec = [0.1, 0.2, 0.3] param = OmegaQueryParam(ef=256, target_recall=0.91) @@ -535,6 +551,7 @@ def 
test_validate_fails_on_both_id_and_vector(self): vq._validate() +@OMEGA_ANDROID_SKIP class TestVectorSchemaWithOmega: def test_accepts_omega_index_param(self): schema = VectorSchema( diff --git a/src/binding/python/CMakeLists.txt b/src/binding/python/CMakeLists.txt index f0136902e..3a176834d 100644 --- a/src/binding/python/CMakeLists.txt +++ b/src/binding/python/CMakeLists.txt @@ -19,9 +19,13 @@ set(SRC_LISTS pybind11_add_module(_zvec ${SRC_LISTS}) if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(_zvec_force_link_libs) + if(ZVEC_ENABLE_OMEGA) + list(APPEND _zvec_force_link_libs $) + endif() target_link_libraries(_zvec PRIVATE -Wl,--whole-archive - $ + ${_zvec_force_link_libs} $ $ $ @@ -40,8 +44,12 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux") "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports.map" ) elseif (APPLE) + set(_zvec_force_load_libs) + if(ZVEC_ENABLE_OMEGA) + list(APPEND _zvec_force_load_libs -Wl,-force_load,$) + endif() target_link_libraries(_zvec PRIVATE - -Wl,-force_load,$ + ${_zvec_force_load_libs} -Wl,-force_load,$ -Wl,-force_load,$ -Wl,-force_load,$ @@ -60,7 +68,6 @@ elseif (APPLE) ) elseif (MSVC) set(_zvec_whole_archive_libs - core_knn_omega_static core_knn_flat_static core_knn_flat_sparse_static core_knn_hnsw_static @@ -72,6 +79,9 @@ elseif (MSVC) core_utility_static core_quantizer_static ) + if(ZVEC_ENABLE_OMEGA) + list(PREPEND _zvec_whole_archive_libs core_knn_omega_static) + endif() target_link_libraries(_zvec PRIVATE ${_zvec_whole_archive_libs} zvec_db diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 856f3e388..bdf1bbdf0 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -39,7 +39,15 @@ if(NOT RABITQ_SUPPORTED) list(FILTER ALL_CORE_SRCS EXCLUDE REGEX ".*/algorithm/hnsw_rabitq/.*") endif() -set(CORE_LIBS zvec_ailego zvec_turbo sparsehash magic_enum rabitqlib omega) +if(NOT ZVEC_ENABLE_OMEGA) + list(FILTER ALL_CORE_SRCS EXCLUDE REGEX ".*/algorithm/omega/.*") + list(FILTER ALL_CORE_SRCS EXCLUDE REGEX 
".*/interface/indexes/omega_.*") +endif() + +set(CORE_LIBS zvec_ailego zvec_turbo sparsehash magic_enum rabitqlib) +if(ZVEC_ENABLE_OMEGA) + list(APPEND CORE_LIBS omega) +endif() cc_library( NAME zvec_core STATIC STRICT PACKED diff --git a/src/core/algorithm/CMakeLists.txt b/src/core/algorithm/CMakeLists.txt index 6bf6c4390..7f8e4e0c7 100644 --- a/src/core/algorithm/CMakeLists.txt +++ b/src/core/algorithm/CMakeLists.txt @@ -7,7 +7,23 @@ cc_directory(flat_sparse) cc_directory(ivf) cc_directory(hnsw) cc_directory(hnsw_sparse) -cc_directory(omega) +if(ZVEC_ENABLE_OMEGA) + cc_directory(omega) +else() + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/omega_stub.cc + "// Stub implementation for disabled OMEGA support\n" + "namespace zvec { namespace core { /* empty namespace for compatibility */ } }\n" + ) + + cc_library( + NAME core_knn_omega + STATIC SHARED STRICT ALWAYS_LINK + SRCS ${CMAKE_CURRENT_BINARY_DIR}/omega_stub.cc + LIBS core_framework + INCS . ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + VERSION "${PROXIMA_ZVEC_VERSION}" + ) +endif() if(RABITQ_SUPPORTED) message(STATUS "BUILD RABITQ") cc_directory(hnsw_rabitq) diff --git a/src/core/interface/CMakeLists.txt b/src/core/interface/CMakeLists.txt index a6e568d6a..dd3b0d350 100644 --- a/src/core/interface/CMakeLists.txt +++ b/src/core/interface/CMakeLists.txt @@ -1,10 +1,37 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +file(GLOB CORE_INTERFACE_SRCS *.cc indexes/*.cc) +if(NOT ZVEC_ENABLE_OMEGA) + list(FILTER CORE_INTERFACE_SRCS EXCLUDE REGEX ".*/indexes/omega_index\\.cc$") + list(FILTER CORE_INTERFACE_SRCS EXCLUDE REGEX ".*/indexes/omega_training_session\\.cc$") +endif() + +set(CORE_INTERFACE_INCS + . 
+ ${PROJECT_ROOT_DIR}/src/include + ${PROJECT_ROOT_DIR}/src/ + ${PROJECT_ROOT_DIR}/src/core) +if(ZVEC_ENABLE_OMEGA) + list(APPEND CORE_INTERFACE_INCS ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include) +endif() + +set(CORE_INTERFACE_LIBS + zvec_ailego + core_framework + core_mix_reducer + core_knn_hnsw_rabitq + sparsehash + magic_enum + rabitqlib) +if(ZVEC_ENABLE_OMEGA) + list(APPEND CORE_INTERFACE_LIBS core_knn_omega) +endif() + cc_library( NAME core_interface STATIC STRICT ALWAYS_LINK - SRCS *.cc indexes/*.cc - INCS . ${PROJECT_ROOT_DIR}/src/include ${PROJECT_ROOT_DIR}/src/ ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include - LIBS zvec_ailego core_framework core_mix_reducer core_knn_omega core_knn_hnsw_rabitq sparsehash magic_enum rabitqlib + SRCS ${CORE_INTERFACE_SRCS} + INCS ${CORE_INTERFACE_INCS} + LIBS ${CORE_INTERFACE_LIBS} VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/interface/index_factory.cc b/src/core/interface/index_factory.cc index b4d0893f3..88cccf6f1 100644 --- a/src/core/interface/index_factory.cc +++ b/src/core/interface/index_factory.cc @@ -44,7 +44,12 @@ Index::Pointer IndexFactory::CreateAndInitIndex(const BaseIndexParam ¶m) { } else if (param.index_type == IndexType::kHNSW) { ptr = std::make_shared(); } else if (param.index_type == IndexType::kOMEGA) { +#if ZVEC_ENABLE_OMEGA ptr = std::make_shared(); +#else + LOG_ERROR("OMEGA is not supported on this platform"); + return nullptr; +#endif } else if (param.index_type == IndexType::kIVF) { ptr = std::make_shared(); } else if (param.index_type == IndexType::kHNSWRabitq) { diff --git a/src/core/interface/indexes/hnsw_index.cc b/src/core/interface/indexes/hnsw_index.cc index 76a33fe72..360b56c13 100644 --- a/src/core/interface/indexes/hnsw_index.cc +++ b/src/core/interface/indexes/hnsw_index.cc @@ -17,7 +17,9 @@ #include #include "algorithm/hnsw/hnsw_params.h" #include "algorithm/hnsw_sparse/hnsw_sparse_params.h" +#if ZVEC_ENABLE_OMEGA #include 
"algorithm/omega/omega_params.h" +#endif namespace zvec::core_interface { @@ -107,8 +109,10 @@ int HNSWIndex::_prepare_for_search( params.set(core::PARAM_HNSW_STREAMER_EF, real_search_ef); if (hnsw_search_param->training_query_id >= 0) { +#if ZVEC_ENABLE_OMEGA params.set(core::PARAM_OMEGA_SEARCHER_TRAINING_QUERY_ID, hnsw_search_param->training_query_id); +#endif } context->update(params); diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt index 65ad7421e..c2d541435 100644 --- a/src/db/CMakeLists.txt +++ b/src/db/CMakeLists.txt @@ -12,11 +12,20 @@ cc_directory(index) cc_directory(sqlengine) file(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h) +if(NOT ZVEC_ENABLE_OMEGA) + list(FILTER ALL_DB_SRCS EXCLUDE REGEX ".*/training/omega_model_trainer\\.cc$") + list(FILTER ALL_DB_SRCS EXCLUDE REGEX ".*/training/omega_training_coordinator\\.cc$") +endif() + +set(ZVEC_DB_INCS . ${CMAKE_CURRENT_BINARY_DIR}) +if(ZVEC_ENABLE_OMEGA) + list(APPEND ZVEC_DB_INCS ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include) +endif() cc_library( NAME zvec_db STATIC STRICT SRCS_NO_GLOB PACKED SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc - INCS . 
${CMAKE_CURRENT_BINARY_DIR} ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include + INCS ${ZVEC_DB_INCS} PUBINCS ${PROJECT_ROOT_DIR}/src/include LIBS zvec_ailego diff --git a/src/db/index/CMakeLists.txt b/src/db/index/CMakeLists.txt index 01e5a0661..d08b54dec 100644 --- a/src/db/index/CMakeLists.txt +++ b/src/db/index/CMakeLists.txt @@ -1,24 +1,37 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) -cc_library( - NAME zvec_index STATIC STRICT - SRCS *.cc - segment/*.cc - column/vector_column/*.cc - column/inverted_column/*.cc - storage/*.cc - storage/wal/*.cc - common/*.cc - ../training/*.cc - LIBS zvec_common +file(GLOB ZVEC_INDEX_SRCS + *.cc + segment/*.cc + column/vector_column/*.cc + column/inverted_column/*.cc + storage/*.cc + storage/wal/*.cc + common/*.cc + ../training/*.cc) + +if(NOT ZVEC_ENABLE_OMEGA) + list(FILTER ZVEC_INDEX_SRCS EXCLUDE REGEX ".*/training/omega_model_trainer\\.cc$") + list(FILTER ZVEC_INDEX_SRCS EXCLUDE REGEX ".*/training/omega_training_coordinator\\.cc$") +endif() + +set(ZVEC_INDEX_LIBS + zvec_common zvec_proto rocksdb core_interface - omega Arrow::arrow_static Arrow::arrow_compute - Arrow::arrow_dataset + Arrow::arrow_dataset) +if(ZVEC_ENABLE_OMEGA) + list(APPEND ZVEC_INDEX_LIBS omega) +endif() + +cc_library( + NAME zvec_index STATIC STRICT + SRCS ${ZVEC_INDEX_SRCS} + LIBS ${ZVEC_INDEX_LIBS} INCS . 
${PROJECT_ROOT_DIR}/src VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/db/index/common/schema.cc b/src/db/index/common/schema.cc index c43b8964e..2b1f01209 100644 --- a/src/db/index/common/schema.cc +++ b/src/db/index/common/schema.cc @@ -170,6 +170,12 @@ Status FieldSchema::validate() const { } } +#if !ZVEC_ENABLE_OMEGA + if (index_params_->type() == IndexType::OMEGA) { + return Status::NotSupported("OMEGA is not supported on Android"); + } +#endif + if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) { auto iter = quantize_type_map.find(data_type_); diff --git a/src/db/training/omega_training_coordinator.h b/src/db/training/omega_training_coordinator.h index e5f59b341..d62ff4584 100644 --- a/src/db/training/omega_training_coordinator.h +++ b/src/db/training/omega_training_coordinator.h @@ -37,6 +37,7 @@ struct OmegaTrainingParams { OmegaTrainingParams ResolveOmegaTrainingParams( const IndexParams::Ptr &index_params); +#if ZVEC_ENABLE_OMEGA Result CollectOmegaTrainingDataBeforeFlush( const Segment::Ptr &segment, const std::string &field_name, const VectorColumnIndexer::Ptr &vector_indexer, @@ -55,5 +56,42 @@ Status TrainOmegaModelAfterRetrainCollect( const TrainingDataCollectorResult &training_result, const std::string &model_output_dir, SegmentID segment_id, const std::string &field_name); +#else +inline OmegaTrainingParams ResolveOmegaTrainingParams( + const IndexParams::Ptr & /*index_params*/) { + return {}; +} + +inline Result CollectOmegaTrainingDataBeforeFlush( + const Segment::Ptr & /*segment*/, const std::string & /*field_name*/, + const VectorColumnIndexer::Ptr & /*vector_indexer*/, + const OmegaTrainingParams & /*params*/, + const std::string & /*model_output_dir*/) { + return tl::make_unexpected( + Status::NotSupported("OMEGA is disabled on Android")); +} + +inline Result CollectOmegaRetrainingData( + const Segment::Ptr & /*segment*/, const std::string & /*field_name*/, + const std::vector & /*indexers*/, + const 
OmegaTrainingParams & /*params*/, + const std::string & /*model_output_dir*/) { + return tl::make_unexpected( + Status::NotSupported("OMEGA is disabled on Android")); +} + +inline Status TrainOmegaModelAfterBuild( + const TrainingDataCollectorResult & /*training_result*/, + const std::string & /*model_output_dir*/) { + return Status::NotSupported("OMEGA is disabled on Android"); +} + +inline Status TrainOmegaModelAfterRetrainCollect( + const TrainingDataCollectorResult & /*training_result*/, + const std::string & /*model_output_dir*/, SegmentID /*segment_id*/, + const std::string & /*field_name*/) { + return Status::NotSupported("OMEGA is disabled on Android"); +} +#endif } // namespace zvec diff --git a/src/db/training/training_data_collector.cc b/src/db/training/training_data_collector.cc index 883b56de2..50e431310 100644 --- a/src/db/training/training_data_collector.cc +++ b/src/db/training/training_data_collector.cc @@ -16,11 +16,14 @@ #include #include #include +#include #include #include #include #include +#if ZVEC_ENABLE_OMEGA #include +#endif #include #include #include "db/index/column/vector_column/vector_column_params.h" @@ -28,6 +31,82 @@ namespace zvec { +namespace { + +#if !ZVEC_ENABLE_OMEGA +std::vector> ComputeGroundTruthFallbackBruteForce( + const Segment::Ptr &segment, const std::string &field_name, + const std::vector> &queries, size_t topk, + const std::vector &query_doc_ids, MetricType metric_type) { + std::vector> ground_truth(queries.size()); + const bool held_out_mode = + !query_doc_ids.empty() && query_doc_ids.size() == queries.size(); + const uint64_t doc_count = segment->doc_count(); + if (queries.empty() || doc_count == 0) { + return ground_truth; + } + + for (size_t q = 0; q < queries.size(); ++q) { + std::vector> scored; + scored.reserve(static_cast(doc_count)); + + for (uint64_t doc_id = 0; doc_id < doc_count; ++doc_id) { + if (held_out_mode && doc_id == query_doc_ids[q]) { + continue; + } + auto doc = segment->Fetch(doc_id); + if 
(!doc) { + continue; + } + auto vector_opt = doc->get>(field_name); + if (!vector_opt.has_value()) { + continue; + } + const auto &base = vector_opt.value(); + if (base.size() != queries[q].size()) { + continue; + } + + float score = 0.0f; + if (metric_type == MetricType::IP || metric_type == MetricType::COSINE) { + for (size_t d = 0; d < base.size(); ++d) { + score += queries[q][d] * base[d]; + } + } else { + for (size_t d = 0; d < base.size(); ++d) { + const float diff = queries[q][d] - base[d]; + score += diff * diff; + } + } + scored.emplace_back(score, doc_id); + } + + const auto limit = std::min(topk, scored.size()); + if (metric_type == MetricType::L2) { + std::partial_sort(scored.begin(), scored.begin() + limit, scored.end(), + [](const auto &lhs, const auto &rhs) { + return lhs.first < rhs.first; + }); + } else { + std::partial_sort(scored.begin(), scored.begin() + limit, scored.end(), + [](const auto &lhs, const auto &rhs) { + return lhs.first > rhs.first; + }); + } + + auto &result = ground_truth[q]; + result.reserve(limit); + for (size_t i = 0; i < limit; ++i) { + result.push_back(scored[i].second); + } + } + + return ground_truth; +} +#endif + +} // namespace + // ============ DEBUG TIMING UTILITIES ============ namespace { struct TimingStatsState { @@ -577,6 +656,7 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Exact results, uses batch matrix multiplication // ============================================================ // Convert zvec MetricType to omega MetricType +#if ZVEC_ENABLE_OMEGA omega::MetricType omega_metric; switch (metric_type) { case MetricType::L2: @@ -590,6 +670,7 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( omega_metric = omega::MetricType::IP; break; } +#endif // Step 1: Load all base vectors into memory auto load_start = std::chrono::high_resolution_clock::now(); @@ -669,10 +750,15 @@ std::vector> TrainingDataCollector::ComputeGroundTruth( // Step 3: Call OmegaLib's fast ground truth computation (Eigen) 
auto compute_start = std::chrono::high_resolution_clock::now(); +#if ZVEC_ENABLE_OMEGA ground_truth = omega::ComputeGroundTruth( base_vectors.data(), query_flat.data(), doc_count, queries.size(), dim, topk, omega_metric, held_out_mode, query_doc_ids); // Pass query-to-base mapping for correct self-exclusion +#else + ground_truth = ComputeGroundTruthFallbackBruteForce( + segment, field_name, queries, topk, query_doc_ids, metric_type); +#endif auto compute_end = std::chrono::high_resolution_clock::now(); auto compute_ms = std::chrono::duration_cast( diff --git a/tests/core/algorithm/CMakeLists.txt b/tests/core/algorithm/CMakeLists.txt index 5e317baeb..c5f19e6c3 100644 --- a/tests/core/algorithm/CMakeLists.txt +++ b/tests/core/algorithm/CMakeLists.txt @@ -10,4 +10,6 @@ cc_directories(hnsw_sparse) if(RABITQ_SUPPORTED) cc_directories(hnsw_rabitq) endif() +if(ZVEC_ENABLE_OMEGA) cc_directories(omega) +endif() diff --git a/tests/core/interface/CMakeLists.txt b/tests/core/interface/CMakeLists.txt index 829ee0172..4323b5722 100644 --- a/tests/core/interface/CMakeLists.txt +++ b/tests/core/interface/CMakeLists.txt @@ -1,6 +1,9 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc) +if(NOT ZVEC_ENABLE_OMEGA) + list(FILTER ALL_TEST_SRCS EXCLUDE REGEX ".*/omega_.*_test\\.cc$") +endif() set(ZVEC_TEST_CORE_INTERFACE_LIBS zvec_ailego @@ -17,9 +20,20 @@ set(ZVEC_TEST_CORE_INTERFACE_LIBS core_knn_hnsw_sparse core_knn_ivf core_knn_hnsw_rabitq - core_knn_omega - omega ) +if(ZVEC_ENABLE_OMEGA) + list(APPEND ZVEC_TEST_CORE_INTERFACE_LIBS core_knn_omega omega) +endif() + +set(ZVEC_TEST_CORE_INTERFACE_INCS + . 
+ ${PROJECT_ROOT_DIR}/src + ${PROJECT_ROOT_DIR}/src/core + ${PROJECT_ROOT_DIR}/src/core/algorithm) +if(ZVEC_ENABLE_OMEGA) + list(APPEND ZVEC_TEST_CORE_INTERFACE_INCS + ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include) +endif() foreach(CC_SRCS ${ALL_TEST_SRCS}) get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE) @@ -28,7 +42,6 @@ foreach(CC_SRCS ${ALL_TEST_SRCS}) STRICT LIBS ${ZVEC_TEST_CORE_INTERFACE_LIBS} SRCS ${CC_SRCS} - INCS . ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm - ${PROJECT_ROOT_DIR}/thirdparty/omega/OMEGALib/include + INCS ${ZVEC_TEST_CORE_INTERFACE_INCS} ) endforeach() diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index fcf590447..83f93585f 100644 --- a/thirdparty/CMakeLists.txt +++ b/thirdparty/CMakeLists.txt @@ -26,5 +26,9 @@ add_subdirectory(CRoaring CRoaring EXCLUDE_FROM_ALL) add_subdirectory(arrow arrow EXCLUDE_FROM_ALL) add_subdirectory(magic_enum magic_enum EXCLUDE_FROM_ALL) add_subdirectory(RaBitQ-Library RaBitQ-Library EXCLUDE_FROM_ALL) -message(STATUS "ZVEC: Building omega library with LightGBM support") -add_subdirectory(omega omega EXCLUDE_FROM_ALL) +if(ZVEC_ENABLE_OMEGA) + message(STATUS "ZVEC: Building omega library with LightGBM support") + add_subdirectory(omega omega EXCLUDE_FROM_ALL) +else() + message(STATUS "ZVEC: OMEGA support disabled") +endif() diff --git a/tools/core/CMakeLists.txt b/tools/core/CMakeLists.txt index 7c0d81ba2..791f577df 100644 --- a/tools/core/CMakeLists.txt +++ b/tools/core/CMakeLists.txt @@ -16,7 +16,10 @@ set(ZVEC_TOOL_CORE_INTERFACE_LIBS core_interface ) -set(ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS core_mix_reducer core_knn_omega) +set(ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS core_mix_reducer) +if(ZVEC_ENABLE_OMEGA) + list(APPEND ZVEC_TOOL_CORE_INTERFACE_IMPL_LIBS core_knn_omega) +endif() cc_binary( NAME txt2vecs From aec406c151f5090e7af3b7eb83c7d5b923a374d1 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 3 Apr 2026 18:50:02 +0800 Subject: 
[PATCH 117/126] Fix Android examples OpenMP and pytest 9 fixture skips --- examples/c++/CMakeLists.txt | 2 +- examples/c/CMakeLists.txt | 2 +- python/tests/test_collection.py | 13 +++++++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/examples/c++/CMakeLists.txt b/examples/c++/CMakeLists.txt index 63a3db85e..a07445d0a 100644 --- a/examples/c++/CMakeLists.txt +++ b/examples/c++/CMakeLists.txt @@ -50,7 +50,7 @@ find_package(Threads REQUIRED) find_package(OpenMP QUIET) set(zvec_openmp_deps) -if(OpenMP_FOUND) +if(OpenMP_FOUND AND NOT ANDROID) list(APPEND zvec_openmp_deps OpenMP::OpenMP_CXX) endif() diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index eb4a35d7d..737cc4465 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -62,7 +62,7 @@ find_package(Threads REQUIRED) find_package(OpenMP QUIET) set(zvec_openmp_deps) -if(OpenMP_FOUND) +if(OpenMP_FOUND AND NOT ANDROID) list(APPEND zvec_openmp_deps OpenMP::OpenMP_CXX) endif() diff --git a/python/tests/test_collection.py b/python/tests/test_collection.py index 5113bf6a5..542257099 100644 --- a/python/tests/test_collection.py +++ b/python/tests/test_collection.py @@ -54,6 +54,11 @@ not OMEGA_AVAILABLE, reason="OMEGA is disabled on this build/platform" ) + +def _require_omega() -> None: + if not OMEGA_AVAILABLE: + pytest.skip("OMEGA is disabled on this build/platform") + # ==================== Common ==================== @@ -149,9 +154,9 @@ def test_collection( print(f"Warning: failed to destroy collection: {e}") -@OMEGA_ANDROID_SKIP @pytest.fixture(scope="session") def omega_collection_schema(): + _require_omega() return zvec.CollectionSchema( name="omega_test_collection", fields=[ @@ -180,11 +185,11 @@ def omega_collection_schema(): ) -@OMEGA_ANDROID_SKIP @pytest.fixture(scope="function") def omega_test_collection( tmp_path_factory, omega_collection_schema, collection_option ) -> Collection: + _require_omega() temp_dir = tmp_path_factory.mktemp("zvec_omega") 
collection_path = temp_dir / "omega_test_collection" @@ -208,9 +213,9 @@ def omega_test_collection( print(f"Warning: failed to destroy omega collection: {e}") -@OMEGA_ANDROID_SKIP @pytest.fixture def omega_multiple_docs(): + _require_omega() return [ Doc( id=f"{id}", @@ -221,9 +226,9 @@ def omega_multiple_docs(): ] -@OMEGA_ANDROID_SKIP @pytest.fixture def omega_workflow_docs(): + _require_omega() return [ Doc( id=f"{id}", From c173e8ffa6c3dca1f653a7e7c7f210e9abef447d Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 3 Apr 2026 21:51:33 +0800 Subject: [PATCH 118/126] chore: sync pytest omega test formatting --- python/tests/test_collection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/tests/test_collection.py b/python/tests/test_collection.py index 542257099..6f37b5dfe 100644 --- a/python/tests/test_collection.py +++ b/python/tests/test_collection.py @@ -59,6 +59,7 @@ def _require_omega() -> None: if not OMEGA_AVAILABLE: pytest.skip("OMEGA is disabled on this build/platform") + # ==================== Common ==================== From 24641125026cf0e175f311101bac9d5a4d0fe8f6 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Fri, 3 Apr 2026 23:47:43 +0800 Subject: [PATCH 119/126] fix: link omega examples on windows --- examples/c++/CMakeLists.txt | 21 ++++++++++++++++----- examples/c/CMakeLists.txt | 10 ++++++++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/examples/c++/CMakeLists.txt b/examples/c++/CMakeLists.txt index a07445d0a..501c6b9dc 100644 --- a/examples/c++/CMakeLists.txt +++ b/examples/c++/CMakeLists.txt @@ -13,8 +13,13 @@ if(NOT DEFINED HOST_BUILD_DIR) endif() set(ZVEC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../src/include) -set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) -set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) +if(WIN32) + set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib/$) + set(ZVEC_DEPENDENCY_LIB_DIR 
${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib/$) +else() + set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) + set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) +endif() set(_zvec_host_omega_default OFF) if(WIN32) @@ -45,6 +50,12 @@ else() set(PROTOBUF_LIB protobuf) endif() +if(WIN32) + set(ZVEC_LIGHTGBM_LIB lib_lightgbm) +else() + set(ZVEC_LIGHTGBM_LIB _lightgbm) +endif() + # --- Dependency groups --- find_package(Threads REQUIRED) find_package(OpenMP QUIET) @@ -59,7 +70,7 @@ set(zvec_core_deps ${zvec_openmp_deps} ) if(ZVEC_ENABLE_OMEGA) - list(APPEND zvec_core_deps omega _lightgbm) + list(APPEND zvec_core_deps omega ${ZVEC_LIGHTGBM_LIB}) endif() if (NOT WIN32) @@ -89,7 +100,7 @@ if (NOT WIN32) ${zvec_openmp_deps} ) if(ZVEC_ENABLE_OMEGA) - list(APPEND zvec_db_deps omega _lightgbm) + list(APPEND zvec_db_deps omega ${ZVEC_LIGHTGBM_LIB}) endif() else () # Windows static libraries use different naming conventions @@ -120,7 +131,7 @@ else () ${zvec_openmp_deps} ) if(ZVEC_ENABLE_OMEGA) - list(APPEND zvec_db_deps omega _lightgbm) + list(APPEND zvec_db_deps omega ${ZVEC_LIGHTGBM_LIB}) endif() endif () diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index 737cc4465..f2da06c73 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -77,6 +77,12 @@ else() set(PROTOBUF_LIB protobuf) endif() +if(WIN32) + set(ZVEC_LIGHTGBM_LIB lib_lightgbm) +else() + set(ZVEC_LIGHTGBM_LIB _lightgbm) +endif() + # --- Dependency groups --- if(NOT WIN32) set(zvec_c_api_deps @@ -98,7 +104,7 @@ if(NOT WIN32) ${CMAKE_DL_LIBS} ) if(ZVEC_ENABLE_OMEGA) - list(APPEND zvec_c_api_deps omega _lightgbm) + list(APPEND zvec_c_api_deps omega ${ZVEC_LIGHTGBM_LIB}) endif() else() # Windows static libraries use different naming conventions @@ -123,7 +129,7 @@ else() shlwapi ) if(ZVEC_ENABLE_OMEGA) - list(APPEND zvec_c_api_deps omega _lightgbm) + list(APPEND zvec_c_api_deps omega 
${ZVEC_LIGHTGBM_LIB}) endif() endif() From 04dedbd869710bfd8c201ec0d55b14e2fa7a63e3 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sat, 4 Apr 2026 00:26:17 +0800 Subject: [PATCH 120/126] fix: stabilize omega example linking --- examples/c++/CMakeLists.txt | 58 +++++++++++++++++++++++++------------ examples/c/CMakeLists.txt | 55 +++++++++++++++++++++++------------ 2 files changed, 76 insertions(+), 37 deletions(-) diff --git a/examples/c++/CMakeLists.txt b/examples/c++/CMakeLists.txt index 501c6b9dc..9fcdfd18f 100644 --- a/examples/c++/CMakeLists.txt +++ b/examples/c++/CMakeLists.txt @@ -13,27 +13,21 @@ if(NOT DEFINED HOST_BUILD_DIR) endif() set(ZVEC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../src/include) +set(ZVEC_LIB_ROOT_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) +set(ZVEC_DEPENDENCY_LIB_ROOT_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) if(WIN32) - set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib/$) - set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib/$) + set(ZVEC_LIB_DIR ${ZVEC_LIB_ROOT_DIR}/$) + set(ZVEC_DEPENDENCY_LIB_DIR ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/$) else() - set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) - set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) + set(ZVEC_LIB_DIR ${ZVEC_LIB_ROOT_DIR}) + set(ZVEC_DEPENDENCY_LIB_DIR ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}) endif() -set(_zvec_host_omega_default OFF) -if(WIN32) - if(EXISTS "${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib/Release/omega.lib" AND - EXISTS "${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib/Release/lib_lightgbm.lib") - set(_zvec_host_omega_default ON) - endif() +if(ANDROID) + option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" OFF) else() - if(EXISTS "${ZVEC_DEPENDENCY_LIB_DIR}/libomega.a" AND - EXISTS 
"${ZVEC_DEPENDENCY_LIB_DIR}/lib_lightgbm.a") - set(_zvec_host_omega_default ON) - endif() + option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" ON) endif() -option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" ${_zvec_host_omega_default}) # Add include and library search paths include_directories(${ZVEC_INCLUDE_DIR}) @@ -56,6 +50,32 @@ else() set(ZVEC_LIGHTGBM_LIB _lightgbm) endif() +if(ZVEC_ENABLE_OMEGA) + set(_zvec_omega_search_paths + ${ZVEC_DEPENDENCY_LIB_ROOT_DIR} + ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/Debug + ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/Release + ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/RelWithDebInfo + ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/MinSizeRel + ) + find_library(ZVEC_OMEGA_LIB + NAMES omega + PATHS ${_zvec_omega_search_paths} + NO_DEFAULT_PATH + ) + find_library(ZVEC_LIGHTGBM_LIB + NAMES lib_lightgbm _lightgbm + PATHS ${_zvec_omega_search_paths} + NO_DEFAULT_PATH + ) + if(NOT ZVEC_OMEGA_LIB OR NOT ZVEC_LIGHTGBM_LIB) + message(FATAL_ERROR + "ZVEC_ENABLE_OMEGA=ON but failed to locate OMEGA host libraries under " + "${ZVEC_DEPENDENCY_LIB_ROOT_DIR}" + ) + endif() +endif() + # --- Dependency groups --- find_package(Threads REQUIRED) find_package(OpenMP QUIET) @@ -70,7 +90,7 @@ set(zvec_core_deps ${zvec_openmp_deps} ) if(ZVEC_ENABLE_OMEGA) - list(APPEND zvec_core_deps omega ${ZVEC_LIGHTGBM_LIB}) + list(APPEND zvec_core_deps ${ZVEC_OMEGA_LIB} ${ZVEC_LIGHTGBM_LIB}) endif() if (NOT WIN32) @@ -100,7 +120,7 @@ if (NOT WIN32) ${zvec_openmp_deps} ) if(ZVEC_ENABLE_OMEGA) - list(APPEND zvec_db_deps omega ${ZVEC_LIGHTGBM_LIB}) + list(APPEND zvec_db_deps ${ZVEC_OMEGA_LIB} ${ZVEC_LIGHTGBM_LIB}) endif() else () # Windows static libraries use different naming conventions @@ -131,7 +151,7 @@ else () ${zvec_openmp_deps} ) if(ZVEC_ENABLE_OMEGA) - list(APPEND zvec_db_deps omega ${ZVEC_LIGHTGBM_LIB}) + list(APPEND zvec_db_deps ${ZVEC_OMEGA_LIB} ${ZVEC_LIGHTGBM_LIB}) endif() endif () @@ -171,7 +191,7 @@ elseif(APPLE) ) 
if(ZVEC_ENABLE_OMEGA) target_link_libraries(zvec-core INTERFACE - -Wl,-force_load ${ZVEC_DEPENDENCY_LIB_DIR}/libomega.a + -Wl,-force_load ${ZVEC_OMEGA_LIB} ) endif() elseif(ANDROID) diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index f2da06c73..a54e8987d 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -29,29 +29,22 @@ endif() set(ZVEC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../src/include) set(ZVEC_GENERATED_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/src/generated) +set(ZVEC_LIB_ROOT_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) +set(ZVEC_DEPENDENCY_LIB_ROOT_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) -# On Windows, libraries are in Debug/Release subdirectories if(WIN32) - set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib/$) - set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib/$) + set(ZVEC_LIB_DIR ${ZVEC_LIB_ROOT_DIR}/$) + set(ZVEC_DEPENDENCY_LIB_DIR ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/$) else() - set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) - set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) + set(ZVEC_LIB_DIR ${ZVEC_LIB_ROOT_DIR}) + set(ZVEC_DEPENDENCY_LIB_DIR ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}) endif() -set(_zvec_host_omega_default OFF) -if(WIN32) - if(EXISTS "${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib/Release/omega.lib" AND - EXISTS "${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib/Release/lib_lightgbm.lib") - set(_zvec_host_omega_default ON) - endif() +if(ANDROID) + option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" OFF) else() - if(EXISTS "${ZVEC_DEPENDENCY_LIB_DIR}/libomega.a" AND - EXISTS "${ZVEC_DEPENDENCY_LIB_DIR}/lib_lightgbm.a") - set(_zvec_host_omega_default ON) - endif() + option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the 
host build" ON) endif() -option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" ${_zvec_host_omega_default}) # Add include and library search paths include_directories(${ZVEC_INCLUDE_DIR} ${ZVEC_GENERATED_INCLUDE_DIR}) @@ -77,6 +70,32 @@ else() set(PROTOBUF_LIB protobuf) endif() +if(ZVEC_ENABLE_OMEGA) + set(_zvec_omega_search_paths + ${ZVEC_DEPENDENCY_LIB_ROOT_DIR} + ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/Debug + ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/Release + ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/RelWithDebInfo + ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/MinSizeRel + ) + find_library(ZVEC_OMEGA_LIB + NAMES omega + PATHS ${_zvec_omega_search_paths} + NO_DEFAULT_PATH + ) + find_library(ZVEC_LIGHTGBM_LIB + NAMES lib_lightgbm _lightgbm + PATHS ${_zvec_omega_search_paths} + NO_DEFAULT_PATH + ) + if(NOT ZVEC_OMEGA_LIB OR NOT ZVEC_LIGHTGBM_LIB) + message(FATAL_ERROR + "ZVEC_ENABLE_OMEGA=ON but failed to locate OMEGA host libraries under " + "${ZVEC_DEPENDENCY_LIB_ROOT_DIR}" + ) + endif() +endif() + if(WIN32) set(ZVEC_LIGHTGBM_LIB lib_lightgbm) else() @@ -104,7 +123,7 @@ if(NOT WIN32) ${CMAKE_DL_LIBS} ) if(ZVEC_ENABLE_OMEGA) - list(APPEND zvec_c_api_deps omega ${ZVEC_LIGHTGBM_LIB}) + list(APPEND zvec_c_api_deps ${ZVEC_OMEGA_LIB} ${ZVEC_LIGHTGBM_LIB}) endif() else() # Windows static libraries use different naming conventions @@ -129,7 +148,7 @@ else() shlwapi ) if(ZVEC_ENABLE_OMEGA) - list(APPEND zvec_c_api_deps omega ${ZVEC_LIGHTGBM_LIB}) + list(APPEND zvec_c_api_deps ${ZVEC_OMEGA_LIB} ${ZVEC_LIGHTGBM_LIB}) endif() endif() From 5fafa0e3c388c5d97bdb4c06301c257b206942ad Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sat, 4 Apr 2026 01:47:09 +0800 Subject: [PATCH 121/126] fix: simplify example library paths --- examples/c++/CMakeLists.txt | 23 ++++++++--------------- examples/c/CMakeLists.txt | 24 ++++++++---------------- 2 files changed, 16 insertions(+), 31 deletions(-) diff --git a/examples/c++/CMakeLists.txt b/examples/c++/CMakeLists.txt index 9fcdfd18f..02b051e2d 100644 
--- a/examples/c++/CMakeLists.txt +++ b/examples/c++/CMakeLists.txt @@ -13,15 +13,8 @@ if(NOT DEFINED HOST_BUILD_DIR) endif() set(ZVEC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../src/include) -set(ZVEC_LIB_ROOT_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) -set(ZVEC_DEPENDENCY_LIB_ROOT_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) -if(WIN32) - set(ZVEC_LIB_DIR ${ZVEC_LIB_ROOT_DIR}/$) - set(ZVEC_DEPENDENCY_LIB_DIR ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/$) -else() - set(ZVEC_LIB_DIR ${ZVEC_LIB_ROOT_DIR}) - set(ZVEC_DEPENDENCY_LIB_DIR ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}) -endif() +set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) +set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) if(ANDROID) option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" OFF) @@ -52,11 +45,11 @@ endif() if(ZVEC_ENABLE_OMEGA) set(_zvec_omega_search_paths - ${ZVEC_DEPENDENCY_LIB_ROOT_DIR} - ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/Debug - ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/Release - ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/RelWithDebInfo - ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/MinSizeRel + ${ZVEC_DEPENDENCY_LIB_DIR} + ${ZVEC_DEPENDENCY_LIB_DIR}/Debug + ${ZVEC_DEPENDENCY_LIB_DIR}/Release + ${ZVEC_DEPENDENCY_LIB_DIR}/RelWithDebInfo + ${ZVEC_DEPENDENCY_LIB_DIR}/MinSizeRel ) find_library(ZVEC_OMEGA_LIB NAMES omega @@ -71,7 +64,7 @@ if(ZVEC_ENABLE_OMEGA) if(NOT ZVEC_OMEGA_LIB OR NOT ZVEC_LIGHTGBM_LIB) message(FATAL_ERROR "ZVEC_ENABLE_OMEGA=ON but failed to locate OMEGA host libraries under " - "${ZVEC_DEPENDENCY_LIB_ROOT_DIR}" + "${ZVEC_DEPENDENCY_LIB_DIR}" ) endif() endif() diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index a54e8987d..83e15a8ba 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -29,16 +29,8 @@ endif() set(ZVEC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../src/include) set(ZVEC_GENERATED_INCLUDE_DIR 
${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/src/generated) -set(ZVEC_LIB_ROOT_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) -set(ZVEC_DEPENDENCY_LIB_ROOT_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) - -if(WIN32) - set(ZVEC_LIB_DIR ${ZVEC_LIB_ROOT_DIR}/$) - set(ZVEC_DEPENDENCY_LIB_DIR ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/$) -else() - set(ZVEC_LIB_DIR ${ZVEC_LIB_ROOT_DIR}) - set(ZVEC_DEPENDENCY_LIB_DIR ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}) -endif() +set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) +set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) if(ANDROID) option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" OFF) @@ -72,11 +64,11 @@ endif() if(ZVEC_ENABLE_OMEGA) set(_zvec_omega_search_paths - ${ZVEC_DEPENDENCY_LIB_ROOT_DIR} - ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/Debug - ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/Release - ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/RelWithDebInfo - ${ZVEC_DEPENDENCY_LIB_ROOT_DIR}/MinSizeRel + ${ZVEC_DEPENDENCY_LIB_DIR} + ${ZVEC_DEPENDENCY_LIB_DIR}/Debug + ${ZVEC_DEPENDENCY_LIB_DIR}/Release + ${ZVEC_DEPENDENCY_LIB_DIR}/RelWithDebInfo + ${ZVEC_DEPENDENCY_LIB_DIR}/MinSizeRel ) find_library(ZVEC_OMEGA_LIB NAMES omega @@ -91,7 +83,7 @@ if(ZVEC_ENABLE_OMEGA) if(NOT ZVEC_OMEGA_LIB OR NOT ZVEC_LIGHTGBM_LIB) message(FATAL_ERROR "ZVEC_ENABLE_OMEGA=ON but failed to locate OMEGA host libraries under " - "${ZVEC_DEPENDENCY_LIB_ROOT_DIR}" + "${ZVEC_DEPENDENCY_LIB_DIR}" ) endif() endif() From c61bf5865a9021f08a995cda42054ad5bebb7d88 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sat, 4 Apr 2026 01:54:20 +0800 Subject: [PATCH 122/126] ci: add windows examples workflow --- .github/workflows/06-windows-examples.yml | 98 +++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 .github/workflows/06-windows-examples.yml diff --git a/.github/workflows/06-windows-examples.yml b/.github/workflows/06-windows-examples.yml new file 
mode 100644 index 000000000..6f9d1600c --- /dev/null +++ b/.github/workflows/06-windows-examples.yml @@ -0,0 +1,98 @@ +name: Windows Examples + +on: + push: + paths: + - '.github/workflows/06-windows-examples.yml' + - '.github/workflows/05-windows-build.yml' + - 'examples/c/**' + - 'examples/c++/**' + - 'src/**' + - 'CMakeLists.txt' + - 'pyproject.toml' + pull_request: + paths: + - '.github/workflows/06-windows-examples.yml' + - '.github/workflows/05-windows-build.yml' + - 'examples/c/**' + - 'examples/c++/**' + - 'src/**' + - 'CMakeLists.txt' + - 'pyproject.toml' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + windows-examples: + name: Windows Examples (windows-2022) + runs-on: windows-2022 + + steps: + - name: Checkout code + uses: actions/checkout@v6 + with: + submodules: recursive + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.10' + cache: 'pip' + cache-dependency-path: 'pyproject.toml' + + - name: Set up MSVC environment + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: x64 + + - name: Set up environment variables + run: | + $nproc = (Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors + echo "NPROC=$nproc" >> $env:GITHUB_ENV + echo "Using $nproc parallel jobs for builds" + shell: powershell + + - name: Install dependencies + run: | + python -m pip install --upgrade pip ` + pybind11==3.0 ` + cmake==3.30.0 ` + ninja==1.11.1 ` + scikit-build-core ` + setuptools_scm + shell: powershell + + - name: Build host libraries + run: | + cd "$env:GITHUB_WORKSPACE" + $env:CMAKE_GENERATOR = "Ninja" + $env:CMAKE_BUILD_PARALLEL_LEVEL = "$env:NPROC" + python -m pip install -v . ` + --no-build-isolation + shell: powershell + + - name: Build C++ examples + run: | + cd "$env:GITHUB_WORKSPACE\examples\c++" + if (Test-Path build) { Remove-Item -Recurse -Force build } + mkdir build + cd build + cmake .. 
-G Ninja -DCMAKE_BUILD_TYPE=Release + cmake --build . --parallel $env:NPROC + shell: powershell + + - name: Build C examples + run: | + cd "$env:GITHUB_WORKSPACE\examples\c" + if (Test-Path build) { Remove-Item -Recurse -Force build } + mkdir build + cd build + cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release + cmake --build . --parallel $env:NPROC + shell: powershell From 1bcae21cb759f47727ef78ff80362e3d9bad3f0d Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sat, 4 Apr 2026 01:56:55 +0800 Subject: [PATCH 123/126] ci: simplify windows examples triggers --- .github/workflows/06-windows-examples.yml | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/.github/workflows/06-windows-examples.yml b/.github/workflows/06-windows-examples.yml index 6f9d1600c..5c2c2cfe3 100644 --- a/.github/workflows/06-windows-examples.yml +++ b/.github/workflows/06-windows-examples.yml @@ -1,24 +1,7 @@ name: Windows Examples on: - push: - paths: - - '.github/workflows/06-windows-examples.yml' - - '.github/workflows/05-windows-build.yml' - - 'examples/c/**' - - 'examples/c++/**' - - 'src/**' - - 'CMakeLists.txt' - - 'pyproject.toml' - pull_request: - paths: - - '.github/workflows/06-windows-examples.yml' - - '.github/workflows/05-windows-build.yml' - - 'examples/c/**' - - 'examples/c++/**' - - 'src/**' - - 'CMakeLists.txt' - - 'pyproject.toml' + workflow_call: workflow_dispatch: concurrency: From 0c0d76749b4c24900625bf157f0c90fb558eb921 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sat, 4 Apr 2026 01:59:27 +0800 Subject: [PATCH 124/126] ci: remove windows examples workflow --- .github/workflows/06-windows-examples.yml | 81 ----------------------- 1 file changed, 81 deletions(-) delete mode 100644 .github/workflows/06-windows-examples.yml diff --git a/.github/workflows/06-windows-examples.yml b/.github/workflows/06-windows-examples.yml deleted file mode 100644 index 5c2c2cfe3..000000000 --- a/.github/workflows/06-windows-examples.yml +++ /dev/null @@ -1,81 
+0,0 @@ -name: Windows Examples - -on: - workflow_call: - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - windows-examples: - name: Windows Examples (windows-2022) - runs-on: windows-2022 - - steps: - - name: Checkout code - uses: actions/checkout@v6 - with: - submodules: recursive - - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: '3.10' - cache: 'pip' - cache-dependency-path: 'pyproject.toml' - - - name: Set up MSVC environment - uses: ilammy/msvc-dev-cmd@v1 - with: - arch: x64 - - - name: Set up environment variables - run: | - $nproc = (Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors - echo "NPROC=$nproc" >> $env:GITHUB_ENV - echo "Using $nproc parallel jobs for builds" - shell: powershell - - - name: Install dependencies - run: | - python -m pip install --upgrade pip ` - pybind11==3.0 ` - cmake==3.30.0 ` - ninja==1.11.1 ` - scikit-build-core ` - setuptools_scm - shell: powershell - - - name: Build host libraries - run: | - cd "$env:GITHUB_WORKSPACE" - $env:CMAKE_GENERATOR = "Ninja" - $env:CMAKE_BUILD_PARALLEL_LEVEL = "$env:NPROC" - python -m pip install -v . ` - --no-build-isolation - shell: powershell - - - name: Build C++ examples - run: | - cd "$env:GITHUB_WORKSPACE\examples\c++" - if (Test-Path build) { Remove-Item -Recurse -Force build } - mkdir build - cd build - cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release - cmake --build . --parallel $env:NPROC - shell: powershell - - - name: Build C examples - run: | - cd "$env:GITHUB_WORKSPACE\examples\c" - if (Test-Path build) { Remove-Item -Recurse -Force build } - mkdir build - cd build - cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release - cmake --build . 
--parallel $env:NPROC - shell: powershell From bd4697c1b06ef69a89f6956e2481be1d8ba21567 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 5 Apr 2026 19:35:34 +0800 Subject: [PATCH 125/126] Add OMEGA integration test and examples --- examples/c++/CMakeLists.txt | 24 ++- examples/c++/omega/main.cc | 93 ++++++++++ examples/c/CMakeLists.txt | 24 ++- examples/c/omega_example.c | 170 +++++++++++++++++ src/binding/c/c_api.cc | 175 ++++++++++++++++-- src/include/zvec/c_api.h | 131 +++++++++++++ .../core/interface/index_param_builders.h | 25 ++- .../interface/omega_index_integration_test.cc | 158 ++++++++++++++++ 8 files changed, 773 insertions(+), 27 deletions(-) create mode 100644 examples/c++/omega/main.cc create mode 100644 examples/c/omega_example.c create mode 100644 tests/core/interface/omega_index_integration_test.cc diff --git a/examples/c++/CMakeLists.txt b/examples/c++/CMakeLists.txt index 02b051e2d..91c3bf548 100644 --- a/examples/c++/CMakeLists.txt +++ b/examples/c++/CMakeLists.txt @@ -37,12 +37,6 @@ else() set(PROTOBUF_LIB protobuf) endif() -if(WIN32) - set(ZVEC_LIGHTGBM_LIB lib_lightgbm) -else() - set(ZVEC_LIGHTGBM_LIB _lightgbm) -endif() - if(ZVEC_ENABLE_OMEGA) set(_zvec_omega_search_paths ${ZVEC_DEPENDENCY_LIB_DIR} @@ -269,6 +263,13 @@ target_link_libraries(ailego-example PRIVATE zvec-ailego ) +if(ZVEC_ENABLE_OMEGA) + add_executable(omega-example omega/main.cc) + target_link_libraries(omega-example PRIVATE + zvec-core + ) +endif() + # Strip symbols to reduce executable size if(CMAKE_BUILD_TYPE STREQUAL "Release" AND ANDROID) add_custom_command(TARGET db-example POST_BUILD @@ -280,6 +281,11 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release" AND ANDROID) add_custom_command(TARGET ailego-example POST_BUILD COMMAND ${CMAKE_STRIP} "$" COMMENT "Stripping symbols from ailego-example") + if(ZVEC_ENABLE_OMEGA) + add_custom_command(TARGET omega-example POST_BUILD + COMMAND ${CMAKE_STRIP} "$" + COMMENT "Stripping symbols from omega-example") + endif() endif() # Optimize for 
size @@ -288,4 +294,10 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release" AND ANDROID) PROPERTY COMPILE_FLAGS "-Os") set_property(TARGET db-example core-example ailego-example PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + if(ZVEC_ENABLE_OMEGA) + set_property(TARGET omega-example + PROPERTY COMPILE_FLAGS "-Os") + set_property(TARGET omega-example + PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + endif() endif() diff --git a/examples/c++/omega/main.cc b/examples/c++/omega/main.cc new file mode 100644 index 000000000..9fb6e8993 --- /dev/null +++ b/examples/c++/omega/main.cc @@ -0,0 +1,93 @@ +#include +#include +#include +#include +#include +#include + +using namespace zvec::core_interface; + +namespace { + +constexpr uint32_t kDimension = 32; +const std::string kIndexPath = "omega_example.index"; + +BaseIndexParam::Pointer CreateOmegaParam() { + auto param = HNSWIndexParamBuilder() + .WithMetricType(MetricType::kInnerProduct) + .WithDataType(DataType::DT_FP32) + .WithDimension(kDimension) + .WithIsSparse(false) + .WithM(8) + .WithEFConstruction(64) + .Build(); + param->index_type = IndexType::kOMEGA; + return param; +} + +} // namespace + +int main() { + std::filesystem::remove_all(kIndexPath); + + auto index = IndexFactory::CreateAndInitIndex(*CreateOmegaParam()); + if (!index) { + std::cerr << "failed to create omega index" << std::endl; + return 1; + } + + if (index->Open(kIndexPath, + StorageOptions{StorageOptions::StorageType::kMMAP, true}) != + 0) { + std::cerr << "failed to open omega index" << std::endl; + return 1; + } + + for (uint32_t doc_id = 0; doc_id < 6; ++doc_id) { + std::vector values(kDimension, static_cast(doc_id) / 10.0f); + values[0] = 1.0f + static_cast(doc_id); + VectorData vector_data; + vector_data.vector = DenseVector{values.data()}; + if (index->Add(vector_data, doc_id) != 0) { + std::cerr << "failed to add document " << doc_id << std::endl; + return 1; + } + } + + if (index->Train() != 0) { + std::cerr << "failed to train omega index" << std::endl; + 
return 1; + } + + std::vector query_values(kDimension, 0.0f); + query_values[0] = 1.0f; + VectorData query{DenseVector{query_values.data()}}; + + auto query_param = OmegaQueryParamBuilder() + .with_topk(3) + .with_fetch_vector(true) + .with_ef_search(32) + .with_target_recall(0.95f) + .build(); + + SearchResult result; + if (index->Search(query, query_param, &result) != 0) { + std::cerr << "failed to search omega index" << std::endl; + return 1; + } + + std::cout << "omega results: " << result.doc_list_.size() << std::endl; + if (result.doc_list_.empty()) { + std::cerr << "omega example returned no results" << std::endl; + return 1; + } + + std::cout << "top result key=" << result.doc_list_[0].key() + << " score=" << result.doc_list_[0].score() << std::endl; + if (index->Close() != 0) { + std::cerr << "failed to close omega index" << std::endl; + return 1; + } + + return 0; +} diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index 83e15a8ba..353d2641a 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -88,12 +88,6 @@ if(ZVEC_ENABLE_OMEGA) endif() endif() -if(WIN32) - set(ZVEC_LIGHTGBM_LIB lib_lightgbm) -else() - set(ZVEC_LIGHTGBM_LIB _lightgbm) -endif() - # --- Dependency groups --- if(NOT WIN32) set(zvec_c_api_deps @@ -221,6 +215,13 @@ target_link_libraries(c_api_optimized_example PRIVATE zvec-c-api ) +if(ZVEC_ENABLE_OMEGA) + add_executable(c_api_omega_example omega_example.c) + target_link_libraries(c_api_omega_example PRIVATE + zvec-c-api + ) +endif() + # Strip symbols to reduce executable size if(CMAKE_BUILD_TYPE STREQUAL "Release" AND (ANDROID OR (CMAKE_SYSTEM_NAME STREQUAL "Linux"))) add_custom_command(TARGET c_api_basic_example POST_BUILD @@ -241,6 +242,11 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release" AND (ANDROID OR (CMAKE_SYSTEM_NAME STREQU add_custom_command(TARGET c_api_optimized_example POST_BUILD COMMAND ${CMAKE_STRIP} "$" COMMENT "Stripping symbols from c_api_optimized_example") + if(ZVEC_ENABLE_OMEGA) + 
add_custom_command(TARGET c_api_omega_example POST_BUILD + COMMAND ${CMAKE_STRIP} "$" + COMMENT "Stripping symbols from c_api_omega_example") + endif() endif() # Optimize for size @@ -251,4 +257,10 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release" AND ANDROID) set_property(TARGET c_api_basic_example c_api_collection_schema_example c_api_doc_example c_api_index_example c_api_field_schema_example c_api_optimized_example PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + if(ZVEC_ENABLE_OMEGA) + set_property(TARGET c_api_omega_example + PROPERTY COMPILE_FLAGS "-Os") + set_property(TARGET c_api_omega_example + PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + endif() endif() diff --git a/examples/c/omega_example.c b/examples/c/omega_example.c new file mode 100644 index 000000000..433f3f32b --- /dev/null +++ b/examples/c/omega_example.c @@ -0,0 +1,170 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +static ZVecErrorCode check(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "%s failed: %s\n", context, + error_msg ? 
error_msg : "unknown error"); + zvec_free(error_msg); + } + return error; +} + +static ZVecErrorCode create_omega_collection(ZVecCollection **collection) { + ZVecCollectionSchema *schema = zvec_collection_schema_create("omega_collection"); + ZVecCollectionOptions *options = NULL; + ZVecIndexParams *invert_params = NULL; + ZVecIndexParams *omega_params = NULL; + ZVecFieldSchema *id_field = NULL; + ZVecFieldSchema *embedding_field = NULL; + ZVecErrorCode error = ZVEC_OK; + + if (!schema) { + return ZVEC_ERROR_INTERNAL_ERROR; + } + + invert_params = zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + omega_params = zvec_index_params_create(ZVEC_INDEX_TYPE_OMEGA); + options = zvec_collection_options_create(); + if (!invert_params || !omega_params || !options) { + error = ZVEC_ERROR_RESOURCE_EXHAUSTED; + goto cleanup; + } + + zvec_index_params_set_invert_params(invert_params, true, false); + zvec_index_params_set_metric_type(omega_params, ZVEC_METRIC_TYPE_IP); + zvec_index_params_set_hnsw_params(omega_params, 8, 64); + + id_field = zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); + if (!id_field || !embedding_field) { + error = ZVEC_ERROR_RESOURCE_EXHAUSTED; + goto cleanup; + } + + zvec_field_schema_set_index_params(id_field, invert_params); + zvec_field_schema_set_index_params(embedding_field, omega_params); + + error = zvec_collection_schema_add_field(schema, id_field); + if (error != ZVEC_OK) goto cleanup; + error = zvec_collection_schema_add_field(schema, embedding_field); + if (error != ZVEC_OK) goto cleanup; + + error = zvec_collection_create_and_open("./omega_collection", schema, options, + collection); + +cleanup: + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(omega_params); + zvec_collection_options_destroy(options); + zvec_collection_schema_destroy(schema); + return error; +} + +int main(void) { + ZVecCollection 
*collection = NULL; + ZVecVectorQuery *query = NULL; + ZVecOmegaQueryParams *omega_query = NULL; + ZVecDoc *docs[2] = {NULL, NULL}; + ZVecDoc **results = NULL; + size_t result_count = 0; + int exit_code = 1; + float vector1[] = {1.0f, 0.1f, 0.1f, 0.1f}; + float vector2[] = {2.0f, 0.2f, 0.2f, 0.2f}; + + remove("./omega_collection"); + + if (check(create_omega_collection(&collection), "create_omega_collection") != + ZVEC_OK) { + goto cleanup; + } + + for (int i = 0; i < 2; ++i) { + docs[i] = zvec_doc_create(); + if (!docs[i]) { + fprintf(stderr, "failed to create document %d\n", i); + goto cleanup; + } + } + + zvec_doc_set_pk(docs[0], "doc1"); + zvec_doc_add_field_by_value(docs[0], "id", ZVEC_DATA_TYPE_STRING, "doc1", 4); + zvec_doc_add_field_by_value(docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector1, sizeof(vector1)); + + zvec_doc_set_pk(docs[1], "doc2"); + zvec_doc_add_field_by_value(docs[1], "id", ZVEC_DATA_TYPE_STRING, "doc2", 4); + zvec_doc_add_field_by_value(docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, + vector2, sizeof(vector2)); + + { + size_t success_count = 0; + size_t error_count = 0; + if (check(zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count), + "zvec_collection_insert") != ZVEC_OK) { + goto cleanup; + } + } + + if (check(zvec_collection_flush(collection), "zvec_collection_flush") != + ZVEC_OK) { + goto cleanup; + } + + query = zvec_vector_query_create(); + omega_query = zvec_query_params_omega_create(32, 0.95f, 0.0f, false, false); + if (!query || !omega_query) { + fprintf(stderr, "failed to create omega query\n"); + goto cleanup; + } + + zvec_vector_query_set_field_name(query, "embedding"); + zvec_vector_query_set_query_vector(query, vector1, sizeof(vector1)); + zvec_vector_query_set_topk(query, 2); + zvec_vector_query_set_include_doc_id(query, true); + zvec_vector_query_set_omega_params(query, omega_query); + + if (check(zvec_collection_query(collection, query, &results, &result_count), + 
"zvec_collection_query") != ZVEC_OK) { + goto cleanup; + } + + printf("omega c example results: %zu\n", result_count); + if (result_count == 0) { + fprintf(stderr, "omega c example returned no results\n"); + goto cleanup; + } + printf("top result score: %.4f\n", zvec_doc_get_score(results[0])); + + exit_code = 0; + +cleanup: + zvec_docs_free(results, result_count); + zvec_vector_query_destroy(query); + if (collection) { + zvec_collection_destroy(collection); + } + zvec_doc_destroy(docs[0]); + zvec_doc_destroy(docs[1]); + return exit_code; +} diff --git a/src/binding/c/c_api.cc b/src/binding/c/c_api.cc index 255b03a1e..8764265bf 100644 --- a/src/binding/c/c_api.cc +++ b/src/binding/c/c_api.cc @@ -1332,6 +1332,14 @@ ZVecIndexParams *zvec_index_params_create(ZVecIndexType index_type) { zvec::core_interface::kDefaultHnswEfConstruction, // ef_construction zvec::QuantizeType::UNDEFINED); break; + case ZVEC_INDEX_TYPE_OMEGA: + cpp_params = + new zvec::OmegaIndexParams( + zvec::MetricType::L2, // metric_type + zvec::core_interface::kDefaultHnswNeighborCnt, // m + zvec::core_interface::kDefaultHnswEfConstruction, // ef_construction + zvec::QuantizeType::UNDEFINED); + break; case ZVEC_INDEX_TYPE_IVF: cpp_params = new zvec::IVFIndexParams(zvec::MetricType::L2, // metric_type @@ -1490,15 +1498,21 @@ ZVecErrorCode zvec_index_params_set_hnsw_params(ZVecIndexParams *params, int m, return ZVEC_ERROR_INVALID_ARGUMENT; } auto *cpp_params = reinterpret_cast(params); - auto *hnsw_params = dynamic_cast(cpp_params); - if (!hnsw_params) { + if (auto *hnsw_params = dynamic_cast(cpp_params)) { + hnsw_params->set_m(m); + hnsw_params->set_ef_construction(ef_construction); + return ZVEC_OK; + } + if (auto *omega_params = dynamic_cast(cpp_params)) { + omega_params->set_m(m); + omega_params->set_ef_construction(ef_construction); + return ZVEC_OK; + } + { SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "Invalid params or not HNSW index type"); + "Invalid params or not HNSW/OMEGA index type"); 
return ZVEC_ERROR_INVALID_ARGUMENT; } - hnsw_params->set_m(m); - hnsw_params->set_ef_construction(ef_construction); - return ZVEC_OK; } /** @@ -1513,13 +1527,19 @@ int zvec_index_params_get_hnsw_m(const ZVecIndexParams *params) { return 0; } auto *cpp_params = reinterpret_cast(params); - auto *hnsw_params = dynamic_cast(cpp_params); - if (!hnsw_params) { + if (auto *hnsw_params = + dynamic_cast(cpp_params)) { + return hnsw_params->m(); + } + if (auto *omega_params = + dynamic_cast(cpp_params)) { + return omega_params->m(); + } + { SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "Invalid params or not HNSW index type"); + "Invalid params or not HNSW/OMEGA index type"); return 0; } - return hnsw_params->m(); } /** @@ -1534,13 +1554,19 @@ int zvec_index_params_get_hnsw_ef_construction(const ZVecIndexParams *params) { return 0; } auto *cpp_params = reinterpret_cast(params); - auto *hnsw_params = dynamic_cast(cpp_params); - if (!hnsw_params) { + if (auto *hnsw_params = + dynamic_cast(cpp_params)) { + return hnsw_params->ef_construction(); + } + if (auto *omega_params = + dynamic_cast(cpp_params)) { + return omega_params->ef_construction(); + } + { SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, - "Invalid params or not HNSW index type"); + "Invalid params or not HNSW/OMEGA index type"); return 0; } - return hnsw_params->ef_construction(); } /** @@ -4609,6 +4635,113 @@ ZVecHnswQueryParams *zvec_query_params_hnsw_create(int ef, float radius, return nullptr; } +ZVecOmegaQueryParams *zvec_query_params_omega_create(int ef, + float target_recall, + float radius, + bool is_linear, + bool is_using_refiner) { + ZVEC_TRY_RETURN_NULL( + "Failed to create OmegaQueryParams", + auto *params = new zvec::OmegaQueryParams(ef, target_recall, radius, + is_linear, is_using_refiner); + return reinterpret_cast(params);) + + return nullptr; +} + +void zvec_query_params_omega_destroy(ZVecOmegaQueryParams *params) { + if (params) { + delete reinterpret_cast(params); + } +} + +ZVecErrorCode 
zvec_query_params_omega_set_ef(ZVecOmegaQueryParams *params, + int ef) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + auto *ptr = reinterpret_cast(params); + ptr->set_ef(ef); + return ZVEC_OK; +} + +int zvec_query_params_omega_get_ef(const ZVecOmegaQueryParams *params) { + if (!params) return 0; + auto *ptr = reinterpret_cast(params); + return ptr->ef(); +} + +ZVecErrorCode zvec_query_params_omega_set_target_recall( + ZVecOmegaQueryParams *params, float target_recall) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + auto *ptr = reinterpret_cast(params); + ptr->set_target_recall(target_recall); + return ZVEC_OK; +} + +float zvec_query_params_omega_get_target_recall( + const ZVecOmegaQueryParams *params) { + if (!params) return 0.0f; + auto *ptr = reinterpret_cast(params); + return ptr->target_recall(); +} + +ZVecErrorCode zvec_query_params_omega_set_radius(ZVecOmegaQueryParams *params, + float radius) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + auto *ptr = reinterpret_cast(params); + ptr->set_radius(radius); + return ZVEC_OK; +} + +float zvec_query_params_omega_get_radius(const ZVecOmegaQueryParams *params) { + if (!params) return 0.0f; + auto *ptr = reinterpret_cast(params); + return ptr->radius(); +} + +ZVecErrorCode zvec_query_params_omega_set_is_linear( + ZVecOmegaQueryParams *params, bool is_linear) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + auto *ptr = reinterpret_cast(params); + ptr->set_is_linear(is_linear); + return ZVEC_OK; +} + +bool zvec_query_params_omega_get_is_linear(const ZVecOmegaQueryParams *params) { + if (!params) return false; + auto *ptr = reinterpret_cast(params); + return 
ptr->is_linear(); +} + +ZVecErrorCode zvec_query_params_omega_set_is_using_refiner( + ZVecOmegaQueryParams *params, bool is_using_refiner) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + auto *ptr = reinterpret_cast(params); + ptr->set_is_using_refiner(is_using_refiner); + return ZVEC_OK; +} + +bool zvec_query_params_omega_get_is_using_refiner( + const ZVecOmegaQueryParams *params) { + if (!params) return false; + auto *ptr = reinterpret_cast(params); + return ptr->is_using_refiner(); +} + void zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params) { if (params) { delete reinterpret_cast(params); @@ -5102,6 +5235,20 @@ ZVecErrorCode zvec_vector_query_set_hnsw_params( return ZVEC_OK; } +ZVecErrorCode zvec_vector_query_set_omega_params( + ZVecVectorQuery *query, ZVecOmegaQueryParams *omega_params) { + if (!query || !omega_params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Query or OMEGA params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + auto *query_ptr = reinterpret_cast(query); + auto *params_ptr = reinterpret_cast(omega_params); + query_ptr->query_params_.reset(params_ptr); + return ZVEC_OK; +} + ZVecErrorCode zvec_vector_query_set_ivf_params(ZVecVectorQuery *query, ZVecIVFQueryParams *ivf_params) { if (!query || !ivf_params) { diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index 75eba707b..730b3572e 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -767,6 +767,7 @@ typedef uint32_t ZVecIndexType; #define ZVEC_INDEX_TYPE_HNSW 1 #define ZVEC_INDEX_TYPE_IVF 2 #define ZVEC_INDEX_TYPE_FLAT 3 +#define ZVEC_INDEX_TYPE_OMEGA 11 #define ZVEC_INDEX_TYPE_INVERT 10 /** @@ -983,6 +984,16 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_index_params_set_invert_params( */ typedef struct ZVecHnswQueryParams ZVecHnswQueryParams; +/** + * @brief OMEGA query parameters handle (opaque pointer) + * + * Internally maps to 
zvec::OmegaQueryParams* (raw pointer). + * Created by zvec_query_params_omega_create() and destroyed by + * zvec_query_params_omega_destroy(). Caller owns the pointer and must + * explicitly destroy it. + */ +typedef struct ZVecOmegaQueryParams ZVecOmegaQueryParams; + /** * @brief IVF query parameters handle (opaque pointer) * @@ -1120,6 +1131,117 @@ ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_hnsw_set_is_using_refiner( ZVEC_EXPORT bool ZVEC_CALL zvec_query_params_hnsw_get_is_using_refiner(const ZVecHnswQueryParams *params); +// ----------------------------------------------------------------------------- +// ZVecOmegaQueryParams (OMEGA Query Parameters) +// ----------------------------------------------------------------------------- + +/** + * @brief Create OMEGA query parameters + * @param ef Exploration factor during search + * @param target_recall Target recall used by OMEGA early stopping + * @param radius Search radius + * @param is_linear Whether linear search + * @param is_using_refiner Whether using refiner + * @return ZVecOmegaQueryParams* Pointer to the newly created OMEGA query + * parameters + */ +ZVEC_EXPORT ZVecOmegaQueryParams *ZVEC_CALL zvec_query_params_omega_create( + int ef, float target_recall, float radius, bool is_linear, + bool is_using_refiner); + +/** + * @brief Destroy OMEGA query parameters + * @param params OMEGA query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_omega_destroy(ZVecOmegaQueryParams *params); + +/** + * @brief Set exploration factor + * @param params OMEGA query parameters pointer + * @param ef Exploration factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_omega_set_ef(ZVecOmegaQueryParams *params, int ef); + +/** + * @brief Get exploration factor + * @param params OMEGA query parameters pointer + * @return int Exploration factor + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_query_params_omega_get_ef(const ZVecOmegaQueryParams *params); + 
+/** + * @brief Set target recall + * @param params OMEGA query parameters pointer + * @param target_recall Target recall + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_omega_set_target_recall( + ZVecOmegaQueryParams *params, float target_recall); + +/** + * @brief Get target recall + * @param params OMEGA query parameters pointer + * @return float Target recall + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_query_params_omega_get_target_recall(const ZVecOmegaQueryParams *params); + +/** + * @brief Set search radius + * @param params OMEGA query parameters pointer + * @param radius Search radius + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_omega_set_radius(ZVecOmegaQueryParams *params, float radius); + +/** + * @brief Get search radius + * @param params OMEGA query parameters pointer + * @return float Search radius + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_query_params_omega_get_radius(const ZVecOmegaQueryParams *params); + +/** + * @brief Set linear search mode + * @param params OMEGA query parameters pointer + * @param is_linear Whether linear search + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_omega_set_is_linear( + ZVecOmegaQueryParams *params, bool is_linear); + +/** + * @brief Get linear search mode + * @param params OMEGA query parameters pointer + * @return bool Whether linear search + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_query_params_omega_get_is_linear(const ZVecOmegaQueryParams *params); + +/** + * @brief Set whether to use refiner + * @param params OMEGA query parameters pointer + * @param is_using_refiner Whether to use refiner + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_omega_set_is_using_refiner( + ZVecOmegaQueryParams *params, bool is_using_refiner); + +/** + * @brief Get whether to use refiner + * @param params OMEGA query parameters pointer + * @return 
bool Whether to use refiner + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_query_params_omega_get_is_using_refiner( + const ZVecOmegaQueryParams *params); + // ----------------------------------------------------------------------------- // ZVecIVFQueryParams (IVF Query Parameters) // ----------------------------------------------------------------------------- @@ -1469,6 +1591,15 @@ zvec_vector_query_set_query_params(ZVecVectorQuery *query, void *params); ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_hnsw_params( ZVecVectorQuery *query, ZVecHnswQueryParams *hnsw_params); +/** + * @brief Set OMEGA query parameters (takes ownership) + * @param query Vector query pointer + * @param omega_params OMEGA query parameters pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_omega_params( + ZVecVectorQuery *query, ZVecOmegaQueryParams *omega_params); + /** * @brief Set IVF query parameters (takes ownership) * @param query Vector query pointer diff --git a/src/include/zvec/core/interface/index_param_builders.h b/src/include/zvec/core/interface/index_param_builders.h index e22ecb392..ce20c408c 100644 --- a/src/include/zvec/core/interface/index_param_builders.h +++ b/src/include/zvec/core/interface/index_param_builders.h @@ -299,6 +299,29 @@ class HNSWQueryParamBuilder } }; +class OmegaQueryParamBuilder + : public BaseIndexQueryParamBuilder { + public: + OmegaQueryParamBuilder &with_ef_search(int ef_search) { + m_param.ef_search = ef_search; + return *this; + } + + OmegaQueryParamBuilder &with_training_query_id(int training_query_id) { + m_param.training_query_id = training_query_id; + return *this; + } + + OmegaQueryParamBuilder &with_target_recall(float target_recall) { + m_param.target_recall = target_recall; + return *this; + } + + OmegaQueryParam::Pointer build() { + return std::make_shared(std::move(m_param)); + } +}; + // Example Usage: // HNSWQueryParam::Pointer hnsw_config = HNSWQueryParamBuilder() // 
.with_topk(5) @@ -407,4 +430,4 @@ class SCANNIndexParamBuilder { }; } // namespace predefined -} // namespace zvec::core_interface \ No newline at end of file +} // namespace zvec::core_interface diff --git a/tests/core/interface/omega_index_integration_test.cc b/tests/core/interface/omega_index_integration_test.cc new file mode 100644 index 000000000..d9734315e --- /dev/null +++ b/tests/core/interface/omega_index_integration_test.cc @@ -0,0 +1,158 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include "tests/test_util.h" +#include "zvec/core/interface/index.h" +#include "zvec/core/interface/index_factory.h" +#include "zvec/core/interface/index_param_builders.h" + +namespace zvec::core_interface { +namespace { + +constexpr uint32_t kDimension = 16; +const std::string kIndexPath = "OmegaIndexIntegrationTest/test.index"; + +BaseIndexParam::Pointer CreateOmegaIndexParam() { + auto param = HNSWIndexParamBuilder() + .WithMetricType(MetricType::kInnerProduct) + .WithDataType(DataType::DT_FP32) + .WithDimension(kDimension) + .WithIsSparse(false) + .WithM(8) + .WithEFConstruction(64) + .Build(); + param->index_type = IndexType::kOMEGA; + return param; +} + +void PopulateIndex(const Index::Pointer &index, uint32_t doc_count) { + for (uint32_t doc_id = 0; doc_id < doc_count; ++doc_id) { + std::vector values(kDimension, static_cast(doc_id) / 10.0f); + values[0] = 1.0f + static_cast(doc_id); + + VectorData vector_data; + vector_data.vector = DenseVector{values.data()}; + ASSERT_EQ(index->Add(vector_data, doc_id), 0); + } +} + +VectorData MakeQuery(float base) { + static std::vector values; + values.assign(kDimension, base); + values[0] = 1.0f; + return VectorData{DenseVector{values.data()}}; +} + +class OmegaIndexIntegrationTest : public ::testing::Test { + protected: + void SetUp() override { + zvec::test_util::RemoveTestPath("OmegaIndexIntegrationTest"); + } + + void TearDown() override { + zvec::test_util::RemoveTestPath("OmegaIndexIntegrationTest"); + } +}; + +TEST_F(OmegaIndexIntegrationTest, SearchFallsBackWithoutModelAndReturnsResults) { + auto index = IndexFactory::CreateAndInitIndex(*CreateOmegaIndexParam()); + ASSERT_NE(index, nullptr); + ASSERT_EQ(index->Open(kIndexPath, + {StorageOptions::StorageType::kMMAP, true}), + 0); + + PopulateIndex(index, 8); + ASSERT_EQ(index->Train(), 0); + + auto query_param = OmegaQueryParamBuilder() + .with_topk(3) + .with_fetch_vector(true) + .with_ef_search(32) + 
.with_target_recall(0.90f) + .build(); + + auto query = MakeQuery(0.0f); + SearchResult result; + ASSERT_EQ(index->Search(query, query_param, &result), 0); + ASSERT_EQ(result.doc_list_.size(), 3U); + EXPECT_EQ(result.doc_list_[0].key(), 7U); + EXPECT_TRUE(result.training_records_.empty()); + EXPECT_TRUE(result.gt_cmps_per_rank_.empty()); + + ASSERT_EQ(index->Close(), 0); +} + +TEST_F(OmegaIndexIntegrationTest, TrainingSessionCollectsArtifactsThroughIndexSearch) { + auto index = IndexFactory::CreateAndInitIndex(*CreateOmegaIndexParam()); + ASSERT_NE(index, nullptr); + ASSERT_EQ(index->Open(kIndexPath, + {StorageOptions::StorageType::kMMAP, true}), + 0); + + PopulateIndex(index, 8); + ASSERT_EQ(index->Train(), 0); + + auto *training_capable = index->GetTrainingCapability(); + ASSERT_NE(training_capable, nullptr); + + auto session = training_capable->CreateTrainingSession(); + ASSERT_NE(session, nullptr); + + TrainingSessionConfig config; + config.topk = 1; + config.k_train = 1; + config.ground_truth = {{0}}; + ASSERT_TRUE(session->Start(config).ok()); + session->BeginQuery(0); + + auto query_param = OmegaQueryParamBuilder() + .with_topk(3) + .with_ef_search(32) + .with_training_query_id(0) + .with_target_recall(0.95f) + .build(); + + auto query = MakeQuery(0.0f); + SearchResult result; + ASSERT_EQ(index->Search(query, query_param, &result), 0); + EXPECT_EQ(result.training_query_id_, 0); + EXPECT_FALSE(result.training_records_.empty()); + ASSERT_EQ(result.gt_cmps_per_rank_.size(), 1U); + EXPECT_GT(result.total_cmps_, 0); + + QueryTrainingArtifacts artifacts; + artifacts.records = result.training_records_; + artifacts.gt_cmps_per_rank = result.gt_cmps_per_rank_; + artifacts.total_cmps = result.total_cmps_; + artifacts.training_query_id = result.training_query_id_; + session->CollectQueryArtifacts(std::move(artifacts)); + + TrainingArtifacts consumed = session->ConsumeArtifacts(); + ASSERT_FALSE(consumed.records.empty()); + ASSERT_EQ(consumed.gt_cmps_data.num_queries, 
1U); + ASSERT_EQ(consumed.gt_cmps_data.topk, 1U); + ASSERT_EQ(consumed.gt_cmps_data.gt_cmps.size(), 1U); + ASSERT_EQ(consumed.gt_cmps_data.total_cmps.size(), 1U); + EXPECT_EQ(consumed.gt_cmps_data.total_cmps[0], result.total_cmps_); + + session->Finish(); + ASSERT_EQ(index->Close(), 0); +} + +} // namespace +} // namespace zvec::core_interface From 88a1aa792ac7c4578ba9279098b5f23d70bf4644 Mon Sep 17 00:00:00 2001 From: Yifan Peng Date: Sun, 5 Apr 2026 19:43:03 +0800 Subject: [PATCH 126/126] Fix lint formatting for OMEGA changes --- examples/c++/omega/main.cc | 5 ++--- examples/c/omega_example.c | 3 ++- src/include/zvec/c_api.h | 14 +++++++------- .../zvec/core/interface/index_param_builders.h | 3 ++- .../core/interface/omega_index_integration_test.cc | 12 ++++++------ 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/examples/c++/omega/main.cc b/examples/c++/omega/main.cc index 9fb6e8993..b15b5d331 100644 --- a/examples/c++/omega/main.cc +++ b/examples/c++/omega/main.cc @@ -36,9 +36,8 @@ int main() { return 1; } - if (index->Open(kIndexPath, - StorageOptions{StorageOptions::StorageType::kMMAP, true}) != - 0) { + if (index->Open(kIndexPath, StorageOptions{StorageOptions::StorageType::kMMAP, + true}) != 0) { std::cerr << "failed to open omega index" << std::endl; return 1; } diff --git a/examples/c/omega_example.c b/examples/c/omega_example.c index 433f3f32b..caf1c5c5d 100644 --- a/examples/c/omega_example.c +++ b/examples/c/omega_example.c @@ -29,7 +29,8 @@ static ZVecErrorCode check(ZVecErrorCode error, const char *context) { } static ZVecErrorCode create_omega_collection(ZVecCollection **collection) { - ZVecCollectionSchema *schema = zvec_collection_schema_create("omega_collection"); + ZVecCollectionSchema *schema = + zvec_collection_schema_create("omega_collection"); ZVecCollectionOptions *options = NULL; ZVecIndexParams *invert_params = NULL; ZVecIndexParams *omega_params = NULL; diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h 
index 730b3572e..62d5f137f 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -1145,9 +1145,9 @@ zvec_query_params_hnsw_get_is_using_refiner(const ZVecHnswQueryParams *params); * @return ZVecOmegaQueryParams* Pointer to the newly created OMEGA query * parameters */ -ZVEC_EXPORT ZVecOmegaQueryParams *ZVEC_CALL zvec_query_params_omega_create( - int ef, float target_recall, float radius, bool is_linear, - bool is_using_refiner); +ZVEC_EXPORT ZVecOmegaQueryParams *ZVEC_CALL +zvec_query_params_omega_create(int ef, float target_recall, float radius, + bool is_linear, bool is_using_refiner); /** * @brief Destroy OMEGA query parameters @@ -1230,16 +1230,16 @@ zvec_query_params_omega_get_is_linear(const ZVecOmegaQueryParams *params); * @param is_using_refiner Whether to use refiner * @return ZVecErrorCode Error code */ -ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_omega_set_is_using_refiner( - ZVecOmegaQueryParams *params, bool is_using_refiner); +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_omega_set_is_using_refiner(ZVecOmegaQueryParams *params, + bool is_using_refiner); /** * @brief Get whether to use refiner * @param params OMEGA query parameters pointer * @return bool Whether to use refiner */ -ZVEC_EXPORT bool ZVEC_CALL -zvec_query_params_omega_get_is_using_refiner( +ZVEC_EXPORT bool ZVEC_CALL zvec_query_params_omega_get_is_using_refiner( const ZVecOmegaQueryParams *params); // ----------------------------------------------------------------------------- diff --git a/src/include/zvec/core/interface/index_param_builders.h b/src/include/zvec/core/interface/index_param_builders.h index ce20c408c..49b2a8c36 100644 --- a/src/include/zvec/core/interface/index_param_builders.h +++ b/src/include/zvec/core/interface/index_param_builders.h @@ -300,7 +300,8 @@ class HNSWQueryParamBuilder }; class OmegaQueryParamBuilder - : public BaseIndexQueryParamBuilder { + : public BaseIndexQueryParamBuilder { public: OmegaQueryParamBuilder 
&with_ef_search(int ef_search) { m_param.ef_search = ef_search; diff --git a/tests/core/interface/omega_index_integration_test.cc b/tests/core/interface/omega_index_integration_test.cc index d9734315e..fc27b2639 100644 --- a/tests/core/interface/omega_index_integration_test.cc +++ b/tests/core/interface/omega_index_integration_test.cc @@ -69,11 +69,11 @@ class OmegaIndexIntegrationTest : public ::testing::Test { } }; -TEST_F(OmegaIndexIntegrationTest, SearchFallsBackWithoutModelAndReturnsResults) { +TEST_F(OmegaIndexIntegrationTest, + SearchFallsBackWithoutModelAndReturnsResults) { auto index = IndexFactory::CreateAndInitIndex(*CreateOmegaIndexParam()); ASSERT_NE(index, nullptr); - ASSERT_EQ(index->Open(kIndexPath, - {StorageOptions::StorageType::kMMAP, true}), + ASSERT_EQ(index->Open(kIndexPath, {StorageOptions::StorageType::kMMAP, true}), 0); PopulateIndex(index, 8); @@ -97,11 +97,11 @@ TEST_F(OmegaIndexIntegrationTest, SearchFallsBackWithoutModelAndReturnsResults) ASSERT_EQ(index->Close(), 0); } -TEST_F(OmegaIndexIntegrationTest, TrainingSessionCollectsArtifactsThroughIndexSearch) { +TEST_F(OmegaIndexIntegrationTest, + TrainingSessionCollectsArtifactsThroughIndexSearch) { auto index = IndexFactory::CreateAndInitIndex(*CreateOmegaIndexParam()); ASSERT_NE(index, nullptr); - ASSERT_EQ(index->Open(kIndexPath, - {StorageOptions::StorageType::kMMAP, true}), + ASSERT_EQ(index->Open(kIndexPath, {StorageOptions::StorageType::kMMAP, true}), 0); PopulateIndex(index, 8);