From 939804253d01abb3a194025c0f04cedf47eae1c9 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Thu, 6 Nov 2025 09:09:55 -0800 Subject: [PATCH 01/23] Enable IVF train-only mode where centroids are built but data is not added to the clusters. Added comprehensive tests for IVF --- include/svs/index/ivf/common.h | 120 +++-- include/svs/index/ivf/hierarchical_kmeans.h | 181 ++++---- include/svs/index/ivf/index.h | 5 +- include/svs/index/ivf/kmeans.h | 73 +-- tests/svs/index/ivf/common.cpp | 299 +++++++++++++ tests/svs/index/ivf/hierarchical_kmeans.cpp | 470 +++++++++++++++++++- tests/svs/index/ivf/kmeans.cpp | 388 +++++++++++++++- 7 files changed, 1390 insertions(+), 146 deletions(-) diff --git a/include/svs/index/ivf/common.h b/include/svs/index/ivf/common.h index 28f12151b..a75ccff29 100644 --- a/include/svs/index/ivf/common.h +++ b/include/svs/index/ivf/common.h @@ -224,56 +224,88 @@ template void compute_matmul( const T* data, const T* centroids, float* results, size_t m, size_t n, size_t k ) { + // Validate parameters to avoid Intel MKL errors + if (m == 0 || n == 0 || k == 0) { + return; // Nothing to compute + } + + // Check for integer overflow when casting to int (MKL requirement) + constexpr size_t max_int = static_cast(std::numeric_limits::max()); + if (m > max_int || n > max_int || k > max_int) { + throw ANNEXCEPTION( + "Matrix dimensions too large for Intel MKL GEMM: m={}, n={}, k={}", m, n, k + ); + } + if constexpr (std::is_same_v) { + // Cast size_t parameters to int for MKL GEMM functions + int m_int = static_cast(m); + int n_int = static_cast(n); + int k_int = static_cast(k); + cblas_sgemm( CblasRowMajor, // CBLAS_LAYOUT layout CblasNoTrans, // CBLAS_TRANSPOSE TransA CblasTrans, // CBLAS_TRANSPOSE TransB - m, // const int M - n, // const int N - k, // const int K - 1.0, // float alpha + m_int, // const int M + n_int, // const int N + k_int, // const int K + 1.0f, // float alpha (explicitly float) data, // const float* A - k, // const int lda + k_int, // const int lda centroids, // const float* B - k, // const int ldb - 0.0, // const float beta + k_int, // const int ldb + 0.0f, // const float beta (explicitly float) results, // float* c - n // const int ldc + n_int // const int ldc ); } else if constexpr (std::is_same_v) { + // Intel MKL BFloat16 GEMM requires careful parameter casting to avoid parameter + // errors. Ensure all integer parameters are properly cast to int (MKL expects int, + // not size_t) + int m_int = static_cast(m); + int n_int = static_cast(n); + int k_int = static_cast(k); + cblas_gemm_bf16bf16f32( CblasRowMajor, // CBLAS_LAYOUT layout CblasNoTrans, // CBLAS_TRANSPOSE TransA CblasTrans, // CBLAS_TRANSPOSE TransB - m, // const int M - n, // const int N - k, // const int K - 1.0, // float alpha + m_int, // const int M + n_int, // const int N + k_int, // const int K + 1.0f, // float alpha (explicitly float) (const uint16_t*)data, // const *uint16_t A - k, // const int lda + k_int, // const int lda (const uint16_t*)centroids, // const uint16_t* B - k, // const int ldb - 0.0, // const float beta + k_int, // const int ldb + 0.0f, // const float beta (explicitly float) results, // float* c - n // const int ldc + n_int // const int ldc ); } else if constexpr (std::is_same_v) { + // Intel MKL Float16 GEMM requires careful parameter casting to avoid parameter + // errors. Ensure all integer parameters are properly cast to int (MKL expects int, + // not size_t) + int m_int = static_cast(m); + int n_int = static_cast(n); + int k_int = static_cast(k); + 
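+ // Note: the call below, like the cblas_sgemm and cblas_gemm_bf16bf16f32 branches above, evaluates the row-major product results = data * centroids^T, which is why lda = ldb = k and ldc = n. + // A scalar reference sketch for one entry (assuming T is convertible to float, as the 16-bit types used here are): + //     float ref = 0.0f; + //     for (size_t d = 0; d < k; ++d) ref += float(data[i * k + d]) * float(centroids[j * k + d]); + //     // ref should match results[i * n + j] up to rounding; the bf16/f16 GEMM variants take 16-bit inputs and write float32 results.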
cblas_gemm_f16f16f32( CblasRowMajor, // CBLAS_LAYOUT layout CblasNoTrans, // CBLAS_TRANSPOSE TransA CblasTrans, // CBLAS_TRANSPOSE TransB - m, // const int M - n, // const int N - k, // const int K - 1.0, // float alpha + m_int, // const int M + n_int, // const int N + k_int, // const int K + 1.0f, // float alpha (explicitly float) (const uint16_t*)data, // const *uint16_t A - k, // const int lda + k_int, // const int lda (const uint16_t*)centroids, // const uint16_t* B - k, // const int ldb - 0.0, // const float beta + k_int, // const int ldb + 0.0f, // const float beta (explicitly float) results, // float* c - n // const int ldc + n_int // const int ldc ); } else { throw ANNEXCEPTION("GEMM type not supported!"); @@ -338,20 +370,40 @@ void centroid_assignment( Pool& threadpool, lib::Timer& timer ) { + using DataType = typename Data::element_type; + using CentroidType = T; + + // Convert data to match centroid type if necessary + data::SimpleData data_conv; + if constexpr (!std::is_same_v) { + data_conv = convert_data(data, threadpool); + } + auto generate_assignments = timer.push_back("generate assignments"); threads::parallel_for( threadpool, threads::StaticPartition{batch_range.size()}, [&](auto indices, auto /*tid*/) { auto range = threads::UnitRange(indices); - compute_matmul( - data.get_datum(range.start()).data(), - centroids.data(), - matmul_results.get_datum(range.start()).data(), - range.size(), - centroids.size(), - data.dimensions() - ); + if constexpr (!std::is_same_v) { + compute_matmul( + data_conv.get_datum(range.start()).data(), + centroids.data(), + matmul_results.get_datum(range.start()).data(), + range.size(), + centroids.size(), + data.dimensions() + ); + } else { + compute_matmul( + data.get_datum(range.start()).data(), + centroids.data(), + matmul_results.get_datum(range.start()).data(), + range.size(), + centroids.size(), + data.dimensions() + ); + } if constexpr (std::is_same_v) { for (auto i : indices) { auto nearest = @@ -680,10 +732,10 @@ std::vector maybe_compute_norms(const Data& data, Pool& threadpool) { /// @brief Assign all points to clusters according to assignments template std::vector> group_assignments( - const std::vector& assignments, size_t num_clusters, const Data& data_train + const std::vector& assignments, size_t num_clusters, const Data& data ) { std::vector> clusters(num_clusters); - for (auto i : data_train.eachindex()) + for (auto i : data.eachindex()) clusters[assignments[i]].push_back(i); return clusters; } diff --git a/include/svs/index/ivf/hierarchical_kmeans.h b/include/svs/index/ivf/hierarchical_kmeans.h index 168540868..cf4384141 100644 --- a/include/svs/index/ivf/hierarchical_kmeans.h +++ b/include/svs/index/ivf/hierarchical_kmeans.h @@ -71,7 +71,8 @@ auto hierarchical_kmeans_clustering_impl( Distance& distance, Pool& threadpool, lib::Type SVS_UNUSED(integer_type) = {}, - svs::logging::logger_ptr logger = svs::logging::get() + svs::logging::logger_ptr logger = svs::logging::get(), + bool train_only = false ) { auto timer = lib::Timer(); auto kmeans_timer = timer.push_back("Hierarchical kmeans clustering"); @@ -157,41 +158,52 @@ auto hierarchical_kmeans_clustering_impl( auto clusters_level1 = group_assignments(assignments_level1, num_level1_clusters, data_train); - // Step 5: Assign all data to clusters + std::vector> clusters_level1_all; + + // Declare timer outside if block to avoid scope issues auto all_assignments_time = timer.push_back("level1 all assignments"); - auto all_assignments_alloc = timer.push_back("level1 all 
assignments alloc"); - auto assignments_level1_all = std::vector(data.size()); - all_assignments_alloc.finish(); - batchsize = parameters.minibatch_size_; - num_batches = lib::div_round_up(data.size(), batchsize); + if (!train_only) { + // Step 5: Assign all data to clusters + auto all_assignments_alloc = timer.push_back("level1 all assignments alloc"); + auto assignments_level1_all = std::vector(data.size()); + all_assignments_alloc.finish(); - data_norm = maybe_compute_norms(data, threadpool); - auto data_batch = data::SimpleData{batchsize, ndims}; - for (size_t batch = 0; batch < num_batches; ++batch) { - auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; - auto data_batch_view = data::make_view(data, this_batch); - auto all_assignments_convert = timer.push_back("level1 all assignments convert"); - convert_data(data_batch_view, data_batch, threadpool); - all_assignments_convert.finish(); - centroid_assignment( - data_batch, - data_norm, - this_batch, - distance, - centroids_level1, - centroids_level1_norm, - assignments_level1_all, - matmul_results_level1, - threadpool, - timer - ); + batchsize = parameters.minibatch_size_; + num_batches = lib::div_round_up(data.size(), batchsize); + + data_norm = maybe_compute_norms(data, threadpool); + auto data_batch = data::SimpleData{batchsize, ndims}; + for (size_t batch = 0; batch < num_batches; ++batch) { + auto this_batch = threads::UnitRange{ + batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; + auto data_batch_view = data::make_view(data, this_batch); + auto all_assignments_convert = + timer.push_back("level1 all assignments convert"); + convert_data(data_batch_view, data_batch, threadpool); + all_assignments_convert.finish(); + centroid_assignment( + data_batch, + data_norm, + this_batch, + distance, + centroids_level1, + centroids_level1_norm, + assignments_level1_all, + matmul_results_level1, + threadpool, + timer + ); + } + auto all_assignments_cluster = timer.push_back("level1 all assignments clusters"); + clusters_level1_all = + group_assignments(assignments_level1_all, num_level1_clusters, data); + all_assignments_cluster.finish(); + all_assignments_time.finish(); + } else { + // For train_only, create empty clusters + clusters_level1_all.resize(num_level1_clusters); } - auto all_assignments_cluster = timer.push_back("level1 all assignments clusters"); - auto clusters_level1_all = - group_assignments(assignments_level1_all, num_level1_clusters, data); - all_assignments_cluster.finish(); all_assignments_time.finish(); level1_training_time.finish(); @@ -206,10 +218,20 @@ auto hierarchical_kmeans_clustering_impl( auto clusters_final = std::vector>(num_clusters); size_t max_data_per_cluster = 0; - for (size_t cluster = 0; cluster < num_level1_clusters; cluster++) { - max_data_per_cluster = clusters_level1_all[cluster].size() > max_data_per_cluster - ? clusters_level1_all[cluster].size() - : max_data_per_cluster; + if (!train_only) { + for (size_t cluster = 0; cluster < num_level1_clusters; cluster++) { + max_data_per_cluster = + clusters_level1_all[cluster].size() > max_data_per_cluster + ? clusters_level1_all[cluster].size() + : max_data_per_cluster; + } + } else { + // In train_only mode, use training clusters for Level 2 training + for (size_t cluster = 0; cluster < num_level1_clusters; cluster++) { + max_data_per_cluster = clusters_level1[cluster].size() > max_data_per_cluster + ? 
clusters_level1[cluster].size() + : max_data_per_cluster; + } } auto data_level2 = data::SimpleData{max_data_per_cluster, ndims}; @@ -219,7 +241,8 @@ auto hierarchical_kmeans_clustering_impl( for (size_t cluster = 0; cluster < num_level1_clusters; cluster++) { size_t num_clusters_l2 = num_level2_clusters[cluster]; size_t num_assignments_l2 = clusters_level1[cluster].size(); - size_t num_assignments_l2_all = clusters_level1_all[cluster].size(); + size_t num_assignments_l2_all = + train_only ? 0 : clusters_level1_all[cluster].size(); auto matmul_results_level2 = data::SimpleData{parameters.minibatch_size_, num_clusters_l2}; @@ -255,47 +278,51 @@ auto hierarchical_kmeans_clustering_impl( ); auto all_assignments_level2 = timer.push_back("level2 all assignments"); - threads::parallel_for( - threadpool, - threads::StaticPartition{num_assignments_l2_all}, - [&](auto indices, auto /*tid*/) { - for (auto i : indices) { - data_level2.set_datum( - i, data.get_datum(clusters_level1_all[cluster][i]) - ); - } - } - ); - batchsize = parameters.minibatch_size_; - num_batches = lib::div_round_up(num_assignments_l2_all, batchsize); - - data_norm = maybe_compute_norms(data_level2, threadpool); - auto centroids_level2_norm = - maybe_compute_norms(centroids_level2_fp32, threadpool); - for (size_t batch = 0; batch < num_batches; ++batch) { - auto this_batch = threads::UnitRange{ - batch * batchsize, - std::min((batch + 1) * batchsize, num_assignments_l2_all)}; - auto data_batch = data::make_view(data_level2, this_batch); - centroid_assignment( - data_batch, - data_norm, - this_batch, - distance, - centroids_level2, - centroids_level2_norm, - assignments_level2_all, - matmul_results_level2, + if (!train_only) { + // Only do Level 2 assignments if not in train_only mode + threads::parallel_for( threadpool, - timer + threads::StaticPartition{num_assignments_l2_all}, + [&](auto indices, auto /*tid*/) { + for (auto i : indices) { + data_level2.set_datum( + i, data.get_datum(clusters_level1_all[cluster][i]) + ); + } + } ); - } - for (size_t i = 0; i < num_assignments_l2_all; i++) { - clusters_final[cluster_start + assignments_level2_all[i]].push_back( - clusters_level1_all[cluster][i] - ); + batchsize = parameters.minibatch_size_; + num_batches = lib::div_round_up(num_assignments_l2_all, batchsize); + + data_norm = maybe_compute_norms(data_level2, threadpool); + auto centroids_level2_norm = + maybe_compute_norms(centroids_level2_fp32, threadpool); + for (size_t batch = 0; batch < num_batches; ++batch) { + auto this_batch = threads::UnitRange{ + batch * batchsize, + std::min((batch + 1) * batchsize, num_assignments_l2_all)}; + auto data_batch = data::make_view(data_level2, this_batch); + centroid_assignment( + data_batch, + data_norm, + this_batch, + distance, + centroids_level2, + centroids_level2_norm, + assignments_level2_all, + matmul_results_level2, + threadpool, + timer + ); + } + + for (size_t i = 0; i < num_assignments_l2_all; i++) { + clusters_final[cluster_start + assignments_level2_all[i]].push_back( + clusters_level1_all[cluster][i] + ); + } } threads::parallel_for( @@ -313,6 +340,7 @@ auto hierarchical_kmeans_clustering_impl( cluster_start += num_clusters_l2; all_assignments_level2.finish(); } + level2_training_time.finish(); kmeans_timer.finish(); @@ -338,10 +366,11 @@ auto hierarchical_kmeans_clustering( Distance& distance, Pool& threadpool, lib::Type integer_type = {}, - svs::logging::logger_ptr logger = svs::logging::get() + svs::logging::logger_ptr logger = svs::logging::get(), + bool train_only = 
false ) { return hierarchical_kmeans_clustering_impl( - parameters, data, distance, threadpool, integer_type, std::move(logger) + parameters, data, distance, threadpool, integer_type, std::move(logger), train_only ); } diff --git a/include/svs/index/ivf/index.h b/include/svs/index/ivf/index.h index fb3f3c80c..59eb83cfe 100644 --- a/include/svs/index/ivf/index.h +++ b/include/svs/index/ivf/index.h @@ -388,6 +388,7 @@ auto build_clustering( const DataProto& data_proto, Distance distance, ThreadpoolProto threadpool_proto, + bool train_only = false, svs::logging::logger_ptr logger = svs::logging::get() ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); @@ -402,11 +403,11 @@ auto build_clustering( // Choose clustering method based on parameters if (parameters.is_hierarchical_) { std::tie(centroids, clusters) = hierarchical_kmeans_clustering( - parameters, data, distance, threadpool, Idx{}, logger + parameters, data, distance, threadpool, Idx{}, logger, train_only ); } else { std::tie(centroids, clusters) = kmeans_clustering( - parameters, data, distance, threadpool, Idx{}, logger + parameters, data, distance, threadpool, Idx{}, logger, train_only ); } diff --git a/include/svs/index/ivf/kmeans.h b/include/svs/index/ivf/kmeans.h index c29d5c7fe..51df16357 100644 --- a/include/svs/index/ivf/kmeans.h +++ b/include/svs/index/ivf/kmeans.h @@ -32,7 +32,8 @@ auto kmeans_clustering_impl( Distance& distance, Pool& threadpool, lib::Type SVS_UNUSED(integer_type) = {}, - svs::logging::logger_ptr logger = svs::logging::get() + svs::logging::logger_ptr logger = svs::logging::get(), + bool train_only = false ) { auto timer = lib::Timer(); auto kmeans_timer = timer.push_back("Non-hierarchical kmeans clustering"); @@ -74,38 +75,45 @@ auto kmeans_clustering_impl( parameters, data_train, distance, centroids, matmul_results, rng, threadpool, timer ); - auto final_assignments_time = timer.push_back("final assignments"); - auto assignments = std::vector(data.size()); - auto batchsize = parameters.minibatch_size_; - auto num_batches = lib::div_round_up(data.size(), batchsize); + std::vector> clusters; - auto data_norm = maybe_compute_norms(data, threadpool); - auto centroids_norm = maybe_compute_norms(centroids_fp32, threadpool); + if (train_only) { + // Only train centroids, return empty clusters + clusters.resize(num_centroids); + } else { + // Step 4: Assign all data to clusters + auto final_assignments_time = timer.push_back("final assignments"); + auto assignments = std::vector(data.size()); + auto batchsize = parameters.minibatch_size_; + auto num_batches = lib::div_round_up(data.size(), batchsize); - // Step 4: Assign training data to clusters - auto data_batch = data::SimpleData{batchsize, ndims}; - for (size_t batch = 0; batch < num_batches; ++batch) { - auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; - auto data_batch_view = data::make_view(data, this_batch); - convert_data(data_batch_view, data_batch, threadpool); - centroid_assignment( - data_batch, - data_norm, - this_batch, - distance, - centroids, - centroids_norm, - assignments, - matmul_results, - threadpool, - timer - ); - } + auto data_norm = maybe_compute_norms(data, threadpool); + auto centroids_norm = maybe_compute_norms(centroids_fp32, threadpool); - // Step 5: Assign all data to clusters - auto clusters = group_assignments(assignments, num_centroids, data); - final_assignments_time.finish(); + auto data_batch = data::SimpleData{batchsize, ndims}; + for (size_t 
batch = 0; batch < num_batches; ++batch) { + auto this_batch = threads::UnitRange{ + batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; + auto data_batch_view = data::make_view(data, this_batch); + convert_data(data_batch_view, data_batch, threadpool); + centroid_assignment( + data_batch, + data_norm, + this_batch, + distance, + centroids, + centroids_norm, + assignments, + matmul_results, + threadpool, + timer + ); + } + + // Step 5: Group assignments into clusters + clusters = group_assignments(assignments, num_centroids, data); + final_assignments_time.finish(); + } kmeans_timer.finish(); svs::logging::debug(logger, "{}", timer); svs::logging::debug( @@ -126,10 +134,11 @@ auto kmeans_clustering( Distance& distance, Pool& threadpool, lib::Type integer_type = {}, - svs::logging::logger_ptr logger = svs::logging::get() + svs::logging::logger_ptr logger = svs::logging::get(), + bool train_only = false ) { return kmeans_clustering_impl( - parameters, data, distance, threadpool, integer_type, std::move(logger) + parameters, data, distance, threadpool, integer_type, std::move(logger), train_only ); } } // namespace svs::index::ivf diff --git a/tests/svs/index/ivf/common.cpp b/tests/svs/index/ivf/common.cpp index 39df8503a..8a7841af3 100644 --- a/tests/svs/index/ivf/common.cpp +++ b/tests/svs/index/ivf/common.cpp @@ -24,6 +24,16 @@ // catch #include "catch2/catch_test_macros.hpp" +// svs +#include "svs/core/data.h" +#include "svs/core/distance.h" +#include "svs/lib/threads.h" + +// stl +#include +#include +#include + CATCH_TEST_CASE("Kmeans Clustering", "[ivf][parameters]") { namespace ivf = svs::index::ivf; CATCH_SECTION("IVF Build Parameters") { @@ -69,3 +79,292 @@ CATCH_TEST_CASE("Kmeans Clustering", "[ivf][parameters]") { CATCH_REQUIRE(svs::lib::test_self_save_load(p, dir)); } } + +CATCH_TEST_CASE("Common Utility Functions", "[ivf][common][core]") { + namespace ivf = svs::index::ivf; + + CATCH_SECTION("compute_matmul - All Data Types") { + // Test matrix multiplication for different data types + constexpr size_t m = 10; // number of data points + constexpr size_t n = 5; // number of centroids + constexpr size_t k = 8; // dimensions + + auto test_matmul = [&]() { + // Create test data + auto data = svs::data::SimpleData(m, k); + auto centroids = svs::data::SimpleData(n, k); + auto results = svs::data::SimpleData(m, n); + + // Fill with test values + for (size_t i = 0; i < m; ++i) { + auto datum = data.get_datum(i); + for (size_t j = 0; j < k; ++j) { + datum[j] = static_cast(i + j * 0.1); + } + } + + for (size_t i = 0; i < n; ++i) { + auto centroid = centroids.get_datum(i); + for (size_t j = 0; j < k; ++j) { + centroid[j] = static_cast(i * 0.5 + j); + } + } + + // Compute matrix multiplication + ivf::compute_matmul(data.data(), centroids.data(), results.data(), m, n, k); + + // Verify results are valid (not NaN or Inf) + for (size_t i = 0; i < m; ++i) { + for (size_t j = 0; j < n; ++j) { + float val = results.get_datum(i)[j]; + CATCH_REQUIRE(std::isfinite(val)); + } + } + + // Verify dimensions match expected output + CATCH_REQUIRE(results.size() == m); + CATCH_REQUIRE(results.dimensions() == n); + }; + + // Test all data types + test_matmul.operator()(); + test_matmul.operator()(); + test_matmul.operator()(); + } + + CATCH_SECTION("compute_matmul - Edge Cases") { + // Test with zero dimensions (should return without error) + auto results = svs::data::SimpleData(0, 0); + auto data = svs::data::SimpleData(0, 0); + auto centroids = svs::data::SimpleData(0, 0); + + // Should not 
crash with zero dimensions + ivf::compute_matmul(data.data(), centroids.data(), results.data(), 0, 0, 0); + + // Test with single point and single centroid + auto data_single = svs::data::SimpleData(1, 4); + auto centroid_single = svs::data::SimpleData(1, 4); + auto result_single = svs::data::SimpleData(1, 1); + + auto datum = data_single.get_datum(0); + auto centroid = centroid_single.get_datum(0); + for (size_t i = 0; i < 4; ++i) { + datum[i] = static_cast(i); + centroid[i] = static_cast(i + 1); + } + + ivf::compute_matmul( + data_single.data(), centroid_single.data(), result_single.data(), 1, 1, 4 + ); + + CATCH_REQUIRE(std::isfinite(result_single.get_datum(0)[0])); + } + + CATCH_SECTION("convert_data - Type Conversions") { + auto threadpool = svs::threads::as_threadpool(4); + + // Test float to Float16 conversion + auto data_float = svs::data::SimpleData(10, 8); + for (size_t i = 0; i < data_float.size(); ++i) { + auto datum = data_float.get_datum(i); + for (size_t j = 0; j < data_float.dimensions(); ++j) { + datum[j] = static_cast(i * 10 + j); + } + } + + auto data_fp16 = ivf::convert_data(data_float, threadpool); + CATCH_REQUIRE(data_fp16.size() == data_float.size()); + CATCH_REQUIRE(data_fp16.dimensions() == data_float.dimensions()); + + // Test float to BFloat16 conversion + auto data_bf16 = ivf::convert_data(data_float, threadpool); + CATCH_REQUIRE(data_bf16.size() == data_float.size()); + CATCH_REQUIRE(data_bf16.dimensions() == data_float.dimensions()); + + // Test Float16 to float conversion + auto data_back = ivf::convert_data(data_fp16, threadpool); + CATCH_REQUIRE(data_back.size() == data_fp16.size()); + CATCH_REQUIRE(data_back.dimensions() == data_fp16.dimensions()); + } + + CATCH_SECTION("generate_norms") { + auto threadpool = svs::threads::as_threadpool(4); + + // Create test data + auto data = svs::data::SimpleData(20, 10); + for (size_t i = 0; i < data.size(); ++i) { + auto datum = data.get_datum(i); + for (size_t j = 0; j < data.dimensions(); ++j) { + datum[j] = static_cast(i + j); + } + } + + std::vector norms(data.size()); + ivf::generate_norms(data, norms, threadpool); + + // Verify norms are computed + CATCH_REQUIRE(norms.size() == data.size()); + for (const auto& norm : norms) { + CATCH_REQUIRE(norm >= 0.0f); + CATCH_REQUIRE(std::isfinite(norm)); + } + } + + CATCH_SECTION("maybe_compute_norms") { + auto threadpool = svs::threads::as_threadpool(4); + auto data = svs::data::SimpleData(15, 8); + + for (size_t i = 0; i < data.size(); ++i) { + auto datum = data.get_datum(i); + for (size_t j = 0; j < data.dimensions(); ++j) { + datum[j] = static_cast(i + j * 0.5); + } + } + + // For L2 distance, norms should be computed + auto norms_l2 = ivf::maybe_compute_norms(data, threadpool); + CATCH_REQUIRE(norms_l2.size() == data.size()); + for (const auto& norm : norms_l2) { + CATCH_REQUIRE(norm >= 0.0f); + } + + // For IP distance, norms should be empty + auto norms_ip = ivf::maybe_compute_norms(data, threadpool); + CATCH_REQUIRE(norms_ip.empty()); + } + + CATCH_SECTION("group_assignments") { + // Test grouping assignments + size_t num_centroids = 5; + size_t data_size = 50; + + // Create assignments (each point assigned to a centroid) + std::vector assignments(data_size); + for (size_t i = 0; i < data_size; ++i) { + assignments[i] = i % num_centroids; + } + + auto data = svs::data::SimpleData(data_size, 8); + auto groups = ivf::group_assignments(assignments, num_centroids, data); + + CATCH_REQUIRE(groups.size() == num_centroids); + + // Verify all points are assigned + size_t 
total_assigned = 0; + for (const auto& group : groups) { + total_assigned += group.size(); + } + CATCH_REQUIRE(total_assigned == data_size); + + // Verify each group has expected size + for (const auto& group : groups) { + CATCH_REQUIRE(group.size() == data_size / num_centroids); + } + } + + CATCH_SECTION("make_training_set") { + auto threadpool = svs::threads::as_threadpool(4); + auto rng = std::mt19937(12345); + + // Create full dataset + size_t full_size = 100; + size_t training_size = 30; + auto data = svs::data::SimpleData(full_size, 16); + + for (size_t i = 0; i < data.size(); ++i) { + auto datum = data.get_datum(i); + for (size_t j = 0; j < data.dimensions(); ++j) { + datum[j] = static_cast(i * 10 + j); + } + } + + std::vector ids(training_size); + auto training_set = + ivf::make_training_set>( + data, ids, training_size, rng, threadpool + ); + + CATCH_REQUIRE(training_set.size() == training_size); + CATCH_REQUIRE(training_set.dimensions() == data.dimensions()); + CATCH_REQUIRE(ids.size() == training_size); + + // Verify IDs are valid and unique + std::unordered_set unique_ids(ids.begin(), ids.end()); + CATCH_REQUIRE(unique_ids.size() == training_size); + for (const auto& id : ids) { + CATCH_REQUIRE(id < full_size); + } + } + + CATCH_SECTION("init_centroids") { + auto threadpool = svs::threads::as_threadpool(4); + auto rng = std::mt19937(54321); + + // Create training data + size_t training_size = 50; + size_t num_centroids = 10; + auto trainset = svs::data::SimpleData(training_size, 12); + + for (size_t i = 0; i < trainset.size(); ++i) { + auto datum = trainset.get_datum(i); + for (size_t j = 0; j < trainset.dimensions(); ++j) { + datum[j] = static_cast(i + j * 0.3); + } + } + + std::vector ids(num_centroids); + auto centroids = + ivf::init_centroids(trainset, ids, num_centroids, rng, threadpool); + + CATCH_REQUIRE(centroids.size() == num_centroids); + CATCH_REQUIRE(centroids.dimensions() == trainset.dimensions()); + + // Verify centroids are from training set + for (size_t i = 0; i < num_centroids; ++i) { + auto centroid = centroids.get_datum(i); + bool found = false; + for (size_t j = 0; j < trainset.size(); ++j) { + auto train_point = trainset.get_datum(j); + bool matches = true; + for (size_t k = 0; k < trainset.dimensions(); ++k) { + if (std::abs(centroid[k] - train_point[k]) > 1e-6f) { + matches = false; + break; + } + } + if (matches) { + found = true; + break; + } + } + CATCH_REQUIRE(found); + } + } + + CATCH_SECTION("normalize_centroids") { + auto threadpool = svs::threads::as_threadpool(4); + auto timer = svs::lib::Timer(); + + // Create centroids with non-unit norms + auto centroids = svs::data::SimpleData(8, 10); + for (size_t i = 0; i < centroids.size(); ++i) { + auto centroid = centroids.get_datum(i); + for (size_t j = 0; j < centroids.dimensions(); ++j) { + centroid[j] = static_cast((i + 1) * (j + 1)); + } + } + + ivf::normalize_centroids(centroids, threadpool, timer); + + // Verify centroids are normalized (L2 norm = 1) + for (size_t i = 0; i < centroids.size(); ++i) { + auto centroid = centroids.get_datum(i); + float norm_sq = 0.0f; + for (size_t j = 0; j < centroids.dimensions(); ++j) { + norm_sq += centroid[j] * centroid[j]; + } + float norm = std::sqrt(norm_sq); + CATCH_REQUIRE(std::abs(norm - 1.0f) < 1e-5f); + } + } +} diff --git a/tests/svs/index/ivf/hierarchical_kmeans.cpp b/tests/svs/index/ivf/hierarchical_kmeans.cpp index db15940c3..e448e53c0 100644 --- a/tests/svs/index/ivf/hierarchical_kmeans.cpp +++ b/tests/svs/index/ivf/hierarchical_kmeans.cpp @@ -17,6 
+17,10 @@ // header under test #include "svs/index/ivf/hierarchical_kmeans.h" +// additional headers for train_only test +#include "svs/index/ivf/index.h" +#include "svs/index/ivf/kmeans.h" + // tests #include "tests/utils/test_dataset.h" #include "tests/utils/utils.h" @@ -25,6 +29,7 @@ #include "catch2/catch_test_macros.hpp" // stl +#include #include namespace { @@ -62,14 +67,477 @@ void test_hierarchical_kmeans_clustering(const Data& data, Distance distance) { } } +template +void test_train_only_centroids_match(const Data& data, Distance distance) { + namespace ivf = svs::index::ivf; + + // Test both flat and hierarchical k-means with different modes + for (bool is_hierarchical : {false, true}) { + for (size_t n_centroids : {25}) { + for (size_t minibatch : {25}) { + for (size_t iters : {3}) { + for (float training_fraction : {0.6}) { + auto params = ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(minibatch) + .num_iterations(iters) + .is_hierarchical(is_hierarchical) + .training_fraction(training_fraction) + .seed(12345); // Fixed seed for reproducibility + + if (is_hierarchical) { + params.hierarchical_level1_clusters(5); + } + + size_t num_threads = 4; + + // Run with train_only = false (normal mode) + auto [centroids_normal, clusters_normal] = + ivf::build_clustering( + params, data, distance, num_threads, false + ); + + // Run with train_only = true + auto [centroids_train_only, clusters_train_only] = + ivf::build_clustering( + params, data, distance, num_threads, true + ); + + // Verify centroids are identical + CATCH_REQUIRE( + centroids_normal.size() == centroids_train_only.size() + ); + CATCH_REQUIRE( + centroids_normal.dimensions() == + centroids_train_only.dimensions() + ); + + constexpr float tolerance = 1e-6f; + for (size_t i = 0; i < centroids_normal.size(); ++i) { + auto datum_normal = centroids_normal.get_datum(i); + auto datum_train_only = centroids_train_only.get_datum(i); + + for (size_t j = 0; j < centroids_normal.dimensions(); ++j) { + float diff = + std::abs(datum_normal[j] - datum_train_only[j]); + CATCH_REQUIRE(diff < tolerance); + } + } + + // Verify train_only clusters are empty (as expected) + for (const auto& cluster : clusters_train_only) { + CATCH_REQUIRE(cluster.empty()); + } + + // Verify normal mode has non-empty clusters (at least some) + bool has_non_empty_cluster = false; + for (const auto& cluster : clusters_normal) { + if (!cluster.empty()) { + has_non_empty_cluster = true; + break; + } + } + CATCH_REQUIRE(has_non_empty_cluster); + } + } + } + } + } +} + +template +void test_hierarchical_kmeans_level1_clusters(const Data& data, Distance distance) { + namespace ivf = svs::index::ivf; + + // Test different Level 1 cluster configurations + for (size_t n_centroids : {64, 100}) { + for (size_t l1_clusters : {0, 4, 8, 16}) { // 0 means auto-calculate + auto params = ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(25) + .num_iterations(3) + .is_hierarchical(true) + .training_fraction(0.6f) + .hierarchical_level1_clusters(l1_clusters); + + auto threadpool = svs::threads::as_threadpool(4); + auto [centroids, clusters] = hierarchical_kmeans_clustering( + params, data, distance, threadpool + ); + + CATCH_REQUIRE(centroids.size() == n_centroids); + CATCH_REQUIRE(centroids.dimensions() == data.dimensions()); + CATCH_REQUIRE(clusters.size() == n_centroids); + + // Verify all data points are assigned + std::unordered_set assigned_points; + for (const auto& cluster : clusters) { + for (auto point_id : cluster) { + 
CATCH_REQUIRE(point_id < data.size()); + assigned_points.insert(point_id); + } + } + CATCH_REQUIRE(assigned_points.size() == data.size()); + } + } +} + +template +void test_hierarchical_kmeans_reproducibility(const Data& data, Distance distance) { + namespace ivf = svs::index::ivf; + + const size_t seed = 98765; + const size_t n_centroids = 50; + const size_t l1_clusters = 7; + + auto params1 = ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(25) + .num_iterations(4) + .is_hierarchical(true) + .training_fraction(0.7f) + .hierarchical_level1_clusters(l1_clusters) + .seed(seed); + + auto params2 = ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(25) + .num_iterations(4) + .is_hierarchical(true) + .training_fraction(0.7f) + .hierarchical_level1_clusters(l1_clusters) + .seed(seed); + + auto threadpool = svs::threads::as_threadpool(4); + + auto [centroids1, clusters1] = + hierarchical_kmeans_clustering(params1, data, distance, threadpool); + + auto [centroids2, clusters2] = + hierarchical_kmeans_clustering(params2, data, distance, threadpool); + + // Verify centroids are identical + CATCH_REQUIRE(centroids1.size() == centroids2.size()); + constexpr float tolerance = 1e-6f; + + for (size_t i = 0; i < centroids1.size(); ++i) { + auto centroid1 = centroids1.get_datum(i); + auto centroid2 = centroids2.get_datum(i); + + for (size_t j = 0; j < centroids1.dimensions(); ++j) { + float diff = std::abs(centroid1[j] - centroid2[j]); + CATCH_REQUIRE(diff < tolerance); + } + } +} + +template +void test_hierarchical_vs_flat_kmeans(const Data& data, Distance distance) { + namespace ivf = svs::index::ivf; + + const size_t n_centroids = 36; + + // Flat k-means + auto flat_params = ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(25) + .num_iterations(3) + .is_hierarchical(false) + .training_fraction(0.6f) + .seed(555); + + // Hierarchical k-means + auto hierarchical_params = ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(25) + .num_iterations(3) + .is_hierarchical(true) + .training_fraction(0.6f) + .hierarchical_level1_clusters(6) + .seed(555); + + auto threadpool = svs::threads::as_threadpool(4); + + auto [flat_centroids, flat_clusters] = + ivf::kmeans_clustering(flat_params, data, distance, threadpool); + + auto [hierarchical_centroids, hierarchical_clusters] = + hierarchical_kmeans_clustering( + hierarchical_params, data, distance, threadpool + ); + + // Both should produce same number of centroids and clusters + CATCH_REQUIRE(flat_centroids.size() == n_centroids); + CATCH_REQUIRE(hierarchical_centroids.size() == n_centroids); + CATCH_REQUIRE(flat_clusters.size() == n_centroids); + CATCH_REQUIRE(hierarchical_clusters.size() == n_centroids); + + // Both should assign all points + std::unordered_set flat_points, hierarchical_points; + + for (const auto& cluster : flat_clusters) { + for (auto point_id : cluster) { + flat_points.insert(point_id); + } + } + + for (const auto& cluster : hierarchical_clusters) { + for (auto point_id : cluster) { + hierarchical_points.insert(point_id); + } + } + + CATCH_REQUIRE(flat_points.size() == data.size()); + CATCH_REQUIRE(hierarchical_points.size() == data.size()); +} + +template +void test_hierarchical_kmeans_edge_cases(const Data& data, Distance distance) { + namespace ivf = svs::index::ivf; + auto threadpool = svs::threads::as_threadpool(4); + + // Test with Level 1 clusters equal to total centroids (degenerate case) + { + const size_t n_centroids = 16; + auto params = 
ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(20) + .num_iterations(2) + .is_hierarchical(true) + .training_fraction(0.5f) + .hierarchical_level1_clusters(n_centroids); + + auto [centroids, clusters] = + hierarchical_kmeans_clustering(params, data, distance, threadpool); + + CATCH_REQUIRE(centroids.size() == n_centroids); + CATCH_REQUIRE(clusters.size() == n_centroids); + } + + // Test with very few Level 1 clusters + { + const size_t n_centroids = 60; + auto params = ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(25) + .num_iterations(3) + .is_hierarchical(true) + .training_fraction(0.6f) + .hierarchical_level1_clusters(2); + + auto [centroids, clusters] = + hierarchical_kmeans_clustering(params, data, distance, threadpool); + + CATCH_REQUIRE(centroids.size() == n_centroids); + CATCH_REQUIRE(clusters.size() == n_centroids); + } + + // Test with different training fractions + for (float training_fraction : {0.3f, 0.5f, 0.8f, 1.0f}) { + auto params = ivf::IVFBuildParameters() + .num_centroids(24) + .minibatch_size(20) + .num_iterations(2) + .is_hierarchical(true) + .training_fraction(training_fraction) + .hierarchical_level1_clusters(4); + + auto [centroids, clusters] = + hierarchical_kmeans_clustering(params, data, distance, threadpool); + + CATCH_REQUIRE(centroids.size() == 24); + CATCH_REQUIRE(clusters.size() == 24); + + // Verify centroids are valid + for (size_t i = 0; i < centroids.size(); ++i) { + auto centroid = centroids.get_datum(i); + for (size_t j = 0; j < centroids.dimensions(); ++j) { + CATCH_REQUIRE(std::isfinite(centroid[j])); + } + } + } +} + +template +void test_hierarchical_kmeans_cluster_distribution(const Data& data, Distance distance) { + namespace ivf = svs::index::ivf; + + // Test that Level 2 clusters are reasonably distributed across Level 1 clusters + const size_t n_centroids = 48; + const size_t l1_clusters = 6; + + auto params = ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(25) + .num_iterations(4) + .is_hierarchical(true) + .training_fraction(0.7f) + .hierarchical_level1_clusters(l1_clusters) + .seed(777); + + auto threadpool = svs::threads::as_threadpool(4); + auto [centroids, clusters] = + hierarchical_kmeans_clustering(params, data, distance, threadpool); + + CATCH_REQUIRE(centroids.size() == n_centroids); + CATCH_REQUIRE(clusters.size() == n_centroids); + + // Verify we have some reasonable distribution of cluster sizes + size_t empty_clusters = 0; + size_t total_assigned = 0; + + for (const auto& cluster : clusters) { + if (cluster.empty()) { + empty_clusters++; + } + total_assigned += cluster.size(); + } + + CATCH_REQUIRE(total_assigned == data.size()); + // Allow some empty clusters but not too many (less than half) + CATCH_REQUIRE(empty_clusters < n_centroids / 2); +} + } // namespace CATCH_TEST_CASE("Hierarchical Kmeans Param Check", "[ivf][hierarchial_parameter_check]") { - CATCH_SECTION("Uncompressed Data") { + CATCH_SECTION("Uncompressed Data - All Data Types") { auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + + // Test float32 test_hierarchical_kmeans_clustering(data, svs::DistanceIP()); + test_hierarchical_kmeans_clustering(data, svs::DistanceL2()); + + // Test Float16 (fp16) test_hierarchical_kmeans_clustering(data, svs::DistanceIP()); + test_hierarchical_kmeans_clustering(data, svs::DistanceL2()); + + // Test BFloat16 (bf16) test_hierarchical_kmeans_clustering(data, svs::DistanceIP()); test_hierarchical_kmeans_clustering(data, 
svs::DistanceL2()); } } + +CATCH_TEST_CASE( + "Hierarchical Kmeans Level1 Clusters", "[ivf][hierarchical_kmeans][level1]" +) { + CATCH_SECTION("Uncompressed Data - All Data Types") { + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + + // Test float32 + test_hierarchical_kmeans_level1_clusters(data, svs::DistanceIP()); + test_hierarchical_kmeans_level1_clusters(data, svs::DistanceL2()); + + // Test Float16 (fp16) + test_hierarchical_kmeans_level1_clusters(data, svs::DistanceIP()); + test_hierarchical_kmeans_level1_clusters(data, svs::DistanceL2()); + + // Test BFloat16 (bf16) + test_hierarchical_kmeans_level1_clusters(data, svs::DistanceIP()); + test_hierarchical_kmeans_level1_clusters(data, svs::DistanceL2()); + } +} + +CATCH_TEST_CASE( + "Hierarchical Kmeans Reproducibility", "[ivf][hierarchical_kmeans][reproducibility]" +) { + CATCH_SECTION("Uncompressed Data - All Data Types") { + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + + // Test float32 + test_hierarchical_kmeans_reproducibility(data, svs::DistanceIP()); + test_hierarchical_kmeans_reproducibility(data, svs::DistanceL2()); + + // Test Float16 (fp16) + test_hierarchical_kmeans_reproducibility(data, svs::DistanceIP()); + test_hierarchical_kmeans_reproducibility(data, svs::DistanceL2()); + + // Test BFloat16 (bf16) + test_hierarchical_kmeans_reproducibility(data, svs::DistanceIP()); + test_hierarchical_kmeans_reproducibility(data, svs::DistanceL2()); + } +} + +CATCH_TEST_CASE("Hierarchical vs Flat Kmeans", "[ivf][hierarchical_kmeans][comparison]") { + CATCH_SECTION("Uncompressed Data - All Data Types") { + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + + // Test float32 + test_hierarchical_vs_flat_kmeans(data, svs::DistanceIP()); + test_hierarchical_vs_flat_kmeans(data, svs::DistanceL2()); + + // Test Float16 (fp16) + test_hierarchical_vs_flat_kmeans(data, svs::DistanceIP()); + test_hierarchical_vs_flat_kmeans(data, svs::DistanceL2()); + + // Test BFloat16 (bf16) + test_hierarchical_vs_flat_kmeans(data, svs::DistanceIP()); + test_hierarchical_vs_flat_kmeans(data, svs::DistanceL2()); + } +} + +CATCH_TEST_CASE( + "Hierarchical Kmeans Edge Cases", "[ivf][hierarchical_kmeans][edge_cases]" +) { + CATCH_SECTION("Uncompressed Data - All Data Types") { + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + + // Test float32 + test_hierarchical_kmeans_edge_cases(data, svs::DistanceIP()); + test_hierarchical_kmeans_edge_cases(data, svs::DistanceL2()); + + // Test Float16 (fp16) + test_hierarchical_kmeans_edge_cases(data, svs::DistanceIP()); + test_hierarchical_kmeans_edge_cases(data, svs::DistanceL2()); + + // Test BFloat16 (bf16) + test_hierarchical_kmeans_edge_cases(data, svs::DistanceIP()); + test_hierarchical_kmeans_edge_cases(data, svs::DistanceL2()); + } +} + +CATCH_TEST_CASE( + "Hierarchical Kmeans Cluster Distribution", "[ivf][hierarchical_kmeans][distribution]" +) { + CATCH_SECTION("Uncompressed Data - All Data Types") { + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + + // Test float32 + test_hierarchical_kmeans_cluster_distribution(data, svs::DistanceIP()); + test_hierarchical_kmeans_cluster_distribution(data, svs::DistanceL2()); + + // Test Float16 (fp16) + test_hierarchical_kmeans_cluster_distribution( + data, svs::DistanceIP() + ); + test_hierarchical_kmeans_cluster_distribution( + data, svs::DistanceL2() + ); + + // Test BFloat16 (bf16) + test_hierarchical_kmeans_cluster_distribution( + data, 
svs::DistanceIP() + ); + test_hierarchical_kmeans_cluster_distribution( + data, svs::DistanceL2() + ); + } +} + +CATCH_TEST_CASE("Train Only Centroids Match", "[ivf][kmeans][train_only]") { + CATCH_SECTION("Uncompressed Data - All Data Types") { + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + + // Test float32 + test_train_only_centroids_match(data, svs::DistanceIP()); + test_train_only_centroids_match(data, svs::DistanceL2()); + + // Test Float16 (fp16) + test_train_only_centroids_match(data, svs::DistanceIP()); + test_train_only_centroids_match(data, svs::DistanceL2()); + + // Test BFloat16 (bf16) + test_train_only_centroids_match(data, svs::DistanceIP()); + test_train_only_centroids_match(data, svs::DistanceL2()); + } +} diff --git a/tests/svs/index/ivf/kmeans.cpp b/tests/svs/index/ivf/kmeans.cpp index 49e20a10f..eceb77520 100644 --- a/tests/svs/index/ivf/kmeans.cpp +++ b/tests/svs/index/ivf/kmeans.cpp @@ -57,14 +57,400 @@ void test_kmeans_clustering(const Data& data, Distance distance) { } } +template +void test_kmeans_train_only_functionality(const Data& data, Distance distance) { + namespace ivf = svs::index::ivf; + + // Test train_only functionality + for (size_t n_centroids : {25, 50}) { + for (size_t minibatch : {25}) { + for (size_t iters : {3}) { + for (float training_fraction : {0.6f}) { + auto params = ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(minibatch) + .num_iterations(iters) + .is_hierarchical(false) + .training_fraction(training_fraction) + .seed(42); // Fixed seed for reproducibility + + auto threadpool = svs::threads::as_threadpool(4); + + // Test train_only = false (normal mode) + auto [centroids_normal, clusters_normal] = + ivf::kmeans_clustering( + params, data, distance, threadpool, false + ); + + // Test train_only = true + auto [centroids_train_only, clusters_train_only] = + ivf::kmeans_clustering( + params, data, distance, threadpool, true + ); + + // Verify basic structure + CATCH_REQUIRE(centroids_normal.size() == n_centroids); + CATCH_REQUIRE(centroids_train_only.size() == n_centroids); + CATCH_REQUIRE(centroids_normal.dimensions() == data.dimensions()); + CATCH_REQUIRE(centroids_train_only.dimensions() == data.dimensions()); + + CATCH_REQUIRE(clusters_normal.size() == n_centroids); + CATCH_REQUIRE(clusters_train_only.size() == n_centroids); + + // Verify train_only produces empty clusters + for (const auto& cluster : clusters_train_only) { + CATCH_REQUIRE(cluster.empty()); + } + + // Verify normal mode has at least some non-empty clusters + bool has_non_empty = false; + for (const auto& cluster : clusters_normal) { + if (!cluster.empty()) { + has_non_empty = true; + break; + } + } + CATCH_REQUIRE(has_non_empty); + + // Verify centroids are identical (using same seed) + constexpr float tolerance = 1e-6f; + for (size_t i = 0; i < n_centroids; ++i) { + auto normal_centroid = centroids_normal.get_datum(i); + auto train_only_centroid = centroids_train_only.get_datum(i); + + for (size_t j = 0; j < data.dimensions(); ++j) { + float diff = + std::abs(normal_centroid[j] - train_only_centroid[j]); + CATCH_REQUIRE(diff < tolerance); + } + } + } + } + } + } +} + +template +void test_kmeans_train_only_performance(const Data& data, Distance distance) { + namespace ivf = svs::index::ivf; + + // Test that train_only mode is at least as fast as normal mode + // (it should be faster since it skips assignment, but we just check it doesn't slow + // down) + size_t n_centroids = 50; + auto params = 
ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(25) + .num_iterations(3) + .is_hierarchical(false) + .training_fraction(0.5f) + .seed(123); + + auto threadpool = svs::threads::as_threadpool(4); + + // Time normal mode + auto start_normal = std::chrono::high_resolution_clock::now(); + auto [centroids_normal, clusters_normal] = + ivf::kmeans_clustering(params, data, distance, threadpool, false); + auto end_normal = std::chrono::high_resolution_clock::now(); + + // Time train_only mode + auto start_train_only = std::chrono::high_resolution_clock::now(); + auto [centroids_train_only, clusters_train_only] = + ivf::kmeans_clustering(params, data, distance, threadpool, true); + auto end_train_only = std::chrono::high_resolution_clock::now(); + + auto normal_duration = + std::chrono::duration_cast(end_normal - start_normal); + auto train_only_duration = std::chrono::duration_cast( + end_train_only - start_train_only + ); + + // Verify train_only doesn't take significantly longer (allow some variance) + // In practice, train_only should be faster, but we allow it to be up to 50% longer due + // to variance + CATCH_REQUIRE(train_only_duration.count() <= normal_duration.count() * 1.5); + + // Verify results are still valid + CATCH_REQUIRE(centroids_train_only.size() == n_centroids); + for (const auto& cluster : clusters_train_only) { + CATCH_REQUIRE(cluster.empty()); + } +} + +template +void test_kmeans_edge_cases(const Data& data, Distance distance) { + namespace ivf = svs::index::ivf; + + // Test with minimum centroids + { + auto params = ivf::IVFBuildParameters() + .num_centroids(1) + .minibatch_size(10) + .num_iterations(2) + .is_hierarchical(false) + .training_fraction(0.5f); + auto threadpool = svs::threads::as_threadpool(2); + auto [centroids, clusters] = + ivf::kmeans_clustering(params, data, distance, threadpool); + + CATCH_REQUIRE(centroids.size() == 1); + CATCH_REQUIRE(clusters.size() == 1); + CATCH_REQUIRE(clusters[0].size() > 0); // Should contain all points + } + + // Test with large number of centroids (but less than data points) + if (data.size() > 100) { + auto params = ivf::IVFBuildParameters() + .num_centroids(std::min(data.size() - 1, size_t(100))) + .minibatch_size(20) + .num_iterations(3) + .is_hierarchical(false) + .training_fraction(0.7f); + auto threadpool = svs::threads::as_threadpool(4); + auto [centroids, clusters] = + ivf::kmeans_clustering(params, data, distance, threadpool); + + CATCH_REQUIRE(centroids.size() == std::min(data.size() - 1, size_t(100))); + CATCH_REQUIRE(clusters.size() == std::min(data.size() - 1, size_t(100))); + } +} + +template +void test_kmeans_reproducibility(const Data& data, Distance distance) { + namespace ivf = svs::index::ivf; + + // Test that same seed produces same results + const size_t seed = 12345; + const size_t n_centroids = 25; + + auto params1 = ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(25) + .num_iterations(3) + .is_hierarchical(false) + .training_fraction(0.6f) + .seed(seed); + + auto params2 = ivf::IVFBuildParameters() + .num_centroids(n_centroids) + .minibatch_size(25) + .num_iterations(3) + .is_hierarchical(false) + .training_fraction(0.6f) + .seed(seed); + + auto threadpool = svs::threads::as_threadpool(4); + + auto [centroids1, clusters1] = + ivf::kmeans_clustering(params1, data, distance, threadpool); + + auto [centroids2, clusters2] = + ivf::kmeans_clustering(params2, data, distance, threadpool); + + // Verify centroids are identical + CATCH_REQUIRE(centroids1.size() == 
centroids2.size()); + constexpr float tolerance = 1e-6f; + + for (size_t i = 0; i < centroids1.size(); ++i) { + auto centroid1 = centroids1.get_datum(i); + auto centroid2 = centroids2.get_datum(i); + + for (size_t j = 0; j < centroids1.dimensions(); ++j) { + float diff = std::abs(centroid1[j] - centroid2[j]); + CATCH_REQUIRE(diff < tolerance); + } + } + + // Verify cluster assignments are identical + CATCH_REQUIRE(clusters1.size() == clusters2.size()); + for (size_t i = 0; i < clusters1.size(); ++i) { + CATCH_REQUIRE(clusters1[i].size() == clusters2[i].size()); + // Note: We don't check exact order as cluster assignment might vary with same + // centroids + } +} + +template +void test_kmeans_cluster_assignment_validity(const Data& data, Distance distance) { + namespace ivf = svs::index::ivf; + + auto params = ivf::IVFBuildParameters() + .num_centroids(20) + .minibatch_size(25) + .num_iterations(5) + .is_hierarchical(false) + .training_fraction(0.8f); + + auto threadpool = svs::threads::as_threadpool(4); + auto [centroids, clusters] = + ivf::kmeans_clustering(params, data, distance, threadpool); + + // Verify all data points are assigned to exactly one cluster + std::unordered_set assigned_points; + for (size_t i = 0; i < clusters.size(); ++i) { + for (auto point_id : clusters[i]) { + CATCH_REQUIRE(point_id < data.size()); // Valid point index + CATCH_REQUIRE( + assigned_points.find(point_id) == assigned_points.end() + ); // Not already assigned + assigned_points.insert(point_id); + } + } + + CATCH_REQUIRE(assigned_points.size() == data.size()); // All points assigned + + // Verify centroids have valid values (no NaN or infinity) + for (size_t i = 0; i < centroids.size(); ++i) { + auto centroid = centroids.get_datum(i); + for (size_t j = 0; j < centroids.dimensions(); ++j) { + CATCH_REQUIRE(std::isfinite(centroid[j])); + } + } +} + +template +void test_kmeans_parameter_variations(const Data& data, Distance distance) { + namespace ivf = svs::index::ivf; + auto threadpool = svs::threads::as_threadpool(4); + + // Test different minibatch sizes + for (size_t minibatch : {10, 25, 50}) { + auto params = ivf::IVFBuildParameters() + .num_centroids(15) + .minibatch_size(minibatch) + .num_iterations(3) + .is_hierarchical(false) + .training_fraction(0.6f); + + auto [centroids, clusters] = + ivf::kmeans_clustering(params, data, distance, threadpool); + + CATCH_REQUIRE(centroids.size() == 15); + CATCH_REQUIRE(clusters.size() == 15); + } + + // Test different iteration counts + for (size_t iters : {1, 3, 5, 10}) { + auto params = ivf::IVFBuildParameters() + .num_centroids(10) + .minibatch_size(25) + .num_iterations(iters) + .is_hierarchical(false) + .training_fraction(0.6f); + + auto [centroids, clusters] = + ivf::kmeans_clustering(params, data, distance, threadpool); + + CATCH_REQUIRE(centroids.size() == 10); + CATCH_REQUIRE(clusters.size() == 10); + } + + // Test different training fractions + for (float training_fraction : {0.3f, 0.5f, 0.7f, 0.9f}) { + auto params = ivf::IVFBuildParameters() + .num_centroids(12) + .minibatch_size(25) + .num_iterations(3) + .is_hierarchical(false) + .training_fraction(training_fraction); + + auto [centroids, clusters] = + ivf::kmeans_clustering(params, data, distance, threadpool); + + CATCH_REQUIRE(centroids.size() == 12); + CATCH_REQUIRE(clusters.size() == 12); + } +} + } // namespace CATCH_TEST_CASE("Build Kmeans Param Check", "[ivf][parameter_check]") { - CATCH_SECTION("Uncompressed Data") { + CATCH_SECTION("Uncompressed Data - All Data Types") { auto data = 
svs::data::SimpleData::load(test_dataset::data_svs_file()); + + // Test float32 test_kmeans_clustering(data, svs::DistanceIP()); + test_kmeans_clustering(data, svs::DistanceL2()); + + // Test Float16 (fp16) test_kmeans_clustering(data, svs::DistanceIP()); + test_kmeans_clustering(data, svs::DistanceL2()); + + // Test BFloat16 (bf16) test_kmeans_clustering(data, svs::DistanceIP()); test_kmeans_clustering(data, svs::DistanceL2()); } } + +CATCH_TEST_CASE("Kmeans Edge Cases", "[ivf][kmeans][edge_cases]") { + CATCH_SECTION("Uncompressed Data - All Data Types") { + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + + // Test float32 + test_kmeans_edge_cases(data, svs::DistanceIP()); + test_kmeans_edge_cases(data, svs::DistanceL2()); + + // Test Float16 (fp16) + test_kmeans_edge_cases(data, svs::DistanceIP()); + test_kmeans_edge_cases(data, svs::DistanceL2()); + + // Test BFloat16 (bf16) + test_kmeans_edge_cases(data, svs::DistanceIP()); + test_kmeans_edge_cases(data, svs::DistanceL2()); + } +} + +CATCH_TEST_CASE("Kmeans Reproducibility", "[ivf][kmeans][reproducibility]") { + CATCH_SECTION("Uncompressed Data - All Data Types") { + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + + // Test float32 + test_kmeans_reproducibility(data, svs::DistanceIP()); + test_kmeans_reproducibility(data, svs::DistanceL2()); + + // Test Float16 (fp16) + test_kmeans_reproducibility(data, svs::DistanceIP()); + test_kmeans_reproducibility(data, svs::DistanceL2()); + + // Test BFloat16 (bf16) + test_kmeans_reproducibility(data, svs::DistanceIP()); + test_kmeans_reproducibility(data, svs::DistanceL2()); + } +} + +CATCH_TEST_CASE("Kmeans Cluster Assignment Validity", "[ivf][kmeans][cluster_validity]") { + CATCH_SECTION("Uncompressed Data - All Data Types") { + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + + // Test float32 + test_kmeans_cluster_assignment_validity(data, svs::DistanceIP()); + test_kmeans_cluster_assignment_validity(data, svs::DistanceL2()); + + // Test Float16 (fp16) + test_kmeans_cluster_assignment_validity(data, svs::DistanceIP()); + test_kmeans_cluster_assignment_validity(data, svs::DistanceL2()); + + // Test BFloat16 (bf16) + test_kmeans_cluster_assignment_validity(data, svs::DistanceIP()); + test_kmeans_cluster_assignment_validity(data, svs::DistanceL2()); + } +} + +CATCH_TEST_CASE("Kmeans Parameter Variations", "[ivf][kmeans][parameters]") { + CATCH_SECTION("Uncompressed Data - All Data Types") { + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + + // Test float32 + test_kmeans_parameter_variations(data, svs::DistanceIP()); + test_kmeans_parameter_variations(data, svs::DistanceL2()); + + // Test Float16 (fp16) + test_kmeans_parameter_variations(data, svs::DistanceIP()); + test_kmeans_parameter_variations(data, svs::DistanceL2()); + + // Test BFloat16 (bf16) + test_kmeans_parameter_variations(data, svs::DistanceIP()); + test_kmeans_parameter_variations(data, svs::DistanceL2()); + } +} From 7505c68869f0d70da3c40518e057f490ed0630f2 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Thu, 13 Nov 2025 14:18:09 -0800 Subject: [PATCH 02/23] Add cluster assignment functionality and tests --- include/svs/index/ivf/common.h | 97 +++++++++ tests/svs/index/ivf/common.cpp | 353 +++++++++++++++++++++++++++++++++ 2 files changed, 450 insertions(+) diff --git a/include/svs/index/ivf/common.h b/include/svs/index/ivf/common.h index a75ccff29..6a6712399 100644 --- a/include/svs/index/ivf/common.h +++ 
b/include/svs/index/ivf/common.h @@ -740,6 +740,103 @@ std::vector> group_assignments( return clusters; } +/// @brief Perform cluster assignment for data given pre-trained centroids +/// +/// @tparam BuildType The numeric type used for matrix operations (float, Float16, BFloat16) +/// @tparam Data The dataset type +/// @tparam Centroids The centroids dataset type +/// @tparam Distance The distance metric type (DistanceIP or DistanceL2) +/// @tparam Pool The thread pool type +/// @tparam I The integer type for cluster indices +/// +/// @param data The dataset to assign to clusters +/// @param centroids The pre-trained centroids +/// @param distance The distance metric +/// @param threadpool The thread pool for parallel execution +/// @param minibatch_size Size of each processing batch (default: 10000) +/// @param integer_type Type tag for cluster indices (default: uint32_t) +/// +/// @return A vector of vectors where each inner vector contains the indices of data +/// points assigned to that cluster +template < + typename BuildType, + data::ImmutableMemoryDataset Data, + data::ImmutableMemoryDataset Centroids, + typename Distance, + threads::ThreadPool Pool, + std::integral I = uint32_t> +auto cluster_assignment( + Data& data, + Centroids& centroids, + Distance& distance, + Pool& threadpool, + size_t minibatch_size = 10'000, + lib::Type SVS_UNUSED(integer_type) = {} +) { + size_t ndims = data.dimensions(); + size_t num_centroids = centroids.size(); + + if (data.dimensions() != centroids.dimensions()) { + throw ANNEXCEPTION( + "Data and centroids must have the same dimensions! Data dims: {}, Centroids " + "dims: {}", + data.dimensions(), + centroids.dimensions() + ); + } + + // Allocate memory for assignments and matmul results + auto assignments = std::vector(data.size()); + auto matmul_results = data::SimpleData{minibatch_size, num_centroids}; + + // Convert centroids to BuildType if necessary + using CentroidType = typename Centroids::element_type; + data::SimpleData centroids_build; + if constexpr (!std::is_same_v) { + centroids_build = convert_data(centroids, threadpool); + } else { + centroids_build = + data::SimpleData{centroids.size(), centroids.dimensions()}; + convert_data(centroids, centroids_build, threadpool); + } + + // Compute norms if using L2 distance + auto data_norm = maybe_compute_norms(data, threadpool); + auto centroids_norm = maybe_compute_norms(centroids_build, threadpool); + + // Process data in batches + size_t batchsize = minibatch_size; + size_t num_batches = lib::div_round_up(data.size(), batchsize); + + using Alloc = svs::HugepageAllocator; + auto data_batch = data::SimpleData{batchsize, ndims}; + + for (size_t batch = 0; batch < num_batches; ++batch) { + auto this_batch = threads::UnitRange{ + batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; + auto data_batch_view = data::make_view(data, this_batch); + convert_data(data_batch_view, data_batch, threadpool); + + // Use the existing centroid_assignment function to compute assignments + auto timer = lib::Timer(); + centroid_assignment( + data_batch, + data_norm, + this_batch, + distance, + centroids_build, + centroids_norm, + assignments, + matmul_results, + threadpool, + timer + ); + } + + // Group assignments into clusters + return group_assignments(assignments, num_centroids, data); +} + template void search_centroids( const Query& query, diff --git a/tests/svs/index/ivf/common.cpp b/tests/svs/index/ivf/common.cpp index 8a7841af3..a23efc4bc 100644 --- a/tests/svs/index/ivf/common.cpp +++ 
b/tests/svs/index/ivf/common.cpp @@ -27,11 +27,14 @@ // svs #include "svs/core/data.h" #include "svs/core/distance.h" +#include "svs/index/ivf/hierarchical_kmeans.h" +#include "svs/index/ivf/kmeans.h" #include "svs/lib/threads.h" // stl #include #include +#include #include CATCH_TEST_CASE("Kmeans Clustering", "[ivf][parameters]") { @@ -368,3 +371,353 @@ CATCH_TEST_CASE("Common Utility Functions", "[ivf][common][core]") { } } } + +CATCH_TEST_CASE("Cluster Assignment Utility", "[ivf][common][cluster_assignment]") { + namespace ivf = svs::index::ivf; + + auto test_cluster_assignment = + [&]() { + auto threadpool = svs::threads::as_threadpool(4); + + // Create test data + size_t num_points = 1000; + size_t num_centroids = 10; + size_t dims = 128; + + auto data = svs::data::SimpleData(num_points, dims); + auto centroids = svs::data::SimpleData(num_centroids, dims); + + // Initialize data with structured patterns + for (size_t i = 0; i < num_points; ++i) { + auto datum = data.get_datum(i); + size_t cluster_id = i % num_centroids; + for (size_t j = 0; j < dims; ++j) { + // Create data that naturally clusters around centroids + datum[j] = static_cast( + cluster_id * 10.0f + j * 0.1f + (i % 10) * 0.01f + ); + } + } + + // Initialize centroids to match cluster centers + for (size_t i = 0; i < num_centroids; ++i) { + auto centroid = centroids.get_datum(i); + for (size_t j = 0; j < dims; ++j) { + centroid[j] = static_cast(i * 10.0f + j * 0.1f); + } + } + + // Normalize for IP distance if needed + if constexpr (std::is_same_v) { + auto timer = svs::lib::Timer(); + ivf::normalize_centroids(centroids, threadpool, timer); + + // Normalize data as well for IP + for (size_t i = 0; i < num_points; ++i) { + auto datum = data.get_datum(i); + float norm = 0.0f; + for (size_t j = 0; j < dims; ++j) { + norm += static_cast(datum[j]) * static_cast(datum[j]); + } + norm = std::sqrt(norm); + if (norm > 0.0f) { + for (size_t j = 0; j < dims; ++j) { + datum[j] = + static_cast(static_cast(datum[j]) / norm); + } + } + } + } + + auto distance = Distance(); + + // Call cluster_assignment utility + auto clusters = ivf::cluster_assignment( + data, centroids, distance, threadpool, 10'000, svs::lib::Type() + ); + + // Verify results + CATCH_REQUIRE(clusters.size() == num_centroids); + + // Count total assigned points + size_t total_assigned = 0; + for (const auto& cluster : clusters) { + total_assigned += cluster.size(); + } + CATCH_REQUIRE(total_assigned == num_points); + + // Verify no cluster is empty (with our structured data) + size_t empty_clusters = 0; + for (const auto& cluster : clusters) { + if (cluster.empty()) { + empty_clusters++; + } + } + // With structured data, we expect most clusters to have points + // but allow a few empty clusters due to random initialization + CATCH_REQUIRE(empty_clusters <= 2); + }; + + CATCH_SECTION("Float32 with L2 Distance") { + test_cluster_assignment.operator()(); + } + + CATCH_SECTION("Float32 with IP Distance") { + test_cluster_assignment.operator()(); + } + + CATCH_SECTION("Float16 with L2 Distance") { + test_cluster_assignment.operator()(); + } + + CATCH_SECTION("Float16 with IP Distance") { + test_cluster_assignment.operator()(); + } + + CATCH_SECTION("BFloat16 with L2 Distance") { + test_cluster_assignment.operator()(); + } + + CATCH_SECTION("BFloat16 with IP Distance") { + test_cluster_assignment.operator()(); + } +} + +CATCH_TEST_CASE( + "IVF Train-Only and Cluster Assignment", "[ivf][common][train_only][cluster_assignment]" +) { + namespace ivf = svs::index::ivf; + auto 
threadpool = svs::threads::as_threadpool(4); + auto data = test_dataset::data_f32(); + + auto parameters = ivf::IVFBuildParameters() + .num_centroids(50) + .minibatch_size(500) + .num_iterations(10) + .is_hierarchical(false) + .training_fraction(0.5) + .seed(12345); + + CATCH_SECTION("Flat K-means: train_only + cluster_assignment vs full clustering") { + auto distance_l2 = svs::DistanceL2(); + + // Method 1: Full clustering (without train_only) + auto [centroids_full, clusters_full] = ivf::kmeans_clustering( + parameters, + data, + distance_l2, + threadpool, + svs::lib::Type(), + svs::logging::get(), + false // train_only = false + ); + + // Method 2: Train-only + cluster_assignment + auto [centroids_train, clusters_train] = ivf::kmeans_clustering( + parameters, + data, + distance_l2, + threadpool, + svs::lib::Type(), + svs::logging::get(), + true // train_only = true + ); + + // Verify train_only returns empty clusters + CATCH_REQUIRE(clusters_train.size() == parameters.num_centroids_); + for (const auto& cluster : clusters_train) { + CATCH_REQUIRE(cluster.empty()); + } + + // Now assign data using the cluster_assignment utility + auto clusters_assigned = ivf::cluster_assignment( + data, + centroids_train, + distance_l2, + threadpool, + 500, // minibatch_size + svs::lib::Type() + ); + + // Verify centroids match (within tolerance) + CATCH_REQUIRE(centroids_train.size() == centroids_full.size()); + CATCH_REQUIRE(centroids_train.dimensions() == centroids_full.dimensions()); + + for (size_t i = 0; i < centroids_train.size(); ++i) { + auto c1 = centroids_train.get_datum(i); + auto c2 = centroids_full.get_datum(i); + for (size_t j = 0; j < centroids_train.dimensions(); ++j) { + CATCH_REQUIRE(std::abs(c1[j] - c2[j]) < 1e-5f); + } + } + + // Verify cluster assignments match + CATCH_REQUIRE(clusters_assigned.size() == clusters_full.size()); + for (size_t i = 0; i < clusters_assigned.size(); ++i) { + CATCH_REQUIRE(clusters_assigned[i].size() == clusters_full[i].size()); + + // Sort both to compare + auto a = clusters_assigned[i]; + auto b = clusters_full[i]; + std::sort(a.begin(), a.end()); + std::sort(b.begin(), b.end()); + CATCH_REQUIRE(a == b); + } + + // Verify all points are assigned + size_t total_assigned = 0; + for (const auto& cluster : clusters_assigned) { + total_assigned += cluster.size(); + } + CATCH_REQUIRE(total_assigned == data.size()); + } + + CATCH_SECTION("Hierarchical K-means: train_only + cluster_assignment vs full clustering" + ) { + auto distance_ip = svs::DistanceIP(); + + // Use hierarchical k-means + auto hier_params = + parameters.is_hierarchical(true).hierarchical_level1_clusters(10); + + // Method 1: Full clustering (without train_only) + auto [centroids_full, clusters_full] = ivf::hierarchical_kmeans_clustering( + hier_params, + data, + distance_ip, + threadpool, + svs::lib::Type(), + svs::logging::get(), + false // train_only = false + ); + + // Method 2: Train-only + cluster_assignment + auto [centroids_train, clusters_train] = ivf::hierarchical_kmeans_clustering( + hier_params, + data, + distance_ip, + threadpool, + svs::lib::Type(), + svs::logging::get(), + true // train_only = true + ); + + // Verify train_only returns empty clusters + CATCH_REQUIRE(clusters_train.size() == hier_params.num_centroids_); + for (const auto& cluster : clusters_train) { + CATCH_REQUIRE(cluster.empty()); + } + + // Now assign data using the cluster_assignment utility + auto clusters_assigned = ivf::cluster_assignment( + data, + centroids_train, + distance_ip, + threadpool, + 500, // 
minibatch_size + svs::lib::Type() + ); + + // Verify centroids match (within tolerance) + CATCH_REQUIRE(centroids_train.size() == centroids_full.size()); + CATCH_REQUIRE(centroids_train.dimensions() == centroids_full.dimensions()); + + for (size_t i = 0; i < centroids_train.size(); ++i) { + auto c1 = centroids_train.get_datum(i); + auto c2 = centroids_full.get_datum(i); + for (size_t j = 0; j < centroids_train.dimensions(); ++j) { + CATCH_REQUIRE(std::abs(c1[j] - c2[j]) < 1e-5f); + } + } + + // Verify cluster structure is reasonable + CATCH_REQUIRE(clusters_assigned.size() == clusters_full.size()); + + // Verify all points are assigned in both methods + size_t total_assigned = 0; + size_t total_full = 0; + for (size_t i = 0; i < clusters_assigned.size(); ++i) { + total_assigned += clusters_assigned[i].size(); + total_full += clusters_full[i].size(); + } + CATCH_REQUIRE(total_assigned == data.size()); + CATCH_REQUIRE(total_full == data.size()); + + // For hierarchical k-means, assignments may differ slightly due to + // precision differences in the two-level clustering process. + // The important thing is that both methods produce valid clusterings. + // We verify this by checking that the distribution of cluster sizes + // is reasonable and similar. + + // Check no cluster is excessively large (> 50% of data) + for (const auto& cluster : clusters_assigned) { + CATCH_REQUIRE(cluster.size() <= data.size() / 2); + } + for (const auto& cluster : clusters_full) { + CATCH_REQUIRE(cluster.size() <= data.size() / 2); + } + + // Count non-empty clusters in both + size_t non_empty_assigned = 0; + size_t non_empty_full = 0; + for (size_t i = 0; i < clusters_assigned.size(); ++i) { + if (!clusters_assigned[i].empty()) + non_empty_assigned++; + if (!clusters_full[i].empty()) + non_empty_full++; + } + + // Both should have similar number of non-empty clusters (within 20%) + double ratio = static_cast(non_empty_assigned) / non_empty_full; + CATCH_REQUIRE(ratio >= 0.8); + CATCH_REQUIRE(ratio <= 1.2); + } + + CATCH_SECTION("Different data types with train_only workflow") { + auto distance_l2 = svs::DistanceL2(); + + // Test with Float16 + auto [centroids_fp16, clusters_empty_fp16] = ivf::kmeans_clustering( + parameters, + data, + distance_l2, + threadpool, + svs::lib::Type(), + svs::logging::get(), + true // train_only = true + ); + + auto clusters_fp16 = ivf::cluster_assignment( + data, centroids_fp16, distance_l2, threadpool, 500, svs::lib::Type() + ); + + CATCH_REQUIRE(clusters_fp16.size() == parameters.num_centroids_); + size_t total_fp16 = 0; + for (const auto& cluster : clusters_fp16) { + total_fp16 += cluster.size(); + } + CATCH_REQUIRE(total_fp16 == data.size()); + + // Test with BFloat16 + auto [centroids_bf16, clusters_empty_bf16] = ivf::kmeans_clustering( + parameters, + data, + distance_l2, + threadpool, + svs::lib::Type(), + svs::logging::get(), + true // train_only = true + ); + + auto clusters_bf16 = ivf::cluster_assignment( + data, centroids_bf16, distance_l2, threadpool, 500, svs::lib::Type() + ); + + CATCH_REQUIRE(clusters_bf16.size() == parameters.num_centroids_); + size_t total_bf16 = 0; + for (const auto& cluster : clusters_bf16) { + total_bf16 += cluster.size(); + } + CATCH_REQUIRE(total_bf16 == data.size()); + } +} From dbef61e2ece25a7944bb20f2cc2ac8bff46b52e7 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Thu, 13 Nov 2025 17:13:35 -0800 Subject: [PATCH 03/23] Add integration for train only scenario --- include/svs/index/ivf/common.h | 22 +++--- include/svs/index/ivf/index.h | 2 
+- tests/integration/ivf/index_build.cpp | 102 ++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 10 deletions(-) diff --git a/include/svs/index/ivf/common.h b/include/svs/index/ivf/common.h index 6a6712399..26f62bc83 100644 --- a/include/svs/index/ivf/common.h +++ b/include/svs/index/ivf/common.h @@ -404,7 +404,9 @@ void centroid_assignment( data.dimensions() ); } - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v< + std::remove_cvref_t, + distance::DistanceIP>) { for (auto i : indices) { auto nearest = type_traits::sentinel_v, std::greater<>>; @@ -414,7 +416,9 @@ void centroid_assignment( } assignments[batch_range.start() + i] = nearest.id(); } - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v< + std::remove_cvref_t, + distance::DistanceL2>) { for (auto i : indices) { auto nearest = type_traits::sentinel_v, std::less<>>; auto dists = matmul_results.get_datum(i); @@ -563,13 +567,13 @@ auto kmeans_training( auto training_timer = timer.push_back("Kmeans training"); data::SimpleData centroids_fp32 = convert_data(centroids, threadpool); - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v, distance::DistanceIP>) { normalize_centroids(centroids_fp32, threadpool, timer); } auto assignments = std::vector(data.size()); std::vector data_norm; - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v, distance::DistanceL2>) { generate_norms(data, data_norm, threadpool); } std::vector centroids_norm; @@ -578,7 +582,7 @@ auto kmeans_training( auto iter_timer = timer.push_back("iteration"); auto batchsize = parameters.minibatch_size_; auto num_batches = lib::div_round_up(data.size(), batchsize); - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v, distance::DistanceL2>) { generate_norms(centroids_fp32, centroids_norm, threadpool); } @@ -611,7 +615,7 @@ auto kmeans_training( centroid_split(data, centroids_fp32, counts, rng, threadpool, timer); - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v, distance::DistanceIP>) { normalize_centroids(centroids_fp32, threadpool, timer); } } @@ -723,7 +727,7 @@ data::SimpleData init_centroids( template std::vector maybe_compute_norms(const Data& data, Pool& threadpool) { std::vector norms; - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v, distance::DistanceL2>) { generate_norms(data, norms, threadpool); } return norms; @@ -849,7 +853,7 @@ void search_centroids( ) { unsigned int count = 0; buffer.clear(); - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v, distance::DistanceIP>) { for (size_t j = 0; j < num_threads; j++) { auto distance = matmul_results[j].get_datum(query_id); for (size_t k = 0; k < distance.size(); k++) { @@ -857,7 +861,7 @@ void search_centroids( count++; } } - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v, distance::DistanceL2>) { float query_norm = distance::norm_square(query); for (size_t j = 0; j < num_threads; j++) { auto distance = matmul_results[j].get_datum(query_id); diff --git a/include/svs/index/ivf/index.h b/include/svs/index/ivf/index.h index 59eb83cfe..8e93f8d55 100644 --- a/include/svs/index/ivf/index.h +++ b/include/svs/index/ivf/index.h @@ -320,7 +320,7 @@ class IVFIndex { void initialize_distance_metadata() { // Precalculate centroid norms for L2 distance - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v, distance::DistanceL2>) { centroids_norm_.reserve(centroids_.size()); for (size_t i = 0; i < centroids_.size(); i++) { 
centroids_norm_.push_back(distance::norm_square(centroids_.get_datum(i))); diff --git a/tests/integration/ivf/index_build.cpp b/tests/integration/ivf/index_build.cpp index 36a2d6e50..eeb0ca24e 100644 --- a/tests/integration/ivf/index_build.cpp +++ b/tests/integration/ivf/index_build.cpp @@ -17,6 +17,10 @@ // svs #include "svs/core/data/simple.h" #include "svs/core/recall.h" +#include "svs/index/ivf/clustering.h" +#include "svs/index/ivf/common.h" +#include "svs/index/ivf/hierarchical_kmeans.h" +#include "svs/lib/float16.h" #include "svs/lib/timing.h" #include "svs/orchestrators/ivf.h" @@ -99,6 +103,96 @@ void test_build(const Distance& distance, size_t num_inner_threads = 1) { } } +template +void test_build_train_only(const Distance& distance, size_t num_inner_threads = 1) { + const double epsilon = 0.06; // Wider tolerance for train_only workflow + const auto queries = svs::data::SimpleData::load(test_dataset::query_file()); + CATCH_REQUIRE(svs_test::prepare_temp_directory()); + size_t num_threads = 2; + + auto expected_result = test_dataset::ivf::expected_build_results( + svs::distance_type_v, svsbenchmark::Uncompressed(svs::datatype_v) + ); + + // Load data + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto threadpool = svs::threads::as_threadpool(num_threads); + auto parameters = expected_result.build_parameters_.value(); + + // Step 1: Use train_only mode to get centroids + svs::data::SimpleData centroids_train; + std::vector> clusters_train; + fmt::print( + "Starting Train-Only Mode Clustering with {} centroids\n", parameters.num_centroids_ + ); + + if (parameters.is_hierarchical_) { + fmt::print("Using Hierarchical KMeans Clustering\n"); + std::tie(centroids_train, clusters_train) = + svs::index::ivf::hierarchical_kmeans_clustering( + parameters, + data, + distance, + threadpool, + svs::lib::Type(), + svs::logging::get(), + true // train_only = true + ); + } else { + std::tie(centroids_train, clusters_train) = svs::index::ivf::kmeans_clustering( + parameters, + data, + distance, + threadpool, + svs::lib::Type(), + svs::logging::get(), + true // train_only = true + ); + } + + fmt::print("Train-Only Mode - Obtained {} centroids\n", centroids_train.size()); + + // Step 2: Assign data to clusters using cluster_assignment + auto clusters = svs::index::ivf::cluster_assignment( + data, + centroids_train, + distance, + threadpool, + 10'000, // minibatch_size + svs::lib::Type() + ); + + // Step 3: Create clustering and assemble index + svs::index::ivf::Clustering clustering(std::move(centroids_train), std::move(clusters)); + + auto index = svs::IVF::assemble_from_clustering( + std::move(clustering), std::move(data), distance, num_threads, num_inner_threads + ); + + // Test the index with the same expected results + auto groundtruth = test_dataset::load_groundtruth(svs::distance_type_v); + for (const auto& expected : expected_result.config_and_recall_) { + auto these_queries = test_dataset::get_test_set(queries, expected.num_queries_); + auto these_groundtruth = + test_dataset::get_test_set(groundtruth, expected.num_queries_); + index.set_search_parameters(expected.search_parameters_); + auto results = index.search(these_queries, expected.num_neighbors_); + double recall = svs::k_recall_at_n( + these_groundtruth, results, expected.num_neighbors_, expected.recall_k_ + ); + + fmt::print( + "Train-Only Mode - n_probes: {}, Expected Recall: {}, Actual Recall: {}\n", + index.get_search_parameters().n_probes_, + expected.recall_, + recall + ); + // Just check that 
recall is reasonable (within wider tolerance) + CATCH_REQUIRE(recall > expected.recall_ - epsilon); + CATCH_REQUIRE(recall < expected.recall_ + epsilon); + } +} + } // namespace CATCH_TEST_CASE("IVF Build/Clustering", "[integration][build][ivf]") { @@ -113,3 +207,11 @@ CATCH_TEST_CASE("IVF Build/Clustering", "[integration][build][ivf]") { // test_build(svs::DistanceL2(), 4); // test_build(svs::DistanceIP(), 4); } + +CATCH_TEST_CASE("IVF Build/Clustering", "[integration][build][ivf][train_only]") { + test_build_train_only(svs::DistanceL2()); + test_build_train_only(svs::DistanceIP()); + + test_build_train_only(svs::DistanceL2()); + test_build_train_only(svs::DistanceIP()); +} \ No newline at end of file From 6fa85c09bc88c550aa9edfc21244534276e2175b Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Wed, 19 Nov 2025 15:03:52 -0800 Subject: [PATCH 04/23] Minor fixes --- include/svs/index/ivf/common.h | 18 ++++++------------ include/svs/index/ivf/hierarchical_kmeans.h | 8 +------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/include/svs/index/ivf/common.h b/include/svs/index/ivf/common.h index 26f62bc83..863729461 100644 --- a/include/svs/index/ivf/common.h +++ b/include/svs/index/ivf/common.h @@ -250,19 +250,16 @@ void compute_matmul( m_int, // const int M n_int, // const int N k_int, // const int K - 1.0f, // float alpha (explicitly float) + 1.0f, // float alpha data, // const float* A k_int, // const int lda centroids, // const float* B k_int, // const int ldb - 0.0f, // const float beta (explicitly float) + 0.0f, // const float beta results, // float* c n_int // const int ldc ); } else if constexpr (std::is_same_v) { - // Intel MKL BFloat16 GEMM requires careful parameter casting to avoid parameter - // errors Ensure all integer parameters are properly cast to int (MKL expects int, - // not size_t) int m_int = static_cast(m); int n_int = static_cast(n); int k_int = static_cast(k); @@ -274,19 +271,16 @@ void compute_matmul( m_int, // const int M n_int, // const int N k_int, // const int K - 1.0f, // float alpha (explicitly float) + 1.0f, // float alpha (const uint16_t*)data, // const *uint16_t A k_int, // const int lda (const uint16_t*)centroids, // const uint16_t* B k_int, // const int ldb - 0.0f, // const float beta (explicitly float) + 0.0f, // const float beta results, // float* c n_int // const int ldc ); } else if constexpr (std::is_same_v) { - // Intel MKL Float16 GEMM requires careful parameter casting to avoid parameter - // errors Ensure all integer parameters are properly cast to int (MKL expects int, - // not size_t) int m_int = static_cast(m); int n_int = static_cast(n); int k_int = static_cast(k); @@ -298,12 +292,12 @@ void compute_matmul( m_int, // const int M n_int, // const int N k_int, // const int K - 1.0f, // float alpha (explicitly float) + 1.0f, // float alpha (const uint16_t*)data, // const *uint16_t A k_int, // const int lda (const uint16_t*)centroids, // const uint16_t* B k_int, // const int ldb - 0.0f, // const float beta (explicitly float) + 0.0f, // const float beta results, // float* c n_int // const int ldc ); diff --git a/include/svs/index/ivf/hierarchical_kmeans.h b/include/svs/index/ivf/hierarchical_kmeans.h index cf4384141..12abb960b 100644 --- a/include/svs/index/ivf/hierarchical_kmeans.h +++ b/include/svs/index/ivf/hierarchical_kmeans.h @@ -160,14 +160,12 @@ auto hierarchical_kmeans_clustering_impl( std::vector> clusters_level1_all; - // Declare timer outside if block to avoid scope issues + // Declare timer outside of block to avoid 
scope issues auto all_assignments_time = timer.push_back("level1 all assignments"); if (!train_only) { // Step 5: Assign all data to clusters - auto all_assignments_alloc = timer.push_back("level1 all assignments alloc"); auto assignments_level1_all = std::vector(data.size()); - all_assignments_alloc.finish(); batchsize = parameters.minibatch_size_; num_batches = lib::div_round_up(data.size(), batchsize); @@ -178,10 +176,7 @@ auto hierarchical_kmeans_clustering_impl( auto this_batch = threads::UnitRange{ batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; auto data_batch_view = data::make_view(data, this_batch); - auto all_assignments_convert = - timer.push_back("level1 all assignments convert"); convert_data(data_batch_view, data_batch, threadpool); - all_assignments_convert.finish(); centroid_assignment( data_batch, data_norm, @@ -199,7 +194,6 @@ auto hierarchical_kmeans_clustering_impl( clusters_level1_all = group_assignments(assignments_level1_all, num_level1_clusters, data); all_assignments_cluster.finish(); - all_assignments_time.finish(); } else { // For train_only, create empty clusters clusters_level1_all.resize(num_level1_clusters); From 149ff834dcd8d97d9dcdf59f0fe1bb6cd4e1f380 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Mon, 1 Dec 2025 16:13:22 -0800 Subject: [PATCH 05/23] First attempt at dynamic index --- include/svs/index/ivf/dynamic_ivf.h | 855 ++++++++++++++++++++++++++++ tests/CMakeLists.txt | 1 + tests/svs/index/ivf/dynamic_ivf.cpp | 264 +++++++++ 3 files changed, 1120 insertions(+) create mode 100644 include/svs/index/ivf/dynamic_ivf.h create mode 100644 tests/svs/index/ivf/dynamic_ivf.cpp diff --git a/include/svs/index/ivf/dynamic_ivf.h b/include/svs/index/ivf/dynamic_ivf.h new file mode 100644 index 000000000..d45917107 --- /dev/null +++ b/include/svs/index/ivf/dynamic_ivf.h @@ -0,0 +1,855 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +// Include the IVF index +#include "svs/index/ivf/clustering.h" +#include "svs/index/ivf/index.h" + +// svs +#include "svs/concepts/distance.h" +#include "svs/core/data.h" +#include "svs/core/loading.h" +#include "svs/core/logging.h" +#include "svs/core/query_result.h" +#include "svs/core/translation.h" +#include "svs/lib/boundscheck.h" +#include "svs/lib/invoke.h" +#include "svs/lib/misc.h" +#include "svs/lib/threads.h" + +// stdlib +#include +#include +#include + +namespace svs::index::ivf { + +/// +/// Metadata tracking the state of a particular data index for DynamicIVFIndex. +/// The following states have the given meaning for their corresponding slot: +/// +/// * Valid: Valid and present in the associated dataset. +/// * Empty: Available slot that can be used for new data or reclaimed after deletion. 
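+///
+/// Slots move from Valid to Empty in delete_entries(); Empty slots may be reused by
+/// add_points() when reuse_empty is set, or reclaimed in bulk by compact().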
+/// +enum class IVFSlotMetadata : uint8_t { Empty = 0x00, Valid = 0x01 }; + +/// +/// @brief Dynamic cluster implementation using blocked data for resizeability +/// +/// Similar to DenseCluster but uses BlockedData to support dynamic operations +/// +template struct DynamicDenseCluster { + using data_type = Data; + using index_type = I; + + template + void on_leaves(Callback&& f, size_t prefetch_offset) const { + size_t p = prefetch_offset; + for (size_t i = 0; i < data_.size(); ++i) { + if (p < data_.size()) { + data_.prefetch(p); + ++p; + } + f(accessor(data_, i), ids_[i], i); + } + } + + auto get_datum(size_t id) const { return data_.get_datum(id); } + auto get_secondary(size_t id) const { return data_.get_secondary(id); } + auto get_global_id(size_t local_id) const { return ids_[local_id]; } + const Data& view_cluster() const { return data_; } + Data& view_cluster() { return data_; } + + // Allow resizing for dynamic operations + void resize(size_t new_size) { + data_.resize(new_size); + ids_.resize(new_size); + } + + size_t size() const { return data_.size(); } + size_t capacity() const { return data_.capacity(); } + + public: + Data data_; + std::vector ids_; +}; + +/// +/// @brief Dynamic IVF Index with insertion and deletion support +/// +/// Uses the same cluster framework as static IVF (DenseClusteredDataset pattern) +/// but with BlockedData allocators for resizeability. +/// +/// @tparam Centroids The type of centroid storage +/// @tparam Cluster Type representing cluster storage (DynamicDenseCluster with BlockedData) +/// @tparam Dist The distance functor used to compare queries with the elements +/// @tparam ThreadPoolProto Thread pool prototype type +/// +/// An IVF index implementation that supports dynamic insertion and deletion of vectors +/// while maintaining the inverted file structure for efficient similarity search. 
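+///
+/// A construction sketch mirroring the unit test added in this patch; the element type,
+/// dimensionality, thread counts, and the variable names (build_params, initial_data,
+/// build_pool, external_ids, new_points, stale_ids) are illustrative placeholders rather
+/// than part of this interface:
+/// @code
+/// using Cluster = DynamicDenseCluster<svs::data::SimpleData<float, 128>, uint32_t>;
+/// auto clustering = build_clustering(
+///     build_params, svs::lib::Lazy([&]() { return initial_data; }),
+///     svs::distance::DistanceL2(), build_pool, /*train_only*/ false);
+/// std::vector<Cluster> clusters; // populate one Cluster per clustering.cluster(c)
+/// auto centroids = clustering.centroids();
+/// auto pool = svs::threads::as_threadpool(4);
+/// using Index = DynamicIVFIndex<
+///     decltype(centroids), Cluster, svs::distance::DistanceL2, decltype(pool)>;
+/// auto index = Index(
+///     std::move(centroids), std::move(clusters), external_ids,
+///     svs::distance::DistanceL2(), std::move(pool), /*intra_query_threads*/ 1);
+/// index.add_points(new_points, new_external_ids);
+/// index.delete_entries(stale_ids);
+/// index.compact();
+/// @endcode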
+/// +template +class DynamicIVFIndex { + public: + // Traits + static constexpr bool supports_insertions = true; + static constexpr bool supports_deletions = true; + static constexpr bool supports_saving = true; + static constexpr bool needs_id_translation = true; + + // Type Aliases + using Idx = typename Cluster::index_type; + using Data = typename Cluster::data_type; + using internal_id_type = size_t; + using external_id_type = size_t; + using distance_type = Dist; + using centroids_type = Centroids; + using cluster_type = Cluster; + using search_parameters_type = IVFSearchParameters; + using compare = distance::compare_t; + + // Thread-related type aliases + using InterQueryThreadPool = threads::ThreadPoolHandle; + using IntraQueryThreadPool = threads::DefaultThreadPool; + + private: + // Core IVF components (same structure as static IVF) + centroids_type centroids_; + std::vector clusters_; // Each cluster contains data_ and ids_ + + // Metadata tracking for dynamic operations + std::vector status_; // Status of each global slot + std::vector id_to_cluster_; // Maps global ID to cluster index + std::vector id_in_cluster_; // Maps global ID to position in cluster + size_t first_empty_ = 0; + size_t prefetch_offset_ = 8; + + // Translation and distance + IDTranslator translator_; + distance_type distance_; + + // Threading infrastructure (same as static IVF) + InterQueryThreadPool inter_query_threadpool_; + const size_t intra_query_thread_count_; + std::vector intra_query_threadpools_; + + // Search infrastructure (same as static IVF) + std::vector> matmul_results_; + std::vector centroids_norm_; + search_parameters_type search_parameters_{}; + + // Logger + svs::logging::logger_ptr logger_; + + public: + /// @brief Construct a Dynamic IVF Index from clusters + /// + /// @param centroids Centroid collection for space partitioning + /// @param clusters Vector of cluster data structures (each with data_ and ids_) + /// @param external_ids External IDs for all vectors + /// @param distance_function Distance metric for similarity computation + /// @param threadpool_proto Primary thread pool prototype + /// @param intra_query_thread_count Number of threads for intra-query parallelism + /// @param logger Logger for per-index logging customization + template + DynamicIVFIndex( + centroids_type centroids, + std::vector clusters, + const ExternalIds& external_ids, + Dist distance_function, + TP threadpool_proto, + const size_t intra_query_thread_count = 1, + svs::logging::logger_ptr logger = svs::logging::get() + ) + : centroids_{std::move(centroids)} + , clusters_{std::move(clusters)} + , status_() + , id_to_cluster_() + , id_in_cluster_() + , first_empty_{0} + , prefetch_offset_{8} + , translator_() + , distance_{std::move(distance_function)} + , inter_query_threadpool_{threads::as_threadpool(std::move(threadpool_proto))} + , intra_query_thread_count_{intra_query_thread_count} + , logger_{std::move(logger)} { + // Initialize metadata structures + size_t total_size = 0; + for (const auto& cluster : clusters_) { + for (size_t pos = 0; pos < cluster.ids_.size(); ++pos) { + total_size = + std::max(total_size, static_cast(cluster.ids_[pos]) + 1); + } + } + + status_.resize(total_size, IVFSlotMetadata::Valid); + id_to_cluster_.resize(total_size); + id_in_cluster_.resize(total_size); + first_empty_ = total_size; + + // Build reverse mapping from global ID to cluster location + for (size_t cluster_idx = 0; cluster_idx < clusters_.size(); ++cluster_idx) { + const auto& cluster = 
clusters_[cluster_idx]; + for (size_t pos = 0; pos < cluster.ids_.size(); ++pos) { + Idx global_id = cluster.ids_[pos]; + id_to_cluster_[global_id] = cluster_idx; + id_in_cluster_[global_id] = pos; + } + } + + // Initialize ID translation + translator_.insert( + external_ids, threads::UnitRange(0, external_ids.size()) + ); + + // Initialize thread pools and search infrastructure + validate_thread_configuration(); + initialize_thread_pools(); + initialize_search_buffers(); + initialize_distance_metadata(); + } + + /// @brief Constructor with pre-existing translator (for loading from saved state) + template + DynamicIVFIndex( + centroids_type centroids, + std::vector clusters, + IDTranslator translator, + Dist distance_function, + TP threadpool_proto, + const size_t intra_query_thread_count = 1, + svs::logging::logger_ptr logger = svs::logging::get() + ) + : centroids_{std::move(centroids)} + , clusters_{std::move(clusters)} + , status_() + , id_to_cluster_() + , id_in_cluster_() + , first_empty_{0} + , prefetch_offset_{8} + , translator_{std::move(translator)} + , distance_{std::move(distance_function)} + , inter_query_threadpool_{threads::as_threadpool(std::move(threadpool_proto))} + , intra_query_thread_count_{intra_query_thread_count} + , logger_{std::move(logger)} { + // Initialize metadata structures based on cluster contents + size_t total_size = 0; + for (const auto& cluster : clusters_) { + for (size_t pos = 0; pos < cluster.ids_.size(); ++pos) { + total_size = + std::max(total_size, static_cast(cluster.ids_[pos]) + 1); + } + } + + status_.resize(total_size, IVFSlotMetadata::Valid); + id_to_cluster_.resize(total_size); + id_in_cluster_.resize(total_size); + first_empty_ = total_size; + + // Build reverse mapping from global ID to cluster location + for (size_t cluster_idx = 0; cluster_idx < clusters_.size(); ++cluster_idx) { + const auto& cluster = clusters_[cluster_idx]; + for (size_t pos = 0; pos < cluster.ids_.size(); ++pos) { + Idx global_id = cluster.ids_[pos]; + id_to_cluster_[global_id] = cluster_idx; + id_in_cluster_[global_id] = pos; + } + } + + // Initialize thread pools and search infrastructure + validate_thread_configuration(); + initialize_thread_pools(); + initialize_search_buffers(); + initialize_distance_metadata(); + } + + ///// Basic Properties ///// + + /// @brief Get logger + svs::logging::logger_ptr get_logger() const { return logger_; } + + /// @brief Return the number of valid entries in the index + size_t size() const { return translator_.size(); } + + /// @brief Return the number of centroids/clusters + size_t num_clusters() const { return centroids_.size(); } + + /// @brief Return the logical number of dimensions + size_t dimensions() const { return centroids_.dimensions(); } + + /// @brief Get index name + std::string name() const { return "Dynamic IVF Index"; } + + ///// Search Parameters ///// + + /// @brief Get current search parameters + search_parameters_type get_search_parameters() const { return search_parameters_; } + + /// @brief Set search parameters + void set_search_parameters(const search_parameters_type& params) { + search_parameters_ = params; + } + + ///// Threading Configuration ///// + + /// @brief Get number of threads for inter-query parallelism + size_t get_num_threads() const { return inter_query_threadpool_.size(); } + + /// @brief Get number of threads for intra-query parallelism + size_t get_num_intra_query_threads() const { return intra_query_thread_count_; } + + /// @brief Set threadpool for inter-query parallelism + void 
set_threadpool(InterQueryThreadPool threadpool) { + if (threadpool.size() != inter_query_threadpool_.size()) { + throw std::runtime_error( + "Threadpool change not supported - thread count must remain constant" + ); + } + inter_query_threadpool_ = std::move(threadpool); + } + + /// @brief Get threadpool handle + InterQueryThreadPool& get_threadpool_handle() { return inter_query_threadpool_; } + + /// @brief Get const threadpool handle + const InterQueryThreadPool& get_threadpool_handle() const { + return inter_query_threadpool_; + } + + ///// Index Translation ///// + + /// @brief Translate external ID to internal ID + size_t translate_external_id(size_t e) const { return translator_.get_internal(e); } + + /// @brief Translate internal ID to external ID + size_t translate_internal_id(size_t i) const { return translator_.get_external(i); } + + /// @brief Check whether external ID exists + bool has_id(size_t e) const { return translator_.has_external(e); } + + /// @brief Get the raw data for external id + auto get_datum(size_t e) const { + size_t internal_id = translate_external_id(e); + size_t cluster_idx = id_to_cluster_[internal_id]; + size_t pos = id_in_cluster_[internal_id]; + return clusters_[cluster_idx].get_datum(pos); + } + + /// @brief Iterate over all external IDs + template void on_ids(F&& f) const { + for (size_t i = 0; i < status_.size(); ++i) { + if (is_valid(i)) { + f(translator_.get_external(i)); + } + } + } + + /// @brief Get external IDs (compatibility method) + auto external_ids() const { + std::vector ids; + ids.reserve(size()); + on_ids([&ids](size_t id) { ids.push_back(id); }); + return ids; + } + + ///// Insertion ///// + + /// @brief Add points to the index + /// + /// New points are assigned to clusters based on nearest centroid. + /// Empty slots from previous deletions can be reused if reuse_empty is enabled. + /// + /// @param points Dataset of points to add + /// @param external_ids External IDs for the points + /// @param reuse_empty Whether to reuse empty slots from deletions + /// @return Vector of internal IDs where points were inserted + template + std::vector add_points( + const Points& points, const ExternalIds& external_ids, bool reuse_empty = false + ) { + const size_t num_points = points.size(); + const size_t num_ids = external_ids.size(); + + if (num_points != num_ids) { + throw ANNEXCEPTION( + "Number of points ({}) not equal to number of external ids ({})!", + num_points, + num_ids + ); + } + + // Assign each point to its nearest centroid + std::vector assigned_clusters(num_points); + assign_to_clusters(points, assigned_clusters); + + // Allocate global IDs + std::vector global_ids = allocate_ids(num_points, reuse_empty); + + // Try to update ID translation + translator_.insert(external_ids, global_ids); + + // Insert points into their assigned clusters + insert_into_clusters(points, global_ids, assigned_clusters); + + return global_ids; + } + + ///// Deletion ///// + + /// @brief Delete entries by external ID + /// + /// Entries are marked as Empty and can be reused immediately. + /// Call compact() periodically to reclaim memory and reorganize clusters. 
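+    /// Deleted slots remain physically present in their clusters and are skipped during
+    /// search via the per-slot status check; they are reclaimed when compact() rebuilds
+    /// the clusters or when add_points() reuses them with reuse_empty enabled.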
+ /// + /// @param ids Container of external IDs to delete + /// @return Number of entries deleted + template size_t delete_entries(const T& ids) { + translator_.check_external_exist(ids.begin(), ids.end()); + + for (auto external_id : ids) { + size_t internal_id = translator_.get_internal(external_id); + assert(internal_id < status_.size()); + assert(status_[internal_id] == IVFSlotMetadata::Valid); + status_[internal_id] = IVFSlotMetadata::Empty; + first_empty_ = std::min(first_empty_, internal_id); + } + + translator_.delete_external(ids); + return ids.size(); + } + + ///// Compaction ///// + + /// @brief Compact the data structure + /// + /// Compact removes all empty slots, rebuilding the index structure + /// for optimal memory usage and search performance. + /// + /// @param batch_size Granularity at which points are shuffled (unused for IVF) + void compact(size_t SVS_UNUSED(batch_size) = 1'000) { + // Collect all valid indices + auto valid_indices = nonmissing_indices(); + + // Rebuild clusters compactly, removing empty slots + rebuild_clusters_compact(valid_indices); + + // Update metadata + size_t new_size = valid_indices.size(); + status_.resize(new_size); + // After compaction, all retained entries are valid + std::fill(status_.begin(), status_.end(), IVFSlotMetadata::Valid); + id_to_cluster_.resize(new_size); + id_in_cluster_.resize(new_size); + first_empty_ = new_size; + + svs::logging::info(logger_, "Compaction complete: {} valid entries", new_size); + } + + ///// Search ///// + + /// @brief Perform similarity search + /// + /// Search process: + /// 1. Find n_probe nearest centroids for each query + /// 2. Search within those clusters, skipping empty entries + /// 3. Return top-k neighbors + /// + /// @param results View for storing search results + /// @param queries Query vectors + /// @param search_parameters Search configuration + /// @param cancel Optional cancellation predicate + template + void search( + QueryResultView results, + const Queries& queries, + const search_parameters_type& search_parameters, + const lib::DefaultPredicate& cancel = lib::Returns(lib::Const()) + ) { + validate_query_batch_size(queries.size()); + + size_t num_neighbors = results.n_neighbors(); + size_t buffer_leaves_size = search_parameters.k_reorder_ * num_neighbors; + + // Phase 1: Compute distances to centroids + compute_centroid_distances( + queries, centroids_, matmul_results_, inter_query_threadpool_ + ); + + // Phase 2: Process queries in parallel + threads::parallel_for( + inter_query_threadpool_, + threads::StaticPartition(queries.size()), + [&](auto is, auto tid) { + // Check for cancellation + if (cancel()) { + return; + } + + // Create buffers for this thread + auto buffer_centroids = create_centroid_buffer(search_parameters.n_probes_); + auto buffer_leaves = create_leaf_buffers(buffer_leaves_size); + + // Search for each query + for (auto query_idx : is) { + search_single_query( + queries, + query_idx, + results, + buffer_centroids, + buffer_leaves, + search_parameters, + tid + ); + } + } + ); + } + + ///// Saving ///// + + static constexpr lib::Version save_version = lib::Version(0, 0, 0); + + void save( + const std::filesystem::path& config_directory, + const std::filesystem::path& data_directory + ) { + // Compact before saving to remove empty slots + compact(); + + // Save configuration + lib::save_to_disk( + lib::SaveOverride([&](const lib::SaveContext& ctx) { + return lib::SaveTable( + "dynamic_ivf_config", + save_version, + { + {"name", lib::save(name())}, + 
{"translation", lib::save(translator_, ctx)}, + {"num_clusters", lib::save(clusters_.size())}, + } + ); + }), + config_directory + ); + + // Save centroids and cluster data + lib::save_to_disk(centroids_, data_directory / "centroids"); + + for (size_t i = 0; i < clusters_.size(); ++i) { + auto cluster_path = data_directory / fmt::format("cluster_{}", i); + lib::save_to_disk(clusters_[i].data_, cluster_path); + + auto ids_path = data_directory / fmt::format("cluster_ids_{}", i); + lib::save_to_disk(clusters_[i].ids_, ids_path); + } + } + + private: + ///// Helper Methods ///// + + void validate_thread_configuration() { + if (intra_query_thread_count_ < 1) { + throw std::invalid_argument("Intra-query thread count must be at least 1"); + } + } + + void initialize_thread_pools() { + for (size_t i = 0; i < inter_query_threadpool_.size(); i++) { + intra_query_threadpools_.push_back( + threads::as_threadpool(intra_query_thread_count_) + ); + } + } + + void initialize_search_buffers() { + auto batches = + std::vector>(inter_query_threadpool_.size()); + + threads::parallel_for( + inter_query_threadpool_, + threads::StaticPartition(centroids_.size()), + [&](auto is, auto tid) { batches[tid] = threads::UnitRange{is}; } + ); + + for (size_t i = 0; i < inter_query_threadpool_.size(); i++) { + matmul_results_.emplace_back(MAX_QUERY_BATCH_SIZE, batches[i].size()); + } + } + + void initialize_distance_metadata() { + if constexpr (std::is_same_v, distance::DistanceL2>) { + centroids_norm_.reserve(centroids_.size()); + for (size_t i = 0; i < centroids_.size(); ++i) { + centroids_norm_.push_back(distance::norm_square(centroids_.get_datum(i))); + } + } + } + + void validate_query_batch_size(size_t query_size) const { + if (query_size > MAX_QUERY_BATCH_SIZE) { + throw std::runtime_error(fmt::format( + "Query batch size {} exceeds maximum allowed {}", + query_size, + MAX_QUERY_BATCH_SIZE + )); + } + } + + auto create_centroid_buffer(size_t n_probes) const { + return SortedBuffer(n_probes, distance::comparator(distance_)); + } + + auto create_leaf_buffers(size_t buffer_size) const { + std::vector> buffers; + buffers.reserve(intra_query_thread_count_); + for (size_t j = 0; j < intra_query_thread_count_; j++) { + buffers.push_back( + SortedBuffer(buffer_size, distance::comparator(distance_)) + ); + } + return buffers; + } + + bool is_empty(size_t i) const { return status_[i] == IVFSlotMetadata::Empty; } + + bool is_valid(size_t i) const { return status_[i] == IVFSlotMetadata::Valid; } + + std::vector nonmissing_indices() const { + std::vector indices; + indices.reserve(size()); + for (size_t i = 0; i < status_.size(); ++i) { + if (is_valid(i)) { + indices.push_back(i); + } + } + return indices; + } + + template + void assign_to_clusters(const Points& points, std::vector& assignments) { + // For each point, find nearest centroid + for (size_t i = 0; i < points.size(); ++i) { + auto point = points.get_datum(i); + float min_dist = std::numeric_limits::max(); + size_t best_cluster = 0; + + for (size_t c = 0; c < centroids_.size(); ++c) { + auto centroid = centroids_.get_datum(c); + float dist = distance::compute(distance_, point, centroid); + if (dist < min_dist) { + min_dist = dist; + best_cluster = c; + } + } + + assignments[i] = best_cluster; + } + } + + std::vector allocate_ids(size_t count, bool reuse_empty) { + std::vector ids; + ids.reserve(count); + + // Try to find empty slots if reuse is enabled + if (reuse_empty) { + for (size_t i = 0; i < status_.size() && ids.size() < count; ++i) { + if (is_empty(i)) { + 
ids.push_back(i); + status_[i] = IVFSlotMetadata::Valid; // Mark as valid when reusing + } + } + } + + // Allocate new slots as needed + size_t current_size = status_.size(); + while (ids.size() < count) { + ids.push_back(current_size++); + } + + // Resize metadata if we added new slots + if (current_size > status_.size()) { + status_.resize(current_size, IVFSlotMetadata::Valid); + id_to_cluster_.resize(current_size); + id_in_cluster_.resize(current_size); + first_empty_ = current_size; + } + + return ids; + } + + template + void insert_into_clusters( + const Points& points, + const std::vector& global_ids, + const std::vector& assigned_clusters + ) { + for (size_t i = 0; i < points.size(); ++i) { + size_t global_id = global_ids[i]; + size_t cluster_idx = assigned_clusters[i]; + + // Add to cluster + auto& cluster = clusters_[cluster_idx]; + + size_t pos = cluster.size(); + cluster.resize(cluster.size() + 1); + cluster.data_.set_datum(pos, points.get_datum(i)); + cluster.ids_.push_back(static_cast(global_id)); + + // Update metadata + status_[global_id] = IVFSlotMetadata::Valid; + id_to_cluster_[global_id] = cluster_idx; + id_in_cluster_[global_id] = pos; + } + } + + void rebuild_clusters_compact(const std::vector& valid_indices) { + // Group valid indices by cluster + // cluster_valid_indices[cluster_idx] contains pairs of (new_id, old_id) + std::vector>> cluster_valid_indices( + clusters_.size() + ); + + // Collect all mappings: (external_id, new_internal_id) + // NOTE: This must be done BEFORE we modify the translator + std::vector external_ids; + std::vector new_internal_ids; + external_ids.reserve(valid_indices.size()); + new_internal_ids.reserve(valid_indices.size()); + + for (size_t new_id = 0; new_id < valid_indices.size(); ++new_id) { + size_t old_id = valid_indices[new_id]; + size_t cluster_idx = id_to_cluster_[old_id]; + cluster_valid_indices[cluster_idx].push_back({new_id, old_id}); + + // Save the external ID mapping for later + auto external_id = translator_.get_external(old_id); + external_ids.push_back(external_id); + new_internal_ids.push_back(new_id); + } + + // Phase 1: Clear the translator completely + // This is simpler and safer than trying to selectively delete entries + translator_ = IDTranslator(); + + // Phase 2: Rebuild clusters and update metadata + for (size_t cluster_idx = 0; cluster_idx < clusters_.size(); ++cluster_idx) { + const auto& indices = cluster_valid_indices[cluster_idx]; + if (indices.empty()) { + clusters_[cluster_idx].data_ = + Data(0, clusters_[cluster_idx].data_.dimensions()); + clusters_[cluster_idx].ids_.clear(); + continue; + } + + Data new_data(indices.size(), clusters_[cluster_idx].data_.dimensions()); + std::vector new_ids; + new_ids.reserve(indices.size()); + + for (size_t pos = 0; pos < indices.size(); ++pos) { + auto [new_global_id, old_global_id] = indices[pos]; + size_t old_cluster = id_to_cluster_[old_global_id]; + size_t old_pos = id_in_cluster_[old_global_id]; + + new_data.set_datum(pos, clusters_[old_cluster].data_.get_datum(old_pos)); + new_ids.push_back(static_cast(new_global_id)); + + // Update metadata + id_to_cluster_[new_global_id] = cluster_idx; + id_in_cluster_[new_global_id] = pos; + } + + clusters_[cluster_idx].data_ = std::move(new_data); + clusters_[cluster_idx].ids_ = std::move(new_ids); + } + + // Phase 3: Re-add all IDs to the translator with their new internal IDs + translator_.insert(external_ids, new_internal_ids, false); + } + + template + void search_single_query( + const Queries& queries, + size_t query_idx, 
+ QueryResultView& results, + auto& buffer_centroids, + auto& buffer_leaves, + const search_parameters_type& search_parameters, + size_t tid + ) { + // Find nearest centroids + auto query = queries.get_datum(query_idx); + search_centroids( + query, + distance_, + matmul_results_, + buffer_centroids, + tid, + centroids_norm_, + get_num_threads() + ); + + // Search within selected clusters + size_t n_probes = std::min(search_parameters.n_probes_, buffer_centroids.size()); + + for (size_t probe_idx = 0; probe_idx < n_probes; ++probe_idx) { + size_t cluster_idx = buffer_centroids[probe_idx].id(); + search_cluster(query, cluster_idx, buffer_leaves[0]); + } + + // Write results (translating to external IDs) + size_t num_neighbors = results.n_neighbors(); + for (size_t i = 0; i < std::min(num_neighbors, buffer_leaves[0].size()); ++i) { + size_t internal_id = buffer_leaves[0][i].id(); + float dist = buffer_leaves[0][i].distance(); + size_t external_id = translate_internal_id(internal_id); + + results.set(Neighbor(external_id, dist), query_idx, i); + } + + // Fill remaining slots with invalid neighbors if needed + for (size_t i = buffer_leaves[0].size(); i < num_neighbors; ++i) { + results.set( + Neighbor( + std::numeric_limits::max(), std::numeric_limits::max() + ), + query_idx, + i + ); + } + } + + template + void search_cluster(const Query& query, size_t cluster_idx, auto& buffer) { + const auto& cluster = clusters_[cluster_idx]; + + for (size_t pos = 0; pos < cluster.size(); ++pos) { + Idx global_id = cluster.ids_[pos]; + + // Skip empty entries + if (!is_valid(global_id)) { + continue; + } + + auto datum = cluster.data_.get_datum(pos); + float dist = distance::compute(distance_, query, datum); + buffer.insert({global_id, dist}); + } + } +}; + +} // namespace svs::index::ivf diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ad82db1c3..5ce49b07a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -207,6 +207,7 @@ if (SVS_EXPERIMENTAL_ENABLE_IVF) ${TEST_DIR}/svs/index/ivf/kmeans.cpp ${TEST_DIR}/svs/index/ivf/hierarchical_kmeans.cpp ${TEST_DIR}/svs/index/ivf/common.cpp + ${TEST_DIR}/svs/index/ivf/dynamic_ivf.cpp ) endif() diff --git a/tests/svs/index/ivf/dynamic_ivf.cpp b/tests/svs/index/ivf/dynamic_ivf.cpp new file mode 100644 index 000000000..3f22c8997 --- /dev/null +++ b/tests/svs/index/ivf/dynamic_ivf.cpp @@ -0,0 +1,264 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// svs +#include "svs/index/ivf/dynamic_ivf.h" +#include "svs/core/data.h" +#include "svs/core/distance.h" +#include "svs/core/query_result.h" +#include "svs/core/recall.h" +#include "svs/index/ivf/clustering.h" +#include "svs/lib/preprocessor.h" +#include "svs/lib/threads.h" +#include "svs/lib/timing.h" +#include "svs/misc/dynamic_helper.h" + +// tests +#include "tests/utils/test_dataset.h" +#include "tests/utils/utils.h" + +// catch +#include "catch2/catch_test_macros.hpp" + +// stl +#include +#include +#include +#include +#include + +using Idx = uint32_t; +using Eltype = float; +using QueryEltype = float; +using Distance = svs::distance::DistanceL2; +const size_t N = 128; + +const size_t NUM_NEIGHBORS = 10; +const size_t NUM_CLUSTERS = 10; + +/// +/// Utility Methods +/// + +template I div(I i, float fraction) { + return svs::lib::narrow(std::floor(svs::lib::narrow(i) * fraction)); +} + +template std::string stringify(Args&&... args) { + std::ostringstream stream{}; + ((stream << args), ...); + return stream.str(); +} + +/// +/// Main Loop. +/// + +template +void do_check( + MutableIndex& index, + svs::misc::ReferenceDataset& reference, + const Queries& queries, + double operation_time, + std::string message +) { + // Compute groundtruth + auto tic = svs::lib::now(); + auto gt = reference.groundtruth(); + CATCH_REQUIRE(gt.n_neighbors() == NUM_NEIGHBORS); + CATCH_REQUIRE(gt.n_queries() == queries.size()); + + double groundtruth_time = svs::lib::time_difference(tic); + + // Run search + tic = svs::lib::now(); + auto results = svs::QueryResult(gt.n_queries(), NUM_NEIGHBORS); + auto search_parameters = svs::index::ivf::IVFSearchParameters( + NUM_CLUSTERS, // n_probes - search all clusters for accuracy + NUM_NEIGHBORS // k_reorder + ); + + index.search( + results.view(), + svs::data::ConstSimpleDataView{ + queries.data(), queries.size(), queries.dimensions()}, + search_parameters + ); + double search_time = svs::lib::time_difference(tic); + + // Extra ID checks + reference.check_ids(results); + reference.check_equal_ids(index); + + // compute recall + double recall = svs::k_recall_at_n(gt, results, NUM_NEIGHBORS, NUM_NEIGHBORS); + + std::cout << "[" << message << "] -- {" + << "operation: " << operation_time << ", groundtruth: " << groundtruth_time + << ", search: " << search_time << ", recall: " << recall << "}\n"; +} + +template +void test_loop( + MutableIndex& index, + svs::misc::ReferenceDataset& reference, + const Queries& queries, + size_t num_points, + size_t consolidate_every, + size_t iterations +) { + size_t consolidate_count = 0; + for (size_t i = 0; i < iterations; ++i) { + // Add Points + { + auto [points, time] = reference.add_points(index, num_points); + CATCH_REQUIRE(points <= num_points); + CATCH_REQUIRE(points > num_points - reference.bucket_size()); + do_check(index, reference, queries, time, stringify("add ", points, " points")); + } + + // Delete Points + { + auto [points, time] = reference.delete_points(index, num_points); + CATCH_REQUIRE(points <= num_points); + CATCH_REQUIRE(points > num_points - reference.bucket_size()); + do_check( + index, reference, queries, time, stringify("delete ", points, " points") + ); + } + + // Maybe compact. + ++consolidate_count; + if (consolidate_count == consolidate_every) { + auto tic = svs::lib::now(); + // Use a batchsize smaller than the whole dataset to ensure that the compaction + // algorithm correctly handles this case. 
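+            // (DynamicIVFIndex::compact() currently ignores the batch size argument,
+            // but passing one still exercises the interface.)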
+ index.compact(reference.valid() / 10); + double diff = svs::lib::time_difference(tic); + do_check(index, reference, queries, diff, "compact"); + consolidate_count = 0; + } + } +} + +CATCH_TEST_CASE("Testing Dynamic IVF Index", "[dynamic_ivf]") { +#if defined(NDEBUG) + const float initial_fraction = 0.25; + const float modify_fraction = 0.05; +#else + const float initial_fraction = 0.05; + const float modify_fraction = 0.005; +#endif + const size_t num_threads = 10; + + // Load the base dataset and queries. + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto num_points = data.size(); + auto queries = test_dataset::queries(); + + auto reference = svs::misc::ReferenceDataset( + std::move(data), + Distance(), + num_threads, + div(num_points, 0.5 * modify_fraction), + NUM_NEIGHBORS, + queries, + 0x12345678 + ); + + auto num_indices_to_add = div(reference.size(), initial_fraction); + + // Generate initial vectors and indices + std::vector initial_indices{}; + auto initial_data = svs::data::SimpleData(num_indices_to_add, N); + { + auto [vectors, indices] = reference.generate(num_indices_to_add); + auto num_points_added = indices.size(); + CATCH_REQUIRE(vectors.size() == num_points_added); + CATCH_REQUIRE(num_points_added <= num_indices_to_add); + CATCH_REQUIRE(num_points_added > num_indices_to_add - reference.bucket_size()); + + initial_indices = indices; + if (vectors.size() != num_indices_to_add || indices.size() != num_indices_to_add) { + throw ANNEXCEPTION("Something went horribly wrong!"); + } + + for (size_t i = 0; i < num_indices_to_add; ++i) { + initial_data.set_datum(i, vectors.get_datum(i)); + } + } + + // Build IVF clustering + auto build_params = svs::index::ivf::IVFBuildParameters( + NUM_CLUSTERS, + /* max_iters */ 10, + /* is_hierarchical */ false + ); + + auto threadpool = svs::threads::SequentialThreadPool(); + auto clustering = svs::index::ivf::build_clustering( + build_params, + svs::lib::Lazy([&initial_data]() { return initial_data; }), + Distance(), + threadpool, + /* train_only */ false + ); + + // Create dynamic clusters from the clustering result + using ClusterType = + svs::index::ivf::DynamicDenseCluster, Idx>; + + std::vector clusters; + for (size_t c = 0; c < NUM_CLUSTERS; ++c) { + const auto& cluster_indices = clustering.cluster(c); + size_t cluster_size = cluster_indices.size(); + + ClusterType cluster; + cluster.data_ = svs::data::SimpleData(cluster_size, N); + cluster.ids_.resize(cluster_size); + + for (size_t i = 0; i < cluster_size; ++i) { + Idx global_id = cluster_indices[i]; + cluster.data_.set_datum(i, initial_data.get_datum(global_id)); + cluster.ids_[i] = global_id; + } + + clusters.push_back(std::move(cluster)); + } + + // Create the dynamic IVF index + auto centroids = clustering.centroids(); + auto threadpool_for_index = svs::threads::as_threadpool(num_threads); + using IndexType = svs::index::ivf::DynamicIVFIndex< + decltype(centroids), + ClusterType, + Distance, + decltype(threadpool_for_index)>; + + auto index = IndexType( + std::move(centroids), + std::move(clusters), + initial_indices, + Distance(), + std::move(threadpool_for_index), + 1 // intra_query_threads + ); + + reference.configure_extra_checks(true); + CATCH_REQUIRE(reference.extra_checks_enabled()); + + test_loop(index, reference, queries, div(reference.size(), modify_fraction), 2, 6); +} From 8700c6d901954fc592ca447e9d56aba8b6dd9109 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Tue, 2 Dec 2025 16:14:07 -0800 Subject: [PATCH 06/23] Optimized search and 
add_points functions --- include/svs/index/ivf/common.h | 7 + include/svs/index/ivf/dynamic_ivf.h | 238 ++++++---- include/svs/index/ivf/hierarchical_kmeans.h | 25 +- include/svs/index/ivf/kmeans.h | 25 +- tests/svs/index/ivf/dynamic_ivf.cpp | 487 ++++++++++++++++++++ 5 files changed, 671 insertions(+), 111 deletions(-) diff --git a/include/svs/index/ivf/common.h b/include/svs/index/ivf/common.h index 863729461..5bb89ebd1 100644 --- a/include/svs/index/ivf/common.h +++ b/include/svs/index/ivf/common.h @@ -47,6 +47,13 @@ namespace svs::index::ivf { // threshold for numerical stability in algorithms such as k-means clustering, where exact constexpr double EPSILON = 1.0 / 1024.0; +/// Minimum training sample multiplier for clustering algorithms. +/// When training data size is small relative to the number of clusters, we ensure +/// at least (num_clusters * MIN_TRAINING_SAMPLE_MULTIPLIER) samples are used for +/// training to maintain clustering quality. This prevents degenerate cases where +/// training_fraction would produce insufficient samples. +constexpr size_t MIN_TRAINING_SAMPLE_MULTIPLIER = 2; + /// @brief Parameters controlling the IVF build/k-means algortihm. struct IVFBuildParameters { public: diff --git a/include/svs/index/ivf/dynamic_ivf.h b/include/svs/index/ivf/dynamic_ivf.h index d45917107..a82016f7b 100644 --- a/include/svs/index/ivf/dynamic_ivf.h +++ b/include/svs/index/ivf/dynamic_ivf.h @@ -462,12 +462,35 @@ class DynamicIVFIndex { ///// Search ///// + /// Translate internal IDs to external IDs in search results. + /// This method converts all IDs in the result view from internal (global) IDs + /// to external IDs using the ID map. + /// + /// @param ids Result indices to translate (2D array) + template + requires(std::tuple_size_v == 2) + void translate_to_external(DenseArray& ids) { + threads::parallel_for( + inter_query_threadpool_, + threads::StaticPartition{getsize<0>(ids)}, + [&](const auto is, auto /*tid*/) { + for (auto i : is) { + for (size_t j = 0, jmax = getsize<1>(ids); j < jmax; ++j) { + auto internal = lib::narrow_cast(ids.at(i, j)); + ids.at(i, j) = translate_internal_id(internal); + } + } + } + ); + } + /// @brief Perform similarity search /// - /// Search process: - /// 1. Find n_probe nearest centroids for each query - /// 2. Search within those clusters, skipping empty entries - /// 3. Return top-k neighbors + /// Search Process: + /// 1. Inter-query parallel: Distribute queries across primary threads + /// 2. For each query: Find n_probe nearest centroids + /// 3. Intra-query parallel: Explore identified clusters using inner threads + /// 4. 
Combine results from all explored clusters (skipping empty entries) /// /// @param results View for storing search results /// @param queries Query vectors @@ -478,14 +501,14 @@ class DynamicIVFIndex { QueryResultView results, const Queries& queries, const search_parameters_type& search_parameters, - const lib::DefaultPredicate& cancel = lib::Returns(lib::Const()) + const lib::DefaultPredicate& SVS_UNUSED(cancel) = lib::Returns(lib::Const()) ) { validate_query_batch_size(queries.size()); size_t num_neighbors = results.n_neighbors(); size_t buffer_leaves_size = search_parameters.k_reorder_ * num_neighbors; - // Phase 1: Compute distances to centroids + // Phase 1: Inter-query parallel - Compute distances to centroids compute_centroid_distances( queries, centroids_, matmul_results_, inter_query_threadpool_ ); @@ -495,29 +518,35 @@ class DynamicIVFIndex { inter_query_threadpool_, threads::StaticPartition(queries.size()), [&](auto is, auto tid) { - // Check for cancellation - if (cancel()) { - return; - } - - // Create buffers for this thread + // Initialize search buffers auto buffer_centroids = create_centroid_buffer(search_parameters.n_probes_); auto buffer_leaves = create_leaf_buffers(buffer_leaves_size); - // Search for each query - for (auto query_idx : is) { - search_single_query( - queries, - query_idx, - results, - buffer_centroids, - buffer_leaves, - search_parameters, - tid - ); - } + // Prepare cluster search scratch space (distance copy) + auto scratch = + extensions::per_thread_batch_search_setup(centroids_, distance_); + + // Execute search with intra-query parallelism + // Note: We pass centroids_ as the data parameter (unused) and this as + // cluster + extensions::per_thread_batch_search( + centroids_, + *this, + buffer_centroids, + buffer_leaves, + scratch, + queries, + results, + threads::UnitRange{is}, + tid, + search_centroids_closure(), + search_leaves_closure() + ); } ); + + // Convert internal IDs to external IDs + this->translate_to_external(results.indices()); } ///// Saving ///// @@ -640,24 +669,48 @@ class DynamicIVFIndex { return indices; } + /// @brief Assign points to their nearest centroids using parallel processing + /// + /// Uses centroid_assignment with batching to handle matmul_results size constraints. + /// Processes points in batches for efficient parallel centroid assignment. 
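+    ///
+    /// For example, with a matmul buffer sized for 512 rows and 2'000 incoming points,
+    /// the batches cover [0, 512), [512, 1024), [1024, 1536), and [1536, 2000).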
+ /// + /// @param points Dataset to assign to clusters + /// @param assignments Output vector for cluster assignments template void assign_to_clusters(const Points& points, std::vector& assignments) { - // For each point, find nearest centroid - for (size_t i = 0; i < points.size(); ++i) { - auto point = points.get_datum(i); - float min_dist = std::numeric_limits::max(); - size_t best_cluster = 0; - - for (size_t c = 0; c < centroids_.size(); ++c) { - auto centroid = centroids_.get_datum(c); - float dist = distance::compute(distance_, point, centroid); - if (dist < min_dist) { - min_dist = dist; - best_cluster = c; - } - } - - assignments[i] = best_cluster; + size_t num_points = points.size(); + size_t num_centroids = centroids_.size(); + + // Compute norms if using L2 distance + auto data_norm = maybe_compute_norms(points, inter_query_threadpool_); + + // Determine batch size based on matmul_results capacity + // matmul_results_ is sized for queries, reuse for point assignment + size_t batch_size = matmul_results_[0].size(); // Number of queries it can hold + size_t num_batches = lib::div_round_up(num_points, batch_size); + + // Create a local matmul buffer for assignments (batch_size x num_centroids) + auto matmul_buffer = data::SimpleData{batch_size, num_centroids}; + auto timer = lib::Timer(); + + // Process points in batches + for (size_t batch = 0; batch < num_batches; ++batch) { + auto batch_range = threads::UnitRange{ + batch * batch_size, std::min((batch + 1) * batch_size, num_points)}; + + // Use centroid_assignment to compute assignments for this batch + centroid_assignment( + const_cast(points), // centroid_assignment expects non-const + data_norm, + batch_range, + distance_, + centroids_, + centroids_norm_, + assignments, + matmul_buffer, + inter_query_threadpool_, + timer + ); } } @@ -781,75 +834,70 @@ class DynamicIVFIndex { translator_.insert(external_ids, new_internal_ids, false); } - template - void search_single_query( - const Queries& queries, - size_t query_idx, - QueryResultView& results, - auto& buffer_centroids, - auto& buffer_leaves, - const search_parameters_type& search_parameters, - size_t tid - ) { - // Find nearest centroids - auto query = queries.get_datum(query_idx); - search_centroids( - query, - distance_, - matmul_results_, - buffer_centroids, - tid, - centroids_norm_, - get_num_threads() - ); - - // Search within selected clusters - size_t n_probes = std::min(search_parameters.n_probes_, buffer_centroids.size()); + ///// Search Closures ///// - for (size_t probe_idx = 0; probe_idx < n_probes; ++probe_idx) { - size_t cluster_idx = buffer_centroids[probe_idx].id(); - search_cluster(query, cluster_idx, buffer_leaves[0]); - } - - // Write results (translating to external IDs) - size_t num_neighbors = results.n_neighbors(); - for (size_t i = 0; i < std::min(num_neighbors, buffer_leaves[0].size()); ++i) { - size_t internal_id = buffer_leaves[0][i].id(); - float dist = buffer_leaves[0][i].distance(); - size_t external_id = translate_internal_id(internal_id); - - results.set(Neighbor(external_id, dist), query_idx, i); - } - - // Fill remaining slots with invalid neighbors if needed - for (size_t i = buffer_leaves[0].size(); i < num_neighbors; ++i) { - results.set( - Neighbor( - std::numeric_limits::max(), std::numeric_limits::max() - ), + /// @brief Create closure for searching centroids + auto search_centroids_closure() const { + return [this](const auto& query, auto& buffer_centroids, size_t query_idx) { + search_centroids( + query, + distance_, + 
matmul_results_, + buffer_centroids, query_idx, - i + centroids_norm_, + get_num_threads() ); - } + }; } - template - void search_cluster(const Query& query, size_t cluster_idx, auto& buffer) { - const auto& cluster = clusters_[cluster_idx]; + /// @brief Create closure for searching clusters/leaves + auto search_leaves_closure() { + return [this]( + const auto& query, + auto& distance, + const auto& buffer_centroids, + auto& buffer_leaves, + size_t tid + ) { + // Use the common search_leaves function + search_leaves( + query, + distance, + *this, + buffer_centroids, + buffer_leaves, + intra_query_threadpools_[tid] + ); + }; + } - for (size_t pos = 0; pos < cluster.size(); ++pos) { - Idx global_id = cluster.ids_[pos]; + public: + /// @brief Cluster accessor interface for search_leaves + /// This method provides filtered access to cluster leaves, skipping empty entries + /// + /// Note: For DynamicIVFIndex, we pass the global_id as the local_id (3rd parameter) + /// because the common search_leaves function will combine it with cluster_id to get + /// the final ID. Our get_global_id() just returns the local_id unchanged. + template void on_leaves(Callback&& f, size_t cluster_id) const { + const auto& cluster = clusters_[cluster_id]; + for (size_t i = 0; i < cluster.size(); ++i) { + Idx global_id = cluster.ids_[i]; // Skip empty entries if (!is_valid(global_id)) { continue; } - auto datum = cluster.data_.get_datum(pos); - float dist = distance::compute(distance_, query, datum); - buffer.insert({global_id, dist}); + auto datum = cluster.data_.get_datum(i); + // Pass global_id as the local_id (3rd param) since get_global_id returns it + // unchanged + f(datum, 0 /* unused gid */, global_id); } } + + /// @brief Get global ID for a point (identity function for dynamic IVF) + size_t get_global_id(size_t /*cluster_id*/, size_t local_id) const { return local_id; } }; } // namespace svs::index::ivf diff --git a/include/svs/index/ivf/hierarchical_kmeans.h b/include/svs/index/ivf/hierarchical_kmeans.h index 12abb960b..30670d9e7 100644 --- a/include/svs/index/ivf/hierarchical_kmeans.h +++ b/include/svs/index/ivf/hierarchical_kmeans.h @@ -91,15 +91,24 @@ auto hierarchical_kmeans_clustering_impl( svs::logging::debug(logger, "Level1 clusters: {}\n", num_level1_clusters); // Step 1: Create training set - size_t num_training_data = - lib::narrow(std::ceil(data.size() * parameters.training_fraction_)); - if (num_training_data < num_clusters || num_training_data > data.size()) { + // Use at least MIN_TRAINING_SAMPLE_MULTIPLIER times the number of clusters, + // or training_fraction of data, whichever is larger. + // This ensures we have enough training data even for small datasets + size_t min_training_data = + std::min(num_clusters * MIN_TRAINING_SAMPLE_MULTIPLIER, data.size()); + size_t num_training_data = std::max( + min_training_data, + lib::narrow(std::ceil(data.size() * parameters.training_fraction_)) + ); + // Ensure we don't exceed the data size + num_training_data = std::min(num_training_data, data.size()); + + if (num_training_data < num_clusters) { throw ANNEXCEPTION( - "Invalid number of training data: {}, num_clusters: {}, total data size: " - "{}\n", - num_training_data, - num_clusters, - data.size() + "Insufficient data for clustering: {} datapoints, {} clusters required. 
" + "Need at least as many datapoints as clusters.\n", + data.size(), + num_clusters ); } auto rng = std::mt19937(parameters.seed_); diff --git a/include/svs/index/ivf/kmeans.h b/include/svs/index/ivf/kmeans.h index 51df16357..ae20b061a 100644 --- a/include/svs/index/ivf/kmeans.h +++ b/include/svs/index/ivf/kmeans.h @@ -45,15 +45,24 @@ auto kmeans_clustering_impl( auto num_centroids = parameters.num_centroids_; // Step 1: Create training set - size_t num_training_data = - lib::narrow(std::ceil(data.size() * parameters.training_fraction_)); - if (num_training_data < num_centroids || num_training_data > data.size()) { + // Use at least MIN_TRAINING_SAMPLE_MULTIPLIER times the number of centroids, + // or training_fraction of data, whichever is larger. + // This ensures we have enough training data even for small datasets + size_t min_training_data = + std::min(num_centroids * MIN_TRAINING_SAMPLE_MULTIPLIER, data.size()); + size_t num_training_data = std::max( + min_training_data, + lib::narrow(std::ceil(data.size() * parameters.training_fraction_)) + ); + // Ensure we don't exceed the data size + num_training_data = std::min(num_training_data, data.size()); + + if (num_training_data < num_centroids) { throw ANNEXCEPTION( - "Invalid number of training data: {}, num_centroids: {}, total data size: " - "{}\n", - num_training_data, - num_centroids, - data.size() + "Insufficient data for clustering: {} datapoints, {} centroids required. " + "Need at least as many datapoints as centroids.\n", + data.size(), + num_centroids ); } auto rng = std::mt19937(parameters.seed_); diff --git a/tests/svs/index/ivf/dynamic_ivf.cpp b/tests/svs/index/ivf/dynamic_ivf.cpp index 3f22c8997..fc4a4192f 100644 --- a/tests/svs/index/ivf/dynamic_ivf.cpp +++ b/tests/svs/index/ivf/dynamic_ivf.cpp @@ -262,3 +262,490 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index", "[dynamic_ivf]") { test_loop(index, reference, queries, div(reference.size(), modify_fraction), 2, 6); } + +CATCH_TEST_CASE("Dynamic IVF - Edge Cases", "[dynamic_ivf]") { + const size_t num_threads = 4; + const size_t num_points = 100; + + // Create a small dataset + auto data = svs::data::SimpleData(num_points, N); + std::mt19937 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (size_t i = 0; i < num_points; ++i) { + std::vector vec(N); + for (size_t j = 0; j < N; ++j) { + vec[j] = dist(rng); + } + data.set_datum(i, vec); + } + + // Build clustering with more clusters than points to test empty clusters + // With the fix, this should now work by using all 100 datapoints for training + auto build_params = svs::index::ivf::IVFBuildParameters( + 50, // More clusters than 10% of data (which would be 10 points) + 10, // max_iters + false // is_hierarchical + ); + + auto threadpool = svs::threads::SequentialThreadPool(); + auto clustering = svs::index::ivf::build_clustering( + build_params, + svs::lib::Lazy([&data]() { return data; }), + Distance(), + threadpool, + false + ); + + using ClusterType = + svs::index::ivf::DynamicDenseCluster, Idx>; + + std::vector clusters; + std::vector initial_indices; + + for (size_t c = 0; c < 50; ++c) { + const auto& cluster_indices = clustering.cluster(c); + size_t cluster_size = cluster_indices.size(); + + ClusterType cluster; + cluster.data_ = svs::data::SimpleData(cluster_size, N); + cluster.ids_.resize(cluster_size); + + for (size_t i = 0; i < cluster_size; ++i) { + Idx global_id = cluster_indices[i]; + cluster.data_.set_datum(i, data.get_datum(global_id)); + cluster.ids_[i] = global_id; + 
initial_indices.push_back(global_id); + } + + clusters.push_back(std::move(cluster)); + } + + auto centroids = clustering.centroids(); + auto threadpool_for_index = svs::threads::as_threadpool(num_threads); + using IndexType = svs::index::ivf::DynamicIVFIndex< + decltype(centroids), + ClusterType, + Distance, + decltype(threadpool_for_index)>; + + auto index = IndexType( + std::move(centroids), + std::move(clusters), + initial_indices, + Distance(), + std::move(threadpool_for_index), + 1 + ); + + // Test 1: Search with sparse/empty clusters (should not crash) + auto query = svs::data::SimpleData(1, N); + std::vector query_vec(N); + for (size_t j = 0; j < N; ++j) { + query_vec[j] = dist(rng); + } + query.set_datum(0, query_vec); + + auto results = svs::QueryResult(1, NUM_NEIGHBORS); + auto search_params = svs::index::ivf::IVFSearchParameters(50, NUM_NEIGHBORS); + + index.search( + results.view(), + svs::data::ConstSimpleDataView{query.data(), 1, N}, + search_params + ); + + // Verify results are valid (not all max values) + bool found_valid = false; + for (size_t i = 0; i < NUM_NEIGHBORS; ++i) { + if (results.index(0, i) != std::numeric_limits::max()) { + found_valid = true; + break; + } + } + CATCH_REQUIRE(found_valid); + + // Test 2: Delete and compact + std::vector to_delete; + for (size_t i = 0; i < 20 && i < initial_indices.size(); ++i) { + to_delete.push_back(initial_indices[i]); + } + + index.delete_entries(to_delete); + + index.compact(10); + + // Search after compaction + index.search( + results.view(), + svs::data::ConstSimpleDataView{query.data(), 1, N}, + search_params + ); + + CATCH_REQUIRE(results.index(0, 0) != std::numeric_limits::max()); +} + +CATCH_TEST_CASE("Dynamic IVF - Search Parameters Variations", "[dynamic_ivf]") { + const size_t num_threads = 4; + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto queries = test_dataset::queries(); + + // Build with standard parameters + auto build_params = svs::index::ivf::IVFBuildParameters(NUM_CLUSTERS, 10, false); + auto threadpool = svs::threads::SequentialThreadPool(); + auto clustering = svs::index::ivf::build_clustering( + build_params, + svs::lib::Lazy([&data]() { return data; }), + Distance(), + threadpool, + false + ); + + using ClusterType = + svs::index::ivf::DynamicDenseCluster, Idx>; + + std::vector clusters; + std::vector indices; + + for (size_t c = 0; c < NUM_CLUSTERS; ++c) { + const auto& cluster_indices = clustering.cluster(c); + ClusterType cluster; + cluster.data_ = svs::data::SimpleData(cluster_indices.size(), N); + cluster.ids_.resize(cluster_indices.size()); + + for (size_t i = 0; i < cluster_indices.size(); ++i) { + Idx global_id = cluster_indices[i]; + cluster.data_.set_datum(i, data.get_datum(global_id)); + cluster.ids_[i] = global_id; + indices.push_back(global_id); + } + clusters.push_back(std::move(cluster)); + } + + auto centroids = clustering.centroids(); + auto threadpool_for_index = svs::threads::as_threadpool(num_threads); + using IndexType = svs::index::ivf::DynamicIVFIndex< + decltype(centroids), + ClusterType, + Distance, + decltype(threadpool_for_index)>; + + auto index = IndexType( + std::move(centroids), + std::move(clusters), + indices, + Distance(), + std::move(threadpool_for_index), + 1 + ); + + auto results = svs::QueryResult(queries.size(), NUM_NEIGHBORS); + + // Test with different n_probes values + std::vector probe_counts = {1, 3, 5, NUM_CLUSTERS}; + std::vector recalls; + + for (auto n_probes : probe_counts) { + auto params = 
svs::index::ivf::IVFSearchParameters(n_probes, NUM_NEIGHBORS); + index.search( + results.view(), + svs::data::ConstSimpleDataView{ + queries.data(), queries.size(), queries.dimensions()}, + params + ); + + // Verify all results are valid + for (size_t i = 0; i < queries.size(); ++i) { + for (size_t j = 0; j < NUM_NEIGHBORS; ++j) { + auto idx = results.index(i, j); + CATCH_REQUIRE( + (idx < data.size() || idx == std::numeric_limits::max()) + ); + } + } + } +} + +CATCH_TEST_CASE("Dynamic IVF - Threading Configurations", "[dynamic_ivf]") { + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto queries = test_dataset::queries(); + + auto build_params = svs::index::ivf::IVFBuildParameters(NUM_CLUSTERS, 10, false); + auto threadpool = svs::threads::SequentialThreadPool(); + auto clustering = svs::index::ivf::build_clustering( + build_params, + svs::lib::Lazy([&data]() { return data; }), + Distance(), + threadpool, + false + ); + + using ClusterType = + svs::index::ivf::DynamicDenseCluster, Idx>; + + // Test with different thread configurations + std::vector thread_configs = {1, 2, 4, 8}; + std::vector intra_query_configs = {1, 2}; + + for (auto num_threads : thread_configs) { + for (auto intra_threads : intra_query_configs) { + std::vector clusters; + std::vector indices; + + for (size_t c = 0; c < NUM_CLUSTERS; ++c) { + const auto& cluster_indices = clustering.cluster(c); + ClusterType cluster; + cluster.data_ = svs::data::SimpleData(cluster_indices.size(), N); + cluster.ids_.resize(cluster_indices.size()); + + for (size_t i = 0; i < cluster_indices.size(); ++i) { + Idx global_id = cluster_indices[i]; + cluster.data_.set_datum(i, data.get_datum(global_id)); + cluster.ids_[i] = global_id; + indices.push_back(global_id); + } + clusters.push_back(std::move(cluster)); + } + + auto centroids_copy = clustering.centroids(); + auto threadpool_for_index = svs::threads::as_threadpool(num_threads); + using IndexType = svs::index::ivf::DynamicIVFIndex< + decltype(centroids_copy), + ClusterType, + Distance, + decltype(threadpool_for_index)>; + + auto index = IndexType( + std::move(centroids_copy), + std::move(clusters), + indices, + Distance(), + std::move(threadpool_for_index), + intra_threads + ); + + auto results = svs::QueryResult(queries.size(), NUM_NEIGHBORS); + auto params = svs::index::ivf::IVFSearchParameters(NUM_CLUSTERS, NUM_NEIGHBORS); + + index.search( + results.view(), + svs::data::ConstSimpleDataView{ + queries.data(), queries.size(), queries.dimensions()}, + params + ); + + // Verify results are consistent + for (size_t i = 0; i < queries.size(); ++i) { + for (size_t j = 0; j < NUM_NEIGHBORS; ++j) { + auto idx = results.index(i, j); + CATCH_REQUIRE( + (idx < data.size() || idx == std::numeric_limits::max()) + ); + } + } + } + } +} + +CATCH_TEST_CASE("Dynamic IVF - Add/Delete Stress Test", "[dynamic_ivf]") { + const size_t num_threads = 4; + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto queries = test_dataset::queries(); + + auto build_params = svs::index::ivf::IVFBuildParameters(NUM_CLUSTERS, 10, false); + auto threadpool = svs::threads::SequentialThreadPool(); + + // Start with half the data + size_t initial_size = data.size() / 2; + auto initial_data = svs::data::SimpleData(initial_size, N); + for (size_t i = 0; i < initial_size; ++i) { + initial_data.set_datum(i, data.get_datum(i)); + } + + auto clustering = svs::index::ivf::build_clustering( + build_params, + svs::lib::Lazy([&initial_data]() { return initial_data; }), + 
Distance(), + threadpool, + false + ); + + using ClusterType = + svs::index::ivf::DynamicDenseCluster, Idx>; + + std::vector clusters; + std::vector indices; + + for (size_t c = 0; c < NUM_CLUSTERS; ++c) { + const auto& cluster_indices = clustering.cluster(c); + ClusterType cluster; + cluster.data_ = svs::data::SimpleData(cluster_indices.size(), N); + cluster.ids_.resize(cluster_indices.size()); + + for (size_t i = 0; i < cluster_indices.size(); ++i) { + Idx global_id = cluster_indices[i]; + cluster.data_.set_datum(i, initial_data.get_datum(global_id)); + cluster.ids_[i] = global_id; + indices.push_back(global_id); + } + clusters.push_back(std::move(cluster)); + } + + auto centroids = clustering.centroids(); + auto threadpool_for_index = svs::threads::as_threadpool(num_threads); + using IndexType = svs::index::ivf::DynamicIVFIndex< + decltype(centroids), + ClusterType, + Distance, + decltype(threadpool_for_index)>; + + auto index = IndexType( + std::move(centroids), + std::move(clusters), + indices, + Distance(), + std::move(threadpool_for_index), + 1 + ); + + auto results = svs::QueryResult(queries.size(), NUM_NEIGHBORS); + auto params = svs::index::ivf::IVFSearchParameters(NUM_CLUSTERS, NUM_NEIGHBORS); + + // Test: Rapid add/delete cycles + std::mt19937 rng(12345); + std::uniform_int_distribution idx_dist(0, indices.size() - 1); + + for (size_t cycle = 0; cycle < 5; ++cycle) { + // Delete random entries + std::vector deleted; + for (size_t i = 0; i < 10 && i < indices.size(); ++i) { + size_t idx = idx_dist(rng) % indices.size(); + deleted.push_back(indices[idx]); + } + if (!deleted.empty()) { + index.delete_entries(deleted); + } + + // Search after deletion + index.search( + results.view(), + svs::data::ConstSimpleDataView{ + queries.data(), queries.size(), queries.dimensions()}, + params + ); + + // Verify deleted IDs don't appear in results + for (size_t q = 0; q < queries.size(); ++q) { + for (size_t k = 0; k < NUM_NEIGHBORS; ++k) { + auto result_id = results.index(q, k); + for (auto deleted_id : deleted) { + CATCH_REQUIRE(result_id != deleted_id); + } + } + } + + // Add new entries + std::vector new_ids; + auto new_data = svs::data::SimpleData(10, N); + Idx new_base_id = 10000 + cycle * 100; + for (size_t i = 0; i < 10; ++i) { + new_ids.push_back(new_base_id + i); + new_data.set_datum(i, data.get_datum(i % data.size())); + } + index.add_points(new_data, new_ids, false); + + // Search after addition + index.search( + results.view(), + svs::data::ConstSimpleDataView{ + queries.data(), queries.size(), queries.dimensions()}, + params + ); + + // All results should be valid + for (size_t q = 0; q < queries.size(); ++q) { + CATCH_REQUIRE(results.index(q, 0) != std::numeric_limits::max()); + } + + // Compact periodically + if (cycle % 2 == 1) { + index.compact(50); + } + } +} + +CATCH_TEST_CASE("Dynamic IVF - Single Query Search", "[dynamic_ivf]") { + const size_t num_threads = 2; + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto queries = test_dataset::queries(); + + auto build_params = svs::index::ivf::IVFBuildParameters(NUM_CLUSTERS, 10, false); + auto threadpool = svs::threads::SequentialThreadPool(); + auto clustering = svs::index::ivf::build_clustering( + build_params, + svs::lib::Lazy([&data]() { return data; }), + Distance(), + threadpool, + false + ); + + using ClusterType = + svs::index::ivf::DynamicDenseCluster, Idx>; + + std::vector clusters; + std::vector indices; + + for (size_t c = 0; c < NUM_CLUSTERS; ++c) { + const auto& cluster_indices = 
clustering.cluster(c); + ClusterType cluster; + cluster.data_ = svs::data::SimpleData(cluster_indices.size(), N); + cluster.ids_.resize(cluster_indices.size()); + + for (size_t i = 0; i < cluster_indices.size(); ++i) { + Idx global_id = cluster_indices[i]; + cluster.data_.set_datum(i, data.get_datum(global_id)); + cluster.ids_[i] = global_id; + indices.push_back(global_id); + } + clusters.push_back(std::move(cluster)); + } + + auto centroids = clustering.centroids(); + auto threadpool_for_index = svs::threads::as_threadpool(num_threads); + using IndexType = svs::index::ivf::DynamicIVFIndex< + decltype(centroids), + ClusterType, + Distance, + decltype(threadpool_for_index)>; + + auto index = IndexType( + std::move(centroids), + std::move(clusters), + indices, + Distance(), + std::move(threadpool_for_index), + 1 + ); + + // Test single query search + auto single_query = svs::data::SimpleData(1, N); + single_query.set_datum(0, queries.get_datum(0)); + + auto results = svs::QueryResult(1, NUM_NEIGHBORS); + auto params = svs::index::ivf::IVFSearchParameters(NUM_CLUSTERS, NUM_NEIGHBORS); + + index.search( + results.view(), + svs::data::ConstSimpleDataView{single_query.data(), 1, N}, + params + ); + + // Verify we got valid results + CATCH_REQUIRE(results.index(0, 0) != std::numeric_limits::max()); + + // Verify distances are in ascending order + for (size_t k = 1; k < NUM_NEIGHBORS; ++k) { + if (results.index(0, k) != std::numeric_limits::max()) { + CATCH_REQUIRE(results.distance(0, k) >= results.distance(0, k - 1)); + } + } +} From fcc8d443258db1aaa068620987588d615890b743 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Fri, 5 Dec 2025 07:43:44 -0800 Subject: [PATCH 07/23] Clang tidy --- include/svs/index/ivf/common.h | 29 ++++++++++++--------- include/svs/index/ivf/dynamic_ivf.h | 19 +++----------- include/svs/index/ivf/hierarchical_kmeans.h | 2 +- include/svs/index/ivf/index.h | 6 +++-- 4 files changed, 25 insertions(+), 31 deletions(-) diff --git a/include/svs/index/ivf/common.h b/include/svs/index/ivf/common.h index 5bb89ebd1..5751bfe34 100644 --- a/include/svs/index/ivf/common.h +++ b/include/svs/index/ivf/common.h @@ -257,12 +257,12 @@ void compute_matmul( m_int, // const int M n_int, // const int N k_int, // const int K - 1.0f, // float alpha + 1.0F, // float alpha data, // const float* A k_int, // const int lda centroids, // const float* B k_int, // const int ldb - 0.0f, // const float beta + 0.0F, // const float beta results, // float* c n_int // const int ldc ); @@ -278,12 +278,12 @@ void compute_matmul( m_int, // const int M n_int, // const int N k_int, // const int K - 1.0f, // float alpha + 1.0F, // float alpha (const uint16_t*)data, // const *uint16_t A k_int, // const int lda (const uint16_t*)centroids, // const uint16_t* B k_int, // const int ldb - 0.0f, // const float beta + 0.0F, // const float beta results, // float* c n_int // const int ldc ); @@ -299,12 +299,12 @@ void compute_matmul( m_int, // const int M n_int, // const int N k_int, // const int K - 1.0f, // float alpha + 1.0F, // float alpha (const uint16_t*)data, // const *uint16_t A k_int, // const int lda (const uint16_t*)centroids, // const uint16_t* B k_int, // const int ldb - 0.0f, // const float beta + 0.0F, // const float beta results, // float* c n_int // const int ldc ); @@ -343,7 +343,7 @@ void normalize_centroids( auto datum = centroids.get_datum(i); float norm = distance::norm(datum); if (norm != 0.0) { - float norm_inv = 1.0 / norm; + float norm_inv = 1.0F / norm; for (size_t j = 0; j < datum.size(); j++) 
{ datum[j] = datum[j] * norm_inv; } @@ -425,7 +425,7 @@ void centroid_assignment( auto dists = matmul_results.get_datum(i); for (size_t j = 0; j < centroids.size(); j++) { auto dist = data_norm[batch_range.start() + i] + centroids_norm[j] - - 2 * dists[j]; + (2 * dists[j]); nearest = std::min(nearest, Neighbor(j, dist)); } assignments[batch_range.start() + i] = nearest.id(); @@ -513,7 +513,7 @@ void centroid_split( if (counts.at(j) == 0) { continue; } - float p = counts.at(j) / float(num_data); + float p = static_cast(counts.at(j)) / static_cast(num_data); float r = distribution(rng); if (r < p) { break; @@ -695,8 +695,9 @@ data::SimpleData make_training_set( threadpool, threads::StaticPartition{num_training}, [&](auto indices, auto /*tid*/) { - for (auto i : indices) + for (auto i : indices) { trainset.set_datum(i, data.get_datum(ids[i])); + } } ); return trainset; @@ -717,8 +718,9 @@ data::SimpleData init_centroids( threadpool, threads::StaticPartition{num_centroids}, [&](auto indices, auto) { - for (auto i : indices) + for (auto i : indices) { centroids.set_datum(i, trainset.get_datum(ids[i])); + } } ); return centroids; @@ -740,8 +742,9 @@ std::vector> group_assignments( const std::vector& assignments, size_t num_clusters, const Data& data ) { std::vector> clusters(num_clusters); - for (auto i : data.eachindex()) + for (auto i : data.eachindex()) { clusters[assignments[i]].push_back(i); + } return clusters; } @@ -867,7 +870,7 @@ void search_centroids( for (size_t j = 0; j < num_threads; j++) { auto distance = matmul_results[j].get_datum(query_id); for (size_t k = 0; k < distance.size(); k++) { - float dist = query_norm + centroids_norm[count] - 2 * distance[k]; + float dist = query_norm + centroids_norm[count] - (2 * distance[k]); buffer.insert({count, dist}); count++; } diff --git a/include/svs/index/ivf/dynamic_ivf.h b/include/svs/index/ivf/dynamic_ivf.h index a82016f7b..3ea00c7bf 100644 --- a/include/svs/index/ivf/dynamic_ivf.h +++ b/include/svs/index/ivf/dynamic_ivf.h @@ -17,24 +17,18 @@ #pragma once // Include the IVF index -#include "svs/index/ivf/clustering.h" #include "svs/index/ivf/index.h" // svs #include "svs/concepts/distance.h" -#include "svs/core/data.h" -#include "svs/core/loading.h" #include "svs/core/logging.h" #include "svs/core/query_result.h" #include "svs/core/translation.h" -#include "svs/lib/boundscheck.h" -#include "svs/lib/invoke.h" #include "svs/lib/misc.h" #include "svs/lib/threads.h" // stdlib #include -#include #include namespace svs::index::ivf { @@ -151,7 +145,7 @@ class DynamicIVFIndex { // Search infrastructure (same as static IVF) std::vector> matmul_results_; std::vector centroids_norm_; - search_parameters_type search_parameters_{}; + search_parameters_type search_parameters_; // Logger svs::logging::logger_ptr logger_; @@ -178,12 +172,8 @@ class DynamicIVFIndex { ) : centroids_{std::move(centroids)} , clusters_{std::move(clusters)} - , status_() - , id_to_cluster_() - , id_in_cluster_() , first_empty_{0} , prefetch_offset_{8} - , translator_() , distance_{std::move(distance_function)} , inter_query_threadpool_{threads::as_threadpool(std::move(threadpool_proto))} , intra_query_thread_count_{intra_query_thread_count} @@ -237,9 +227,6 @@ class DynamicIVFIndex { ) : centroids_{std::move(centroids)} , clusters_{std::move(clusters)} - , status_() - , id_to_cluster_() - , id_in_cluster_() , first_empty_{0} , prefetch_offset_{8} , translator_{std::move(translator)} @@ -506,7 +493,9 @@ class DynamicIVFIndex { validate_query_batch_size(queries.size()); size_t 
num_neighbors = results.n_neighbors(); - size_t buffer_leaves_size = search_parameters.k_reorder_ * num_neighbors; + size_t buffer_leaves_size = static_cast( + search_parameters.k_reorder_ * static_cast(num_neighbors) + ); // Phase 1: Inter-query parallel - Compute distances to centroids compute_centroid_distances( diff --git a/include/svs/index/ivf/hierarchical_kmeans.h b/include/svs/index/ivf/hierarchical_kmeans.h index 30670d9e7..585be5a5e 100644 --- a/include/svs/index/ivf/hierarchical_kmeans.h +++ b/include/svs/index/ivf/hierarchical_kmeans.h @@ -85,7 +85,7 @@ auto hierarchical_kmeans_clustering_impl( size_t num_level1_clusters = parameters.hierarchical_level1_clusters_; if (num_level1_clusters == 0) { - num_level1_clusters = std::sqrt(num_clusters); + num_level1_clusters = static_cast(std::sqrt(num_clusters)); } svs::logging::debug(logger, "Level1 clusters: {}\n", num_level1_clusters); diff --git a/include/svs/index/ivf/index.h b/include/svs/index/ivf/index.h index 8e93f8d55..4d4c59901 100644 --- a/include/svs/index/ivf/index.h +++ b/include/svs/index/ivf/index.h @@ -227,7 +227,9 @@ class IVFIndex { validate_query_batch_size(queries.size()); size_t num_neighbors = results.n_neighbors(); - size_t buffer_leaves_size = search_parameters.k_reorder_ * num_neighbors; + size_t buffer_leaves_size = static_cast( + search_parameters.k_reorder_ * static_cast(num_neighbors) + ); // Phase 1: Inter-query parallel - Compute distances to centroids compute_centroid_distances( @@ -281,7 +283,7 @@ class IVFIndex { ///// Search Data ///// std::vector> matmul_results_; std::vector centroids_norm_; - search_parameters_type search_parameters_{}; + search_parameters_type search_parameters_; // SVS logger for per index logging svs::logging::logger_ptr logger_; From 769e7ea98214b2fde6c48c0cda5641ec9b9b04c0 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Fri, 5 Dec 2025 11:56:17 -0800 Subject: [PATCH 08/23] Python bindings for dynamic IVF, first version --- bindings/python/CMakeLists.txt | 1 + .../python/include/svs/python/dynamic_ivf.h | 39 ++ bindings/python/src/dynamic_ivf.cpp | 431 ++++++++++++++++++ bindings/python/src/python_bindings.cpp | 2 + include/svs/index/ivf/dynamic_ivf.h | 56 +++ include/svs/orchestrators/dynamic_ivf.h | 292 ++++++++++++ 6 files changed, 821 insertions(+) create mode 100644 bindings/python/include/svs/python/dynamic_ivf.h create mode 100644 bindings/python/src/dynamic_ivf.cpp create mode 100644 include/svs/orchestrators/dynamic_ivf.h diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index 41aa381b0..618cd1b87 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -43,6 +43,7 @@ set(CPP_FILES # ivf if (SVS_EXPERIMENTAL_ENABLE_IVF) list(APPEND CPP_FILES + src/dynamic_ivf.cpp src/ivf.cpp ) endif() diff --git a/bindings/python/include/svs/python/dynamic_ivf.h b/bindings/python/include/svs/python/dynamic_ivf.h new file mode 100644 index 000000000..d18a608c0 --- /dev/null +++ b/bindings/python/include/svs/python/dynamic_ivf.h @@ -0,0 +1,39 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +// svs python bindings +#include "svs/python/core.h" + +#include + +namespace svs::python::dynamic_ivf { + +// Specializations +template void for_standard_specializations(F&& f) { +#define X(Q, T, Dist, N) f.template operator()() + X(float, float, DistanceL2, Dynamic); + X(float, float, DistanceIP, Dynamic); + X(float, svs::Float16, DistanceL2, Dynamic); + X(float, svs::Float16, DistanceIP, Dynamic); + X(float, svs::BFloat16, DistanceL2, Dynamic); + X(float, svs::BFloat16, DistanceIP, Dynamic); +#undef X +} + +void wrap(pybind11::module& m); +} // namespace svs::python::dynamic_ivf diff --git a/bindings/python/src/dynamic_ivf.cpp b/bindings/python/src/dynamic_ivf.cpp new file mode 100644 index 000000000..98c20f868 --- /dev/null +++ b/bindings/python/src/dynamic_ivf.cpp @@ -0,0 +1,431 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// svs python bindings +#include "svs/python/dynamic_ivf.h" +#include "svs/python/common.h" +#include "svs/python/core.h" +#include "svs/python/ivf.h" +#include "svs/python/manager.h" + +// svs +#include "svs/lib/dispatcher.h" +#include "svs/orchestrators/dynamic_ivf.h" + +// pybind +#include +#include +#include + +// fmt +#include + +// stl +#include + +///// +///// DynamicIVF +///// + +namespace py = pybind11; +namespace svs::python::dynamic_ivf { + +namespace { + +template +svs::DynamicIVF build_from_array( + const svs::index::ivf::IVFBuildParameters& parameters, + py_contiguous_array_t py_data, + py_contiguous_array_t py_ids, + svs::DistanceType distance_type, + size_t num_threads +) { + auto dispatcher = svs::DistanceDispatcher(distance_type); + return dispatcher([&](auto distance) { + // Create a view for building - build_clustering needs immutable data + // Note: Even though we use SimpleDataView (non-const), the data won't be modified + // during clustering, and BlockedData created from it will have mutable element type + auto data_view = data::SimpleDataView( + const_cast(py_data.data()), py_data.shape(0), py_data.shape(1) + ); + return svs::DynamicIVF::build( + parameters, + data_view, + std::span(py_ids.data(), py_ids.size()), + distance, + num_threads + ); + }); +} + +const char* BUILD_FROM_ARRAY_DOC = R"( +Construct a DynamicIVF index over the given data, returning a searchable index. + +Args: + parameters: Parameters controlling IVF construction (clustering and search parameters). + See below for the documentation of this class. + data: The dataset to index. **NOTE**: SVS will maintain an internal copy of the + dataset. 
This may change in future releases. + ids: Vector of ids to assign to each row in the dataset; must match dataset length and contain unique values. + distance_type: The distance type to use for this dataset. + num_threads: Number of threads to use for index construction. +)"; + +template +void add_build_specialization(py::class_& index) { + index.def_static( + "build", + &build_from_array, + py::arg("parameters"), + py::arg("data"), + py::arg("ids"), + py::arg("distance_type"), + py::arg("num_threads"), + BUILD_FROM_ARRAY_DOC + ); +} + +///// +///// Build from file (data loader) +///// + +template +svs::DynamicIVF dynamic_ivf_build_uncompressed( + const svs::index::ivf::IVFBuildParameters& parameters, + svs::VectorDataLoader> data_loader, + std::span ids, + svs::DistanceType distance_type, + size_t num_threads +) { + return svs::DynamicIVF::build( + parameters, std::move(data_loader), ids, distance_type, num_threads + ); +} + +using DynamicIVFBuildFromFileDispatcher = svs::lib::Dispatcher< + svs::DynamicIVF, + const svs::index::ivf::IVFBuildParameters&, + UnspecializedVectorDataLoader, + std::span, + svs::DistanceType, + size_t>; + +DynamicIVFBuildFromFileDispatcher dynamic_ivf_build_from_file_dispatcher() { + auto dispatcher = DynamicIVFBuildFromFileDispatcher{}; + // Register uncompressed specializations (Dynamic dimensionality only) + for_standard_specializations([&]() { + auto method = &dynamic_ivf_build_uncompressed; + dispatcher.register_target(svs::lib::dispatcher_build_docs, method); + }); + return dispatcher; +} + +svs::DynamicIVF dynamic_ivf_build_from_file( + const svs::index::ivf::IVFBuildParameters& parameters, + UnspecializedVectorDataLoader data_loader, + const py_contiguous_array_t& py_ids, + svs::DistanceType distance_type, + size_t num_threads +) { + auto ids = std::span(py_ids.data(), py_ids.size()); + return dynamic_ivf_build_from_file_dispatcher().invoke( + parameters, std::move(data_loader), ids, distance_type, num_threads + ); +} + +constexpr std::string_view DYNAMIC_IVF_BUILD_FROM_FILE_DOCSTRING_PROTO = R"( +Construct a DynamicIVF index using a data loader, returning the index. + +Args: + parameters: Build parameters controlling IVF construction (clustering and search parameters). + data_loader: Data loader (e.g., a VectorDataLoader instance). + ids: Vector of ids to assign to each row in the dataset; must match dataset length and contain unique values. + distance_type: The similarity function to use for this index. + num_threads: Number of threads to use for index construction. Default: 1. + +Specializations compiled into the binary are listed below. + +{} # (Method listing auto-generated) +)"; + +template +void add_points( + svs::DynamicIVF& index, + const py_contiguous_array_t& py_data, + const py_contiguous_array_t& ids, + bool reuse_empty = false +) { + if (py_data.ndim() != 2) { + throw ANNEXCEPTION("Expected points to have 2 dimensions!"); + } + if (ids.ndim() != 1) { + throw ANNEXCEPTION("Expected ids to have 1 dimension!"); + } + if (py_data.shape(0) != ids.shape(0)) { + throw ANNEXCEPTION( + "Expected IDs to be the same length as the number of rows in points!" + ); + } + index.add_points(data_view(py_data), std::span(ids.data(), ids.size()), reuse_empty); +} + +const char* ADD_POINTS_DOCSTRING = R"( +Add every point in ``points`` to the index, assigning the element-wise corresponding ID to +each point. + +Args: + points: A matrix of data whose rows, corresponding to points in R^n, will be added to + the index. 
+ ids: Vector of ids to assign to each row in ``points``. Must have the same number of + elements as ``points`` has rows. + reuse_empty: A flag that determines whether to reuse empty entries that may exist after deletion and consolidation. When enabled, + scan from the beginning to find and fill these empty entries when adding new points. + +Furthermore, all entries in ``ids`` must be unique and not already exist in the index. +If either of these does not hold, an exception will be thrown without mutating the +underlying index. + +When ``delete_entries`` is called, a soft deletion is performed, marking the entries as ``deleted``. +When ``consolidate`` is called, the state of these deleted entries becomes ``empty``. +When ``add_points`` is called with the ``reuse_empty`` flag enabled, the memory is scanned from the beginning to locate and fill these empty entries with new points. +)"; + +template +void add_points_specialization(py::class_& index) { + index.def( + "add", + &add_points, + py::arg("points"), + py::arg("ids"), + py::arg("reuse_empty") = false, + ADD_POINTS_DOCSTRING + ); +} + +///// Docstrings +// Put docstrings here to hopefully make the implementation of `wrap` a bit less +// cluttered. +const char* COMPACT_DOCSTRING = R"( +Remove any holes created in the data by renumbering internal IDs. +Shrink the underlying data structures. +This can potentially reduce the memory footprint of the index +if a sufficient number of points were deleted. +)"; + +const char* DELETE_DOCSTRING = R"( +Soft delete the IDs from the index. Soft deletion does not remove the IDs from the index, +but prevents them from being returned from future searches. + +Args: + ids: The IDs to delete. + +Each element in IDs must be unique and must correspond to a valid ID stored in the index. +Otherwise, an exception will be thrown. If an exception is thrown for this reason, the +index will be left unchanged from before the function call. +)"; + +const char* ALL_IDS_DOCSTRING = R"( +Return a Numpy vector of all IDs currently in the index. +)"; + +// Index saving. 
+void save_index( + svs::DynamicIVF& index, const std::string& config_path, const std::string& data_dir +) { + index.save(config_path, data_dir); +} + +///// +///// Assembly +///// + +template +svs::DynamicIVF assemble_uncompressed( + svs::VectorDataLoader> centroids_loader, + svs::VectorDataLoader> datafile, + std::span ids, + Dist distance, + size_t num_threads +) { + using DataAlloc = RebindAllocator; + + // Load centroids as SimpleData - they are immutable in IVF + auto centroids = svs::data::SimpleData::load(centroids_loader.path_); + + // Load data as BlockedData - it will grow/shrink with insertions/deletions + auto data = svs::data::BlockedData::load( + datafile.path_, as_blocked(DataAlloc(datafile.allocator_)) + ); + + return svs::DynamicIVF::assemble( + std::move(centroids), std::move(data), ids, distance, num_threads + ); +} + +template void register_assembly(Dispatcher& dispatcher) { + for_standard_specializations([&]() { + dispatcher.register_target(&assemble_uncompressed); + }); +} + +using DynamicIVFAssembleTypes = std::variant; + +svs::DynamicIVF assemble( + DynamicIVFAssembleTypes centroids_loader, + DynamicIVFAssembleTypes data_loader, + const py_contiguous_array_t& py_ids, + svs::DistanceType distance_type, + svs::DataType SVS_UNUSED(query_type), + size_t num_threads +) { + auto dispatcher = svs::lib::Dispatcher< + svs::DynamicIVF, + DynamicIVFAssembleTypes, + DynamicIVFAssembleTypes, + std::span, + svs::DistanceType, + size_t>(); + + register_assembly(dispatcher); + auto ids = std::span(py_ids.data(), py_ids.size()); + return dispatcher.invoke( + std::move(centroids_loader), std::move(data_loader), ids, distance_type, num_threads + ); +} + +} // namespace + +void wrap(py::module& m) { + std::string name = "DynamicIVF"; + py::class_ ivf_index( + m, name.c_str(), "Top level class for the dynamic IVF index." + ); + + add_search_specialization(ivf_index); + add_threading_interface(ivf_index); + add_data_interface(ivf_index); + + // IVF specific extensions. + ivf::add_interface(ivf_index); + + // Dynamic interface. + ivf_index.def( + "compact", + &svs::DynamicIVF::compact, + py::arg("batchsize") = 1'000'000, + COMPACT_DOCSTRING + ); + + // Reloading/Assembly + ivf_index.def( + py::init(&assemble), + py::arg("centroids_loader"), + py::arg("data_loader"), + py::arg("ids"), + py::arg("distance") = svs::L2, + py::arg("query_type") = svs::DataType::float32, + py::arg("num_threads") = 1 + ); + + // Index building. + add_build_specialization(ivf_index); + + // Build from file / data loader (dynamic docstring) + { + auto dispatcher = dynamic_ivf_build_from_file_dispatcher(); + std::string dynamic; + for (size_t i = 0; i < dispatcher.size(); ++i) { + fmt::format_to( + std::back_inserter(dynamic), + R"(Method {}:\n - data_loader: {}\n - distance: {}\n)", + i, + dispatcher.description(i, 1), + dispatcher.description(i, 3) + ); + } + ivf_index.def_static( + "build", + &dynamic_ivf_build_from_file, + py::arg("parameters"), + py::arg("data_loader"), + py::arg("ids"), + py::arg("distance_type"), + py::arg("num_threads") = 1, + fmt::format(DYNAMIC_IVF_BUILD_FROM_FILE_DOCSTRING_PROTO, dynamic).c_str() + ); + } + + // Index modification. + add_points_specialization(ivf_index); + + // Note: DynamicIVFIndex doesn't support reconstruct_at, so we don't add reconstruct + // interface + + // Index Deletion. 
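+    // Illustrative Python usage: index.delete(np.array([3, 7], dtype=np.uint64)) hides
+    // those IDs from future searches; the freed slots can later be reused via
+    // index.add(points, ids, reuse_empty=True) or reclaimed with index.compact().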
+ ivf_index.def( + "delete", + [](svs::DynamicIVF& index, const py_contiguous_array_t& ids) { + return index.delete_entries(as_span(ids)); + }, + py::arg("ids"), + DELETE_DOCSTRING + ); + + // ID inspection + ivf_index.def( + "has_id", + &svs::DynamicIVF::has_id, + py::arg("id"), + "Return whether the ID exists in the index." + ); + + ivf_index.def( + "all_ids", + [](const svs::DynamicIVF& index) { + const auto& v = index.all_ids(); + // Populate a numpy-set + auto npv = numpy_vector(v.size()); + std::copy(v.begin(), v.end(), npv.mutable_unchecked().mutable_data()); + return npv; + }, + ALL_IDS_DOCSTRING + ); + + // Saving + ivf_index.def( + "save", + &save_index, + py::arg("config_directory"), + py::arg("data_directory"), + R"( +Save a constructed index to disk (useful following index construction). + +Args: + config_directory: Directory where index configuration information will be saved. + data_directory: Directory where the dataset will be saved. + +Note: All directories should be separate to avoid accidental name collision with any +auxiliary files that are needed when saving the various components of the index. + +If the directory does not exist, it will be created if its parent exists. + +It is the caller's responsibility to ensure that no existing data will be +overwritten when saving the index to this directory. + )" + ); +} + +} // namespace svs::python::dynamic_ivf diff --git a/bindings/python/src/python_bindings.cpp b/bindings/python/src/python_bindings.cpp index 67baf9049..e7c14bf6f 100644 --- a/bindings/python/src/python_bindings.cpp +++ b/bindings/python/src/python_bindings.cpp @@ -24,6 +24,7 @@ SVS_VALIDATE_BOOL_ENV(SVS_ENABLE_IVF) #if SVS_ENABLE_IVF +#include "svs/python/dynamic_ivf.h" #include "svs/python/ivf.h" #endif // SVS_ENABLE_IVF @@ -255,5 +256,6 @@ Convert the `fvecs` file on disk with 32-bit floating point entries to a `fvecs` SVS_VALIDATE_BOOL_ENV(SVS_ENABLE_IVF) #if SVS_ENABLE_IVF svs::python::ivf::wrap(m); + svs::python::dynamic_ivf::wrap(m); #endif // SVS_ENABLE_IVF } diff --git a/include/svs/index/ivf/dynamic_ivf.h b/include/svs/index/ivf/dynamic_ivf.h index 3ea00c7bf..fd7b2bd98 100644 --- a/include/svs/index/ivf/dynamic_ivf.h +++ b/include/svs/index/ivf/dynamic_ivf.h @@ -889,4 +889,60 @@ class DynamicIVFIndex { size_t get_global_id(size_t /*cluster_id*/, size_t local_id) const { return local_id; } }; +/// +/// @brief Build a DynamicIVFIndex from clustering and data +/// +template < + typename Centroids, + data::ImmutableMemoryDataset SourceData, + typename Distance, + typename ThreadPoolProto> +auto build_dynamic_ivf( + Centroids centroids, + const index::ivf::Clustering& clustering, + const SourceData& source_data, + std::span ids, + Distance distance, + ThreadPoolProto threadpool_proto +) { + using I = uint32_t; + using ElementType = typename SourceData::element_type; + using BlockedDataType = data:: + SimpleData>>; + using Cluster = DynamicDenseCluster; + + auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); + + // Create dynamic clusters from the clustering + std::vector clusters; + clusters.reserve(clustering.size()); + + for (size_t cluster_idx = 0; cluster_idx < clustering.size(); ++cluster_idx) { + const auto& cluster_assignments = clustering.cluster(cluster_idx); + size_t cluster_size = cluster_assignments.size(); + + // Create BlockedData for this cluster + auto cluster_data = BlockedDataType(cluster_size, source_data.dimensions()); + std::vector cluster_ids; + cluster_ids.reserve(cluster_size); + + for (size_t i = 0; i < 
cluster_size; ++i) { + I data_idx = cluster_assignments[i]; + cluster_data.set_datum(i, source_data.get_datum(data_idx)); + cluster_ids.push_back(ids[data_idx]); + } + + clusters.emplace_back(std::move(cluster_data), std::move(cluster_ids)); + } + + // Create the index + return DynamicIVFIndex( + std::move(centroids), + std::move(clusters), + ids, + std::move(distance), + std::move(threadpool) + ); +} + } // namespace svs::index::ivf diff --git a/include/svs/orchestrators/dynamic_ivf.h b/include/svs/orchestrators/dynamic_ivf.h new file mode 100644 index 000000000..0bdc36257 --- /dev/null +++ b/include/svs/orchestrators/dynamic_ivf.h @@ -0,0 +1,292 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "svs/index/ivf/dynamic_ivf.h" +#include "svs/index/ivf/index.h" +#include "svs/orchestrators/ivf.h" +#include "svs/orchestrators/manager.h" + +namespace svs { + +/// +/// @brief Type-erased wrapper for DynamicIVF. +/// +/// Implementation details: The DynamicIVF implementation implements a superset of the +/// operations supported by the IVFInterface. +/// +class DynamicIVFInterface : public IVFInterface { + public: + // TODO: For now - only accept floating point entries. + virtual void add_points( + const float* data, + size_t dim0, + size_t dim1, + std::span ids, + bool reuse_empty = false + ) = 0; + + virtual size_t delete_entries(std::span ids) = 0; + virtual void compact(size_t batchsize = 1'000'000) = 0; + + // ID inspection. + virtual bool has_id(size_t id) const = 0; + virtual void all_ids(std::vector& ids) const = 0; + + // Saving + virtual void save( + const std::filesystem::path& config_directory, + const std::filesystem::path& data_directory + ) = 0; +}; + +template +class DynamicIVFImpl : public IVFImpl { + public: + using base_type = IVFImpl; + using base_type::impl; + + explicit DynamicIVFImpl(Impl impl) + : base_type{std::move(impl)} {} + + template + explicit DynamicIVFImpl(Args&&... args) + : base_type{std::forward(args)...} {} + + // Implement the interface. + void add_points( + const float* data, + size_t dim0, + size_t dim1, + std::span ids, + bool reuse_empty = false + ) override { + auto points = data::ConstSimpleDataView(data, dim0, dim1); + impl().add_points(points, ids, reuse_empty); + } + + size_t delete_entries(std::span ids) override { + return impl().delete_entries(ids); + } + + void compact(size_t batchsize) override { impl().compact(batchsize); } + + // ID inspection. + bool has_id(size_t id) const override { return impl().has_id(id); } + + void all_ids(std::vector& ids) const override { + ids.clear(); + impl().on_ids([&ids](size_t id) { ids.push_back(id); }); + } + + ///// Saving + void save( + const std::filesystem::path& config_directory, + const std::filesystem::path& data_directory + ) override { + impl().save(config_directory, data_directory); + } +}; + +// Forward Declarations. +class DynamicIVF; + +template +DynamicIVF make_dynamic_ivf(Args&&... 
args); + +/// +/// DynamicIVF +/// +class DynamicIVF : public manager::IndexManager { + public: + using base_type = manager::IndexManager; + using IVFSearchParameters = index::ivf::IVFSearchParameters; + + struct AssembleTag {}; + + /// + /// @brief Construct a new DynamicIVF instance. + /// + /// @param impl A pointer to a concrete implementation of the full + /// DynamicIVFInterface. + /// + explicit DynamicIVF(std::unique_ptr> impl + ) + : base_type{std::move(impl)} {} + + template + explicit DynamicIVF(AssembleTag SVS_UNUSED(tag), QueryTypes SVS_UNUSED(type), Impl impl) + : base_type{std::make_unique>(std::move(impl))} {} + + // Mutable Interface. + DynamicIVF& compact(size_t batchsize = 1'000'000) { + impl_->compact(batchsize); + return *this; + } + + DynamicIVF& add_points( + data::ConstSimpleDataView points, + std::span ids, + bool reuse_empty = false + ) { + impl_->add_points( + points.data(), points.size(), points.dimensions(), ids, reuse_empty + ); + return *this; + } + + size_t delete_entries(std::span ids) { + return impl_->delete_entries(ids); + } + + // Backend String + std::string experimental_backend_string() const { + return impl_->experimental_backend_string(); + } + + // ID Inspection + + /// + /// @brief Return whether ``id`` is in the index. + /// + bool has_id(size_t id) const { return impl_->has_id(id); } + + /// + /// @brief Return all ``ids`` currently in the index. + /// + /// Note: If the stored index is large, the returned container may result in a + /// significant memory allocation. + /// + /// If more precise handling is required, please work with the lower level C++ class + /// directly. + /// + std::vector all_ids() const { + auto v = std::vector(); + impl_->all_ids(v); + return v; + } + + void save( + const std::filesystem::path& config_directory, + const std::filesystem::path& data_directory + ) { + impl_->save(config_directory, data_directory); + } + + // Building + template < + manager::QueryTypeDefinition QueryTypes, + typename DataProto, + typename Distance, + typename ThreadPoolProto> + static DynamicIVF build( + const index::ivf::IVFBuildParameters& build_parameters, + const DataProto& data_proto, + std::span ids, + Distance distance, + ThreadPoolProto threadpool_proto + ) { + // Handle DistanceType enum by dispatching to concrete distance types + if constexpr (std::is_same_v, DistanceType>) { + auto dispatcher = DistanceDispatcher(distance); + return dispatcher([&](auto distance_function) { + return build( + build_parameters, + data_proto, + ids, + distance_function, + std::move(threadpool_proto) + ); + }); + } else { + auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); + + // Load the data (handles both loaders and views) + auto data = svs::detail::dispatch_load(data_proto, threadpool); + + // Build clustering first + using BuildType = float; // Use float for building centroids + // Note: build_clustering takes threadpool by value, so we need a copy + auto clustering = [&]() { + auto threadpool_copy = threads::NativeThreadPool(threadpool.size()); + return index::ivf::build_clustering( + build_parameters, data, distance, std::move(threadpool_copy), false + ); + }(); + + // Now build the dynamic IVF index from the clustering + auto impl = index::ivf::build_dynamic_ivf( + std::move(clustering.centroids_), + clustering, + data, // Pass by const reference - build_dynamic_ivf will create BlockedData + ids, + distance, + std::move(threadpool) + ); + + return DynamicIVF( + AssembleTag(), manager::as_typelist{}, std::move(impl) + ); + } 
+ } + + // Assembly + template < + manager::QueryTypeDefinition QueryTypes, + typename Centroids, + typename Data, + typename Distance, + typename ThreadPoolProto> + static DynamicIVF assemble( + Centroids centroids, + Data data, + std::span ids, + Distance distance, + ThreadPoolProto threadpool_proto + ) { + using I = uint32_t; + using Cluster = index::ivf::DynamicDenseCluster; + + auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); + + // For assembly, create empty clusters - user will add points later + std::vector clusters; + clusters.reserve(centroids.size()); + + for (size_t i = 0; i < centroids.size(); ++i) { + auto cluster_data = Data(0, data.dimensions()); + std::vector cluster_ids; + clusters.emplace_back(std::move(cluster_data), std::move(cluster_ids)); + } + + // Create the index with empty clusters + auto impl = + index::ivf::DynamicIVFIndex( + std::move(centroids), + std::move(clusters), + ids, + std::move(distance), + std::move(threadpool) + ); + + return DynamicIVF( + AssembleTag(), manager::as_typelist{}, std::move(impl) + ); + } +}; + +} // namespace svs From aef3044e0fe2b75382397b7a16f4d2be757bbd4a Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Fri, 5 Dec 2025 16:00:38 -0800 Subject: [PATCH 09/23] Added get_distance and consolidate --- bindings/python/src/dynamic_ivf.cpp | 39 ++++- include/svs/index/ivf/dynamic_ivf.h | 41 ++++++ include/svs/index/ivf/extensions.h | 42 ++++++ include/svs/orchestrators/dynamic_ivf.h | 46 ++++-- tests/svs/index/ivf/dynamic_ivf.cpp | 186 ++++++++++++++++++++++++ 5 files changed, 340 insertions(+), 14 deletions(-) diff --git a/bindings/python/src/dynamic_ivf.cpp b/bindings/python/src/dynamic_ivf.cpp index 98c20f868..81b8f04ac 100644 --- a/bindings/python/src/dynamic_ivf.cpp +++ b/bindings/python/src/dynamic_ivf.cpp @@ -198,7 +198,7 @@ Furthermore, all entries in ``ids`` must be unique and not already exist in the If either of these does not hold, an exception will be thrown without mutating the underlying index. -When ``delete_entries`` is called, a soft deletion is performed, marking the entries as ``deleted``. +When ``delete`` is called, a soft deletion is performed, marking the entries as ``deleted``. When ``consolidate`` is called, the state of these deleted entries becomes ``empty``. When ``add_points`` is called with the ``reuse_empty`` flag enabled, the memory is scanned from the beginning to locate and fill these empty entries with new points. )"; @@ -215,9 +215,12 @@ void add_points_specialization(py::class_& index) { ); } -///// Docstrings -// Put docstrings here to hopefully make the implementation of `wrap` a bit less -// cluttered. +const char* CONSOLIDATE_DOCSTRING = R"( +No-op method for compatibility with dynamic index interface. +For the IVF index, deletion marks entries as Empty and they are excluded from searches. +Empty slots can be reused when adding new points. +)"; + const char* COMPACT_DOCSTRING = R"( Remove any holes created in the data by renumbering internal IDs. Shrink the underlying data structures. @@ -322,6 +325,7 @@ void wrap(py::module& m) { ivf::add_interface(ivf_index); // Dynamic interface. 
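// ----------------------------------------------------------------------------
// [Editorial aside -- not part of the patch] The deletion lifecycle documented
// above (soft delete -> empty slots -> optional reuse -> compaction) can be
// driven from C++ through the orchestrator methods introduced in this series.
// A minimal sketch, assuming an already-built `svs::DynamicIVF`, replacement
// vectors of matching dimensionality, and the method names used by these
// patches (`delete_points`, `consolidate`, `add_points`, `compact`); the
// helper function and its name are illustrative only.

#include "svs/orchestrators/dynamic_ivf.h"

#include <span>

void refresh_points(
    svs::DynamicIVF& index,
    std::span<const size_t> stale_ids,
    svs::data::ConstSimpleDataView<float> replacements,
    std::span<const size_t> new_ids
) {
    index.delete_points(stale_ids); // Soft delete: entries are hidden from search.
    index.consolidate();            // No-op for IVF; deleted slots remain reusable.
    index.add_points(replacements, new_ids, /*reuse_empty=*/true);
    index.compact();                // Renumber internal IDs and shrink storage.
}
// ----------------------------------------------------------------------------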
+ ivf_index.def("consolidate", &svs::DynamicIVF::consolidate, CONSOLIDATE_DOCSTRING); ivf_index.def( "compact", &svs::DynamicIVF::compact, @@ -378,7 +382,7 @@ void wrap(py::module& m) { ivf_index.def( "delete", [](svs::DynamicIVF& index, const py_contiguous_array_t& ids) { - return index.delete_entries(as_span(ids)); + return index.delete_points(as_span(ids)); }, py::arg("ids"), DELETE_DOCSTRING @@ -404,6 +408,31 @@ void wrap(py::module& m) { ALL_IDS_DOCSTRING ); + // Distance calculation + ivf_index.def( + "get_distance", + [](const svs::DynamicIVF& index, + size_t id, + const py_contiguous_array_t& query) { + return index.get_distance(id, as_span(query)); + }, + py::arg("id"), + py::arg("query"), + R"( + Compute the distance between a query vector and a vector in the index. + + Args: + id: The external ID of the vector in the index. + query: The query vector as a numpy array. + + Returns: + The distance between the query and the indexed vector. + + Raises: + RuntimeError: If the ID doesn't exist or dimensions don't match. + )" + ); + // Saving ivf_index.def( "save", diff --git a/include/svs/index/ivf/dynamic_ivf.h b/include/svs/index/ivf/dynamic_ivf.h index fd7b2bd98..3d8e2a2b8 100644 --- a/include/svs/index/ivf/dynamic_ivf.h +++ b/include/svs/index/ivf/dynamic_ivf.h @@ -337,6 +337,37 @@ class DynamicIVFIndex { return clusters_[cluster_idx].get_datum(pos); } + ///// Distance + + /// @brief Compute the distance between an external vector and a vector in the index. + template double get_distance(size_t id, const Query& query) const { + // Check if id exists + if (!has_id(id)) { + throw ANNEXCEPTION("ID {} does not exist in the index!", id); + } + + // Verify dimensions match + const size_t query_size = query.size(); + const size_t index_vector_size = dimensions(); + if (query_size != index_vector_size) { + throw ANNEXCEPTION( + "Incompatible dimensions. Query has {} while the index expects {}.", + query_size, + index_vector_size + ); + } + + // Translate external ID to internal ID and get cluster location + size_t internal_id = translate_external_id(id); + size_t cluster_idx = id_to_cluster_[internal_id]; + size_t pos = id_in_cluster_[internal_id]; + + // Call extension for distance computation + return svs::index::ivf::extensions::get_distance_ext( + clusters_, distance_, cluster_idx, pos, query + ); + } + /// @brief Iterate over all external IDs template void on_ids(F&& f) const { for (size_t i = 0; i < status_.size(); ++i) { @@ -422,6 +453,16 @@ class DynamicIVFIndex { ///// Compaction ///// + /// @brief Consolidate the data structure (no-op for IVF). + /// + /// In the IVF index implementation, deletion marks entries as Empty in metadata, + /// making them invalid for searches. These empty slots can be reused by add_points. + /// This method is a no-op for compatibility with the dynamic index interface. 
+ /// + void consolidate() { + // No-op: Deleted entries are marked Empty and excluded from searches + } + /// @brief Compact the data structure /// /// Compact removes all empty slots, rebuilding the index structure diff --git a/include/svs/index/ivf/extensions.h b/include/svs/index/ivf/extensions.h index 0ef83587b..c708c1af3 100644 --- a/include/svs/index/ivf/extensions.h +++ b/include/svs/index/ivf/extensions.h @@ -217,4 +217,46 @@ void svs_invoke( } } +///// +///// Distance Computation +///// + +struct ComputeDistanceType { + template + double operator()( + const Clusters& clusters, + const Distance& distance, + size_t cluster_idx, + size_t pos, + const Query& query + ) const { + return svs_invoke(*this, clusters, distance, cluster_idx, pos, query); + } +}; + +// CPO for distance computation +inline constexpr ComputeDistanceType get_distance_ext{}; + +template +double svs_invoke( + svs::tag_t, + const Clusters& clusters, + const Distance& distance, + size_t cluster_idx, + size_t pos, + const Query& query +) { + // Get distance function + auto dist_f = per_thread_batch_search_setup(clusters[cluster_idx].data_, distance); + svs::distance::maybe_fix_argument(dist_f, query); + + // Get the vector from the cluster + auto indexed_span = clusters[cluster_idx].get_datum(pos); + + // Compute the distance using the appropriate distance function + auto dist = svs::distance::compute(dist_f, query, indexed_span); + + return static_cast(dist); +} + } // namespace svs::index::ivf::extensions diff --git a/include/svs/orchestrators/dynamic_ivf.h b/include/svs/orchestrators/dynamic_ivf.h index 0bdc36257..d89096cf1 100644 --- a/include/svs/orchestrators/dynamic_ivf.h +++ b/include/svs/orchestrators/dynamic_ivf.h @@ -40,13 +40,17 @@ class DynamicIVFInterface : public IVFInterface { bool reuse_empty = false ) = 0; - virtual size_t delete_entries(std::span ids) = 0; + virtual size_t delete_points(std::span ids) = 0; + virtual void consolidate() = 0; virtual void compact(size_t batchsize = 1'000'000) = 0; // ID inspection. virtual bool has_id(size_t id) const = 0; virtual void all_ids(std::vector& ids) const = 0; + // Distance calculation + virtual double get_distance(size_t id, const AnonymousArray<1>& query) const = 0; + // Saving virtual void save( const std::filesystem::path& config_directory, @@ -79,10 +83,12 @@ class DynamicIVFImpl : public IVFImpl { impl().add_points(points, ids, reuse_empty); } - size_t delete_entries(std::span ids) override { + size_t delete_points(std::span ids) override { return impl().delete_entries(ids); } + void consolidate() override { impl().consolidate(); } + void compact(size_t batchsize) override { impl().compact(batchsize); } // ID inspection. @@ -93,6 +99,18 @@ class DynamicIVFImpl : public IVFImpl { impl().on_ids([&ids](size_t id) { ids.push_back(id); }); } + ///// Distance + double get_distance(size_t id, const AnonymousArray<1>& query) const override { + return svs::lib::match( + QueryTypes{}, + query.type(), + [&](svs::lib::Type) { + auto query_span = std::span(get(query), query.size(0)); + return impl().get_distance(id, query_span); + } + ); + } + ///// Saving void save( const std::filesystem::path& config_directory, @@ -133,11 +151,6 @@ class DynamicIVF : public manager::IndexManager { : base_type{std::make_unique>(std::move(impl))} {} // Mutable Interface. 
- DynamicIVF& compact(size_t batchsize = 1'000'000) { - impl_->compact(batchsize); - return *this; - } - DynamicIVF& add_points( data::ConstSimpleDataView points, std::span ids, @@ -149,8 +162,16 @@ class DynamicIVF : public manager::IndexManager { return *this; } - size_t delete_entries(std::span ids) { - return impl_->delete_entries(ids); + size_t delete_points(std::span ids) { return impl_->delete_points(ids); } + + DynamicIVF& consolidate() { + impl_->consolidate(); + return *this; + } + + DynamicIVF& compact(size_t batchsize = 1'000'000) { + impl_->compact(batchsize); + return *this; } // Backend String @@ -187,6 +208,13 @@ class DynamicIVF : public manager::IndexManager { impl_->save(config_directory, data_directory); } + ///// Distance + template double get_distance(size_t id, const Query& query) const { + // Create AnonymousArray from the query + AnonymousArray<1> query_array{query.data(), query.size()}; + return impl_->get_distance(id, query_array); + } + // Building template < manager::QueryTypeDefinition QueryTypes, diff --git a/tests/svs/index/ivf/dynamic_ivf.cpp b/tests/svs/index/ivf/dynamic_ivf.cpp index fc4a4192f..0dece7a26 100644 --- a/tests/svs/index/ivf/dynamic_ivf.cpp +++ b/tests/svs/index/ivf/dynamic_ivf.cpp @@ -749,3 +749,189 @@ CATCH_TEST_CASE("Dynamic IVF - Single Query Search", "[dynamic_ivf]") { } } } + +CATCH_TEST_CASE("Dynamic IVF Get Distance", "[index][ivf][dynamic_ivf]") { + const size_t num_threads = 2; + const size_t num_points = 200; + + // Create test dataset + auto data = svs::data::SimpleData(num_points, N); + std::mt19937 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (size_t i = 0; i < num_points; ++i) { + std::vector vec(N); + for (size_t j = 0; j < N; ++j) { + vec[j] = dist(rng); + } + data.set_datum(i, vec); + } + + // Create queries + const size_t num_queries = 20; + auto queries = svs::data::SimpleData(num_queries, N); + for (size_t i = 0; i < num_queries; ++i) { + std::vector vec(N); + for (size_t j = 0; j < N; ++j) { + vec[j] = dist(rng); + } + queries.set_datum(i, vec); + } + + // Build IVF clustering + auto build_params = svs::index::ivf::IVFBuildParameters( + NUM_CLUSTERS, + /* max_iters */ 10, + /* is_hierarchical */ false + ); + + auto threadpool = svs::threads::SequentialThreadPool(); + auto clustering = svs::index::ivf::build_clustering( + build_params, + svs::lib::Lazy([&data]() { return data; }), + Distance(), + threadpool, + /* train_only */ false + ); + + // Create dynamic clusters from the clustering result + using ClusterType = + svs::index::ivf::DynamicDenseCluster, Idx>; + + std::vector clusters; + std::vector initial_indices; // External IDs in order + size_t internal_id = 0; // Sequential internal IDs + + for (size_t c = 0; c < NUM_CLUSTERS; ++c) { + const auto& cluster_indices = clustering.cluster(c); + size_t cluster_size = cluster_indices.size(); + + ClusterType cluster; + cluster.data_ = svs::data::SimpleData(cluster_size, N); + cluster.ids_.resize(cluster_size); + + for (size_t i = 0; i < cluster_size; ++i) { + Idx external_id = cluster_indices[i]; // Use clustering index as external ID + cluster.data_.set_datum(i, data.get_datum(external_id)); + cluster.ids_[i] = internal_id; // Sequential internal ID + initial_indices.push_back(external_id); // Map internal_id -> external_id + internal_id++; + } + + clusters.push_back(std::move(cluster)); + } + + // Create the dynamic IVF index + auto centroids = clustering.centroids(); + auto threadpool_for_index = svs::threads::as_threadpool(num_threads); + using 
IndexType = svs::index::ivf::DynamicIVFIndex< + decltype(centroids), + ClusterType, + Distance, + decltype(threadpool_for_index)>; + + auto index = IndexType( + std::move(centroids), + std::move(clusters), + initial_indices, + Distance(), + std::move(threadpool_for_index), + 1 // intra_query_threads + ); + + // Test get_distance functionality using the standard tester + CATCH_SECTION("Get Distance Test") { + // Test with strict tolerance to verify correctness + constexpr double TOLERANCE = 1e-2; // 1% tolerance, same as flat index + + // Test with a few different IDs + std::vector test_ids = {0, 10, 50}; + if (index.size() > 100) { + test_ids.push_back(100); + } + + for (size_t test_id : test_ids) { + if (test_id >= index.size()) { + continue; + } + + // Get a query vector + size_t query_id = std::min(5, queries.size() - 1); + auto query = queries.get_datum(query_id); + + // Get distance from index + double index_distance = index.get_distance(test_id, query); + + // Compute expected distance from original data + // test_id is the external ID which maps to data[test_id] + auto datum = data.get_datum(test_id); + Distance dist_copy = Distance(); + svs::distance::maybe_fix_argument(dist_copy, query); + double expected_distance = svs::distance::compute(dist_copy, query, datum); + + // Verify the distance is correct + double relative_diff = + std::abs((index_distance - expected_distance) / expected_distance); + CATCH_REQUIRE(relative_diff < TOLERANCE); + } + + // Test with out of bounds ID - should throw + CATCH_REQUIRE_THROWS_AS( + index.get_distance(index.size() + 1000, queries.get_datum(0)), svs::ANNException + ); + } + + // Test get_distance after adding and removing points + CATCH_SECTION("Get Distance After Modifications") { + // Test with strict tolerance to verify correctness + constexpr double TOLERANCE = 1e-2; // 1% tolerance, same as flat index + + // Add some new points + std::vector new_ids = {10000, 10001, 10002}; + + // Prepare data for batch insertion + auto new_data = svs::data::SimpleData(new_ids.size(), N); + for (size_t i = 0; i < new_ids.size(); ++i) { + new_data.set_datum(i, data.get_datum(i)); + } + + // Add points in batch + index.add_points(new_data, new_ids); + + // Test get_distance for newly added points + for (size_t i = 0; i < new_ids.size(); ++i) { + size_t query_id = std::min(7, queries.size() - 1); + auto query = queries.get_datum(query_id); + + double index_distance = index.get_distance(new_ids[i], query); + + // Compute expected distance from the original data we added + auto datum = data.get_datum(i); + Distance dist_copy = Distance(); + svs::distance::maybe_fix_argument(dist_copy, query); + double expected_distance = svs::distance::compute(dist_copy, query, datum); + + double relative_diff = + std::abs((index_distance - expected_distance) / expected_distance); + CATCH_REQUIRE(relative_diff < TOLERANCE); + } + + // Delete a point + std::vector ids_to_delete = {new_ids[0]}; + index.delete_entries(ids_to_delete); + + // Verify the deleted point throws exception + CATCH_REQUIRE_THROWS_AS( + index.get_distance(new_ids[0], queries.get_datum(0)), svs::ANNException + ); + + // Verify other points still work + for (size_t i = 1; i < new_ids.size(); ++i) { + size_t query_id = std::min(8, queries.size() - 1); + auto query = queries.get_datum(query_id); + + // Should not throw + double distance = index.get_distance(new_ids[i], query); + CATCH_REQUIRE(distance >= 0.0); + } + } +} From 8d24b99fca7bfc3466d2b3acf667f9edba02845b Mon Sep 17 00:00:00 2001 From: Ishwar Bhati 
Date: Fri, 5 Dec 2025 17:20:37 -0800 Subject: [PATCH 10/23] Add get_distance support in static ivf index --- bindings/python/include/svs/python/ivf.h | 22 +++++++ bindings/python/tests/test_ivf.py | 7 ++- include/svs/index/ivf/extensions.h | 40 +++++++++++- include/svs/index/ivf/index.h | 77 ++++++++++++++++++++++++ include/svs/orchestrators/ivf.h | 24 ++++++++ tests/integration/ivf/index_search.cpp | 51 ++++++++++++++++ 6 files changed, 217 insertions(+), 4 deletions(-) diff --git a/bindings/python/include/svs/python/ivf.h b/bindings/python/include/svs/python/ivf.h index 936c715bf..3fdbb53b3 100644 --- a/bindings/python/include/svs/python/ivf.h +++ b/bindings/python/include/svs/python/ivf.h @@ -82,6 +82,28 @@ template void add_interface(pybind11::class_& manage See also: `svs.IVFSearchParameters`.)" ); + + manager.def( + "get_distance", + [](const Manager& index, size_t id, const py_contiguous_array_t& query) { + return index.get_distance(id, as_span(query)); + }, + pybind11::arg("id"), + pybind11::arg("query"), + R"( + Compute the distance between a query vector and a vector in the index. + + Args: + id: The ID of the vector in the index. + query: The query vector as a numpy array. + + Returns: + The distance between the query and the indexed vector. + + Raises: + RuntimeError: If the ID doesn't exist or dimensions don't match. + )" + ); } void wrap(pybind11::module& m); diff --git a/bindings/python/tests/test_ivf.py b/bindings/python/tests/test_ivf.py index b5bdf7b2c..08968e607 100644 --- a/bindings/python/tests/test_ivf.py +++ b/bindings/python/tests/test_ivf.py @@ -39,7 +39,8 @@ test_number_of_clusters, \ test_dimensions, \ timed, \ - get_test_set + get_test_set, \ + test_get_distance from .dataset import UncompressedMatcher @@ -161,6 +162,10 @@ def _test_basic_inner( self.assertEqual(queries.shape, (1000, 128)) self.assertEqual(groundtruth.shape, (1000, 100)) + # Test get_distance + data = svs.read_vecs(test_data_vecs) + test_get_distance(ivf, svs.DistanceType.L2, data) + # Data interface self.assertEqual(ivf.size, test_number_of_clusters) diff --git a/include/svs/index/ivf/extensions.h b/include/svs/index/ivf/extensions.h index c708c1af3..e8e0688cb 100644 --- a/include/svs/index/ivf/extensions.h +++ b/include/svs/index/ivf/extensions.h @@ -237,7 +237,13 @@ struct ComputeDistanceType { // CPO for distance computation inline constexpr ComputeDistanceType get_distance_ext{}; +// Overload for container types with view_cluster/get_datum methods (e.g., +// DenseClusteredDataset) template + requires requires(const Clusters& c, size_t i) { + { c.view_cluster(i) }; + { c.get_datum(i, i) }; + } double svs_invoke( svs::tag_t, const Clusters& clusters, @@ -247,11 +253,39 @@ double svs_invoke( const Query& query ) { // Get distance function - auto dist_f = per_thread_batch_search_setup(clusters[cluster_idx].data_, distance); - svs::distance::maybe_fix_argument(dist_f, query); + auto cluster_data = clusters.view_cluster(cluster_idx); + auto dist_f = per_thread_batch_search_setup(cluster_data, distance); // Get the vector from the cluster - auto indexed_span = clusters[cluster_idx].get_datum(pos); + auto indexed_span = clusters.get_datum(cluster_idx, pos); + + // Compute the distance using the appropriate distance function + auto dist = svs::distance::compute(dist_f, query, indexed_span); + + return static_cast(dist); +} + +// Overload for vector-like containers (e.g., std::vector) +template + requires requires(const Clusters& c, size_t i) { + { c[i] }; + { c[i].data_ }; + { c[i].get_datum(i) }; + 
} +double svs_invoke( + svs::tag_t, + const Clusters& clusters, + const Distance& distance, + size_t cluster_idx, + size_t pos, + const Query& query +) { + // Get distance function + const auto& cluster = clusters[cluster_idx]; + auto dist_f = per_thread_batch_search_setup(cluster.data_, distance); + + // Get the vector from the cluster + auto indexed_span = cluster.get_datum(pos); // Compute the distance using the appropriate distance function auto dist = svs::distance::compute(dist_f, query, indexed_span); diff --git a/include/svs/index/ivf/index.h b/include/svs/index/ivf/index.h index 4d4c59901..221e44c73 100644 --- a/include/svs/index/ivf/index.h +++ b/include/svs/index/ivf/index.h @@ -164,6 +164,52 @@ class IVFIndex { search_parameters_ = search_parameters; } + ///// ID Mapping ///// + + /// @brief Check if an ID exists in the index + bool has_id(size_t id) const { + return id < id_to_cluster_.size() && id_to_cluster_[id] != SIZE_MAX; + } + + ///// Distance Computation ///// + + /// @brief Compute the distance between a query vector and a vector in the index + template double get_distance(size_t id, const Query& query) const { + // Lazily initialize ID mapping on first call + if (id_to_cluster_.empty()) { + const_cast(this)->initialize_id_mapping(); + } + + // Check if id exists + if (!has_id(id)) { + throw ANNEXCEPTION("ID {} does not exist in the index!", id); + } + + // Verify dimensions match + const size_t query_size = query.size(); + const size_t index_vector_size = dimensions(); + if (query_size != index_vector_size) { + throw ANNEXCEPTION( + "Incompatible dimensions. Query has {} while the index expects {}.", + query_size, + index_vector_size + ); + } + + // Get cluster and position + size_t cluster_id = id_to_cluster_[id]; + size_t pos = id_in_cluster_[id]; + + // Fix distance argument if needed (e.g., for cosine similarity) + auto distance_copy = distance_; + svs::distance::maybe_fix_argument(distance_copy, query); + + // Call extension for distance computation + return svs::index::ivf::extensions::get_distance_ext( + cluster_, distance_copy, cluster_id, pos, query + ); + } + ///// Search Implementation ///// /// @brief Search closure for centroid distance computation @@ -274,6 +320,12 @@ class IVFIndex { Data cluster0_; Dist distance_; + ///// ID Mapping for get_distance ///// + // Maps ID -> cluster_id + std::vector id_to_cluster_{}; + // Maps ID -> position within cluster + std::vector id_in_cluster_{}; + ///// Threading Infrastructure ///// InterQueryThreadPool inter_query_threadpool_; // Handles parallelism across queries const size_t intra_query_thread_count_; // Number of threads per query processing @@ -330,6 +382,31 @@ class IVFIndex { } } + void initialize_id_mapping() { + // Build ID-to-location mapping from cluster data + // Compute total size by summing all cluster sizes + size_t total_size = 0; + size_t num_clusters = centroids_.size(); + for (size_t cluster_id = 0; cluster_id < num_clusters; ++cluster_id) { + total_size += cluster_.view_cluster(cluster_id).size(); + } + + // Initialize mapping vectors with sentinel value + id_to_cluster_.resize(total_size, SIZE_MAX); + id_in_cluster_.resize(total_size, SIZE_MAX); + + // Populate mappings + for (size_t cluster_id = 0; cluster_id < num_clusters; ++cluster_id) { + auto cluster_view = cluster_.view_cluster(cluster_id); + size_t cluster_size = cluster_view.size(); + for (size_t pos = 0; pos < cluster_size; ++pos) { + size_t id = cluster_.get_global_id(cluster_id, pos); + id_to_cluster_[id] = cluster_id; + 
id_in_cluster_[id] = pos; + } + } + } + ///// Helper Methods ///// void validate_query_batch_size(size_t query_size) const { diff --git a/include/svs/orchestrators/ivf.h b/include/svs/orchestrators/ivf.h index 7c035f11c..f0f86f84b 100644 --- a/include/svs/orchestrators/ivf.h +++ b/include/svs/orchestrators/ivf.h @@ -27,6 +27,9 @@ class IVFInterface { ///// Backend information interface virtual std::string experimental_backend_string() const = 0; + + ///// Distance calculation + virtual double get_distance(size_t id, const AnonymousArray<1>& query) const = 0; }; template @@ -56,6 +59,19 @@ class IVFImpl : public manager::ManagerImpl { [[nodiscard]] std::string experimental_backend_string() const override { return std::string{typename_impl.begin(), typename_impl.end() - 1}; } + + ///// Distance Calculation + [[nodiscard]] double + get_distance(size_t id, const AnonymousArray<1>& query) const override { + return svs::lib::match( + QueryTypes{}, + query.type(), + [&](svs::lib::Type) { + auto query_span = std::span(get(query), query.size(0)); + return impl().get_distance(id, query_span); + } + ); + } }; ///// @@ -81,6 +97,14 @@ class IVF : public manager::IndexManager { return impl_->experimental_backend_string(); } + ///// Distance Calculation + template + double get_distance(size_t id, const QueryType& query) const { + // Create AnonymousArray from the query + AnonymousArray<1> query_array{query.data(), query.size()}; + return impl_->get_distance(id, query_array); + } + ///// Assembling template < manager::QueryTypeDefinition QueryTypes, diff --git a/tests/integration/ivf/index_search.cpp b/tests/integration/ivf/index_search.cpp index cec5fcdad..7d3e7f53d 100644 --- a/tests/integration/ivf/index_search.cpp +++ b/tests/integration/ivf/index_search.cpp @@ -139,3 +139,54 @@ CATCH_TEST_CASE("IVF Search", "[integration][search][ivf]") { test_search(data_f16, dist_ip, queries, gt_ip); test_search(data_f16, dist_ip, queries, gt_ip, 2); } + +CATCH_TEST_CASE("IVF get_distance", "[integration][ivf][get_distance]") { + auto datafile = test_dataset::data_svs_file(); + auto queries = test_dataset::queries(); + auto dist_l2 = svs::distance::DistanceL2(); + + auto data = svs::data::SimpleData::load(datafile); + + size_t num_threads = 2; + auto index = svs::IVF::assemble_from_file( + test_dataset::clustering_directory(), data, dist_l2, num_threads, 1 + ); + + // Test get_distance functionality with strict tolerance + constexpr double TOLERANCE = 1e-2; // 1% tolerance + + // Test with a few different IDs + std::vector test_ids = {0, 10, 50}; + if (data.size() > 100) { + test_ids.push_back(100); + } + + for (size_t test_id : test_ids) { + if (test_id >= data.size()) { + continue; + } + + // Get a query vector + size_t query_id = std::min(5, queries.size() - 1); + auto query = queries.get_datum(query_id); + + // Get distance from index + double index_distance = index.get_distance(test_id, query); + + // Compute expected distance from original data + auto datum = data.get_datum(test_id); + svs::distance::DistanceL2 dist_copy; + svs::distance::maybe_fix_argument(dist_copy, query); + double expected_distance = svs::distance::compute(dist_copy, query, datum); + + // Verify the distance is correct + double relative_diff = + std::abs((index_distance - expected_distance) / expected_distance); + CATCH_REQUIRE(relative_diff < TOLERANCE); + } + + // Test with out of bounds ID - should throw + CATCH_REQUIRE_THROWS_AS( + index.get_distance(data.size() + 1000, queries.get_datum(0)), svs::ANNException + ); +} From 
60434e628632e6b0b6c300afc32d43690b6623e4 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Tue, 9 Dec 2025 13:00:41 -0800 Subject: [PATCH 11/23] Entable intra_query_threads and separate clustering threads --- bindings/python/src/dynamic_ivf.cpp | 50 ++++++++++++++++++++----- include/svs/index/ivf/dynamic_ivf.h | 25 ++++++++++--- include/svs/orchestrators/dynamic_ivf.h | 19 +++++++--- 3 files changed, 73 insertions(+), 21 deletions(-) diff --git a/bindings/python/src/dynamic_ivf.cpp b/bindings/python/src/dynamic_ivf.cpp index 81b8f04ac..d14d992ec 100644 --- a/bindings/python/src/dynamic_ivf.cpp +++ b/bindings/python/src/dynamic_ivf.cpp @@ -51,7 +51,9 @@ svs::DynamicIVF build_from_array( py_contiguous_array_t py_data, py_contiguous_array_t py_ids, svs::DistanceType distance_type, - size_t num_threads + size_t num_index_threads, + size_t intra_query_threads, + size_t num_clustering_threads ) { auto dispatcher = svs::DistanceDispatcher(distance_type); return dispatcher([&](auto distance) { @@ -66,7 +68,9 @@ svs::DynamicIVF build_from_array( data_view, std::span(py_ids.data(), py_ids.size()), distance, - num_threads + num_index_threads, + intra_query_threads, + num_clustering_threads ); }); } @@ -81,7 +85,9 @@ Construct a DynamicIVF index over the given data, returning a searchable index. dataset. This may change in future releases. ids: Vector of ids to assign to each row in the dataset; must match dataset length and contain unique values. distance_type: The distance type to use for this dataset. - num_threads: Number of threads to use for index construction. + num_index_threads: Number of threads to use for index construction and search. + intra_query_threads: Number of threads to use for intra-query parallelism. Default: 1. + num_clustering_threads: Number of threads to use for clustering. Default: 0 (use num_index_threads). 
)"; template @@ -93,7 +99,9 @@ void add_build_specialization(py::class_& index) { py::arg("data"), py::arg("ids"), py::arg("distance_type"), - py::arg("num_threads"), + py::arg("num_index_threads"), + py::arg("intra_query_threads") = 1, + py::arg("num_clustering_threads") = 0, BUILD_FROM_ARRAY_DOC ); } @@ -108,10 +116,18 @@ svs::DynamicIVF dynamic_ivf_build_uncompressed( svs::VectorDataLoader> data_loader, std::span ids, svs::DistanceType distance_type, - size_t num_threads + size_t num_index_threads, + size_t intra_query_threads, + size_t num_clustering_threads ) { return svs::DynamicIVF::build( - parameters, std::move(data_loader), ids, distance_type, num_threads + parameters, + std::move(data_loader), + ids, + distance_type, + num_index_threads, + intra_query_threads, + num_clustering_threads ); } @@ -121,6 +137,8 @@ using DynamicIVFBuildFromFileDispatcher = svs::lib::Dispatcher< UnspecializedVectorDataLoader, std::span, svs::DistanceType, + size_t, + size_t, size_t>; DynamicIVFBuildFromFileDispatcher dynamic_ivf_build_from_file_dispatcher() { @@ -138,11 +156,19 @@ svs::DynamicIVF dynamic_ivf_build_from_file( UnspecializedVectorDataLoader data_loader, const py_contiguous_array_t& py_ids, svs::DistanceType distance_type, - size_t num_threads + size_t num_index_threads, + size_t intra_query_threads, + size_t num_clustering_threads ) { auto ids = std::span(py_ids.data(), py_ids.size()); return dynamic_ivf_build_from_file_dispatcher().invoke( - parameters, std::move(data_loader), ids, distance_type, num_threads + parameters, + std::move(data_loader), + ids, + distance_type, + num_index_threads, + intra_query_threads, + num_clustering_threads ); } @@ -154,7 +180,9 @@ Construct a DynamicIVF index using a data loader, returning the index. data_loader: Data loader (e.g., a VectorDataLoader instance). ids: Vector of ids to assign to each row in the dataset; must match dataset length and contain unique values. distance_type: The similarity function to use for this index. - num_threads: Number of threads to use for index construction. Default: 1. + num_index_threads: Number of threads to use for index construction and search. Default: 1. + intra_query_threads: Number of threads to use for intra-query parallelism. Default: 1. + num_clustering_threads: Number of threads to use for clustering. Default: 0 (use num_index_threads). Specializations compiled into the binary are listed below. 
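// ----------------------------------------------------------------------------
// [Editorial aside -- not part of the patch] A minimal C++ sketch of the
// thread-count split documented above: one pool for index construction and
// search, a per-query worker count, and an optional dedicated clustering pool.
// Assumes `float` data with L2 distance; the wrapper name and the concrete
// counts are illustrative only and follow the orchestrator signature added
// later in this patch.

#include "svs/orchestrators/dynamic_ivf.h"

#include <span>

svs::DynamicIVF build_with_thread_split(
    const svs::index::ivf::IVFBuildParameters& parameters,
    svs::data::ConstSimpleDataView<float> data,
    std::span<const size_t> ids
) {
    return svs::DynamicIVF::build<float>(
        parameters,
        data,
        ids,
        svs::DistanceType::L2,
        /*num_index_threads=*/8,      // index construction and search pool
        /*intra_query_threads=*/2,    // threads cooperating on a single query
        /*num_clustering_threads=*/16 // k-means only; 0 falls back to the index pool
    );
}
// ----------------------------------------------------------------------------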
@@ -367,7 +395,9 @@ void wrap(py::module& m) { py::arg("data_loader"), py::arg("ids"), py::arg("distance_type"), - py::arg("num_threads") = 1, + py::arg("num_index_threads") = 1, + py::arg("intra_query_threads") = 1, + py::arg("num_clustering_threads") = 0, fmt::format(DYNAMIC_IVF_BUILD_FROM_FILE_DOCSTRING_PROTO, dynamic).c_str() ); } diff --git a/include/svs/index/ivf/dynamic_ivf.h b/include/svs/index/ivf/dynamic_ivf.h index 3d8e2a2b8..431ae4cd3 100644 --- a/include/svs/index/ivf/dynamic_ivf.h +++ b/include/svs/index/ivf/dynamic_ivf.h @@ -944,16 +944,27 @@ auto build_dynamic_ivf( const SourceData& source_data, std::span ids, Distance distance, - ThreadPoolProto threadpool_proto + ThreadPoolProto threadpool_proto, + const size_t intra_query_thread_count = 1 ) { using I = uint32_t; using ElementType = typename SourceData::element_type; - using BlockedDataType = data:: - SimpleData>>; + // Use default lib::Allocator instead of HugepageAllocator to avoid memory issues + using BlockedDataType = + data::SimpleData>>; using Cluster = DynamicDenseCluster; auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); + // Use a small block size for IVF clusters (1MB instead of 1GB default) + // With many clusters, large blocks cause excessive memory usage + auto blocking_params = data::BlockingParameters{ + .blocksize_bytes = lib::PowerOfTwo(20) // 2^20 = 1MB + }; + auto blocked_allocator = data::Blocked>( + blocking_params, lib::Allocator() + ); + // Create dynamic clusters from the clustering std::vector clusters; clusters.reserve(clustering.size()); @@ -962,8 +973,9 @@ auto build_dynamic_ivf( const auto& cluster_assignments = clustering.cluster(cluster_idx); size_t cluster_size = cluster_assignments.size(); - // Create BlockedData for this cluster - auto cluster_data = BlockedDataType(cluster_size, source_data.dimensions()); + // Create BlockedData for this cluster with custom block size + auto cluster_data = + BlockedDataType(cluster_size, source_data.dimensions(), blocked_allocator); std::vector cluster_ids; cluster_ids.reserve(cluster_size); @@ -982,7 +994,8 @@ auto build_dynamic_ivf( std::move(clusters), ids, std::move(distance), - std::move(threadpool) + std::move(threadpool), + intra_query_thread_count ); } diff --git a/include/svs/orchestrators/dynamic_ivf.h b/include/svs/orchestrators/dynamic_ivf.h index d89096cf1..8714d0490 100644 --- a/include/svs/orchestrators/dynamic_ivf.h +++ b/include/svs/orchestrators/dynamic_ivf.h @@ -18,6 +18,7 @@ #include "svs/index/ivf/dynamic_ivf.h" #include "svs/index/ivf/index.h" +#include "svs/lib/bfloat16.h" #include "svs/orchestrators/ivf.h" #include "svs/orchestrators/manager.h" @@ -226,7 +227,9 @@ class DynamicIVF : public manager::IndexManager { const DataProto& data_proto, std::span ids, Distance distance, - ThreadPoolProto threadpool_proto + ThreadPoolProto threadpool_proto, + size_t intra_query_threads = 1, + size_t num_clustering_threads = 0 ) { // Handle DistanceType enum by dispatching to concrete distance types if constexpr (std::is_same_v, DistanceType>) { @@ -237,7 +240,9 @@ class DynamicIVF : public manager::IndexManager { data_proto, ids, distance_function, - std::move(threadpool_proto) + std::move(threadpool_proto), + intra_query_threads, + num_clustering_threads ); }); } else { @@ -247,10 +252,13 @@ class DynamicIVF : public manager::IndexManager { auto data = svs::detail::dispatch_load(data_proto, threadpool); // Build clustering first - using BuildType = float; // Use float for building centroids + using BuildType = BFloat16; 
// Use BFloat16 for building centroids // Note: build_clustering takes threadpool by value, so we need a copy auto clustering = [&]() { - auto threadpool_copy = threads::NativeThreadPool(threadpool.size()); + size_t clustering_threads = (num_clustering_threads == 0) + ? threadpool.size() + : num_clustering_threads; + auto threadpool_copy = threads::NativeThreadPool(clustering_threads); return index::ivf::build_clustering( build_parameters, data, distance, std::move(threadpool_copy), false ); @@ -263,7 +271,8 @@ class DynamicIVF : public manager::IndexManager { data, // Pass by const reference - build_dynamic_ivf will create BlockedData ids, distance, - std::move(threadpool) + std::move(threadpool), + intra_query_threads ); return DynamicIVF( From da236b5efe4ab874bdae53c99e6a3610d7d9194e Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Tue, 9 Dec 2025 14:30:59 -0800 Subject: [PATCH 12/23] Add examples and support fp16 clusters from Python --- bindings/python/src/ivf.cpp | 105 ++++++++++-- examples/python/example_ivf.py | 195 ++++++++++++++++++++++ examples/python/example_ivf_dynamic.py | 209 ++++++++++++++++++++++++ include/svs/orchestrators/dynamic_ivf.h | 10 +- 4 files changed, 504 insertions(+), 15 deletions(-) create mode 100644 examples/python/example_ivf.py create mode 100644 examples/python/example_ivf_dynamic.py diff --git a/bindings/python/src/ivf.cpp b/bindings/python/src/ivf.cpp index 06a651fe7..55e3519c0 100644 --- a/bindings/python/src/ivf.cpp +++ b/bindings/python/src/ivf.cpp @@ -21,6 +21,9 @@ #include "svs/python/dispatch.h" #include "svs/python/manager.h" +// pybind11 +#include // For std::variant support + // svs #include "svs/core/data/simple.h" #include "svs/core/distance.h" @@ -54,10 +57,12 @@ namespace svs::python::ivf { // This sparse clustering can be saved with centroids stored as float datatype. // While assembling, the sparse clustering is used to create DenseClusters and // centroids datatype can be changed as per the search specializations. -// By default, BFloat16 centroids are used to take advantage of AMX -// template -using Clustering = +// Support both BFloat16 and Float16 centroids to match data types and leverage AMX. 
+using ClusteringBF16 = svs::index::ivf::Clustering, uint32_t>; +using ClusteringF16 = + svs::index::ivf::Clustering, uint32_t>; +using Clustering = std::variant; namespace detail { @@ -73,12 +78,18 @@ svs::IVF assemble_uncompressed( size_t num_threads, size_t intra_query_threads = 1 ) { - return svs::IVF::assemble_from_clustering( - std::move(clustering), - std::move(data), - distance_type, - num_threads, - intra_query_threads + // Use std::visit to handle the variant clustering type + return std::visit( + [&](auto&& actual_clustering) { + return svs::IVF::assemble_from_clustering( + std::move(actual_clustering), + std::move(data), + distance_type, + num_threads, + intra_query_threads + ); + }, + std::move(clustering) ); } @@ -145,9 +156,21 @@ Clustering build_uncompressed( svs::DistanceType distance_type, size_t num_threads ) { - return svs::IVF::build_clustering( + // Choose build type for clustering to leverage AMX instructions: + // - Float32 data -> BFloat16 (AMX supports BFloat16) + // - Float16 data -> Float16 (AMX supports Float16) + // - BFloat16 data -> BFloat16 (already optimal) + using BuildType = std::conditional_t, svs::BFloat16, T>; + auto clustering = svs::IVF::build_clustering( parameters, std::move(data), distance_type, num_threads ); + + // Return as variant - Float16 or BFloat16 based on BuildType + if constexpr (std::is_same_v) { + return Clustering(std::in_place_index<1>, std::move(clustering)); + } else { + return Clustering(std::in_place_index<0>, std::move(clustering)); + } } template @@ -182,9 +205,21 @@ Clustering uncompressed_build_from_array( auto data = svs::data::SimpleData>(view.size(), view.dimensions()); svs::data::copy(view, data); - return svs::IVF::build_clustering( + // Choose build type for clustering to leverage AMX instructions: + // - Float32 data -> BFloat16 (AMX supports BFloat16) + // - Float16 data -> Float16 (AMX supports Float16) + // - BFloat16 data -> BFloat16 (already optimal) + using BuildType = std::conditional_t, svs::BFloat16, T>; + auto clustering = svs::IVF::build_clustering( parameters, std::move(data), distance_type, num_threads ); + + // Return as variant - Float16 or BFloat16 based on BuildType + if constexpr (std::is_same_v) { + return Clustering(std::in_place_index<1>, std::move(clustering)); + } else { + return Clustering(std::in_place_index<0>, std::move(clustering)); + } } template void register_ivf_build_from_array(Dispatcher& dispatcher) { @@ -480,13 +515,31 @@ void wrap_build_from_file(py::class_& clustering) { // Save the sparse clustering to a directory void save_clustering(Clustering& clustering, const std::string& clustering_path) { - svs::lib::save_to_disk(clustering, clustering_path); + std::visit( + [&](auto&& actual_clustering) { + svs::lib::save_to_disk(actual_clustering, clustering_path); + }, + clustering + ); } -// Save the sparse clustering to a directory +// Load the sparse clustering from a directory +// Try loading as BFloat16 first, then Float16 if that fails auto load_clustering(const std::string& clustering_path, size_t num_threads = 1) { auto threadpool = threads::as_threadpool(num_threads); - return svs::lib::load_from_disk(clustering_path, threadpool); + try { + auto bf16_clustering = svs::lib::load_from_disk< + svs::index::ivf::Clustering, uint32_t>>( + clustering_path, threadpool + ); + return Clustering(std::in_place_index<0>, std::move(bf16_clustering)); + } catch (...) 
{ + auto f16_clustering = svs::lib::load_from_disk< + svs::index::ivf::Clustering, uint32_t>>( + clustering_path, threadpool + ); + return Clustering(std::in_place_index<1>, std::move(f16_clustering)); + } } } // namespace detail @@ -590,6 +643,30 @@ void wrap(py::module& m) { // Reconstruction. // add_reconstruct_interface(ivf); + // Register both clustering types that make up the variant + name = "ClusteringBFloat16"; + py::class_ clustering_bf16(m, name.c_str()); + clustering_bf16.def( + "save", + [](ClusteringBF16& clustering, const std::string& clustering_path) { + svs::lib::save_to_disk(clustering, clustering_path); + }, + py::arg("clustering_directory"), + "Save a constructed IVF clustering to disk." + ); + + name = "ClusteringFloat16"; + py::class_ clustering_f16(m, name.c_str()); + clustering_f16.def( + "save", + [](ClusteringF16& clustering, const std::string& clustering_path) { + svs::lib::save_to_disk(clustering, clustering_path); + }, + py::arg("clustering_directory"), + "Save a constructed IVF clustering to disk." + ); + + // Register the variant type as the main Clustering class name = "Clustering"; py::class_ clustering( m, name.c_str(), "Top level class for sparse IVF clustering" diff --git a/examples/python/example_ivf.py b/examples/python/example_ivf.py new file mode 100644 index 000000000..1027ff4b7 --- /dev/null +++ b/examples/python/example_ivf.py @@ -0,0 +1,195 @@ +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Example: Static IVF Index + +This example demonstrates how to: +1. Build clustering for IVF index +2. Assemble an IVF index from clustering +3. Search the index +4. Save and reload clustering +5. Load index from saved clustering +""" + +import os +import svs +import numpy as np + +def main(): + print("=" * 80) + print("Static IVF Index Example") + print("=" * 80) + + # [generate-dataset] + # Create a test dataset + test_data_dir = "./example_data_ivf" + print(f"\n1. Generating test dataset in '{test_data_dir}'...") + + svs.generate_test_dataset( + 10000, # Create 10,000 vectors in the dataset + 1000, # Generate 1,000 query vectors + 128, # Set vector dimensionality to 128 + test_data_dir, # Directory where results will be generated + data_seed = 1234, # Random seed for reproducibility + query_seed = 5678, # Random seed for reproducibility + num_threads = 4, # Number of threads to use + distance = svs.DistanceType.L2, # Distance metric + ) + print(" ✓ Dataset generated") + # [generate-dataset] + + # [build-parameters] + # Configure clustering parameters for IVF + print("\n2. 
Configuring build parameters...") + build_parameters = svs.IVFBuildParameters( + num_centroids = 50, # Number of clusters/centroids + minibatch_size = 2000, # Minibatch size for k-means + num_iterations = 20, # Number of k-means iterations + is_hierarchical = True, # Use hierarchical k-means + training_fraction = 0.5, # Fraction of data for training + seed = 0xc0ffee, # Random seed for clustering + ) + print(f" ✓ Configured {build_parameters.num_centroids} centroids") + # [build-parameters] + + # [load-data] + # Load the dataset + print("\n3. Loading dataset...") + data_path = os.path.join(test_data_dir, "data.fvecs") + data_loader = svs.VectorDataLoader( + data_path, + svs.DataType.float32, + dims = 128 + ) + print(f" ✓ Data loader created") + # [load-data] + + # [build-clustering] + # Build the clustering + print("\n4. Building clustering (k-means)...") + clustering = svs.Clustering.build( + build_parameters = build_parameters, + data_loader = data_loader, + distance = svs.DistanceType.L2, + num_threads = 4, + ) + print(f" ✓ Clustering built with {build_parameters.num_centroids} centroids") + # [build-clustering] + + # [assemble-index] + # Assemble the IVF index from clustering + print("\n5. Assembling IVF index from clustering...") + index = svs.IVF.assemble_from_clustering( + clustering = clustering, + data_loader = data_loader, + distance = svs.DistanceType.L2, + num_threads = 4, + intra_query_threads = 1, + ) + print(f" ✓ Index assembled with {index.size} vectors") + print(f" ✓ Index dimensions: {index.dimensions}") + # [assemble-index] + + # [configure-search] + # Configure search parameters + print("\n6. Configuring search parameters...") + search_params = svs.IVFSearchParameters( + n_probes = 10, # Number of clusters to search + k_reorder = 1.0 # Reorder factor (1.0 = no reordering) + ) + index.search_parameters = search_params + print(f" ✓ Search parameters: n_probes={search_params.n_probes}") + # [configure-search] + + # [search] + # Perform search + print("\n7. Searching the index...") + queries = svs.read_vecs(os.path.join(test_data_dir, "queries.fvecs")) + groundtruth = svs.read_vecs(os.path.join(test_data_dir, "groundtruth.ivecs")) + + num_neighbors = 10 + I, D = index.search(queries, num_neighbors) + recall = svs.k_recall_at(groundtruth, I, num_neighbors, num_neighbors) + print(f" ✓ Recall@{num_neighbors}: {recall:.4f}") + print(f" ✓ Result shape: {I.shape}") + # [search] + + # [save-clustering] + # Save the clustering for later use + print("\n8. Saving clustering...") + clustering_path = os.path.join(test_data_dir, "clustering") + clustering.save(clustering_path) + print(f" ✓ Clustering saved to '{clustering_path}'") + # [save-clustering] + + # [load-and-assemble] + # Load clustering and assemble a new index + print("\n9. Loading clustering and assembling new index...") + loaded_clustering = svs.Clustering.load_clustering(clustering_path) + + new_index = svs.IVF.assemble_from_clustering( + clustering = loaded_clustering, + data_loader = data_loader, + distance = svs.DistanceType.L2, + num_threads = 4, + intra_query_threads = 1, + ) + print(f" ✓ New index assembled with {new_index.size} vectors") + # [load-and-assemble] + + # [assemble-from-file] + # Or directly assemble from file + print("\n10. 
Assembling index directly from clustering file...") + index_from_file = svs.IVF.assemble_from_file( + clustering_path = clustering_path, + data_loader = data_loader, + distance = svs.DistanceType.L2, + num_threads = 4, + intra_query_threads = 1, + ) + print(f" ✓ Index assembled with {index_from_file.size} vectors") + # [assemble-from-file] + + # [search-verification] + # Verify both indices produce the same results + print("\n11. Verifying search results consistency...") + index_from_file.search_parameters = search_params + I2, D2 = index_from_file.search(queries, num_neighbors) + recall2 = svs.k_recall_at(groundtruth, I2, num_neighbors, num_neighbors) + print(f" ✓ Recall@{num_neighbors}: {recall2:.4f}") + + if np.allclose(D, D2): + print(" ✓ Both indices produce identical results") + else: + print(" ✗ Warning: Results differ slightly (expected due to floating point)") + # [search-verification] + + # [tune-search-parameters] + # Experiment with different search parameters + print("\n12. Tuning search parameters...") + for n_probes in [5, 10, 20]: + search_params.n_probes = n_probes + index.search_parameters = search_params + I_tuned, _ = index.search(queries, num_neighbors) + recall_tuned = svs.k_recall_at(groundtruth, I_tuned, num_neighbors, num_neighbors) + print(f" ✓ n_probes={n_probes:2d}: Recall@{num_neighbors} = {recall_tuned:.4f}") + # [tune-search-parameters] + + print("\n" + "=" * 80) + print("Example completed successfully!") + print("=" * 80) + +if __name__ == "__main__": + main() diff --git a/examples/python/example_ivf_dynamic.py b/examples/python/example_ivf_dynamic.py new file mode 100644 index 000000000..264585969 --- /dev/null +++ b/examples/python/example_ivf_dynamic.py @@ -0,0 +1,209 @@ +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Example: Dynamic IVF Index + +This example demonstrates how to: +1. Build a dynamic IVF index from scratch +2. Add new vectors to the index +3. Remove vectors from the index +4. Search the index +5. Compute distances between queries and indexed vectors +6. Save and reload the index +""" + +import os +import svs +import numpy as np + +def main(): + print("=" * 80) + print("Dynamic IVF Index Example") + print("=" * 80) + + # [generate-dataset] + # Create a test dataset with 10,000 vectors + test_data_dir = "./example_data_ivf_dynamic" + print(f"\n1. Generating test dataset in '{test_data_dir}'...") + + svs.generate_test_dataset( + 1000, # Create 1000 vectors in the dataset + 100, # Generate 100 query vectors + 128, # Set vector dimensionality to 128 + test_data_dir, # Directory where results will be generated + data_seed = 1234, # Random seed for reproducibility + query_seed = 5678, # Random seed for reproducibility + num_threads = 4, # Number of threads to use + distance = svs.DistanceType.L2, # Distance metric + ) + print(" ✓ Dataset generated") + # [generate-dataset] + + # [build-parameters] + # Configure clustering parameters for IVF + print("\n2. 
Configuring build parameters...") + build_parameters = svs.IVFBuildParameters( + num_centroids = 20, # Number of clusters/centroids + minibatch_size = 1000, # Minibatch size for k-means + num_iterations = 10, # Number of k-means iterations + is_hierarchical = True, # Use hierarchical k-means + training_fraction = 0.1, # Fraction of data for training + seed = 0xc0ffee, # Random seed for clustering + ) + print(f" ✓ Configured {build_parameters.num_centroids} centroids") + # [build-parameters] + + # [build-index] + # Build the dynamic IVF index with initial vectors + print("\n3. Building dynamic IVF index...") + n = 900 # Use 900 vectors for initial index + + # Load the data and create IDs + data = svs.read_vecs(os.path.join(test_data_dir, "data.fvecs")) + ids = np.arange(data.shape[0]).astype('uint64') + + # Build the index + index = svs.DynamicIVF.build( + parameters = build_parameters, + data = data[:n], + ids = ids[:n], + distance_type = svs.DistanceType.L2, + num_index_threads = 4, + ) + print(f" ✓ Index built with {index.size} vectors") + print(f" ✓ Index dimensions: {index.dimensions}") + # [build-index] + + # [add-vectors] + # Add new vectors to the index + print("\n4. Adding 100 new vectors to the index...") + initial_size = index.size + index.add(data[n:n+100], ids[n:n+100]) + print(f" ✓ Index size: {initial_size} → {index.size}") + # [add-vectors] + + # [search-before-delete] + # Search before deletion + print("\n5. Searching the index...") + queries = svs.read_vecs(os.path.join(test_data_dir, "queries.fvecs")) + groundtruth = svs.read_vecs(os.path.join(test_data_dir, "groundtruth.ivecs")) + + # Configure search parameters + search_params = svs.IVFSearchParameters( + n_probes = 10, # Number of clusters to search + k_reorder = 1.0 # Reorder factor + ) + index.search_parameters = search_params + + # Perform search + num_neighbors = 10 + I, D = index.search(queries, num_neighbors) + recall = svs.k_recall_at(groundtruth, I, num_neighbors, num_neighbors) + print(f" ✓ Recall@{num_neighbors}: {recall:.4f}") + # [search-before-delete] + + # [get-distance] + # Compute distance between a query and a specific indexed vector + print("\n6. Computing distances with get_distance()...") + query_vector = queries[0] + test_id = 100 + + if index.has_id(test_id): + distance = index.get_distance(test_id, query_vector) + print(f" ✓ Distance from query to vector {test_id}: {distance:.6f}") + else: + print(f" ✗ Vector {test_id} not found in index") + # [get-distance] + + # [remove-vectors] + # Remove vectors from the index + print("\n7. Removing the first 50 vectors...") + ids_to_delete = ids[:50] + num_deleted = index.delete(ids_to_delete) + print(f" ✓ Deleted {num_deleted} vectors") + print(f" ✓ Index size after deletion: {index.size}") + + # Verify vectors are deleted + if not index.has_id(25): + print(f" ✓ Verified: Vector ID 25 no longer in index") + # [remove-vectors] + + # [consolidate-index] + # Consolidate and compact the index + print("\n8. Consolidating and compacting the index...") + index.consolidate().compact(1000) + print(f" ✓ Index consolidated and compacted") + # [consolidate-index] + + # [search-after-modifications] + # Search after modifications + print("\n9. Searching after modifications...") + I, D = index.search(queries, num_neighbors) + recall = svs.k_recall_at(groundtruth, I, num_neighbors, num_neighbors) + print(f" ✓ Recall@{num_neighbors}: {recall:.4f}") + # [search-after-modifications] + + # [tune-search-parameters] + # Experiment with different search parameters + print("\n10. 
Tuning search parameters...") + for n_probes in [5, 10, 20, 30]: + search_params.n_probes = n_probes + index.search_parameters = search_params + + I, D = index.search(queries, num_neighbors) + recall = svs.k_recall_at(groundtruth, I, num_neighbors, num_neighbors) + print(f" n_probes={n_probes:2d} → Recall@{num_neighbors}: {recall:.4f}") + # [tune-search-parameters] + + # [save-index] + # Save the index to disk + print("\n11. Saving the index...") + config_dir = os.path.join(test_data_dir, "saved_config") + data_dir = os.path.join(test_data_dir, "saved_data") + + # Create directories if they don't exist + os.makedirs(config_dir, exist_ok=True) + os.makedirs(data_dir, exist_ok=True) + + index.save(config_dir, data_dir) + print(f" ✓ Index saved to:") + print(f" Config: {config_dir}") + print(f" Data: {data_dir}") + # [save-index] + + # [load-index] + # Note: DynamicIVF.load() is being implemented for easier reload + # For now, the index has been successfully saved and can be accessed at: + print("\n12. Index saved successfully!") + print(f" ✓ Config: {config_dir}") + print(f" ✓ Data: {data_dir}") + print(f" Note: load() API coming soon for simplified reload") + # [load-index] + + # [get-all-ids] + # Inspect final index state + print("\n13. Final index inspection...") + all_ids = index.all_ids() + print(f" ✓ Index contains {len(all_ids)} unique IDs") + print(f" ✓ ID range: [{np.min(all_ids)}, {np.max(all_ids)}]") + # [get-all-ids] + + print("\n" + "=" * 80) + print("Dynamic IVF Example Completed Successfully!") + print("=" * 80) + + +if __name__ == "__main__": + main() diff --git a/include/svs/orchestrators/dynamic_ivf.h b/include/svs/orchestrators/dynamic_ivf.h index 8714d0490..bb445b6c7 100644 --- a/include/svs/orchestrators/dynamic_ivf.h +++ b/include/svs/orchestrators/dynamic_ivf.h @@ -252,7 +252,15 @@ class DynamicIVF : public manager::IndexManager { auto data = svs::detail::dispatch_load(data_proto, threadpool); // Build clustering first - using BuildType = BFloat16; // Use BFloat16 for building centroids + // Choose build type for clustering to leverage AMX instructions: + // - Float32 data -> BFloat16 (AMX supports BFloat16) + // - Float16 data -> Float16 (AMX supports Float16) + // - BFloat16 data -> BFloat16 (already optimal) + using DataElementType = typename decltype(data)::element_type; + using BuildType = std::conditional_t< + std::is_same_v, + BFloat16, + DataElementType>; // Note: build_clustering takes threadpool by value, so we need a copy auto clustering = [&]() { size_t clustering_threads = (num_clustering_threads == 0) From 9dcec84bb4d14b2fb771369846d06c68462ca82e Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Wed, 10 Dec 2025 14:27:56 -0800 Subject: [PATCH 13/23] Simplify get_distance implementation --- include/svs/index/ivf/clustering.h | 6 +++++ include/svs/index/ivf/extensions.h | 41 ++++-------------------------- 2 files changed, 11 insertions(+), 36 deletions(-) diff --git a/include/svs/index/ivf/clustering.h b/include/svs/index/ivf/clustering.h index 8f0555171..e62d4c9ab 100644 --- a/include/svs/index/ivf/clustering.h +++ b/include/svs/index/ivf/clustering.h @@ -335,6 +335,12 @@ class DenseClusteredDataset { size_t get_prefetch_offset() const { return prefetch_offset_; } void set_prefetch_offset(size_t offset) { prefetch_offset_ = offset; } + + // Cluster access + const DenseCluster& operator[](size_t cluster) const { + return clusters_[cluster]; + } + auto get_datum(size_t cluster, size_t id) const { return clusters_.at(cluster).get_datum(id); } diff --git 
a/include/svs/index/ivf/extensions.h b/include/svs/index/ivf/extensions.h index e8e0688cb..cf71ee7c1 100644 --- a/include/svs/index/ivf/extensions.h +++ b/include/svs/index/ivf/extensions.h @@ -237,13 +237,7 @@ struct ComputeDistanceType { // CPO for distance computation inline constexpr ComputeDistanceType get_distance_ext{}; -// Overload for container types with view_cluster/get_datum methods (e.g., -// DenseClusteredDataset) template - requires requires(const Clusters& c, size_t i) { - { c.view_cluster(i) }; - { c.get_datum(i, i) }; - } double svs_invoke( svs::tag_t, const Clusters& clusters, @@ -252,37 +246,12 @@ double svs_invoke( size_t pos, const Query& query ) { - // Get distance function - auto cluster_data = clusters.view_cluster(cluster_idx); - auto dist_f = per_thread_batch_search_setup(cluster_data, distance); - - // Get the vector from the cluster - auto indexed_span = clusters.get_datum(cluster_idx, pos); - - // Compute the distance using the appropriate distance function - auto dist = svs::distance::compute(dist_f, query, indexed_span); - - return static_cast(dist); -} - -// Overload for vector-like containers (e.g., std::vector) -template - requires requires(const Clusters& c, size_t i) { - { c[i] }; - { c[i].data_ }; - { c[i].get_datum(i) }; - } -double svs_invoke( - svs::tag_t, - const Clusters& clusters, - const Distance& distance, - size_t cluster_idx, - size_t pos, - const Query& query -) { - // Get distance function + // Get cluster reference const auto& cluster = clusters[cluster_idx]; - auto dist_f = per_thread_batch_search_setup(cluster.data_, distance); + + // Get distance function using the cluster's data view + auto dist_f = per_thread_batch_search_setup(cluster.view_cluster(), distance); + svs::distance::maybe_fix_argument(dist_f, query); // Get the vector from the cluster auto indexed_span = cluster.get_datum(pos); From 8a2270c8ade1dde2be8b41b8e7e231f59caaee61 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Wed, 10 Dec 2025 17:57:34 -0800 Subject: [PATCH 14/23] progress --- include/svs/index/ivf/dynamic_ivf.h | 21 ++++++++++++++++----- include/svs/index/ivf/extensions.h | 23 +++++++++++------------ 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/include/svs/index/ivf/dynamic_ivf.h b/include/svs/index/ivf/dynamic_ivf.h index 431ae4cd3..2f5a8daaf 100644 --- a/include/svs/index/ivf/dynamic_ivf.h +++ b/include/svs/index/ivf/dynamic_ivf.h @@ -337,6 +337,16 @@ class DynamicIVFIndex { return clusters_[cluster_idx].get_datum(pos); } + /// @brief Get raw data by cluster and local position (for extension compatibility) + auto get_datum(size_t cluster_idx, size_t local_pos) const { + return clusters_[cluster_idx].get_datum(local_pos); + } + + /// @brief Get secondary data by cluster and local position (for LeanVec) + auto get_secondary(size_t cluster_idx, size_t local_pos) const { + return clusters_[cluster_idx].data_.get_secondary(local_pos); + } + ///// Distance /// @brief Compute the distance between an external vector and a vector in the index. 
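For illustration: the simplified get_distance extension and the accessors added in the hunk
above rely on a small access contract. The cluster container exposes operator[], each cluster
exposes view_cluster() and get_datum(pos), and the distance helper computes exactly one
query-to-datum distance per call. The stand-alone sketch below approximates that flow under
those assumptions; ToyCluster, squared_l2, and get_distance_sketch are hypothetical names
rather than SVS API, and plain squared L2 stands in for the functor that
per_thread_batch_search_setup() would normally return.

#include <cstddef>
#include <iostream>
#include <span>
#include <vector>

// Hypothetical dense cluster: row-major storage with a fixed dimensionality.
struct ToyCluster {
    size_t dims;
    std::vector<float> storage;

    // One row of the cluster, analogous to a cluster's get_datum().
    std::span<const float> get_datum(size_t pos) const {
        return {storage.data() + pos * dims, dims};
    }
    // The cluster's data view, analogous to view_cluster().
    const ToyCluster& view_cluster() const { return *this; }
};

// Stand-in distance functor: plain squared L2 between two vectors.
inline double squared_l2(std::span<const float> a, std::span<const float> b) {
    double acc = 0.0;
    for (size_t i = 0; i < a.size(); ++i) {
        double d = static_cast<double>(a[i]) - static_cast<double>(b[i]);
        acc += d * d;
    }
    return acc;
}

// Mirrors the shape of the simplified overload: select the cluster, view its data,
// pull the requested datum, and compute a single distance against the query.
template <typename Clusters>
double get_distance_sketch(
    const Clusters& clusters, size_t cluster_idx, size_t pos, std::span<const float> query
) {
    const auto& data = clusters[cluster_idx].view_cluster();
    return squared_l2(query, data.get_datum(pos));
}

int main() {
    std::vector<ToyCluster> clusters{ToyCluster{2, {1.0f, 2.0f, 3.0f, 4.0f}}};
    std::vector<float> query{1.0f, 1.0f};
    // Row 1 of cluster 0 is {3, 4}; squared L2 distance to {1, 1} is 4 + 9 = 13.
    std::cout << get_distance_sketch(clusters, 0, 1, query) << '\n';
    return 0;
}

Keeping the overload generic over the container, requiring only operator[], view_cluster(),
and get_datum(), appears to be what lets the same code path serve both the static
DenseClusteredDataset and the dynamic index's cluster storage.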
@@ -553,14 +563,15 @@ class DynamicIVFIndex { auto buffer_leaves = create_leaf_buffers(buffer_leaves_size); // Prepare cluster search scratch space (distance copy) - auto scratch = - extensions::per_thread_batch_search_setup(centroids_, distance_); + // Pass cluster data (not centroids) to support quantized datasets + auto scratch = extensions::per_thread_batch_search_setup( + clusters_[0].data_, distance_ + ); // Execute search with intra-query parallelism - // Note: We pass centroids_ as the data parameter (unused) and this as - // cluster + // Pass cluster data as first parameter to enable dataset-specific overrides extensions::per_thread_batch_search( - centroids_, + clusters_[0].data_, *this, buffer_centroids, buffer_leaves, diff --git a/include/svs/index/ivf/extensions.h b/include/svs/index/ivf/extensions.h index cf71ee7c1..995bca32c 100644 --- a/include/svs/index/ivf/extensions.h +++ b/include/svs/index/ivf/extensions.h @@ -230,33 +230,32 @@ struct ComputeDistanceType { size_t pos, const Query& query ) const { - return svs_invoke(*this, clusters, distance, cluster_idx, pos, query); + return svs_invoke( + *this, clusters[cluster_idx].view_cluster(), distance, pos, query + ); } }; // CPO for distance computation inline constexpr ComputeDistanceType get_distance_ext{}; -template +// Default overload +template double svs_invoke( svs::tag_t, - const Clusters& clusters, + const Data& data, const Distance& distance, - size_t cluster_idx, size_t pos, const Query& query ) { - // Get cluster reference - const auto& cluster = clusters[cluster_idx]; - - // Get distance function using the cluster's data view - auto dist_f = per_thread_batch_search_setup(cluster.view_cluster(), distance); + // Get distance function + auto dist_f = per_thread_batch_search_setup(data, distance); svs::distance::maybe_fix_argument(dist_f, query); - // Get the vector from the cluster - auto indexed_span = cluster.get_datum(pos); + // Get the vector + auto indexed_span = data.get_datum(pos); - // Compute the distance using the appropriate distance function + // Compute the distance auto dist = svs::distance::compute(dist_f, query, indexed_span); return static_cast(dist); From 74b2672803a7fc1397d2a97e7d8dafecc0573b5b Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Thu, 11 Dec 2025 16:55:40 -0800 Subject: [PATCH 15/23] Restructured the Python APIs for dynamic IVF to match static --- .../python/include/svs/python/dynamic_ivf.h | 14 +- bindings/python/include/svs/python/ivf.h | 16 + bindings/python/src/dynamic_ivf.cpp | 444 ++++++++++-------- bindings/python/src/ivf.cpp | 10 - examples/python/example_ivf_dynamic.py | 73 ++- include/svs/index/ivf/clustering.h | 50 +- include/svs/index/ivf/dynamic_ivf.h | 95 +--- include/svs/index/ivf/extensions.h | 22 +- include/svs/orchestrators/dynamic_ivf.h | 186 +++++--- tests/svs/index/ivf/dynamic_ivf.cpp | 341 ++++++++------ 10 files changed, 722 insertions(+), 529 deletions(-) diff --git a/bindings/python/include/svs/python/dynamic_ivf.h b/bindings/python/include/svs/python/dynamic_ivf.h index d18a608c0..48486d7a6 100644 --- a/bindings/python/include/svs/python/dynamic_ivf.h +++ b/bindings/python/include/svs/python/dynamic_ivf.h @@ -25,13 +25,13 @@ namespace svs::python::dynamic_ivf { // Specializations template void for_standard_specializations(F&& f) { -#define X(Q, T, Dist, N) f.template operator()() - X(float, float, DistanceL2, Dynamic); - X(float, float, DistanceIP, Dynamic); - X(float, svs::Float16, DistanceL2, Dynamic); - X(float, svs::Float16, DistanceIP, Dynamic); - 
X(float, svs::BFloat16, DistanceL2, Dynamic); - X(float, svs::BFloat16, DistanceIP, Dynamic); +#define X(Q, T, N) f.template operator()() + X(float, float, Dynamic); + X(float, float, Dynamic); + X(float, svs::Float16, Dynamic); + X(float, svs::Float16, Dynamic); + X(float, svs::BFloat16, Dynamic); + X(float, svs::BFloat16, Dynamic); #undef X } diff --git a/bindings/python/include/svs/python/ivf.h b/bindings/python/include/svs/python/ivf.h index 3fdbb53b3..ab9be6449 100644 --- a/bindings/python/include/svs/python/ivf.h +++ b/bindings/python/include/svs/python/ivf.h @@ -20,7 +20,9 @@ #include "svs/python/common.h" #include "svs/python/core.h" +#include "svs/core/data/simple.h" #include "svs/core/distance.h" +#include "svs/index/ivf/clustering.h" #include "svs/lib/bfloat16.h" #include "svs/lib/datatype.h" #include "svs/lib/float16.h" @@ -30,6 +32,8 @@ #include #include +#include + namespace svs::python { namespace ivf_specializations { /// @@ -61,6 +65,18 @@ template void for_standard_specializations(F&& f) { } // namespace ivf_specializations namespace ivf { + +// The build process in IVF uses Kmeans to get centroids and assignments of data. +// This sparse clustering can be saved with centroids stored as float datatype. +// While assembling, the sparse clustering is used to create DenseClusters and +// centroids datatype can be changed as per the search specializations. +// Support both BFloat16 and Float16 centroids to match data types and leverage AMX. +using ClusteringBF16 = + svs::index::ivf::Clustering, uint32_t>; +using ClusteringF16 = + svs::index::ivf::Clustering, uint32_t>; +using Clustering = std::variant; + template void add_interface(pybind11::class_& manager) { manager.def_property_readonly( "experimental_backend_string", diff --git a/bindings/python/src/dynamic_ivf.cpp b/bindings/python/src/dynamic_ivf.cpp index d14d992ec..1b61ea4b0 100644 --- a/bindings/python/src/dynamic_ivf.cpp +++ b/bindings/python/src/dynamic_ivf.cpp @@ -43,152 +43,208 @@ namespace py = pybind11; namespace svs::python::dynamic_ivf { -namespace { +// Reuse the Clustering type from static IVF since clustering is the same +using Clustering = svs::python::ivf::Clustering; -template -svs::DynamicIVF build_from_array( - const svs::index::ivf::IVFBuildParameters& parameters, - py_contiguous_array_t py_data, - py_contiguous_array_t py_ids, +using IVFAssembleTypes = + std::variant; + +///// +///// Dispatch Invocation +///// + +///// +///// Assembly from Clustering +///// + +template +svs::DynamicIVF assemble_uncompressed( + Clustering clustering, + svs::VectorDataLoader> data, + std::span ids, svs::DistanceType distance_type, - size_t num_index_threads, - size_t intra_query_threads, - size_t num_clustering_threads + size_t num_threads, + size_t intra_query_threads = 1 ) { - auto dispatcher = svs::DistanceDispatcher(distance_type); - return dispatcher([&](auto distance) { - // Create a view for building - build_clustering needs immutable data - // Note: Even though we use SimpleDataView (non-const), the data won't be modified - // during clustering, and BlockedData created from it will have mutable element type - auto data_view = data::SimpleDataView( - const_cast(py_data.data()), py_data.shape(0), py_data.shape(1) - ); - return svs::DynamicIVF::build( - parameters, - data_view, - std::span(py_ids.data(), py_ids.size()), - distance, - num_index_threads, - intra_query_threads, - num_clustering_threads - ); - }); + // Use std::visit to handle the variant clustering type + return std::visit( + [&](auto&& 
actual_clustering) { + return svs::DynamicIVF::assemble_from_clustering( + std::move(actual_clustering), + std::move(data), + ids, + distance_type, + num_threads, + intra_query_threads + ); + }, + std::move(clustering) + ); } -const char* BUILD_FROM_ARRAY_DOC = R"( -Construct a DynamicIVF index over the given data, returning a searchable index. +template +void register_uncompressed_ivf_assemble(Dispatcher& dispatcher) { + for_standard_specializations( + [&dispatcher]() { + auto method = &assemble_uncompressed; + dispatcher.register_target(svs::lib::dispatcher_build_docs, method); + } + ); +} -Args: - parameters: Parameters controlling IVF construction (clustering and search parameters). - See below for the documentation of this class. - data: The dataset to index. **NOTE**: SVS will maintain an internal copy of the - dataset. This may change in future releases. - ids: Vector of ids to assign to each row in the dataset; must match dataset length and contain unique values. - distance_type: The distance type to use for this dataset. - num_index_threads: Number of threads to use for index construction and search. - intra_query_threads: Number of threads to use for intra-query parallelism. Default: 1. - num_clustering_threads: Number of threads to use for clustering. Default: 0 (use num_index_threads). -)"; +template void register_ivf_assembly(Dispatcher& dispatcher) { + register_uncompressed_ivf_assemble(dispatcher); +} -template -void add_build_specialization(py::class_& index) { - index.def_static( - "build", - &build_from_array, - py::arg("parameters"), - py::arg("data"), - py::arg("ids"), - py::arg("distance_type"), - py::arg("num_index_threads"), - py::arg("intra_query_threads") = 1, - py::arg("num_clustering_threads") = 0, - BUILD_FROM_ARRAY_DOC +///// +///// Assembly from File +///// +template +svs::DynamicIVF assemble_from_file_uncompressed( + const std::filesystem::path& cluster_path, + svs::VectorDataLoader> data, + std::span ids, + svs::DistanceType distance_type, + size_t num_threads, + size_t intra_query_threads = 1 +) { + return svs::DynamicIVF::assemble_from_file( + cluster_path, std::move(data), ids, distance_type, num_threads, intra_query_threads ); } +template +void register_uncompressed_ivf_assemble_from_file(Dispatcher& dispatcher) { + for_standard_specializations( + [&dispatcher]() { + auto method = &assemble_from_file_uncompressed; + dispatcher.register_target(svs::lib::dispatcher_build_docs, method); + } + ); +} + +template +void register_ivf_assembly_from_file(Dispatcher& dispatcher) { + register_uncompressed_ivf_assemble_from_file(dispatcher); +} + +using IVFAssembleTypes = + std::variant; + ///// -///// Build from file (data loader) +///// Dispatch Invocation ///// -template -svs::DynamicIVF dynamic_ivf_build_uncompressed( - const svs::index::ivf::IVFBuildParameters& parameters, - svs::VectorDataLoader> data_loader, - std::span ids, +using AssemblyDispatcher = svs::lib:: + Dispatcher, svs::DistanceType, size_t, size_t>; + +AssemblyDispatcher assembly_dispatcher() { + auto dispatcher = AssemblyDispatcher{}; + + // Register available backend methods. 
+ register_ivf_assembly(dispatcher); + return dispatcher; +} + +// Assemble +svs::DynamicIVF assemble_from_clustering( + Clustering clustering, + IVFAssembleTypes data_kind, + const py_contiguous_array_t& py_ids, svs::DistanceType distance_type, - size_t num_index_threads, - size_t intra_query_threads, - size_t num_clustering_threads + svs::DataType SVS_UNUSED(query_type), + bool SVS_UNUSED(enforce_dims), + size_t num_threads, + size_t intra_query_threads = 1 ) { - return svs::DynamicIVF::build( - parameters, - std::move(data_loader), + auto ids = std::span(py_ids.data(), py_ids.size()); + return assembly_dispatcher().invoke( + std::move(clustering), + std::move(data_kind), ids, distance_type, - num_index_threads, - intra_query_threads, - num_clustering_threads + num_threads, + intra_query_threads ); } -using DynamicIVFBuildFromFileDispatcher = svs::lib::Dispatcher< +using AssemblyFromFileDispatcher = svs::lib::Dispatcher< svs::DynamicIVF, - const svs::index::ivf::IVFBuildParameters&, - UnspecializedVectorDataLoader, + const std::filesystem::path&, + IVFAssembleTypes, std::span, svs::DistanceType, size_t, - size_t, size_t>; -DynamicIVFBuildFromFileDispatcher dynamic_ivf_build_from_file_dispatcher() { - auto dispatcher = DynamicIVFBuildFromFileDispatcher{}; - // Register uncompressed specializations (Dynamic dimensionality only) - for_standard_specializations([&]() { - auto method = &dynamic_ivf_build_uncompressed; - dispatcher.register_target(svs::lib::dispatcher_build_docs, method); - }); +AssemblyFromFileDispatcher assembly_from_file_dispatcher() { + auto dispatcher = AssemblyFromFileDispatcher{}; + + // Register available backend methods. + register_ivf_assembly_from_file(dispatcher); return dispatcher; } -svs::DynamicIVF dynamic_ivf_build_from_file( - const svs::index::ivf::IVFBuildParameters& parameters, - UnspecializedVectorDataLoader data_loader, +// Assemble from file +svs::DynamicIVF assemble_from_file( + const std::string& cluster_path, + IVFAssembleTypes data_kind, const py_contiguous_array_t& py_ids, svs::DistanceType distance_type, - size_t num_index_threads, - size_t intra_query_threads, - size_t num_clustering_threads + svs::DataType SVS_UNUSED(query_type), + bool SVS_UNUSED(enforce_dims), + size_t num_threads, + size_t intra_query_threads = 1 ) { auto ids = std::span(py_ids.data(), py_ids.size()); - return dynamic_ivf_build_from_file_dispatcher().invoke( - parameters, - std::move(data_loader), - ids, - distance_type, - num_index_threads, - intra_query_threads, - num_clustering_threads + return assembly_from_file_dispatcher().invoke( + cluster_path, std::move(data_kind), ids, distance_type, num_threads, intra_query_threads ); } -constexpr std::string_view DYNAMIC_IVF_BUILD_FROM_FILE_DOCSTRING_PROTO = R"( -Construct a DynamicIVF index using a data loader, returning the index. +constexpr std::string_view ASSEMBLE_DOCSTRING_PROTO = R"( +Assemble a searchable IVF index from provided clustering and data Args: - parameters: Build parameters controlling IVF construction (clustering and search parameters). - data_loader: Data loader (e.g., a VectorDataLoader instance). - ids: Vector of ids to assign to each row in the dataset; must match dataset length and contain unique values. - distance_type: The similarity function to use for this index. - num_index_threads: Number of threads to use for index construction and search. Default: 1. - intra_query_threads: Number of threads to use for intra-query parallelism. Default: 1. 
-    num_clustering_threads: Number of threads to use for clustering. Default: 0 (use num_index_threads).
+    clustering_path/clustering: Path to the directory where the clustering was generated,
+        or the loaded Clustering provided directly.
+    data_loader: The loader for the dataset. See comment below for accepted types.
+    ids: External IDs for the vectors. Must match dataset length and contain unique values.
+    distance: The distance function to use.
+    query_type: The data type of the queries.
+    enforce_dims: Require that the compiled dimensionality of the returned index matches
+        the dimensionality provided in the ``data_loader`` argument. If a match is not
+        found, an exception is thrown.
+
+        This is meant to ensure that specialized dimensionality is provided without falling
+        back to generic implementations. Leaving ``dims`` out when constructing the
+        ``data_loader`` while setting ``enforce_dims = True`` will always attempt to use a
+        generic implementation.
+    num_threads: The number of threads to use for queries (can't be changed after loading).
+    intra_query_threads: (default: 1) Number of threads that work on a single query.
+        Total number of threads required = ``query_batch_size`` * ``intra_query_threads``,
+        where ``query_batch_size`` is the number of queries processed in parallel.
+        Use this parameter only when ``query_batch_size`` is small and ensure your
+        system has sufficient threads available. Set ``num_threads`` = ``query_batch_size``.
+
+The top-level type is an abstract type backed by various specialized backends that will
+be instantiated based on their applicability to the particular problem instance.
+
+The arguments upon which specialization is conducted are:
+
+* `data_loader`: Both the kind (type of loader) and inner aspects of the loader like data type,
+    quantization type, and number of dimensions.
+* `distance`: The distance measure being used.

Specializations compiled into the binary are listed below.
-{} # (Method listing auto-generated) +{} )"; +///// +///// Add points +///// + template void add_points( svs::DynamicIVF& index, @@ -279,137 +335,119 @@ void save_index( index.save(config_path, data_dir); } -///// -///// Assembly -///// - -template -svs::DynamicIVF assemble_uncompressed( - svs::VectorDataLoader> centroids_loader, - svs::VectorDataLoader> datafile, - std::span ids, - Dist distance, - size_t num_threads -) { - using DataAlloc = RebindAllocator; - - // Load centroids as SimpleData - they are immutable in IVF - auto centroids = svs::data::SimpleData::load(centroids_loader.path_); - - // Load data as BlockedData - it will grow/shrink with insertions/deletions - auto data = svs::data::BlockedData::load( - datafile.path_, as_blocked(DataAlloc(datafile.allocator_)) - ); - - return svs::DynamicIVF::assemble( - std::move(centroids), std::move(data), ids, distance, num_threads - ); -} - -template void register_assembly(Dispatcher& dispatcher) { - for_standard_specializations([&]() { - dispatcher.register_target(&assemble_uncompressed); - }); -} - -using DynamicIVFAssembleTypes = std::variant; - -svs::DynamicIVF assemble( - DynamicIVFAssembleTypes centroids_loader, - DynamicIVFAssembleTypes data_loader, - const py_contiguous_array_t& py_ids, - svs::DistanceType distance_type, - svs::DataType SVS_UNUSED(query_type), - size_t num_threads -) { - auto dispatcher = svs::lib::Dispatcher< - svs::DynamicIVF, - DynamicIVFAssembleTypes, - DynamicIVFAssembleTypes, - std::span, - svs::DistanceType, - size_t>(); - - register_assembly(dispatcher); - auto ids = std::span(py_ids.data(), py_ids.size()); - return dispatcher.invoke( - std::move(centroids_loader), std::move(data_loader), ids, distance_type, num_threads - ); -} - -} // namespace - void wrap(py::module& m) { std::string name = "DynamicIVF"; - py::class_ ivf_index( + py::class_ dynamic_ivf( m, name.c_str(), "Top level class for the dynamic IVF index." ); - add_search_specialization(ivf_index); - add_threading_interface(ivf_index); - add_data_interface(ivf_index); + add_search_specialization(dynamic_ivf); + add_threading_interface(dynamic_ivf); + add_data_interface(dynamic_ivf); // IVF specific extensions. - ivf::add_interface(ivf_index); + ivf::add_interface(dynamic_ivf); // Dynamic interface. - ivf_index.def("consolidate", &svs::DynamicIVF::consolidate, CONSOLIDATE_DOCSTRING); - ivf_index.def( + dynamic_ivf.def("consolidate", &svs::DynamicIVF::consolidate, CONSOLIDATE_DOCSTRING); + dynamic_ivf.def( "compact", &svs::DynamicIVF::compact, py::arg("batchsize") = 1'000'000, COMPACT_DOCSTRING ); - // Reloading/Assembly - ivf_index.def( - py::init(&assemble), - py::arg("centroids_loader"), - py::arg("data_loader"), - py::arg("ids"), - py::arg("distance") = svs::L2, - py::arg("query_type") = svs::DataType::float32, - py::arg("num_threads") = 1 - ); - - // Index building. - add_build_specialization(ivf_index); - - // Build from file / data loader (dynamic docstring) + // Assemble interface { - auto dispatcher = dynamic_ivf_build_from_file_dispatcher(); - std::string dynamic; + auto dispatcher = assembly_dispatcher(); + // Procedurally generate the dispatch string. 
+ auto dynamic = std::string{}; for (size_t i = 0; i < dispatcher.size(); ++i) { fmt::format_to( std::back_inserter(dynamic), - R"(Method {}:\n - data_loader: {}\n - distance: {}\n)", + R"( +Method {}: + - data_loader: {} + - distance: {} +)", i, - dispatcher.description(i, 1), + dispatcher.description(i, 2), dispatcher.description(i, 3) ); } - ivf_index.def_static( - "build", - &dynamic_ivf_build_from_file, - py::arg("parameters"), + + dynamic_ivf.def_static( + "assemble_from_clustering", + [](Clustering clustering, + IVFAssembleTypes data_loader, + const py_contiguous_array_t& py_ids, + svs::DistanceType distance, + svs::DataType query_type, + bool enforce_dims, + size_t num_threads, + size_t intra_query_threads) { + return assemble_from_clustering( + std::move(clustering), + std::move(data_loader), + py_ids, + distance, + query_type, + enforce_dims, + num_threads, + intra_query_threads + ); + }, + py::arg("clustering"), + py::arg("data_loader"), + py::arg("ids"), + py::arg("distance") = svs::L2, + py::arg("query_type") = svs::DataType::float32, + py::arg("enforce_dims") = false, + py::arg("num_threads") = 1, + py::arg("intra_query_threads") = 1, + fmt::format(ASSEMBLE_DOCSTRING_PROTO, dynamic).c_str() + ); + dynamic_ivf.def_static( + "assemble_from_file", + [](const std::string& clustering_path, + IVFAssembleTypes data_loader, + const py_contiguous_array_t& py_ids, + svs::DistanceType distance, + svs::DataType query_type, + bool enforce_dims, + size_t num_threads, + size_t intra_query_threads) { + return assemble_from_file( + clustering_path, + std::move(data_loader), + py_ids, + distance, + query_type, + enforce_dims, + num_threads, + intra_query_threads + ); + }, + py::arg("clustering_path"), py::arg("data_loader"), py::arg("ids"), - py::arg("distance_type"), - py::arg("num_index_threads") = 1, + py::arg("distance") = svs::L2, + py::arg("query_type") = svs::DataType::float32, + py::arg("enforce_dims") = false, + py::arg("num_threads") = 1, py::arg("intra_query_threads") = 1, - py::arg("num_clustering_threads") = 0, - fmt::format(DYNAMIC_IVF_BUILD_FROM_FILE_DOCSTRING_PROTO, dynamic).c_str() + fmt::format(ASSEMBLE_DOCSTRING_PROTO, dynamic).c_str() ); } // Index modification. - add_points_specialization(ivf_index); + add_points_specialization(dynamic_ivf); // Note: DynamicIVFIndex doesn't support reconstruct_at, so we don't add reconstruct // interface // Index Deletion. - ivf_index.def( + dynamic_ivf.def( "delete", [](svs::DynamicIVF& index, const py_contiguous_array_t& ids) { return index.delete_points(as_span(ids)); @@ -419,14 +457,14 @@ void wrap(py::module& m) { ); // ID inspection - ivf_index.def( + dynamic_ivf.def( "has_id", &svs::DynamicIVF::has_id, py::arg("id"), "Return whether the ID exists in the index." 
); - ivf_index.def( + dynamic_ivf.def( "all_ids", [](const svs::DynamicIVF& index) { const auto& v = index.all_ids(); @@ -439,7 +477,7 @@ void wrap(py::module& m) { ); // Distance calculation - ivf_index.def( + dynamic_ivf.def( "get_distance", [](const svs::DynamicIVF& index, size_t id, @@ -464,7 +502,7 @@ void wrap(py::module& m) { ); // Saving - ivf_index.def( + dynamic_ivf.def( "save", &save_index, py::arg("config_directory"), diff --git a/bindings/python/src/ivf.cpp b/bindings/python/src/ivf.cpp index 55e3519c0..7d231c998 100644 --- a/bindings/python/src/ivf.cpp +++ b/bindings/python/src/ivf.cpp @@ -53,16 +53,6 @@ namespace py = pybind11; using namespace svs::python::ivf_specializations; namespace svs::python::ivf { -// The build process in IVF uses Kmeans to get centroids and assignments of data. -// This sparse clustering can be saved with centroids stored as float datatype. -// While assembling, the sparse clustering is used to create DenseClusters and -// centroids datatype can be changed as per the search specializations. -// Support both BFloat16 and Float16 centroids to match data types and leverage AMX. -using ClusteringBF16 = - svs::index::ivf::Clustering, uint32_t>; -using ClusteringF16 = - svs::index::ivf::Clustering, uint32_t>; -using Clustering = std::variant; namespace detail { diff --git a/examples/python/example_ivf_dynamic.py b/examples/python/example_ivf_dynamic.py index 264585969..e861d9854 100644 --- a/examples/python/example_ivf_dynamic.py +++ b/examples/python/example_ivf_dynamic.py @@ -65,34 +65,59 @@ def main(): print(f" ✓ Configured {build_parameters.num_centroids} centroids") # [build-parameters] - # [build-index] - # Build the dynamic IVF index with initial vectors - print("\n3. Building dynamic IVF index...") - n = 900 # Use 900 vectors for initial index + # [build-clustering-and-assemble] + # Build clustering and then assemble the dynamic IVF index + print("\n3. Building clustering and assembling dynamic IVF index...") - # Load the data and create IDs + # Load all data data = svs.read_vecs(os.path.join(test_data_dir, "data.fvecs")) - ids = np.arange(data.shape[0]).astype('uint64') - - # Build the index - index = svs.DynamicIVF.build( - parameters = build_parameters, - data = data[:n], - ids = ids[:n], - distance_type = svs.DistanceType.L2, - num_index_threads = 4, + n_total = data.shape[0] # Total vectors (1000) + ids_all = np.arange(n_total).astype('uint64') + + # Build the clustering using all data + data_loader = svs.VectorDataLoader( + os.path.join(test_data_dir, "data.fvecs"), + svs.DataType.float32, + dims = 128 ) - print(f" ✓ Index built with {index.size} vectors") + clustering = svs.Clustering.build( + build_parameters = build_parameters, + data_loader = data_loader, + distance = svs.DistanceType.L2, + num_threads = 4, + ) + print(f" ✓ Clustering built with {build_parameters.num_centroids} centroids") + + # Assemble the dynamic IVF index with all vectors + print(" Assembling dynamic IVF index from clustering...") + index = svs.DynamicIVF.assemble_from_clustering( + clustering = clustering, + data_loader = data_loader, + ids = ids_all, # Index all vectors + distance = svs.DistanceType.L2, + num_threads = 4, + intra_query_threads = 1, + ) + print(f" ✓ Index assembled with {index.size} vectors") print(f" ✓ Index dimensions: {index.dimensions}") - # [build-index] + # [build-clustering-and-assemble] + + # [demonstrate-dynamic-operations] + # Demonstrate add and delete operations (even though we already have all vectors) + print("\n4. 
Demonstrating dynamic operations...") + print(f" Initial index size: {index.size}") + + # Delete some vectors + print(" Deleting first 100 vectors...") + ids_to_delete = np.arange(100).astype('uint64') + index.delete(ids_to_delete) + print(f" After deletion: {index.size} vectors") - # [add-vectors] - # Add new vectors to the index - print("\n4. Adding 100 new vectors to the index...") - initial_size = index.size - index.add(data[n:n+100], ids[n:n+100]) - print(f" ✓ Index size: {initial_size} → {index.size}") - # [add-vectors] + # Add them back + print(" Adding 100 vectors back...") + index.add(data[:100], ids_to_delete) + print(f" After addition: {index.size} vectors") + # [demonstrate-dynamic-operations] # [search-before-delete] # Search before deletion @@ -130,7 +155,7 @@ def main(): # [remove-vectors] # Remove vectors from the index print("\n7. Removing the first 50 vectors...") - ids_to_delete = ids[:50] + ids_to_delete = ids_all[:50] num_deleted = index.delete(ids_to_delete) print(f" ✓ Deleted {num_deleted} vectors") print(f" ✓ Index size after deletion: {index.size}") diff --git a/include/svs/index/ivf/clustering.h b/include/svs/index/ivf/clustering.h index e62d4c9ab..b3d29be3d 100644 --- a/include/svs/index/ivf/clustering.h +++ b/include/svs/index/ivf/clustering.h @@ -254,6 +254,12 @@ template class Clustering { template struct DenseCluster { public: + using data_type = Data; + using index_type = I; + + // Default constructor for in-place initialization + DenseCluster() = default; + DenseCluster(Data data, std::vector ids) : data_{std::move(data)} , ids_{std::move(ids)} { @@ -264,6 +270,12 @@ template struct DenseCluster { size_t size() const { return data_.size(); } + // Support for dynamic operations - SimpleData already has resize() + void resize(size_t new_size) { + data_.resize(new_size); + ids_.resize(new_size); + } + template void on_leaves(Callback&& f, size_t prefetch_offset) const { size_t p = 0; @@ -287,6 +299,7 @@ template struct DenseCluster { auto get_secondary(size_t id) const { return data_.get_secondary(id); } auto get_global_id(size_t local_id) const { return ids_[local_id]; } const Data& view_cluster() const { return data_; } + Data& view_cluster() { return data_; } public: Data data_; @@ -303,7 +316,7 @@ class DenseClusteredDataset { using index_type = I; using data_type = Data; - // Constructor + // Constructor from clustering (for building from existing data) template DenseClusteredDataset( const Clustering& clustering, @@ -329,6 +342,23 @@ class DenseClusteredDataset { ); } + // Constructor for empty clusters (for assembly/dynamic operations) + template + DenseClusteredDataset( + size_t num_clusters, + size_t dimensions, + const Alloc& allocator + ) + : clusters_{} { + clusters_.reserve(num_clusters); + for (size_t i = 0; i < num_clusters; ++i) { + clusters_.emplace_back( + Data(0, dimensions, allocator), + std::vector() + ); + } + } + template void on_leaves(Callback&& f, size_t cluster) const { clusters_.at(cluster).on_leaves(SVS_FWD(f), prefetch_offset_); } @@ -336,11 +366,20 @@ class DenseClusteredDataset { size_t get_prefetch_offset() const { return prefetch_offset_; } void set_prefetch_offset(size_t offset) { prefetch_offset_ = offset; } - // Cluster access + // Cluster access (const) const DenseCluster& operator[](size_t cluster) const { return clusters_[cluster]; } + // Cluster access (mutable) - for dynamic IVF operations + DenseCluster& operator[](size_t cluster) { + return clusters_[cluster]; + } + + // Number of clusters + size_t size() const { 
return clusters_.size(); } + + // Datum access (const) auto get_datum(size_t cluster, size_t id) const { return clusters_.at(cluster).get_datum(id); } @@ -350,10 +389,17 @@ class DenseClusteredDataset { auto get_global_id(size_t cluster, size_t id) const { return clusters_.at(cluster).get_global_id(id); } + + // View cluster data (const) const Data& view_cluster(size_t cluster) const { return clusters_.at(cluster).view_cluster(); } + // View cluster data (mutable) - for dynamic IVF operations + Data& view_cluster(size_t cluster) { + return clusters_[cluster].view_cluster(); + } + private: std::vector> clusters_; size_t prefetch_offset_ = 8; diff --git a/include/svs/index/ivf/dynamic_ivf.h b/include/svs/index/ivf/dynamic_ivf.h index 2f5a8daaf..1e85d078b 100644 --- a/include/svs/index/ivf/dynamic_ivf.h +++ b/include/svs/index/ivf/dynamic_ivf.h @@ -16,7 +16,8 @@ #pragma once -// Include the IVF index +// Include the IVF index and clustering +#include "svs/index/ivf/clustering.h" #include "svs/index/ivf/index.h" // svs @@ -42,55 +43,11 @@ namespace svs::index::ivf { /// enum class IVFSlotMetadata : uint8_t { Empty = 0x00, Valid = 0x01 }; -/// -/// @brief Dynamic cluster implementation using blocked data for resizeability -/// -/// Similar to DenseCluster but uses BlockedData to support dynamic operations -/// -template struct DynamicDenseCluster { - using data_type = Data; - using index_type = I; - - template - void on_leaves(Callback&& f, size_t prefetch_offset) const { - size_t p = prefetch_offset; - for (size_t i = 0; i < data_.size(); ++i) { - if (p < data_.size()) { - data_.prefetch(p); - ++p; - } - f(accessor(data_, i), ids_[i], i); - } - } - - auto get_datum(size_t id) const { return data_.get_datum(id); } - auto get_secondary(size_t id) const { return data_.get_secondary(id); } - auto get_global_id(size_t local_id) const { return ids_[local_id]; } - const Data& view_cluster() const { return data_; } - Data& view_cluster() { return data_; } - - // Allow resizing for dynamic operations - void resize(size_t new_size) { - data_.resize(new_size); - ids_.resize(new_size); - } - - size_t size() const { return data_.size(); } - size_t capacity() const { return data_.capacity(); } - - public: - Data data_; - std::vector ids_; -}; - /// /// @brief Dynamic IVF Index with insertion and deletion support /// -/// Uses the same cluster framework as static IVF (DenseClusteredDataset pattern) -/// but with BlockedData allocators for resizeability. 
-/// /// @tparam Centroids The type of centroid storage -/// @tparam Cluster Type representing cluster storage (DynamicDenseCluster with BlockedData) +/// @tparam Cluster Type representing cluster storage (DenseCluster with BlockedData) /// @tparam Dist The distance functor used to compare queries with the elements /// @tparam ThreadPoolProto Thread pool prototype type /// @@ -124,7 +81,7 @@ class DynamicIVFIndex { private: // Core IVF components (same structure as static IVF) centroids_type centroids_; - std::vector clusters_; // Each cluster contains data_ and ids_ + Cluster clusters_; // Cluster container // Metadata tracking for dynamic operations std::vector status_; // Status of each global slot @@ -151,10 +108,10 @@ class DynamicIVFIndex { svs::logging::logger_ptr logger_; public: - /// @brief Construct a Dynamic IVF Index from clusters + /// @brief Construct a new Dynamic IVF Index /// /// @param centroids Centroid collection for space partitioning - /// @param clusters Vector of cluster data structures (each with data_ and ids_) + /// @param clusters Cluster container /// @param external_ids External IDs for all vectors /// @param distance_function Distance metric for similarity computation /// @param threadpool_proto Primary thread pool prototype @@ -163,7 +120,7 @@ class DynamicIVFIndex { template DynamicIVFIndex( centroids_type centroids, - std::vector clusters, + Cluster clusters, const ExternalIds& external_ids, Dist distance_function, TP threadpool_proto, @@ -180,7 +137,8 @@ class DynamicIVFIndex { , logger_{std::move(logger)} { // Initialize metadata structures size_t total_size = 0; - for (const auto& cluster : clusters_) { + for (size_t cluster_idx = 0; cluster_idx < clusters_.size(); ++cluster_idx) { + const auto& cluster = clusters_[cluster_idx]; for (size_t pos = 0; pos < cluster.ids_.size(); ++pos) { total_size = std::max(total_size, static_cast(cluster.ids_[pos]) + 1); @@ -218,7 +176,7 @@ class DynamicIVFIndex { template DynamicIVFIndex( centroids_type centroids, - std::vector clusters, + Cluster clusters, IDTranslator translator, Dist distance_function, TP threadpool_proto, @@ -960,10 +918,9 @@ auto build_dynamic_ivf( ) { using I = uint32_t; using ElementType = typename SourceData::element_type; - // Use default lib::Allocator instead of HugepageAllocator to avoid memory issues + // Use BlockedData with default lib::Allocator for dynamic operations using BlockedDataType = data::SimpleData>>; - using Cluster = DynamicDenseCluster; auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); @@ -976,33 +933,15 @@ auto build_dynamic_ivf( blocking_params, lib::Allocator() ); - // Create dynamic clusters from the clustering - std::vector clusters; - clusters.reserve(clustering.size()); - - for (size_t cluster_idx = 0; cluster_idx < clustering.size(); ++cluster_idx) { - const auto& cluster_assignments = clustering.cluster(cluster_idx); - size_t cluster_size = cluster_assignments.size(); - - // Create BlockedData for this cluster with custom block size - auto cluster_data = - BlockedDataType(cluster_size, source_data.dimensions(), blocked_allocator); - std::vector cluster_ids; - cluster_ids.reserve(cluster_size); - - for (size_t i = 0; i < cluster_size; ++i) { - I data_idx = cluster_assignments[i]; - cluster_data.set_datum(i, source_data.get_datum(data_idx)); - cluster_ids.push_back(ids[data_idx]); - } - - clusters.emplace_back(std::move(cluster_data), std::move(cluster_ids)); - } + // Use DenseClusteredDataset to create clusters, just like static IVF + auto 
dense_clusters = DenseClusteredDataset( + clustering, source_data, threadpool, blocked_allocator + ); // Create the index - return DynamicIVFIndex( + return DynamicIVFIndex( std::move(centroids), - std::move(clusters), + std::move(dense_clusters), ids, std::move(distance), std::move(threadpool), diff --git a/include/svs/index/ivf/extensions.h b/include/svs/index/ivf/extensions.h index 995bca32c..79785f2a4 100644 --- a/include/svs/index/ivf/extensions.h +++ b/include/svs/index/ivf/extensions.h @@ -180,16 +180,32 @@ struct CreateDenseCluster { inline constexpr CreateDenseCluster create_dense_cluster{}; -template +// Specialization for default allocator (backward compatibility) +// When no specific allocator is provided, use default construction with same extent +template svs::data::SimpleData svs_invoke( svs::tag_t, - const svs::data::SimpleData& original, + const svs::data::SimpleData& original, size_t new_size, - const NewAlloc& SVS_UNUSED(allocator) + const svs::lib::Allocator& SVS_UNUSED(allocator) ) { return svs::data::SimpleData(new_size, original.dimensions()); } +// General implementation for Blocked allocators: Always use Dynamic extent for flexibility +// This enables dynamic resizing which is essential for dynamic IVF operations +template +svs::data::SimpleData> svs_invoke( + svs::tag_t, + const svs::data::SimpleData& original, + size_t new_size, + const svs::data::Blocked& allocator +) { + return svs::data::SimpleData>( + new_size, original.dimensions(), allocator + ); +} + struct SetDenseCluster { template void operator()( diff --git a/include/svs/orchestrators/dynamic_ivf.h b/include/svs/orchestrators/dynamic_ivf.h index bb445b6c7..ac8a44377 100644 --- a/include/svs/orchestrators/dynamic_ivf.h +++ b/include/svs/orchestrators/dynamic_ivf.h @@ -216,80 +216,148 @@ class DynamicIVF : public manager::IndexManager { return impl_->get_distance(id, query_array); } - // Building + ///// Building - Build clustering from data + template + static auto build_clustering( + const index::ivf::IVFBuildParameters& build_parameters, + const DataProto& data_proto, + const Distance& distance, + size_t num_threads + ) { + if constexpr (std::is_same_v, DistanceType>) { + auto dispatcher = DistanceDispatcher(distance); + return dispatcher([&](auto distance_function) { + return index::ivf::build_clustering( + build_parameters, data_proto, std::move(distance_function), num_threads + ); + }); + } else { + return index::ivf::build_clustering( + build_parameters, data_proto, distance, num_threads + ); + } + } + + ///// Assembly - Assemble from clustering and data template < manager::QueryTypeDefinition QueryTypes, - typename DataProto, + typename Clustering, + typename Data, typename Distance, typename ThreadPoolProto> - static DynamicIVF build( - const index::ivf::IVFBuildParameters& build_parameters, - const DataProto& data_proto, + static DynamicIVF assemble_from_clustering( + Clustering clustering, + Data data, std::span ids, Distance distance, ThreadPoolProto threadpool_proto, - size_t intra_query_threads = 1, - size_t num_clustering_threads = 0 + size_t intra_query_threads = 1 ) { - // Handle DistanceType enum by dispatching to concrete distance types + auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); + if constexpr (std::is_same_v, DistanceType>) { auto dispatcher = DistanceDispatcher(distance); return dispatcher([&](auto distance_function) { - return build( - build_parameters, - data_proto, + return assemble_from_clustering_impl( + std::move(clustering), + data, ids, - 
distance_function, - std::move(threadpool_proto), - intra_query_threads, - num_clustering_threads + std::move(distance_function), + std::move(threadpool), + intra_query_threads ); }); } else { - auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); - - // Load the data (handles both loaders and views) - auto data = svs::detail::dispatch_load(data_proto, threadpool); - - // Build clustering first - // Choose build type for clustering to leverage AMX instructions: - // - Float32 data -> BFloat16 (AMX supports BFloat16) - // - Float16 data -> Float16 (AMX supports Float16) - // - BFloat16 data -> BFloat16 (already optimal) - using DataElementType = typename decltype(data)::element_type; - using BuildType = std::conditional_t< - std::is_same_v, - BFloat16, - DataElementType>; - // Note: build_clustering takes threadpool by value, so we need a copy - auto clustering = [&]() { - size_t clustering_threads = (num_clustering_threads == 0) - ? threadpool.size() - : num_clustering_threads; - auto threadpool_copy = threads::NativeThreadPool(clustering_threads); - return index::ivf::build_clustering( - build_parameters, data, distance, std::move(threadpool_copy), false - ); - }(); - - // Now build the dynamic IVF index from the clustering - auto impl = index::ivf::build_dynamic_ivf( - std::move(clustering.centroids_), - clustering, - data, // Pass by const reference - build_dynamic_ivf will create BlockedData + return assemble_from_clustering_impl( + std::move(clustering), + data, ids, distance, std::move(threadpool), intra_query_threads ); - - return DynamicIVF( - AssembleTag(), manager::as_typelist{}, std::move(impl) - ); } } - // Assembly +private: + template < + manager::QueryTypeDefinition QueryTypes, + typename Clustering, + typename Data, + typename Distance, + typename ThreadPool> + static DynamicIVF assemble_from_clustering_impl( + Clustering clustering, + Data data, + std::span ids, + Distance distance, + ThreadPool threadpool, + size_t intra_query_threads + ) { + using I = uint32_t; + // Centroids type is extracted from the clustering's centroids_ member + using Centroids = std::remove_reference_t; + + // Load the data to get the actual data type + auto loaded_data = svs::detail::dispatch_load(data, threadpool); + using data_type = typename decltype(loaded_data)::lib_alloc_data_type; + + // Get centroids from clustering + auto centroids = clustering.centroids(); + + // Create DenseClusteredDataset from clustering and loaded data + auto dense_clusters = index::ivf::DenseClusteredDataset( + clustering, loaded_data, threadpool, lib::Allocator() + ); + + // Create the index + auto impl = index::ivf::DynamicIVFIndex( + std::move(centroids), + std::move(dense_clusters), + ids, + std::move(distance), + std::move(threadpool), + intra_query_threads + ); + + return DynamicIVF( + AssembleTag(), manager::as_typelist{}, std::move(impl) + ); + } + +public: + + ///// Assembly - Assemble from file (load clustering from disk) + template < + manager::QueryTypeDefinition QueryTypes, + typename BuildType, + typename Data, + typename Distance, + typename ThreadPoolProto> + static DynamicIVF assemble_from_file( + const std::filesystem::path& cluster_path, + Data data, + std::span ids, + Distance distance, + ThreadPoolProto threadpool_proto, + size_t intra_query_threads = 1 + ) { + using centroids_type = data::SimpleData; + auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); + auto clustering = lib::load_from_disk>( + cluster_path, threadpool + ); + return 
assemble_from_clustering( + std::move(clustering), + data, + ids, + distance, + std::move(threadpool), + intra_query_threads + ); + } + + // Legacy assembly method for backward compatibility (used by Python bindings) template < manager::QueryTypeDefinition QueryTypes, typename Centroids, @@ -304,23 +372,17 @@ class DynamicIVF : public manager::IndexManager { ThreadPoolProto threadpool_proto ) { using I = uint32_t; - using Cluster = index::ivf::DynamicDenseCluster; + using Clusters = index::ivf::DenseClusteredDataset; auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); - // For assembly, create empty clusters - user will add points later - std::vector clusters; - clusters.reserve(centroids.size()); - - for (size_t i = 0; i < centroids.size(); ++i) { - auto cluster_data = Data(0, data.dimensions()); - std::vector cluster_ids; - clusters.emplace_back(std::move(cluster_data), std::move(cluster_ids)); - } + // Create empty DenseClusteredDataset for assembly + // Use default allocator - data already has the right allocator built in + auto clusters = Clusters(centroids.size(), data.dimensions(), data.get_allocator()); // Create the index with empty clusters auto impl = - index::ivf::DynamicIVFIndex( + index::ivf::DynamicIVFIndex( std::move(centroids), std::move(clusters), ids, diff --git a/tests/svs/index/ivf/dynamic_ivf.cpp b/tests/svs/index/ivf/dynamic_ivf.cpp index 0dece7a26..93d5d4902 100644 --- a/tests/svs/index/ivf/dynamic_ivf.cpp +++ b/tests/svs/index/ivf/dynamic_ivf.cpp @@ -217,40 +217,25 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index", "[dynamic_ivf]") { /* train_only */ false ); - // Create dynamic clusters from the clustering result - using ClusterType = - svs::index::ivf::DynamicDenseCluster, Idx>; - - std::vector clusters; - for (size_t c = 0; c < NUM_CLUSTERS; ++c) { - const auto& cluster_indices = clustering.cluster(c); - size_t cluster_size = cluster_indices.size(); - - ClusterType cluster; - cluster.data_ = svs::data::SimpleData(cluster_size, N); - cluster.ids_.resize(cluster_size); - - for (size_t i = 0; i < cluster_size; ++i) { - Idx global_id = cluster_indices[i]; - cluster.data_.set_datum(i, initial_data.get_datum(global_id)); - cluster.ids_[i] = global_id; - } - - clusters.push_back(std::move(cluster)); - } + // Create dynamic clusters using DenseClusteredDataset + auto centroids = clustering.centroids(); + using DataType = svs::data::SimpleData; + auto dense_clusters = svs::index::ivf::DenseClusteredDataset< + decltype(centroids), + Idx, + DataType>(clustering, initial_data, threadpool, svs::lib::Allocator()); // Create the dynamic IVF index - auto centroids = clustering.centroids(); auto threadpool_for_index = svs::threads::as_threadpool(num_threads); using IndexType = svs::index::ivf::DynamicIVFIndex< decltype(centroids), - ClusterType, + decltype(dense_clusters), Distance, decltype(threadpool_for_index)>; auto index = IndexType( std::move(centroids), - std::move(clusters), + std::move(dense_clusters), initial_indices, Distance(), std::move(threadpool_for_index), @@ -263,6 +248,118 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index", "[dynamic_ivf]") { test_loop(index, reference, queries, div(reference.size(), modify_fraction), 2, 6); } +CATCH_TEST_CASE("Testing Dynamic IVF Index with BlockedData", "[dynamic_ivf]") { + // This test verifies that BlockedData allocator works correctly for dynamic operations + const size_t num_threads = 4; + + // Load data + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto queries = 
test_dataset::queries(); + + // Build clustering + auto build_params = svs::index::ivf::IVFBuildParameters(10, 10, false); + auto threadpool = svs::threads::SequentialThreadPool(); + auto clustering = svs::index::ivf::build_clustering( + build_params, + data, + Distance(), + threadpool, + false + ); + + // Use build_dynamic_ivf which automatically creates BlockedData clusters + std::vector ids(data.size()); + std::iota(ids.begin(), ids.end(), 0); + + auto index = svs::index::ivf::build_dynamic_ivf( + std::move(clustering.centroids_), + clustering, + data, + ids, + Distance(), + svs::threads::as_threadpool(num_threads), + 1 + ); + + // Test 1: Initial search works + auto params = svs::index::ivf::IVFSearchParameters(10, NUM_NEIGHBORS); + auto results = svs::QueryResult(queries.size(), NUM_NEIGHBORS); + + index.search( + results.view(), + svs::data::ConstSimpleDataView{ + queries.data(), queries.size(), queries.dimensions()}, + params + ); + + // Verify we got results + size_t valid_results = 0; + for (size_t i = 0; i < results.n_queries(); ++i) { + if (results.index(i, 0) != std::numeric_limits::max()) { + valid_results++; + } + } + CATCH_REQUIRE(valid_results > 0); + + // Test 2: Add points (BlockedData's resize capability) + constexpr size_t num_add = 100; + std::vector new_ids; + auto new_data = svs::data::SimpleData(num_add, N); + for (size_t i = 0; i < num_add; ++i) { + new_ids.push_back(data.size() + i); + new_data.set_datum(i, data.get_datum(i % data.size())); + } + + size_t size_before = index.size(); + index.add_points(new_data, new_ids, false); + CATCH_REQUIRE(index.size() == size_before + num_add); + + // Test 3: Search still works after adding + index.search( + results.view(), + svs::data::ConstSimpleDataView{ + queries.data(), queries.size(), queries.dimensions()}, + params + ); + + valid_results = 0; + for (size_t i = 0; i < results.n_queries(); ++i) { + if (results.index(i, 0) != std::numeric_limits::max()) { + valid_results++; + } + } + CATCH_REQUIRE(valid_results > 0); + + // Test 4: Delete some points + std::vector to_delete; + for (size_t i = 0; i < 50; ++i) { + to_delete.push_back(i); + } + size_t deleted = index.delete_entries(to_delete); + CATCH_REQUIRE(deleted == to_delete.size()); + CATCH_REQUIRE(index.size() == size_before + num_add - deleted); + + // Test 5: Compact works with BlockedData + index.compact(1000); + CATCH_REQUIRE(index.size() == size_before + num_add - deleted); + + // Test 6: Search after compaction + index.search( + results.view(), + svs::data::ConstSimpleDataView{ + queries.data(), queries.size(), queries.dimensions()}, + params + ); + + valid_results = 0; + for (size_t i = 0; i < results.n_queries(); ++i) { + if (results.index(i, 0) != std::numeric_limits::max()) { + valid_results++; + } + } + CATCH_REQUIRE(valid_results > 0); +} + CATCH_TEST_CASE("Dynamic IVF - Edge Cases", "[dynamic_ivf]") { const size_t num_threads = 4; const size_t num_points = 100; @@ -296,41 +393,31 @@ CATCH_TEST_CASE("Dynamic IVF - Edge Cases", "[dynamic_ivf]") { false ); - using ClusterType = - svs::index::ivf::DynamicDenseCluster, Idx>; - - std::vector clusters; + // Create dynamic clusters using DenseClusteredDataset std::vector initial_indices; - - for (size_t c = 0; c < 50; ++c) { - const auto& cluster_indices = clustering.cluster(c); - size_t cluster_size = cluster_indices.size(); - - ClusterType cluster; - cluster.data_ = svs::data::SimpleData(cluster_size, N); - cluster.ids_.resize(cluster_size); - - for (size_t i = 0; i < cluster_size; ++i) { - Idx global_id = 
cluster_indices[i]; - cluster.data_.set_datum(i, data.get_datum(global_id)); - cluster.ids_[i] = global_id; - initial_indices.push_back(global_id); + for (size_t c = 0; c < clustering.size(); ++c) { + for (auto idx : clustering.cluster(c)) { + initial_indices.push_back(idx); } - - clusters.push_back(std::move(cluster)); } auto centroids = clustering.centroids(); + using DataType = svs::data::SimpleData; + auto dense_clusters = svs::index::ivf::DenseClusteredDataset< + decltype(centroids), + Idx, + DataType>(clustering, data, threadpool, svs::lib::Allocator()); + auto threadpool_for_index = svs::threads::as_threadpool(num_threads); using IndexType = svs::index::ivf::DynamicIVFIndex< decltype(centroids), - ClusterType, + decltype(dense_clusters), Distance, decltype(threadpool_for_index)>; auto index = IndexType( std::move(centroids), - std::move(clusters), + std::move(dense_clusters), initial_indices, Distance(), std::move(threadpool_for_index), @@ -400,38 +487,31 @@ CATCH_TEST_CASE("Dynamic IVF - Search Parameters Variations", "[dynamic_ivf]") { false ); - using ClusterType = - svs::index::ivf::DynamicDenseCluster, Idx>; - - std::vector clusters; + // Create dynamic clusters using DenseClusteredDataset std::vector indices; - - for (size_t c = 0; c < NUM_CLUSTERS; ++c) { - const auto& cluster_indices = clustering.cluster(c); - ClusterType cluster; - cluster.data_ = svs::data::SimpleData(cluster_indices.size(), N); - cluster.ids_.resize(cluster_indices.size()); - - for (size_t i = 0; i < cluster_indices.size(); ++i) { - Idx global_id = cluster_indices[i]; - cluster.data_.set_datum(i, data.get_datum(global_id)); - cluster.ids_[i] = global_id; - indices.push_back(global_id); + for (size_t c = 0; c < clustering.size(); ++c) { + for (auto idx : clustering.cluster(c)) { + indices.push_back(idx); } - clusters.push_back(std::move(cluster)); } auto centroids = clustering.centroids(); + using DataType = svs::data::SimpleData; + auto dense_clusters = svs::index::ivf::DenseClusteredDataset< + decltype(centroids), + Idx, + DataType>(clustering, data, threadpool, svs::lib::Allocator()); + auto threadpool_for_index = svs::threads::as_threadpool(num_threads); using IndexType = svs::index::ivf::DynamicIVFIndex< decltype(centroids), - ClusterType, + decltype(dense_clusters), Distance, decltype(threadpool_for_index)>; auto index = IndexType( std::move(centroids), - std::move(clusters), + std::move(dense_clusters), indices, Distance(), std::move(threadpool_for_index), @@ -479,8 +559,6 @@ CATCH_TEST_CASE("Dynamic IVF - Threading Configurations", "[dynamic_ivf]") { false ); - using ClusterType = - svs::index::ivf::DynamicDenseCluster, Idx>; // Test with different thread configurations std::vector thread_configs = {1, 2, 4, 8}; @@ -488,35 +566,30 @@ CATCH_TEST_CASE("Dynamic IVF - Threading Configurations", "[dynamic_ivf]") { for (auto num_threads : thread_configs) { for (auto intra_threads : intra_query_configs) { - std::vector clusters; std::vector indices; - - for (size_t c = 0; c < NUM_CLUSTERS; ++c) { - const auto& cluster_indices = clustering.cluster(c); - ClusterType cluster; - cluster.data_ = svs::data::SimpleData(cluster_indices.size(), N); - cluster.ids_.resize(cluster_indices.size()); - - for (size_t i = 0; i < cluster_indices.size(); ++i) { - Idx global_id = cluster_indices[i]; - cluster.data_.set_datum(i, data.get_datum(global_id)); - cluster.ids_[i] = global_id; - indices.push_back(global_id); + for (size_t c = 0; c < clustering.size(); ++c) { + for (auto idx : clustering.cluster(c)) { + 
indices.push_back(idx); } - clusters.push_back(std::move(cluster)); } auto centroids_copy = clustering.centroids(); + using DataType = svs::data::SimpleData; + auto dense_clusters = svs::index::ivf::DenseClusteredDataset< + decltype(centroids_copy), + Idx, + DataType>(clustering, data, threadpool, svs::lib::Allocator()); + auto threadpool_for_index = svs::threads::as_threadpool(num_threads); using IndexType = svs::index::ivf::DynamicIVFIndex< decltype(centroids_copy), - ClusterType, + decltype(dense_clusters), Distance, decltype(threadpool_for_index)>; auto index = IndexType( std::move(centroids_copy), - std::move(clusters), + std::move(dense_clusters), indices, Distance(), std::move(threadpool_for_index), @@ -569,38 +642,31 @@ CATCH_TEST_CASE("Dynamic IVF - Add/Delete Stress Test", "[dynamic_ivf]") { false ); - using ClusterType = - svs::index::ivf::DynamicDenseCluster, Idx>; - - std::vector clusters; + // Create dynamic clusters using DenseClusteredDataset std::vector indices; - - for (size_t c = 0; c < NUM_CLUSTERS; ++c) { - const auto& cluster_indices = clustering.cluster(c); - ClusterType cluster; - cluster.data_ = svs::data::SimpleData(cluster_indices.size(), N); - cluster.ids_.resize(cluster_indices.size()); - - for (size_t i = 0; i < cluster_indices.size(); ++i) { - Idx global_id = cluster_indices[i]; - cluster.data_.set_datum(i, initial_data.get_datum(global_id)); - cluster.ids_[i] = global_id; - indices.push_back(global_id); + for (size_t c = 0; c < clustering.size(); ++c) { + for (auto idx : clustering.cluster(c)) { + indices.push_back(idx); } - clusters.push_back(std::move(cluster)); } auto centroids = clustering.centroids(); + using DataType = svs::data::SimpleData; + auto dense_clusters = svs::index::ivf::DenseClusteredDataset< + decltype(centroids), + Idx, + DataType>(clustering, initial_data, threadpool, svs::lib::Allocator()); + auto threadpool_for_index = svs::threads::as_threadpool(num_threads); using IndexType = svs::index::ivf::DynamicIVFIndex< decltype(centroids), - ClusterType, + decltype(dense_clusters), Distance, decltype(threadpool_for_index)>; auto index = IndexType( std::move(centroids), - std::move(clusters), + std::move(dense_clusters), indices, Distance(), std::move(threadpool_for_index), @@ -688,38 +754,31 @@ CATCH_TEST_CASE("Dynamic IVF - Single Query Search", "[dynamic_ivf]") { false ); - using ClusterType = - svs::index::ivf::DynamicDenseCluster, Idx>; - - std::vector clusters; + // Create dynamic clusters using DenseClusteredDataset std::vector indices; - - for (size_t c = 0; c < NUM_CLUSTERS; ++c) { - const auto& cluster_indices = clustering.cluster(c); - ClusterType cluster; - cluster.data_ = svs::data::SimpleData(cluster_indices.size(), N); - cluster.ids_.resize(cluster_indices.size()); - - for (size_t i = 0; i < cluster_indices.size(); ++i) { - Idx global_id = cluster_indices[i]; - cluster.data_.set_datum(i, data.get_datum(global_id)); - cluster.ids_[i] = global_id; - indices.push_back(global_id); + for (size_t c = 0; c < clustering.size(); ++c) { + for (auto idx : clustering.cluster(c)) { + indices.push_back(idx); } - clusters.push_back(std::move(cluster)); } auto centroids = clustering.centroids(); + using DataType = svs::data::SimpleData; + auto dense_clusters = svs::index::ivf::DenseClusteredDataset< + decltype(centroids), + Idx, + DataType>(clustering, data, threadpool, svs::lib::Allocator()); + auto threadpool_for_index = svs::threads::as_threadpool(num_threads); using IndexType = svs::index::ivf::DynamicIVFIndex< decltype(centroids), - 
ClusterType, + decltype(dense_clusters), Distance, decltype(threadpool_for_index)>; auto index = IndexType( std::move(centroids), - std::move(clusters), + std::move(dense_clusters), indices, Distance(), std::move(threadpool_for_index), @@ -793,45 +852,47 @@ CATCH_TEST_CASE("Dynamic IVF Get Distance", "[index][ivf][dynamic_ivf]") { /* train_only */ false ); - // Create dynamic clusters from the clustering result - using ClusterType = - svs::index::ivf::DynamicDenseCluster, Idx>; - - std::vector clusters; + // Create dynamic clusters using DenseClusteredDataset + // Note: This test uses sequential internal IDs, so we can't use the simple helper std::vector initial_indices; // External IDs in order size_t internal_id = 0; // Sequential internal IDs - for (size_t c = 0; c < NUM_CLUSTERS; ++c) { + // Build mapping: internal_id -> external_id + for (size_t c = 0; c < clustering.size(); ++c) { const auto& cluster_indices = clustering.cluster(c); - size_t cluster_size = cluster_indices.size(); - - ClusterType cluster; - cluster.data_ = svs::data::SimpleData(cluster_size, N); - cluster.ids_.resize(cluster_size); - - for (size_t i = 0; i < cluster_size; ++i) { + for (size_t i = 0; i < cluster_indices.size(); ++i) { Idx external_id = cluster_indices[i]; // Use clustering index as external ID - cluster.data_.set_datum(i, data.get_datum(external_id)); - cluster.ids_[i] = internal_id; // Sequential internal ID initial_indices.push_back(external_id); // Map internal_id -> external_id internal_id++; } + } - clusters.push_back(std::move(cluster)); + auto centroids = clustering.centroids(); + using DataType = svs::data::SimpleData; + auto dense_clusters = svs::index::ivf::DenseClusteredDataset< + decltype(centroids), + Idx, + DataType>(clustering, data, threadpool, svs::lib::Allocator()); + + // Need to update cluster IDs to use sequential internal IDs + for (size_t c = 0, global_idx = 0; c < dense_clusters.size(); ++c) { + auto& cluster = dense_clusters[c]; + for (size_t i = 0; i < cluster.ids_.size(); ++i) { + cluster.ids_[i] = global_idx++; + } } // Create the dynamic IVF index - auto centroids = clustering.centroids(); auto threadpool_for_index = svs::threads::as_threadpool(num_threads); using IndexType = svs::index::ivf::DynamicIVFIndex< decltype(centroids), - ClusterType, + decltype(dense_clusters), Distance, decltype(threadpool_for_index)>; auto index = IndexType( std::move(centroids), - std::move(clusters), + std::move(dense_clusters), initial_indices, Distance(), std::move(threadpool_for_index), From c373c1c8c7cbe3bef405055de47a9266d998dca9 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Fri, 12 Dec 2025 07:15:06 -0800 Subject: [PATCH 16/23] formatting --- bindings/python/src/dynamic_ivf.cpp | 37 +++++---- include/svs/index/ivf/clustering.h | 19 +---- include/svs/index/ivf/common.h | 4 +- include/svs/index/ivf/dynamic_ivf.h | 6 +- include/svs/index/ivf/hierarchical_kmeans.h | 6 +- include/svs/index/ivf/kmeans.h | 4 +- include/svs/orchestrators/dynamic_ivf.h | 24 +++--- tests/svs/index/ivf/dynamic_ivf.cpp | 91 ++++++++++----------- tests/svs/index/ivf/kmeans.cpp | 6 +- 9 files changed, 97 insertions(+), 100 deletions(-) diff --git a/bindings/python/src/dynamic_ivf.cpp b/bindings/python/src/dynamic_ivf.cpp index 1b61ea4b0..52950ad38 100644 --- a/bindings/python/src/dynamic_ivf.cpp +++ b/bindings/python/src/dynamic_ivf.cpp @@ -84,12 +84,10 @@ svs::DynamicIVF assemble_uncompressed( template void register_uncompressed_ivf_assemble(Dispatcher& dispatcher) { - for_standard_specializations( - 
[&dispatcher]() { - auto method = &assemble_uncompressed; - dispatcher.register_target(svs::lib::dispatcher_build_docs, method); - } - ); + for_standard_specializations([&dispatcher]() { + auto method = &assemble_uncompressed; + dispatcher.register_target(svs::lib::dispatcher_build_docs, method); + }); } template void register_ivf_assembly(Dispatcher& dispatcher) { @@ -115,12 +113,10 @@ svs::DynamicIVF assemble_from_file_uncompressed( template void register_uncompressed_ivf_assemble_from_file(Dispatcher& dispatcher) { - for_standard_specializations( - [&dispatcher]() { - auto method = &assemble_from_file_uncompressed; - dispatcher.register_target(svs::lib::dispatcher_build_docs, method); - } - ); + for_standard_specializations([&dispatcher]() { + auto method = &assemble_from_file_uncompressed; + dispatcher.register_target(svs::lib::dispatcher_build_docs, method); + }); } template @@ -135,8 +131,14 @@ using IVFAssembleTypes = ///// Dispatch Invocation ///// -using AssemblyDispatcher = svs::lib:: - Dispatcher, svs::DistanceType, size_t, size_t>; +using AssemblyDispatcher = svs::lib::Dispatcher< + svs::DynamicIVF, + Clustering, + IVFAssembleTypes, + std::span, + svs::DistanceType, + size_t, + size_t>; AssemblyDispatcher assembly_dispatcher() { auto dispatcher = AssemblyDispatcher{}; @@ -198,7 +200,12 @@ svs::DynamicIVF assemble_from_file( ) { auto ids = std::span(py_ids.data(), py_ids.size()); return assembly_from_file_dispatcher().invoke( - cluster_path, std::move(data_kind), ids, distance_type, num_threads, intra_query_threads + cluster_path, + std::move(data_kind), + ids, + distance_type, + num_threads, + intra_query_threads ); } diff --git a/include/svs/index/ivf/clustering.h b/include/svs/index/ivf/clustering.h index b3d29be3d..8017191ef 100644 --- a/include/svs/index/ivf/clustering.h +++ b/include/svs/index/ivf/clustering.h @@ -344,18 +344,11 @@ class DenseClusteredDataset { // Constructor for empty clusters (for assembly/dynamic operations) template - DenseClusteredDataset( - size_t num_clusters, - size_t dimensions, - const Alloc& allocator - ) + DenseClusteredDataset(size_t num_clusters, size_t dimensions, const Alloc& allocator) : clusters_{} { clusters_.reserve(num_clusters); for (size_t i = 0; i < num_clusters; ++i) { - clusters_.emplace_back( - Data(0, dimensions, allocator), - std::vector() - ); + clusters_.emplace_back(Data(0, dimensions, allocator), std::vector()); } } @@ -372,9 +365,7 @@ class DenseClusteredDataset { } // Cluster access (mutable) - for dynamic IVF operations - DenseCluster& operator[](size_t cluster) { - return clusters_[cluster]; - } + DenseCluster& operator[](size_t cluster) { return clusters_[cluster]; } // Number of clusters size_t size() const { return clusters_.size(); } @@ -396,9 +387,7 @@ class DenseClusteredDataset { } // View cluster data (mutable) - for dynamic IVF operations - Data& view_cluster(size_t cluster) { - return clusters_[cluster].view_cluster(); - } + Data& view_cluster(size_t cluster) { return clusters_[cluster].view_cluster(); } private: std::vector> clusters_; diff --git a/include/svs/index/ivf/common.h b/include/svs/index/ivf/common.h index 5751bfe34..bcb7062a8 100644 --- a/include/svs/index/ivf/common.h +++ b/include/svs/index/ivf/common.h @@ -231,7 +231,9 @@ template void compute_matmul( const T* data, const T* centroids, float* results, size_t m, size_t n, size_t k ) { - // Validate parameters to avoid Intel MKL errors + // Early return for zero dimensions. 
+ // Calling Intel MKL functions with zero dimensions may result in undefined behavior + // or runtime errors. This check ensures we avoid such cases. if (m == 0 || n == 0 || k == 0) { return; // Nothing to compute } diff --git a/include/svs/index/ivf/dynamic_ivf.h b/include/svs/index/ivf/dynamic_ivf.h index 1e85d078b..eca80d06f 100644 --- a/include/svs/index/ivf/dynamic_ivf.h +++ b/include/svs/index/ivf/dynamic_ivf.h @@ -939,7 +939,11 @@ auto build_dynamic_ivf( ); // Create the index - return DynamicIVFIndex( + return DynamicIVFIndex< + Centroids, + decltype(dense_clusters), + Distance, + decltype(threadpool)>( std::move(centroids), std::move(dense_clusters), ids, diff --git a/include/svs/index/ivf/hierarchical_kmeans.h b/include/svs/index/ivf/hierarchical_kmeans.h index 585be5a5e..bf3d1e8c5 100644 --- a/include/svs/index/ivf/hierarchical_kmeans.h +++ b/include/svs/index/ivf/hierarchical_kmeans.h @@ -91,9 +91,9 @@ auto hierarchical_kmeans_clustering_impl( svs::logging::debug(logger, "Level1 clusters: {}\n", num_level1_clusters); // Step 1: Create training set - // Use at least MIN_TRAINING_SAMPLE_MULTIPLIER times the number of clusters, - // or training_fraction of data, whichever is larger. - // This ensures we have enough training data even for small datasets + // Use at least MIN_TRAINING_SAMPLE_MULTIPLIER times the number of centroids, + // but no more than the dataset size. This ensures we have enough training data + // even for small datasets, without exceeding the available data. size_t min_training_data = std::min(num_clusters * MIN_TRAINING_SAMPLE_MULTIPLIER, data.size()); size_t num_training_data = std::max( diff --git a/include/svs/index/ivf/kmeans.h b/include/svs/index/ivf/kmeans.h index ae20b061a..98bc94e27 100644 --- a/include/svs/index/ivf/kmeans.h +++ b/include/svs/index/ivf/kmeans.h @@ -46,8 +46,8 @@ auto kmeans_clustering_impl( // Step 1: Create training set // Use at least MIN_TRAINING_SAMPLE_MULTIPLIER times the number of centroids, - // or training_fraction of data, whichever is larger. - // This ensures we have enough training data even for small datasets + // but no more than the dataset size. This ensures we have enough training data + // even for small datasets, without exceeding the available data. 
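+    // Illustrative arithmetic (values assumed purely for this example:
+    // MIN_TRAINING_SAMPLE_MULTIPLIER == 100 and a training fraction of 0.1):
+    //   1'000 centroids over 10'000'000 points:
+    //     min_training_data = min(1'000 * 100, 10'000'000) = 100'000
+    //     num_training_data = max(0.1 * 10'000'000, 100'000) = 1'000'000
+    //   1'000 centroids over only 50'000 points:
+    //     min_training_data = min(100'000, 50'000) = 50'000, i.e. the whole dataset,
+    //   so the multiplier provides a floor for small datasets while the fraction
+    //   dominates for large ones.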
size_t min_training_data = std::min(num_centroids * MIN_TRAINING_SAMPLE_MULTIPLIER, data.size()); size_t num_training_data = std::max( diff --git a/include/svs/orchestrators/dynamic_ivf.h b/include/svs/orchestrators/dynamic_ivf.h index ac8a44377..9be55eb28 100644 --- a/include/svs/orchestrators/dynamic_ivf.h +++ b/include/svs/orchestrators/dynamic_ivf.h @@ -254,7 +254,7 @@ class DynamicIVF : public manager::IndexManager { size_t intra_query_threads = 1 ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); - + if constexpr (std::is_same_v, DistanceType>) { auto dispatcher = DistanceDispatcher(distance); return dispatcher([&](auto distance_function) { @@ -279,7 +279,7 @@ class DynamicIVF : public manager::IndexManager { } } -private: + private: template < manager::QueryTypeDefinition QueryTypes, typename Clustering, @@ -311,7 +311,11 @@ class DynamicIVF : public manager::IndexManager { ); // Create the index - auto impl = index::ivf::DynamicIVFIndex( + auto impl = index::ivf::DynamicIVFIndex< + Centroids, + decltype(dense_clusters), + Distance, + decltype(threadpool)>( std::move(centroids), std::move(dense_clusters), ids, @@ -325,8 +329,7 @@ class DynamicIVF : public manager::IndexManager { ); } -public: - + public: ///// Assembly - Assemble from file (load clustering from disk) template < manager::QueryTypeDefinition QueryTypes, @@ -344,9 +347,10 @@ class DynamicIVF : public manager::IndexManager { ) { using centroids_type = data::SimpleData; auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); - auto clustering = lib::load_from_disk>( - cluster_path, threadpool - ); + auto clustering = + lib::load_from_disk>( + cluster_path, threadpool + ); return assemble_from_clustering( std::move(clustering), data, @@ -381,8 +385,8 @@ class DynamicIVF : public manager::IndexManager { auto clusters = Clusters(centroids.size(), data.dimensions(), data.get_allocator()); // Create the index with empty clusters - auto impl = - index::ivf::DynamicIVFIndex( + auto impl = index::ivf:: + DynamicIVFIndex( std::move(centroids), std::move(clusters), ids, diff --git a/tests/svs/index/ivf/dynamic_ivf.cpp b/tests/svs/index/ivf/dynamic_ivf.cpp index 93d5d4902..c0203e403 100644 --- a/tests/svs/index/ivf/dynamic_ivf.cpp +++ b/tests/svs/index/ivf/dynamic_ivf.cpp @@ -220,10 +220,10 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index", "[dynamic_ivf]") { // Create dynamic clusters using DenseClusteredDataset auto centroids = clustering.centroids(); using DataType = svs::data::SimpleData; - auto dense_clusters = svs::index::ivf::DenseClusteredDataset< - decltype(centroids), - Idx, - DataType>(clustering, initial_data, threadpool, svs::lib::Allocator()); + auto dense_clusters = + svs::index::ivf::DenseClusteredDataset( + clustering, initial_data, threadpool, svs::lib::Allocator() + ); // Create the dynamic IVF index auto threadpool_for_index = svs::threads::as_threadpool(num_threads); @@ -260,17 +260,13 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index with BlockedData", "[dynamic_ivf]") { auto build_params = svs::index::ivf::IVFBuildParameters(10, 10, false); auto threadpool = svs::threads::SequentialThreadPool(); auto clustering = svs::index::ivf::build_clustering( - build_params, - data, - Distance(), - threadpool, - false + build_params, data, Distance(), threadpool, false ); // Use build_dynamic_ivf which automatically creates BlockedData clusters std::vector ids(data.size()); std::iota(ids.begin(), ids.end(), 0); - + auto index = svs::index::ivf::build_dynamic_ivf( 
std::move(clustering.centroids_), clustering, @@ -284,14 +280,14 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index with BlockedData", "[dynamic_ivf]") { // Test 1: Initial search works auto params = svs::index::ivf::IVFSearchParameters(10, NUM_NEIGHBORS); auto results = svs::QueryResult(queries.size(), NUM_NEIGHBORS); - + index.search( results.view(), svs::data::ConstSimpleDataView{ queries.data(), queries.size(), queries.dimensions()}, params ); - + // Verify we got results size_t valid_results = 0; for (size_t i = 0; i < results.n_queries(); ++i) { @@ -300,7 +296,7 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index with BlockedData", "[dynamic_ivf]") { } } CATCH_REQUIRE(valid_results > 0); - + // Test 2: Add points (BlockedData's resize capability) constexpr size_t num_add = 100; std::vector new_ids; @@ -309,11 +305,11 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index with BlockedData", "[dynamic_ivf]") { new_ids.push_back(data.size() + i); new_data.set_datum(i, data.get_datum(i % data.size())); } - + size_t size_before = index.size(); index.add_points(new_data, new_ids, false); CATCH_REQUIRE(index.size() == size_before + num_add); - + // Test 3: Search still works after adding index.search( results.view(), @@ -321,7 +317,7 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index with BlockedData", "[dynamic_ivf]") { queries.data(), queries.size(), queries.dimensions()}, params ); - + valid_results = 0; for (size_t i = 0; i < results.n_queries(); ++i) { if (results.index(i, 0) != std::numeric_limits::max()) { @@ -329,7 +325,7 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index with BlockedData", "[dynamic_ivf]") { } } CATCH_REQUIRE(valid_results > 0); - + // Test 4: Delete some points std::vector to_delete; for (size_t i = 0; i < 50; ++i) { @@ -338,11 +334,11 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index with BlockedData", "[dynamic_ivf]") { size_t deleted = index.delete_entries(to_delete); CATCH_REQUIRE(deleted == to_delete.size()); CATCH_REQUIRE(index.size() == size_before + num_add - deleted); - + // Test 5: Compact works with BlockedData index.compact(1000); CATCH_REQUIRE(index.size() == size_before + num_add - deleted); - + // Test 6: Search after compaction index.search( results.view(), @@ -350,7 +346,7 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index with BlockedData", "[dynamic_ivf]") { queries.data(), queries.size(), queries.dimensions()}, params ); - + valid_results = 0; for (size_t i = 0; i < results.n_queries(); ++i) { if (results.index(i, 0) != std::numeric_limits::max()) { @@ -403,10 +399,10 @@ CATCH_TEST_CASE("Dynamic IVF - Edge Cases", "[dynamic_ivf]") { auto centroids = clustering.centroids(); using DataType = svs::data::SimpleData; - auto dense_clusters = svs::index::ivf::DenseClusteredDataset< - decltype(centroids), - Idx, - DataType>(clustering, data, threadpool, svs::lib::Allocator()); + auto dense_clusters = + svs::index::ivf::DenseClusteredDataset( + clustering, data, threadpool, svs::lib::Allocator() + ); auto threadpool_for_index = svs::threads::as_threadpool(num_threads); using IndexType = svs::index::ivf::DynamicIVFIndex< @@ -497,10 +493,10 @@ CATCH_TEST_CASE("Dynamic IVF - Search Parameters Variations", "[dynamic_ivf]") { auto centroids = clustering.centroids(); using DataType = svs::data::SimpleData; - auto dense_clusters = svs::index::ivf::DenseClusteredDataset< - decltype(centroids), - Idx, - DataType>(clustering, data, threadpool, svs::lib::Allocator()); + auto dense_clusters = + svs::index::ivf::DenseClusteredDataset( + clustering, data, threadpool, svs::lib::Allocator() + ); auto 
threadpool_for_index = svs::threads::as_threadpool(num_threads); using IndexType = svs::index::ivf::DynamicIVFIndex< @@ -559,7 +555,6 @@ CATCH_TEST_CASE("Dynamic IVF - Threading Configurations", "[dynamic_ivf]") { false ); - // Test with different thread configurations std::vector thread_configs = {1, 2, 4, 8}; std::vector intra_query_configs = {1, 2}; @@ -575,10 +570,10 @@ CATCH_TEST_CASE("Dynamic IVF - Threading Configurations", "[dynamic_ivf]") { auto centroids_copy = clustering.centroids(); using DataType = svs::data::SimpleData; - auto dense_clusters = svs::index::ivf::DenseClusteredDataset< - decltype(centroids_copy), - Idx, - DataType>(clustering, data, threadpool, svs::lib::Allocator()); + auto dense_clusters = svs::index::ivf:: + DenseClusteredDataset( + clustering, data, threadpool, svs::lib::Allocator() + ); auto threadpool_for_index = svs::threads::as_threadpool(num_threads); using IndexType = svs::index::ivf::DynamicIVFIndex< @@ -652,10 +647,10 @@ CATCH_TEST_CASE("Dynamic IVF - Add/Delete Stress Test", "[dynamic_ivf]") { auto centroids = clustering.centroids(); using DataType = svs::data::SimpleData; - auto dense_clusters = svs::index::ivf::DenseClusteredDataset< - decltype(centroids), - Idx, - DataType>(clustering, initial_data, threadpool, svs::lib::Allocator()); + auto dense_clusters = + svs::index::ivf::DenseClusteredDataset( + clustering, initial_data, threadpool, svs::lib::Allocator() + ); auto threadpool_for_index = svs::threads::as_threadpool(num_threads); using IndexType = svs::index::ivf::DynamicIVFIndex< @@ -764,10 +759,10 @@ CATCH_TEST_CASE("Dynamic IVF - Single Query Search", "[dynamic_ivf]") { auto centroids = clustering.centroids(); using DataType = svs::data::SimpleData; - auto dense_clusters = svs::index::ivf::DenseClusteredDataset< - decltype(centroids), - Idx, - DataType>(clustering, data, threadpool, svs::lib::Allocator()); + auto dense_clusters = + svs::index::ivf::DenseClusteredDataset( + clustering, data, threadpool, svs::lib::Allocator() + ); auto threadpool_for_index = svs::threads::as_threadpool(num_threads); using IndexType = svs::index::ivf::DynamicIVFIndex< @@ -853,26 +848,22 @@ CATCH_TEST_CASE("Dynamic IVF Get Distance", "[index][ivf][dynamic_ivf]") { ); // Create dynamic clusters using DenseClusteredDataset - // Note: This test uses sequential internal IDs, so we can't use the simple helper std::vector initial_indices; // External IDs in order - size_t internal_id = 0; // Sequential internal IDs - // Build mapping: internal_id -> external_id for (size_t c = 0; c < clustering.size(); ++c) { const auto& cluster_indices = clustering.cluster(c); for (size_t i = 0; i < cluster_indices.size(); ++i) { Idx external_id = cluster_indices[i]; // Use clustering index as external ID - initial_indices.push_back(external_id); // Map internal_id -> external_id - internal_id++; + initial_indices.push_back(external_id); } } auto centroids = clustering.centroids(); using DataType = svs::data::SimpleData; - auto dense_clusters = svs::index::ivf::DenseClusteredDataset< - decltype(centroids), - Idx, - DataType>(clustering, data, threadpool, svs::lib::Allocator()); + auto dense_clusters = + svs::index::ivf::DenseClusteredDataset( + clustering, data, threadpool, svs::lib::Allocator() + ); // Need to update cluster IDs to use sequential internal IDs for (size_t c = 0, global_idx = 0; c < dense_clusters.size(); ++c) { diff --git a/tests/svs/index/ivf/kmeans.cpp b/tests/svs/index/ivf/kmeans.cpp index eceb77520..245b6f501 100644 --- a/tests/svs/index/ivf/kmeans.cpp +++ 
b/tests/svs/index/ivf/kmeans.cpp @@ -166,10 +166,10 @@ void test_kmeans_train_only_performance(const Data& data, Distance distance) { end_train_only - start_train_only ); - // Verify train_only doesn't take significantly longer (allow some variance) - // In practice, train_only should be faster, but we allow it to be up to 50% longer due - // to variance CATCH_REQUIRE(train_only_duration.count() <= normal_duration.count() * 1.5); + // Note: We do not assert on performance here, as wall-clock timing is unreliable in CI. + // In practice, train_only should be faster, but this is best verified with dedicated + // benchmarks. // Verify results are still valid CATCH_REQUIRE(centroids_train_only.size() == n_centroids); From dbe2ae88e5bd71a5666873de508ccb5faf08d0bd Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Fri, 12 Dec 2025 11:28:25 -0800 Subject: [PATCH 17/23] Make dynamic IVF APIs similar to static --- include/svs/core/data/simple.h | 7 ++ include/svs/index/ivf/dynamic_ivf.h | 61 +++++++----- include/svs/orchestrators/dynamic_ivf.h | 121 ++---------------------- tests/svs/index/ivf/dynamic_ivf.cpp | 7 +- 4 files changed, 56 insertions(+), 140 deletions(-) diff --git a/include/svs/core/data/simple.h b/include/svs/core/data/simple.h index df0a45c3f..0fcb31bbb 100644 --- a/include/svs/core/data/simple.h +++ b/include/svs/core/data/simple.h @@ -38,6 +38,9 @@ namespace svs { namespace data { +// Forward declaration for Blocked allocator +template class Blocked; + template bool check_dims(size_t m, size_t n) { if constexpr (M == Dynamic || N == Dynamic) { return m == n; @@ -247,6 +250,8 @@ class SimpleData { /// Data wrapped in the library allocator. using lib_alloc_data_type = SimpleData>; + /// Data wrapped in the library blocked allocator for dynamic IVF. + using lib_blocked_alloc_data_type = SimpleData>>; /// Return the underlying allocator. const allocator_type& get_allocator() const { return data_.get_allocator(); } @@ -607,6 +612,8 @@ class SimpleData> { using const_value_type = std::span; using lib_alloc_data_type = SimpleData>>; + /// Already blocked, so lib_blocked_alloc_data_type is the same as lib_alloc_data_type. 
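+    /// Dynamic IVF assembly paths (see assemble_dynamic_from_clustering) request this
+    /// alias to obtain a resizable, block-allocated backing store, so providing it here
+    /// lets already-blocked datasets flow through the same machinery unchanged.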
+ using lib_blocked_alloc_data_type = SimpleData>>; ///// Constructors SimpleData(size_t n_elements, size_t n_dimensions, const Blocked& alloc) diff --git a/include/svs/index/ivf/dynamic_ivf.h b/include/svs/index/ivf/dynamic_ivf.h index eca80d06f..cc0dc9924 100644 --- a/include/svs/index/ivf/dynamic_ivf.h +++ b/include/svs/index/ivf/dynamic_ivf.h @@ -22,6 +22,7 @@ // svs #include "svs/concepts/distance.h" +#include "svs/core/loading.h" #include "svs/core/logging.h" #include "svs/core/query_result.h" #include "svs/core/translation.h" @@ -899,52 +900,66 @@ class DynamicIVFIndex { size_t get_global_id(size_t /*cluster_id*/, size_t local_id) const { return local_id; } }; +/// @brief Assemble a DynamicIVFIndex from clustering and data prototype /// -/// @brief Build a DynamicIVFIndex from clustering and data +/// @param clustering The clustering result containing centroids and assignments +/// @param data_proto Data prototype (file path or data object) to load +/// @param ids External IDs for the data points (must match data size) +/// @param distance Distance function to use +/// @param threadpool_proto Thread pool for parallel operations +/// @param intra_query_thread_count Number of threads for intra-query parallelism /// template < - typename Centroids, - data::ImmutableMemoryDataset SourceData, + typename Clustering, + typename DataProto, typename Distance, - typename ThreadPoolProto> -auto build_dynamic_ivf( - Centroids centroids, - const index::ivf::Clustering& clustering, - const SourceData& source_data, + typename ThreadpoolProto> +auto assemble_dynamic_from_clustering( + Clustering clustering, + const DataProto& data_proto, std::span ids, Distance distance, - ThreadPoolProto threadpool_proto, + ThreadpoolProto threadpool_proto, const size_t intra_query_thread_count = 1 ) { using I = uint32_t; - using ElementType = typename SourceData::element_type; - // Use BlockedData with default lib::Allocator for dynamic operations - using BlockedDataType = - data::SimpleData>>; + using centroids_type = data::SimpleData; + // Load the data auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); + auto data = svs::detail::dispatch_load(data_proto, threadpool); + + // Validate that ids size matches data size + if (ids.size() != data.size()) { + throw ANNEXCEPTION( + "IDs size (", ids.size(), ") does not match data size (", data.size(), ")" + ); + } + + // Use lib_blocked_alloc_data_type for Dynamic IVF + using blocked_data_type = typename decltype(data)::lib_blocked_alloc_data_type; // Use a small block size for IVF clusters (1MB instead of 1GB default) - // With many clusters, large blocks cause excessive memory usage auto blocking_params = data::BlockingParameters{ .blocksize_bytes = lib::PowerOfTwo(20) // 2^20 = 1MB }; - auto blocked_allocator = data::Blocked>( - blocking_params, lib::Allocator() - ); - - // Use DenseClusteredDataset to create clusters, just like static IVF - auto dense_clusters = DenseClusteredDataset( - clustering, source_data, threadpool, blocked_allocator + using allocator_type = typename blocked_data_type::allocator_type; + auto blocked_allocator = + allocator_type(blocking_params, typename allocator_type::allocator_type()); + + // Create clustered dataset - DenseClusteredDataset will use the extension system + // to create the appropriate data type with blocked allocator via create_dense_cluster + auto dense_clusters = DenseClusteredDataset( + clustering, data, threadpool, blocked_allocator ); // Create the index return DynamicIVFIndex< - Centroids, + 
centroids_type, decltype(dense_clusters), Distance, decltype(threadpool)>( - std::move(centroids), + std::move(clustering.centroids()), std::move(dense_clusters), ids, std::move(distance), diff --git a/include/svs/orchestrators/dynamic_ivf.h b/include/svs/orchestrators/dynamic_ivf.h index 9be55eb28..d0e15ff1a 100644 --- a/include/svs/orchestrators/dynamic_ivf.h +++ b/include/svs/orchestrators/dynamic_ivf.h @@ -216,28 +216,6 @@ class DynamicIVF : public manager::IndexManager { return impl_->get_distance(id, query_array); } - ///// Building - Build clustering from data - template - static auto build_clustering( - const index::ivf::IVFBuildParameters& build_parameters, - const DataProto& data_proto, - const Distance& distance, - size_t num_threads - ) { - if constexpr (std::is_same_v, DistanceType>) { - auto dispatcher = DistanceDispatcher(distance); - return dispatcher([&](auto distance_function) { - return index::ivf::build_clustering( - build_parameters, data_proto, std::move(distance_function), num_threads - ); - }); - } else { - return index::ivf::build_clustering( - build_parameters, data_proto, distance, num_threads - ); - } - } - ///// Assembly - Assemble from clustering and data template < manager::QueryTypeDefinition QueryTypes, @@ -258,7 +236,7 @@ class DynamicIVF : public manager::IndexManager { if constexpr (std::is_same_v, DistanceType>) { auto dispatcher = DistanceDispatcher(distance); return dispatcher([&](auto distance_function) { - return assemble_from_clustering_impl( + auto impl = index::ivf::assemble_dynamic_from_clustering( std::move(clustering), data, ids, @@ -266,9 +244,12 @@ class DynamicIVF : public manager::IndexManager { std::move(threadpool), intra_query_threads ); + return DynamicIVF( + AssembleTag(), manager::as_typelist{}, std::move(impl) + ); }); } else { - return assemble_from_clustering_impl( + auto impl = index::ivf::assemble_dynamic_from_clustering( std::move(clustering), data, ids, @@ -276,60 +257,12 @@ class DynamicIVF : public manager::IndexManager { std::move(threadpool), intra_query_threads ); + return DynamicIVF( + AssembleTag(), manager::as_typelist{}, std::move(impl) + ); } } - private: - template < - manager::QueryTypeDefinition QueryTypes, - typename Clustering, - typename Data, - typename Distance, - typename ThreadPool> - static DynamicIVF assemble_from_clustering_impl( - Clustering clustering, - Data data, - std::span ids, - Distance distance, - ThreadPool threadpool, - size_t intra_query_threads - ) { - using I = uint32_t; - // Centroids type is extracted from the clustering's centroids_ member - using Centroids = std::remove_reference_t; - - // Load the data to get the actual data type - auto loaded_data = svs::detail::dispatch_load(data, threadpool); - using data_type = typename decltype(loaded_data)::lib_alloc_data_type; - - // Get centroids from clustering - auto centroids = clustering.centroids(); - - // Create DenseClusteredDataset from clustering and loaded data - auto dense_clusters = index::ivf::DenseClusteredDataset( - clustering, loaded_data, threadpool, lib::Allocator() - ); - - // Create the index - auto impl = index::ivf::DynamicIVFIndex< - Centroids, - decltype(dense_clusters), - Distance, - decltype(threadpool)>( - std::move(centroids), - std::move(dense_clusters), - ids, - std::move(distance), - std::move(threadpool), - intra_query_threads - ); - - return DynamicIVF( - AssembleTag(), manager::as_typelist{}, std::move(impl) - ); - } - - public: ///// Assembly - Assemble from file (load clustering from disk) template < 
manager::QueryTypeDefinition QueryTypes, @@ -360,44 +293,6 @@ class DynamicIVF : public manager::IndexManager { intra_query_threads ); } - - // Legacy assembly method for backward compatibility (used by Python bindings) - template < - manager::QueryTypeDefinition QueryTypes, - typename Centroids, - typename Data, - typename Distance, - typename ThreadPoolProto> - static DynamicIVF assemble( - Centroids centroids, - Data data, - std::span ids, - Distance distance, - ThreadPoolProto threadpool_proto - ) { - using I = uint32_t; - using Clusters = index::ivf::DenseClusteredDataset; - - auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); - - // Create empty DenseClusteredDataset for assembly - // Use default allocator - data already has the right allocator built in - auto clusters = Clusters(centroids.size(), data.dimensions(), data.get_allocator()); - - // Create the index with empty clusters - auto impl = index::ivf:: - DynamicIVFIndex( - std::move(centroids), - std::move(clusters), - ids, - std::move(distance), - std::move(threadpool) - ); - - return DynamicIVF( - AssembleTag(), manager::as_typelist{}, std::move(impl) - ); - } }; } // namespace svs diff --git a/tests/svs/index/ivf/dynamic_ivf.cpp b/tests/svs/index/ivf/dynamic_ivf.cpp index c0203e403..690213b61 100644 --- a/tests/svs/index/ivf/dynamic_ivf.cpp +++ b/tests/svs/index/ivf/dynamic_ivf.cpp @@ -263,13 +263,12 @@ CATCH_TEST_CASE("Testing Dynamic IVF Index with BlockedData", "[dynamic_ivf]") { build_params, data, Distance(), threadpool, false ); - // Use build_dynamic_ivf which automatically creates BlockedData clusters + // Use assemble_dynamic_from_clustering with external IDs std::vector ids(data.size()); std::iota(ids.begin(), ids.end(), 0); - auto index = svs::index::ivf::build_dynamic_ivf( - std::move(clustering.centroids_), - clustering, + auto index = svs::index::ivf::assemble_dynamic_from_clustering( + std::move(clustering), data, ids, Distance(), From 0def7ab0704122aedc1adbadaa5832559c93e16a Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Fri, 12 Dec 2025 17:02:04 -0800 Subject: [PATCH 18/23] Improve compact implementation --- include/svs/index/ivf/dynamic_ivf.h | 188 ++++++++++++++-------------- 1 file changed, 95 insertions(+), 93 deletions(-) diff --git a/include/svs/index/ivf/dynamic_ivf.h b/include/svs/index/ivf/dynamic_ivf.h index cc0dc9924..acdf9ab7c 100644 --- a/include/svs/index/ivf/dynamic_ivf.h +++ b/include/svs/index/ivf/dynamic_ivf.h @@ -438,22 +438,94 @@ class DynamicIVFIndex { /// for optimal memory usage and search performance. 
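+    ///
+    /// A typical delete-then-compact sequence (illustrative sketch only; `ids_to_remove`
+    /// is a placeholder, the calls mirror the accompanying tests):
+    /// @code
+    ///     index.delete_entries(ids_to_remove);
+    ///     index.compact();
+    /// @endcode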
/// /// @param batch_size Granularity at which points are shuffled (unused for IVF) - void compact(size_t SVS_UNUSED(batch_size) = 1'000) { - // Collect all valid indices + void compact(size_t batch_size = 1'000) { + // Step 1: Compute mapping from new to old indices auto valid_indices = nonmissing_indices(); - // Rebuild clusters compactly, removing empty slots - rebuild_clusters_compact(valid_indices); + // Step 2: Group valid indices by cluster + std::vector>> cluster_valid_indices( + clusters_.size() + ); + + // Collect all external ID mappings BEFORE modifying translator + std::vector external_ids; + std::vector new_internal_ids; + external_ids.reserve(valid_indices.size()); + new_internal_ids.reserve(valid_indices.size()); + + for (size_t new_id = 0; new_id < valid_indices.size(); ++new_id) { + size_t old_id = valid_indices[new_id]; + size_t cluster_idx = id_to_cluster_[old_id]; + cluster_valid_indices[cluster_idx].push_back({new_id, old_id}); + + auto external_id = translator_.get_external(old_id); + external_ids.push_back(external_id); + new_internal_ids.push_back(new_id); + } + + // Step 3: Save old metadata before clearing + auto old_id_in_cluster = id_in_cluster_; + translator_ = IDTranslator(); + + // Step 4: Compact each cluster using data_.compact() + for (size_t cluster_idx = 0; cluster_idx < clusters_.size(); ++cluster_idx) { + const auto& indices = cluster_valid_indices[cluster_idx]; + if (indices.empty()) { + clusters_[cluster_idx].data_.resize(0); + clusters_[cluster_idx].ids_.clear(); + continue; + } + + // Create a map from old position in cluster to new_global_id + // Use std::map to automatically sort by old position + std::map old_pos_to_global_id; + std::vector old_positions_sorted; + old_positions_sorted.reserve(indices.size()); + + for (const auto& [new_global_id, old_global_id] : indices) { + size_t old_pos = old_id_in_cluster[old_global_id]; + old_pos_to_global_id[old_pos] = new_global_id; + } + + // Extract sorted old positions (map keeps them sorted by key) + for (const auto& [old_pos, _] : old_pos_to_global_id) { + old_positions_sorted.push_back(old_pos); + } - // Update metadata + // Use data's compact() method - this reorders data in place + clusters_[cluster_idx].data_.compact( + lib::as_const_span(old_positions_sorted), + inter_query_threadpool_, + batch_size + ); + clusters_[cluster_idx].data_.resize(indices.size()); + + // After compact(), data is at positions [0, 1, 2, ...] corresponding to + // the sorted old positions. Build new IDs and metadata. 
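+            // Small worked example (values invented for illustration): if positions
+            // {1, 3} of this cluster survive and map to new global ids {7, 2}
+            // respectively, then old_positions_sorted == {1, 3}; after compact() the
+            // surviving rows sit at positions {0, 1}, so ids_ becomes {7, 2},
+            // id_in_cluster_[7] == 0 and id_in_cluster_[2] == 1.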
+ std::vector new_ids(indices.size()); + size_t compacted_pos = 0; + for (size_t old_pos : old_positions_sorted) { + size_t new_global_id = old_pos_to_global_id[old_pos]; + new_ids[compacted_pos] = static_cast(new_global_id); + id_to_cluster_[new_global_id] = cluster_idx; + id_in_cluster_[new_global_id] = compacted_pos; + compacted_pos++; + } + + clusters_[cluster_idx].ids_ = std::move(new_ids); + } + + // Step 5: Update global metadata size_t new_size = valid_indices.size(); status_.resize(new_size); - // After compaction, all retained entries are valid std::fill(status_.begin(), status_.end(), IVFSlotMetadata::Valid); id_to_cluster_.resize(new_size); id_in_cluster_.resize(new_size); first_empty_ = new_size; + // Step 6: Re-add all IDs to translator + translator_.insert(external_ids, new_internal_ids, false); + svs::logging::info(logger_, "Compaction complete: {} valid entries", new_size); } @@ -770,70 +842,6 @@ class DynamicIVFIndex { } } - void rebuild_clusters_compact(const std::vector& valid_indices) { - // Group valid indices by cluster - // cluster_valid_indices[cluster_idx] contains pairs of (new_id, old_id) - std::vector>> cluster_valid_indices( - clusters_.size() - ); - - // Collect all mappings: (external_id, new_internal_id) - // NOTE: This must be done BEFORE we modify the translator - std::vector external_ids; - std::vector new_internal_ids; - external_ids.reserve(valid_indices.size()); - new_internal_ids.reserve(valid_indices.size()); - - for (size_t new_id = 0; new_id < valid_indices.size(); ++new_id) { - size_t old_id = valid_indices[new_id]; - size_t cluster_idx = id_to_cluster_[old_id]; - cluster_valid_indices[cluster_idx].push_back({new_id, old_id}); - - // Save the external ID mapping for later - auto external_id = translator_.get_external(old_id); - external_ids.push_back(external_id); - new_internal_ids.push_back(new_id); - } - - // Phase 1: Clear the translator completely - // This is simpler and safer than trying to selectively delete entries - translator_ = IDTranslator(); - - // Phase 2: Rebuild clusters and update metadata - for (size_t cluster_idx = 0; cluster_idx < clusters_.size(); ++cluster_idx) { - const auto& indices = cluster_valid_indices[cluster_idx]; - if (indices.empty()) { - clusters_[cluster_idx].data_ = - Data(0, clusters_[cluster_idx].data_.dimensions()); - clusters_[cluster_idx].ids_.clear(); - continue; - } - - Data new_data(indices.size(), clusters_[cluster_idx].data_.dimensions()); - std::vector new_ids; - new_ids.reserve(indices.size()); - - for (size_t pos = 0; pos < indices.size(); ++pos) { - auto [new_global_id, old_global_id] = indices[pos]; - size_t old_cluster = id_to_cluster_[old_global_id]; - size_t old_pos = id_in_cluster_[old_global_id]; - - new_data.set_datum(pos, clusters_[old_cluster].data_.get_datum(old_pos)); - new_ids.push_back(static_cast(new_global_id)); - - // Update metadata - id_to_cluster_[new_global_id] = cluster_idx; - id_in_cluster_[new_global_id] = pos; - } - - clusters_[cluster_idx].data_ = std::move(new_data); - clusters_[cluster_idx].ids_ = std::move(new_ids); - } - - // Phase 3: Re-add all IDs to the translator with their new internal IDs - translator_.insert(external_ids, new_internal_ids, false); - } - ///// Search Closures ///// /// @brief Create closure for searching centroids @@ -860,7 +868,8 @@ class DynamicIVFIndex { auto& buffer_leaves, size_t tid ) { - // Use the common search_leaves function + // Use the common search_leaves function with *this as cluster accessor + // DynamicIVFIndex provides a custom 
on_leaves that filters invalid entries search_leaves( query, distance, @@ -873,31 +882,24 @@ class DynamicIVFIndex { } public: - /// @brief Cluster accessor interface for search_leaves - /// This method provides filtered access to cluster leaves, skipping empty entries - /// - /// Note: For DynamicIVFIndex, we pass the global_id as the local_id (3rd parameter) - /// because the common search_leaves function will combine it with cluster_id to get - /// the final ID. Our get_global_id() just returns the local_id unchanged. + /// @brief Custom on_leaves that wraps DenseCluster::on_leaves with validity filtering + /// This ensures deleted entries are skipped during search template void on_leaves(Callback&& f, size_t cluster_id) const { - const auto& cluster = clusters_[cluster_id]; - for (size_t i = 0; i < cluster.size(); ++i) { - Idx global_id = cluster.ids_[i]; - - // Skip empty entries - if (!is_valid(global_id)) { - continue; - } - - auto datum = cluster.data_.get_datum(i); - // Pass global_id as the local_id (3rd param) since get_global_id returns it - // unchanged - f(datum, 0 /* unused gid */, global_id); - } + clusters_[cluster_id].on_leaves( + [this, &f](const auto& datum, auto global_id, auto local_pos) { + // Only invoke callback for valid (non-deleted) entries + if (is_valid(global_id)) { + f(datum, global_id, local_pos); + } + }, + prefetch_offset_ + ); } - /// @brief Get global ID for a point (identity function for dynamic IVF) - size_t get_global_id(size_t /*cluster_id*/, size_t local_id) const { return local_id; } + /// @brief Get global ID - delegates to DenseClusteredDataset + size_t get_global_id(size_t cluster_id, size_t local_pos) const { + return clusters_.get_global_id(cluster_id, local_pos); + } }; /// @brief Assemble a DynamicIVFIndex from clustering and data prototype From 639ca0849142a2745fd99a56223d09441de99009 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Fri, 12 Dec 2025 18:15:26 -0800 Subject: [PATCH 19/23] Add support for SQDataset --- include/svs/extensions/ivf/scalar.h | 17 ++ include/svs/quantization/scalar/scalar.h | 3 + tests/CMakeLists.txt | 1 + tests/integration/ivf/dynamic_scalar.cpp | 230 +++++++++++++++++++++++ 4 files changed, 251 insertions(+) create mode 100644 tests/integration/ivf/dynamic_scalar.cpp diff --git a/include/svs/extensions/ivf/scalar.h b/include/svs/extensions/ivf/scalar.h index cf199a641..ff919d73f 100644 --- a/include/svs/extensions/ivf/scalar.h +++ b/include/svs/extensions/ivf/scalar.h @@ -45,4 +45,21 @@ auto svs_invoke( return new_sqdata; } +// Specialization for blocked allocators (Dynamic IVF) +template +auto svs_invoke( + svs::tag_t, + const Data& original, + size_t new_size, + const data::Blocked& SVS_UNUSED(blocked_alloc) +) { + auto new_sqdata = SQDataset< + typename Data::element_type, + Data::extent, + data::Blocked>(new_size, original.dimensions()); + new_sqdata.set_scale(original.get_scale()); + new_sqdata.set_bias(original.get_bias()); + return new_sqdata; +} + } // namespace svs::quantization::scalar diff --git a/include/svs/quantization/scalar/scalar.h b/include/svs/quantization/scalar/scalar.h index 7ddf1cb9d..a2244d1fd 100644 --- a/include/svs/quantization/scalar/scalar.h +++ b/include/svs/quantization/scalar/scalar.h @@ -374,6 +374,9 @@ class SQDataset { // Data wrapped in the library allocator. using lib_alloc_data_type = SQDataset>; + // Data wrapped in the blocked library allocator (for Dynamic IVF). 
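+    // As with SimpleData::lib_blocked_alloc_data_type, dynamic IVF assembly requests
+    // this alias to obtain a resizable, block-allocated backing store for SQ-compressed
+    // clusters.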
+ using lib_blocked_alloc_data_type = + SQDataset>>; private: float scale_; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 828450f75..a023608de 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -178,6 +178,7 @@ if (SVS_EXPERIMENTAL_ENABLE_IVF) ${TEST_DIR}/integration/ivf/index_build.cpp ${TEST_DIR}/integration/ivf/index_search.cpp ${TEST_DIR}/integration/ivf/scalar_search.cpp + ${TEST_DIR}/integration/ivf/dynamic_scalar.cpp ) endif() diff --git a/tests/integration/ivf/dynamic_scalar.cpp b/tests/integration/ivf/dynamic_scalar.cpp new file mode 100644 index 000000000..77e4597f9 --- /dev/null +++ b/tests/integration/ivf/dynamic_scalar.cpp @@ -0,0 +1,230 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// svs +#include "svs/core/data.h" +#include "svs/core/distance.h" +#include "svs/core/recall.h" +#include "svs/extensions/ivf/scalar.h" +#include "svs/index/ivf/clustering.h" +#include "svs/orchestrators/dynamic_ivf.h" +#include "svs/quantization/scalar/scalar.h" + +// catch2 +#include "catch2/catch_test_macros.hpp" + +// tests +#include "tests/utils/test_dataset.h" + +// fmt +#include "fmt/core.h" + +// stl +#include +#include + +namespace sc = svs::quantization::scalar; + +namespace { + +constexpr size_t NUM_NEIGHBORS = 10; +constexpr size_t NUM_CLUSTERS = 10; +constexpr size_t EXTENT = 128; + +/// +/// Test Dynamic IVF with Scalar Quantization +/// +template +void test_dynamic_ivf_scalar(const Distance& distance) { + size_t num_threads = 2; + size_t intra_query_threads = 2; + + // Load test dataset + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto queries = test_dataset::queries(); + auto gt = test_dataset::groundtruth_euclidean(); + + // Build clustering on UNCOMPRESSED data + auto build_params = svs::index::ivf::IVFBuildParameters(NUM_CLUSTERS, 10, false); + auto threadpool = svs::threads::SequentialThreadPool(); + auto clustering = svs::index::ivf::build_clustering( + build_params, data, distance, threadpool, false + ); + + // Compress the data with Scalar Quantization + auto compressed_data = sc::SQDataset::compress(data); + + // Generate external IDs for the data + std::vector ids(data.size()); + std::iota(ids.begin(), ids.end(), 0); + + auto index = svs::DynamicIVF::assemble_from_clustering( + std::move(clustering), + compressed_data, + ids, + distance, + svs::threads::as_threadpool(num_threads), + intra_query_threads + ); + + // Search + auto search_params = svs::index::ivf::IVFSearchParameters( + NUM_CLUSTERS, // n_probes + NUM_NEIGHBORS // k_reorder + ); + + auto results = svs::QueryResult(queries.size(), NUM_NEIGHBORS); + index.search( + results.view(), + svs::data::ConstSimpleDataView{ + queries.data(), queries.size(), queries.dimensions()}, + search_params + ); + + // Check recall + auto recall = svs::k_recall_at_n(gt, results, NUM_NEIGHBORS, NUM_NEIGHBORS); + + // Set expected recall thresholds based on quantization level + CATCH_REQUIRE(recall > 0.9); +} + 
+/// +/// Test Dynamic IVF with Scalar Quantization - Add/Delete/Compact stress test +/// +template +void test_dynamic_ivf_scalar_stress(const Distance& distance) { + size_t num_threads = 2; + size_t intra_query_threads = 2; + + // Load test dataset + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto queries = test_dataset::queries(); + auto gt = test_dataset::groundtruth_euclidean(); + + // Start with half the data + size_t initial_size = data.size() / 2; + auto initial_data = svs::data::SimpleData(initial_size, EXTENT); + for (size_t i = 0; i < initial_size; ++i) { + initial_data.set_datum(i, data.get_datum(i)); + } + + // Build clustering on initial data + auto build_params = svs::index::ivf::IVFBuildParameters(NUM_CLUSTERS, 10, false); + auto threadpool = svs::threads::SequentialThreadPool(); + auto clustering = svs::index::ivf::build_clustering( + build_params, initial_data, distance, threadpool, false + ); + + // Compress with Scalar Quantization + auto compressed_data = sc::SQDataset::compress(initial_data); + + // Generate external IDs + std::vector ids(initial_size); + std::iota(ids.begin(), ids.end(), 0); + + auto index = svs::DynamicIVF::assemble_from_clustering( + std::move(clustering), + compressed_data, + ids, + distance, + svs::threads::as_threadpool(num_threads), + intra_query_threads + ); + + auto search_params = svs::index::ivf::IVFSearchParameters(NUM_CLUSTERS, NUM_NEIGHBORS); + auto results = svs::QueryResult(queries.size(), NUM_NEIGHBORS); + + // Perform add/delete/compact cycles + std::mt19937 rng(12345); + std::uniform_int_distribution idx_dist(0, initial_size - 1); + + for (size_t cycle = 0; cycle < 3; ++cycle) { + // Delete some entries + std::vector to_delete; + for (size_t i = 0; i < 20 && i < ids.size(); ++i) { + size_t idx = idx_dist(rng) % ids.size(); + to_delete.push_back(ids[idx]); + } + if (!to_delete.empty()) { + index.delete_points(to_delete); + } + + // Add new entries (uncompressed - index will compress them) + size_t num_to_add = 30; + auto new_data = svs::data::SimpleData(num_to_add, EXTENT); + std::vector new_ids; + size_t new_base_id = 100000 + cycle * 1000; + + for (size_t i = 0; i < num_to_add; ++i) { + new_ids.push_back(new_base_id + i); + new_data.set_datum(i, data.get_datum(i % data.size())); + } + + // Pass uncompressed data as ConstSimpleDataView - index will compress + auto new_data_view = svs::data::ConstSimpleDataView{ + new_data.data(), new_data.size(), new_data.dimensions() + }; + index.add_points(new_data_view, new_ids, false); + + // Search after modifications + index.search( + results.view(), + svs::data::ConstSimpleDataView{ + queries.data(), queries.size(), queries.dimensions()}, + search_params + ); + + // Verify no deleted IDs appear in results + for (size_t q = 0; q < queries.size(); ++q) { + for (size_t k = 0; k < NUM_NEIGHBORS; ++k) { + auto result_id = results.index(q, k); + for (auto deleted_id : to_delete) { + CATCH_REQUIRE(result_id != deleted_id); + } + } + } + + // Compact every cycle + index.compact(50); + + // Search after compaction + index.search( + results.view(), + svs::data::ConstSimpleDataView{ + queries.data(), queries.size(), queries.dimensions()}, + search_params + ); + + // Verify all results are valid + for (size_t q = 0; q < queries.size(); ++q) { + CATCH_REQUIRE(results.index(q, 0) != std::numeric_limits::max()); + } + } +} + +} // anonymous namespace + +CATCH_TEST_CASE("Dynamic IVF with Scalar Quantization", "[integration][dynamic_ivf][scalar]") { + auto distance = 
svs::DistanceL2(); + + CATCH_SECTION("int8 quantization") { + test_dynamic_ivf_scalar(distance); + } + + CATCH_SECTION("int8 stress test") { + test_dynamic_ivf_scalar_stress(distance); + } +} From 2f714b2da97c8b33ed4bf76be8bc7f134df93607 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Fri, 12 Dec 2025 18:16:46 -0800 Subject: [PATCH 20/23] clang format --- include/svs/extensions/ivf/scalar.h | 8 ++++---- tests/integration/ivf/dynamic_scalar.cpp | 15 ++++++--------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/include/svs/extensions/ivf/scalar.h b/include/svs/extensions/ivf/scalar.h index ff919d73f..4732a3fa3 100644 --- a/include/svs/extensions/ivf/scalar.h +++ b/include/svs/extensions/ivf/scalar.h @@ -53,10 +53,10 @@ auto svs_invoke( size_t new_size, const data::Blocked& SVS_UNUSED(blocked_alloc) ) { - auto new_sqdata = SQDataset< - typename Data::element_type, - Data::extent, - data::Blocked>(new_size, original.dimensions()); + auto new_sqdata = + SQDataset>( + new_size, original.dimensions() + ); new_sqdata.set_scale(original.get_scale()); new_sqdata.set_bias(original.get_bias()); return new_sqdata; diff --git a/tests/integration/ivf/dynamic_scalar.cpp b/tests/integration/ivf/dynamic_scalar.cpp index 77e4597f9..df6a761fa 100644 --- a/tests/integration/ivf/dynamic_scalar.cpp +++ b/tests/integration/ivf/dynamic_scalar.cpp @@ -175,8 +175,7 @@ void test_dynamic_ivf_scalar_stress(const Distance& distance) { // Pass uncompressed data as ConstSimpleDataView - index will compress auto new_data_view = svs::data::ConstSimpleDataView{ - new_data.data(), new_data.size(), new_data.dimensions() - }; + new_data.data(), new_data.size(), new_data.dimensions()}; index.add_points(new_data_view, new_ids, false); // Search after modifications @@ -217,14 +216,12 @@ void test_dynamic_ivf_scalar_stress(const Distance& distance) { } // anonymous namespace -CATCH_TEST_CASE("Dynamic IVF with Scalar Quantization", "[integration][dynamic_ivf][scalar]") { +CATCH_TEST_CASE( + "Dynamic IVF with Scalar Quantization", "[integration][dynamic_ivf][scalar]" +) { auto distance = svs::DistanceL2(); - CATCH_SECTION("int8 quantization") { - test_dynamic_ivf_scalar(distance); - } + CATCH_SECTION("int8 quantization") { test_dynamic_ivf_scalar(distance); } - CATCH_SECTION("int8 stress test") { - test_dynamic_ivf_scalar_stress(distance); - } + CATCH_SECTION("int8 stress test") { test_dynamic_ivf_scalar_stress(distance); } } From 52d9b47fa507f00599111240208e25a190fdb658 Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Mon, 15 Dec 2025 11:20:11 -0800 Subject: [PATCH 21/23] Incorporated review comments --- include/svs/index/ivf/clustering.h | 2 +- include/svs/index/ivf/common.h | 25 ++++++++++--------------- include/svs/index/ivf/dynamic_ivf.h | 2 +- include/svs/index/ivf/index.h | 12 ++++++------ 4 files changed, 18 insertions(+), 23 deletions(-) diff --git a/include/svs/index/ivf/clustering.h b/include/svs/index/ivf/clustering.h index 8017191ef..93a36526c 100644 --- a/include/svs/index/ivf/clustering.h +++ b/include/svs/index/ivf/clustering.h @@ -270,7 +270,7 @@ template struct DenseCluster { size_t size() const { return data_.size(); } - // Support for dynamic operations - SimpleData already has resize() + // Support for dynamic operations void resize(size_t new_size) { data_.resize(new_size); ids_.resize(new_size); diff --git a/include/svs/index/ivf/common.h b/include/svs/index/ivf/common.h index bcb7062a8..e914778d6 100644 --- a/include/svs/index/ivf/common.h +++ b/include/svs/index/ivf/common.h @@ -242,16 
+242,19 @@ void compute_matmul( constexpr size_t max_int = static_cast(std::numeric_limits::max()); if (m > max_int || n > max_int || k > max_int) { throw ANNEXCEPTION( - "Matrix dimensions too large for Intel MKL GEMM: m={}, n={}, k={}", m, n, k + "Matrix dimensions too large for Intel MKL GEMM: m={}, n={}, k={}, max={}", + m, + n, + k, + max_int ); } + // Cast size_t parameters to int for MKL GEMM functions + int m_int = static_cast(m); + int n_int = static_cast(n); + int k_int = static_cast(k); if constexpr (std::is_same_v) { - // Cast size_t parameters to int for MKL GEMM functions - int m_int = static_cast(m); - int n_int = static_cast(n); - int k_int = static_cast(k); - cblas_sgemm( CblasRowMajor, // CBLAS_LAYOUT layout CblasNoTrans, // CBLAS_TRANSPOSE TransA @@ -269,10 +272,6 @@ void compute_matmul( n_int // const int ldc ); } else if constexpr (std::is_same_v) { - int m_int = static_cast(m); - int n_int = static_cast(n); - int k_int = static_cast(k); - cblas_gemm_bf16bf16f32( CblasRowMajor, // CBLAS_LAYOUT layout CblasNoTrans, // CBLAS_TRANSPOSE TransA @@ -290,10 +289,6 @@ void compute_matmul( n_int // const int ldc ); } else if constexpr (std::is_same_v) { - int m_int = static_cast(m); - int n_int = static_cast(n); - int k_int = static_cast(k); - cblas_gemm_f16f16f32( CblasRowMajor, // CBLAS_LAYOUT layout CblasNoTrans, // CBLAS_TRANSPOSE TransA @@ -362,7 +357,7 @@ template < typename Distance, threads::ThreadPool Pool> void centroid_assignment( - Data& data, + const Data& data, std::vector& data_norm, threads::UnitRange batch_range, Distance& SVS_UNUSED(distance), diff --git a/include/svs/index/ivf/dynamic_ivf.h b/include/svs/index/ivf/dynamic_ivf.h index acdf9ab7c..966fc3b12 100644 --- a/include/svs/index/ivf/dynamic_ivf.h +++ b/include/svs/index/ivf/dynamic_ivf.h @@ -772,7 +772,7 @@ class DynamicIVFIndex { // Use centroid_assignment to compute assignments for this batch centroid_assignment( - const_cast(points), // centroid_assignment expects non-const + points, data_norm, batch_range, distance_, diff --git a/include/svs/index/ivf/index.h b/include/svs/index/ivf/index.h index 221e44c73..779b9e0f5 100644 --- a/include/svs/index/ivf/index.h +++ b/include/svs/index/ivf/index.h @@ -39,7 +39,7 @@ namespace svs::index::ivf { // performance. This value was chosen based on empirical testing to avoid excessive memory // allocation while supporting large batch operations typical in high-throughput // environments. 
-const size_t MAX_QUERY_BATCH_SIZE = 10000; +constexpr size_t MAX_QUERY_BATCH_SIZE = 10000; /// @brief IVF (Inverted File) Index implementation for efficient similarity search /// @@ -177,7 +177,7 @@ class IVFIndex { template double get_distance(size_t id, const Query& query) const { // Lazily initialize ID mapping on first call if (id_to_cluster_.empty()) { - const_cast(this)->initialize_id_mapping(); + initialize_id_mapping(); } // Check if id exists @@ -200,7 +200,7 @@ class IVFIndex { size_t cluster_id = id_to_cluster_[id]; size_t pos = id_in_cluster_[id]; - // Fix distance argument if needed (e.g., for cosine similarity) + // Fix distance argument if needed auto distance_copy = distance_; svs::distance::maybe_fix_argument(distance_copy, query); @@ -322,9 +322,9 @@ class IVFIndex { ///// ID Mapping for get_distance ///// // Maps ID -> cluster_id - std::vector id_to_cluster_{}; + mutable std::vector id_to_cluster_{}; // Maps ID -> position within cluster - std::vector id_in_cluster_{}; + mutable std::vector id_in_cluster_{}; ///// Threading Infrastructure ///// InterQueryThreadPool inter_query_threadpool_; // Handles parallelism across queries @@ -382,7 +382,7 @@ class IVFIndex { } } - void initialize_id_mapping() { + void initialize_id_mapping() const { // Build ID-to-location mapping from cluster data // Compute total size by summing all cluster sizes size_t total_size = 0; From 9ad794f02c7d7acbd3eb839ec9a1de46139aa6da Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Tue, 16 Dec 2025 11:01:51 -0800 Subject: [PATCH 22/23] Add thread safety for get_distance function --- include/svs/index/ivf/index.h | 11 ++-- tests/integration/ivf/index_search.cpp | 79 ++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 4 deletions(-) diff --git a/include/svs/index/ivf/index.h b/include/svs/index/ivf/index.h index 779b9e0f5..dc11473f1 100644 --- a/include/svs/index/ivf/index.h +++ b/include/svs/index/ivf/index.h @@ -30,6 +30,8 @@ #include "fmt/core.h" // stl +#include +#include #include #include @@ -175,10 +177,8 @@ class IVFIndex { /// @brief Compute the distance between a query vector and a vector in the index template double get_distance(size_t id, const Query& query) const { - // Lazily initialize ID mapping on first call - if (id_to_cluster_.empty()) { - initialize_id_mapping(); - } + // Thread-safe lazy initialization of ID mapping + std::call_once(*id_mapping_init_flag_, [this]() { initialize_id_mapping(); }); // Check if id exists if (!has_id(id)) { @@ -325,6 +325,9 @@ class IVFIndex { mutable std::vector id_to_cluster_{}; // Maps ID -> position within cluster mutable std::vector id_in_cluster_{}; + // Thread-safe initialization flag for ID mapping (wrapped in unique_ptr for movability) + mutable std::unique_ptr id_mapping_init_flag_{ + std::make_unique()}; ///// Threading Infrastructure ///// InterQueryThreadPool inter_query_threadpool_; // Handles parallelism across queries diff --git a/tests/integration/ivf/index_search.cpp b/tests/integration/ivf/index_search.cpp index 7d3e7f53d..7de26e2e7 100644 --- a/tests/integration/ivf/index_search.cpp +++ b/tests/integration/ivf/index_search.cpp @@ -15,11 +15,13 @@ */ // stl +#include #include #include #include #include #include +#include #include // svs @@ -190,3 +192,80 @@ CATCH_TEST_CASE("IVF get_distance", "[integration][ivf][get_distance]") { index.get_distance(data.size() + 1000, queries.get_datum(0)), svs::ANNException ); } + +CATCH_TEST_CASE( + "IVF get_distance thread safety", "[integration][ivf][get_distance][thread_safety]" +) { + 
auto datafile = test_dataset::data_svs_file(); + auto queries = test_dataset::queries(); + auto dist_l2 = svs::distance::DistanceL2(); + + auto data = svs::data::SimpleData::load(datafile); + + size_t num_threads = 2; + auto index = svs::IVF::assemble_from_file( + test_dataset::clustering_directory(), data, dist_l2, num_threads, 1 + ); + + // Test thread safety of get_distance with concurrent calls + // The lazy initialization of ID mapping should be thread-safe with std::call_once + constexpr size_t NUM_TEST_THREADS = 8; + constexpr size_t CALLS_PER_THREAD = 100; + constexpr double TOLERANCE = 1e-2; + + // Prepare test data + std::vector test_ids; + for (size_t i = 0; i < std::min(10, data.size()); ++i) { + test_ids.push_back(i * (data.size() / 10)); + } + + // Pre-compute expected distances for verification + std::vector> expected_distances(test_ids.size()); + for (size_t i = 0; i < test_ids.size(); ++i) { + expected_distances[i].resize(queries.size()); + auto datum = data.get_datum(test_ids[i]); + for (size_t q = 0; q < queries.size(); ++q) { + auto query = queries.get_datum(q); + svs::distance::DistanceL2 dist_copy; + svs::distance::maybe_fix_argument(dist_copy, query); + expected_distances[i][q] = svs::distance::compute(dist_copy, query, datum); + } + } + + // Track results and errors from threads + std::atomic success_count{0}; + std::atomic error_count{0}; + std::vector threads; + threads.reserve(NUM_TEST_THREADS); + + // Launch multiple threads that concurrently call get_distance + for (size_t t = 0; t < NUM_TEST_THREADS; ++t) { + threads.emplace_back([&, t]() { + for (size_t call = 0; call < CALLS_PER_THREAD; ++call) { + size_t id_idx = (t + call) % test_ids.size(); + size_t query_idx = (t * CALLS_PER_THREAD + call) % queries.size(); + size_t test_id = test_ids[id_idx]; + + auto query = queries.get_datum(query_idx); + double index_distance = index.get_distance(test_id, query); + double expected = expected_distances[id_idx][query_idx]; + + double relative_diff = std::abs((index_distance - expected) / expected); + if (relative_diff < TOLERANCE) { + ++success_count; + } else { + ++error_count; + } + } + }); + } + + // Wait for all threads to complete + for (auto& thread : threads) { + thread.join(); + } + + // Verify all calls succeeded + CATCH_REQUIRE(error_count == 0); + CATCH_REQUIRE(success_count == NUM_TEST_THREADS * CALLS_PER_THREAD); +} From 76c68b693720cf491c559403b0d94e552081469f Mon Sep 17 00:00:00 2001 From: Ishwar Bhati Date: Tue, 16 Dec 2025 12:46:39 -0800 Subject: [PATCH 23/23] formatting --- examples/python/example_ivf.py | 34 ++++++++-------- examples/python/example_ivf_dynamic.py | 54 +++++++++++++------------- tests/integration/ivf/index_build.cpp | 2 +- 3 files changed, 45 insertions(+), 45 deletions(-) diff --git a/examples/python/example_ivf.py b/examples/python/example_ivf.py index 1027ff4b7..63f80dd62 100644 --- a/examples/python/example_ivf.py +++ b/examples/python/example_ivf.py @@ -31,12 +31,12 @@ def main(): print("=" * 80) print("Static IVF Index Example") print("=" * 80) - + # [generate-dataset] # Create a test dataset test_data_dir = "./example_data_ivf" print(f"\n1. Generating test dataset in '{test_data_dir}'...") - + svs.generate_test_dataset( 10000, # Create 10,000 vectors in the dataset 1000, # Generate 1,000 query vectors @@ -49,7 +49,7 @@ def main(): ) print(" ✓ Dataset generated") # [generate-dataset] - + # [build-parameters] # Configure clustering parameters for IVF print("\n2. 
Configuring build parameters...") @@ -63,7 +63,7 @@ def main(): ) print(f" ✓ Configured {build_parameters.num_centroids} centroids") # [build-parameters] - + # [load-data] # Load the dataset print("\n3. Loading dataset...") @@ -75,7 +75,7 @@ def main(): ) print(f" ✓ Data loader created") # [load-data] - + # [build-clustering] # Build the clustering print("\n4. Building clustering (k-means)...") @@ -87,7 +87,7 @@ def main(): ) print(f" ✓ Clustering built with {build_parameters.num_centroids} centroids") # [build-clustering] - + # [assemble-index] # Assemble the IVF index from clustering print("\n5. Assembling IVF index from clustering...") @@ -101,7 +101,7 @@ def main(): print(f" ✓ Index assembled with {index.size} vectors") print(f" ✓ Index dimensions: {index.dimensions}") # [assemble-index] - + # [configure-search] # Configure search parameters print("\n6. Configuring search parameters...") @@ -112,20 +112,20 @@ def main(): index.search_parameters = search_params print(f" ✓ Search parameters: n_probes={search_params.n_probes}") # [configure-search] - + # [search] # Perform search print("\n7. Searching the index...") queries = svs.read_vecs(os.path.join(test_data_dir, "queries.fvecs")) groundtruth = svs.read_vecs(os.path.join(test_data_dir, "groundtruth.ivecs")) - + num_neighbors = 10 I, D = index.search(queries, num_neighbors) recall = svs.k_recall_at(groundtruth, I, num_neighbors, num_neighbors) print(f" ✓ Recall@{num_neighbors}: {recall:.4f}") print(f" ✓ Result shape: {I.shape}") # [search] - + # [save-clustering] # Save the clustering for later use print("\n8. Saving clustering...") @@ -133,12 +133,12 @@ def main(): clustering.save(clustering_path) print(f" ✓ Clustering saved to '{clustering_path}'") # [save-clustering] - + # [load-and-assemble] # Load clustering and assemble a new index print("\n9. Loading clustering and assembling new index...") loaded_clustering = svs.Clustering.load_clustering(clustering_path) - + new_index = svs.IVF.assemble_from_clustering( clustering = loaded_clustering, data_loader = data_loader, @@ -148,7 +148,7 @@ def main(): ) print(f" ✓ New index assembled with {new_index.size} vectors") # [load-and-assemble] - + # [assemble-from-file] # Or directly assemble from file print("\n10. Assembling index directly from clustering file...") @@ -161,7 +161,7 @@ def main(): ) print(f" ✓ Index assembled with {index_from_file.size} vectors") # [assemble-from-file] - + # [search-verification] # Verify both indices produce the same results print("\n11. Verifying search results consistency...") @@ -169,13 +169,13 @@ def main(): I2, D2 = index_from_file.search(queries, num_neighbors) recall2 = svs.k_recall_at(groundtruth, I2, num_neighbors, num_neighbors) print(f" ✓ Recall@{num_neighbors}: {recall2:.4f}") - + if np.allclose(D, D2): print(" ✓ Both indices produce identical results") else: print(" ✗ Warning: Results differ slightly (expected due to floating point)") # [search-verification] - + # [tune-search-parameters] # Experiment with different search parameters print("\n12. 
Tuning search parameters...") @@ -186,7 +186,7 @@ def main(): recall_tuned = svs.k_recall_at(groundtruth, I_tuned, num_neighbors, num_neighbors) print(f" ✓ n_probes={n_probes:2d}: Recall@{num_neighbors} = {recall_tuned:.4f}") # [tune-search-parameters] - + print("\n" + "=" * 80) print("Example completed successfully!") print("=" * 80) diff --git a/examples/python/example_ivf_dynamic.py b/examples/python/example_ivf_dynamic.py index e861d9854..605ff9ecc 100644 --- a/examples/python/example_ivf_dynamic.py +++ b/examples/python/example_ivf_dynamic.py @@ -32,12 +32,12 @@ def main(): print("=" * 80) print("Dynamic IVF Index Example") print("=" * 80) - + # [generate-dataset] # Create a test dataset with 10,000 vectors test_data_dir = "./example_data_ivf_dynamic" print(f"\n1. Generating test dataset in '{test_data_dir}'...") - + svs.generate_test_dataset( 1000, # Create 1000 vectors in the dataset 100, # Generate 100 query vectors @@ -50,7 +50,7 @@ def main(): ) print(" ✓ Dataset generated") # [generate-dataset] - + # [build-parameters] # Configure clustering parameters for IVF print("\n2. Configuring build parameters...") @@ -64,16 +64,16 @@ def main(): ) print(f" ✓ Configured {build_parameters.num_centroids} centroids") # [build-parameters] - + # [build-clustering-and-assemble] # Build clustering and then assemble the dynamic IVF index print("\n3. Building clustering and assembling dynamic IVF index...") - + # Load all data data = svs.read_vecs(os.path.join(test_data_dir, "data.fvecs")) n_total = data.shape[0] # Total vectors (1000) ids_all = np.arange(n_total).astype('uint64') - + # Build the clustering using all data data_loader = svs.VectorDataLoader( os.path.join(test_data_dir, "data.fvecs"), @@ -87,7 +87,7 @@ def main(): num_threads = 4, ) print(f" ✓ Clustering built with {build_parameters.num_centroids} centroids") - + # Assemble the dynamic IVF index with all vectors print(" Assembling dynamic IVF index from clustering...") index = svs.DynamicIVF.assemble_from_clustering( @@ -101,57 +101,57 @@ def main(): print(f" ✓ Index assembled with {index.size} vectors") print(f" ✓ Index dimensions: {index.dimensions}") # [build-clustering-and-assemble] - + # [demonstrate-dynamic-operations] # Demonstrate add and delete operations (even though we already have all vectors) print("\n4. Demonstrating dynamic operations...") print(f" Initial index size: {index.size}") - + # Delete some vectors print(" Deleting first 100 vectors...") ids_to_delete = np.arange(100).astype('uint64') index.delete(ids_to_delete) print(f" After deletion: {index.size} vectors") - + # Add them back print(" Adding 100 vectors back...") index.add(data[:100], ids_to_delete) print(f" After addition: {index.size} vectors") # [demonstrate-dynamic-operations] - + # [search-before-delete] # Search before deletion print("\n5. Searching the index...") queries = svs.read_vecs(os.path.join(test_data_dir, "queries.fvecs")) groundtruth = svs.read_vecs(os.path.join(test_data_dir, "groundtruth.ivecs")) - + # Configure search parameters search_params = svs.IVFSearchParameters( n_probes = 10, # Number of clusters to search k_reorder = 1.0 # Reorder factor ) index.search_parameters = search_params - + # Perform search num_neighbors = 10 I, D = index.search(queries, num_neighbors) recall = svs.k_recall_at(groundtruth, I, num_neighbors, num_neighbors) print(f" ✓ Recall@{num_neighbors}: {recall:.4f}") # [search-before-delete] - + # [get-distance] # Compute distance between a query and a specific indexed vector print("\n6. 
Computing distances with get_distance()...") query_vector = queries[0] test_id = 100 - + if index.has_id(test_id): distance = index.get_distance(test_id, query_vector) print(f" ✓ Distance from query to vector {test_id}: {distance:.6f}") else: print(f" ✗ Vector {test_id} not found in index") # [get-distance] - + # [remove-vectors] # Remove vectors from the index print("\n7. Removing the first 50 vectors...") @@ -159,19 +159,19 @@ def main(): num_deleted = index.delete(ids_to_delete) print(f" ✓ Deleted {num_deleted} vectors") print(f" ✓ Index size after deletion: {index.size}") - + # Verify vectors are deleted if not index.has_id(25): print(f" ✓ Verified: Vector ID 25 no longer in index") # [remove-vectors] - + # [consolidate-index] # Consolidate and compact the index print("\n8. Consolidating and compacting the index...") index.consolidate().compact(1000) print(f" ✓ Index consolidated and compacted") # [consolidate-index] - + # [search-after-modifications] # Search after modifications print("\n9. Searching after modifications...") @@ -179,35 +179,35 @@ def main(): recall = svs.k_recall_at(groundtruth, I, num_neighbors, num_neighbors) print(f" ✓ Recall@{num_neighbors}: {recall:.4f}") # [search-after-modifications] - + # [tune-search-parameters] # Experiment with different search parameters print("\n10. Tuning search parameters...") for n_probes in [5, 10, 20, 30]: search_params.n_probes = n_probes index.search_parameters = search_params - + I, D = index.search(queries, num_neighbors) recall = svs.k_recall_at(groundtruth, I, num_neighbors, num_neighbors) print(f" n_probes={n_probes:2d} → Recall@{num_neighbors}: {recall:.4f}") # [tune-search-parameters] - + # [save-index] # Save the index to disk print("\n11. Saving the index...") config_dir = os.path.join(test_data_dir, "saved_config") data_dir = os.path.join(test_data_dir, "saved_data") - + # Create directories if they don't exist os.makedirs(config_dir, exist_ok=True) os.makedirs(data_dir, exist_ok=True) - + index.save(config_dir, data_dir) print(f" ✓ Index saved to:") print(f" Config: {config_dir}") print(f" Data: {data_dir}") # [save-index] - + # [load-index] # Note: DynamicIVF.load() is being implemented for easier reload # For now, the index has been successfully saved and can be accessed at: @@ -216,7 +216,7 @@ def main(): print(f" ✓ Data: {data_dir}") print(f" Note: load() API coming soon for simplified reload") # [load-index] - + # [get-all-ids] # Inspect final index state print("\n13. Final index inspection...") @@ -224,7 +224,7 @@ def main(): print(f" ✓ Index contains {len(all_ids)} unique IDs") print(f" ✓ ID range: [{np.min(all_ids)}, {np.max(all_ids)}]") # [get-all-ids] - + print("\n" + "=" * 80) print("Dynamic IVF Example Completed Successfully!") print("=" * 80) diff --git a/tests/integration/ivf/index_build.cpp b/tests/integration/ivf/index_build.cpp index eeb0ca24e..3c6a11d90 100644 --- a/tests/integration/ivf/index_build.cpp +++ b/tests/integration/ivf/index_build.cpp @@ -214,4 +214,4 @@ CATCH_TEST_CASE("IVF Build/Clustering", "[integration][build][ivf][train_only]") test_build_train_only(svs::DistanceL2()); test_build_train_only(svs::DistanceIP()); -} \ No newline at end of file +}
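
Note on the pattern used in PATCH 22/23: std::once_flag is neither copyable nor movable, which is why the patch wraps it in a std::unique_ptr so the index class stays movable, and why the lazily built ID maps are declared mutable so they can be filled from a const accessor. The following is a minimal standalone sketch of that idea, not code from the SVS tree; the names LazyIdMap, cluster_of, and build_mapping are illustrative, and it assumes IDs are dense in [0, total) as in the patch's initialize_id_mapping.

    // Sketch: thread-safe lazy initialization of an ID -> cluster map.
    #include <cstddef>
    #include <memory>
    #include <mutex>
    #include <vector>

    class LazyIdMap {
      public:
        explicit LazyIdMap(std::vector<std::vector<size_t>> clusters)
            : clusters_{std::move(clusters)} {}

        // Safe to call concurrently; the first caller builds the mapping exactly once.
        size_t cluster_of(size_t id) const {
            std::call_once(*init_flag_, [this]() { build_mapping(); });
            return id_to_cluster_.at(id);
        }

      private:
        void build_mapping() const {
            // Total number of IDs across all clusters (assumes dense IDs).
            size_t total = 0;
            for (const auto& c : clusters_) {
                total += c.size();
            }
            id_to_cluster_.assign(total, 0);
            for (size_t cluster = 0; cluster < clusters_.size(); ++cluster) {
                for (size_t id : clusters_[cluster]) {
                    id_to_cluster_[id] = cluster;
                }
            }
        }

        std::vector<std::vector<size_t>> clusters_;
        // Mutable so the const accessor can populate them on first use.
        mutable std::vector<size_t> id_to_cluster_{};
        // unique_ptr keeps the class movable; std::once_flag itself is not.
        mutable std::unique_ptr<std::once_flag> init_flag_{
            std::make_unique<std::once_flag>()};
    };

Compared with the earlier "if (id_to_cluster_.empty()) initialize_id_mapping();" check, std::call_once avoids the data race where two threads observe the empty map and build it concurrently, which is exactly the scenario exercised by the new thread-safety integration test.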