From 355e52b75ea4b9cf26c501cd74f59ff3484cb3dc Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Tue, 10 Feb 2026 11:05:16 +0300 Subject: [PATCH 01/18] new create index --- src/main.cpp | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/src/main.cpp b/src/main.cpp index cf29bd03e..279c3262c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -286,6 +286,120 @@ int main(int argc, char** argv) { return crow::response(200, response.dump()); }); + // new create index + CROW_ROUTE(app, "/api/v1/index/newcreate") + .CROW_MIDDLEWARES(app, AuthMiddleware) + .methods("POST"_method)([&index_manager, &app](const crow::request& req) { + auto& ctx = app.get_context(req); + + auto body = crow::json::load(req.body); + if(!body) { + return json_error(400, "Invalid JSON"); + } + + if(!body.has("index_name") || body["index_name"].t() != crow::json::type::String){ + return json_error(400, "Parameters error: 'index_name'"); + } + + std::string index_id = ctx.username + "/" + std::string(body["index_name"].s()); + std::cout << "index id: " << index_id << "\n"; + + /** + * create a struct for each dense vector in the list + */ + if(body.has("dense_vectors")){ + printf("index has dense vectors\n"); + auto& dense_blocks = body["dense_vectors"]; + + for(auto& key: dense_blocks.keys()){ + // struct IndexConfig index_config; + + printf("dense_vectors has index_key:%s\n", key.c_str()); + auto& config = dense_blocks[key]; + + // dim is mandatory + size_t dim; + if(!config.has("dim") || config["dim"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'dim'"); + } + dim = (size_t)config["dim"].i(); + std::cout << "dim: " << dim << "\n"; + + + // Space_type is mandatory + std::string space_type; + if(!config.has("space_type") || config["space_type"].t() != crow::json::type::String){ + return json_error(400, "Parameters error: 'dim'"); + } + space_type = (std::string)config["space_type"].s(); + std::cout << "space_type: " 
<< space_type << "\n"; + + + size_t m = settings::DEFAULT_M; + if(config.has("M")){ + if(config["M"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'M'"); + } + m = (size_t)config["M"].i(); + } + std::cout << "m: " << m << "\n"; + + size_t ef_con = settings::DEFAULT_EF_CONSTRUCT; + if(config.has("ef_con")){ + if(config["ef_con"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'ef_con'"); + } + ef_con = (size_t)config["ef_con"].i(); + } + std::cout << "ef_con: " << ef_con << "\n"; + + ndd::quant::QuantizationLevel quant_level = ndd::quant::QuantizationLevel::INT8; + if(config.has("precision")){ + if(config["precision"].t() != crow::json::type::String){ + return json_error(400, "Parameters error: 'precision'"); + } + quant_level = stringToQuantLevel(config["precision"].s()); + } + std::cout << "quant level: " << quantLevelToString(quant_level) << "\n"; + + int32_t checksum = -1; + if(config.has("checksum")){ + if(config["checksum"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'checksum'"); + } + checksum = config["checksum"].i(); + } + + size_t size_in_millions = 0; + if(config.has("size_in_millions")){ + if(config["size_in_millions"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'size_in_millions'"); + } + checksum = config["size_in_millions"].i(); + } + + // index_config = IndexConfig {dim, + // sparse_dim, + // settings::MAX_ELEMENTS, // max elements + // body["space_type"].s(), + // m, + // ef_con, + // quant_level, + // checksum}; + + // if(!check_creation_sanity(struct IndexConfig conf)){ + // return json_error(400, "error"); + // } + } + } + + if(body.has("sparse_vectors")){ + printf("index has sparse vectors\n"); + } + + return crow::response(200, "Index created successfully"); + }); + // Create index CROW_ROUTE(app, "/api/v1/index/create") .CROW_MIDDLEWARES(app, AuthMiddleware) From 73d7a2e9150a4b96d6ff7db1b925aba052f29908 Mon Sep 17 
00:00:00 2001 From: Shaleen Garg Date: Tue, 10 Feb 2026 18:47:28 +0300 Subject: [PATCH 02/18] sanity checks --- CMakeLists.txt | 2 +- src/core/ndd.cpp | 69 ++++++++++++++++++++++++++++++++++++++++++ src/core/ndd.hpp | 17 +++++++++++ src/main.cpp | 33 +++++++++++--------- src/utils/settings.hpp | 1 + 5 files changed, 106 insertions(+), 16 deletions(-) create mode 100644 src/core/ndd.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 86c261a0f..288cc51a3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,7 +239,7 @@ message(STATUS "Binary name: ${NDD_BINARY_NAME}") # Create the target -add_executable(${NDD_BINARY_NAME} src/main.cpp ${LMDB_SOURCES} third_party/roaring_bitmap/roaring.c) +add_executable(${NDD_BINARY_NAME} src/main.cpp src/core/ndd.cpp ${LMDB_SOURCES} third_party/roaring_bitmap/roaring.c) # Set MDBX-specific compile flags set_source_files_properties(${LMDB_SOURCES} PROPERTIES diff --git a/src/core/ndd.cpp b/src/core/ndd.cpp new file mode 100644 index 000000000..0be65e09a --- /dev/null +++ b/src/core/ndd.cpp @@ -0,0 +1,69 @@ +#include +#include "ndd.hpp" + +/** + * returns if index config is sane + */ +std::pair check_index_config_sanity(struct NewIndexConfig index_config){ + std::pair ret; + ret.first = true; + ret.second = ""; + + if(index_config.dim < settings::MIN_DIMENSION || index_config.dim > settings::MAX_DIMENSION) { + ret.first = false; + ret.second += "Invalid dimension: " + std::to_string(index_config.dim) + + ". Should be between " + std::to_string(settings::MIN_DIMENSION) + + " and " + std::to_string(settings::MAX_DIMENSION); + LOG_ERROR(ret.second); + return ret; + } + + if(index_config.M < settings::MIN_M || index_config.M > settings::MAX_M) { + ret.first = false; + ret.second += "Invalid M: " + std::to_string(index_config.M) + + ". 
Should be between " + std::to_string(settings::MIN_M) + + " and " + std::to_string(settings::MAX_M); + LOG_ERROR(ret.second); + return ret; + } + + if(index_config.ef_construction < settings::MIN_EF_CONSTRUCT || + index_config.ef_construction > settings::MAX_EF_CONSTRUCT) + { + ret.first = false; + ret.second += "Invalid ef_con: " + std::to_string(index_config.ef_construction) + + ". Should be between " + std::to_string(settings::MIN_EF_CONSTRUCT) + + " and " + std::to_string(settings::MAX_EF_CONSTRUCT); + LOG_ERROR(ret.second); + return ret; + } + + if(index_config.quant_level == ndd::quant::QuantizationLevel::UNKNOWN){ + ret.first = false; + ret.second += "Invalid precision"; + LOG_ERROR(ret.second); + return ret; + } + + /** + * TODO: Check its need and update this as required + */ + // if(index_config.size_in_millions == 0 || + // index_config.size_in_millions > settings::MAX_SIZE_IN_MILLIONS) + // { + // ret.first = false; + // ret.second += "Invalid size_in_millions: " + std::to_string(index_config.size_in_millions) + // + ". Should be > 0 and < " + std::to_string(settings::MAX_SIZE_IN_MILLIONS); + // LOG_ERROR(ret.second); + // return ret; + // } + + /** + * TODO: Check the following: + * sparse_dim needs to be of a certain max dimension. + * space type needs to be checked to a certain strings only + * what is the difference max_elements and size_in_millions ? 
+ */ + + return ret; +} \ No newline at end of file diff --git a/src/core/ndd.hpp b/src/core/ndd.hpp index 2a4b93ed0..34d930020 100644 --- a/src/core/ndd.hpp +++ b/src/core/ndd.hpp @@ -12,6 +12,7 @@ #include "msgpack_ndd.hpp" #include "quant_vector.hpp" #include "wal.hpp" +#include "log.hpp" #include "../quant/dispatch.hpp" #include "../utils/archive_utils.hpp" #include @@ -31,6 +32,19 @@ #define MAX_BACKUP_NAME_LENGTH 200 +struct NewIndexConfig { + size_t dim; + size_t sparse_dim = 0; // 0 means dense-only + size_t max_elements = settings::MAX_ELEMENTS; + std::string space_type_str; + size_t M = settings::DEFAULT_M; + size_t ef_construction = settings::DEFAULT_EF_CONSTRUCT; + ndd::quant::QuantizationLevel quant_level = + ndd::quant::QuantizationLevel::INT8; // Default to INT8 quantization + int32_t checksum; + size_t size_in_millions; +}; + struct IndexConfig { size_t dim; size_t sparse_dim = 0; // 0 means dense-only @@ -139,6 +153,9 @@ struct PersistenceConfig { bool save_on_shutdown{true}; }; + +std::pair check_index_config_sanity(struct NewIndexConfig index_config); + class IndexManager { private: std::deque indices_list_; diff --git a/src/main.cpp b/src/main.cpp index 279c3262c..49125102c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -300,6 +300,7 @@ int main(int argc, char** argv) { if(!body.has("index_name") || body["index_name"].t() != crow::json::type::String){ return json_error(400, "Parameters error: 'index_name'"); } + //TODO: add an index name restriction check here std::string index_id = ctx.username + "/" + std::string(body["index_name"].s()); std::cout << "index id: " << index_id << "\n"; @@ -312,7 +313,7 @@ int main(int argc, char** argv) { auto& dense_blocks = body["dense_vectors"]; for(auto& key: dense_blocks.keys()){ - // struct IndexConfig index_config; + struct NewIndexConfig index_config; printf("dense_vectors has index_key:%s\n", key.c_str()); auto& config = dense_blocks[key]; @@ -325,7 +326,6 @@ int main(int argc, char** argv) { dim = 
(size_t)config["dim"].i(); std::cout << "dim: " << dim << "\n"; - // Space_type is mandatory std::string space_type; if(!config.has("space_type") || config["space_type"].t() != crow::json::type::String){ @@ -334,7 +334,6 @@ int main(int argc, char** argv) { space_type = (std::string)config["space_type"].s(); std::cout << "space_type: " << space_type << "\n"; - size_t m = settings::DEFAULT_M; if(config.has("M")){ if(config["M"].t() != crow::json::type::Number){ @@ -378,18 +377,22 @@ int main(int argc, char** argv) { checksum = config["size_in_millions"].i(); } - // index_config = IndexConfig {dim, - // sparse_dim, - // settings::MAX_ELEMENTS, // max elements - // body["space_type"].s(), - // m, - // ef_con, - // quant_level, - // checksum}; - - // if(!check_creation_sanity(struct IndexConfig conf)){ - // return json_error(400, "error"); - // } + index_config = NewIndexConfig { + dim, + 0, + settings::MAX_ELEMENTS, // max elements + space_type, + m, + ef_con, + quant_level, + checksum, + size_in_millions + }; + + std::pair sanity_ret = check_index_config_sanity(index_config); + if(!sanity_ret.first){ + return json_error(400, sanity_ret.second); + } } } diff --git a/src/utils/settings.hpp b/src/utils/settings.hpp index d8b6fecdb..726789bb1 100644 --- a/src/utils/settings.hpp +++ b/src/utils/settings.hpp @@ -30,6 +30,7 @@ namespace settings { constexpr size_t MIN_EF_CONSTRUCT = 8; constexpr size_t MAX_EF_CONSTRUCT = 4096; constexpr size_t DEFAULT_EF_SEARCH = 128; + constexpr size_t MAX_SIZE_IN_MILLIONS = 10'000; constexpr size_t MIN_K = 1; constexpr size_t MAX_K = 4096; constexpr size_t RANDOM_SEED = 100; From cd2473ca5b4f4a9531b7f9c21bc9ca672d0127a8 Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Tue, 10 Feb 2026 19:15:40 +0300 Subject: [PATCH 03/18] sparse picking aswell --- src/core/ndd.hpp | 17 ++++++++---- src/main.cpp | 71 +++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 72 insertions(+), 16 deletions(-) diff --git a/src/core/ndd.hpp 
b/src/core/ndd.hpp index 34d930020..03c70cdc9 100644 --- a/src/core/ndd.hpp +++ b/src/core/ndd.hpp @@ -33,18 +33,23 @@ #define MAX_BACKUP_NAME_LENGTH 200 struct NewIndexConfig { + std::string sub_index_name; size_t dim; - size_t sparse_dim = 0; // 0 means dense-only - size_t max_elements = settings::MAX_ELEMENTS; + size_t max_elements; std::string space_type_str; - size_t M = settings::DEFAULT_M; - size_t ef_construction = settings::DEFAULT_EF_CONSTRUCT; - ndd::quant::QuantizationLevel quant_level = - ndd::quant::QuantizationLevel::INT8; // Default to INT8 quantization + size_t M; + size_t ef_construction; + ndd::quant::QuantizationLevel quant_level; int32_t checksum; size_t size_in_millions; }; +struct SparseIndexConfig { + std::string sub_index_name; + size_t sparse_dim; + int32_t checksum; +}; + struct IndexConfig { size_t dim; size_t sparse_dim = 0; // 0 means dense-only diff --git a/src/main.cpp b/src/main.cpp index 49125102c..eabbd9fcc 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -305,9 +305,9 @@ int main(int argc, char** argv) { std::string index_id = ctx.username + "/" + std::string(body["index_name"].s()); std::cout << "index id: " << index_id << "\n"; - /** - * create a struct for each dense vector in the list - */ + std::vector dense_indexes; + + if(body.has("dense_vectors")){ printf("index has dense vectors\n"); auto& dense_blocks = body["dense_vectors"]; @@ -324,7 +324,7 @@ int main(int argc, char** argv) { return json_error(400, "Parameters error: 'dim'"); } dim = (size_t)config["dim"].i(); - std::cout << "dim: " << dim << "\n"; + // std::cout << "dim: " << dim << "\n"; // Space_type is mandatory std::string space_type; @@ -332,7 +332,7 @@ int main(int argc, char** argv) { return json_error(400, "Parameters error: 'dim'"); } space_type = (std::string)config["space_type"].s(); - std::cout << "space_type: " << space_type << "\n"; + // std::cout << "space_type: " << space_type << "\n"; size_t m = settings::DEFAULT_M; if(config.has("M")){ @@ -341,7 
+341,7 @@ int main(int argc, char** argv) { } m = (size_t)config["M"].i(); } - std::cout << "m: " << m << "\n"; + // std::cout << "m: " << m << "\n"; size_t ef_con = settings::DEFAULT_EF_CONSTRUCT; if(config.has("ef_con")){ @@ -350,7 +350,7 @@ int main(int argc, char** argv) { } ef_con = (size_t)config["ef_con"].i(); } - std::cout << "ef_con: " << ef_con << "\n"; + // std::cout << "ef_con: " << ef_con << "\n"; ndd::quant::QuantizationLevel quant_level = ndd::quant::QuantizationLevel::INT8; if(config.has("precision")){ @@ -359,7 +359,7 @@ int main(int argc, char** argv) { } quant_level = stringToQuantLevel(config["precision"].s()); } - std::cout << "quant level: " << quantLevelToString(quant_level) << "\n"; + // std::cout << "quant level: " << quantLevelToString(quant_level) << "\n"; int32_t checksum = -1; if(config.has("checksum")){ @@ -378,8 +378,8 @@ int main(int argc, char** argv) { } index_config = NewIndexConfig { - dim, - 0, + key, + dim, //dense_dim settings::MAX_ELEMENTS, // max elements space_type, m, @@ -393,13 +393,64 @@ int main(int argc, char** argv) { if(!sanity_ret.first){ return json_error(400, sanity_ret.second); } + dense_indexes.push_back(index_config); } } + for(int i=0; i sparse_indexes; + if(body.has("sparse_vectors")){ printf("index has sparse vectors\n"); + auto& sparse_blocks = body["sparse_vectors"]; + + for(auto& key: sparse_blocks.keys()){ + struct SparseIndexConfig sparse_index_config; + + auto& sparse_config = sparse_blocks[key]; + + // sparse_dim is mandatory + size_t sparse_dim; + if(!sparse_config.has("sparse_dim") || sparse_config["sparse_dim"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'dim'"); + } + sparse_dim = (size_t)sparse_config["sparse_dim"].i(); + + int32_t checksum = -1; + if(sparse_config.has("checksum")){ + if(sparse_config["checksum"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'checksum'"); + } + checksum = sparse_config["checksum"].i(); + } + + 
sparse_index_config = SparseIndexConfig{ + key, + sparse_dim, + checksum + }; + + //TODO: Add a sanity check for sparse vectors here + + sparse_indexes.push_back(sparse_index_config); + } } + for(int i=0; i Date: Wed, 11 Feb 2026 09:36:47 +0300 Subject: [PATCH 04/18] some bug fixes and guard rails --- src/main.cpp | 277 ++++++++++++++++++++++++++++----------------------- 1 file changed, 152 insertions(+), 125 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index eabbd9fcc..5d48332c6 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -290,168 +290,195 @@ int main(int argc, char** argv) { CROW_ROUTE(app, "/api/v1/index/newcreate") .CROW_MIDDLEWARES(app, AuthMiddleware) .methods("POST"_method)([&index_manager, &app](const crow::request& req) { - auto& ctx = app.get_context(req); + + AuthMiddleware::context& ctx = app.get_context(req);; - auto body = crow::json::load(req.body); - if(!body) { - return json_error(400, "Invalid JSON"); - } - - if(!body.has("index_name") || body["index_name"].t() != crow::json::type::String){ - return json_error(400, "Parameters error: 'index_name'"); - } - //TODO: add an index name restriction check here + try{ - std::string index_id = ctx.username + "/" + std::string(body["index_name"].s()); - std::cout << "index id: " << index_id << "\n"; + auto body = crow::json::load(req.body); + if(!body) { + return json_error(400, "Invalid JSON"); + } - std::vector dense_indexes; + if(!body.has("index_name") || body["index_name"].t() != crow::json::type::String){ + return json_error(400, "Parameters error: 'index_name'"); + } + //TODO: add an index name restriction check here + std::string index_id = ctx.username + "/" + std::string(body["index_name"].s()); + std::cout << "index id: " << index_id << "\n"; - if(body.has("dense_vectors")){ - printf("index has dense vectors\n"); - auto& dense_blocks = body["dense_vectors"]; + std::vector dense_indexes; - for(auto& key: dense_blocks.keys()){ - struct NewIndexConfig index_config; - 
printf("dense_vectors has index_key:%s\n", key.c_str()); - auto& config = dense_blocks[key]; + if(body.has("dense_vectors")){ + printf("index has dense vectors\n"); + auto& dense_blocks = body["dense_vectors"]; - // dim is mandatory - size_t dim; - if(!config.has("dim") || config["dim"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 'dim'"); + if (dense_blocks.t() != crow::json::type::Object) { + return json_error(400, "'dense_vectors' must be an object"); } - dim = (size_t)config["dim"].i(); - // std::cout << "dim: " << dim << "\n"; - // Space_type is mandatory - std::string space_type; - if(!config.has("space_type") || config["space_type"].t() != crow::json::type::String){ - return json_error(400, "Parameters error: 'dim'"); - } - space_type = (std::string)config["space_type"].s(); - // std::cout << "space_type: " << space_type << "\n"; - size_t m = settings::DEFAULT_M; - if(config.has("M")){ - if(config["M"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 'M'"); + for(auto& key: dense_blocks.keys()){ + struct NewIndexConfig index_config; + + printf("dense_vectors has index_key:%s\n", key.c_str()); + auto& config = dense_blocks[key]; + + // dim is mandatory + size_t dim; + long raw_dim; + if(!config.has("dim") || config["dim"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'dim'"); } - m = (size_t)config["M"].i(); - } - // std::cout << "m: " << m << "\n"; + else{ + raw_dim = (long)config["dim"].i(); + if(raw_dim <= 0){ + return json_error(400, "Parameters error: negative 'dim'"); + } + } + dim = raw_dim; + // std::cout << "dim: " << dim << "\n"; - size_t ef_con = settings::DEFAULT_EF_CONSTRUCT; - if(config.has("ef_con")){ - if(config["ef_con"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 'ef_con'"); + // Space_type is mandatory + std::string space_type; + if(!config.has("space_type") || config["space_type"].t() != crow::json::type::String){ 
+ return json_error(400, "Parameters error: 'space_type'"); } - ef_con = (size_t)config["ef_con"].i(); - } - // std::cout << "ef_con: " << ef_con << "\n"; + space_type = (std::string)config["space_type"].s(); + // std::cout << "space_type: " << space_type << "\n"; - ndd::quant::QuantizationLevel quant_level = ndd::quant::QuantizationLevel::INT8; - if(config.has("precision")){ - if(config["precision"].t() != crow::json::type::String){ - return json_error(400, "Parameters error: 'precision'"); + size_t m = settings::DEFAULT_M; + if(config.has("M")){ + if(config["M"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'M'"); + } + m = (size_t)config["M"].i(); } - quant_level = stringToQuantLevel(config["precision"].s()); - } - // std::cout << "quant level: " << quantLevelToString(quant_level) << "\n"; + // std::cout << "m: " << m << "\n"; - int32_t checksum = -1; - if(config.has("checksum")){ - if(config["checksum"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 'checksum'"); + size_t ef_con = settings::DEFAULT_EF_CONSTRUCT; + if(config.has("ef_con")){ + if(config["ef_con"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'ef_con'"); + } + ef_con = (size_t)config["ef_con"].i(); } - checksum = config["checksum"].i(); - } + // std::cout << "ef_con: " << ef_con << "\n"; - size_t size_in_millions = 0; - if(config.has("size_in_millions")){ - if(config["size_in_millions"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 'size_in_millions'"); + ndd::quant::QuantizationLevel quant_level = ndd::quant::QuantizationLevel::INT8; + if(config.has("precision")){ + if(config["precision"].t() != crow::json::type::String){ + return json_error(400, "Parameters error: 'precision'"); + } + quant_level = stringToQuantLevel(config["precision"].s()); + } + // std::cout << "quant level: " << quantLevelToString(quant_level) << "\n"; + + int32_t checksum = -1; + 
if(config.has("checksum")){ + if(config["checksum"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'checksum'"); + } + checksum = config["checksum"].i(); + } + + size_t size_in_millions = 0; + if(config.has("size_in_millions")){ + if(config["size_in_millions"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'size_in_millions'"); + } + size_in_millions = config["size_in_millions"].i(); } - checksum = config["size_in_millions"].i(); - } - index_config = NewIndexConfig { - key, - dim, //dense_dim - settings::MAX_ELEMENTS, // max elements - space_type, - m, - ef_con, - quant_level, - checksum, - size_in_millions - }; - - std::pair sanity_ret = check_index_config_sanity(index_config); - if(!sanity_ret.first){ - return json_error(400, sanity_ret.second); + index_config = NewIndexConfig { + key, + dim, //dense_dim + settings::MAX_ELEMENTS, // max elements + space_type, + m, + ef_con, + quant_level, + checksum, + size_in_millions + }; + + std::pair sanity_ret = check_index_config_sanity(index_config); + if(!sanity_ret.first){ + return json_error(400, sanity_ret.second); + } + dense_indexes.push_back(index_config); } - dense_indexes.push_back(index_config); } - } - for(int i=0; i sparse_indexes; + std::vector sparse_indexes; - if(body.has("sparse_vectors")){ - printf("index has sparse vectors\n"); - auto& sparse_blocks = body["sparse_vectors"]; + if(body.has("sparse_vectors")){ + printf("index has sparse vectors\n"); + auto& sparse_blocks = body["sparse_vectors"]; - for(auto& key: sparse_blocks.keys()){ - struct SparseIndexConfig sparse_index_config; + if (sparse_blocks.t() != crow::json::type::Object) { + return json_error(400, "'sparse_vectors' must be an object"); + } - auto& sparse_config = sparse_blocks[key]; - // sparse_dim is mandatory - size_t sparse_dim; - if(!sparse_config.has("sparse_dim") || sparse_config["sparse_dim"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 
'dim'"); - } - sparse_dim = (size_t)sparse_config["sparse_dim"].i(); + for(auto& key: sparse_blocks.keys()){ + struct SparseIndexConfig sparse_index_config; - int32_t checksum = -1; - if(sparse_config.has("checksum")){ - if(sparse_config["checksum"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 'checksum'"); + auto& sparse_config = sparse_blocks[key]; + + // sparse_dim is mandatory + size_t sparse_dim; + if(!sparse_config.has("sparse_dim") || sparse_config["sparse_dim"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'sparse_dim'"); } - checksum = sparse_config["checksum"].i(); - } + sparse_dim = (size_t)sparse_config["sparse_dim"].i(); - sparse_index_config = SparseIndexConfig{ - key, - sparse_dim, - checksum - }; + int32_t checksum = -1; + if(sparse_config.has("checksum")){ + if(sparse_config["checksum"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'checksum'"); + } + checksum = sparse_config["checksum"].i(); + } - //TODO: Add a sanity check for sparse vectors here + sparse_index_config = SparseIndexConfig{ + key, + sparse_dim, + checksum + }; - sparse_indexes.push_back(sparse_index_config); - } - } + //TODO: Add a sanity check for sparse vectors here - for(int i=0; i Date: Wed, 11 Feb 2026 10:30:57 +0300 Subject: [PATCH 05/18] starting implementing IndexManager::newcreateIndex --- src/core/ndd.cpp | 52 +++++++- src/core/ndd.hpp | 5 + src/main.cpp | 326 ++++++++++++++++++++++++----------------------- 3 files changed, 220 insertions(+), 163 deletions(-) diff --git a/src/core/ndd.cpp b/src/core/ndd.cpp index 0be65e09a..c39c2b33c 100644 --- a/src/core/ndd.cpp +++ b/src/core/ndd.cpp @@ -1,6 +1,22 @@ #include #include "ndd.hpp" + +bool IndexManager::newcreateIndex(std::string& index_name, UserType user_type, + std::vector dense_indexes, + std::vector sparse_indexes) +{ + for(int i=0; i if index config is sane */ @@ -66,4 +82,38 @@ std::pair check_index_config_sanity(struct 
NewIndexConfig ind */ return ret; -} \ No newline at end of file +} + +/** + * Check if this is okay for validating index name +std::pair validate_index_name(const std::string& name) { + // Not empty + if (name.empty()) { + return {false, "Index name cannot be empty"}; + } + + // Length limit + if (name.size() > 128) { + return {false, "Index name too long (max 128 characters)"}; + } + + // Only allow alphanumeric, hyphens, underscores + for (char c : name) { + if (!std::isalnum(c) && c != '-' && c != '_') { + return {false, "Index name contains invalid character: '" + std::string(1, c) + "'. Only alphanumeric, hyphens, and underscores allowed"}; + } + } + + // Don't allow starting with a hyphen or underscore + if (name[0] == '-' || name[0] == '_') { + return {false, "Index name must start with an alphanumeric character"}; + } + + // Block path traversal attempts + if (name.find("..") != std::string::npos || name.find('/') != std::string::npos || name.find('\\') != std::string::npos) { + return {false, "Index name contains illegal sequence"}; + } + + return {true, ""}; +} +*/ diff --git a/src/core/ndd.hpp b/src/core/ndd.hpp index 03c70cdc9..fcaa124a2 100644 --- a/src/core/ndd.hpp +++ b/src/core/ndd.hpp @@ -815,6 +815,11 @@ class IndexManager { } } + bool newcreateIndex(std::string& index_name, + UserType user_type, + std::vector dense_indexes, + std::vector sparse_indexes); + bool createIndex(const std::string& index_id, const IndexConfig& config, UserType user_type = UserType::Admin, diff --git a/src/main.cpp b/src/main.cpp index 5d48332c6..ef3d4c0a7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -288,198 +288,200 @@ int main(int argc, char** argv) { // new create index CROW_ROUTE(app, "/api/v1/index/newcreate") - .CROW_MIDDLEWARES(app, AuthMiddleware) - .methods("POST"_method)([&index_manager, &app](const crow::request& req) { - - AuthMiddleware::context& ctx = app.get_context(req);; - - try{ - - auto body = crow::json::load(req.body); - if(!body) { - return 
json_error(400, "Invalid JSON"); - } - - if(!body.has("index_name") || body["index_name"].t() != crow::json::type::String){ - return json_error(400, "Parameters error: 'index_name'"); - } - //TODO: add an index name restriction check here + .CROW_MIDDLEWARES(app, AuthMiddleware) + .methods("POST"_method)([&index_manager, &app](const crow::request& req) { + + AuthMiddleware::context& ctx = app.get_context(req);; + try{ + auto body = crow::json::load(req.body); + if(!body) { + return json_error(400, "Invalid JSON"); + } - std::string index_id = ctx.username + "/" + std::string(body["index_name"].s()); - std::cout << "index id: " << index_id << "\n"; + if(!body.has("index_name") || body["index_name"].t() != crow::json::type::String){ + return json_error(400, "Parameters error: 'index_name'"); + } - std::vector dense_indexes; + /** + * TODO:CRITICAL add an index name restriction check here + * validate_index_name in ndd.cpp + */ + std::string index_name = std::string(body["index_name"].s()); + // std::string index_id = ctx.username + "/" + index_name; + // std::cout << "index id: " << index_id << "\n"; + + /** + * TODO: add a simple case where no named vectors are present. + * Only one dense vector. + * In that case, the user will not provide a name to the vector during creating + * and searching. So, there should be a way to store that this is an unnamed vector. + * + * This can be implemented once create index is written well and tested. 
+ */ + + std::vector dense_indexes; + + if(body.has("dense_vectors")){ + auto& dense_blocks = body["dense_vectors"]; + + if (dense_blocks.t() != crow::json::type::Object) { + return json_error(400, "'dense_vectors' must be an object"); + } - if(body.has("dense_vectors")){ - printf("index has dense vectors\n"); - auto& dense_blocks = body["dense_vectors"]; + for(auto& key: dense_blocks.keys()){ + struct NewIndexConfig index_config; + auto& config = dense_blocks[key]; - if (dense_blocks.t() != crow::json::type::Object) { - return json_error(400, "'dense_vectors' must be an object"); + // dim is mandatory + size_t dim; + long raw_dim; + if(!config.has("dim") || config["dim"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'dim'"); + } + else{ + raw_dim = (long)config["dim"].i(); + if(raw_dim <= 0){ + return json_error(400, "Parameters error: negative 'dim'"); } + } + dim = raw_dim; + // std::cout << "dim: " << dim << "\n"; + // Space_type is mandatory + std::string space_type; + if(!config.has("space_type") || config["space_type"].t() != crow::json::type::String){ + return json_error(400, "Parameters error: 'space_type'"); + } + space_type = (std::string)config["space_type"].s(); + // std::cout << "space_type: " << space_type << "\n"; - for(auto& key: dense_blocks.keys()){ - struct NewIndexConfig index_config; - - printf("dense_vectors has index_key:%s\n", key.c_str()); - auto& config = dense_blocks[key]; - - // dim is mandatory - size_t dim; - long raw_dim; - if(!config.has("dim") || config["dim"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 'dim'"); - } - else{ - raw_dim = (long)config["dim"].i(); - if(raw_dim <= 0){ - return json_error(400, "Parameters error: negative 'dim'"); - } - } - dim = raw_dim; - // std::cout << "dim: " << dim << "\n"; - - // Space_type is mandatory - std::string space_type; - if(!config.has("space_type") || config["space_type"].t() != crow::json::type::String){ - return 
json_error(400, "Parameters error: 'space_type'"); - } - space_type = (std::string)config["space_type"].s(); - // std::cout << "space_type: " << space_type << "\n"; - - size_t m = settings::DEFAULT_M; - if(config.has("M")){ - if(config["M"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 'M'"); - } - m = (size_t)config["M"].i(); - } - // std::cout << "m: " << m << "\n"; - - size_t ef_con = settings::DEFAULT_EF_CONSTRUCT; - if(config.has("ef_con")){ - if(config["ef_con"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 'ef_con'"); - } - ef_con = (size_t)config["ef_con"].i(); - } - // std::cout << "ef_con: " << ef_con << "\n"; - - ndd::quant::QuantizationLevel quant_level = ndd::quant::QuantizationLevel::INT8; - if(config.has("precision")){ - if(config["precision"].t() != crow::json::type::String){ - return json_error(400, "Parameters error: 'precision'"); - } - quant_level = stringToQuantLevel(config["precision"].s()); - } - // std::cout << "quant level: " << quantLevelToString(quant_level) << "\n"; + size_t m = settings::DEFAULT_M; + if(config.has("M")){ + if(config["M"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'M'"); + } + m = (size_t)config["M"].i(); + } + // std::cout << "m: " << m << "\n"; - int32_t checksum = -1; - if(config.has("checksum")){ - if(config["checksum"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 'checksum'"); - } - checksum = config["checksum"].i(); - } + size_t ef_con = settings::DEFAULT_EF_CONSTRUCT; + if(config.has("ef_con")){ + if(config["ef_con"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'ef_con'"); + } + ef_con = (size_t)config["ef_con"].i(); + } + // std::cout << "ef_con: " << ef_con << "\n"; - size_t size_in_millions = 0; - if(config.has("size_in_millions")){ - if(config["size_in_millions"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 
'size_in_millions'"); - } - size_in_millions = config["size_in_millions"].i(); - } + ndd::quant::QuantizationLevel quant_level = ndd::quant::QuantizationLevel::INT8; + if(config.has("precision")){ + if(config["precision"].t() != crow::json::type::String){ + return json_error(400, "Parameters error: 'precision'"); + } + quant_level = stringToQuantLevel(config["precision"].s()); + } + // std::cout << "quant level: " << quantLevelToString(quant_level) << "\n"; - index_config = NewIndexConfig { - key, - dim, //dense_dim - settings::MAX_ELEMENTS, // max elements - space_type, - m, - ef_con, - quant_level, - checksum, - size_in_millions - }; - - std::pair sanity_ret = check_index_config_sanity(index_config); - if(!sanity_ret.first){ - return json_error(400, sanity_ret.second); - } - dense_indexes.push_back(index_config); + int32_t checksum = -1; + if(config.has("checksum")){ + if(config["checksum"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'checksum'"); } + checksum = config["checksum"].i(); } - // for(int i=0; i sparse_indexes; + index_config = NewIndexConfig { + key, + dim, //dense_dim + settings::MAX_ELEMENTS, // max elements + space_type, + m, + ef_con, + quant_level, + checksum, + size_in_millions + }; - if(body.has("sparse_vectors")){ - printf("index has sparse vectors\n"); - auto& sparse_blocks = body["sparse_vectors"]; + std::pair sanity_ret = check_index_config_sanity(index_config); + if(!sanity_ret.first){ + return json_error(400, sanity_ret.second); + } + dense_indexes.push_back(index_config); + } + } - if (sparse_blocks.t() != crow::json::type::Object) { - return json_error(400, "'sparse_vectors' must be an object"); - } + // for(int i=0; i sparse_indexes; - for(auto& key: sparse_blocks.keys()){ - struct SparseIndexConfig sparse_index_config; + if(body.has("sparse_vectors")){ + auto& sparse_blocks = body["sparse_vectors"]; - auto& sparse_config = sparse_blocks[key]; + if (sparse_blocks.t() != crow::json::type::Object) { + 
return json_error(400, "'sparse_vectors' must be an object"); + } - // sparse_dim is mandatory - size_t sparse_dim; - if(!sparse_config.has("sparse_dim") || sparse_config["sparse_dim"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 'sparse_dim'"); - } - sparse_dim = (size_t)sparse_config["sparse_dim"].i(); - int32_t checksum = -1; - if(sparse_config.has("checksum")){ - if(sparse_config["checksum"].t() != crow::json::type::Number){ - return json_error(400, "Parameters error: 'checksum'"); - } - checksum = sparse_config["checksum"].i(); - } + for(auto& key: sparse_blocks.keys()){ + struct SparseIndexConfig sparse_index_config; - sparse_index_config = SparseIndexConfig{ - key, - sparse_dim, - checksum - }; + auto& sparse_config = sparse_blocks[key]; - //TODO: Add a sanity check for sparse vectors here + // sparse_dim is mandatory + size_t sparse_dim; + if(!sparse_config.has("sparse_dim") || sparse_config["sparse_dim"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'sparse_dim'"); + } + sparse_dim = (size_t)sparse_config["sparse_dim"].i(); - sparse_indexes.push_back(sparse_index_config); + int32_t checksum = -1; + if(sparse_config.has("checksum")){ + if(sparse_config["checksum"].t() != crow::json::type::Number){ + return json_error(400, "Parameters error: 'checksum'"); } + checksum = sparse_config["checksum"].i(); } - // for(int i=0; i Date: Thu, 12 Feb 2026 09:07:08 +0300 Subject: [PATCH 06/18] dirty commit --- src/core/ndd.cpp | 53 +++++++++++++++++++++++++++++++++++++----- src/core/ndd.hpp | 9 +++---- src/main.cpp | 16 +++++++++---- src/server/auth.hpp | 2 +- src/utils/settings.hpp | 4 ++-- 5 files changed, 67 insertions(+), 17 deletions(-) diff --git a/src/core/ndd.cpp b/src/core/ndd.cpp index c39c2b33c..f100b283c 100644 --- a/src/core/ndd.cpp +++ b/src/core/ndd.cpp @@ -2,18 +2,59 @@ #include "ndd.hpp" -bool IndexManager::newcreateIndex(std::string& index_name, UserType user_type, +std::pair 
IndexManager::newcreateIndex(std::string& username, + UserType user_type, std::string& index_name, std::vector dense_indexes, std::vector sparse_indexes) { - for(int i=0; i ret; + struct NewIndexConfig dense_index; + ret.first = true; + ret.second = ""; + + // for(int i=0; ilistUserIndexes(username); + for(const auto& existing : existing_indices) { + if(existing.first == index_name) { + throw std::runtime_error("Index with this name already exists for this user"); + } + } + std::string index_path = data_dir_ + "/" + index_id + "/main.idx"; + if(std::filesystem::exists(index_path)) { + ret.first = false; + ret.second = "index_path exists for index_name: " + index_name; + goto exit_newcreateIndex; } - for(int i=0; i getMaxVectorsPerIndex(user_type)){ + ret.first = false; + ret.second = "Size in millions is greater than max allowed"; + } } - return true; + +exit_newcreateIndex: + return ret; } diff --git a/src/core/ndd.hpp b/src/core/ndd.hpp index 9f414ecf9..3ab9304cb 100644 --- a/src/core/ndd.hpp +++ b/src/core/ndd.hpp @@ -815,10 +815,11 @@ class IndexManager { } } - bool newcreateIndex(std::string& index_name, - UserType user_type, - std::vector dense_indexes, - std::vector sparse_indexes); + std::pair newcreateIndex(std::string& username, + UserType user_type, std::string& index_name, + std::vector dense_indexes, + std::vector sparse_indexes); + bool createIndex(const std::string& index_id, const IndexConfig& config, diff --git a/src/main.cpp b/src/main.cpp index 9f3f406bc..73596e966 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -307,8 +307,6 @@ int main(int argc, char** argv) { * validate_index_name in ndd.cpp */ std::string index_name = std::string(body["index_name"].s()); - // std::string index_id = ctx.username + "/" + index_name; - // std::cout << "index id: " << index_id << "\n"; /** * TODO: add a simple case where no named vectors are present. @@ -465,12 +463,22 @@ int main(int argc, char** argv) { } } + /** + * TODO:CRITICAL Add a sparse sanity test here. 
+ */ + // for(int i=0; i create_index_ret = + index_manager.newcreateIndex(ctx.username, + UserType::Admin, + index_name, + dense_indexes, + sparse_indexes); + if(!create_index_ret.first){ + return json_error(400, "failed createNewIndex: " + create_index_ret.second); } return crow::response(200, "Index created successfully"); diff --git a/src/server/auth.hpp b/src/server/auth.hpp index 0d6cdd4f5..758354abf 100644 --- a/src/server/auth.hpp +++ b/src/server/auth.hpp @@ -27,7 +27,7 @@ inline int getMaxAllowedIndices(UserType type) { // Get max vectors per index - No limits in open-source mode inline size_t getMaxVectorsPerIndex(UserType type) { - return settings::MAX_VECTORS_ADMIN; // 1 billion vectors + return settings::MAX_VECTORS_MILLION; // 1 billion vectors } struct User { diff --git a/src/utils/settings.hpp b/src/utils/settings.hpp index d62245da3..aa20e5e64 100644 --- a/src/utils/settings.hpp +++ b/src/utils/settings.hpp @@ -68,8 +68,8 @@ namespace settings { constexpr uint16_t BLOCK_SPLIT_THRESHOLD = 160; // Bloc will be split if more than this many elements (including tombstones) - // Maximum number of elements in the index - constexpr size_t MAX_VECTORS_ADMIN = 1'000'000'000; + // Maximum number of elements in an index in millions + constexpr size_t MAX_VECTORS_MILLION = 10'000; //1 billion // Buffer for early exit in search base layer constexpr int EARLY_EXIT_BUFFER_INSERT = 16; From ad217cf33dc69181074dd89b1cb64a68397051af Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Fri, 13 Feb 2026 08:09:20 +0300 Subject: [PATCH 07/18] newVectorCache implementation (incomplete) --- src/core/ndd.cpp | 117 ++++++++++++++++++++++++++------- src/core/ndd.hpp | 85 ++++++++++++++++++++++++ src/main.cpp | 3 +- src/storage/vector_storage.hpp | 23 ++++--- src/utils/settings.hpp | 3 + 5 files changed, 200 insertions(+), 31 deletions(-) diff --git a/src/core/ndd.cpp b/src/core/ndd.cpp index f100b283c..6d6ccf587 100644 --- a/src/core/ndd.cpp +++ b/src/core/ndd.cpp @@ -1,6 +1,14 
// File-local helper: insert (key, value) into `map`, throwing on a duplicate.
//
// NOTE(review): the pasted diff lost all template angle brackets; the
// parameter list below is reconstructed from the body's use of
// std::forward<Key>/std::forward<Value> and Map::try_emplace — confirm
// against the original commit.
//
// Requirements: Map supports try_emplace (std::map / std::unordered_map) and
// Map::key_type concatenates with a string literal so the duplicate key can
// be named in the error text.
//
// Throws std::runtime_error on a duplicate key; `map` is left unchanged in
// that case (try_emplace does not overwrite, and does not move from the
// arguments when insertion fails).
template <typename Map, typename Key, typename Value>
void insert_or_throw(Map& map, Key&& key, Value&& value) {
    auto [it, inserted] = map.try_emplace(
        std::forward<Key>(key), std::forward<Value>(value));
    if (!inserted) {
        throw std::runtime_error("Duplicate key: " + it->first);
    }
}
exit_newcreateIndex; } - /** - * Check limits for this user's type - */ + // Check if there is enough space on the disk + space_info = std::filesystem::space(data_dir_, ec); + // std::cout << "space available: " << space_info.available/GB << "GB \n"; + if (!ec && space_info.available < settings::MINIMUM_REQUIRED_FS_BYTES) { + throw std::runtime_error("Insufficient disk space to create index"); + } - for(int i=0; i< dense_indexes.size(); i++){ - dense_index = dense_indexes[i]; + // Check if there exist any sub indexes + if(dense_indexes.size() == 0 && sparse_indexes.size() == 0){ + throw std::runtime_error("No dense or sparse indexes passed"); + } + + // LOG_INFO("Creating IDMapper for index " + // << index_id << " with user type: " << userTypeToString(user_type)); + + try{ + std::string lmdb_dir = data_dir_ + "/" + index_id + "/ids"; + auto id_mapper = std::make_shared(lmdb_dir, true, user_type); + std::filesystem::create_directory(data_dir_ + "/" + index_id + "/vectors"); + + for(int i=0; i< dense_indexes.size(); i++){ + auto& dense_sub_index = dense_indexes[i]; + dense_sub_index_cache = std::make_shared(); + + /** + * Check limits for this user's type + */ + if(dense_sub_index.size_in_millions > getMaxVectorsPerIndex(user_type)){ + ret.first = false; + ret.second = "Size in millions is greater than max allowed : " + std::to_string(dense_sub_index.size_in_millions); + goto exit_newcreateIndex_cleanup; + } + + std::cout << "space type: " << dense_sub_index.space_type_str << "\n"; + hnswlib::SpaceType space_type = hnswlib::getSpaceType(dense_sub_index.space_type_str); + + std::string vector_storage_dir = data_dir_ + "/" + index_id + "/vectors" + "/vectors_" + dense_sub_index.sub_index_name; + if(!std::filesystem::create_directory(vector_storage_dir)){ + if (std::filesystem::exists(vector_storage_dir)) { + throw std::runtime_error("Duplicate named sub index: " + dense_sub_index.sub_index_name); + }else{ + throw std::runtime_error("Error: while creating Folder" + 
vector_storage_dir); + } + } + + /** + * Check if there is a duplicate sub index from the filesystem. + */ + + // dense_sub_index_cache->vector_storage = std::make_shared(vector_storage_dir, + // dense_sub_index.dim, + // dense_sub_index.quant_level); - /** - * Check limits for this user's type - */ - if(dense_index.size_in_millions > getMaxVectorsPerIndex(user_type)){ - ret.first = false; - ret.second = "Size in millions is greater than max allowed"; } + } catch (...){ + cleanup(); + throw; } +exit_newcreateIndex_cleanup: +cleanup(); + exit_newcreateIndex: return ret; } @@ -117,6 +189,7 @@ std::pair check_index_config_sanity(struct NewIndexConfig ind /** * TODO: Check the following: + * sub_index_name needs to be sane. * sparse_dim needs to be of a certain max dimension. * space type needs to be checked to a certain strings only * what is the difference max_elements and size_in_millions ? diff --git a/src/core/ndd.hpp b/src/core/ndd.hpp index 3ab9304cb..e3059258a 100644 --- a/src/core/ndd.hpp +++ b/src/core/ndd.hpp @@ -74,6 +74,91 @@ struct IndexInfo { size_t ef_con; }; + +struct SubDenseCacheEntry{ + // struct CacheEntry* cache_entry; + std::shared_ptr vector_storage; + std::unique_ptr> alg; + // Number of searches performed on this sub index. For a search with k=10 it will be 10 + size_t searchCount{0}; + // Per-sub-index operation mutex for coordinating addVectors, saveIndex, deleteVectors + std::mutex operation_mutex; +}; + +struct SubSparseCacheEntry{ + std::unique_ptr sparse_storage; + // Number of searches performed on this sub index. 
For a search with k=10 it will be 10 + size_t searchCount{0}; + // Per-sub-index operation mutex for coordinating addVectors, saveIndex, deleteVectors + std::mutex operation_mutex; +}; + +struct NewCacheEntry { + std::string index_id; + std::shared_ptr id_mapper; + std::unordered_map> dense_vectors; + std::unordered_map> sparse_vectors; + std::chrono::system_clock::time_point last_access; + std::chrono::system_clock::time_point last_saved_at; + std::chrono::system_clock::time_point updated_at; + bool updated{false}; + size_t searchCount{0}; + std::mutex operation_mutex; + + // Delete copy and move (mutex is non-movable) + NewCacheEntry(const NewCacheEntry&) = delete; + NewCacheEntry& operator=(const NewCacheEntry&) = delete; + NewCacheEntry(NewCacheEntry&&) = delete; + NewCacheEntry& operator=(NewCacheEntry&&) = delete; + + // Factory method — returns nullptr on validation failure + [[nodiscard]] static std::unique_ptr create( + std::string index_id_, + std::shared_ptr id_mapper_, + std::unordered_map> dense_, + std::unordered_map> sparse_, + std::chrono::system_clock::time_point access_time_) + { + if (!id_mapper_) { + LOG_ERROR("ID Mapper is null for index: " << index_id_); + return nullptr; + } + if (dense_.empty()) { + LOG_ERROR("Must have at least one dense sub-index for index: " << index_id_); + return nullptr; + } + // Private constructor — only accessible via this factory + return std::unique_ptr( + new NewCacheEntry(std::move(index_id_), + std::move(id_mapper_), + std::move(dense_), + std::move(sparse_), + access_time_)); + } + + void markUpdated() { + updated = true; + updated_at = std::chrono::system_clock::now(); + } + + void resetSearchCount() { searchCount = 0; } + +private: + NewCacheEntry(std::string index_id_, + std::shared_ptr id_mapper_, + std::unordered_map> dense_, + std::unordered_map> sparse_, + std::chrono::system_clock::time_point access_time_) + : index_id(std::move(index_id_)) + , id_mapper(std::move(id_mapper_)) + , 
dense_vectors(std::move(dense_)) + , sparse_vectors(std::move(sparse_)) + , last_access(access_time_) + , last_saved_at(std::chrono::system_clock::now()) + {} +}; + + struct CacheEntry { std::string index_id; size_t sparse_dim = 0; diff --git a/src/main.cpp b/src/main.cpp index 73596e966..2c7e9d5db 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -418,7 +418,8 @@ int main(int argc, char** argv) { } // for(int i=0; i sparse_indexes; diff --git a/src/storage/vector_storage.hpp b/src/storage/vector_storage.hpp index 993c63bd7..c4b6293d0 100644 --- a/src/storage/vector_storage.hpp +++ b/src/storage/vector_storage.hpp @@ -494,17 +494,20 @@ class VectorStorage { std::unique_ptr filter_store_; VectorStorage(const std::string& base_path, - size_t vector_dim, - ndd::quant::QuantizationLevel quant_level) { + size_t vector_dim, + ndd::quant::QuantizationLevel quant_level) + { vector_store_ = std::make_unique(base_path + "/vectors", vector_dim, quant_level); meta_store_ = std::make_unique(base_path + "/meta"); filter_store_ = std::make_unique(base_path + "/filters"); } - VectorStore::Cursor getCursor() { return vector_store_->getCursor(); } + + // Get numeric ids of matching filters - std::vector getIdsMatchingFilters( - const std::vector>& filter_pairs) const { + std::vector getIdsMatchingFilters(const std::vector>& filter_pairs) const + { auto bitmap = filter_store_->combine_filters_and(filter_pairs); std::vector numeric_ids; bitmap.iterate( @@ -519,7 +522,8 @@ class VectorStorage { bool matches_filter(ndd::idInt numeric_id, const ndd::VectorMeta& meta, - const nlohmann::json& filter_query) { + const nlohmann::json& filter_query) + { if(filter_query.empty()) { return true; } @@ -702,9 +706,10 @@ class VectorStorage { meta_store_->remove(numeric_id); } catch(const std::exception& e) { throw std::runtime_error(std::string("Failed to remove vector and metadata: ") - + e.what()); + + e.what()); } } + // Deletes filter only. 
void deleteFilter(ndd::idInt numeric_id, std::string filter) { filter_store_->remove_filters_from_json(numeric_id, filter); @@ -730,7 +735,9 @@ class VectorStorage { } } + VectorStore::Cursor getCursor(){ return vector_store_->getCursor();} ndd::quant::QuantizationLevel getQuantLevel() const { return vector_store_->getQuantLevel(); } size_t dimension() const { return vector_store_->dimension(); } size_t get_vector_size() const { return vector_store_->get_vector_size(); } -}; \ No newline at end of file + +}; diff --git a/src/utils/settings.hpp b/src/utils/settings.hpp index aa20e5e64..9179b83dd 100644 --- a/src/utils/settings.hpp +++ b/src/utils/settings.hpp @@ -70,6 +70,9 @@ namespace settings { // Maximum number of elements in an index in millions constexpr size_t MAX_VECTORS_MILLION = 10'000; //1 billion + + //minimum bytes in filesystem before triggering out of storage sequence + constexpr size_t MINIMUM_REQUIRED_FS_BYTES = (1 * GB); // Buffer for early exit in search base layer constexpr int EARLY_EXIT_BUFFER_INSERT = 16; From 908d4b92ab439653625adbc4d0638a26f8d3eb95 Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Fri, 13 Feb 2026 08:58:56 +0300 Subject: [PATCH 08/18] removing deadcode from class VectorStorage --- src/storage/vector_storage.hpp | 126 --------------------------------- 1 file changed, 126 deletions(-) diff --git a/src/storage/vector_storage.hpp b/src/storage/vector_storage.hpp index c4b6293d0..2b73c481f 100644 --- a/src/storage/vector_storage.hpp +++ b/src/storage/vector_storage.hpp @@ -503,132 +503,6 @@ class VectorStorage { filter_store_ = std::make_unique(base_path + "/filters"); } - - // Get numeric ids of matching filters - std::vector getIdsMatchingFilters(const std::vector>& filter_pairs) const - { - auto bitmap = filter_store_->combine_filters_and(filter_pairs); - std::vector numeric_ids; - bitmap.iterate( - [](ndd::idInt value, void* ptr) -> bool { - auto* ids = static_cast*>(ptr); - ids->push_back(value); - return true; - }, - 
&numeric_ids); - return numeric_ids; - } - - bool matches_filter(ndd::idInt numeric_id, - const ndd::VectorMeta& meta, - const nlohmann::json& filter_query) - { - if(filter_query.empty()) { - return true; - } - - // 1. Fast Pass: Check Numeric Filters using Index - bool has_non_numeric = false; - - for(const auto& condition : filter_query) { - if(!condition.is_object() || condition.size() != 1) { - continue; - } - const auto& field = condition.begin().key(); - const auto& expr = condition.begin().value(); - if(!expr.is_object() || expr.size() != 1) { - continue; - } - - const std::string op = expr.begin().key(); - const auto& val = expr.begin().value(); - - bool is_numeric_query = false; - if(op == "$range") { - is_numeric_query = true; - } else if(op == "$eq" && (val.is_number())) { - is_numeric_query = true; - } else if(op == "$in" && val.is_array() && !val.empty() && val[0].is_number()) { - is_numeric_query = true; - } - - if(is_numeric_query) { - if(!filter_store_->check_numeric(field, numeric_id, op, val)) { - return false; - } - } else { - has_non_numeric = true; - } - } - - if(!has_non_numeric) { - return true; - } - - try { - // Parse the metadata associated with the vector - nlohmann::json meta_filter = nlohmann::json::parse(meta.filter); - - // Each filter clause is ANDed - for(const auto& condition : filter_query) { - if(!condition.is_object() || condition.size() != 1) { - continue; // Skip malformed conditions - } - - const auto& field = condition.begin().key(); - const auto& expr = condition.begin().value(); - - if(!expr.is_object() || expr.size() != 1) { - continue; - } - - const std::string op = expr.begin().key(); - const auto& val = expr.begin().value(); - - // Skip numeric queries as they are already checked - bool is_numeric_query = false; - if(op == "$range") { - is_numeric_query = true; - } else if(op == "$eq" && (val.is_number())) { - is_numeric_query = true; - } else if(op == "$in" && val.is_array() && !val.empty() && val[0].is_number()) { - 
is_numeric_query = true; - } - - if(is_numeric_query) { - continue; - } - - // If field is not present in the vector's metadata - if(!meta_filter.contains(field)) { - return false; - } - - const auto& actual_value = meta_filter[field]; - - if(op == "$eq") { - if(actual_value != val) { - return false; - } - } else if(op == "$in") { - if(!val.is_array() - || std::find(val.begin(), val.end(), actual_value) == val.end()) { - return false; - } - } else { - continue; - } - } - - return true; - - } catch(const std::exception& e) { - // std::cerr << "Error matching filter: " << e.what() << std::endl; - return false; - } - } - // Optimized batch operation using pre-quantized QuantVectorObject // This avoids double quantization by using already quantized data void store_vectors_batch(const std::vector>& vectors) { From 4af11a2faecae350a89e54d7e02cbb206d8c545d Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Fri, 13 Feb 2026 13:23:09 +0300 Subject: [PATCH 09/18] adding deleteFilter duplicate function defn --- src/filter/filter.hpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/filter/filter.hpp b/src/filter/filter.hpp index 35bc1b5bc..34d7d365d 100644 --- a/src/filter/filter.hpp +++ b/src/filter/filter.hpp @@ -375,7 +375,7 @@ class Filter { // Batch add operation for filters void add_to_filter_batch(const std::string& filter_key, - const std::vector& numeric_ids) { + const std::vector& numeric_ids) { if(numeric_ids.empty()) { return; } @@ -518,6 +518,16 @@ class Filter { } } + /** + * Deletes filter only. + * This duplicate function is added here from its misplaced implementation in + * class VectorStorage. 
+ * XXX: Should be removed later for code readability + */ + void deleteFilter(ndd::idInt numeric_id, std::string filter) { + remove_filters_from_json(numeric_id, filter); + } + // Combine multiple filters using AND operation ndd::RoaringBitmap combine_filters_and(const std::vector>& filters) const { @@ -546,9 +556,9 @@ class Filter { // Check if ID satisfies a numeric condition using Forward Index bool check_numeric(const std::string& field, - ndd::idInt id, - const std::string& op, - const nlohmann::json& val) const { + ndd::idInt id, + const std::string& op, + const nlohmann::json& val) const { if(op == "$eq") { uint32_t sortable_val; if(val.is_number_integer()) { From 5fcea98d03ea91641e91f60dee9b523f60646b6d Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Fri, 13 Feb 2026 13:23:56 +0300 Subject: [PATCH 10/18] NOTTESTED: implementation of createIndex --- src/core/ndd.cpp | 97 ++++++++++++++++++++++++++++++++++++++---------- src/core/ndd.hpp | 68 ++++++++++++++++++++++++++++----- 2 files changed, 136 insertions(+), 29 deletions(-) diff --git a/src/core/ndd.cpp b/src/core/ndd.cpp index 6d6ccf587..fc88321d9 100644 --- a/src/core/ndd.cpp +++ b/src/core/ndd.cpp @@ -23,9 +23,9 @@ std::pair IndexManager::newcreateIndex(std::string& username, std::error_code ec; bool committed = false; std::string index_path = ""; - std::unordered_map> dense_map; - std::unordered_map> sparse_map; - std::shared_ptr dense_sub_index_cache; + std::unordered_map> dense_cache_map; + std::unordered_map> sparse_cache_map; + std::shared_ptr dense_sub_index_cache; std::shared_ptr sparse_sub_index_cache; // Cleanup guard — removes partial artifacts if we don't reach commit @@ -79,13 +79,26 @@ std::pair IndexManager::newcreateIndex(std::string& username, // << index_id << " with user type: " << userTypeToString(user_type)); try{ - std::string lmdb_dir = data_dir_ + "/" + index_id + "/ids"; + std::string lmdb_dir = index_path + "/ids"; + std::string vec_data_dir = index_path + "/vectors"; + + /** 
+ * TODO: add error handing while creating directories here. + * check duplicate creates for the same name. + */ + + std::filesystem::create_directory(index_path); + std::filesystem::create_directory(vec_data_dir); + auto id_mapper = std::make_shared(lmdb_dir, true, user_type); - std::filesystem::create_directory(data_dir_ + "/" + index_id + "/vectors"); + + //TODO + // cache_entry.meta_store_ = std::make_unique(vec_data_dir + "/meta"); + // cache_entry.filter_store_ = std::make_unique(vec_data_dir + "/filters"); for(int i=0; i< dense_indexes.size(); i++){ auto& dense_sub_index = dense_indexes[i]; - dense_sub_index_cache = std::make_shared(); + dense_sub_index_cache = std::make_shared(); /** * Check limits for this user's type @@ -99,24 +112,70 @@ std::pair IndexManager::newcreateIndex(std::string& username, std::cout << "space type: " << dense_sub_index.space_type_str << "\n"; hnswlib::SpaceType space_type = hnswlib::getSpaceType(dense_sub_index.space_type_str); - std::string vector_storage_dir = data_dir_ + "/" + index_id + "/vectors" + "/vectors_" + dense_sub_index.sub_index_name; - if(!std::filesystem::create_directory(vector_storage_dir)){ - if (std::filesystem::exists(vector_storage_dir)) { - throw std::runtime_error("Duplicate named sub index: " + dense_sub_index.sub_index_name); - }else{ - throw std::runtime_error("Error: while creating Folder" + vector_storage_dir); - } + dense_sub_index_cache->vector_store = std::make_shared( + vec_data_dir + "/vectors_" + dense_sub_index.sub_index_name, + dense_sub_index.dim, dense_sub_index.quant_level); + + + dense_sub_index_cache->alg = std::make_unique>( + dense_sub_index.max_elements, + space_type, + dense_sub_index.dim, + dense_sub_index.M, + dense_sub_index.ef_construction, + settings::RANDOM_SEED, + dense_sub_index.quant_level, + dense_sub_index.checksum); + + dense_sub_index_cache->alg->setVectorFetcher([vs = dense_sub_index_cache->vector_store] + (ndd::idInt label, uint8_t* buffer) { + return 
vs->get_vector_bytes(label, buffer); + } + ); + + /* add this dense_sub_index_cache entry to dense_map*/ + auto[it, inserted] = dense_cache_map.insert({dense_sub_index.space_type_str, std::move(dense_sub_index_cache)}); + if(!inserted){ + LOG_INFO("Duplicate sub index name: " + dense_sub_index.space_type_str); + ret.first = false; + ret.second = "duplicate sub index_name: " + dense_sub_index.space_type_str; + goto exit_newcreateIndex_cleanup; } + } - /** - * Check if there is a duplicate sub index from the filesystem. - */ + /** + * TODO: Do a for loop for all sparse vectors + */ - // dense_sub_index_cache->vector_storage = std::make_shared(vector_storage_dir, - // dense_sub_index.dim, - // dense_sub_index.quant_level); + //add NewCacheEntry against index name to IndexManager.newindices_ + { + auto cache_entry = NewCacheEntry::create(index_path, index_id, id_mapper, + std::move(dense_cache_map), + std::move(sparse_cache_map), + std::chrono::system_clock::now()); + if(!cache_entry){ + ret.first = false; + ret.second = "unable to allocate NewCacheEntry"; + goto exit_newcreateIndex_cleanup; + } + + std::unique_lock lock(indices_mutex_); + + auto[it, inserted] = newindices_.emplace(index_id, std::move(cache_entry)); + if(!inserted){ + ret.first = false; + ret.second = "found a duplicate entry in newindices_"; + goto exit_newcreateIndex_cleanup; + } + it->second->markUpdated(); + indices_list_.push_front(index_id); + + /** + * TODO: print and test all the vector entries here. 
+ */ } + } catch (...){ cleanup(); throw; diff --git a/src/core/ndd.hpp b/src/core/ndd.hpp index e3059258a..7143741da 100644 --- a/src/core/ndd.hpp +++ b/src/core/ndd.hpp @@ -75,14 +75,29 @@ struct IndexInfo { }; -struct SubDenseCacheEntry{ - // struct CacheEntry* cache_entry; - std::shared_ptr vector_storage; +struct DenseCacheSubEntry{ + // struct CacheEntry* cache_entry; //back connection if required + std::shared_ptr vector_store; std::unique_ptr> alg; // Number of searches performed on this sub index. For a search with k=10 it will be 10 size_t searchCount{0}; // Per-sub-index operation mutex for coordinating addVectors, saveIndex, deleteVectors std::mutex operation_mutex; + + VectorStore::Cursor getCursor(){ + return vector_store->getCursor(); + } + + ndd::quant::QuantizationLevel getQuantLevel() const { + return vector_store->getQuantLevel(); + } + + size_t dimension() const { + return vector_store->dimension(); + } + size_t get_vector_size() const { + return vector_store->get_vector_size(); + } }; struct SubSparseCacheEntry{ @@ -94,15 +109,24 @@ struct SubSparseCacheEntry{ }; struct NewCacheEntry { - std::string index_id; + std::string index_id; //of the form "username/indexname" std::shared_ptr id_mapper; - std::unordered_map> dense_vectors; + std::unordered_map> dense_vectors; std::unordered_map> sparse_vectors; std::chrono::system_clock::time_point last_access; std::chrono::system_clock::time_point last_saved_at; std::chrono::system_clock::time_point updated_at; + + std::unique_ptr filter_store_; + std::unique_ptr meta_store_; + + // Flag to indicate if the index has been updated bool updated{false}; + + // Number of searches performed on this index. 
For a search with k=10 it will be 10 size_t searchCount{0}; + + // Per-index operation mutex for coordinating std::mutex operation_mutex; // Delete copy and move (mutex is non-movable) @@ -113,9 +137,10 @@ struct NewCacheEntry { // Factory method — returns nullptr on validation failure [[nodiscard]] static std::unique_ptr create( + std::string base_path, std::string index_id_, std::shared_ptr id_mapper_, - std::unordered_map> dense_, + std::unordered_map> dense_, std::unordered_map> sparse_, std::chrono::system_clock::time_point access_time_) { @@ -129,7 +154,7 @@ struct NewCacheEntry { } // Private constructor — only accessible via this factory return std::unique_ptr( - new NewCacheEntry(std::move(index_id_), + new NewCacheEntry(std::move(base_path), std::move(index_id_), std::move(id_mapper_), std::move(dense_), std::move(sparse_), @@ -143,10 +168,29 @@ struct NewCacheEntry { void resetSearchCount() { searchCount = 0; } + void updateFilter(ndd::idInt numeric_id, const std::string& new_filter_json) { + // Get existing meta + auto meta = meta_store_->get_meta(numeric_id); + + // Remove old filters + if(!meta.filter.empty()) { + filter_store_->remove_filters_from_json(numeric_id, meta.filter); + } + + // Update meta + meta.filter = new_filter_json; + meta_store_->store_meta(numeric_id, meta); + + // Add new filters + if(!new_filter_json.empty()) { + filter_store_->add_filters_from_json(numeric_id, new_filter_json); + } + } + private: - NewCacheEntry(std::string index_id_, + NewCacheEntry(std::string base_path, std::string index_id_, std::shared_ptr id_mapper_, - std::unordered_map> dense_, + std::unordered_map> dense_, std::unordered_map> sparse_, std::chrono::system_clock::time_point access_time_) : index_id(std::move(index_id_)) @@ -155,7 +199,10 @@ struct NewCacheEntry { , sparse_vectors(std::move(sparse_)) , last_access(access_time_) , last_saved_at(std::chrono::system_clock::now()) - {} + { + meta_store_ = std::make_unique(base_path + "/meta"); + filter_store_ 
= std::make_unique(base_path + "/filters"); + } }; @@ -250,6 +297,7 @@ class IndexManager { private: std::deque indices_list_; std::unordered_map indices_; + std::unordered_map> newindices_; //index name -> its cache entry std::shared_mutex indices_mutex_; std::string data_dir_; // This is for locking the LRU From b02ba1818678734daf305771b6d95f0d4840149a Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Fri, 13 Feb 2026 13:24:29 +0300 Subject: [PATCH 11/18] removing deadcode --- src/storage/vector_storage.hpp | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/storage/vector_storage.hpp b/src/storage/vector_storage.hpp index 2b73c481f..64ea6fb7b 100644 --- a/src/storage/vector_storage.hpp +++ b/src/storage/vector_storage.hpp @@ -565,25 +565,6 @@ class VectorStorage { return meta_store_->get_meta(numeric_id); } - // NOT used anymore. Deletes filter, meta and vector data. - void deletePoint(ndd::idInt numeric_id) { - try { - // Get metadata first to get filter info - auto meta = meta_store_->get_meta(numeric_id); - - // Remove filter entries if they exist - if(!meta.filter.empty()) { - filter_store_->remove_filters_from_json(numeric_id, meta.filter); - } - // Try to remove both vector and meta data - vector_store_->remove(numeric_id); - meta_store_->remove(numeric_id); - } catch(const std::exception& e) { - throw std::runtime_error(std::string("Failed to remove vector and metadata: ") - + e.what()); - } - } - // Deletes filter only. 
void deleteFilter(ndd::idInt numeric_id, std::string filter) { filter_store_->remove_filters_from_json(numeric_id, filter); From c47161b8967da916f9ccf5e7918fda607c767205 Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Mon, 16 Feb 2026 09:09:55 +0530 Subject: [PATCH 12/18] bug fixing --- src/core/ndd.cpp | 48 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/src/core/ndd.cpp b/src/core/ndd.cpp index fc88321d9..ff7a62c86 100644 --- a/src/core/ndd.cpp +++ b/src/core/ndd.cpp @@ -29,6 +29,7 @@ std::pair IndexManager::newcreateIndex(std::string& username, std::shared_ptr sparse_sub_index_cache; // Cleanup guard — removes partial artifacts if we don't reach commit + // TODO: This function is not complete auto cleanup = [&]() { if (committed) return; @@ -51,16 +52,15 @@ std::pair IndexManager::newcreateIndex(std::string& username, auto existing_indices = metadata_manager_->listUserIndexes(username); for(const auto& existing : existing_indices) { if(existing.first == index_name) { - throw std::runtime_error("Index with this name already exists for this user"); + ret.first = false; + ret.second = "index_name: " + index_name + " already exists."; + goto exit_newcreateIndex; } } // check if it exists in the filesystem index_path = data_dir_ + "/" + index_id; if(std::filesystem::exists(index_path)) { - // throw std::runtime_error("Index with this name already exists for this user"); - ret.first = false; - ret.second = "index_name: " + index_name + " already exists."; - goto exit_newcreateIndex; + throw std::runtime_error("index_name: " + index_name + " already exists."); } // Check if there is enough space on the disk @@ -92,10 +92,6 @@ std::pair IndexManager::newcreateIndex(std::string& username, auto id_mapper = std::make_shared(lmdb_dir, true, user_type); - //TODO - // cache_entry.meta_store_ = std::make_unique(vec_data_dir + "/meta"); - // cache_entry.filter_store_ = std::make_unique(vec_data_dir + "/filters"); 
- for(int i=0; i< dense_indexes.size(); i++){ auto& dense_sub_index = dense_indexes[i]; dense_sub_index_cache = std::make_shared(); @@ -134,11 +130,11 @@ std::pair IndexManager::newcreateIndex(std::string& username, ); /* add this dense_sub_index_cache entry to dense_map*/ - auto[it, inserted] = dense_cache_map.insert({dense_sub_index.space_type_str, std::move(dense_sub_index_cache)}); + auto[it, inserted] = dense_cache_map.insert({dense_sub_index.sub_index_name, std::move(dense_sub_index_cache)}); if(!inserted){ - LOG_INFO("Duplicate sub index name: " + dense_sub_index.space_type_str); + LOG_ERROR("Duplicate sub index name: " + dense_sub_index.sub_index_name); ret.first = false; - ret.second = "duplicate sub index_name: " + dense_sub_index.space_type_str; + ret.second = "duplicate sub index_name: " + dense_sub_index.sub_index_name; goto exit_newcreateIndex_cleanup; } } @@ -146,6 +142,8 @@ std::pair IndexManager::newcreateIndex(std::string& username, /** * TODO: Do a for loop for all sparse vectors */ + // for(int i=0; i< sparse_indexes.size(); i++){ + // } //add NewCacheEntry against index name to IndexManager.newindices_ @@ -170,12 +168,32 @@ std::pair IndexManager::newcreateIndex(std::string& username, } it->second->markUpdated(); indices_list_.push_front(index_id); + } - /** - * TODO: print and test all the vector entries here. 
- */ + /* + // TESTING CODE ONLY: Print dense sub-indexes + for (const auto& [idx_id, entry] : newindices_) { + LOG_INFO("Index: " << entry->index_id); + + // Print dense sub-indexes + LOG_INFO(" Dense sub-indexes (" << entry->dense_vectors.size() << "):"); + for (const auto& [sub_name, dense_entry] : entry->dense_vectors) { + LOG_INFO(" subvec_name: [" << sub_name << "]" + << " dim=" << dense_entry->alg->getDimension() + << " max_elements=" << dense_entry->alg->getMaxElements() + << " elements=" << dense_entry->alg->getElementsCount() + << " M=" << dense_entry->alg->getM() + << " ef_construction=" << dense_entry->alg->getEfConstruction() + << " space=" << dense_entry->alg->getSpaceTypeStr() + << " quant=" << static_cast(dense_entry->alg->getQuantLevel()) + << " checksum=" << dense_entry->alg->getChecksum() + << " remaining_capacity=" << dense_entry->alg->getRemainingCapacity() + ); + } } + */ + goto exit_newcreateIndex; } catch (...){ cleanup(); throw; From 41c7f84e67232d8b9cac424009ab2168ab79e39a Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Mon, 16 Feb 2026 14:20:47 +0530 Subject: [PATCH 13/18] multi vector insert start --- src/main.cpp | 157 +++++++++++++++++++++++++++++++++++++- src/utils/msgpack_ndd.hpp | 21 +++++ 2 files changed, 176 insertions(+), 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 2c7e9d5db..fdcd92d66 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -847,7 +847,7 @@ int main(int argc, char** argv) { CROW_ROUTE(app, "/api/v1/index//search") .CROW_MIDDLEWARES(app, AuthMiddleware) .methods("POST"_method)([&index_manager, &app](const crow::request& req, - std::string index_name) { + std::string index_name) { auto& ctx = app.get_context(req); // Format full index_id std::string index_id = ctx.username + "/" + index_name; @@ -957,11 +957,164 @@ int main(int argc, char** argv) { } }); + // newinsert a list of vectors + CROW_ROUTE(app, "/api/v1/index//vector/newinsert") + .CROW_MIDDLEWARES(app, AuthMiddleware) + 
.methods("POST"_method)([&index_manager, &app](const crow::request& req, + std::string index_name) { + auto& ctx = app.get_context(req); + std::string index_id = ctx.username + "/" + index_name; + auto content_type = req.get_header_value("Content-Type"); + + std::vector vectors; + + if(content_type == "application/json"){ + auto body = crow::json::load(req.body); + if(!body) { + return json_error(400, "Invalid JSON"); + } + + if(body.t() == crow::json::type::List) { + for(const auto& item : body) { + ndd::GenericVectorObject gvo; + + for(const auto& kv : item){ + std::string key = kv.key(); + if(key == "id"){ + if(kv.t() != crow::json::type::String) { + return json_error(400, "Parameter error: 'id' must be a string"); + } + gvo.id = kv.s(); + continue; + } + + if(key == "meta") { + if(kv.t() != crow::json::type::String) { + return json_error(400, "Parameter error: 'meta' must be a string"); + } + std::string meta_str = kv.s(); + gvo.meta.assign(meta_str.begin(), meta_str.end()); + continue; + } + + if(key == "filter") { + if(kv.t() != crow::json::type::String) { + return json_error(400, "Parameter error: 'filter' must be a string"); + } + gvo.filter = kv.s(); + continue; + } + + /*This should be a named vector*/ + + if(kv.t() != crow::json::type::Object) { + return json_error(400, "Field '" + key + "' must be an object"); + } + + /*Dense vector*/ + if(kv.has("vector") && kv.has("norm")) { + if(kv["vector"].t() != crow::json::type::List) { + return json_error(400, "Dense vector '" + key + "': 'vector' must be an array"); + } + ndd::DenseVectorObject dvo; + dvo.norm = static_cast(kv["norm"].d()); + for(const auto& val : kv["vector"]) { + dvo.vector.push_back(static_cast(val.d())); + } + /** + * TODO: Add a check for duplicate keys here + */ + gvo.dense_vectors[key] = std::move(dvo); + continue; + } + + /*Sparse vector*/ + if(kv.has("sparse_indices") && kv.has("sparse_values")) { + if(kv["sparse_indices"].t() != crow::json::type::List || + kv["sparse_values"].t() != 
crow::json::type::List) { + return json_error(400, + "Sparse vector '" + key + "': 'sparse_indices' and 'sparse_values' must be arrays"); + } + if(kv["sparse_indices"].size() != kv["sparse_values"].size()) { + return json_error(400, + "Sparse vector '" + key + "': sparse_indices and sparse_values must have same length"); + } + + ndd::SparseVectorObject svo; + for(const auto& idx : kv["sparse_indices"]) { + svo.sparse_ids.push_back(static_cast(idx.i())); + } + for(const auto& val : kv["sparse_values"]) { + svo.sparse_values.push_back(static_cast(val.d())); + } + /** + * TODO: Add a check for duplicate keys here + */ + gvo.sparse_vectors[key] = std::move(svo); + continue; + } + + /** + * At this point the request hasn't + * fallen in any of the categories. + */ + return json_error(400, "Format error"); + } + vectors.push_back(std::move(gvo)); + } + }else{ + return crow::response(400, "Body should be a list."); + } + // return crow::response(400, + // "Content-Type application/json not implemented yet"); + + /** + * TODO: Test if all the inserts actually do come in vectors + */ + // Debug: print all inserted vectors + for (const auto& gvo : vectors) { + std::cout << "=== Vector ID: " << gvo.id << " ===" << std::endl; + std::cout << " Filter: " << gvo.filter << std::endl; + std::cout << " Meta: " << std::string(gvo.meta.begin(), gvo.meta.end()) << std::endl; + + for (const auto& [name, dvo] : gvo.dense_vectors) { + std::cout << " Dense [" << name << "] norm=" << dvo.norm << " vector=["; + for (size_t i = 0; i < dvo.vector.size(); ++i) { + if (i > 0) std::cout << ", "; + std::cout << dvo.vector[i]; + } + std::cout << "]" << std::endl; + } + + for (const auto& [name, svo] : gvo.sparse_vectors) { + std::cout << " Sparse [" << name << "] indices=["; + for (size_t i = 0; i < svo.sparse_ids.size(); ++i) { + if (i > 0) std::cout << ", "; + std::cout << svo.sparse_ids[i] << ":" << svo.sparse_values[i]; + } + std::cout << "]" << std::endl; + } + } + std::cout << "Total vectors 
inserted: " << vectors.size() << std::endl; + + bool success = true; //= index_manager.addVectors(index_id, vectors); + return crow::response(success ? 200 : 400); + } + else if(content_type == "application/msgpack"){ + return crow::response(400, + "Content-Type application/msgpack not implemented yet"); + } + else{ + return crow::response(400, + "Content-Type must be application/msgpack or application/json"); + } + }); + // Insert a list of vectors CROW_ROUTE(app, "/api/v1/index//vector/insert") .CROW_MIDDLEWARES(app, AuthMiddleware) .methods("POST"_method)([&index_manager, &app](const crow::request& req, - std::string index_name) { + std::string index_name) { auto& ctx = app.get_context(req); std::string index_id = ctx.username + "/" + index_name; diff --git a/src/utils/msgpack_ndd.hpp b/src/utils/msgpack_ndd.hpp index d0891daa7..76836e2a4 100644 --- a/src/utils/msgpack_ndd.hpp +++ b/src/utils/msgpack_ndd.hpp @@ -42,6 +42,27 @@ namespace ndd { MSGPACK_DEFINE(id, meta, filter, norm, vector) }; + struct DenseVectorObject{ + float norm; // Vector norm (only for cosine distance) + std::vector vector; // Vector data + }; + + struct SparseVectorObject{ + std::vector sparse_ids; // Sparse vector indices + std::vector sparse_values; // Sparse vector values + }; + + struct GenericVectorObject { + std::string id; // String identifier + std::vector meta; // Binary metadata (zipped) + std::string filter; // Filter as JSON string + + std::unordered_map dense_vectors; + std::unordered_map sparse_vectors; + + // MSGPACK_DEFINE(id, meta, filter, norm, vector, sparse_ids, sparse_values) + }; + struct HybridVectorObject { std::string id; // String identifier std::vector meta; // Binary metadata (zipped) From 099368e6bdce21bf7eea2d6691fa7c2561b06337 Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Tue, 17 Feb 2026 14:06:52 +0530 Subject: [PATCH 14/18] small changes --- src/core/ndd.hpp | 15 ++++++++++----- src/storage/vector_storage.hpp | 2 +- src/storage/wal.hpp | 2 +- 3 files 
changed, 12 insertions(+), 7 deletions(-) diff --git a/src/core/ndd.hpp b/src/core/ndd.hpp index 9e09491ad..832db1dc7 100644 --- a/src/core/ndd.hpp +++ b/src/core/ndd.hpp @@ -136,7 +136,7 @@ struct NewCacheEntry { NewCacheEntry& operator=(NewCacheEntry&&) = delete; // Factory method — returns nullptr on validation failure - [[nodiscard]] static std::unique_ptr create( + [[nodiscard]] static std::shared_ptr create( std::string base_path, std::string index_id_, std::shared_ptr id_mapper_, @@ -153,7 +153,7 @@ struct NewCacheEntry { return nullptr; } // Private constructor — only accessible via this factory - return std::unique_ptr( + return std::shared_ptr( new NewCacheEntry(std::move(base_path), std::move(index_id_), std::move(id_mapper_), std::move(dense_), @@ -297,7 +297,7 @@ class IndexManager { private: std::deque indices_list_; std::unordered_map indices_; - std::unordered_map> newindices_; //index name -> its cache entry + std::unordered_map> newindices_; //index name -> its cache entry std::shared_mutex indices_mutex_; std::string data_dir_; // This is for locking the LRU @@ -446,6 +446,8 @@ class IndexManager { } } + std::shared_ptr newgetIndexEntry(std::string& index_id); + // Get index entry with proper lock management - does NOT hold locks after return CacheEntry& getIndexEntry(const std::string& index_id) { // First try to find the index without write lock @@ -487,7 +489,6 @@ class IndexManager { saveIndexInternal(entry); } -private: // Internal saveIndex implementation that doesn't call getIndexEntry // Used by functions that already have the entry and mutex void saveIndexInternal(CacheEntry& entry) { @@ -618,7 +619,6 @@ class IndexManager { return getUserPath(username) + "/" + index_name; } -public: IndexManager(size_t max_indices, const std::string& data_dir, const PersistenceConfig& persistence_config = PersistenceConfig{}) : @@ -1086,11 +1086,14 @@ class IndexManager { // Use the metadata manager directly to get the list of indexes return 
metadata_manager_->listUserIndexes(username); } + std::vector> listAllIndexes() { // Use the metadata manager directly to get the list of indexes return metadata_manager_->listAllIndexes(); } + void newloadIndex(const std::string& index_id); + void loadIndex(const std::string& index_id) { std::string index_path = data_dir_ + "/" + index_id + "/main.idx"; std::string lmdb_dir = data_dir_ + "/" + index_id + "/ids"; @@ -1235,6 +1238,8 @@ class IndexManager { entry.alg = std::move(new_alg); } + std::pair addNamedVectors(std::string& index_id, std::vector& vectors); + template bool addVectors(const std::string& index_id, const std::vector& vectors) { try { diff --git a/src/storage/vector_storage.hpp b/src/storage/vector_storage.hpp index 64ea6fb7b..1455635ef 100644 --- a/src/storage/vector_storage.hpp +++ b/src/storage/vector_storage.hpp @@ -396,7 +396,7 @@ class MetaStore { int rc = mdbx_txn_commit(txn); if(rc != MDBX_SUCCESS) { throw std::runtime_error("Failed to commit transaction: " - + std::string(mdbx_strerror(rc))); + + std::string(mdbx_strerror(rc))); } }; diff --git a/src/storage/wal.hpp b/src/storage/wal.hpp index 08cebe0c3..88463244f 100644 --- a/src/storage/wal.hpp +++ b/src/storage/wal.hpp @@ -35,7 +35,7 @@ class WriteAheadLog { if(!log_file_) { std::string err_string; err_string = "Failed to open WAL file: " + log_path_ - + " errno: " + std::to_string(errno) + " errcode: " + std::strerror(errno); + + " errno: " + std::to_string(errno) + " errcode: " + std::strerror(errno); LOG_ERROR(err_string); throw std::runtime_error(err_string); From 3a3021827c007fd38507efcc8b0ec1bd0f48b4f1 Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Tue, 17 Feb 2026 14:07:54 +0530 Subject: [PATCH 15/18] starting implementation of addNamedVectors --- src/core/ndd.cpp | 122 +++++++++++++++++++++++++++++++++++++++++++++++ src/main.cpp | 49 +++++++------------ 2 files changed, 139 insertions(+), 32 deletions(-) diff --git a/src/core/ndd.cpp b/src/core/ndd.cpp index 
ff7a62c86..63f064784 100644 --- a/src/core/ndd.cpp +++ b/src/core/ndd.cpp @@ -10,6 +10,11 @@ void insert_or_throw(Map& map, Key&& key, Value&& value) { } } +/*TODO: Critical*/ +void IndexManager::newloadIndex(const std::string& index_id){ + std::runtime_error("IndexManager::newloadIndex is not implemented"); +} + std::pair IndexManager::newcreateIndex(std::string& username, UserType user_type, std::string& index_name, std::vector dense_indexes, @@ -207,6 +212,123 @@ cleanup(); } +/** + * new impl. of getIndexEntry. Copies the logic as is + */ +std::shared_ptr IndexManager::newgetIndexEntry(std::string &index_id){ + + /*First try with reader's lock*/ + { + //std::shared_lock read_lock(indices_mutex_); + auto it = newindices_.find(index_id); + if(it != newindices_.end()) { + std::cout << __func__ << " it not end() for index_id: " << index_id << std::endl; + return it->second; + } + } + + /*Try with writer's lock*/ + { + /** + * XXX: incomplete IMPL. phase. This code snippet should not be called right now. + * This is because all the required functions are not implemented. + */ + return nullptr; + + std::unique_lock write_lock(indices_mutex_); + auto it = newindices_.find(index_id); + if(it == newindices_.end()) { + newloadIndex(index_id); // modifies indices_ [NOT IMPLEMENTED] + evictIfNeeded(); // Clean eviction only + } + it = newindices_.find(index_id); + if(it == newindices_.end()) { + return nullptr; + // throw std::runtime_error("[ERROR] Failed to load index"); + } + return it->second; + } +} + +/** + * Adds list of named vectors. 
+ */ +std::pair IndexManager::addNamedVectors(std::string& index_id, + std::vector& vectors) +{ + std::pair ret; + std::shared_ptr entry = nullptr; + ret.first = true; + ret.second = ""; + + if(vectors.empty()) { + ret.first = false; + ret.second = "no vectors to add"; + LOG_ERROR(ret.second); + goto exit_addNamedVectors; + } + +#if 0 + std::cout << "index_id: " << index_id << std::endl; + + // Debug: print all inserted vectors + for (const auto& gvo : vectors) { + std::cout << "=== Vector ID: " << gvo.id << " ===" << std::endl; + std::cout << " Filter: " << gvo.filter << std::endl; + std::cout << " Meta: " << std::string(gvo.meta.begin(), gvo.meta.end()) << std::endl; + + for (const auto& [name, dvo] : gvo.dense_vectors) { + std::cout << " Dense [" << name << "] norm=" << dvo.norm << " vector=["; + for (size_t i = 0; i < dvo.vector.size(); ++i) { + if (i > 0) std::cout << ", "; + std::cout << dvo.vector[i]; + } + std::cout << "]" << std::endl; + } + + for (const auto& [name, svo] : gvo.sparse_vectors) { + std::cout << " Sparse [" << name << "] indices=["; + for (size_t i = 0; i < svo.sparse_ids.size(); ++i) { + if (i > 0) std::cout << ", "; + std::cout << svo.sparse_ids[i] << ":" << svo.sparse_values[i]; + } + std::cout << "]" << std::endl; + } + } + std::cout << "Total vectors inserted: " << vectors.size() << std::endl; + +#endif //if 0 + + /* Get index from index_id*/ + entry = newgetIndexEntry(index_id); + if(!entry){ + ret.first = false; + ret.second = "Could not find index: " + index_id; + + /*For now*/ + ret.second += " XXXX: MAYBE THIS IS BECAUSE the IMPLEMENTATION OF newgetIndexEntry is incomplete"; + LOG_INFO(ret.second); + goto exit_addNamedVectors; + } + + std::cout << "entry.index_id - " << entry->index_id << std::endl; + /* Create intID for each StringID using IDMapper*/ + + + + /** + * TODO: Critical + * Skipping usage of WAL right now. 
+ * We have to decide if WAL needs to be a per sub-index concept or not + */ + + + +exit_addNamedVectors: + return ret; +} + + /** * returns if index config is sane */ diff --git a/src/main.cpp b/src/main.cpp index fdcd92d66..adfeff26f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1067,38 +1067,6 @@ int main(int argc, char** argv) { } // return crow::response(400, // "Content-Type application/json not implemented yet"); - - /** - * TODO: Test if all the inserts actually do come in vectors - */ - // Debug: print all inserted vectors - for (const auto& gvo : vectors) { - std::cout << "=== Vector ID: " << gvo.id << " ===" << std::endl; - std::cout << " Filter: " << gvo.filter << std::endl; - std::cout << " Meta: " << std::string(gvo.meta.begin(), gvo.meta.end()) << std::endl; - - for (const auto& [name, dvo] : gvo.dense_vectors) { - std::cout << " Dense [" << name << "] norm=" << dvo.norm << " vector=["; - for (size_t i = 0; i < dvo.vector.size(); ++i) { - if (i > 0) std::cout << ", "; - std::cout << dvo.vector[i]; - } - std::cout << "]" << std::endl; - } - - for (const auto& [name, svo] : gvo.sparse_vectors) { - std::cout << " Sparse [" << name << "] indices=["; - for (size_t i = 0; i < svo.sparse_ids.size(); ++i) { - if (i > 0) std::cout << ", "; - std::cout << svo.sparse_ids[i] << ":" << svo.sparse_values[i]; - } - std::cout << "]" << std::endl; - } - } - std::cout << "Total vectors inserted: " << vectors.size() << std::endl; - - bool success = true; //= index_manager.addVectors(index_id, vectors); - return crow::response(success ? 
200 : 400); } else if(content_type == "application/msgpack"){ return crow::response(400, @@ -1108,6 +1076,23 @@ int main(int argc, char** argv) { return crow::response(400, "Content-Type must be application/msgpack or application/json"); } + + /* Insert the batch of vectors to the index */ + try{ + auto ret = index_manager.addNamedVectors(index_id, vectors); + + if(!ret.first){ + return json_error(400, ret.second); + } + return crow::response(200); + + } catch(const std::runtime_error& e) { + return json_error(400, e.what()); + } catch(const std::exception& e) { + LOG_DEBUG("Batch insertion failed: " << e.what()); + return json_error_500(ctx.username, req.url, e.what()); + } + }); // Insert a list of vectors From c0b676846aa93c1da8d6d4e1d0f35d638334db81 Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Tue, 17 Feb 2026 18:07:38 +0530 Subject: [PATCH 16/18] new idmapper without deep copy --- CMakeLists.txt | 2 +- src/storage/id_mapper.cpp | 257 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 258 insertions(+), 1 deletion(-) create mode 100644 src/storage/id_mapper.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 4eddff311..6363e05f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,7 +239,7 @@ message(STATUS "Binary name: ${NDD_BINARY_NAME}") # Create the target -add_executable(${NDD_BINARY_NAME} src/main.cpp src/core/ndd.cpp ${LMDB_SOURCES} third_party/roaring_bitmap/roaring.c) +add_executable(${NDD_BINARY_NAME} src/main.cpp src/core/ndd.cpp src/storage/id_mapper.cpp ${LMDB_SOURCES} third_party/roaring_bitmap/roaring.c) # Set MDBX-specific compile flags set_source_files_properties(${LMDB_SOURCES} PROPERTIES diff --git a/src/storage/id_mapper.cpp b/src/storage/id_mapper.cpp new file mode 100644 index 000000000..bab90f0ab --- /dev/null +++ b/src/storage/id_mapper.cpp @@ -0,0 +1,257 @@ +#include +#include "id_mapper.hpp" + +template +bool IDMapper::newcreate_ids_batch(std::vector& vectors, void* wal_ptr) +{ + bool ret = false; + constexpr idInt 
INVALID_LABEL = static_cast(-1); + + if(vectors.empty()){ + return ret; + } + + LOG_DEBUG("=== create_ids_batch START ==="); + + std::vector> id_tuples; + + id_tuples.reserve(vectors.size()); + for(const auto& vec : vectors) { + // true means that the ID is new and false means that the ID already exists + // is_reused defaults to false + id_tuples.emplace_back(vec.id, INVALID_LABEL, true, false); + } + + LOG_DEBUG("--- STEP 2: LMDB database check ---"); + { + MDBX_txn* txn; + int rc = mdbx_txn_begin(env_, nullptr, MDBX_TXN_RDONLY, &txn); + if(rc != MDBX_SUCCESS) { + LOG_DEBUG("ERROR: Failed to begin read-only transaction: " << mdbx_strerror(rc)); + throw std::runtime_error("Failed to begin read-only transaction: " + + std::string(mdbx_strerror(rc))); + } + LOG_DEBUG("LMDB read-only transaction started successfully"); + + try { + int keys_checked = 0; + for(auto& tup : id_tuples) { + if(std::get<1>(tup) == INVALID_LABEL) { + const std::string& str_id = std::get<0>(tup); + MDBX_val key{(void*)str_id.c_str(), str_id.size()}; + MDBX_val data; + + // Add debug logging + LOG_DEBUG("LMDB: Checking key[" << keys_checked << "]: [" << str_id + << "] size: " << str_id.size()); + keys_checked++; + + rc = mdbx_get(txn, dbi_, &key, &data); + if(rc == MDBX_SUCCESS) { + idInt existing_id = *(idInt*)data.iov_base; + LOG_DEBUG("LMDB: ✓ FOUND existing ID: " << existing_id << " for key: [" + << str_id << "]"); + std::get<1>(tup) = existing_id; + std::get<2>(tup) = false; // ID already exists + } else if(rc == MDBX_NOTFOUND) { + LOG_DEBUG("LMDB: ✗ NOT FOUND: [" << str_id << "]"); + std::get<1>(tup) = 0; + } else { + LOG_DEBUG("LMDB: ERROR for key: [" << str_id + << "] error: " << mdbx_strerror(rc)); + mdbx_txn_abort(txn); + throw std::runtime_error("Database error checking ID: " + + std::string(mdbx_strerror(rc))); + } + } + } + LOG_DEBUG("LMDB: Checked " << keys_checked << " keys in database"); + mdbx_txn_abort(txn); + LOG_DEBUG("LMDB check done"); + } catch(...) 
{ + mdbx_txn_abort(txn); + throw; + } + } + + //Count and generate new IDs + LOG_DEBUG("--- STEP 3: Count and generate new IDs ---"); + size_t total_new_ids_needed = + std::count_if(id_tuples.begin(), id_tuples.end(), [](const auto& t) { + return std::get<1>(t) == 0; + }); + LOG_DEBUG("Total new IDs needed: " << total_new_ids_needed); + + size_t fresh_ids_count = total_new_ids_needed; + size_t deleted_index = 0; + + if(use_deleted_ids) { + // Use deleted IDs first, but ONLY for entries that are actually new (not found in DB) + std::vector deletedIds = getDeletedIds(fresh_ids_count); + + for(auto& tup : id_tuples) { + // Only assign deleted IDs to entries that are new (id=0 and is_new=true) + if(std::get<1>(tup) == 0 && std::get<2>(tup) == true + && deleted_index < deletedIds.size()) { + std::get<1>(tup) = deletedIds[deleted_index++]; + std::get<3>(tup) = true; // Mark as reused + // Keep std::get<2>(tup) as true because this still needs to be written to DB + } + } + fresh_ids_count -= deleted_index; // Reduce by actual number of deleted IDs used + } + + if(total_new_ids_needed > 0) { + LOG_DEBUG("Generating " << fresh_ids_count << " fresh IDs"); + + std::vector new_ids; + if(fresh_ids_count > 0) { + new_ids = get_next_ids(fresh_ids_count); + } + + // CRITICAL FIX: Log to WAL AFTER generating IDs (minimal risk window) + if(wal_ptr) { + WriteAheadLog* wal = static_cast(wal_ptr); + std::vector wal_entries; + + // Log reused IDs + for(const auto& tup : id_tuples) { + if(std::get<2>(tup) && std::get<1>(tup) != 0) { + wal_entries.push_back({WALOperationType::VECTOR_ADD, std::get<1>(tup)}); + } + } + + // Log fresh IDs + for(idInt id : new_ids) { + wal_entries.push_back({WALOperationType::VECTOR_ADD, id}); + } + + if(!wal_entries.empty()) { + wal->log(wal_entries); + } + } + + if(fresh_ids_count > 0 && new_ids.size() != fresh_ids_count) { + throw std::runtime_error("Mismatch: get_next_ids returned " + + std::to_string(new_ids.size()) + " but expected " + + 
std::to_string(fresh_ids_count)); + } + + size_t new_id_index = 0; + + // Step 4: Write txn with auto-resize retry + LOG_DEBUG("--- STEP 4: Writing to database ---"); + auto try_write = [&](MDBX_txn* txn) -> int { + int writes_attempted = 0; + for(auto& tup : id_tuples) { + // Write entries that need to be written to DB (is_new=true) but don't have ID=0 + if(std::get<2>(tup) == true && std::get<1>(tup) != 0) { + const std::string& str_id = std::get<0>(tup); + idInt id = std::get<1>(tup); + + MDBX_val key{(void*)str_id.c_str(), str_id.size()}; + MDBX_val data{&id, sizeof(idInt)}; + + // Add debug logging for write operations + LOG_DEBUG("WRITE[" << writes_attempted << "]: key=[" << str_id + << "] size=" << str_id.size() << " ID=" << id); + writes_attempted++; + + int rc = mdbx_put(txn, dbi_, &key, &data, MDBX_UPSERT); + if(rc == MDBX_MAP_FULL) { + LOG_DEBUG("WRITE ERROR: MDBX_MAP_FULL for key=[" << str_id << "]"); + return MDBX_MAP_FULL; + } + if(rc != MDBX_SUCCESS) { + LOG_DEBUG("WRITE ERROR: [" << str_id + << "] error: " << mdbx_strerror(rc)); + return rc; + } + + LOG_DEBUG("WRITE SUCCESS: [" << str_id << "] with ID: " << id); + + } else if(std::get<1>(tup) == 0) { + // Handle remaining entries that still need new IDs + if(new_id_index >= new_ids.size()) { + LOG_DEBUG("ERROR: new_id_index (" + << new_id_index << ") >= new_ids.size() (" << new_ids.size() + << ")"); + return MDBX_PROBLEM; // Internal error + } + idInt new_id = new_ids[new_id_index++]; + const std::string& str_id = std::get<0>(tup); + + MDBX_val key{(void*)str_id.c_str(), str_id.size()}; + MDBX_val data{&new_id, sizeof(idInt)}; + + writes_attempted++; + + int rc = mdbx_put(txn, dbi_, &key, &data, MDBX_UPSERT); + if(rc == MDBX_MAP_FULL) { + LOG_DEBUG("WRITE_NEW ERROR: MDBX_MAP_FULL for key=[" << str_id << "]"); + return MDBX_MAP_FULL; + } + if(rc != MDBX_SUCCESS) { + LOG_DEBUG("WRITE_NEW ERROR: [" << str_id + << "] error: " << mdbx_strerror(rc)); + return rc; + } + + std::get<1>(tup) = new_id; + } + } 
+ return MDBX_SUCCESS; + }; + + MDBX_txn* txn; + int rc = mdbx_txn_begin(env_, nullptr, MDBX_TXN_READWRITE, &txn); + if(rc != MDBX_SUCCESS) { + throw std::runtime_error("Failed to begin write transaction: " + + std::string(mdbx_strerror(rc))); + } + + rc = try_write(txn); + // MDBX auto-grows, no manual resize needed + if(rc != MDBX_SUCCESS) { + mdbx_txn_abort(txn); + throw std::runtime_error("Failed to insert new IDs: " + + std::string(mdbx_strerror(rc))); + } + + rc = mdbx_txn_commit(txn); + if(rc != MDBX_SUCCESS) { + throw std::runtime_error("Failed to commit transaction: " + + std::string(mdbx_strerror(rc))); + } + LOG_DEBUG("Write transaction committed successfully"); + } else { + LOG_DEBUG("No new IDs needed, skipping write transaction"); + } + + // Final state logging + LOG_DEBUG("--- FINAL RESULTS ---"); + std::vector> result; + result.reserve(id_tuples.size()); + for(size_t i = 0; i < id_tuples.size(); i++) { + const auto& tup = id_tuples[i]; + bool is_new_to_hnsw = std::get<2>(tup); + // If the ID was reused from deleted list, treat it as an update (not new to HNSW) + if(std::get<3>(tup)) { + is_new_to_hnsw = false; + } + vectors[i].numeric_id.first = std::get<1>(tup); + vectors[i].numeric_id.second = is_new_to_hnsw; + } + ret = true; + + LOG_DEBUG("=== create_ids_batch END ==="); + +exit_newcreate_ids_batch: + return ret; +} + +template bool IDMapper::newcreate_ids_batch( + std::vector&, void*); + +template bool IDMapper::newcreate_ids_batch( + std::vector&, void*); \ No newline at end of file From 5f625ff65d1a349071c34310584a2675b3157aa1 Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Tue, 17 Feb 2026 18:10:09 +0530 Subject: [PATCH 17/18] adding NamedVectorAdd support --- src/core/ndd.cpp | 106 +++++++++++++++++++++++++++++++++++--- src/core/ndd.hpp | 8 +-- src/storage/id_mapper.hpp | 10 +++- src/utils/msgpack_ndd.hpp | 10 ++++ 4 files changed, 121 insertions(+), 13 deletions(-) diff --git a/src/core/ndd.cpp b/src/core/ndd.cpp index 
63f064784..2ed6f1624 100644 --- a/src/core/ndd.cpp +++ b/src/core/ndd.cpp @@ -214,6 +214,7 @@ cleanup(); /** * new impl. of getIndexEntry. Copies the logic as is + * XXX: Logic is incomplete. Check the function impl. */ std::shared_ptr IndexManager::newgetIndexEntry(std::string &index_id){ @@ -222,7 +223,6 @@ std::shared_ptr IndexManager::newgetIndexEntry(std::string &index //std::shared_lock read_lock(indices_mutex_); auto it = newindices_.find(index_id); if(it != newindices_.end()) { - std::cout << __func__ << " it not end() for index_id: " << index_id << std::endl; return it->second; } } @@ -257,7 +257,9 @@ std::pair IndexManager::addNamedVectors(std::string& index_id std::vector& vectors) { std::pair ret; - std::shared_ptr entry = nullptr; + std::shared_ptr index_cache_entry = nullptr; + std::vector> meta_batch; + std::vector> filter_batch; ret.first = true; ret.second = ""; @@ -300,8 +302,8 @@ std::pair IndexManager::addNamedVectors(std::string& index_id #endif //if 0 /* Get index from index_id*/ - entry = newgetIndexEntry(index_id); - if(!entry){ + index_cache_entry = newgetIndexEntry(index_id); + if(!index_cache_entry){ ret.first = false; ret.second = "Could not find index: " + index_id; @@ -311,10 +313,7 @@ std::pair IndexManager::addNamedVectors(std::string& index_id goto exit_addNamedVectors; } - std::cout << "entry.index_id - " << entry->index_id << std::endl; - /* Create intID for each StringID using IDMapper*/ - - + std::cout << "index_cache_entry.index_id - " << index_cache_entry->index_id << std::endl; /** * TODO: Critical @@ -323,6 +322,97 @@ std::pair IndexManager::addNamedVectors(std::string& index_id */ + /* Create intID for each StringID using IDMapper*/ + /** + * DELETES NOT SUPPORTED + * XXX: Here we check if there have been deletes before calling + * create_ids_batch appropriately. Right now it is not clear how + * deletes will be done - hence we arent checking deletes, just creating ids. 
+ */ + if(!index_cache_entry->id_mapper->newcreate_ids_batch(vectors, nullptr)){ + ret.first = false; + ret.second = "Could not create IDs for: " + index_id; + goto exit_addNamedVectors; + } + + /*DEBUGGING ONLY */ +#if 0 + for (size_t i = 0; i < vectors.size(); ++i) { + const auto& obj = vectors[i]; + + std::cout << "vector[" << i << "] " + << "id: " << obj.id + << " numeric_id: (" + << obj.numeric_id.first << ", " + << std::boolalpha << obj.numeric_id.second << ")\n"; + } +#endif //if 0 + + for (size_t i = 0; i < vectors.size(); ++i) { + const auto& obj = vectors[i]; + ndd::VectorMeta meta; + + meta.id = obj.id; //string id + meta.filter = obj.filter; + meta.meta = obj.meta; + + meta_batch.emplace_back(obj.numeric_id.first, std::move(meta)); + + /*populate the filter*/ + if(!obj.filter.empty()){ + filter_batch.emplace_back(obj.numeric_id.first, obj.filter); + } + + // Print vector object + std::cout << "=== vector[" << i << "] ===" << std::endl; + std::cout << " id: " << obj.id + << " numeric_id: (" << obj.numeric_id.first + << ", " << std::boolalpha << obj.numeric_id.second << ")" + << std::endl; + std::cout << " filter: " << obj.filter << std::endl; + + // Print corresponding meta_batch entry + auto& mb = meta_batch.back(); + std::cout << " meta_batch -> numeric_id: " << mb.first + << ", meta.id: " << mb.second.id + << ", meta.filter: " << mb.second.filter + << std::endl; + + // Print filter_batch entry (if added this iteration) + if (!obj.filter.empty()) { + auto& fb = filter_batch.back(); + std::cout << " filter_batch -> numeric_id: " << fb.first + << ", filter: " << fb.second + << std::endl; + } + } + + // /** + // * Add filter and metadata [it is a per index property] + // */ + index_cache_entry->meta_store_->store_meta_batch(meta_batch); + + if(!filter_batch.empty()) { + index_cache_entry->filter_store_->add_filters_from_json_batch(filter_batch); + } + + /*RESTART FROM HERE*/ + + /** + * create a per-subindex list and then do for every. 
+ */ + + /** + * Now iterate over each named sub index and save them individually + */ + + /** + * for each subindex: + * 1. quantize the vector based on its individual quantization level + * 2. TODO ... + */ + + /*TODO: Sparse Vectors support*/ exit_addNamedVectors: return ret; diff --git a/src/core/ndd.hpp b/src/core/ndd.hpp index 832db1dc7..b8d64ef5e 100644 --- a/src/core/ndd.hpp +++ b/src/core/ndd.hpp @@ -1350,8 +1350,8 @@ class IndexManager { // Add to HNSW index in parallel using pre-quantized data from QuantVectorObject size_t available_threads = settings::NUM_PARALLEL_INSERTS; const size_t num_threads = (available_threads < quantized_vectors.size()) - ? available_threads - : quantized_vectors.size(); + ? available_threads + : quantized_vectors.size(); std::vector threads; const size_t chunk_size = (quantized_vectors.size() + num_threads - 1) / num_threads; // Ceiling division @@ -1362,8 +1362,8 @@ class IndexManager { // Calculate start and end indices for this thread size_t start_idx = t * chunk_size; size_t end_idx = (start_idx + chunk_size < quantized_vectors.size()) - ? (start_idx + chunk_size) - : quantized_vectors.size(); + ? (start_idx + chunk_size) + : quantized_vectors.size(); // Process assigned chunk of vectors for(size_t i = start_idx; i < end_idx; i++) { diff --git a/src/storage/id_mapper.hpp b/src/storage/id_mapper.hpp index eff79b30e..376673dcc 100644 --- a/src/storage/id_mapper.hpp +++ b/src/storage/id_mapper.hpp @@ -13,6 +13,7 @@ #include #include #include "../core/types.hpp" +#include "../utils/msgpack_ndd.hpp" #include "../utils/settings.hpp" using ndd::idInt; @@ -79,11 +80,18 @@ class IDMapper { mdbx_env_close(env_); } + /** + * is same as create_ids_batch + * Takes a vector of ndd::GenericVectorObject and adds the IDs to GenericVectorObject's pair + */ + template bool newcreate_ids_batch(std::vector& vectors, void* wal_ptr = nullptr); + + // Create string ID to numeric ID mapping. 
If string ids exists in the database, it will return // the existing numeric ID along with flag It will also use old numeric IDs of deleted points template std::vector> create_ids_batch(const std::vector& str_ids, - void* wal_ptr = nullptr) { + void* wal_ptr = nullptr) { if(str_ids.empty()) { return {}; } diff --git a/src/utils/msgpack_ndd.hpp b/src/utils/msgpack_ndd.hpp index 76836e2a4..a810417f9 100644 --- a/src/utils/msgpack_ndd.hpp +++ b/src/utils/msgpack_ndd.hpp @@ -45,11 +45,15 @@ namespace ndd { struct DenseVectorObject{ float norm; // Vector norm (only for cosine distance) std::vector vector; // Vector data + + MSGPACK_DEFINE(norm, vector); }; struct SparseVectorObject{ std::vector sparse_ids; // Sparse vector indices std::vector sparse_values; // Sparse vector values + + MSGPACK_DEFINE(sparse_ids, sparse_values); }; struct GenericVectorObject { @@ -60,6 +64,12 @@ namespace ndd { std::unordered_map dense_vectors; std::unordered_map sparse_vectors; + /** + * Ignored by msgpack. Dont include in MSGPACK_DEFINE. + * this is populated later by the IDMapper. + */ + std::pair numeric_id {static_cast(-1), false}; //setting it to default + // MSGPACK_DEFINE(id, meta, filter, norm, vector, sparse_ids, sparse_values) }; From 04fd011af1af7bd37e4406eadb776097736ee11c Mon Sep 17 00:00:00 2001 From: Shaleen Garg Date: Fri, 20 Feb 2026 09:35:26 +0530 Subject: [PATCH 18/18] small changes --- src/core/ndd.cpp | 7 +++++-- src/utils/msgpack_ndd.hpp | 24 ++++++++++++------------ 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/core/ndd.cpp b/src/core/ndd.cpp index 2ed6f1624..ccf29e866 100644 --- a/src/core/ndd.cpp +++ b/src/core/ndd.cpp @@ -252,6 +252,9 @@ std::shared_ptr IndexManager::newgetIndexEntry(std::string &index /** * Adds list of named vectors. + * XXX: Things that are omitted for now: + * 1. WAL + * 2. 
Sparse vector support */ std::pair IndexManager::addNamedVectors(std::string& index_id, std::vector& vectors) @@ -308,7 +311,7 @@ std::pair IndexManager::addNamedVectors(std::string& index_id ret.second = "Could not find index: " + index_id; /*For now*/ - ret.second += " XXXX: MAYBE THIS IS BECAUSE the IMPLEMENTATION OF newgetIndexEntry is incomplete"; + ret.second += " XXXX: THIS IS BECAUSE the IMPLEMENTATION OF newgetIndexEntry is incomplete"; LOG_INFO(ret.second); goto exit_addNamedVectors; } @@ -327,7 +330,7 @@ std::pair IndexManager::addNamedVectors(std::string& index_id * DELETES NOT SUPPORTED * XXX: Here we check if there have been deletes before calling * create_ids_batch appropriately. Right now it is not clear how - * deletes will be done - hence we arent checking deletes, just creating ids. + * deletes will be done - hence we aren't checking deletes, just creating ids. */ if(!index_cache_entry->id_mapper->newcreate_ids_batch(vectors, nullptr)){ ret.first = false; diff --git a/src/utils/msgpack_ndd.hpp b/src/utils/msgpack_ndd.hpp index a810417f9..9107ac65d 100644 --- a/src/utils/msgpack_ndd.hpp +++ b/src/utils/msgpack_ndd.hpp @@ -42,6 +42,18 @@ namespace ndd { MSGPACK_DEFINE(id, meta, filter, norm, vector) }; + struct HybridVectorObject { + std::string id; // String identifier + std::vector meta; // Binary metadata (zipped) + std::string filter; // Filter as JSON string + float norm; // Vector norm (only for cosine distance) + std::vector vector; // Vector data + std::vector sparse_ids; // Sparse vector indices + std::vector sparse_values; // Sparse vector values + + MSGPACK_DEFINE(id, meta, filter, norm, vector, sparse_ids, sparse_values) + }; + struct DenseVectorObject{ float norm; // Vector norm (only for cosine distance) std::vector vector; // Vector data @@ -73,18 +85,6 @@ namespace ndd { // MSGPACK_DEFINE(id, meta, filter, norm, vector, sparse_ids, sparse_values) }; - struct HybridVectorObject { - std::string id; // String identifier - std::vector 
meta; // Binary metadata (zipped) - std::string filter; // Filter as JSON string - float norm; // Vector norm (only for cosine distance) - std::vector vector; // Vector data - std::vector sparse_ids; // Sparse vector indices - std::vector sparse_values; // Sparse vector values - - MSGPACK_DEFINE(id, meta, filter, norm, vector, sparse_ids, sparse_values) - }; - // Search result structure struct VectorResult { float similarity; // Similarity from query (1-distance)